/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/3dnow.c |
---|
0,0 → 1,91 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2003 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* |
* 3DNow! optimizations contributed by |
* Holger Waechtler <holger@akaflieg.extern.tu-berlin.de> |
*/ |
#include "main/glheader.h" |
#include "main/context.h" |
#include "math/m_xform.h" |
#include "tnl/t_context.h" |
#include "3dnow.h" |
#include "x86_xform.h" |
#ifdef DEBUG_MATH |
#include "math/m_debug.h" |
#endif |
#ifdef USE_3DNOW_ASM
/*
 * Declarations for the routines implemented in the 3DNow! assembly files.
 * They are only visible when USE_3DNOW_ASM is defined.
 */
DECLARE_XFORM_GROUP( 3dnow, 2 )
DECLARE_XFORM_GROUP( 3dnow, 3 )
DECLARE_XFORM_GROUP( 3dnow, 4 )

DECLARE_NORM_GROUP( 3dnow )

/* The three prototypes below are declared here but not referenced
 * anywhere else in this file. */
extern void _ASMAPI
_mesa_v16_3dnow_general_xform( GLfloat *first_vert,
                               const GLfloat *m,
                               const GLfloat *src,
                               GLuint src_stride,
                               GLuint count );

extern void _ASMAPI
_mesa_3dnow_project_vertices( GLfloat *first,
                              GLfloat *last,
                              const GLfloat *m,
                              GLuint stride );

extern void _ASMAPI
_mesa_3dnow_project_clipped_vertices( GLfloat *first,
                                      GLfloat *last,
                                      const GLfloat *m,
                                      GLuint stride,
                                      const GLubyte *clipmask );
#endif
/**
 * Hook up the 3DNow! assembly transform routines.
 *
 * Runs ASSIGN_XFORM_GROUP for point sizes 2, 3 and 4.  The normal-transform
 * group is deliberately left unassigned (see the comment in the body).
 * When DEBUG_MATH is defined, the math self-tests are run afterwards.
 *
 * Compiles to an empty function unless USE_3DNOW_ASM is defined.
 */
void _mesa_init_3dnow_transform_asm( void )
{
#ifdef USE_3DNOW_ASM
   ASSIGN_XFORM_GROUP( 3dnow, 2 );
   ASSIGN_XFORM_GROUP( 3dnow, 3 );
   ASSIGN_XFORM_GROUP( 3dnow, 4 );

   /* There's a bug somewhere in the 3dnow_normal.S file that causes
    * bad shading.  Disable for now.
   ASSIGN_NORM_GROUP( 3dnow );
   */

#ifdef DEBUG_MATH
   _math_test_all_transform_functions( "3DNow!" );
   _math_test_all_normal_transform_functions( "3DNow!" );
#endif
#endif
}
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/3dnow.h |
---|
0,0 → 1,36 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* |
* 3DNow! optimizations contributed by |
* Holger Waechtler <holger@akaflieg.extern.tu-berlin.de> |
*/ |
#ifndef __3DNOW_H__
#define __3DNOW_H__
/*
 * Set-up entry point for the 3DNow! transform code; defined in 3dnow.c.
 * Its body is empty unless USE_3DNOW_ASM is defined at build time.
 */
void _mesa_init_3dnow_transform_asm( void );
#endif
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/3dnow_normal.S |
---|
0,0 → 1,852 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2003 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* |
* 3Dnow assembly code by Holger Waechtler |
*/ |
#ifdef USE_3DNOW_ASM |
#include "assyntax.h" |
#include "matypes.h" |
#include "norm_args.h" |
SEG_TEXT |
/* M(i): the i-th 4-byte element of the matrix that ECX points to.
 * The argument is parenthesized so that expressions such as M(a + b)
 * are scaled as a whole. */
#define M(i) REGOFF((i) * 4, ECX)
/* STRIDE: the stride field of the input vector that ESI points to.
 * Uses the symbolic V4F_STRIDE offset rather than a literal. */
#define STRIDE REGOFF(V4F_STRIDE, ESI)
/*
 * _mesa_3dnow_transform_normalize_normals
 *
 * Reads ARG_MAT, ARG_SCALE, ARG_IN, ARG_LENGTHS and ARG_DEST.
 *
 * Pass 1 multiplies every input normal by matrix entries 0-2, 4-6 and
 * 8-10 of mat->inv and stores the result at dest->start, 16 bytes per
 * output normal.  Pass 2 walks dest again and normalizes in place:
 *   - lengths != 0: each normal is multiplied by the matching lengths[]
 *     entry;
 *   - lengths == 0: each normal is multiplied by a computed
 *     1/sqrt(x0*x0 + x1*x1 + x2*x2).
 * dest->count is set to in->count.  Nothing else is done when count == 0.
 *
 * ARG_SCALE is folded into the matrix only on the lengths != 0 path.
 * On the lengths == 0 path the result is fully renormalized, so a
 * uniform scale would cancel out.  (The previous code had this test
 * inverted and dropped the scale exactly where it was needed.)
 *
 * NOTE(review): the computed-length path has no special case for a
 * zero-length normal.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
HIDDEN(_mesa_3dnow_transform_normalize_normals)
GLNAME(_mesa_3dnow_transform_normalize_normals):
#define FRAME_OFFSET 12
PUSH_L ( EDI )
PUSH_L ( ESI )
PUSH_L ( EBP )
MOV_L ( ARG_LENGTHS, EDI )
MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_DEST, EAX )
MOV_L ( REGOFF(V4F_COUNT, ESI), EBP ) /* dest->count = in->count */
MOV_L ( EBP, REGOFF(V4F_COUNT, EAX) )
MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
MOV_L ( ARG_MAT, ECX )
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
CMP_L ( CONST(0), EBP ) /* count > 0 ?? */
JE ( LLBL (G3TN_end) )
FEMMS
PUSH_L ( EBP )
PUSH_L ( EAX )
PUSH_L ( EDX ) /* save counter & pointers for */
/* the normalize pass */
/* Three more dwords are on the stack now, so the ARG_* offsets move. */
#undef FRAME_OFFSET
#define FRAME_OFFSET 24
MOVQ ( M(0), MM3 ) /* m1 | m0 */
MOVQ ( M(4), MM4 ) /* m5 | m4 */
MOVD ( M(2), MM5 ) /* | m2 */
PUNPCKLDQ ( M(6), MM5 ) /* m6 | m2 */
MOVQ ( M(8), MM6 ) /* m9 | m8 */
MOVD ( M(10), MM7 ) /* | m10 */
CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
JE ( LLBL (G3TN_scale_end) ) /* yes: result is renormalized, skip scale */
MOVD ( ARG_SCALE, MM0 ) /* | scale */
PUNPCKLDQ ( MM0, MM0 ) /* scale | scale */
PFMUL ( MM0, MM3 ) /* scale * m1 | scale * m0 */
PFMUL ( MM0, MM4 ) /* scale * m5 | scale * m4 */
PFMUL ( MM0, MM5 ) /* scale * m6 | scale * m2 */
PFMUL ( MM0, MM6 ) /* scale * m9 | scale * m8 */
PFMUL ( MM0, MM7 ) /* | scale * m10 */
ALIGNTEXT32
LLBL (G3TN_scale_end):
LLBL (G3TN_transform):
MOVQ ( REGIND (EDX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF (8, EDX), MM2 ) /* | x2 */
MOVQ ( MM0, MM1 ) /* x1 | x0 */
PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */
PFMUL ( MM3, MM0 ) /* x1*m1 | x0*m0 */
ADD_L ( CONST(16), EAX ) /* next r */
PREFETCHW ( REGIND(EAX) )
PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */
PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */
PFMUL ( MM5, MM2 ) /* x2*m6 | x2*m2 */
PFADD ( MM2, MM0 ) /* x0*m4+x1*m5+x2*m6 | x0*m0+x1*m1+x2*m2 */
MOVQ ( REGIND (EDX), MM1 ) /* x1 | x0 */
MOVQ ( MM0, REGOFF(-16, EAX) ) /* write r0, r1 */
PFMUL ( MM6, MM1 ) /* x1*m9 | x0*m8 */
MOVD ( REGOFF (8, EDX), MM2 ) /* | x2 */
PFMUL ( MM7, MM2 ) /* | x2*m10 */
PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */
PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m10 */
ADD_L ( STRIDE, EDX ) /* next normal */
PREFETCH ( REGIND(EDX) )
MOVD ( MM1, REGOFF(-8, EAX) ) /* write r2 */
SUB_L ( CONST(1), EBP ) /* decrement normal counter */
JNZ ( LLBL (G3TN_transform) )
POP_L ( EDX ) /* end of transform --- */
POP_L ( EAX ) /* now normalizing ... */
POP_L ( EBP )
CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
JE ( LLBL (G3TN_norm ) ) /* calculate lengths */
ALIGNTEXT32
LLBL (G3TN_norm_w_lengths): /* in-place pass over dest using lengths[] */
PREFETCHW ( REGOFF(12,EAX) )
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
MOVD ( REGIND (EDI), MM3 ) /* | length (x) */
PFMUL ( MM3, MM1 ) /* | x2 (normalized) */
PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */
PFMUL ( MM3, MM0 ) /* x1 (normalized) | x0 (normalized) */
ADD_L ( CONST(4), EDI ) /* next length */
PREFETCH ( REGIND(EDI) )
MOVQ ( MM0, REGIND(EAX) ) /* write new x0, x1 */
MOVD ( MM1, REGOFF(8, EAX) ) /* write new x2 */
ADD_L ( CONST(16), EAX ) /* next r */
SUB_L ( CONST(1), EBP ) /* decrement normal counter */
JNZ ( LLBL (G3TN_norm_w_lengths) )
JMP ( LLBL (G3TN_exit_3dnow) )
ALIGNTEXT32
LLBL (G3TN_norm): /* in-place pass over dest computing 1/length */
PREFETCHW ( REGIND(EAX) )
MOVQ ( REGIND (EAX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
MOVQ ( MM0, MM3 ) /* x1 | x0 */
MOVQ ( MM1, MM4 ) /* | x2 */
PFMUL ( MM0, MM3 ) /* x1*x1 | x0*x0 */
ADD_L ( CONST(16), EAX ) /* next r */
PFMUL ( MM1, MM4 ) /* | x2*x2 */
PFADD ( MM4, MM3 ) /* x1*x1 | x0*x0+x2*x2 */
PFACC ( MM3, MM3 ) /* **not used** | x0*x0+x1*x1+x2*x2 */
PFRSQRT ( MM3, MM5 ) /* 1/sqrt (x0*x0+x1*x1+x2*x2), approximation */
MOVQ ( MM5, MM4 )
PUNPCKLDQ ( MM3, MM3 )
SUB_L ( CONST(1), EBP ) /* decrement normal counter */
/* Refinement of the reciprocal square root estimate.  None of the
 * MMX/3DNow! instructions below touch EFLAGS, so the JNZ at the end of
 * the loop still sees the result of the SUB_L above. */
PFMUL ( MM5, MM5 )
PFRSQIT1 ( MM3, MM5 )
PFRCPIT2 ( MM4, MM5 )
PFMUL ( MM5, MM0 ) /* x1 (normalized) | x0 (normalized) */
MOVQ ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1 */
PFMUL ( MM5, MM1 ) /* | x2 (normalized) */
MOVD ( MM1, REGOFF(-8, EAX) ) /* write new x2 */
JNZ ( LLBL (G3TN_norm) )
LLBL (G3TN_exit_3dnow):
FEMMS
LLBL (G3TN_end):
POP_L ( EBP )
POP_L ( ESI )
POP_L ( EDI )
RET
/*
 * _mesa_3dnow_transform_normalize_normals_no_rot
 *
 * Reads ARG_MAT, ARG_SCALE, ARG_IN, ARG_LENGTHS and ARG_DEST.
 *
 * Same job as _mesa_3dnow_transform_normalize_normals, but only the
 * diagonal entries 0, 5 and 10 of mat->inv are used, and the transform
 * and normalize steps are fused into a single loop.
 *   - lengths != 0: the diagonal is pre-multiplied by ARG_SCALE and each
 *     transformed normal is multiplied by the matching lengths[] entry;
 *   - lengths == 0: each transformed normal is multiplied by a computed
 *     1/sqrt(x0*x0 + x1*x1 + x2*x2); ARG_SCALE is not applied, because
 *     the renormalization would cancel it.
 * dest->count is set to in->count.  Nothing else is done when count == 0.
 *
 * Changes against the previous version:
 *   - the scale test was inverted (scale applied only when lengths == 0);
 *   - the length for the next iteration used to be loaded at the bottom
 *     of the loop, which read lengths[count] on the final pass.  It is
 *     now loaded at the top of each iteration.
 *
 * NOTE(review): the computed-length path has no special case for a
 * zero-length normal.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_normalize_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):
#undef FRAME_OFFSET
#define FRAME_OFFSET 12
PUSH_L ( EDI )
PUSH_L ( ESI )
PUSH_L ( EBP )
MOV_L ( ARG_LENGTHS, EDI )
MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_DEST, EAX )
MOV_L ( REGOFF(V4F_COUNT, ESI), EBP ) /* dest->count = in->count */
MOV_L ( EBP, REGOFF(V4F_COUNT, EAX) )
MOV_L ( ARG_MAT, ECX )
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
CMP_L ( CONST(0), EBP ) /* count > 0 ?? */
JE ( LLBL (G3TNNR_end) )
FEMMS
MOVD ( M(0), MM0 ) /* | m0 */
PUNPCKLDQ ( M(5), MM0 ) /* m5 | m0 */
MOVD ( M(10), MM2 ) /* | m10 */
PUNPCKLDQ ( MM2, MM2 ) /* m10 | m10 */
CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
JE ( LLBL (G3TNNR_norm) ) /* yes: compute lengths, scale not needed */
MOVD ( ARG_SCALE, MM7 ) /* | scale */
PUNPCKLDQ ( MM7, MM7 ) /* scale | scale */
PFMUL ( MM7, MM0 ) /* scale * m5 | scale * m0 */
PFMUL ( MM7, MM2 ) /* scale * m10 | scale * m10 */
ALIGNTEXT32
LLBL (G3TNNR_norm_w_lengths): /* use precalculated lengths */
PREFETCHW ( REGIND(EAX) )
MOVD ( REGIND(EDI), MM3 ) /* | length (x) */
MOVQ ( REGIND(EDX), MM6 ) /* x1 | x0 */
MOVD ( REGOFF(8, EDX), MM7 ) /* | x2 */
PFMUL ( MM0, MM6 ) /* x1*m5 | x0*m0 */
ADD_L ( STRIDE, EDX ) /* next normal */
PREFETCH ( REGIND(EDX) )
PFMUL ( MM2, MM7 ) /* | x2*m10 */
ADD_L ( CONST(16), EAX ) /* next r */
PFMUL ( MM3, MM7 ) /* | x2 (normalized) */
PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */
ADD_L ( CONST(4), EDI ) /* next length */
PFMUL ( MM3, MM6 ) /* x1 (normalized) | x0 (normalized) */
SUB_L ( CONST(1), EBP ) /* decrement normal counter */
/* The stores below leave EFLAGS alone; JNZ tests the SUB_L result. */
MOVQ ( MM6, REGOFF(-16, EAX) ) /* write r0, r1 */
MOVD ( MM7, REGOFF(-8, EAX) ) /* write r2 */
JNZ ( LLBL (G3TNNR_norm_w_lengths) )
JMP ( LLBL (G3TNNR_exit_3dnow) )
ALIGNTEXT32
LLBL (G3TNNR_norm): /* need to calculate lengths */
PREFETCHW ( REGIND(EAX) )
MOVQ ( REGIND(EDX), MM6 ) /* x1 | x0 */
MOVD ( REGOFF(8, EDX), MM7 ) /* | x2 */
PFMUL ( MM0, MM6 ) /* x1*m5 | x0*m0 */
ADD_L ( CONST(16), EAX ) /* next r */
PFMUL ( MM2, MM7 ) /* | x2*m10 */
MOVQ ( MM6, MM3 ) /* x1 (transformed)| x0 (transformed) */
MOVQ ( MM7, MM4 ) /* | x2 (transformed) */
PFMUL ( MM6, MM3 ) /* x1*x1 | x0*x0 */
PFMUL ( MM7, MM4 ) /* | x2*x2 */
PFACC ( MM3, MM3 ) /* **not used** | x0*x0+x1*x1 */
PFADD ( MM4, MM3 ) /* | x0*x0+x1*x1+x2*x2*/
ADD_L ( STRIDE, EDX ) /* next normal */
PREFETCH ( REGIND(EDX) )
PFRSQRT ( MM3, MM5 ) /* 1/sqrt (x0*x0+x1*x1+x2*x2), approximation */
MOVQ ( MM5, MM4 )
PUNPCKLDQ ( MM3, MM3 )
PFMUL ( MM5, MM5 )
PFRSQIT1 ( MM3, MM5 )
SUB_L ( CONST(1), EBP ) /* decrement normal counter */
PFRCPIT2 ( MM4, MM5 )
PFMUL ( MM5, MM6 ) /* x1 (normalized) | x0 (normalized) */
MOVQ ( MM6, REGOFF(-16, EAX) ) /* write r0, r1 */
PFMUL ( MM5, MM7 ) /* | x2 (normalized) */
MOVD ( MM7, REGOFF(-8, EAX) ) /* write r2 */
JNZ ( LLBL (G3TNNR_norm) )
LLBL (G3TNNR_exit_3dnow):
FEMMS
LLBL (G3TNNR_end):
POP_L ( EBP )
POP_L ( ESI )
POP_L ( EDI )
RET
/*
 * _mesa_3dnow_transform_rescale_normals_no_rot
 *
 * Reads ARG_MAT, ARG_SCALE, ARG_IN and ARG_DEST.
 *
 * Multiplies each input normal component-wise by scale * (m0, m5, m10),
 * the entries at byte offsets 0, 20 and 40 of mat->inv, and stores the
 * result at dest->start, 16 bytes per output normal.  dest->count is set
 * to in->count.  Nothing else is done when count == 0.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_rescale_normals_no_rot)
GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):
#undef FRAME_OFFSET
#define FRAME_OFFSET 12
PUSH_L ( EDI )
PUSH_L ( ESI )
PUSH_L ( EBP )
MOV_L ( ARG_IN, EAX )
MOV_L ( ARG_DEST, EDX )
MOV_L ( REGOFF(V4F_COUNT, EAX), EBP ) /* dest->count = in->count */
MOV_L ( EBP, REGOFF(V4F_COUNT, EDX) )
MOV_L ( ARG_IN, ESI ) /* ESI = in, needed by the STRIDE macro */
MOV_L ( ARG_MAT, ECX )
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
MOV_L ( REGOFF(V4F_START, EDX), EAX ) /* dest->start */
MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
CMP_L ( CONST(0), EBP ) /* count == 0 ? -> nothing to do */
JE ( LLBL (G3TRNR_end) )
FEMMS
MOVD ( ARG_SCALE, MM6 ) /* | scale */
PUNPCKLDQ ( MM6, MM6 ) /* scale | scale */
MOVD ( REGIND(ECX), MM0 ) /* | m0 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m5 | m0 */
PFMUL ( MM6, MM0 ) /* scale*m5 | scale*m0 */
MOVD ( REGOFF(40, ECX), MM2 ) /* | m10 */
PFMUL ( MM6, MM2 ) /* | scale*m10 */
ALIGNTEXT32
LLBL (G3TRNR_rescale):
PREFETCHW ( REGIND(EAX) )
MOVQ ( REGIND(EDX), MM4 ) /* x1 | x0 */
MOVD ( REGOFF(8, EDX), MM5 ) /* | x2 */
PFMUL ( MM0, MM4 ) /* x1*m5 | x0*m0 */
ADD_L ( STRIDE, EDX ) /* next normal */
PREFETCH ( REGIND(EDX) )
PFMUL ( MM2, MM5 ) /* | x2*m10 */
ADD_L ( CONST(16), EAX ) /* next r */
SUB_L ( CONST(1), EBP ) /* decrement normal counter */
/* The two stores do not modify EFLAGS; JNZ tests the SUB_L result. */
MOVQ ( MM4, REGOFF(-16, EAX) ) /* write r0, r1 */
MOVD ( MM5, REGOFF(-8, EAX) ) /* write r2 */
JNZ ( LLBL (G3TRNR_rescale) ) /* cnt > 0 ? -> process next normal */
FEMMS
LLBL (G3TRNR_end):
POP_L ( EBP )
POP_L ( ESI )
POP_L ( EDI )
RET
/*
 * _mesa_3dnow_transform_rescale_normals
 *
 * Reads ARG_MAT, ARG_SCALE, ARG_IN and ARG_DEST.
 *
 * Pre-multiplies matrix entries 0-2, 4-6 and 8-10 of mat->inv by scale,
 * then for every input normal (x0, x1, x2) stores
 *     r0 = x0*m0 + x1*m1 + x2*m2
 *     r1 = x0*m4 + x1*m5 + x2*m6
 *     r2 = x0*m8 + x1*m9 + x2*m10
 * at dest->start, 16 bytes per output normal.  dest->count is set to
 * in->count.  Nothing else is done when count == 0.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
HIDDEN(_mesa_3dnow_transform_rescale_normals)
GLNAME(_mesa_3dnow_transform_rescale_normals):
#undef FRAME_OFFSET
#define FRAME_OFFSET 8
PUSH_L ( EDI )
PUSH_L ( ESI )
MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_DEST, EAX )
MOV_L ( ARG_MAT, ECX )
MOV_L ( REGOFF(V4F_COUNT, ESI), EDI ) /* dest->count = in->count */
MOV_L ( EDI, REGOFF(V4F_COUNT, EAX) )
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
CMP_L ( CONST(0), EDI ) /* count == 0 ? -> nothing to do */
JE ( LLBL (G3TR_end) )
FEMMS
MOVQ ( REGIND(ECX), MM3 ) /* m1 | m0 */
MOVQ ( REGOFF(16,ECX), MM4 ) /* m5 | m4 */
MOVD ( ARG_SCALE, MM0 ) /* | scale */
MOVD ( REGOFF(8,ECX), MM5 ) /* | m2 */
PUNPCKLDQ ( MM0, MM0 ) /* scale | scale */
PUNPCKLDQ ( REGOFF(24, ECX), MM5 ) /* m6 | m2 */
PFMUL ( MM0, MM3 ) /* scale*m1 | scale*m0 */
MOVQ ( REGOFF(32, ECX), MM6 ) /* m9 | m8*/
PFMUL ( MM0, MM4 ) /* scale*m5 | scale*m4 */
MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */
PFMUL ( MM0, MM5 ) /* scale*m6 | scale*m2 */
PFMUL ( MM0, MM6 ) /* scale*m9 | scale*m8 */
PFMUL ( MM0, MM7 ) /* | scale*m10 */
ALIGNTEXT32
LLBL (G3TR_rescale):
PREFETCHW ( REGIND(EAX) )
MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
MOVQ ( MM0, MM1 ) /* x1 | x0 */
PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */
PFMUL ( MM3, MM0 ) /* x1*m1 | x0*m0 */
ADD_L ( CONST(16), EAX ) /* next r */
PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */
PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */
MOVQ ( REGIND(EDX), MM1 ) /* x1 | x0 */
PFMUL ( MM5, MM2 ) /* x2*m6 | x2*m2 */
PFADD ( MM2, MM0 ) /* x0*m4...+x2*m6| x0*m0+x1*m1+x2*m2 */
MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
ADD_L ( STRIDE, EDX ) /* next normal */
PREFETCH ( REGIND(EDX) )
MOVQ ( MM0, REGOFF(-16, EAX) ) /* write r0, r1 */
PFMUL ( MM6, MM1 ) /* x1*m9 | x0*m8 */
PFMUL ( MM7, MM2 ) /* | x2*m10 */
PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */
PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m10 */
MOVD ( MM1, REGOFF(-8, EAX) ) /* write r2 */
SUB_L ( CONST(1), EDI ) /* decrement normal counter */
JNZ ( LLBL (G3TR_rescale) )
FEMMS
LLBL (G3TR_end):
POP_L ( ESI )
POP_L ( EDI )
RET
/*
 * _mesa_3dnow_transform_normals_no_rot
 *
 * Reads ARG_MAT, ARG_IN and ARG_DEST.
 *
 * Multiplies each input normal component-wise by (m0, m5, m10), the
 * entries at byte offsets 0, 20 and 40 of mat->inv, and stores the result
 * at dest->start, 16 bytes per output normal.  No scaling and no
 * normalization is applied.  dest->count is set to in->count.  Nothing
 * else is done when count == 0.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normals_no_rot):
#undef FRAME_OFFSET
#define FRAME_OFFSET 8
PUSH_L ( EDI )
PUSH_L ( ESI )
MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_DEST, EAX )
MOV_L ( ARG_MAT, ECX )
MOV_L ( REGOFF(V4F_COUNT, ESI), EDI ) /* dest->count = in->count */
MOV_L ( EDI, REGOFF(V4F_COUNT, EAX) )
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
CMP_L ( CONST(0), EDI ) /* count == 0 ? -> nothing to do */
JE ( LLBL (G3TNR_end) )
FEMMS
MOVD ( REGIND(ECX), MM0 ) /* | m0 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m5 | m0 */
MOVD ( REGOFF(40, ECX), MM2 ) /* | m10 */
PUNPCKLDQ ( MM2, MM2 ) /* m10 | m10 */
ALIGNTEXT32
LLBL (G3TNR_transform):
PREFETCHW ( REGIND(EAX) )
MOVQ ( REGIND(EDX), MM4 ) /* x1 | x0 */
MOVD ( REGOFF(8, EDX), MM5 ) /* | x2 */
PFMUL ( MM0, MM4 ) /* x1*m5 | x0*m0 */
ADD_L ( STRIDE, EDX) /* next normal */
PREFETCH ( REGIND(EDX) )
PFMUL ( MM2, MM5 ) /* | x2*m10 */
ADD_L ( CONST(16), EAX ) /* next r */
SUB_L ( CONST(1), EDI ) /* decrement normal counter */
/* The two stores do not modify EFLAGS; JNZ tests the SUB_L result. */
MOVQ ( MM4, REGOFF(-16, EAX) ) /* write r0, r1 */
MOVD ( MM5, REGOFF(-8, EAX) ) /* write r2 */
JNZ ( LLBL (G3TNR_transform) )
FEMMS
LLBL (G3TNR_end):
POP_L ( ESI )
POP_L ( EDI )
RET
/*
 * _mesa_3dnow_transform_normals
 *
 * Reads ARG_MAT, ARG_IN and ARG_DEST.
 *
 * For every input normal (x0, x1, x2) stores
 *     r0 = x0*m0 + x1*m1 + x2*m2
 *     r1 = x0*m4 + x1*m5 + x2*m6
 *     r2 = x0*m8 + x1*m9 + x2*m10
 * at dest->start, 16 bytes per output normal, where m is mat->inv.
 * No scaling and no normalization is applied.  dest->count is set to
 * in->count.  Nothing else is done when count == 0.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals)
HIDDEN(_mesa_3dnow_transform_normals)
GLNAME(_mesa_3dnow_transform_normals):
#undef FRAME_OFFSET
#define FRAME_OFFSET 8
PUSH_L ( EDI )
PUSH_L ( ESI )
MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_DEST, EAX )
MOV_L ( ARG_MAT, ECX )
MOV_L ( REGOFF(V4F_COUNT, ESI), EDI ) /* dest->count = in->count */
MOV_L ( EDI, REGOFF(V4F_COUNT, EAX) )
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
CMP_L ( CONST(0), EDI ) /* count > 0 ?? */
JE ( LLBL (G3T_end) )
FEMMS
MOVQ ( REGIND(ECX), MM3 ) /* m1 | m0 */
MOVQ ( REGOFF(16, ECX), MM4 ) /* m5 | m4 */
MOVD ( REGOFF(8, ECX), MM5 ) /* | m2 */
PUNPCKLDQ ( REGOFF(24, ECX), MM5 ) /* m6 | m2 */
MOVQ ( REGOFF(32, ECX), MM6 ) /* m9 | m8 */
MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */
ALIGNTEXT32
LLBL (G3T_transform):
PREFETCHW ( REGIND(EAX) )
MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
MOVQ ( MM0, MM1 ) /* x1 | x0 */
PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */
PFMUL ( MM3, MM0 ) /* x1*m1 | x0*m0 */
ADD_L ( CONST(16), EAX ) /* next r */
PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */
PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */
PFMUL ( MM5, MM2 ) /* x2*m6 | x2*m2 */
PFADD ( MM2, MM0 ) /* x0*m4...+x2*m6| x0*m0+x1*m1+x2*m2 */
MOVQ ( REGIND(EDX), MM1 ) /* x1 | x0 */
MOVQ ( MM0, REGOFF(-16, EAX) ) /* write r0, r1 */
PFMUL ( MM6, MM1 ) /* x1*m9 | x0*m8 */
MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
PFMUL ( MM7, MM2 ) /* | x2*m10 */
ADD_L ( STRIDE, EDX ) /* next normal */
PREFETCH ( REGIND(EDX) )
PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */
PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m10 */
MOVD ( MM1, REGOFF(-8, EAX) ) /* write r2 */
SUB_L ( CONST(1), EDI ) /* decrement normal counter */
JNZ ( LLBL (G3T_transform) )
FEMMS
LLBL (G3T_end):
POP_L ( ESI )
POP_L ( EDI )
RET
/*
 * _mesa_3dnow_normalize_normals
 *
 * Reads ARG_IN, ARG_LENGTHS and ARG_DEST; no matrix is involved.
 *
 * Copies every input normal to dest->start (16 bytes per output normal)
 * multiplied by a per-normal factor:
 *   - lengths != 0: the matching entry of lengths[];
 *   - lengths == 0: a computed 1/sqrt(x0*x0 + x1*x1 + x2*x2).
 * dest->count is set to in->count.  Nothing else is done when count == 0.
 *
 * NOTE(review): the computed-length path has no special case for a
 * zero-length normal.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_normalize_normals)
HIDDEN(_mesa_3dnow_normalize_normals)
GLNAME(_mesa_3dnow_normalize_normals):
#undef FRAME_OFFSET
#define FRAME_OFFSET 12
PUSH_L ( EDI )
PUSH_L ( ESI )
PUSH_L ( EBP )
MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_DEST, EAX )
MOV_L ( REGOFF(V4F_COUNT, ESI), EBP ) /* dest->count = in->count */
MOV_L ( EBP, REGOFF(V4F_COUNT, EAX) )
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
MOV_L ( REGOFF(V4F_START, ESI), ECX ) /* in->start */
MOV_L ( ARG_LENGTHS, EDX )
CMP_L ( CONST(0), EBP ) /* count > 0 ?? */
JE ( LLBL (G3N_end) )
FEMMS
CMP_L ( CONST(0), EDX ) /* lengths == 0 ? */
JE ( LLBL (G3N_norm2) ) /* calculate lengths */
ALIGNTEXT32
LLBL (G3N_norm1): /* use precalculated lengths */
/* NOTE(review): EAX is the destination; the other loops in this file
 * use PREFETCHW for it. */
PREFETCH ( REGIND(EAX) )
MOVQ ( REGIND(ECX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, ECX), MM1 ) /* | x2 */
MOVD ( REGIND(EDX), MM3 ) /* | length (x) */
PFMUL ( MM3, MM1 ) /* | x2 (normalized) */
PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */
ADD_L ( STRIDE, ECX ) /* next normal */
PREFETCH ( REGIND(ECX) )
PFMUL ( MM3, MM0 ) /* x1 (normalized) | x0 (normalized) */
MOVQ ( MM0, REGIND(EAX) ) /* write new x0, x1 */
MOVD ( MM1, REGOFF(8, EAX) ) /* write new x2 */
ADD_L ( CONST(16), EAX ) /* next r */
ADD_L ( CONST(4), EDX ) /* next length */
SUB_L ( CONST(1), EBP ) /* decrement normal counter */
JNZ ( LLBL (G3N_norm1) )
JMP ( LLBL (G3N_end1) )
ALIGNTEXT32
LLBL (G3N_norm2): /* need to calculate lengths */
PREFETCHW ( REGIND(EAX) )
PREFETCH ( REGIND(ECX) )
MOVQ ( REGIND(ECX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, ECX), MM1 ) /* | x2 */
MOVQ ( MM0, MM3 ) /* x1 | x0 */
ADD_L ( STRIDE, ECX ) /* next normal */
PFMUL ( MM0, MM3 ) /* x1*x1 | x0*x0 */
MOVQ ( MM1, MM4 ) /* | x2 */
ADD_L ( CONST(16), EAX ) /* next r */
PFMUL ( MM1, MM4 ) /* | x2*x2 */
PFADD ( MM4, MM3 ) /* x1*x1 | x0*x0+x2*x2 */
PFACC ( MM3, MM3 ) /* x0*x0+...+x2*x2 | x0*x0+x1*x1+x2*x2*/
PFRSQRT ( MM3, MM5 ) /* 1/sqrt (x0*x0+x1*x1+x2*x2) */
MOVQ ( MM5, MM4 )
PUNPCKLDQ ( MM3, MM3 )
PFMUL ( MM5, MM5 )
PFRSQIT1 ( MM3, MM5 )
SUB_L ( CONST(1), EBP ) /* decrement normal counter */
/* Nothing below modifies EFLAGS; JNZ tests the SUB_L result. */
PFRCPIT2 ( MM4, MM5 )
PFMUL ( MM5, MM0 ) /* x1 (normalized) | x0 (normalized) */
MOVQ ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1 */
PFMUL ( MM5, MM1 ) /* | x2 (normalized) */
MOVD ( MM1, REGOFF(-8, EAX) ) /* write new x2 */
JNZ ( LLBL (G3N_norm2) )
LLBL (G3N_end1):
FEMMS
LLBL (G3N_end):
POP_L ( EBP )
POP_L ( ESI )
POP_L ( EDI )
RET
/*
 * _mesa_3dnow_rescale_normals
 *
 * Reads ARG_SCALE, ARG_IN and ARG_DEST; no matrix is involved.
 *
 * Multiplies every input normal by scale and stores the result at
 * dest->start, 16 bytes per output normal.  dest->count is set to
 * in->count.  Nothing else is done when count == 0.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_rescale_normals)
HIDDEN(_mesa_3dnow_rescale_normals)
GLNAME(_mesa_3dnow_rescale_normals):
#undef FRAME_OFFSET
#define FRAME_OFFSET 8
PUSH_L ( EDI )
PUSH_L ( ESI )
MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_DEST, EAX )
MOV_L ( REGOFF(V4F_COUNT, ESI), EDX ) /* dest->count = in->count */
MOV_L ( EDX, REGOFF(V4F_COUNT, EAX) )
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
MOV_L ( REGOFF(V4F_START, ESI), ECX ) /* in->start */
CMP_L ( CONST(0), EDX ) /* count == 0 ? -> nothing to do */
JE ( LLBL (G3R_end) )
FEMMS
MOVD ( ARG_SCALE, MM0 ) /* | scale */
PUNPCKLDQ ( MM0, MM0 ) /* scale | scale */
ALIGNTEXT32
LLBL (G3R_rescale):
PREFETCHW ( REGIND(EAX) )
MOVQ ( REGIND(ECX), MM1 ) /* x1 | x0 */
MOVD ( REGOFF(8, ECX), MM2 ) /* | x2 */
PFMUL ( MM0, MM1 ) /* x1*scale | x0*scale */
ADD_L ( STRIDE, ECX ) /* next normal */
PREFETCH ( REGIND(ECX) )
PFMUL ( MM0, MM2 ) /* | x2*scale */
ADD_L ( CONST(16), EAX ) /* next r */
MOVQ ( MM1, REGOFF(-16, EAX) ) /* write r0, r1 */
MOVD ( MM2, REGOFF(-8, EAX) ) /* write r2 */
SUB_L ( CONST(1), EDX ) /* decrement normal counter */
JNZ ( LLBL (G3R_rescale) )
FEMMS
LLBL (G3R_end):
POP_L ( ESI )
POP_L ( EDI )
RET
#endif |
#if defined (__ELF__) && defined (__linux__) |
.section .note.GNU-stack,"",%progbits |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/3dnow_xform1.S |
---|
0,0 → 1,437 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#ifdef USE_3DNOW_ASM |
#include "assyntax.h" |
#include "matypes.h" |
#include "xform_args.h" |
SEG_TEXT |
/*
 * Every routine in this file reads its ARG_* stack arguments after
 * pushing ESI only (one dword), hence the frame offset of 4.
 */
#define FRAME_OFFSET 4
/*
 * _mesa_3dnow_transform_points1_general
 *
 * Reads ARG_DEST, ARG_MATRIX and ARG_SOURCE.
 *
 * For every one-component input point x0 stores
 *     r0 = x0*m00 + m30    r1 = x0*m01 + m31
 *     r2 = x0*m02 + m32    r3 = x0*m03 + m33
 * at dest->start, 16 bytes per output point; the input pointer advances
 * by source->stride bytes.  Sets dest->size to 4, ORs VEC_SIZE_4 into
 * dest->flags and copies source->count to dest->count.
 *
 * The output pointer is fetched through V4F_START rather than a literal
 * offset, matching the other field accesses and the points2 routines.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_general )
HIDDEN(_mesa_3dnow_transform_points1_general)
GLNAME( _mesa_3dnow_transform_points1_general ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) )
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */
PUSH_L ( EDI )
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* source->stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* source->start */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPGR_3 ) ) /* count == 0 ? -> nothing to do */
MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */
MOVQ ( REGOFF(8, ECX), MM1 ) /* m03 | m02 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
MOVQ ( REGOFF(56, ECX), MM3 ) /* m33 | m32 */
ALIGNTEXT16
LLBL( G3TPGR_2 ):
MOVD ( REGIND(EAX), MM4 ) /* | x0 */
PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */
MOVQ ( MM4, MM5 ) /* x0 | x0 */
PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */
PFMUL ( MM1, MM5 ) /* x0*m03 | x0*m02 */
PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */
PFADD ( MM3, MM5 ) /* x0*m03+m33 | x0*m02+m32 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
MOVQ ( MM5, REGOFF(8, EDX) ) /* write r3, r2 */
ADD_L ( EDI, EAX ) /* next vertex */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPGR_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPGR_3 ):
FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
/*
 * _mesa_3dnow_transform_points1_identity
 *
 * Reads ARG_DEST, ARG_MATRIX and ARG_SOURCE (the matrix is loaded but
 * never dereferenced).
 *
 * Copies the single component x0 of every input point to dest->start,
 * 16 bytes per output point; the input pointer advances by
 * source->stride bytes.  Sets dest->size to 1, ORs VEC_SIZE_1 into
 * dest->flags and copies source->count to dest->count.
 *
 * The output pointer is fetched through V4F_START rather than a literal
 * offset, matching the other field accesses and the points2 routines.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_identity )
HIDDEN(_mesa_3dnow_transform_points1_identity)
GLNAME( _mesa_3dnow_transform_points1_identity ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(1), REGOFF(V4F_SIZE, ECX) )
OR_B ( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */
PUSH_L ( EDI )
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix (unused below) */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* source->stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* source->start */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPIR_4) ) /* count == 0 ? -> nothing to do */
ALIGNTEXT16
LLBL( G3TPIR_3 ):
MOVD ( REGIND(EAX), MM0 ) /* | x0 */
ADD_L ( EDI, EAX ) /* next vertex */
MOVD ( MM0, REGIND(EDX) ) /* | r0 */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPIR_3 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPIR_4 ):
FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
/*
 * _mesa_3dnow_transform_points1_3d_no_rot
 *
 * Reads ARG_DEST, ARG_MATRIX and ARG_SOURCE.
 *
 * For every one-component input point x0 stores
 *     r0 = x0*m00 + m30    r1 = m31    r2 = m32
 * at dest->start, 16 bytes per output point; the input pointer advances
 * by source->stride bytes.  Sets dest->size to 3, ORs VEC_SIZE_3 into
 * dest->flags and copies source->count to dest->count.
 *
 * The output pointer is fetched through V4F_START rather than a literal
 * offset, matching the other field accesses and the points2 routines.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_3d_no_rot )
HIDDEN(_mesa_3dnow_transform_points1_3d_no_rot)
GLNAME( _mesa_3dnow_transform_points1_3d_no_rot ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) )
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */
PUSH_L ( EDI )
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* source->stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* source->start */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3NRR_3 ) ) /* count == 0 ? -> nothing to do */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */
ALIGNTEXT16
LLBL( G3TP3NRR_2 ):
MOVD ( REGIND(EAX), MM4 ) /* | x0 */
PFMUL ( MM0, MM4 ) /* | x0*m00 */
PFADD ( MM2, MM4 ) /* m31 | x0*m00+m30 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
MOVD ( MM3, REGOFF(8, EDX) ) /* write r2 */
ADD_L ( EDI, EAX ) /* next vertex */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP3NRR_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP3NRR_3 ):
FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
/*
 * _mesa_3dnow_transform_points1_perspective
 *
 * Reads ARG_DEST, ARG_MATRIX and ARG_SOURCE.
 *
 * For every one-component input point x0 stores
 *     r0 = x0*m00    r1 = 0    r2 = m32    r3 = 0
 * at dest->start, 16 bytes per output point; the input pointer advances
 * by source->stride bytes.  Sets dest->size to 4, ORs VEC_SIZE_4 into
 * dest->flags and copies source->count to dest->count.
 *
 * The output pointer is fetched through V4F_START rather than a literal
 * offset, matching the other field accesses and the points2 routines.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_perspective )
HIDDEN(_mesa_3dnow_transform_points1_perspective)
GLNAME( _mesa_3dnow_transform_points1_perspective ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) )
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */
PUSH_L ( EDI )
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* source->stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* source->start */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPPR_3 ) ) /* count == 0 ? -> nothing to do */
MOVD ( REGIND(ECX), MM0 ) /* 0 | m00 */
MOVD ( REGOFF(56, ECX), MM3 ) /* 0 | m32 */
ALIGNTEXT16
LLBL( G3TPPR_2 ):
MOVD ( REGIND(EAX), MM4 ) /* 0 | x0 */
PFMUL ( MM0, MM4 ) /* 0 | x0*m00 */
MOVQ ( MM4, REGIND(EDX) ) /* write r0 (=x0*m00), r1 (=0) */
MOVQ ( MM3, REGOFF(8, EDX) ) /* write r2 (=m32), r3 (=0) */
ADD_L ( EDI, EAX ) /* next vertex */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPPR_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPPR_3 ):
FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
/*
 * _mesa_3dnow_transform_points1_2d
 *
 * Reads ARG_DEST, ARG_MATRIX and ARG_SOURCE.
 *
 * For every one-component input point x0 stores
 *     r0 = x0*m00 + m30    r1 = x0*m01 + m31
 * at dest->start, 16 bytes per output point; the input pointer advances
 * by source->stride bytes.  Sets dest->size to 2, ORs VEC_SIZE_2 into
 * dest->flags and copies source->count to dest->count.
 *
 * The output pointer is fetched through V4F_START rather than a literal
 * offset, matching the other field accesses and the points2 routines.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_2d )
HIDDEN(_mesa_3dnow_transform_points1_2d)
GLNAME( _mesa_3dnow_transform_points1_2d ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) )
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */
PUSH_L ( EDI )
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* source->stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* source->start */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2R_3 ) ) /* count == 0 ? -> nothing to do */
MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
ALIGNTEXT16
LLBL( G3TP2R_2 ):
MOVD ( REGIND(EAX), MM4 ) /* | x0 */
PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */
PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */
PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
ADD_L ( EDI, EAX ) /* next vertex */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP2R_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP2R_3 ):
FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
/*
 * _mesa_3dnow_transform_points1_2d_no_rot
 *
 * Reads ARG_DEST, ARG_MATRIX and ARG_SOURCE.
 *
 * For every one-component input point x0 stores
 *     r0 = x0*m00 + m30    r1 = m31
 * at dest->start, 16 bytes per output point; the input pointer advances
 * by source->stride bytes.  Sets dest->size to 2, ORs VEC_SIZE_2 into
 * dest->flags and copies source->count to dest->count.
 *
 * The output pointer is fetched through V4F_START rather than a literal
 * offset, matching the other field accesses and the points2 routines.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_2d_no_rot )
HIDDEN(_mesa_3dnow_transform_points1_2d_no_rot)
GLNAME( _mesa_3dnow_transform_points1_2d_no_rot ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) )
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */
PUSH_L ( EDI )
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* source->stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* source->start */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2NRR_3 ) ) /* count == 0 ? -> nothing to do */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
ALIGNTEXT16
LLBL( G3TP2NRR_2 ):
MOVD ( REGIND(EAX), MM4 ) /* | x0 */
ADD_L ( EDI, EAX ) /* next vertex */
PFMUL ( MM0, MM4 ) /* | x0*m00 */
PFADD ( MM2, MM4 ) /* m31 | x0*m00+m30 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP2NRR_3 ):
FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
/*
 * _mesa_3dnow_transform_points1_3d
 *
 * Reads ARG_DEST, ARG_MATRIX and ARG_SOURCE.
 *
 * For every one-component input point x0 stores
 *     r0 = x0*m00 + m30    r1 = x0*m01 + m31    r2 = x0*m02 + m32
 * at dest->start, 16 bytes per output point; the input pointer advances
 * by source->stride bytes.  Sets dest->size to 3, ORs VEC_SIZE_3 into
 * dest->flags and copies source->count to dest->count.
 *
 * The output pointer is fetched through V4F_START rather than a literal
 * offset, matching the other field accesses and the points2 routines.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_3d )
HIDDEN(_mesa_3dnow_transform_points1_3d)
GLNAME( _mesa_3dnow_transform_points1_3d ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) )
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest->count = source->count */
PUSH_L ( EDI )
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* source->stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* source->start */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3R_3 ) ) /* count == 0 ? -> nothing to do */
MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */
MOVD ( REGOFF(8, ECX), MM1 ) /* | m02 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */
ALIGNTEXT16
LLBL( G3TP3R_2 ):
MOVD ( REGIND(EAX), MM4 ) /* | x0 */
PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */
MOVQ ( MM4, MM5 ) /* x0 | x0 (only the low half is used) */
PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */
PFMUL ( MM1, MM5 ) /* | x0*m02 */
PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */
PFADD ( MM3, MM5 ) /* | x0*m02+m32 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
MOVD ( MM5, REGOFF(8, EDX) ) /* write r2 */
ADD_L ( EDI, EAX ) /* next vertex */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP3R_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP3R_3 ):
FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
#endif |
#if defined (__ELF__) && defined (__linux__) |
.section .note.GNU-stack,"",%progbits |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/3dnow_xform2.S |
---|
0,0 → 1,477 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#ifdef USE_3DNOW_ASM |
#include "assyntax.h" |
#include "matypes.h" |
#include "xform_args.h" |
SEG_TEXT |
/* NOTE(review): presumably consumed by the ARG_* macros from xform_args.h to
 * account for the 4-byte ESI that every routine below pushes before it reads
 * its arguments -- confirm against xform_args.h. */
#define FRAME_OFFSET 4 |
/*
 * _mesa_3dnow_transform_points2_general
 *
 * Transforms 2-component vertices (x0, x1) and stores four floats per
 * output vertex, output vertices being 16 bytes apart:
 *   r0 = x0*m00 + x1*m10 + m30     r1 = x0*m01 + x1*m11 + m31
 *   r2 = x0*m02 + x1*m12 + m32     r3 = x0*m03 + x1*m13 + m33
 * (mRC is the float at byte offset 4*(4*R + C) from the matrix pointer.)
 * The destination is marked as size 4 and inherits the source count.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride in bytes.
 * Register comments below read "high dword | low dword".
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points2_general ) |
HIDDEN(_mesa_3dnow_transform_points2_general) |
GLNAME( _mesa_3dnow_transform_points2_general ): |
PUSH_L ( ESI ) /* preserve ESI (restored before RET) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */ |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_4 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* preserve EDI (restored before RET) */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TPGR_3 ) ) /* count == 0: nothing to transform */ |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */ |
MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */ |
MOVD ( REGOFF(8, ECX), MM2 ) /* | m02 */ |
PUNPCKLDQ ( REGOFF(24, ECX), MM2 ) /* m12 | m02 */ |
MOVD ( REGOFF(12, ECX), MM3 ) /* | m03 */ |
PUNPCKLDQ ( REGOFF(28, ECX), MM3 ) /* m13 | m03 */ |
MOVQ ( REGOFF(48, ECX), MM4 ) /* m31 | m30 */ |
MOVQ ( REGOFF(56, ECX), MM5 ) /* m33 | m32 */ |
ALIGNTEXT16 |
LLBL( G3TPGR_2 ): |
MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */ |
MOVQ ( MM6, MM7 ) /* x1 | x0 */ |
PFMUL ( MM0, MM6 ) /* x1*m10 | x0*m00 */ |
PFMUL ( MM1, MM7 ) /* x1*m11 | x0*m01 */ |
PFACC ( MM7, MM6 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ |
PFADD ( MM4, MM6 ) /* x0*...*m11+m31 | x0*...*m10+m30 */ |
MOVQ ( MM6, REGIND(EDX) ) /* write r1, r0 */ |
MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 (source re-read after the r1,r0 store) */ |
MOVQ ( MM6, MM7 ) /* x1 | x0 */ |
PFMUL ( MM2, MM6 ) /* x1*m12 | x0*m02 */ |
PFMUL ( MM3, MM7 ) /* x1*m13 | x0*m03 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PFACC ( MM7, MM6 ) /* x0*m03+x1*m13 | x0*m02+x1*m12 */ |
PFADD ( MM5, MM6 ) /* x0*...*m13+m33 | x0*...*m12+m32 */ |
MOVQ ( MM6, REGOFF(8, EDX) ) /* write r3, r2 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TPGR_2 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TPGR_3 ): |
FEMMS /* clear MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points2_perspective
 *
 * Transforms 2-component vertices (x0, x1) and stores four floats per
 * output vertex, output vertices being 16 bytes apart:
 *   r0 = x0*m00     r1 = x1*m11     r2 = m32     r3 = 0
 * (mRC is the float at byte offset 4*(4*R + C) from the matrix pointer.)
 * The destination is marked as size 4 and inherits the source count.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride in bytes.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points2_perspective ) |
HIDDEN(_mesa_3dnow_transform_points2_perspective) |
GLNAME( _mesa_3dnow_transform_points2_perspective ): |
PUSH_L ( ESI ) /* preserve ESI (restored before RET) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */ |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_4 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* preserve EDI (restored before RET) */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TPPR_3 ) ) /* count == 0: nothing to transform */ |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ |
MOVD ( REGOFF(56, ECX), MM3 ) /* 0 | m32 (MOVD zeroes the high dword) */ |
ALIGNTEXT16 |
LLBL( G3TPPR_2 ): |
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ |
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ |
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ |
MOVQ ( MM3, REGOFF(8, EDX) ) /* write r2 (=m32), r3 (=0) */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TPPR_2 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TPPR_3 ): |
FEMMS /* clear MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points2_3d
 *
 * Transforms 2-component vertices (x0, x1) and stores three floats per
 * output vertex, output vertices being 16 bytes apart:
 *   r0 = x0*m00 + x1*m10 + m30
 *   r1 = x0*m01 + x1*m11 + m31
 *   r2 = x0*m02 + x1*m12 + m32
 * (mRC is the float at byte offset 4*(4*R + C) from the matrix pointer.)
 * The destination is marked as size 3 and inherits the source count; the
 * fourth float of each output vertex is not written.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride in bytes.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points2_3d ) |
HIDDEN(_mesa_3dnow_transform_points2_3d) |
GLNAME( _mesa_3dnow_transform_points2_3d ): |
PUSH_L ( ESI ) /* preserve ESI (restored before RET) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */ |
OR_B ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_3 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* preserve EDI (restored before RET) */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP3R_3 ) ) /* count == 0: nothing to transform */ |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */ |
MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */ |
MOVD ( REGOFF(8, ECX), MM2 ) /* | m02 */ |
PUNPCKLDQ ( REGOFF(24, ECX), MM2 ) /* m12 | m02 */ |
MOVQ ( REGOFF(48, ECX), MM4 ) /* m31 | m30 */ |
MOVD ( REGOFF(56, ECX), MM5 ) /* | m32 */ |
ALIGNTEXT16 |
LLBL( G3TP3R_2 ): |
MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */ |
MOVQ ( MM6, MM7 ) /* x1 | x0 */ |
PFMUL ( MM0, MM6 ) /* x1*m10 | x0*m00 */ |
PFMUL ( MM1, MM7 ) /* x1*m11 | x0*m01 */ |
PFACC ( MM7, MM6 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ |
PFADD ( MM4, MM6 ) /* x0*...*m11+m31 | x0*...*m10+m30 */ |
MOVQ ( MM6, REGIND(EDX) ) /* write r1, r0 */ |
MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 (source re-read after the r1,r0 store) */ |
MOVQ ( MM6, MM7 ) /* x1 | x0 */ |
PFMUL ( MM2, MM6 ) /* x1*m12 | x0*m02 */ |
PFACC ( MM7, MM6 ) /* ***trash*** | x0*m02+x1*m12 */ |
PFADD ( MM5, MM6 ) /* ***trash*** | x0*...*m12+m32 */ |
MOVD ( MM6, REGOFF(8, EDX) ) /* write r2 (low dword only) */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP3R_2 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP3R_3 ): |
FEMMS /* clear MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points2_3d_no_rot
 *
 * Transforms 2-component vertices (x0, x1) using only the diagonal and
 * translation entries, storing three floats per output vertex (output
 * vertices are 16 bytes apart):
 *   r0 = x0*m00 + m30     r1 = x1*m11 + m31     r2 = m32
 * (mRC is the float at byte offset 4*(4*R + C) from the matrix pointer.)
 * The destination is marked as size 3 and inherits the source count.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride in bytes.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points2_3d_no_rot ) |
HIDDEN(_mesa_3dnow_transform_points2_3d_no_rot) |
GLNAME( _mesa_3dnow_transform_points2_3d_no_rot ): |
PUSH_L ( ESI ) /* preserve ESI (restored before RET) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */ |
OR_B ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_3 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* preserve EDI (restored before RET) */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP3NRR_3 ) ) /* count == 0: nothing to transform */ |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ |
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ |
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ |
ALIGNTEXT16 |
LLBL( G3TP3NRR_2 ): |
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ |
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ |
PFADD ( MM2, MM4 ) /* x1*m11+m31 | x0*m00+m30 */ |
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ |
MOVD ( MM3, REGOFF(8, EDX) ) /* write r2 (= m32, same for every vertex) */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP3NRR_2 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP3NRR_3 ): |
FEMMS /* clear MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points2_2d
 *
 * Transforms 2-component vertices (x0, x1) and stores two floats per
 * output vertex, output vertices being 16 bytes apart:
 *   r0 = x0*m00 + x1*m10 + m30
 *   r1 = x0*m01 + x1*m11 + m31
 * (mRC is the float at byte offset 4*(4*R + C) from the matrix pointer.)
 * The destination is marked as size 2 and inherits the source count.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride in bytes.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points2_2d ) |
HIDDEN(_mesa_3dnow_transform_points2_2d) |
GLNAME( _mesa_3dnow_transform_points2_2d ): |
PUSH_L ( ESI ) /* preserve ESI (restored before RET) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest size = 2 */ |
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_2 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* preserve EDI (restored before RET) */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP2R_3 ) ) /* count == 0: nothing to transform */ |
MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */ |
MOVQ ( REGOFF(16, ECX), MM1 ) /* m11 | m10 */ |
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ |
ALIGNTEXT16 |
LLBL( G3TP2R_2 ): |
MOVD ( REGIND(EAX), MM4 ) /* | x0 */ |
MOVD ( REGOFF(4, EAX), MM5 ) /* | x1 */ |
PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */ |
PUNPCKLDQ ( MM5, MM5 ) /* x1 | x1 */ |
PFMUL ( MM1, MM5 ) /* x1*m11 | x1*m10 */ |
PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */ |
PFADD ( MM5, MM4 ) /* x0*m01+x1*m11+m31 | x0*m00+x1*m10+m30 */ |
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP2R_2 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP2R_3 ): |
FEMMS /* clear MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points2_2d_no_rot
 *
 * Transforms 2-component vertices (x0, x1) using only the diagonal and
 * translation entries, storing two floats per output vertex (output
 * vertices are 16 bytes apart):
 *   r0 = x0*m00 + m30     r1 = x1*m11 + m31
 * (mRC is the float at byte offset 4*(4*R + C) from the matrix pointer.)
 * The destination is marked as size 2 and inherits the source count.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride in bytes.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points2_2d_no_rot ) |
HIDDEN(_mesa_3dnow_transform_points2_2d_no_rot) |
GLNAME( _mesa_3dnow_transform_points2_2d_no_rot ): |
PUSH_L ( ESI ) /* preserve ESI (restored before RET) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest size = 2 */ |
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_2 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* preserve EDI (restored before RET) */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP2NRR_3 ) ) /* count == 0: nothing to transform */ |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ |
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ |
ALIGNTEXT16 |
LLBL( G3TP2NRR_2 ): |
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ |
PFADD ( MM2, MM4 ) /* x1*m11+m31 | x0*m00+m30 */ |
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP2NRR_3 ): |
FEMMS /* clear MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points2_identity
 *
 * Identity case for 2-component vertices: copies (x0, x1) of every source
 * vertex to the destination unchanged, output vertices being 16 bytes apart.
 * The destination is marked as size 2 and inherits the source count.
 * The matrix argument is loaded but never read.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ESI = vertices left, EDI = source stride in bytes.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points2_identity ) |
HIDDEN(_mesa_3dnow_transform_points2_identity) |
GLNAME( _mesa_3dnow_transform_points2_identity ): |
PUSH_L ( ESI ) /* preserve ESI (restored before RET) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* matrix pointer; unused by the copy loop */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest size = 2 */ |
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_2 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* preserve EDI (restored before RET) */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */ |
MOV_L ( ESI, ECX ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */ |
TEST_L ( ESI, ESI ) |
/* Must branch to the exit label: jumping to the loop head with count == 0
 * would copy one vertex and then wrap the counter to 0xFFFFFFFF. */
JZ ( LLBL( G3TPIR_4 ) ) /* count == 0: skip the copy loop */ |
ALIGNTEXT16 |
LLBL( G3TPIR_3 ): |
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
MOVQ ( MM0, REGIND(EDX) ) /* r1 | r0 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TPIR_3 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TPIR_4 ): |
FEMMS /* clear MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#endif |
#if defined (__ELF__) && defined (__linux__) |
.section .note.GNU-stack,"",%progbits |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/3dnow_xform3.S |
---|
0,0 → 1,561 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#ifdef USE_3DNOW_ASM |
#include "assyntax.h" |
#include "matypes.h" |
#include "xform_args.h" |
SEG_TEXT |
/* NOTE(review): presumably consumed by the ARG_* macros from xform_args.h to
 * account for the 4-byte ESI that every routine below pushes before it reads
 * its arguments -- confirm against xform_args.h. */
#define FRAME_OFFSET 4 |
/*
 * _mesa_3dnow_transform_points3_general
 *
 * Transforms 3-component vertices (x0, x1, x2) and stores four floats per
 * output vertex, output vertices being 16 bytes apart:
 *   r0 = x0*m0 + x1*m4 + x2*m8  + m12
 *   r1 = x0*m1 + x1*m5 + x2*m9  + m13
 *   r2 = x0*m2 + x1*m6 + x2*m10 + m14
 *   r3 = x0*m3 + x1*m7 + x2*m11 + m15
 * (mN is the float at byte offset 4*N from the matrix pointer; the matrix
 * is read from memory on every iteration rather than cached in registers.)
 * The destination is marked as size 4 and inherits the source count.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride in bytes.
 * EDX is advanced mid-loop, so the stores use negative offsets.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points3_general ) |
HIDDEN(_mesa_3dnow_transform_points3_general) |
GLNAME( _mesa_3dnow_transform_points3_general ): |
PUSH_L ( ESI ) /* preserve ESI (restored before RET) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */ |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_4 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* preserve EDI (restored before RET) */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TPGR_2 ) ) /* count == 0: nothing to transform */ |
PREFETCHW ( REGIND(EDX) ) /* first output vertex will be written */ |
ALIGNTEXT16 |
LLBL( G3TPGR_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, EAX), MM2 ) /* | x2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGIND(EAX) ) /* next source vertex */ |
MOVQ ( MM0, MM1 ) /* x1 | x0 */ |
PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */ |
PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */ |
MOVQ ( MM2, MM5 ) /* x2 | x2 */ |
PUNPCKHDQ ( MM1, MM1 ) /* x1 | x1 */ |
PFMUL ( REGOFF(32, ECX), MM2 ) /* x2*m9 | x2*m8 */ |
MOVQ ( MM0, MM3 ) /* x0 | x0 */ |
PFMUL ( REGOFF(40, ECX), MM5 ) /* x2*m11 | x2*m10 */ |
MOVQ ( MM1, MM4 ) /* x1 | x1 */ |
PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */ |
PFADD ( REGOFF(48, ECX), MM2 ) /* x2*m9+m13 | x2*m8+m12 */ |
PFMUL ( REGOFF(16, ECX), MM1 ) /* x1*m5 | x1*m4 */ |
PFADD ( REGOFF(56, ECX), MM5 ) /* x2*m11+m15 | x2*m10+m14 */ |
PFADD ( MM0, MM1 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */ |
PFMUL ( REGOFF(8, ECX), MM3 ) /* x0*m3 | x0*m2 */ |
PFADD ( MM1, MM2 ) /* r1 | r0 */ |
PFMUL ( REGOFF(24, ECX), MM4 ) /* x1*m7 | x1*m6 */ |
ADD_L ( CONST(16), EDX ) /* next output vertex */ |
PFADD ( MM3, MM4 ) /* x0*m3+x1*m7 | x0*m2+x1*m6 */ |
MOVQ ( MM2, REGOFF(-16, EDX) ) /* write r0, r1 */ |
PFADD ( MM4, MM5 ) /* r3 | r2 */ |
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TPGR_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TPGR_2 ): |
FEMMS /* clear MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points3_perspective
 *
 * Transforms 3-component vertices (x0, x1, x2) and stores four floats per
 * output vertex, output vertices being 16 bytes apart:
 *   r0 = x0*m00 + x2*m20     r1 = x1*m11 + x2*m21
 *   r2 = x2*m22 + m32        r3 = -x2
 * (mRC is the float at byte offset 4*(4*R + C) from the matrix pointer.)
 * The destination is marked as size 4 and inherits the source count.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride in bytes.
 * EDX is advanced mid-loop, so the stores use negative offsets.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points3_perspective ) |
HIDDEN(_mesa_3dnow_transform_points3_perspective) |
GLNAME( _mesa_3dnow_transform_points3_perspective ): |
PUSH_L ( ESI ) /* preserve ESI (restored before RET) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */ |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_4 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* preserve EDI (restored before RET) */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TPPR_2 ) ) /* count == 0: nothing to transform */ |
PREFETCH ( REGIND(EAX) ) /* first source vertex */ |
PREFETCHW ( REGIND(EDX) ) /* first output vertex will be written */ |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ |
MOVQ ( REGOFF(32, ECX), MM1 ) /* m21 | m20 */ |
MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ |
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ |
ALIGNTEXT16 |
LLBL( G3TPPR_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ |
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGIND(EAX) ) /* next source vertex */ |
PXOR ( MM7, MM7 ) /* 0 | 0 */ |
MOVQ ( MM5, MM6 ) /* | x2 */ |
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ |
PFSUB ( MM5, MM7 ) /* | -x2 (computed as 0 - x2) */ |
PFMUL ( MM2, MM6 ) /* | x2*m22 */ |
PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
PFMUL ( MM1, MM5 ) /* x2*m21 | x2*m20 */ |
PFADD ( MM3, MM6 ) /* | x2*m22+m32 */ |
PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */ |
MOVQ ( MM5, REGOFF(-16, EDX) ) /* write r0, r1 */ |
MOVD ( MM6, REGOFF(-8, EDX) ) /* write r2 */ |
MOVD ( MM7, REGOFF(-4, EDX) ) /* write r3 */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TPPR_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TPPR_2 ): |
FEMMS /* clear MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points3_3d
 *
 * Transforms 3-component vertices (x0, x1, x2) and stores three floats per
 * output vertex, output vertices being 16 bytes apart:
 *   r0 = x0*m0 + x1*m4 + x2*m8  + m12
 *   r1 = x0*m1 + x1*m5 + x2*m9  + m13
 *   r2 = x0*m2 + x1*m6 + x2*m10 + m14
 * (mN is the float at byte offset 4*N from the matrix pointer.)
 * The destination is marked as size 3 and inherits the source count; the
 * fourth float of each output vertex is not written.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride in bytes,
 * MM7 = (m6 | m2) cached across iterations.
 * EDX is advanced mid-loop, so the stores use negative offsets.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points3_3d ) |
HIDDEN(_mesa_3dnow_transform_points3_3d) |
GLNAME( _mesa_3dnow_transform_points3_3d ): |
PUSH_L ( ESI ) /* preserve ESI (restored before RET) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */ |
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_3 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* preserve EDI (restored before RET) */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP3R_2 ) ) /* count == 0: nothing to transform */ |
PREFETCH ( REGIND(EAX) ) /* first source vertex */ |
PREFETCHW ( REGIND(EDX) ) /* first output vertex is written: use the write hint like the loop does */ |
MOVD ( REGOFF(8, ECX), MM7 ) /* | m2 */ |
PUNPCKLDQ ( REGOFF(24, ECX), MM7 ) /* m6 | m2 */ |
ALIGNTEXT16 |
LLBL( G3TP3R_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGIND(EAX) ) /* next source vertex */ |
MOVQ ( MM0, MM2 ) /* x1 | x0 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
PUNPCKLDQ ( MM2, MM2 ) /* x0 | x0 */ |
MOVQ ( MM0, MM3 ) /* x1 | x0 */ |
PFMUL ( REGIND(ECX), MM2 ) /* x0*m1 | x0*m0 */ |
PUNPCKHDQ ( MM3, MM3 ) /* x1 | x1 */ |
MOVQ ( MM1, MM4 ) /* | x2 */ |
PFMUL ( REGOFF(16, ECX), MM3 ) /* x1*m5 | x1*m4 */ |
PUNPCKLDQ ( MM4, MM4 ) /* x2 | x2 */ |
PFADD ( MM2, MM3 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */ |
PFMUL ( REGOFF(32, ECX), MM4 ) /* x2*m9 | x2*m8 */ |
PFADD ( REGOFF(48, ECX), MM3 ) /* x0*m1+...+m13 | x0*m0+x1*m4+m12 */ |
PFMUL ( MM7, MM0 ) /* x1*m6 | x0*m2 */ |
PFADD ( MM4, MM3 ) /* r1 | r0 */ |
PFMUL ( REGOFF(40, ECX), MM1 ) /* | x2*m10 */ |
PUNPCKLDQ ( REGOFF(56, ECX), MM1 ) /* m14 | x2*m10 */ |
PFACC ( MM0, MM1 ) /* x0*m2+x1*m6 | x2*m10+m14 */ |
MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */ |
PFACC ( MM1, MM1 ) /* | r2 */ |
MOVD ( MM1, REGOFF(-8, EDX) ) /* write r2 */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP3R_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP3R_2 ): |
FEMMS /* clear MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points3_3d_no_rot
 *
 * Transforms 3-component vertices (x0, x1, x2) using only the diagonal and
 * translation entries, storing three floats per output vertex (output
 * vertices are 16 bytes apart):
 *   r0 = x0*m00 + m30     r1 = x1*m11 + m31     r2 = x2*m22 + m32
 * (mRC is the float at byte offset 4*(4*R + C) from the matrix pointer.)
 * The destination is marked as size 3 and inherits the source count.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride in bytes.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points3_3d_no_rot ) |
HIDDEN(_mesa_3dnow_transform_points3_3d_no_rot) |
GLNAME( _mesa_3dnow_transform_points3_3d_no_rot ): |
PUSH_L ( ESI ) /* preserve ESI (restored before RET) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */ |
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_3 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* preserve EDI (restored before RET) */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP3NRR_2 ) ) /* count == 0: nothing to transform */ |
PREFETCH ( REGIND(EAX) ) /* first source vertex */ |
PREFETCHW ( REGIND(EDX) ) /* first output vertex will be written */ |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ |
MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ |
PUNPCKLDQ ( MM2, MM2 ) /* m22 | m22 */ |
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */ |
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ |
PUNPCKLDQ ( MM3, MM3 ) /* m32 | m32 */ |
ALIGNTEXT16 |
LLBL( G3TP3NRR_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGIND(EAX) ) /* next source vertex is only read: plain prefetch, not the write hint */ |
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ |
PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */ |
PFMUL ( MM2, MM5 ) /* | x2*m22 */ |
PFADD ( MM3, MM5 ) /* | x2*m22+m32 */ |
MOVQ ( MM4, REGIND(EDX) ) /* write r0, r1 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (MOVD leaves the DEC_L flags intact for JNZ) */ |
JNZ ( LLBL( G3TP3NRR_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP3NRR_2 ): |
FEMMS /* clear MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points3_2d
 *
 * Transforms 3-component vertices (x0, x1, x2) and stores three floats per
 * output vertex, output vertices being 16 bytes apart:
 *   r0 = x0*m00 + x1*m10 + m30
 *   r1 = x0*m01 + x1*m11 + m31
 *   r2 = x2   (copied unchanged)
 * (mRC is the float at byte offset 4*(4*R + C) from the matrix pointer.)
 * The destination is marked as size 3 and inherits the source count.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride in bytes.
 * EDX is advanced mid-loop, so the stores use negative offsets.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points3_2d ) |
HIDDEN(_mesa_3dnow_transform_points3_2d) |
GLNAME( _mesa_3dnow_transform_points3_2d ): |
PUSH_L ( ESI ) /* preserve ESI (restored before RET) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */ |
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_3 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* preserve EDI (restored before RET) */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP2R_3) ) /* count == 0: nothing to transform */ |
PREFETCH ( REGIND(EAX) ) /* first source vertex */ |
PREFETCHW ( REGIND(EDX) ) /* first output vertex will be written */ |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */ |
MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */ |
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ |
ALIGNTEXT16 |
LLBL( G3TP2R_2 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGIND(EAX) ) /* next source vertex */ |
MOVQ ( MM3, MM4 ) /* x1 | x0 */ |
PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */ |
PFACC ( MM4, MM3 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ |
MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (=x2) */ |
PFADD ( MM2, MM3 ) /* x0*...*m11+m31 | x0*...*m10+m30 */ |
MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP2R_2 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP2R_3 ): |
FEMMS /* clear MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points3_2d_no_rot
 *
 * Transforms 3-component vertices (x0, x1, x2) using only the diagonal and
 * translation entries for x and y, storing three floats per output vertex
 * (output vertices are 16 bytes apart):
 *   r0 = x0*m00 + m30     r1 = x1*m11 + m31     r2 = x2 (copied unchanged)
 * (mRC is the float at byte offset 4*(4*R + C) from the matrix pointer.)
 * The destination is marked as size 3 and inherits the source count.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ECX = matrix, ESI = vertices left, EDI = source stride in bytes.
 * EDX is advanced mid-loop, so the stores use negative offsets.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points3_2d_no_rot ) |
HIDDEN(_mesa_3dnow_transform_points3_2d_no_rot) |
GLNAME( _mesa_3dnow_transform_points3_2d_no_rot ): |
PUSH_L ( ESI ) /* preserve ESI (restored before RET) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */ |
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_3 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* preserve EDI (restored before RET) */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP2NRR_2 ) ) /* count == 0: nothing to transform */ |
PREFETCH ( REGIND(EAX) ) /* first source vertex */ |
PREFETCHW ( REGIND(EDX) ) /* first output vertex will be written */ |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ |
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */ |
ALIGNTEXT16 |
LLBL( G3TP2NRR_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGIND(EAX) ) /* next source vertex */ |
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */ |
MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */ |
MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (=x2) */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP2NRR_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP2NRR_2 ): |
FEMMS /* clear MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points3_identity
 *
 * Identity case for 3-component vertices: copies (x0, x1, x2) of every
 * source vertex to the destination unchanged, output vertices being
 * 16 bytes apart. The destination is marked as size 3 and inherits the
 * source count. The matrix argument is loaded but never read.
 *
 * Loop registers: EAX = source vertex, EDX = destination vertex,
 * ESI = vertices left, EDI = source stride in bytes.
 * EDX is advanced before the stores, which therefore use negative offsets.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points3_identity ) |
HIDDEN(_mesa_3dnow_transform_points3_identity) |
GLNAME( _mesa_3dnow_transform_points3_identity ): |
PUSH_L ( ESI ) /* preserve ESI (restored before RET) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* matrix pointer; unused by the copy loop */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */ |
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_3 in dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* preserve EDI (restored before RET) */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest start */ |
MOV_L ( ESI, ECX ) |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source start */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TPIR_2 ) ) /* count == 0: skip the copy loop */ |
PREFETCHW ( REGIND(EDX) ) /* first output vertex will be written */ |
ALIGNTEXT16 |
LLBL( G3TPIR_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 output vertices ahead */ |
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ |
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
MOVQ ( MM0, REGOFF(-16, EDX) ) /* r1 | r0 */ |
MOVD ( MM1, REGOFF(-8, EDX) ) /* | r2 */ |
JNZ ( LLBL( G3TPIR_1 ) ) /* cnt > 0 ? -> process next vertex (flags still from DEC_L) */ |
LLBL( G3TPIR_2 ): |
FEMMS /* clear MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#endif |
#if defined (__ELF__) && defined (__linux__) |
.section .note.GNU-stack,"",%progbits |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/3dnow_xform4.S |
---|
0,0 → 1,570 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#ifdef USE_3DNOW_ASM |
#include "assyntax.h" |
#include "matypes.h" |
#include "xform_args.h" |
SEG_TEXT |
/* NOTE(review): presumably consumed by the ARG_* macros from xform_args.h to
 * account for the 4-byte ESI that every routine below pushes before it reads
 * its arguments -- confirm against xform_args.h. */
#define FRAME_OFFSET 4 |
/*
 * _mesa_3dnow_transform_points4_general
 *
 * Full 4x4 transform: for every source vertex (x0,x1,x2,x3) the loop reads
 * all 16 matrix floats (byte offsets 0..60 from ECX) and stores
 *   r[i] = x0*m[i] + x1*m[4+i] + x2*m[8+i] + x3*m[12+i],  i = 0..3
 * to the destination, 16 bytes per vertex.
 *
 * Register use inside the loop:
 *   EAX = source data pointer (advanced by EDI per vertex)
 *   EDX = destination data pointer (advanced by 16 per vertex)
 *   ECX = matrix pointer, ESI = remaining vertex count, EDI = source stride
 *
 * ARG_DEST/ARG_MATRIX/ARG_SOURCE and the V4F_ and VEC_SIZE_4 constants are
 * defined outside this chunk (presumably xform_args.h / matypes.h).
 * NOTE(review): all ARG_* reads happen after exactly one push, which matches
 * FRAME_OFFSET being 4 - confirm against xform_args.h before reordering.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points4_general ) |
HIDDEN(_mesa_3dnow_transform_points4_general) |
GLNAME( _mesa_3dnow_transform_points4_general ): |
PUSH_L ( ESI ) /* save ESI (restored before RET) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (parked here for now) */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */ |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* EDX = source count */ |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* save EDI (restored before RET) */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TPGR_2 ) ) /* count == 0 -> skip the loop */ |
PREFETCHW ( REGIND(EDX) ) /* prefetch first dest vertex for writing */ |
ALIGNTEXT16 |
LLBL( G3TPGR_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ |
MOVQ ( REGOFF(8, EAX), MM4 ) /* x3 | x2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGIND(EAX) ) /* prefetch the next source vertex */ |
MOVQ ( MM0, MM2 ) /* x1 | x0 */ |
MOVQ ( MM4, MM6 ) /* x3 | x2 */ |
PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */ |
PUNPCKHDQ ( MM2, MM2 ) /* x1 | x1 */ |
MOVQ ( MM0, MM1 ) /* x0 | x0 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */ |
MOVQ ( MM2, MM3 ) /* x1 | x1 */ |
PFMUL ( REGOFF(8, ECX), MM1 ) /* x0*m3 | x0*m2 */ |
PUNPCKLDQ ( MM4, MM4 ) /* x2 | x2 */ |
PFMUL ( REGOFF(16, ECX), MM2 ) /* x1*m5 | x1*m4 */ |
MOVQ ( MM4, MM5 ) /* x2 | x2 */ |
PFMUL ( REGOFF(24, ECX), MM3 ) /* x1*m7 | x1*m6 */ |
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */ |
PFMUL ( REGOFF(32, ECX), MM4 ) /* x2*m9 | x2*m8 */ |
MOVQ ( MM6, MM7 ) /* x3 | x3 */ |
PFMUL ( REGOFF(40, ECX), MM5 ) /* x2*m11 | x2*m10 */ |
PFADD ( MM0, MM2 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */ |
PFMUL ( REGOFF(48, ECX), MM6 ) /* x3*m13 | x3*m12 */ |
PFADD ( MM1, MM3 ) /* x0*m3+x1*m7 | x0*m2+x1*m6 */ |
PFMUL ( REGOFF(56, ECX), MM7 ) /* x3*m15 | x3*m14 */ |
PFADD ( MM4, MM6 ) /* x2*m9+x3*m13 | x2*m8+x3*m12 */ |
PFADD ( MM5, MM7 ) /* x2*m11+x3*m15 | x2*m10+x3*m14 */ |
PFADD ( MM2, MM6 ) /* r1 | r0 */ |
PFADD ( MM3, MM7 ) /* r3 | r2 */ |
MOVQ ( MM6, REGOFF(-16, EDX) ) /* write r0, r1 (EDX was already advanced) */ |
MOVQ ( MM7, REGOFF(-8, EDX) ) /* write r2, r3 */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TPGR_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TPGR_2 ): |
FEMMS /* leave MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points4_perspective
 *
 * Transform for a matrix of which only the floats at byte offsets
 * 0 (m00), 20 (m11), 32/36 (m20, m21), 40 (m22) and 56 (m32) are read.
 * Per source vertex (x0,x1,x2,x3) it stores, 16 bytes per vertex:
 *   r0 = x0*m00 + x2*m20      r1 = x1*m11 + x2*m21
 *   r2 = x2*m22 + x3*m32      r3 = -x2
 *
 * Register use inside the loop:
 *   EAX = source data pointer (advanced by EDI per vertex)
 *   EDX = destination data pointer (advanced by 16 per vertex)
 *   ESI = remaining vertex count, EDI = source stride
 *   MM0/MM1/MM2 = matrix constants, MM7 = 0 (all loaded once before the loop)
 *
 * ARG_ and V4F_ names are defined outside this chunk.
 * NOTE(review): all ARG_* reads happen after exactly one push, which matches
 * FRAME_OFFSET being 4 - confirm against xform_args.h before reordering.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points4_perspective ) |
HIDDEN(_mesa_3dnow_transform_points4_perspective) |
GLNAME( _mesa_3dnow_transform_points4_perspective ): |
PUSH_L ( ESI ) /* save ESI (restored before RET) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (parked here for now) */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */ |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* EDX = source count */ |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* save EDI (restored before RET) */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TPPR_2 ) ) /* count == 0 -> skip the loop */ |
PREFETCH ( REGIND(EAX) ) /* prefetch first source vertex */ |
PREFETCHW ( REGIND(EDX) ) /* prefetch first dest vertex for writing */ |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ |
MOVD ( REGOFF(40, ECX), MM1 ) /* | m22 */ |
PUNPCKLDQ ( REGOFF(56, ECX), MM1 ) /* m32 | m22 */ |
MOVQ ( REGOFF(32, ECX), MM2 ) /* m21 | m20 */ |
PXOR ( MM7, MM7 ) /* 0 | 0 */ |
ALIGNTEXT16 |
LLBL( G3TPPR_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ |
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ |
MOVD ( REGOFF(8, EAX), MM3 ) /* | x2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGOFF(32, EAX) ) /* hopefully stride is zero */ |
MOVQ ( MM5, MM6 ) /* x3 | x2 */ |
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ |
PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
PFMUL ( MM2, MM5 ) /* x2*m21 | x2*m20 */ |
PFSUBR ( MM7, MM3 ) /* | -x2 */ |
PFMUL ( MM1, MM6 ) /* x3*m32 | x2*m22 */ |
PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */ |
PFACC ( MM3, MM6 ) /* -x2 | x2*m22+x3*m32 */ |
MOVQ ( MM5, REGOFF(-16, EDX) ) /* write r0, r1 */ |
MOVQ ( MM6, REGOFF(-8, EDX) ) /* write r2, r3 */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TPPR_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TPPR_2 ): |
FEMMS /* leave MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points4_3d
 *
 * Transform that reads matrix floats m0,m1,m2, m4,m5,m6, m8,m9,m10 and
 * m12,m13,m14 (byte offsets 0..56; m3, m7, m11 and m15 are never read).
 * Per source vertex (x0,x1,x2,x3) it stores, 16 bytes per vertex:
 *   r0 = x0*m0 + x1*m4 + x2*m8  + x3*m12
 *   r1 = x0*m1 + x1*m5 + x2*m9  + x3*m13
 *   r2 = x0*m2 + x1*m6 + x2*m10 + x3*m14
 *   r3 = x3
 *
 * Register use inside the loop:
 *   EAX = source data pointer (advanced by EDI per vertex)
 *   EDX = destination data pointer (advanced by 16 per vertex)
 *   ECX = matrix pointer, ESI = remaining vertex count, EDI = source stride
 *   MM6 = m6 | m2 and MM7 = m14 | m10 (loaded once before the loop)
 *
 * ARG_ and V4F_ names are defined outside this chunk.
 * NOTE(review): all ARG_* reads happen after exactly one push, which matches
 * FRAME_OFFSET being 4 - confirm against xform_args.h before reordering.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d ) |
HIDDEN(_mesa_3dnow_transform_points4_3d) |
GLNAME( _mesa_3dnow_transform_points4_3d ): |
PUSH_L ( ESI ) /* save ESI (restored before RET) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (parked here for now) */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */ |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* EDX = source count */ |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* save EDI (restored before RET) */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP3R_2 ) ) /* count == 0 -> skip the loop */ |
MOVD ( REGOFF(8, ECX), MM6 ) /* | m2 */ |
PUNPCKLDQ ( REGOFF(24, ECX), MM6 ) /* m6 | m2 */ |
MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */ |
PUNPCKLDQ ( REGOFF(56, ECX), MM7 ) /* m14 | m10 */ |
ALIGNTEXT16 |
LLBL( G3TP3R_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
PREFETCH ( REGOFF(32, EAX) ) /* hopefully array is tightly packed */ |
MOVQ ( REGIND(EAX), MM2 ) /* x1 | x0 */ |
MOVQ ( REGOFF(8, EAX), MM3 ) /* x3 | x2 */ |
MOVQ ( MM2, MM0 ) /* x1 | x0 */ |
MOVQ ( MM3, MM4 ) /* x3 | x2 */ |
MOVQ ( MM0, MM1 ) /* x1 | x0 */ |
MOVQ ( MM4, MM5 ) /* x3 | x2 */ |
PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */ |
PUNPCKHDQ ( MM1, MM1 ) /* x1 | x1 */ |
PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */ |
PUNPCKLDQ ( MM3, MM3 ) /* x2 | x2 */ |
PFMUL ( REGOFF(16, ECX), MM1 ) /* x1*m5 | x1*m4 */ |
PUNPCKHDQ ( MM4, MM4 ) /* x3 | x3 */ |
PFMUL ( MM6, MM2 ) /* x1*m6 | x0*m2 */ |
PFADD ( MM0, MM1 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */ |
PFMUL ( REGOFF(32, ECX), MM3 ) /* x2*m9 | x2*m8 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
PFMUL ( REGOFF(48, ECX), MM4 ) /* x3*m13 | x3*m12 */ |
PFADD ( MM1, MM3 ) /* x0*m1+..+x2*m9 | x0*m0+...+x2*m8 */ |
PFMUL ( MM7, MM5 ) /* x3*m14 | x2*m10 */ |
PFADD ( MM3, MM4 ) /* r1 | r0 */ |
PFACC ( MM2, MM5 ) /* x0*m2+x1*m6 | x2*m10+x3*m14 */ |
MOVD ( REGOFF(12, EAX), MM0 ) /* | x3 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PFACC ( MM0, MM5 ) /* r3 | r2 */ |
MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */ |
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP3R_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP3R_2 ): |
FEMMS /* leave MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points4_3d_no_rot
 *
 * Transform for a matrix of which only the floats at byte offsets
 * 0 (m00), 20 (m11), 40 (m22), 48/52 (m30, m31) and 56 (m32) are read.
 * Per source vertex (x0,x1,x2,x3) it stores, 16 bytes per vertex:
 *   r0 = x0*m00 + x3*m30      r1 = x1*m11 + x3*m31
 *   r2 = x2*m22 + x3*m32      r3 = x3
 *
 * Register use inside the loop:
 *   EAX = source data pointer (advanced by EDI per vertex)
 *   EDX = destination data pointer (advanced by 16 per vertex)
 *   ESI = remaining vertex count, EDI = source stride
 *   MM0/MM1/MM2 = matrix constants loaded once before the loop
 *
 * ARG_ and V4F_ names are defined outside this chunk.
 * NOTE(review): all ARG_* reads happen after exactly one push, which matches
 * FRAME_OFFSET being 4 - confirm against xform_args.h before reordering.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d_no_rot ) |
HIDDEN(_mesa_3dnow_transform_points4_3d_no_rot) |
GLNAME( _mesa_3dnow_transform_points4_3d_no_rot ): |
PUSH_L ( ESI ) /* save ESI (restored before RET) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (parked here for now) */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */ |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* EDX = source count */ |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* save EDI (restored before RET) */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP3NRR_2 ) ) /* count == 0 -> skip the loop */ |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ |
MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ |
PUNPCKLDQ ( REGOFF(56, ECX), MM2 ) /* m32 | m22 */ |
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */ |
ALIGNTEXT16 |
LLBL( G3TP3NRR_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ |
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ |
MOVD ( REGOFF(12, EAX), MM7 ) /* | x3 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGOFF(32, EAX) ) /* hopefully stride is zero */ |
MOVQ ( MM5, MM6 ) /* x3 | x2 */ |
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ |
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */ |
PFMUL ( MM2, MM5 ) /* x3*m32 | x2*m22 */ |
PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */ |
PFACC ( MM7, MM5 ) /* x3 | x2*m22+x3*m32 */ |
PFADD ( MM6, MM4 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */ |
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP3NRR_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP3NRR_2 ): |
FEMMS /* leave MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points4_2d
 *
 * Transform for a matrix of which only the floats at byte offsets
 * 0 (m00), 4 (m01), 16 (m10), 20 (m11) and 48/52 (m30, m31) are read.
 * Per source vertex (x0,x1,x2,x3) it stores, 16 bytes per vertex:
 *   r0 = x0*m00 + x1*m10 + x3*m30
 *   r1 = x0*m01 + x1*m11 + x3*m31
 *   r2 = x2, r3 = x3 (copied through unchanged)
 *
 * Register use inside the loop:
 *   EAX = source data pointer (advanced by EDI per vertex)
 *   EDX = destination data pointer (advanced by 16 per vertex)
 *   ESI = remaining vertex count, EDI = source stride
 *   MM0/MM1/MM2 = matrix constants loaded once before the loop
 *
 * ARG_ and V4F_ names are defined outside this chunk.
 * NOTE(review): all ARG_* reads happen after exactly one push, which matches
 * FRAME_OFFSET being 4 - confirm against xform_args.h before reordering.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d ) |
HIDDEN(_mesa_3dnow_transform_points4_2d) |
GLNAME( _mesa_3dnow_transform_points4_2d ): |
PUSH_L ( ESI ) /* save ESI (restored before RET) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (parked here for now) */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */ |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* EDX = source count */ |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* save EDI (restored before RET) */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP2R_2 ) ) /* count == 0 -> skip the loop */ |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */ |
MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */ |
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ |
ALIGNTEXT16 |
LLBL( G3TP2R_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */ |
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGIND(EAX) ) /* prefetch the next source vertex */ |
MOVQ ( MM3, MM4 ) /* x1 | x0 */ |
MOVQ ( MM5, MM6 ) /* x3 | x2 */ |
PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */ |
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */ |
PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
PFACC ( MM4, MM3 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ |
PFMUL ( MM2, MM6 ) /* x3*m31 | x3*m30 */ |
PFADD ( MM6, MM3 ) /* r1 | r0 */ |
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */ |
MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP2R_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP2R_2 ): |
FEMMS /* leave MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points4_2d_no_rot
 *
 * Transform for a matrix of which only the floats at byte offsets
 * 0 (m00), 20 (m11) and 48/52 (m30, m31) are read.
 * Per source vertex (x0,x1,x2,x3) it stores, 16 bytes per vertex:
 *   r0 = x0*m00 + x3*m30      r1 = x1*m11 + x3*m31
 *   r2 = x2, r3 = x3 (copied through unchanged)
 *
 * Register use inside the loop:
 *   EAX = source data pointer (advanced by EDI per vertex)
 *   EDX = destination data pointer (advanced by 16 per vertex)
 *   ESI = remaining vertex count, EDI = source stride
 *   MM0 = m11 | m00 and MM1 = m31 | m30 (loaded once before the loop)
 *
 * ARG_ and V4F_ names are defined outside this chunk.
 * NOTE(review): all ARG_* reads happen after exactly one push, which matches
 * FRAME_OFFSET being 4 - confirm against xform_args.h before reordering.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d_no_rot ) |
HIDDEN(_mesa_3dnow_transform_points4_2d_no_rot) |
GLNAME( _mesa_3dnow_transform_points4_2d_no_rot ): |
PUSH_L ( ESI ) /* save ESI (restored before RET) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (parked here for now) */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */ |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* EDX = source count */ |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* save EDI (restored before RET) */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */ |
MOV_L ( ESI, ECX ) /* ECX = matrix */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TP2NRR_3 ) ) /* count == 0 -> skip the loop */ |
MOVD ( REGIND(ECX), MM0 ) /* | m00 */ |
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ |
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */ |
ALIGNTEXT16 |
LLBL( G3TP2NRR_2 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ |
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGIND(EAX) ) /* prefetch the next source vertex */ |
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ |
MOVQ ( MM5, MM6 ) /* x3 | x2 */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */ |
PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */ |
PFADD ( MM4, MM6 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */ |
MOVQ ( MM6, REGOFF(-16, EDX) ) /* write r0, r1 */ |
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TP2NRR_3 ): |
FEMMS /* leave MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
/*
 * _mesa_3dnow_transform_points4_identity
 *
 * Copies each 4-float source vertex to the destination unchanged,
 * 16 bytes per destination vertex. The matrix pointer is loaded into ECX
 * like in the sibling routines but is never dereferenced here.
 *
 * Register use inside the loop:
 *   EAX = source data pointer (advanced by EDI per vertex)
 *   EDX = destination data pointer (advanced by 16 per vertex)
 *   ESI = remaining vertex count, EDI = source stride
 *
 * ARG_ and V4F_ names are defined outside this chunk.
 * NOTE(review): all ARG_* reads happen after exactly one push, which matches
 * FRAME_OFFSET being 4 - confirm against xform_args.h before reordering.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_3dnow_transform_points4_identity ) |
HIDDEN(_mesa_3dnow_transform_points4_identity) |
GLNAME( _mesa_3dnow_transform_points4_identity ): |
PUSH_L ( ESI ) /* save ESI (restored before RET) */ |
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */ |
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (parked here for now) */ |
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */ |
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */ |
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* EDX = source count */ |
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */ |
PUSH_L ( EDI ) /* save EDI (restored before RET) */ |
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */ |
MOV_L ( ESI, ECX ) /* ECX = matrix (unused below) */ |
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */ |
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */ |
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */ |
TEST_L ( ESI, ESI ) |
JZ ( LLBL( G3TPIR_2 ) ) /* count == 0 -> skip the loop */ |
ALIGNTEXT16 |
LLBL( G3TPIR_1 ): |
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ |
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ |
MOVQ ( REGOFF(8, EAX), MM1 ) /* x3 | x2 */ |
ADD_L ( EDI, EAX ) /* next vertex */ |
PREFETCH ( REGIND(EAX) ) /* prefetch the next source vertex */ |
ADD_L ( CONST(16), EDX ) /* next r */ |
MOVQ ( MM0, REGOFF(-16, EDX) ) /* r1 | r0 */ |
MOVQ ( MM1, REGOFF(-8, EDX) ) /* r3 | r2 */ |
DEC_L ( ESI ) /* decrement vertex counter */ |
JNZ ( LLBL( G3TPIR_1 ) ) /* cnt > 0 ? -> process next vertex */ |
LLBL( G3TPIR_2 ): |
FEMMS /* leave MMX/3DNow! state before returning */ |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#endif |
#if defined (__ELF__) && defined (__linux__) |
.section .note.GNU-stack,"",%progbits |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/Makefile.am |
---|
0,0 → 1,49 |
# Copyright © 2012 Intel Corporation |
# |
# Permission is hereby granted, free of charge, to any person obtaining a |
# copy of this software and associated documentation files (the "Software"), |
# to deal in the Software without restriction, including without limitation |
# the rights to use, copy, modify, merge, publish, distribute, sublicense, |
# and/or sell copies of the Software, and to permit persons to whom the |
# Software is furnished to do so, subject to the following conditions: |
# |
# The above copyright notice and this permission notice (including the next |
# paragraph) shall be included in all copies or substantial portions of the |
# Software. |
# |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
# IN THE SOFTWARE. |
# Everything in this file is active only when the HAVE_X86_ASM conditional is true.
if HAVE_X86_ASM |
# Preprocessor flags (include paths plus $(DEFINES)) for compiling gen_matypes.c.
AM_CPPFLAGS = \ |
-I$(top_srcdir)/include \ |
-I$(top_srcdir)/src/mesa \ |
-I$(top_srcdir)/src/mapi \ |
$(DEFINES) |
# gen_matypes is a build-time helper program; noinst_ means it is not installed.
noinst_PROGRAMS = gen_matypes |
gen_matypes_SOURCES = gen_matypes.c |
# matypes.h is a generated header: listed in BUILT_SOURCES so it is made first,
# and in CLEANFILES so "make clean" removes it.
BUILT_SOURCES = matypes.h |
CLEANFILES = matypes.h |
# With GEN_ASM_OFFSETS the header is produced without running gen_matypes:
# gen_matypes.c is compiled to assembly (-S) with -DASM_OFFSETS, and sed keeps
# only the lines starting with "->", strips that marker, and for lines that
# contain a "$" prepends "#define " and removes the first "$".
if GEN_ASM_OFFSETS |
matypes.h: $(gen_matypes_SOURCES) |
$(AM_V_GEN)$(COMPILE) $< -DASM_OFFSETS -S -o - | \ |
sed -n '/^->/{s:^->::;/[$$]/{s:^:#define :;s:[$$]::};p}' > $@ |
else |
# Otherwise the gen_matypes program is built and its standard output becomes matypes.h.
matypes.h: gen_matypes |
$(AM_V_GEN)./gen_matypes > $@ |
endif |
endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/Makefile.in |
---|
0,0 → 1,784 |
# Makefile.in generated by automake 1.14 from Makefile.am. |
# @configure_input@ |
# Copyright (C) 1994-2013 Free Software Foundation, Inc. |
# This Makefile.in is free software; the Free Software Foundation |
# gives unlimited permission to copy and/or distribute it, |
# with or without modifications, as long as this notice is preserved. |
# This program is distributed in the hope that it will be useful, |
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without |
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A |
# PARTICULAR PURPOSE. |
@SET_MAKE@ |
# Copyright © 2012 Intel Corporation |
# |
# Permission is hereby granted, free of charge, to any person obtaining a |
# copy of this software and associated documentation files (the "Software"), |
# to deal in the Software without restriction, including without limitation |
# the rights to use, copy, modify, merge, publish, distribute, sublicense, |
# and/or sell copies of the Software, and to permit persons to whom the |
# Software is furnished to do so, subject to the following conditions: |
# |
# The above copyright notice and this permission notice (including the next |
# paragraph) shall be included in all copies or substantial portions of the |
# Software. |
# |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
# IN THE SOFTWARE. |
VPATH = @srcdir@ |
# am__is_gnu_make: shell test that succeeds when both MAKEFILE_LIST and
# MAKELEVEL expand to non-empty strings.
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' |
# am__make_running_with_option: shell snippet whose exit status is success when
# the single-character option named by the shell variable target_option appears
# among the make flags. It rejects a target_option that is not exactly one
# character, reads MFLAGS when am__is_gnu_make succeeds (otherwise MAKEFLAGS,
# with backslash-escaped whitespace removed), skips variable assignments and
# long options, drops the arguments of -I, -O and -l, and skips the word that
# follows -d, -E, -D, -m, -J and -T before looking for the option letter.
am__make_running_with_option = \ |
case $${target_option-} in \ |
?) ;; \ |
*) echo "am__make_running_with_option: internal error: invalid" \ |
"target option '$${target_option-}' specified" >&2; \ |
exit 1;; \ |
esac; \ |
has_opt=no; \ |
sane_makeflags=$$MAKEFLAGS; \ |
if $(am__is_gnu_make); then \ |
sane_makeflags=$$MFLAGS; \ |
else \ |
case $$MAKEFLAGS in \ |
*\\[\ \ ]*) \ |
bs=\\; \ |
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ |
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ |
esac; \ |
fi; \ |
skip_next=no; \ |
strip_trailopt () \ |
{ \ |
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ |
}; \ |
for flg in $$sane_makeflags; do \ |
test $$skip_next = yes && { skip_next=no; continue; }; \ |
case $$flg in \ |
*=*|--*) continue;; \ |
-*I) strip_trailopt 'I'; skip_next=yes;; \ |
-*I?*) strip_trailopt 'I';; \ |
-*O) strip_trailopt 'O'; skip_next=yes;; \ |
-*O?*) strip_trailopt 'O';; \ |
-*l) strip_trailopt 'l'; skip_next=yes;; \ |
-*l?*) strip_trailopt 'l';; \ |
-[dEDm]) skip_next=yes;; \ |
-[JT]) skip_next=yes;; \ |
esac; \ |
case $$flg in \ |
*$$target_option*) has_opt=yes; break;; \ |
esac; \ |
done; \ |
test $$has_opt = yes |
# Convenience wrappers: test for the "n" option and the "k" option respectively.
am__make_dryrun = (target_option=n; $(am__make_running_with_option)) |
am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) |
pkgdatadir = $(datadir)/@PACKAGE@ |
pkgincludedir = $(includedir)/@PACKAGE@ |
pkglibdir = $(libdir)/@PACKAGE@ |
pkglibexecdir = $(libexecdir)/@PACKAGE@ |
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd |
install_sh_DATA = $(install_sh) -c -m 644 |
install_sh_PROGRAM = $(install_sh) -c |
install_sh_SCRIPT = $(install_sh) -c |
INSTALL_HEADER = $(INSTALL_DATA) |
transform = $(program_transform_name) |
NORMAL_INSTALL = : |
PRE_INSTALL = : |
POST_INSTALL = : |
NORMAL_UNINSTALL = : |
PRE_UNINSTALL = : |
POST_UNINSTALL = : |
build_triplet = @build@ |
host_triplet = @host@ |
target_triplet = @target@ |
@HAVE_X86_ASM_TRUE@noinst_PROGRAMS = gen_matypes$(EXEEXT) |
subdir = src/mesa/x86 |
DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ |
$(top_srcdir)/bin/depcomp |
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 |
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_prog_bison.m4 \ |
$(top_srcdir)/m4/ax_prog_cc_for_build.m4 \ |
$(top_srcdir)/m4/ax_prog_cxx_for_build.m4 \ |
$(top_srcdir)/m4/ax_prog_flex.m4 \ |
$(top_srcdir)/m4/ax_pthread.m4 \ |
$(top_srcdir)/m4/ax_python_module.m4 \ |
$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \ |
$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ |
$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac |
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ |
$(ACLOCAL_M4) |
mkinstalldirs = $(install_sh) -d |
CONFIG_CLEAN_FILES = |
CONFIG_CLEAN_VPATH_FILES = |
PROGRAMS = $(noinst_PROGRAMS) |
am__gen_matypes_SOURCES_DIST = gen_matypes.c |
@HAVE_X86_ASM_TRUE@am_gen_matypes_OBJECTS = gen_matypes.$(OBJEXT) |
gen_matypes_OBJECTS = $(am_gen_matypes_OBJECTS) |
gen_matypes_LDADD = $(LDADD) |
AM_V_lt = $(am__v_lt_@AM_V@) |
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) |
am__v_lt_0 = --silent |
am__v_lt_1 = |
AM_V_P = $(am__v_P_@AM_V@) |
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) |
am__v_P_0 = false |
am__v_P_1 = : |
AM_V_GEN = $(am__v_GEN_@AM_V@) |
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) |
am__v_GEN_0 = @echo " GEN " $@; |
am__v_GEN_1 = |
AM_V_at = $(am__v_at_@AM_V@) |
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) |
am__v_at_0 = @ |
am__v_at_1 = |
DEFAULT_INCLUDES = -I.@am__isrc@ |
depcomp = $(SHELL) $(top_srcdir)/bin/depcomp |
am__depfiles_maybe = depfiles |
am__mv = mv -f |
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ |
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) |
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ |
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ |
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ |
$(AM_CFLAGS) $(CFLAGS) |
AM_V_CC = $(am__v_CC_@AM_V@) |
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) |
am__v_CC_0 = @echo " CC " $@; |
am__v_CC_1 = |
CCLD = $(CC) |
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ |
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ |
$(AM_LDFLAGS) $(LDFLAGS) -o $@ |
AM_V_CCLD = $(am__v_CCLD_@AM_V@) |
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) |
am__v_CCLD_0 = @echo " CCLD " $@; |
am__v_CCLD_1 = |
SOURCES = $(gen_matypes_SOURCES) |
DIST_SOURCES = $(am__gen_matypes_SOURCES_DIST) |
am__can_run_installinfo = \ |
case $$AM_UPDATE_INFO_DIR in \ |
n|no|NO) false;; \ |
*) (install-info --version) >/dev/null 2>&1;; \ |
esac |
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) |
# Read a list of newline-separated strings from the standard input, |
# and print each of them once, without duplicates. Input order is |
# *not* preserved. |
am__uniquify_input = $(AWK) '\ |
BEGIN { nonempty = 0; } \ |
{ items[$$0] = 1; nonempty = 1; } \ |
END { if (nonempty) { for (i in items) print i; }; } \ |
' |
# Make sure the list of sources is unique. This is necessary because, |
# e.g., the same source file might be shared among _SOURCES variables |
# for different programs/libraries. |
am__define_uniq_tagged_files = \ |
list='$(am__tagged_files)'; \ |
unique=`for i in $$list; do \ |
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ |
done | $(am__uniquify_input)` |
ETAGS = etags |
CTAGS = ctags |
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) |
ACLOCAL = @ACLOCAL@ |
AMTAR = @AMTAR@ |
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ |
AR = @AR@ |
AUTOCONF = @AUTOCONF@ |
AUTOHEADER = @AUTOHEADER@ |
AUTOMAKE = @AUTOMAKE@ |
AWK = @AWK@ |
BUILD_EXEEXT = @BUILD_EXEEXT@ |
BUILD_OBJEXT = @BUILD_OBJEXT@ |
CC = @CC@ |
CCAS = @CCAS@ |
CCASDEPMODE = @CCASDEPMODE@ |
CCASFLAGS = @CCASFLAGS@ |
CCDEPMODE = @CCDEPMODE@ |
CC_FOR_BUILD = @CC_FOR_BUILD@ |
CFLAGS = @CFLAGS@ |
CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@ |
CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@ |
CLOCK_LIB = @CLOCK_LIB@ |
CPP = @CPP@ |
CPPFLAGS = @CPPFLAGS@ |
CPPFLAGS_FOR_BUILD = @CPPFLAGS_FOR_BUILD@ |
CPP_FOR_BUILD = @CPP_FOR_BUILD@ |
CXX = @CXX@ |
CXXCPP = @CXXCPP@ |
CXXCPPFLAGS_FOR_BUILD = @CXXCPPFLAGS_FOR_BUILD@ |
CXXCPP_FOR_BUILD = @CXXCPP_FOR_BUILD@ |
CXXDEPMODE = @CXXDEPMODE@ |
CXXFLAGS = @CXXFLAGS@ |
CXXFLAGS_FOR_BUILD = @CXXFLAGS_FOR_BUILD@ |
CXX_FOR_BUILD = @CXX_FOR_BUILD@ |
CYGPATH_W = @CYGPATH_W@ |
DEFINES = @DEFINES@ |
DEFINES_FOR_BUILD = @DEFINES_FOR_BUILD@ |
DEFS = @DEFS@ |
DEPDIR = @DEPDIR@ |
DLLTOOL = @DLLTOOL@ |
DLOPEN_LIBS = @DLOPEN_LIBS@ |
DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@ |
DRI2PROTO_LIBS = @DRI2PROTO_LIBS@ |
DRIGL_CFLAGS = @DRIGL_CFLAGS@ |
DRIGL_LIBS = @DRIGL_LIBS@ |
DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@ |
DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@ |
DRI_LIB_DEPS = @DRI_LIB_DEPS@ |
DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@ |
DSYMUTIL = @DSYMUTIL@ |
DUMPBIN = @DUMPBIN@ |
ECHO_C = @ECHO_C@ |
ECHO_N = @ECHO_N@ |
ECHO_T = @ECHO_T@ |
EGL_CFLAGS = @EGL_CFLAGS@ |
EGL_CLIENT_APIS = @EGL_CLIENT_APIS@ |
EGL_DRIVER_INSTALL_DIR = @EGL_DRIVER_INSTALL_DIR@ |
EGL_LIB_DEPS = @EGL_LIB_DEPS@ |
EGL_LIB_GLOB = @EGL_LIB_GLOB@ |
EGL_LIB_NAME = @EGL_LIB_NAME@ |
EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@ |
EGL_PLATFORMS = @EGL_PLATFORMS@ |
EGREP = @EGREP@ |
ELF_LIB = @ELF_LIB@ |
EXEEXT = @EXEEXT@ |
EXPAT_INCLUDES = @EXPAT_INCLUDES@ |
FGREP = @FGREP@ |
FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@ |
FREEDRENO_LIBS = @FREEDRENO_LIBS@ |
GALLIUM_DRI_LIB_DEPS = @GALLIUM_DRI_LIB_DEPS@ |
GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@ |
GALLIUM_PIPE_LOADER_LIBS = @GALLIUM_PIPE_LOADER_LIBS@ |
GALLIUM_PIPE_LOADER_XCB_CFLAGS = @GALLIUM_PIPE_LOADER_XCB_CFLAGS@ |
GALLIUM_PIPE_LOADER_XCB_LIBS = @GALLIUM_PIPE_LOADER_XCB_LIBS@ |
GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@ |
GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@ |
GLAPI_LIB_GLOB = @GLAPI_LIB_GLOB@ |
GLAPI_LIB_NAME = @GLAPI_LIB_NAME@ |
GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@ |
GLESv1_CM_LIB_GLOB = @GLESv1_CM_LIB_GLOB@ |
GLESv1_CM_LIB_NAME = @GLESv1_CM_LIB_NAME@ |
GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@ |
GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@ |
GLESv2_LIB_GLOB = @GLESv2_LIB_GLOB@ |
GLESv2_LIB_NAME = @GLESv2_LIB_NAME@ |
GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@ |
GLPROTO_CFLAGS = @GLPROTO_CFLAGS@ |
GLPROTO_LIBS = @GLPROTO_LIBS@ |
GLX_TLS = @GLX_TLS@ |
GL_LIB = @GL_LIB@ |
GL_LIB_DEPS = @GL_LIB_DEPS@ |
GL_LIB_GLOB = @GL_LIB_GLOB@ |
GL_LIB_NAME = @GL_LIB_NAME@ |
GL_PC_CFLAGS = @GL_PC_CFLAGS@ |
GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@ |
GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@ |
GREP = @GREP@ |
HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@ |
INDENT = @INDENT@ |
INDENT_FLAGS = @INDENT_FLAGS@ |
INSTALL = @INSTALL@ |
INSTALL_DATA = @INSTALL_DATA@ |
INSTALL_PROGRAM = @INSTALL_PROGRAM@ |
INSTALL_SCRIPT = @INSTALL_SCRIPT@ |
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ |
INTEL_CFLAGS = @INTEL_CFLAGS@ |
INTEL_LIBS = @INTEL_LIBS@ |
LD = @LD@ |
LDFLAGS = @LDFLAGS@ |
LDFLAGS_FOR_BUILD = @LDFLAGS_FOR_BUILD@ |
LEX = @LEX@ |
LEXLIB = @LEXLIB@ |
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@ |
LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@ |
LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@ |
LIBDRM_CFLAGS = @LIBDRM_CFLAGS@ |
LIBDRM_LIBS = @LIBDRM_LIBS@ |
LIBDRM_XORG_CFLAGS = @LIBDRM_XORG_CFLAGS@ |
LIBDRM_XORG_LIBS = @LIBDRM_XORG_LIBS@ |
LIBKMS_XORG_CFLAGS = @LIBKMS_XORG_CFLAGS@ |
LIBKMS_XORG_LIBS = @LIBKMS_XORG_LIBS@ |
LIBOBJS = @LIBOBJS@ |
LIBS = @LIBS@ |
LIBTOOL = @LIBTOOL@ |
LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@ |
LIBUDEV_LIBS = @LIBUDEV_LIBS@ |
LIB_DIR = @LIB_DIR@ |
LIPO = @LIPO@ |
LLVM_BINDIR = @LLVM_BINDIR@ |
LLVM_CFLAGS = @LLVM_CFLAGS@ |
LLVM_CONFIG = @LLVM_CONFIG@ |
LLVM_CPPFLAGS = @LLVM_CPPFLAGS@ |
LLVM_CXXFLAGS = @LLVM_CXXFLAGS@ |
LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@ |
LLVM_LDFLAGS = @LLVM_LDFLAGS@ |
LLVM_LIBDIR = @LLVM_LIBDIR@ |
LLVM_LIBS = @LLVM_LIBS@ |
LLVM_VERSION = @LLVM_VERSION@ |
LN_S = @LN_S@ |
LTLIBOBJS = @LTLIBOBJS@ |
MAKE = @MAKE@ |
MAKEINFO = @MAKEINFO@ |
MANIFEST_TOOL = @MANIFEST_TOOL@ |
MESA_LLVM = @MESA_LLVM@ |
MKDIR_P = @MKDIR_P@ |
NM = @NM@ |
NMEDIT = @NMEDIT@ |
NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@ |
NOUVEAU_LIBS = @NOUVEAU_LIBS@ |
OBJDUMP = @OBJDUMP@ |
OBJEXT = @OBJEXT@ |
OPENCL_LIB_INSTALL_DIR = @OPENCL_LIB_INSTALL_DIR@ |
OSMESA_LIB = @OSMESA_LIB@ |
OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@ |
OSMESA_LIB_NAME = @OSMESA_LIB_NAME@ |
OSMESA_MESA_DEPS = @OSMESA_MESA_DEPS@ |
OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@ |
OSMESA_PC_REQ = @OSMESA_PC_REQ@ |
OSMESA_VERSION = @OSMESA_VERSION@ |
OTOOL = @OTOOL@ |
OTOOL64 = @OTOOL64@ |
PACKAGE = @PACKAGE@ |
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ |
PACKAGE_NAME = @PACKAGE_NAME@ |
PACKAGE_STRING = @PACKAGE_STRING@ |
PACKAGE_TARNAME = @PACKAGE_TARNAME@ |
PACKAGE_URL = @PACKAGE_URL@ |
PACKAGE_VERSION = @PACKAGE_VERSION@ |
PATH_SEPARATOR = @PATH_SEPARATOR@ |
PERL = @PERL@ |
PKG_CONFIG = @PKG_CONFIG@ |
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ |
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ |
POSIX_SHELL = @POSIX_SHELL@ |
PTHREAD_CC = @PTHREAD_CC@ |
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ |
PTHREAD_LIBS = @PTHREAD_LIBS@ |
PYTHON2 = @PYTHON2@ |
RADEON_CFLAGS = @RADEON_CFLAGS@ |
RADEON_LIBS = @RADEON_LIBS@ |
RANLIB = @RANLIB@ |
SED = @SED@ |
SELINUX_LIBS = @SELINUX_LIBS@ |
SET_MAKE = @SET_MAKE@ |
SHELL = @SHELL@ |
STRIP = @STRIP@ |
VDPAU_CFLAGS = @VDPAU_CFLAGS@ |
VDPAU_LIBS = @VDPAU_LIBS@ |
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@ |
VDPAU_MAJOR = @VDPAU_MAJOR@ |
VDPAU_MINOR = @VDPAU_MINOR@ |
VERSION = @VERSION@ |
VG_LIB_DEPS = @VG_LIB_DEPS@ |
VG_LIB_GLOB = @VG_LIB_GLOB@ |
VG_LIB_NAME = @VG_LIB_NAME@ |
VG_PC_LIB_PRIV = @VG_PC_LIB_PRIV@ |
VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@ |
VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@ |
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@ |
WAYLAND_LIBS = @WAYLAND_LIBS@ |
WAYLAND_SCANNER = @WAYLAND_SCANNER@ |
X11_INCLUDES = @X11_INCLUDES@ |
XA_MAJOR = @XA_MAJOR@ |
XA_MINOR = @XA_MINOR@ |
XA_TINY = @XA_TINY@ |
XA_VERSION = @XA_VERSION@ |
XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@ |
XCB_DRI2_LIBS = @XCB_DRI2_LIBS@ |
XEXT_CFLAGS = @XEXT_CFLAGS@ |
XEXT_LIBS = @XEXT_LIBS@ |
XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@ |
XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@ |
XLIBGL_CFLAGS = @XLIBGL_CFLAGS@ |
XLIBGL_LIBS = @XLIBGL_LIBS@ |
XORG_CFLAGS = @XORG_CFLAGS@ |
XORG_DRIVER_INSTALL_DIR = @XORG_DRIVER_INSTALL_DIR@ |
XORG_LIBS = @XORG_LIBS@ |
XVMC_CFLAGS = @XVMC_CFLAGS@ |
XVMC_LIBS = @XVMC_LIBS@ |
XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@ |
XVMC_MAJOR = @XVMC_MAJOR@ |
XVMC_MINOR = @XVMC_MINOR@ |
YACC = @YACC@ |
YFLAGS = @YFLAGS@ |
abs_builddir = @abs_builddir@ |
abs_srcdir = @abs_srcdir@ |
abs_top_builddir = @abs_top_builddir@ |
abs_top_srcdir = @abs_top_srcdir@ |
ac_ct_AR = @ac_ct_AR@ |
ac_ct_CC = @ac_ct_CC@ |
ac_ct_CC_FOR_BUILD = @ac_ct_CC_FOR_BUILD@ |
ac_ct_CXX = @ac_ct_CXX@ |
ac_ct_CXX_FOR_BUILD = @ac_ct_CXX_FOR_BUILD@ |
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ |
am__include = @am__include@ |
am__leading_dot = @am__leading_dot@ |
am__quote = @am__quote@ |
am__tar = @am__tar@ |
am__untar = @am__untar@ |
ax_pthread_config = @ax_pthread_config@ |
bindir = @bindir@ |
build = @build@ |
build_alias = @build_alias@ |
build_cpu = @build_cpu@ |
build_os = @build_os@ |
build_vendor = @build_vendor@ |
builddir = @builddir@ |
datadir = @datadir@ |
datarootdir = @datarootdir@ |
docdir = @docdir@ |
dvidir = @dvidir@ |
exec_prefix = @exec_prefix@ |
host = @host@ |
host_alias = @host_alias@ |
host_cpu = @host_cpu@ |
host_os = @host_os@ |
host_vendor = @host_vendor@ |
htmldir = @htmldir@ |
includedir = @includedir@ |
infodir = @infodir@ |
install_sh = @install_sh@ |
libdir = @libdir@ |
libexecdir = @libexecdir@ |
localedir = @localedir@ |
localstatedir = @localstatedir@ |
mandir = @mandir@ |
mkdir_p = @mkdir_p@ |
oldincludedir = @oldincludedir@ |
pdfdir = @pdfdir@ |
prefix = @prefix@ |
program_transform_name = @program_transform_name@ |
psdir = @psdir@ |
sbindir = @sbindir@ |
sharedstatedir = @sharedstatedir@ |
srcdir = @srcdir@ |
sysconfdir = @sysconfdir@ |
target = @target@ |
target_alias = @target_alias@ |
target_cpu = @target_cpu@ |
target_os = @target_os@ |
target_vendor = @target_vendor@ |
top_build_prefix = @top_build_prefix@ |
top_builddir = @top_builddir@ |
top_srcdir = @top_srcdir@ |
@HAVE_X86_ASM_TRUE@AM_CPPFLAGS = \ |
@HAVE_X86_ASM_TRUE@ -I$(top_srcdir)/include \ |
@HAVE_X86_ASM_TRUE@ -I$(top_srcdir)/src/mesa \ |
@HAVE_X86_ASM_TRUE@ -I$(top_srcdir)/src/mapi \ |
@HAVE_X86_ASM_TRUE@ $(DEFINES) |
@HAVE_X86_ASM_TRUE@gen_matypes_SOURCES = gen_matypes.c |
@HAVE_X86_ASM_TRUE@BUILT_SOURCES = matypes.h |
@HAVE_X86_ASM_TRUE@CLEANFILES = matypes.h |
all: $(BUILT_SOURCES) |
$(MAKE) $(AM_MAKEFLAGS) all-am |
.SUFFIXES: |
.SUFFIXES: .c .lo .o .obj |
# Regenerate Makefile.in when Makefile.am or a configure dependency changed.
# For each out-of-date prerequisite ($?) that appears in $(am__configure_deps)
# the work is delegated to the top-level am--refresh target: the recipe exits
# 0 if that leaves $@ in place, leaves the loop if it does not, and exits 1 if
# am--refresh itself fails. Otherwise $(AUTOMAKE) --foreign is run for
# src/mesa/x86/Makefile from $(top_srcdir).
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) |
@for dep in $?; do \ |
case '$(am__configure_deps)' in \ |
*$$dep*) \ |
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ |
&& { if test -f $@; then exit 0; else break; fi; }; \ |
exit 1;; \ |
esac; \ |
done; \ |
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/mesa/x86/Makefile'; \ |
$(am__cd) $(top_srcdir) && \ |
$(AUTOMAKE) --foreign src/mesa/x86/Makefile |
.PRECIOUS: Makefile |
# Rebuild this Makefile. If config.status is among the changed prerequisites,
# defer to the top-level am--refresh; otherwise echo and run config.status
# for $(subdir)/$@ with $(am__depfiles_maybe).
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status |
@case '$?' in \ |
*config.status*) \ |
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ |
*) \ |
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ |
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ |
esac; |
# The next three rules all defer to the top-level am--refresh target.
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) |
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
$(top_srcdir)/configure: $(am__configure_deps) |
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
$(ACLOCAL_M4): $(am__aclocal_m4_deps) |
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
# Rule with no prerequisites and no recipe (presumably so that a missing
# aclocal input is not a hard make error - TODO confirm).
$(am__aclocal_m4_deps): |
# Remove the programs listed in $(noinst_PROGRAMS); does nothing when the list
# is empty. When $(EXEEXT) is non-empty a second rm also removes the same
# names with that suffix stripped.
clean-noinstPROGRAMS: |
@list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ |
echo " rm -f" $$list; \ |
rm -f $$list || exit $$?; \ |
test -n "$(EXEEXT)" || exit 0; \ |
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ |
echo " rm -f" $$list; \ |
rm -f $$list |
# Link gen_matypes from its objects, $(gen_matypes_LDADD) and $(LIBS), after
# deleting any previous binary.
gen_matypes$(EXEEXT): $(gen_matypes_OBJECTS) $(gen_matypes_DEPENDENCIES) $(EXTRA_gen_matypes_DEPENDENCIES) |
@rm -f gen_matypes$(EXEEXT) |
$(AM_V_CCLD)$(LINK) $(gen_matypes_OBJECTS) $(gen_matypes_LDADD) $(LIBS) |
# Object cleanup: mostlyclean-compile removes *.$(OBJEXT), distclean-compile
# removes *.tab.c.
mostlyclean-compile: |
-rm -f *.$(OBJEXT) |
distclean-compile: |
-rm -f *.tab.c |
# Pull in the dependency file for gen_matypes (line is guarded by the
# AMDEP_TRUE prefix).
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen_matypes.Po@am__quote@ |
# C compile suffix rules. Each rule has three alternative recipes selected by
# the am__fastdepCC / AMDEP prefixes:
#   - am__fastdepCC_TRUE: compile with -MT/-MD/-MP/-MF writing $(DEPDIR)/$*.Tpo,
#     then rename it to the final dependency file;
#   - AMDEP_TRUE + am__fastdepCC_FALSE: run the compile through $(depcomp);
#   - am__fastdepCC_FALSE: plain compile.
.c.o: |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< |
# Same as .c.o, but the source path is passed through $(CYGPATH_W).
.c.obj: |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` |
# Libtool objects: uses $(LTCOMPILE), libtool=yes, and a .Plo dependency file.
.c.lo: |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< |
# Libtool cleanup: *.lo files, then the .libs and _libs directories.
mostlyclean-libtool: |
-rm -f *.lo |
clean-libtool: |
-rm -rf .libs _libs |
# Source-indexing targets. ID, tags-am, ctags-am and cscopelist-am work from
# $(am__tagged_files); GTAGS runs gtags in $(top_srcdir).
# ID: build an mkid database from the de-duplicated file list.
ID: $(am__tagged_files) |
$(am__define_uniq_tagged_files); mkid -fID $$unique |
tags: tags-am |
TAGS: tags |
# Run $(ETAGS) unless there is nothing to index ($(ETAGS_ARGS), the positional
# arguments and $$unique are all empty). Positional arguments, when present,
# are passed ahead of $$unique.
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) |
set x; \ |
here=`pwd`; \ |
$(am__define_uniq_tagged_files); \ |
shift; \ |
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ |
test -n "$$unique" || unique=$$empty_fix; \ |
if test $$# -gt 0; then \ |
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ |
"$$@" $$unique; \ |
else \ |
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ |
$$unique; \ |
fi; \ |
fi |
ctags: ctags-am |
CTAGS: ctags |
# Run $(CTAGS) unless both $(CTAGS_ARGS) and $$unique are empty.
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) |
$(am__define_uniq_tagged_files); \ |
test -z "$(CTAGS_ARGS)$$unique" \ |
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ |
$$unique |
# Run gtags -i from $(top_srcdir), passing the absolute path of
# $(top_builddir) as its argument.
GTAGS: |
here=`$(am__cd) $(top_builddir) && pwd` \ |
&& $(am__cd) $(top_srcdir) \ |
&& gtags -i $(GTAGS_ARGS) "$$here" |
cscopelist: cscopelist-am |
# Append one path per tagged file to $(top_builddir)/cscope.files: files that
# exist in the current directory are listed under $(subdir), all others under
# the source directory.
cscopelist-am: $(am__tagged_files) |
list='$(am__tagged_files)'; \ |
case "$(srcdir)" in \ |
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ |
*) sdir=$(subdir)/$(srcdir) ;; \ |
esac; \ |
for i in $$list; do \ |
if test -f "$$i"; then \ |
echo "$(subdir)/$$i"; \ |
else \ |
echo "$$sdir/$$i"; \ |
fi; \ |
done >> $(top_builddir)/cscope.files |
# Delete every index file the targets above can produce.
distclean-tags: |
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags |
# Copy every entry of $(DISTFILES) into $(distdir).
#  - $(srcdir)/ and $(top_srcdir)/ prefixes are stripped from the names first
#    (the latter is replaced by $(top_builddir)/).
#  - Subdirectories needed under $(distdir) are created with $(MKDIR_P).
#  - Each entry is taken from the current directory if it exists there,
#    otherwise from $(srcdir).
#  - Directories are copied recursively (cp -fpR). When the entry exists in
#    both the build and the source tree, the source-tree copy is made first
#    and the build-tree copy afterwards. Directories already in $(distdir)
#    that lack owner rwx permission get chmod u+rwx.
#  - Regular files are copied with cp -p unless already present in $(distdir).
#  A failed copy aborts the recipe with exit status 1.
distdir: $(DISTFILES) |
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ |
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ |
list='$(DISTFILES)'; \ |
dist_files=`for file in $$list; do echo $$file; done | \ |
sed -e "s|^$$srcdirstrip/||;t" \ |
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ |
case $$dist_files in \ |
*/*) $(MKDIR_P) `echo "$$dist_files" | \ |
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ |
sort -u` ;; \ |
esac; \ |
for file in $$dist_files; do \ |
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ |
if test -d $$d/$$file; then \ |
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ |
if test -d "$(distdir)/$$file"; then \ |
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ |
fi; \ |
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ |
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ |
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ |
fi; \ |
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ |
else \ |
test -f "$(distdir)/$$file" \ |
|| cp -p $$d/$$file "$(distdir)/$$file" \ |
|| exit 1; \ |
fi; \ |
done |
# Standard target skeleton. check and install, like all, first require
# $(BUILT_SOURCES) and then re-invoke make on their -am counterpart; all-am
# itself builds Makefile and $(PROGRAMS). Most install-*, dvi/html/info/pdf/ps
# and uninstall targets below have empty recipes in this directory.
check-am: all-am |
check: $(BUILT_SOURCES) |
$(MAKE) $(AM_MAKEFLAGS) check-am |
all-am: Makefile $(PROGRAMS) |
installdirs: |
install: $(BUILT_SOURCES) |
$(MAKE) $(AM_MAKEFLAGS) install-am |
install-exec: install-exec-am |
install-data: install-data-am |
uninstall: uninstall-am |
install-am: all-am |
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am |
installcheck: installcheck-am |
# Re-run install with INSTALL_PROGRAM and install_sh_PROGRAM replaced by
# $(INSTALL_STRIP_PROGRAM) and INSTALL_STRIP_FLAG=-s. When $(STRIP) is set it
# is additionally handed over as STRIPPROG through INSTALL_PROGRAM_ENV.
install-strip: |
if test -z '$(STRIP)'; then \ |
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ |
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ |
install; \ |
else \ |
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ |
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ |
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ |
fi |
# Generic cleanup levels: clean-generic removes $(CLEANFILES),
# distclean-generic removes the configure-generated files, and
# maintainer-clean-generic additionally removes $(BUILT_SOURCES).
mostlyclean-generic: |
clean-generic: |
-test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) |
distclean-generic: |
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) |
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) |
maintainer-clean-generic: |
@echo "This command is intended for maintainers to use" |
@echo "it deletes files that may require special tools to rebuild." |
-test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) |
clean: clean-am |
clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \ |
mostlyclean-am |
# distclean (and maintainer-clean further down) also delete the dependency
# directory ./$(DEPDIR) and the generated Makefile.
distclean: distclean-am |
-rm -rf ./$(DEPDIR) |
-rm -f Makefile |
distclean-am: clean-am distclean-compile distclean-generic \ |
distclean-tags |
dvi: dvi-am |
dvi-am: |
html: html-am |
html-am: |
info: info-am |
info-am: |
install-data-am: |
install-dvi: install-dvi-am |
install-dvi-am: |
install-exec-am: |
install-html: install-html-am |
install-html-am: |
install-info: install-info-am |
install-info-am: |
install-man: |
install-pdf: install-pdf-am |
install-pdf-am: |
install-ps: install-ps-am |
install-ps-am: |
installcheck-am: |
maintainer-clean: maintainer-clean-am |
-rm -rf ./$(DEPDIR) |
-rm -f Makefile |
maintainer-clean-am: distclean-am maintainer-clean-generic |
mostlyclean: mostlyclean-am |
mostlyclean-am: mostlyclean-compile mostlyclean-generic \ |
mostlyclean-libtool |
pdf: pdf-am |
pdf-am: |
ps: ps-am |
ps-am: |
uninstall-am: |
# Targets whose recipes re-invoke $(MAKE), followed by the list of targets
# that do not name real files.
.MAKE: all check install install-am install-strip |
.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ |
clean-libtool clean-noinstPROGRAMS cscopelist-am ctags \ |
ctags-am distclean distclean-compile distclean-generic \ |
distclean-libtool distclean-tags distdir dvi dvi-am html \ |
html-am info info-am install install-am install-data \ |
install-data-am install-dvi install-dvi-am install-exec \ |
install-exec-am install-html install-html-am install-info \ |
install-info-am install-man install-pdf install-pdf-am \ |
install-ps install-ps-am install-strip installcheck \ |
installcheck-am installdirs maintainer-clean \ |
maintainer-clean-generic mostlyclean mostlyclean-compile \ |
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ |
tags tags-am uninstall uninstall-am |
# matypes.h, variant 1 (lines prefixed GEN_ASM_OFFSETS_TRUE): compile
# gen_matypes.c with -DASM_OFFSETS -S to stdout and filter the assembly with
# sed. Only lines starting with "->" are kept, with that marker removed; kept
# lines containing a "$" get "#define " prepended and their first "$" deleted.
@GEN_ASM_OFFSETS_TRUE@@HAVE_X86_ASM_TRUE@matypes.h: $(gen_matypes_SOURCES) |
@GEN_ASM_OFFSETS_TRUE@@HAVE_X86_ASM_TRUE@ $(AM_V_GEN)$(COMPILE) $< -DASM_OFFSETS -S -o - | \ |
@GEN_ASM_OFFSETS_TRUE@@HAVE_X86_ASM_TRUE@ sed -n '/^->/{s:^->::;/[$$]/{s:^:#define :;s:[$$]::};p}' > $@ |
# matypes.h, variant 2 (lines prefixed GEN_ASM_OFFSETS_FALSE): build the
# gen_matypes program and capture its standard output.
@GEN_ASM_OFFSETS_FALSE@@HAVE_X86_ASM_TRUE@matypes.h: gen_matypes |
@GEN_ASM_OFFSETS_FALSE@@HAVE_X86_ASM_TRUE@ $(AM_V_GEN)./gen_matypes > $@ |
# Tell versions [3.59,3.63) of GNU make to not export all variables. |
# Otherwise a system limit (for SysV at least) may be exceeded. |
.NOEXPORT: |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/assyntax.h |
---|
0,0 → 1,1728 |
#ifndef __ASSYNTAX_H__ |
#define __ASSYNTAX_H__ |
/* |
* Copyright 1992 Vrije Universiteit, The Netherlands |
* |
* Permission to use, copy, modify, and distribute this software and its |
* documentation for any purpose and without fee is hereby granted, provided |
* that the above copyright notice appear in all copies and that both that |
* copyright notice and this permission notice appear in supporting |
* documentation, and that the name of the Vrije Universiteit not be used in |
* advertising or publicity pertaining to distribution of the software without |
* specific, written prior permission. The Vrije Universiteit makes no |
* representations about the suitability of this software for any purpose. |
* It is provided "as is" without express or implied warranty. |
* |
* The Vrije Universiteit DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS |
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, |
* IN NO EVENT SHALL The Vrije Universiteit BE LIABLE FOR ANY SPECIAL, |
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM |
* LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE |
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR |
* PERFORMANCE OF THIS SOFTWARE. |
*/ |
/* |
* assyntax.h |
* |
* Select the syntax appropriate to the 386 assembler being used |
* To add support for more assemblers add more columns to the CHOICE |
* macro. Note that register names must also have uppercase names |
* to avoid macro recursion. e.g., #define ah %ah recurses! |
* |
* NB 1. Some of the macros for certain assemblers imply that the code is to |
* run in protected mode!! Caveat emptor. |
* |
* NB 2. 486 specific instructions are not included. This is to discourage |
* their accidental use in code that is intended to run on 386 and 486 |
* systems. |
* |
* Supported assemblers: |
* |
* (a) AT&T SysVr4 as(1): define ATT_ASSEMBLER |
* (b) GNU Assembler gas: define GNU_ASSEMBLER (default) |
* (c) Amsterdam Compiler kit: define ACK_ASSEMBLER |
* (d) The Netwide Assembler: define NASM_ASSEMBLER |
* (e) Microsoft Assembler: define MASM_ASSEMBLER (UNTESTED!) |
* |
* The following naming conventions have been used to identify the various |
* data types: |
* _SR = segment register version |
* Integer: |
* _Q = quadword = 64 bits |
* _L = long = 32 bits |
* _W = short = 16 bits |
* _B = byte = 8 bits |
* Floating-point: |
* _X = m80real = 80 bits |
* _D = double = 64 bits |
* _S = single = 32 bits |
* |
* Author: Gregory J. Sharp, Sept 1992 |
* Vrije Universiteit, Amsterdam, The Netherlands |
* |
* [support for Intel syntax added by Josh Vanderhoof, 1999] |
*/ |
#if !(defined(NASM_ASSEMBLER) || defined(MASM_ASSEMBLER)) |
/* Default to ATT_ASSEMBLER when SVR4 or SYSV are defined */ |
#if (defined(SVR4) || defined(SYSV)) && !defined(GNU_ASSEMBLER) |
#define ATT_ASSEMBLER |
#endif |
/* Fall back to GNU_ASSEMBLER when no assembler has been selected at all. */
#if !defined(ATT_ASSEMBLER) && !defined(GNU_ASSEMBLER) && !defined(ACK_ASSEMBLER) |
#define GNU_ASSEMBLER |
#endif |
/*
 * Token pasting helpers. The ## operator is used when __STDC__ is defined
 * (and UNIXCPP is not), or for the Sun i386 SVR4 non-GNU compiler; otherwise
 * the empty-comment form is used to join the tokens.
 */
#if (defined(__STDC__) && !defined(UNIXCPP)) || (defined (sun) && defined (i386) && defined (SVR4) && defined (__STDC__) && !defined (__GNUC__)) |
#define CONCAT(x, y) x ## y |
#define CONCAT3(x, y, z) x ## y ## z |
#else |
#define CONCAT(x, y) x/**/y |
#define CONCAT3(x, y, z) x/**/y/**/z |
#endif |
/*
 * Register names, operand-size toggles and the CHOICE() selector, per
 * assembler family.
 *
 * ACK branch: registers are bare lower-case names without a % prefix, the
 * 32-bit names (EAX, EBX, ...) expand to the same tokens as their 16-bit
 * counterparts, ST is a plain name, and CHOICE() yields its third argument.
 *
 * AT&T / GAS branch: registers carry a % prefix, ST(x) pastes x onto _STX to
 * reach %st(x), MMX and SSE registers are provided as well, and CHOICE()
 * yields its second argument when GNU_ASSEMBLER is defined and its first
 * argument otherwise.
 */
#ifdef ACK_ASSEMBLER |
/* Assume we write code for 32-bit protected mode! */ |
/* Redefine register names for the ACK assembler */ |
#define AL al |
#define AH ah |
#define AX ax |
#define EAX ax |
#define BL bl |
#define BH bh |
#define BX bx |
#define EBX bx |
#define CL cl |
#define CH ch |
#define CX cx |
#define ECX cx |
#define DL dl |
#define DH dh |
#define DX dx |
#define EDX dx |
#define BP bp |
#define EBP bp |
#define SI si |
#define ESI si |
#define DI di |
#define EDI di |
#define SP sp |
#define ESP sp |
#define CS cs |
#define SS ss |
#define DS ds |
#define ES es |
#define FS fs |
#define GS gs |
/* Control Registers */ |
#define CR0 cr0 |
#define CR1 cr1 |
#define CR2 cr2 |
#define CR3 cr3 |
/* Debug Registers */ |
#define DR0 dr0 |
#define DR1 dr1 |
#define DR2 dr2 |
#define DR3 dr3 |
#define DR4 dr4 |
#define DR5 dr5 |
#define DR6 dr6 |
#define DR7 dr7 |
/* Floating-point Stack */ |
#define ST st |
#define AS_BEGIN .sect .text; .sect .rom; .sect .data; .sect .bss; .sect .text |
#define _WTOG o16 /* word toggle for _W instructions */ |
#define _LTOG /* long toggle for _L instructions */ |
#define ADDR_TOGGLE a16 |
#define OPSZ_TOGGLE o16 |
#define USE16 .use16 |
#define USE32 .use32 |
#define CHOICE(a,b,c) c |
#else /* AT&T or GAS */ |
/* Redefine register names for GAS & AT&T assemblers */ |
#define AL %al |
#define AH %ah |
#define AX %ax |
#define EAX %eax |
#define BL %bl |
#define BH %bh |
#define BX %bx |
#define EBX %ebx |
#define CL %cl |
#define CH %ch |
#define CX %cx |
#define ECX %ecx |
#define DL %dl |
#define DH %dh |
#define DX %dx |
#define EDX %edx |
#define BP %bp |
#define EBP %ebp |
#define SI %si |
#define ESI %esi |
#define DI %di |
#define EDI %edi |
#define SP %sp |
#define ESP %esp |
#define CS %cs |
#define SS %ss |
#define DS %ds |
#define ES %es |
#define FS %fs |
#define GS %gs |
/* Control Registers */ |
#define CR0 %cr0 |
#define CR1 %cr1 |
#define CR2 %cr2 |
#define CR3 %cr3 |
/* Debug Registers (spelled %db0..%db7 in this branch) */ |
#define DR0 %db0 |
#define DR1 %db1 |
#define DR2 %db2 |
#define DR3 %db3 |
#define DR4 %db4 |
#define DR5 %db5 |
#define DR6 %db6 |
#define DR7 %db7 |
/* Floating-point Stack */ |
#define _STX0 %st(0) |
#define _STX1 %st(1) |
#define _STX2 %st(2) |
#define _STX3 %st(3) |
#define _STX4 %st(4) |
#define _STX5 %st(5) |
#define _STX6 %st(6) |
#define _STX7 %st(7) |
#define ST(x) CONCAT(_STX,x) |
/* Top of the FP stack: %st(0) for the GNU assembler, plain %st otherwise. */
#ifdef GNU_ASSEMBLER |
#define ST0 %st(0) |
#else |
#define ST0 %st |
#endif |
/* MMX Registers */ |
#define MM0 %mm0 |
#define MM1 %mm1 |
#define MM2 %mm2 |
#define MM3 %mm3 |
#define MM4 %mm4 |
#define MM5 %mm5 |
#define MM6 %mm6 |
#define MM7 %mm7 |
/* SSE Registers */ |
#define XMM0 %xmm0 |
#define XMM1 %xmm1 |
#define XMM2 %xmm2 |
#define XMM3 %xmm3 |
#define XMM4 %xmm4 |
#define XMM5 %xmm5 |
#define XMM6 %xmm6 |
#define XMM7 %xmm7 |
/* AS_BEGIN, USE16 and USE32 expand to nothing in this branch. */
#define AS_BEGIN |
#define USE16 |
#define USE32 |
#ifdef GNU_ASSEMBLER |
#define ADDR_TOGGLE aword |
#define OPSZ_TOGGLE word |
#define CHOICE(a,b,c) b |
#else |
/* |
* AT&T ASSEMBLER SYNTAX |
* ********************* |
*/ |
#define CHOICE(a,b,c) a |
#define ADDR_TOGGLE addr16 |
#define OPSZ_TOGGLE data16 |
#endif /* GNU_ASSEMBLER */ |
#endif /* ACK_ASSEMBLER */ |
/* GLNAME(a) prefixes the symbol a with an underscore. */
#define GLNAME(a) CONCAT(_,a) |
/****************************************/ |
/* */ |
/* Select the various choices */ |
/* */ |
/****************************************/ |
/* Redefine assembler directives */ |
/*********************************/ |
/*
 * From here on every CHOICE(x, y, z) lists the AT&T, GNU and ACK spelling in
 * that order. GLOBAL and EXTERN are aliases of GLOBL.
 */
#define GLOBL CHOICE(.globl, .globl, .extern) |
#define GLOBAL GLOBL |
#define EXTERN GLOBL |
/*
 * Alignment directives for text (ALIGNTEXTn) and data (ALIGNDATAn).
 *
 * Without __AOUT__ the GNU column uses .balign with the alignment in bytes.
 * With __AOUT__ it uses .align with the values 5, 4, 3, 2, 1 for 32, 16, 8,
 * 4, 2 (apparently log2 of the byte count - TODO confirm against the a.out
 * assembler). Where a second argument is given it is the fill byte: 0x90 for
 * text and 0x0 for data. The ...ifNOP variants expand to nothing in the ACK
 * column.
 */
#ifndef __AOUT__ |
#define ALIGNTEXT32 CHOICE(.align 32, .balign 32, .align 32) |
#define ALIGNTEXT16 CHOICE(.align 16, .balign 16, .align 16) |
#define ALIGNTEXT8 CHOICE(.align 8, .balign 8, .align 8) |
#define ALIGNTEXT4 CHOICE(.align 4, .balign 4, .align 4) |
#define ALIGNTEXT2 CHOICE(.align 2, .balign 2, .align 2) |
/* ALIGNTEXT4ifNOP is the same as ALIGNTEXT4, but only if the space is |
* guaranteed to be filled with NOPs. Otherwise it does nothing. |
*/ |
#define ALIGNTEXT32ifNOP CHOICE(.align 32, .balign ARG2(32,0x90), /*can't do it*/) |
#define ALIGNTEXT16ifNOP CHOICE(.align 16, .balign ARG2(16,0x90), /*can't do it*/) |
#define ALIGNTEXT8ifNOP CHOICE(.align 8, .balign ARG2(8,0x90), /*can't do it*/) |
#define ALIGNTEXT4ifNOP CHOICE(.align 4, .balign ARG2(4,0x90), /*can't do it*/) |
#define ALIGNDATA32 CHOICE(.align 32, .balign ARG2(32,0x0), .align 32) |
#define ALIGNDATA16 CHOICE(.align 16, .balign ARG2(16,0x0), .align 16) |
#define ALIGNDATA8 CHOICE(.align 8, .balign ARG2(8,0x0), .align 8) |
#define ALIGNDATA4 CHOICE(.align 4, .balign ARG2(4,0x0), .align 4) |
#define ALIGNDATA2 CHOICE(.align 2, .balign ARG2(2,0x0), .align 2) |
#else |
/* 'as -aout' on FreeBSD doesn't have .balign */ |
#define ALIGNTEXT32 CHOICE(.align 32, .align ARG2(5,0x90), .align 32) |
#define ALIGNTEXT16 CHOICE(.align 16, .align ARG2(4,0x90), .align 16) |
#define ALIGNTEXT8 CHOICE(.align 8, .align ARG2(3,0x90), .align 8) |
#define ALIGNTEXT4 CHOICE(.align 4, .align ARG2(2,0x90), .align 4) |
#define ALIGNTEXT2 CHOICE(.align 2, .align ARG2(1,0x90), .align 2) |
/* ALIGNTEXT4ifNOP is the same as ALIGNTEXT4, but only if the space is |
* guaranteed to be filled with NOPs. Otherwise it does nothing. |
*/ |
#define ALIGNTEXT32ifNOP CHOICE(.align 32, .align ARG2(5,0x90), /*can't do it*/) |
#define ALIGNTEXT16ifNOP CHOICE(.align 16, .align ARG2(4,0x90), /*can't do it*/) |
#define ALIGNTEXT8ifNOP CHOICE(.align 8, .align ARG2(3,0x90), /*can't do it*/) |
#define ALIGNTEXT4ifNOP CHOICE(.align 4, .align ARG2(2,0x90), /*can't do it*/) |
#define ALIGNDATA32 CHOICE(.align 32, .align ARG2(5,0x0), .align 32) |
#define ALIGNDATA16 CHOICE(.align 16, .align ARG2(4,0x0), .align 16) |
#define ALIGNDATA8 CHOICE(.align 8, .align ARG2(3,0x0), .align 8) |
#define ALIGNDATA4 CHOICE(.align 4, .align ARG2(2,0x0), .align 4) |
#define ALIGNDATA2 CHOICE(.align 2, .align ARG2(1,0x0), .align 2) |
#endif /* __AOUT__ */ |
/*
 * Data-definition and section directives. D_LONG / D_WORD / D_BYTE emit the
 * per-assembler directive for 4-, 2- and 1-byte data (.data4/.data2/.data1 in
 * the ACK column); SEG_DATA / SEG_TEXT / SEG_BSS switch sections.
 */
#define FILE(s) CHOICE(.file s, .file s, .file s) |
#define STRING(s) CHOICE(.string s, .asciz s, .asciz s) |
#define D_LONG CHOICE(.long, .long, .data4) |
#define D_WORD CHOICE(.value, .short, .data2) |
#define D_BYTE CHOICE(.byte, .byte, .data1) |
#define SPACE CHOICE(.comm, .space, .space) |
#define COMM CHOICE(.comm, .comm, .comm) |
#define SEG_DATA CHOICE(.data, .data, .sect .data) |
#define SEG_TEXT CHOICE(.text, .text, .sect .text) |
#define SEG_BSS CHOICE(.bss, .bss, .sect .bss) |
/* Reserve n bytes: the GNU form advances the location counter directly. */
#ifdef GNU_ASSEMBLER |
#define D_SPACE(n) . = . + n |
#else |
#define D_SPACE(n) .space n |
#endif |
/* Addressing Modes */ |
/*
 * Operand builders. The AT&T and GNU columns are identical for every
 * addressing-mode macro in this group; only the ACK column differs, e.g. by
 * writing base and index as (b)(i*s) instead of (b,i,s).
 */
/* Immediate Mode */ |
#define ADDR(a) CHOICE(CONCAT($,a), $a, a) |
#define CONST(a) CHOICE(CONCAT($,a), $a, a) |
/* Indirect Mode */ |
#define CONTENT(a) CHOICE(a, a, (a)) /* take contents of variable */ |
#define REGIND(a) CHOICE((a), (a), (a)) /* Register a indirect */ |
/* Register b indirect plus displacement a */ |
#define REGOFF(a, b) CHOICE(a(b), a(b), a(b)) |
/* Reg indirect Base + Index + Displacement - this is mainly for 16-bit mode |
* which has no scaling |
*/ |
#define REGBID(b,i,d) CHOICE(d(b,i), d(b,i), d(b)(i)) |
/* Reg indirect Base + (Index * Scale) */ |
#define REGBIS(b,i,s) CHOICE((b,i,s), (b,i,s), (b)(i*s)) |
/* Reg indirect Base + (Index * Scale) + Displacement */ |
#define REGBISD(b,i,s,d) CHOICE(d(b,i,s), d(b,i,s), d(b)(i*s)) |
/* Displaced Scaled Index: */ |
#define REGDIS(d,i,s) CHOICE(d(,i,s), d(,i,s), d(i * s)) |
/* Indexed Base: */ |
#define REGBI(b,i) CHOICE((b,i), (b,i), (b)(i)) |
/* Displaced Base: */ |
#define REGDB(d,b) CHOICE(d(b), d(b), d(b)) |
/* Variable indirect: */ |
#define VARINDIRECT(var) CHOICE(*var, *var, (var)) |
/* Use register contents as jump/call target: */ |
#define CODEPTR(reg) CHOICE(*reg, *reg, reg) |
/* For expressions requiring bracketing |
* eg. (CRT0_PM | CRT_EM) |
*/ |
#define EXPR(a) CHOICE([a], (a), [a]) |
/* Expression operators: complement, multiply, divide. */
#define ENOT(a) CHOICE(0!a, ~a, ~a) |
#define EMUL(a,b) CHOICE(a\*b, a*b, a*b) |
#define EDIV(a,b) CHOICE(a\/b, a/b, a/b) |
/* |
* We have to beat the problem of commas within arguments to choice. |
* eg. choice (add a,b, add b,a) will get argument mismatch. Luckily ANSI |
* and other known cpp definitions evaluate arguments before substitution |
* so the following works. |
*/ |
/*
 * ARG2 / ARG3 expand to their arguments separated by commas, so an operand
 * list can travel through CHOICE() as a single macro argument.
 */
#define ARG2(a, b) a,b |
#define ARG3(a,b,c) a,b,c |
/* Redefine assembler commands */ |
/*
 * Conventions for the instruction macros:
 *  - Two-operand macros take (a, b) in the order used by the AT&T and GNU
 *    columns; the ACK column emits them reversed, as ARG2(b,a).
 *  - The _L / _W / _B suffix selects the operand size. The AT&T and GNU
 *    columns append l / w / b to the mnemonic; for _L and _W the ACK column
 *    instead prefixes the bare mnemonic with _LTOG or _WTOG.
 *  - BOUND_L / BOUND_W are the exception in this group: their GNU column
 *    also uses the reversed order ARG2(b,a).
 */
#define AAA CHOICE(aaa, aaa, aaa) |
#define AAD CHOICE(aad, aad, aad) |
#define AAM CHOICE(aam, aam, aam) |
#define AAS CHOICE(aas, aas, aas) |
#define ADC_L(a, b) CHOICE(adcl ARG2(a,b), adcl ARG2(a,b), _LTOG adc ARG2(b,a)) |
#define ADC_W(a, b) CHOICE(adcw ARG2(a,b), adcw ARG2(a,b), _WTOG adc ARG2(b,a)) |
#define ADC_B(a, b) CHOICE(adcb ARG2(a,b), adcb ARG2(a,b), adcb ARG2(b,a)) |
#define ADD_L(a, b) CHOICE(addl ARG2(a,b), addl ARG2(a,b), _LTOG add ARG2(b,a)) |
#define ADD_W(a, b) CHOICE(addw ARG2(a,b), addw ARG2(a,b), _WTOG add ARG2(b,a)) |
#define ADD_B(a, b) CHOICE(addb ARG2(a,b), addb ARG2(a,b), addb ARG2(b,a)) |
#define AND_L(a, b) CHOICE(andl ARG2(a,b), andl ARG2(a,b), _LTOG and ARG2(b,a)) |
#define AND_W(a, b) CHOICE(andw ARG2(a,b), andw ARG2(a,b), _WTOG and ARG2(b,a)) |
#define AND_B(a, b) CHOICE(andb ARG2(a,b), andb ARG2(a,b), andb ARG2(b,a)) |
#define ARPL(a,b) CHOICE(arpl ARG2(a,b), arpl ARG2(a,b), arpl ARG2(b,a)) |
#define BOUND_L(a, b) CHOICE(boundl ARG2(a,b), boundl ARG2(b,a), _LTOG bound ARG2(b,a)) |
#define BOUND_W(a, b) CHOICE(boundw ARG2(a,b), boundw ARG2(b,a), _WTOG bound ARG2(b,a)) |
#define BSF_L(a, b) CHOICE(bsfl ARG2(a,b), bsfl ARG2(a,b), _LTOG bsf ARG2(b,a)) |
#define BSF_W(a, b) CHOICE(bsfw ARG2(a,b), bsfw ARG2(a,b), _WTOG bsf ARG2(b,a)) |
#define BSR_L(a, b) CHOICE(bsrl ARG2(a,b), bsrl ARG2(a,b), _LTOG bsr ARG2(b,a)) |
#define BSR_W(a, b) CHOICE(bsrw ARG2(a,b), bsrw ARG2(a,b), _WTOG bsr ARG2(b,a)) |
#define BT_L(a, b) CHOICE(btl ARG2(a,b), btl ARG2(a,b), _LTOG bt ARG2(b,a)) |
#define BT_W(a, b) CHOICE(btw ARG2(a,b), btw ARG2(a,b), _WTOG bt ARG2(b,a)) |
#define BTC_L(a, b) CHOICE(btcl ARG2(a,b), btcl ARG2(a,b), _LTOG btc ARG2(b,a)) |
#define BTC_W(a, b) CHOICE(btcw ARG2(a,b), btcw ARG2(a,b), _WTOG btc ARG2(b,a)) |
#define BTR_L(a, b) CHOICE(btrl ARG2(a,b), btrl ARG2(a,b), _LTOG btr ARG2(b,a)) |
#define BTR_W(a, b) CHOICE(btrw ARG2(a,b), btrw ARG2(a,b), _WTOG btr ARG2(b,a)) |
#define BTS_L(a, b) CHOICE(btsl ARG2(a,b), btsl ARG2(a,b), _LTOG bts ARG2(b,a)) |
#define BTS_W(a, b) CHOICE(btsw ARG2(a,b), btsw ARG2(a,b), _WTOG bts ARG2(b,a)) |
#define CALL(a) CHOICE(call a, call a, call a) |
/* Far call: segment s and offset a (lcall s,a for AT&T/GNU, callf s:a for ACK). */
#define CALLF(s,a) CHOICE(lcall ARG2(s,a), lcall ARG2(s,a), callf s:a) |
/*
 * Sign-extension, flag-control and compare instructions.
 *
 * The AT&T column (first CHOICE argument) uses the AT&T names of the
 * conversion instructions; the GNU and ACK columns use the Intel names:
 *   CBW  -> cbtw   sign-extend AL  into AX
 *   CWDE -> cwtl   sign-extend AX  into EAX
 *   CWD  -> cwtd   sign-extend AX  into DX:AX
 *   CDQ  -> cltd   sign-extend EAX into EDX:EAX
 * Note that cwtl pairs with CWDE and cwtd with CWD; the two names differ in
 * one letter and are easy to transpose.
 *
 * CMP_x take (a, b) in AT&T/GNU operand order; the ACK column reverses them.
 */
#define CBW CHOICE(cbtw, cbw, cbw) |
#define CWDE CHOICE(cwtl, cwde, cwde) |
#define CLC CHOICE(clc, clc, clc) |
#define CLD CHOICE(cld, cld, cld) |
#define CLI CHOICE(cli, cli, cli) |
#define CLTS CHOICE(clts, clts, clts) |
#define CMC CHOICE(cmc, cmc, cmc) |
#define CMP_L(a, b) CHOICE(cmpl ARG2(a,b), cmpl ARG2(a,b), _LTOG cmp ARG2(b,a)) |
#define CMP_W(a, b) CHOICE(cmpw ARG2(a,b), cmpw ARG2(a,b), _WTOG cmp ARG2(b,a)) |
#define CMP_B(a, b) CHOICE(cmpb ARG2(a,b), cmpb ARG2(a,b), cmpb ARG2(b,a)) |
#define CMPS_L CHOICE(cmpsl, cmpsl, _LTOG cmps) |
#define CMPS_W CHOICE(cmpsw, cmpsw, _WTOG cmps) |
#define CMPS_B CHOICE(cmpsb, cmpsb, cmpsb) |
#define CWD CHOICE(cwtd, cwd, cwd) |
#define CDQ CHOICE(cltd, cdq, cdq) |
/*
 * Decimal adjust, increment/decrement, divide/multiply, port input and
 * interrupt instructions.
 * NOTE(review): the ACK column of DIV_L / DIV_W is a bare "div a" with no
 * _LTOG / _WTOG prefix, unlike IDIV_L / IDIV_W and MUL_L / MUL_W - confirm
 * whether that is intentional.
 */
#define DAA CHOICE(daa, daa, daa) |
#define DAS CHOICE(das, das, das) |
#define DEC_L(a) CHOICE(decl a, decl a, _LTOG dec a) |
#define DEC_W(a) CHOICE(decw a, decw a, _WTOG dec a) |
#define DEC_B(a) CHOICE(decb a, decb a, decb a) |
#define DIV_L(a) CHOICE(divl a, divl a, div a) |
#define DIV_W(a) CHOICE(divw a, divw a, div a) |
#define DIV_B(a) CHOICE(divb a, divb a, divb a) |
#define ENTER(a,b) CHOICE(enter ARG2(a,b), enter ARG2(a,b), enter ARG2(b,a)) |
#define HLT CHOICE(hlt, hlt, hlt) |
#define IDIV_L(a) CHOICE(idivl a, idivl a, _LTOG idiv a) |
#define IDIV_W(a) CHOICE(idivw a, idivw a, _WTOG idiv a) |
#define IDIV_B(a) CHOICE(idivb a, idivb a, idivb a) |
/* More forms than this for imul!! */ |
#define IMUL_L(a, b) CHOICE(imull ARG2(a,b), imull ARG2(a,b), _LTOG imul ARG2(b,a)) |
#define IMUL_W(a, b) CHOICE(imulw ARG2(a,b), imulw ARG2(a,b), _WTOG imul ARG2(b,a)) |
#define IMUL_B(a) CHOICE(imulb a, imulb a, imulb a) |
/* Port input with the port number in DX; the GNU column also names the
 * accumulator (EAX / AX / AL) explicitly.
 */
#define IN_L CHOICE(inl (DX), inl ARG2(DX,EAX), _LTOG in DX) |
#define IN_W CHOICE(inw (DX), inw ARG2(DX,AX), _WTOG in DX) |
#define IN_B CHOICE(inb (DX), inb ARG2(DX,AL), inb DX) |
/* Note to assembly writers: use the following ONLY when referring to a port
 * number below 256 directly. Do not write IN1_W(DX), for instance, even if
 * the IN1_ form looks nicer.
*/ |
#if defined (sun) |
#define IN1_L(a) CHOICE(inl (a), inl ARG2(a,EAX), _LTOG in a) |
#define IN1_W(a) CHOICE(inw (a), inw ARG2(a,AX), _WTOG in a) |
#define IN1_B(a) CHOICE(inb (a), inb ARG2(a,AL), inb a) |
#else |
#define IN1_L(a) CHOICE(inl a, inl ARG2(a,EAX), _LTOG in a) |
#define IN1_W(a) CHOICE(inw a, inw ARG2(a,AX), _WTOG in a) |
#define IN1_B(a) CHOICE(inb a, inb ARG2(a,AL), inb a) |
#endif |
#define INC_L(a) CHOICE(incl a, incl a, _LTOG inc a) |
#define INC_W(a) CHOICE(incw a, incw a, _WTOG inc a) |
#define INC_B(a) CHOICE(incb a, incb a, incb a) |
#define INS_L CHOICE(insl, insl, _LTOG ins) |
#define INS_W CHOICE(insw, insw, _WTOG ins) |
#define INS_B CHOICE(insb, insb, insb) |
#define INT(a) CHOICE(int a, int a, int a) |
#define INT3 CHOICE(int CONST(3), int3, int CONST(3)) |
#define INTO CHOICE(into, into, into) |
#define IRET CHOICE(iret, iret, iret) |
#define IRETD CHOICE(iret, iret, iretd) |
/*
 * Conditional jumps and JMP: each takes a target a and is spelled the same
 * way in all three columns. JMPF (far jump to segment s, offset a) is
 * "ljmp s,a" for AT&T/GNU and "jmpf s:a" for ACK.
 */
#define JA(a) CHOICE(ja a, ja a, ja a) |
#define JAE(a) CHOICE(jae a, jae a, jae a) |
#define JB(a) CHOICE(jb a, jb a, jb a) |
#define JBE(a) CHOICE(jbe a, jbe a, jbe a) |
#define JC(a) CHOICE(jc a, jc a, jc a) |
#define JE(a) CHOICE(je a, je a, je a) |
#define JG(a) CHOICE(jg a, jg a, jg a) |
#define JGE(a) CHOICE(jge a, jge a, jge a) |
#define JL(a) CHOICE(jl a, jl a, jl a) |
#define JLE(a) CHOICE(jle a, jle a, jle a) |
#define JNA(a) CHOICE(jna a, jna a, jna a) |
#define JNAE(a) CHOICE(jnae a, jnae a, jnae a) |
#define JNB(a) CHOICE(jnb a, jnb a, jnb a) |
#define JNBE(a) CHOICE(jnbe a, jnbe a, jnbe a) |
#define JNC(a) CHOICE(jnc a, jnc a, jnc a) |
#define JNE(a) CHOICE(jne a, jne a, jne a) |
#define JNG(a) CHOICE(jng a, jng a, jng a) |
#define JNGE(a) CHOICE(jnge a, jnge a, jnge a) |
#define JNL(a) CHOICE(jnl a, jnl a, jnl a) |
#define JNLE(a) CHOICE(jnle a, jnle a, jnle a) |
#define JNO(a) CHOICE(jno a, jno a, jno a) |
#define JNP(a) CHOICE(jnp a, jnp a, jnp a) |
#define JNS(a) CHOICE(jns a, jns a, jns a) |
#define JNZ(a) CHOICE(jnz a, jnz a, jnz a) |
#define JO(a) CHOICE(jo a, jo a, jo a) |
#define JP(a) CHOICE(jp a, jp a, jp a) |
#define JPE(a) CHOICE(jpe a, jpe a, jpe a) |
#define JPO(a) CHOICE(jpo a, jpo a, jpo a) |
#define JS(a) CHOICE(js a, js a, js a) |
#define JZ(a) CHOICE(jz a, jz a, jz a) |
#define JMP(a) CHOICE(jmp a, jmp a, jmp a) |
#define JMPF(s,a) CHOICE(ljmp ARG2(s,a), ljmp ARG2(s,a), jmpf s:a) |
/*
 * Load, descriptor-table, string-load and loop instructions.
 * LAR and LSL are defined only when neither _REAL_MODE nor _V86_MODE is
 * defined. For the segment loads LDS..LSS the AT&T column appends an l
 * suffix (ldsl, lesl, ...) while the GNU and ACK columns use the bare
 * mnemonic.
 */
#define LAHF CHOICE(lahf, lahf, lahf) |
#if !defined(_REAL_MODE) && !defined(_V86_MODE) |
#define LAR(a, b) CHOICE(lar ARG2(a, b), lar ARG2(a, b), lar ARG2(b, a)) |
#endif |
#define LEA_L(a, b) CHOICE(leal ARG2(a,b), leal ARG2(a,b), _LTOG lea ARG2(b,a)) |
#define LEA_W(a, b) CHOICE(leaw ARG2(a,b), leaw ARG2(a,b), _WTOG lea ARG2(b,a)) |
#define LEAVE CHOICE(leave, leave, leave) |
#define LGDT(a) CHOICE(lgdt a, lgdt a, lgdt a) |
#define LIDT(a) CHOICE(lidt a, lidt a, lidt a) |
#define LDS(a, b) CHOICE(ldsl ARG2(a,b), lds ARG2(a,b), lds ARG2(b,a)) |
#define LES(a, b) CHOICE(lesl ARG2(a,b), les ARG2(a,b), les ARG2(b,a)) |
#define LFS(a, b) CHOICE(lfsl ARG2(a,b), lfs ARG2(a,b), lfs ARG2(b,a)) |
#define LGS(a, b) CHOICE(lgsl ARG2(a,b), lgs ARG2(a,b), lgs ARG2(b,a)) |
#define LSS(a, b) CHOICE(lssl ARG2(a,b), lss ARG2(a,b), lss ARG2(b,a)) |
#define LLDT(a) CHOICE(lldt a, lldt a, lldt a) |
#define LMSW(a) CHOICE(lmsw a, lmsw a, lmsw a) |
#define LOCK CHOICE(lock, lock, lock) |
#define LODS_L CHOICE(lodsl, lodsl, _LTOG lods) |
#define LODS_W CHOICE(lodsw, lodsw, _WTOG lods) |
#define LODS_B CHOICE(lodsb, lodsb, lodsb) |
#define LOOP(a) CHOICE(loop a, loop a, loop a) |
#define LOOPE(a) CHOICE(loope a, loope a, loope a) |
#define LOOPZ(a) CHOICE(loopz a, loopz a, loopz a) |
#define LOOPNE(a) CHOICE(loopne a, loopne a, loopne a) |
#define LOOPNZ(a) CHOICE(loopnz a, loopnz a, loopnz a) |
#if !defined(_REAL_MODE) && !defined(_V86_MODE) |
#define LSL(a, b) CHOICE(lsl ARG2(a,b), lsl ARG2(a,b), lsl ARG2(b,a)) |
#endif |
#define LTR(a) CHOICE(ltr a, ltr a, ltr a) |
/*
 * Move, extend, multiply, negate, complement and OR instructions.
 * MOV_SR is the segment-register move (movw in the AT&T column, plain mov
 * elsewhere). The MOVSX_xy / MOVZX_xy macros encode source and destination
 * width in the AT&T/GNU mnemonic (movsbl, movzwl, ...), whereas the ACK
 * column always emits plain movsx / movzx with reversed operands.
 */
#define MOV_SR(a, b) CHOICE(movw ARG2(a,b), mov ARG2(a,b), mov ARG2(b,a)) |
#define MOV_L(a, b) CHOICE(movl ARG2(a,b), movl ARG2(a,b), _LTOG mov ARG2(b,a)) |
#define MOV_W(a, b) CHOICE(movw ARG2(a,b), movw ARG2(a,b), _WTOG mov ARG2(b,a)) |
#define MOV_B(a, b) CHOICE(movb ARG2(a,b), movb ARG2(a,b), movb ARG2(b,a)) |
#define MOVS_L CHOICE(movsl, movsl, _LTOG movs) |
#define MOVS_W CHOICE(movsw, movsw, _WTOG movs) |
#define MOVS_B CHOICE(movsb, movsb, movsb) |
#define MOVSX_BL(a, b) CHOICE(movsbl ARG2(a,b), movsbl ARG2(a,b), movsx ARG2(b,a)) |
#define MOVSX_BW(a, b) CHOICE(movsbw ARG2(a,b), movsbw ARG2(a,b), movsx ARG2(b,a)) |
#define MOVSX_WL(a, b) CHOICE(movswl ARG2(a,b), movswl ARG2(a,b), movsx ARG2(b,a)) |
#define MOVZX_BL(a, b) CHOICE(movzbl ARG2(a,b), movzbl ARG2(a,b), movzx ARG2(b,a)) |
#define MOVZX_BW(a, b) CHOICE(movzbw ARG2(a,b), movzbw ARG2(a,b), movzx ARG2(b,a)) |
#define MOVZX_WL(a, b) CHOICE(movzwl ARG2(a,b), movzwl ARG2(a,b), movzx ARG2(b,a)) |
#define MUL_L(a) CHOICE(mull a, mull a, _LTOG mul a) |
#define MUL_W(a) CHOICE(mulw a, mulw a, _WTOG mul a) |
#define MUL_B(a) CHOICE(mulb a, mulb a, mulb a) |
#define NEG_L(a) CHOICE(negl a, negl a, _LTOG neg a) |
#define NEG_W(a) CHOICE(negw a, negw a, _WTOG neg a) |
#define NEG_B(a) CHOICE(negb a, negb a, negb a) |
#define NOP CHOICE(nop, nop, nop) |
#define NOT_L(a) CHOICE(notl a, notl a, _LTOG not a) |
#define NOT_W(a) CHOICE(notw a, notw a, _WTOG not a) |
#define NOT_B(a) CHOICE(notb a, notb a, notb a) |
#define OR_L(a,b) CHOICE(orl ARG2(a,b), orl ARG2(a,b), _LTOG or ARG2(b,a)) |
#define OR_W(a,b) CHOICE(orw ARG2(a,b), orw ARG2(a,b), _WTOG or ARG2(b,a)) |
#define OR_B(a,b) CHOICE(orb ARG2(a,b), orb ARG2(a,b), orb ARG2(b,a)) |
/*
 * Port output and stack instructions.
 * OUT_x write to the port in DX; the GNU column also names the accumulator
 * (EAX / AX / AL) as the source operand.
 */
#define OUT_L CHOICE(outl (DX), outl ARG2(EAX,DX), _LTOG out DX) |
#define OUT_W CHOICE(outw (DX), outw ARG2(AX,DX), _WTOG out DX) |
#define OUT_B CHOICE(outb (DX), outb ARG2(AL,DX), outb DX) |
/* Note to assembly writers: use the following ONLY when referring to a port
 * number below 256 directly. Do not write OUT1_W(DX), for instance, even if
 * the OUT1_ form looks nicer.
*/ |
#define OUT1_L(a) CHOICE(outl (a), outl ARG2(EAX,a), _LTOG out a) |
#define OUT1_W(a) CHOICE(outw (a), outw ARG2(AX,a), _WTOG out a) |
#define OUT1_B(a) CHOICE(outb (a), outb ARG2(AL,a), outb a) |
#define OUTS_L CHOICE(outsl, outsl, _LTOG outs) |
#define OUTS_W CHOICE(outsw, outsw, _WTOG outs) |
#define OUTS_B CHOICE(outsb, outsb, outsb) |
/* The _SR forms (segment-register push/pop) use the bare pop/push mnemonic
 * in every column.
 */
#define POP_SR(a) CHOICE(pop a, pop a, pop a) |
#define POP_L(a) CHOICE(popl a, popl a, _LTOG pop a) |
#define POP_W(a) CHOICE(popw a, popw a, _WTOG pop a) |
#define POPA_L CHOICE(popal, popal, _LTOG popa) |
#define POPA_W CHOICE(popaw, popaw, _WTOG popa) |
#define POPF_L CHOICE(popfl, popfl, _LTOG popf) |
#define POPF_W CHOICE(popfw, popfw, _WTOG popf) |
#define PUSH_SR(a) CHOICE(push a, push a, push a) |
#define PUSH_L(a) CHOICE(pushl a, pushl a, _LTOG push a) |
#define PUSH_W(a) CHOICE(pushw a, pushw a, _WTOG push a) |
#define PUSH_B(a) CHOICE(push a, pushb a, push a) |
#define PUSHA_L CHOICE(pushal, pushal, _LTOG pusha) |
#define PUSHA_W CHOICE(pushaw, pushaw, _WTOG pusha) |
#define PUSHF_L CHOICE(pushfl, pushfl, _LTOG pushf) |
#define PUSHF_W CHOICE(pushfw, pushfw, _WTOG pushf) |
/* Rotate wrappers (RCL, RCR, ROL, ROR) in long, word and byte widths.
 * Arguments are passed as (a, b); the first two CHOICE alternatives emit them
 * as ARG2(a,b) with an l/w/b mnemonic suffix, the third emits ARG2(b,a) with
 * an unsuffixed mnemonic preceded by _LTOG/_WTOG for the _L/_W forms.
 * CHOICE, ARG2, _LTOG and _WTOG are defined outside this excerpt.
 */
#define RCL_L(a, b) CHOICE(rcll ARG2(a,b), rcll ARG2(a,b), _LTOG rcl ARG2(b,a)) |
#define RCL_W(a, b) CHOICE(rclw ARG2(a,b), rclw ARG2(a,b), _WTOG rcl ARG2(b,a)) |
#define RCL_B(a, b) CHOICE(rclb ARG2(a,b), rclb ARG2(a,b), rclb ARG2(b,a)) |
#define RCR_L(a, b) CHOICE(rcrl ARG2(a,b), rcrl ARG2(a,b), _LTOG rcr ARG2(b,a)) |
#define RCR_W(a, b) CHOICE(rcrw ARG2(a,b), rcrw ARG2(a,b), _WTOG rcr ARG2(b,a)) |
#define RCR_B(a, b) CHOICE(rcrb ARG2(a,b), rcrb ARG2(a,b), rcrb ARG2(b,a)) |
#define ROL_L(a, b) CHOICE(roll ARG2(a,b), roll ARG2(a,b), _LTOG rol ARG2(b,a)) |
#define ROL_W(a, b) CHOICE(rolw ARG2(a,b), rolw ARG2(a,b), _WTOG rol ARG2(b,a)) |
#define ROL_B(a, b) CHOICE(rolb ARG2(a,b), rolb ARG2(a,b), rolb ARG2(b,a)) |
#define ROR_L(a, b) CHOICE(rorl ARG2(a,b), rorl ARG2(a,b), _LTOG ror ARG2(b,a)) |
#define ROR_W(a, b) CHOICE(rorw ARG2(a,b), rorw ARG2(a,b), _WTOG ror ARG2(b,a)) |
#define ROR_B(a, b) CHOICE(rorb ARG2(a,b), rorb ARG2(a,b), rorb ARG2(b,a)) |
/* Repeat prefixes plus RET and SAHF.
 * In the first two CHOICE alternatives the prefix is followed by `;`, so the
 * prefixed instruction is written as a separate statement; the third
 * alternative has no separator. Note that the third alternative of REP is
 * spelled `repe`, and the first alternatives of REPE/REPNE use the
 * repz/repnz spellings.
 * REPNZ and REPZ are plain aliases for REPNE and REPE.
 * CHOICE is defined outside this excerpt.
 */
#define REP CHOICE(rep ;, rep ;, repe) |
#define REPE CHOICE(repz ;, repe ;, repe) |
#define REPNE CHOICE(repnz ;, repne ;, repne) |
#define REPNZ REPNE |
#define REPZ REPE |
#define RET CHOICE(ret, ret, ret) |
#define SAHF CHOICE(sahf, sahf, sahf) |
/* Arithmetic shifts (SAL, SAR), subtract-with-borrow (SBB) and string scan
 * (SCAS) in long, word and byte widths.
 * Two-operand forms take (a, b): the first two CHOICE alternatives emit
 * ARG2(a,b) with a size-suffixed mnemonic, the third emits ARG2(b,a) with an
 * unsuffixed mnemonic preceded by _LTOG/_WTOG for the _L/_W forms.
 * CHOICE, ARG2, _LTOG and _WTOG are defined outside this excerpt.
 */
#define SAL_L(a, b) CHOICE(sall ARG2(a,b), sall ARG2(a,b), _LTOG sal ARG2(b,a)) |
#define SAL_W(a, b) CHOICE(salw ARG2(a,b), salw ARG2(a,b), _WTOG sal ARG2(b,a)) |
#define SAL_B(a, b) CHOICE(salb ARG2(a,b), salb ARG2(a,b), salb ARG2(b,a)) |
#define SAR_L(a, b) CHOICE(sarl ARG2(a,b), sarl ARG2(a,b), _LTOG sar ARG2(b,a)) |
#define SAR_W(a, b) CHOICE(sarw ARG2(a,b), sarw ARG2(a,b), _WTOG sar ARG2(b,a)) |
#define SAR_B(a, b) CHOICE(sarb ARG2(a,b), sarb ARG2(a,b), sarb ARG2(b,a)) |
#define SBB_L(a, b) CHOICE(sbbl ARG2(a,b), sbbl ARG2(a,b), _LTOG sbb ARG2(b,a)) |
#define SBB_W(a, b) CHOICE(sbbw ARG2(a,b), sbbw ARG2(a,b), _WTOG sbb ARG2(b,a)) |
#define SBB_B(a, b) CHOICE(sbbb ARG2(a,b), sbbb ARG2(a,b), sbbb ARG2(b,a)) |
#define SCAS_L CHOICE(scasl, scasl, _LTOG scas) |
#define SCAS_W CHOICE(scasw, scasw, _WTOG scas) |
#define SCAS_B CHOICE(scasb, scasb, scasb) |
/* Set-byte-on-condition (SETcc) wrappers; `a` is the destination operand.
 * Each macro hands three alternative spellings to CHOICE (defined outside
 * this excerpt), which presumably keeps the one for the target assembler.
 * SETC and SETNC use the setb/setnb spellings in the second and third
 * alternatives (carry and below are the same condition).
 * SETS and SETNS test the sign flag; their third alternatives used to read
 * `seta`/`setna`, which are the unrelated above / not-above conditions, so
 * they now spell sets/setns like the other two alternatives.
 * NOTE(review): this assumes the third-alternative assembler accepts the
 * sets/setns mnemonics -- confirm against its mnemonic table.
 */
#define SETA(a) CHOICE(seta a, seta a, seta a) |
#define SETAE(a) CHOICE(setae a, setae a, setae a) |
#define SETB(a) CHOICE(setb a, setb a, setb a) |
#define SETBE(a) CHOICE(setbe a, setbe a, setbe a) |
#define SETC(a) CHOICE(setc a, setb a, setb a) |
#define SETE(a) CHOICE(sete a, sete a, sete a) |
#define SETG(a) CHOICE(setg a, setg a, setg a) |
#define SETGE(a) CHOICE(setge a, setge a, setge a) |
#define SETL(a) CHOICE(setl a, setl a, setl a) |
#define SETLE(a) CHOICE(setle a, setle a, setle a) |
#define SETNA(a) CHOICE(setna a, setna a, setna a) |
#define SETNAE(a) CHOICE(setnae a, setnae a, setnae a) |
#define SETNB(a) CHOICE(setnb a, setnb a, setnb a) |
#define SETNBE(a) CHOICE(setnbe a, setnbe a, setnbe a) |
#define SETNC(a) CHOICE(setnc a, setnb a, setnb a) |
#define SETNE(a) CHOICE(setne a, setne a, setne a) |
#define SETNG(a) CHOICE(setng a, setng a, setng a) |
#define SETNGE(a) CHOICE(setnge a, setnge a, setnge a) |
#define SETNL(a) CHOICE(setnl a, setnl a, setnl a) |
#define SETNLE(a) CHOICE(setnle a, setnle a, setnle a) |
#define SETNO(a) CHOICE(setno a, setno a, setno a) |
#define SETNP(a) CHOICE(setnp a, setnp a, setnp a) |
#define SETNS(a) CHOICE(setns a, setns a, setns a) |
#define SETNZ(a) CHOICE(setnz a, setnz a, setnz a) |
#define SETO(a) CHOICE(seto a, seto a, seto a) |
#define SETP(a) CHOICE(setp a, setp a, setp a) |
#define SETPE(a) CHOICE(setpe a, setpe a, setpe a) |
#define SETPO(a) CHOICE(setpo a, setpo a, setpo a) |
#define SETS(a) CHOICE(sets a, sets a, sets a) |
#define SETZ(a) CHOICE(setz a, setz a, setz a) |
/* Descriptor-table stores (SGDT, SIDT, SLDT), SMSW, and the logical shifts
 * SHL/SHR with their double-precision variants SHLD/SHRD.
 * SHLD_x/SHRD_x take three operands (a, b, c) and emit them as ARG3(a,b,c) in
 * the first two CHOICE alternatives and ARG3(c,b,a) in the third.
 * SHLD2_x/SHRD2_x take two operands: the first alternative emits only
 * ARG2(a,b), while the second and third add CL as the explicit extra operand
 * (ARG3(CL,a,b) and ARG3(b,a,CL) respectively).
 * CHOICE, ARG2, ARG3, _LTOG and _WTOG are defined outside this excerpt.
 */
#define SGDT(a) CHOICE(sgdt a, sgdt a, sgdt a) |
#define SIDT(a) CHOICE(sidt a, sidt a, sidt a) |
#define SHL_L(a, b) CHOICE(shll ARG2(a,b), shll ARG2(a,b), _LTOG shl ARG2(b,a)) |
#define SHL_W(a, b) CHOICE(shlw ARG2(a,b), shlw ARG2(a,b), _WTOG shl ARG2(b,a)) |
#define SHL_B(a, b) CHOICE(shlb ARG2(a,b), shlb ARG2(a,b), shlb ARG2(b,a)) |
#define SHLD_L(a,b,c) CHOICE(shldl ARG3(a,b,c), shldl ARG3(a,b,c), _LTOG shld ARG3(c,b,a)) |
#define SHLD2_L(a,b) CHOICE(shldl ARG2(a,b), shldl ARG3(CL,a,b), _LTOG shld ARG3(b,a,CL)) |
#define SHLD_W(a,b,c) CHOICE(shldw ARG3(a,b,c), shldw ARG3(a,b,c), _WTOG shld ARG3(c,b,a)) |
#define SHLD2_W(a,b) CHOICE(shldw ARG2(a,b), shldw ARG3(CL,a,b), _WTOG shld ARG3(b,a,CL)) |
#define SHR_L(a, b) CHOICE(shrl ARG2(a,b), shrl ARG2(a,b), _LTOG shr ARG2(b,a)) |
#define SHR_W(a, b) CHOICE(shrw ARG2(a,b), shrw ARG2(a,b), _WTOG shr ARG2(b,a)) |
#define SHR_B(a, b) CHOICE(shrb ARG2(a,b), shrb ARG2(a,b), shrb ARG2(b,a)) |
#define SHRD_L(a,b,c) CHOICE(shrdl ARG3(a,b,c), shrdl ARG3(a,b,c), _LTOG shrd ARG3(c,b,a)) |
#define SHRD2_L(a,b) CHOICE(shrdl ARG2(a,b), shrdl ARG3(CL,a,b), _LTOG shrd ARG3(b,a,CL)) |
#define SHRD_W(a,b,c) CHOICE(shrdw ARG3(a,b,c), shrdw ARG3(a,b,c), _WTOG shrd ARG3(c,b,a)) |
#define SHRD2_W(a,b) CHOICE(shrdw ARG2(a,b), shrdw ARG3(CL,a,b), _WTOG shrd ARG3(b,a,CL)) |
#define SLDT(a) CHOICE(sldt a, sldt a, sldt a) |
#define SMSW(a) CHOICE(smsw a, smsw a, smsw a) |
/* Flag setters (STC, STD, STI), string store (STOS), STR, and the
 * two-operand SUB, TEST, XCHG and XOR wrappers, plus VERR/VERW, WAIT, XLAT.
 * Two-operand forms take (a, b): the first two CHOICE alternatives emit
 * ARG2(a,b) with a size-suffixed mnemonic, the third emits ARG2(b,a) with an
 * unsuffixed mnemonic preceded by _LTOG/_WTOG for the _L/_W forms.
 * CHOICE, ARG2, _LTOG and _WTOG are defined outside this excerpt.
 */
#define STC CHOICE(stc, stc, stc) |
#define STD CHOICE(std, std, std) |
#define STI CHOICE(sti, sti, sti) |
#define STOS_L CHOICE(stosl, stosl, _LTOG stos) |
#define STOS_W CHOICE(stosw, stosw, _WTOG stos) |
#define STOS_B CHOICE(stosb, stosb, stosb) |
#define STR(a) CHOICE(str a, str a, str a) |
#define SUB_L(a, b) CHOICE(subl ARG2(a,b), subl ARG2(a,b), _LTOG sub ARG2(b,a)) |
#define SUB_W(a, b) CHOICE(subw ARG2(a,b), subw ARG2(a,b), _WTOG sub ARG2(b,a)) |
#define SUB_B(a, b) CHOICE(subb ARG2(a,b), subb ARG2(a,b), subb ARG2(b,a)) |
#define TEST_L(a, b) CHOICE(testl ARG2(a,b), testl ARG2(a,b), _LTOG test ARG2(b,a)) |
#define TEST_W(a, b) CHOICE(testw ARG2(a,b), testw ARG2(a,b), _WTOG test ARG2(b,a)) |
#define TEST_B(a, b) CHOICE(testb ARG2(a,b), testb ARG2(a,b), testb ARG2(b,a)) |
#define VERR(a) CHOICE(verr a, verr a, verr a) |
#define VERW(a) CHOICE(verw a, verw a, verw a) |
#define WAIT CHOICE(wait, wait, wait) |
#define XCHG_L(a, b) CHOICE(xchgl ARG2(a,b), xchgl ARG2(a,b), _LTOG xchg ARG2(b,a)) |
#define XCHG_W(a, b) CHOICE(xchgw ARG2(a,b), xchgw ARG2(a,b), _WTOG xchg ARG2(b,a)) |
#define XCHG_B(a, b) CHOICE(xchgb ARG2(a,b), xchgb ARG2(a,b), xchgb ARG2(b,a)) |
#define XLAT CHOICE(xlat, xlat, xlat) |
#define XOR_L(a, b) CHOICE(xorl ARG2(a,b), xorl ARG2(a,b), _LTOG xor ARG2(b,a)) |
#define XOR_W(a, b) CHOICE(xorw ARG2(a,b), xorw ARG2(a,b), _WTOG xor ARG2(b,a)) |
#define XOR_B(a, b) CHOICE(xorb ARG2(a,b), xorb ARG2(a,b), xorb ARG2(b,a)) |
/* Floating Point Instructions */ |
/* x87 wrappers, first part: F2XM1 through FIDIVR_W.
 * Naming used by the macros below:
 *   _S / _D  memory operand forms; _D is spelled with an `l` suffix in the
 *            first two CHOICE alternatives and a `d` suffix in the third.
 *   2 / P    two-operand and pop forms; operands are emitted as ARG2(a,b) in
 *            the first two alternatives and ARG2(b,a) in the third.
 *   FIxxx_W  integer forms: unsuffixed in the first alternative, `s` suffix
 *            in the other two.
 * FCLEX prepends `wait;` in the second and third alternatives; FNCLEX is the
 * variant without it (its third alternative is spelled `fclex`).
 * CHOICE and ARG2 are defined outside this excerpt.
 */
#define F2XM1 CHOICE(f2xm1, f2xm1, f2xm1) |
#define FABS CHOICE(fabs, fabs, fabs) |
#define FADD_D(a) CHOICE(faddl a, faddl a, faddd a) |
#define FADD_S(a) CHOICE(fadds a, fadds a, fadds a) |
#define FADD2(a, b) CHOICE(fadd ARG2(a,b), fadd ARG2(a,b), fadd ARG2(b,a)) |
#define FADDP(a, b) CHOICE(faddp ARG2(a,b), faddp ARG2(a,b), faddp ARG2(b,a)) |
#define FIADD_L(a) CHOICE(fiaddl a, fiaddl a, fiaddl a) |
#define FIADD_W(a) CHOICE(fiadd a, fiadds a, fiadds a) |
#define FBLD(a) CHOICE(fbld a, fbld a, fbld a) |
#define FBSTP(a) CHOICE(fbstp a, fbstp a, fbstp a) |
#define FCHS CHOICE(fchs, fchs, fchs) |
#define FCLEX CHOICE(fclex, wait; fnclex, wait; fclex) |
#define FNCLEX CHOICE(fnclex, fnclex, fclex) |
#define FCOM(a) CHOICE(fcom a, fcom a, fcom a) |
#define FCOM_D(a) CHOICE(fcoml a, fcoml a, fcomd a) |
#define FCOM_S(a) CHOICE(fcoms a, fcoms a, fcoms a) |
#define FCOMP(a) CHOICE(fcomp a, fcomp a, fcomp a) |
#define FCOMP_D(a) CHOICE(fcompl a, fcompl a, fcompd a) |
#define FCOMP_S(a) CHOICE(fcomps a, fcomps a, fcomps a) |
#define FCOMPP CHOICE(fcompp, fcompp, fcompp) |
#define FCOS CHOICE(fcos, fcos, fcos) |
#define FDECSTP CHOICE(fdecstp, fdecstp, fdecstp) |
#define FDIV_D(a) CHOICE(fdivl a, fdivl a, fdivd a) |
#define FDIV_S(a) CHOICE(fdivs a, fdivs a, fdivs a) |
#define FDIV2(a, b) CHOICE(fdiv ARG2(a,b), fdiv ARG2(a,b), fdiv ARG2(b,a)) |
#define FDIVP(a, b) CHOICE(fdivp ARG2(a,b), fdivp ARG2(a,b), fdivp ARG2(b,a)) |
#define FIDIV_L(a) CHOICE(fidivl a, fidivl a, fidivl a) |
#define FIDIV_W(a) CHOICE(fidiv a, fidivs a, fidivs a) |
#define FDIVR_D(a) CHOICE(fdivrl a, fdivrl a, fdivrd a) |
#define FDIVR_S(a) CHOICE(fdivrs a, fdivrs a, fdivrs a) |
#define FDIVR2(a, b) CHOICE(fdivr ARG2(a,b), fdivr ARG2(a,b), fdivr ARG2(b,a)) |
#define FDIVRP(a, b) CHOICE(fdivrp ARG2(a,b), fdivrp ARG2(a,b), fdivrp ARG2(b,a)) |
#define FIDIVR_L(a) CHOICE(fidivrl a, fidivrl a, fidivrl a) |
#define FIDIVR_W(a) CHOICE(fidivr a, fidivrs a, fidivrs a) |
/* x87 wrappers, second part: FFREE through FLDENV (integer compare, integer
 * load/store, initialisation, loads and constant loads).
 * _Q integer forms are spelled with an `ll` suffix in the first CHOICE
 * alternative and `q` in the other two; _W integer forms are unsuffixed in
 * the first alternative and take an `s` suffix in the other two.
 * FLD_X is the 80-bit load (`t` suffix in the first two alternatives, `x` in
 * the third); FLD_D uses `l` in the first two and `d` in the third.
 * FINIT prepends `wait;` in the second and third alternatives; FNINIT is the
 * variant without it (its third alternative is spelled `finit`).
 * CHOICE is defined outside this excerpt.
 */
#define FFREE(a) CHOICE(ffree a, ffree a, ffree a) |
#define FICOM_L(a) CHOICE(ficoml a, ficoml a, ficoml a) |
#define FICOM_W(a) CHOICE(ficom a, ficoms a, ficoms a) |
#define FICOMP_L(a) CHOICE(ficompl a, ficompl a, ficompl a) |
#define FICOMP_W(a) CHOICE(ficomp a, ficomps a, ficomps a) |
#define FILD_Q(a) CHOICE(fildll a, fildq a, fildq a) |
#define FILD_L(a) CHOICE(fildl a, fildl a, fildl a) |
#define FILD_W(a) CHOICE(fild a, filds a, filds a) |
#define FINCSTP CHOICE(fincstp, fincstp, fincstp) |
#define FINIT CHOICE(finit, wait; fninit, wait; finit) |
#define FNINIT CHOICE(fninit, fninit, finit) |
#define FIST_L(a) CHOICE(fistl a, fistl a, fistl a) |
#define FIST_W(a) CHOICE(fist a, fists a, fists a) |
#define FISTP_Q(a) CHOICE(fistpll a, fistpq a, fistpq a) |
#define FISTP_L(a) CHOICE(fistpl a, fistpl a, fistpl a) |
#define FISTP_W(a) CHOICE(fistp a, fistps a, fistps a) |
#define FLD_X(a) CHOICE(fldt a, fldt a, fldx a) /* 80 bit data type! */ |
#define FLD_D(a) CHOICE(fldl a, fldl a, fldd a) |
#define FLD_S(a) CHOICE(flds a, flds a, flds a) |
#define FLD1 CHOICE(fld1, fld1, fld1) |
#define FLDL2T CHOICE(fldl2t, fldl2t, fldl2t) |
#define FLDL2E CHOICE(fldl2e, fldl2e, fldl2e) |
#define FLDPI CHOICE(fldpi, fldpi, fldpi) |
#define FLDLG2 CHOICE(fldlg2, fldlg2, fldlg2) |
#define FLDLN2 CHOICE(fldln2, fldln2, fldln2) |
#define FLDZ CHOICE(fldz, fldz, fldz) |
#define FLDCW(a) CHOICE(fldcw a, fldcw a, fldcw a) |
#define FLDENV(a) CHOICE(fldenv a, fldenv a, fldenv a) |
/* x87 wrappers, third part: FMUL through FNSTSW (multiply, transcendental
 * helpers, state save/restore, stores and control/status word stores).
 * _D memory forms use an `l` suffix in the first two CHOICE alternatives and
 * `d` in the third; FSTP_X is the 80-bit store (`t` / `x` suffix).
 * Waiting vs. non-waiting pairs (FSAVE/FNSAVE, FSTCW/FNSTCW, FSTENV/FNSTENV,
 * FSTSW/FNSTSW): in the third alternative the FNxxx macro emits the plain
 * mnemonic, so the waiting macro must emit `wait;` followed by that
 * mnemonic. FSTENV used to omit the `wait;` there, which made it expand to
 * exactly the same text as FNSTENV; it now follows the pattern of its
 * siblings.
 * CHOICE and ARG2 are defined outside this excerpt.
 */
#define FMUL_S(a) CHOICE(fmuls a, fmuls a, fmuls a) |
#define FMUL_D(a) CHOICE(fmull a, fmull a, fmuld a) |
#define FMUL2(a, b) CHOICE(fmul ARG2(a,b), fmul ARG2(a,b), fmul ARG2(b,a)) |
#define FMULP(a, b) CHOICE(fmulp ARG2(a,b), fmulp ARG2(a,b), fmulp ARG2(b,a)) |
#define FIMUL_L(a) CHOICE(fimull a, fimull a, fimull a) |
#define FIMUL_W(a) CHOICE(fimul a, fimuls a, fimuls a) |
#define FNOP CHOICE(fnop, fnop, fnop) |
#define FPATAN CHOICE(fpatan, fpatan, fpatan) |
#define FPREM CHOICE(fprem, fprem, fprem) |
#define FPREM1 CHOICE(fprem1, fprem1, fprem1) |
#define FPTAN CHOICE(fptan, fptan, fptan) |
#define FRNDINT CHOICE(frndint, frndint, frndint) |
#define FRSTOR(a) CHOICE(frstor a, frstor a, frstor a) |
#define FSAVE(a) CHOICE(fsave a, wait; fnsave a, wait; fsave a) |
#define FNSAVE(a) CHOICE(fnsave a, fnsave a, fsave a) |
#define FSCALE CHOICE(fscale, fscale, fscale) |
#define FSIN CHOICE(fsin, fsin, fsin) |
#define FSINCOS CHOICE(fsincos, fsincos, fsincos) |
#define FSQRT CHOICE(fsqrt, fsqrt, fsqrt) |
#define FST_D(a) CHOICE(fstl a, fstl a, fstd a) |
#define FST_S(a) CHOICE(fsts a, fsts a, fsts a) |
#define FSTP_X(a) CHOICE(fstpt a, fstpt a, fstpx a) |
#define FSTP_D(a) CHOICE(fstpl a, fstpl a, fstpd a) |
#define FSTP_S(a) CHOICE(fstps a, fstps a, fstps a) |
#define FSTP(a) CHOICE(fstp a, fstp a, fstp a) |
#define FSTCW(a) CHOICE(fstcw a, wait; fnstcw a, wait; fstcw a) |
#define FNSTCW(a) CHOICE(fnstcw a, fnstcw a, fstcw a) |
#define FSTENV(a) CHOICE(fstenv a, wait; fnstenv a, wait; fstenv a) |
#define FNSTENV(a) CHOICE(fnstenv a, fnstenv a, fstenv a) |
#define FSTSW(a) CHOICE(fstsw a, wait; fnstsw a, wait; fstsw a) |
#define FNSTSW(a) CHOICE(fnstsw a, fnstsw a, fstsw a) |
/* x87 wrappers, fourth part: FSUB through FYL2XP1 (subtract, reverse
 * subtract, compare/test, exchange and logarithm helpers).
 * _D memory forms use an `l` suffix in the first two CHOICE alternatives and
 * `d` in the third; FIxxx_W integer forms are unsuffixed in the first
 * alternative and take an `s` suffix in the other two; the two-operand
 * 2 / P forms emit ARG2(a,b) in the first two alternatives and ARG2(b,a) in
 * the third. FWAIT is spelled `wait` in all three alternatives.
 * CHOICE and ARG2 are defined outside this excerpt.
 */
#define FSUB_S(a) CHOICE(fsubs a, fsubs a, fsubs a) |
#define FSUB_D(a) CHOICE(fsubl a, fsubl a, fsubd a) |
#define FSUB2(a, b) CHOICE(fsub ARG2(a,b), fsub ARG2(a,b), fsub ARG2(b,a)) |
#define FSUBP(a, b) CHOICE(fsubp ARG2(a,b), fsubp ARG2(a,b), fsubp ARG2(b,a)) |
#define FISUB_L(a) CHOICE(fisubl a, fisubl a, fisubl a) |
#define FISUB_W(a) CHOICE(fisub a, fisubs a, fisubs a) |
#define FSUBR_S(a) CHOICE(fsubrs a, fsubrs a, fsubrs a) |
#define FSUBR_D(a) CHOICE(fsubrl a, fsubrl a, fsubrd a) |
#define FSUBR2(a, b) CHOICE(fsubr ARG2(a,b), fsubr ARG2(a,b), fsubr ARG2(b,a)) |
#define FSUBRP(a, b) CHOICE(fsubrp ARG2(a,b), fsubrp ARG2(a,b), fsubrp ARG2(b,a)) |
#define FISUBR_L(a) CHOICE(fisubrl a, fisubrl a, fisubrl a) |
#define FISUBR_W(a) CHOICE(fisubr a, fisubrs a, fisubrs a) |
#define FTST CHOICE(ftst, ftst, ftst) |
#define FUCOM(a) CHOICE(fucom a, fucom a, fucom a) |
#define FUCOMP(a) CHOICE(fucomp a, fucomp a, fucomp a) |
#define FUCOMPP CHOICE(fucompp, fucompp, fucompp) |
#define FWAIT CHOICE(wait, wait, wait) |
#define FXAM CHOICE(fxam, fxam, fxam) |
#define FXCH(a) CHOICE(fxch a, fxch a, fxch a) |
#define FXTRACT CHOICE(fxtract, fxtract, fxtract) |
#define FYL2X CHOICE(fyl2x, fyl2x, fyl2x) |
#define FYL2XP1 CHOICE(fyl2xp1, fyl2xp1, fyl2xp1) |
/* New instructions */ |
/* CPUID and RDTSC. Only the second CHOICE alternative uses the mnemonic; the
 * first and third emit the two opcode bytes directly through D_BYTE:
 * 15, 162 (0x0F 0xA2) for CPUID and 15, 49 (0x0F 0x31) for RDTSC.
 * CHOICE, D_BYTE and ARG2 are defined outside this excerpt.
 */
#define CPUID CHOICE(D_BYTE ARG2(15, 162), cpuid, D_BYTE ARG2(15, 162)) |
#define RDTSC CHOICE(D_BYTE ARG2(15, 49), rdtsc, D_BYTE ARG2(15, 49)) |
#else /* NASM_ASSEMBLER || MASM_ASSEMBLER is defined */ |
/****************************************/ |
/* */ |
/* Intel style assemblers. */ |
/* (NASM and MASM) */ |
/* */ |
/****************************************/ |
/* Register alias table for the Intel-syntax (NASM/MASM) branch.
 * The instruction macros in this branch paste an operand-size prefix (P_,
 * L_, W_, B_, X_, D_) onto each operand token; for a register operand the
 * pasted name must be one of the aliases below, which resolves to the bare
 * register name. Only the widths that exist for a register are listed: for
 * example ECX has P_ECX/L_ECX, W_CX and B_CH/B_CL, but there is no L_CL.
 * Segment registers only have W_ aliases, ST has X_/D_/L_ aliases, and the
 * MMX/XMM registers only have P_ aliases (spelled in lower case).
 */
#define P_EAX EAX |
#define L_EAX EAX |
#define W_AX AX |
#define B_AH AH |
#define B_AL AL |
#define P_EBX EBX |
#define L_EBX EBX |
#define W_BX BX |
#define B_BH BH |
#define B_BL BL |
#define P_ECX ECX |
#define L_ECX ECX |
#define W_CX CX |
#define B_CH CH |
#define B_CL CL |
#define P_EDX EDX |
#define L_EDX EDX |
#define W_DX DX |
#define B_DH DH |
#define B_DL DL |
#define P_EBP EBP |
#define L_EBP EBP |
#define W_BP BP |
#define P_ESI ESI |
#define L_ESI ESI |
#define W_SI SI |
#define P_EDI EDI |
#define L_EDI EDI |
#define W_DI DI |
#define P_ESP ESP |
#define L_ESP ESP |
#define W_SP SP |
#define W_CS CS |
#define W_SS SS |
#define W_DS DS |
#define W_ES ES |
#define W_FS FS |
#define W_GS GS |
#define X_ST ST |
#define D_ST ST |
#define L_ST ST |
#define P_MM0 mm0 |
#define P_MM1 mm1 |
#define P_MM2 mm2 |
#define P_MM3 mm3 |
#define P_MM4 mm4 |
#define P_MM5 mm5 |
#define P_MM6 mm6 |
#define P_MM7 mm7 |
#define P_XMM0 xmm0 |
#define P_XMM1 xmm1 |
#define P_XMM2 xmm2 |
#define P_XMM3 xmm3 |
#define P_XMM4 xmm4 |
#define P_XMM5 xmm5 |
#define P_XMM6 xmm6 |
#define P_XMM7 xmm7 |
/* Token-pasting helpers: glue two or three tokens into a single identifier.
 * Because the arguments sit next to ##, they are pasted as written and are
 * not macro-expanded first.
 */
#define CONCAT(x, y) x ## y |
#define CONCAT3(x, y, z) x ## y ## z |
/* Dialect-specific directives for the two Intel-syntax assemblers.
 * The first branch is used when NASM_ASSEMBLER is defined, the #else branch
 * is the MASM one. Both define the same set of names:
 *   xxx_PTR       operand-size keywords (MASM adds `ptr`)
 *   OFFSET        empty for NASM, `offset` for MASM
 *   GLOBL         symbol export directive
 *   ALIGNTEXTn / ALIGNTEXTnifNOP / ALIGNDATAn   all map to `ALIGN n`
 *   FILE(s)       expands to nothing
 *   STRING, D_LONG, D_WORD, D_BYTE, D_SPACE      data definition directives
 *   SEG_TEXT / SEG_DATA / SEG_BSS                section selection
 *   AS_BEGIN      expands to nothing
 *   NEAR          `near` for NASM, empty for MASM (used by the Jcc macros)
 * Only the NASM branch defines ST(n) and ST0. Under __WATCOMC__ the NASM
 * branch also emits two SECTION declarations directly into the output.
 * NOTE(review): D_SPACE(n) contains the token REP, which is itself #defined
 * to `rep` further down in this branch, so the expansion reads
 * `db n rep 0`. That does not look like a valid NASM or MASM repeat
 * construct -- confirm whether D_SPACE is used anywhere before relying on it.
 */
#if defined(NASM_ASSEMBLER) |
#define ST(n) st ## n |
#define ST0 st0 |
#define TBYTE_PTR tword |
#define QWORD_PTR qword |
#define DWORD_PTR dword |
#define WORD_PTR word |
#define BYTE_PTR byte |
#define OFFSET |
#define GLOBL GLOBAL |
#define ALIGNTEXT32 ALIGN 32 |
#define ALIGNTEXT16 ALIGN 16 |
#define ALIGNTEXT8 ALIGN 8 |
#define ALIGNTEXT4 ALIGN 4 |
#define ALIGNTEXT2 ALIGN 2 |
#define ALIGNTEXT32ifNOP ALIGN 32 |
#define ALIGNTEXT16ifNOP ALIGN 16 |
#define ALIGNTEXT8ifNOP ALIGN 8 |
#define ALIGNTEXT4ifNOP ALIGN 4 |
#define ALIGNDATA32 ALIGN 32 |
#define ALIGNDATA16 ALIGN 16 |
#define ALIGNDATA8 ALIGN 8 |
#define ALIGNDATA4 ALIGN 4 |
#define ALIGNDATA2 ALIGN 2 |
#define FILE(s) |
#define STRING(s) db s |
#define D_LONG dd |
#define D_WORD dw |
#define D_BYTE db |
/* #define SPACE */ |
/* #define COMM */ |
#if defined(__WATCOMC__) |
SECTION _TEXT public align=16 class=CODE use32 flat |
SECTION _DATA public align=16 class=DATA use32 flat |
#define SEG_TEXT SECTION _TEXT |
#define SEG_DATA SECTION _DATA |
#define SEG_BSS SECTION .bss |
#else |
#define SEG_DATA SECTION .data |
#define SEG_TEXT SECTION .text |
#define SEG_BSS SECTION .bss |
#endif |
#define D_SPACE(n) db n REP 0 |
#define AS_BEGIN |
/* Jcc's should be handled better than this... */ |
#define NEAR near |
#else /* MASM */ |
#define TBYTE_PTR tbyte ptr |
#define QWORD_PTR qword ptr |
#define DWORD_PTR dword ptr |
#define WORD_PTR word ptr |
#define BYTE_PTR byte ptr |
#define OFFSET offset |
#define GLOBL GLOBAL |
#define ALIGNTEXT32 ALIGN 32 |
#define ALIGNTEXT16 ALIGN 16 |
#define ALIGNTEXT8 ALIGN 8 |
#define ALIGNTEXT4 ALIGN 4 |
#define ALIGNTEXT2 ALIGN 2 |
#define ALIGNTEXT32ifNOP ALIGN 32 |
#define ALIGNTEXT16ifNOP ALIGN 16 |
#define ALIGNTEXT8ifNOP ALIGN 8 |
#define ALIGNTEXT4ifNOP ALIGN 4 |
#define ALIGNDATA32 ALIGN 32 |
#define ALIGNDATA16 ALIGN 16 |
#define ALIGNDATA8 ALIGN 8 |
#define ALIGNDATA4 ALIGN 4 |
#define ALIGNDATA2 ALIGN 2 |
#define FILE(s) |
#define STRING(s) db s |
#define D_LONG dd |
#define D_WORD dw |
#define D_BYTE db |
/* #define SPACE */ |
/* #define COMM */ |
#define SEG_DATA .DATA |
#define SEG_TEXT .CODE |
#define SEG_BSS .DATA |
#define D_SPACE(n) db n REP 0 |
#define AS_BEGIN |
#define NEAR |
#endif |
/* GLNAME(a) builds the assembler-level symbol for `a` by pasting a leading
 * underscore onto it with CONCAT.
 */
#define GLNAME(a) CONCAT(_, a) |
/* |
* Addressing Modes |
*/ |
/* Operand macros are written by callers without a size prefix (ADDR(x),
 * CONST(x), CONTENT(x), ...); the instruction macros paste P_/X_/D_/L_/W_/B_
 * in front, which selects one of the definitions below.
 */
/* Immediate Mode */ |
/* x_ADDR yields `OFFSET a` (OFFSET is empty for NASM, `offset` for MASM);
 * x_CONST yields the constant unchanged. The size prefix makes no
 * difference for either.
 */
#define P_ADDR(a) OFFSET a |
#define X_ADDR(a) OFFSET a |
#define D_ADDR(a) OFFSET a |
#define L_ADDR(a) OFFSET a |
#define W_ADDR(a) OFFSET a |
#define B_ADDR(a) OFFSET a |
#define P_CONST(a) a |
#define X_CONST(a) a |
#define D_CONST(a) a |
#define L_CONST(a) a |
#define W_CONST(a) a |
#define B_CONST(a) a |
/* Indirect Mode */ |
/* x_CONTENT is the memory operand at symbol `a`. NASM needs square brackets
 * around the symbol; the MASM branch uses the bare symbol. Every prefix
 * except P_ also adds the matching xxx_PTR size keyword.
 */
#ifdef NASM_ASSEMBLER |
#define P_CONTENT(a) [a] |
#define X_CONTENT(a) TBYTE_PTR [a] |
#define D_CONTENT(a) QWORD_PTR [a] |
#define L_CONTENT(a) DWORD_PTR [a] |
#define W_CONTENT(a) WORD_PTR [a] |
#define B_CONTENT(a) BYTE_PTR [a] |
#else |
#define P_CONTENT(a) a |
#define X_CONTENT(a) TBYTE_PTR a |
#define D_CONTENT(a) QWORD_PTR a |
#define L_CONTENT(a) DWORD_PTR a |
#define W_CONTENT(a) WORD_PTR a |
#define B_CONTENT(a) BYTE_PTR a |
#endif |
/* Register-based memory operands for the Intel-syntax branch. Each family
 * comes in six size variants: P_ emits only the bracketed address
 * expression, while X_/D_/L_/W_/B_ put TBYTE_PTR/QWORD_PTR/DWORD_PTR/
 * WORD_PTR/BYTE_PTR in front of it. Macro arguments are inserted into the
 * expression as written, without extra parentheses.
 */
/* Register a indirect */ |
#define P_REGIND(a) [a] |
#define X_REGIND(a) TBYTE_PTR [a] |
#define D_REGIND(a) QWORD_PTR [a] |
#define L_REGIND(a) DWORD_PTR [a] |
#define W_REGIND(a) WORD_PTR [a] |
#define B_REGIND(a) BYTE_PTR [a] |
/* Register b indirect plus displacement a */ |
#define P_REGOFF(a, b) [b + a] |
#define X_REGOFF(a, b) TBYTE_PTR [b + a] |
#define D_REGOFF(a, b) QWORD_PTR [b + a] |
#define L_REGOFF(a, b) DWORD_PTR [b + a] |
#define W_REGOFF(a, b) WORD_PTR [b + a] |
#define B_REGOFF(a, b) BYTE_PTR [b + a] |
/* Reg indirect Base + Index + Displacement - this is mainly for 16-bit mode |
* which has no scaling |
*/ |
#define P_REGBID(b, i, d) [b + i + d] |
#define X_REGBID(b, i, d) TBYTE_PTR [b + i + d] |
#define D_REGBID(b, i, d) QWORD_PTR [b + i + d] |
#define L_REGBID(b, i, d) DWORD_PTR [b + i + d] |
#define W_REGBID(b, i, d) WORD_PTR [b + i + d] |
#define B_REGBID(b, i, d) BYTE_PTR [b + i + d] |
/* Reg indirect Base + (Index * Scale) */ |
#define P_REGBIS(b, i, s) [b + i * s] |
#define X_REGBIS(b, i, s) TBYTE_PTR [b + i * s] |
#define D_REGBIS(b, i, s) QWORD_PTR [b + i * s] |
#define L_REGBIS(b, i, s) DWORD_PTR [b + i * s] |
#define W_REGBIS(b, i, s) WORD_PTR [b + i * s] |
#define B_REGBIS(b, i, s) BYTE_PTR [b + i * s] |
/* Reg indirect Base + (Index * Scale) + Displacement */ |
#define P_REGBISD(b, i, s, d) [b + i * s + d] |
#define X_REGBISD(b, i, s, d) TBYTE_PTR [b + i * s + d] |
#define D_REGBISD(b, i, s, d) QWORD_PTR [b + i * s + d] |
#define L_REGBISD(b, i, s, d) DWORD_PTR [b + i * s + d] |
#define W_REGBISD(b, i, s, d) WORD_PTR [b + i * s + d] |
#define B_REGBISD(b, i, s, d) BYTE_PTR [b + i * s + d] |
/* Displaced Scaled Index: */ |
#define P_REGDIS(d, i, s) [i * s + d] |
#define X_REGDIS(d, i, s) TBYTE_PTR [i * s + d] |
#define D_REGDIS(d, i, s) QWORD_PTR [i * s + d] |
#define L_REGDIS(d, i, s) DWORD_PTR [i * s + d] |
#define W_REGDIS(d, i, s) WORD_PTR [i * s + d] |
#define B_REGDIS(d, i, s) BYTE_PTR [i * s + d] |
/* Indexed Base: */ |
#define P_REGBI(b, i) [b + i] |
#define X_REGBI(b, i) TBYTE_PTR [b + i] |
#define D_REGBI(b, i) QWORD_PTR [b + i] |
#define L_REGBI(b, i) DWORD_PTR [b + i] |
#define W_REGBI(b, i) WORD_PTR [b + i] |
#define B_REGBI(b, i) BYTE_PTR [b + i] |
/* Displaced Base: */ |
#define P_REGDB(d, b) [b + d] |
#define X_REGDB(d, b) TBYTE_PTR [b + d] |
#define D_REGDB(d, b) QWORD_PTR [b + d] |
#define L_REGDB(d, b) DWORD_PTR [b + d] |
#define W_REGDB(d, b) WORD_PTR [b + d] |
#define B_REGDB(d, b) BYTE_PTR [b + d] |
/* Variable indirect: */ |
#define VARINDIRECT(var) [var] |
/* Use register contents as jump/call target: */ |
#define CODEPTR(reg) P_(reg) |
/* |
* Redefine assembler commands |
*/ |
/* Prefix helpers used by every instruction macro in this branch: each one
 * pastes a size prefix onto its argument so that the result names one of the
 * register aliases or operand macros defined for this branch (for example
 * L_(EAX) becomes L_EAX, and L_(CONST(1)) becomes L_CONST(1)).
 * The argument is pasted as written, so it must be a bare operand token.
 * SR_ maps onto the W_ names and S_ maps onto the L_ names.
 */
#define P_(a) P_ ## a |
#define X_(a) X_ ## a |
#define D_(a) D_ ## a |
#define SR_(a) W_ ## a |
#define S_(a) L_ ## a |
#define L_(a) L_ ## a |
#define W_(a) W_ ## a |
#define B_(a) B_ ## a |
/* Intel-syntax wrappers, AAA through BTS.
 * Two-operand macros keep the (a, b) argument order used by the other branch
 * of this header but emit the operands reversed, `b` first and `a` second,
 * each pasted with the size prefix of the macro (L_, W_ or B_).
 * NOTE(review): ARPL prefixes only its first emitted operand (W_(b)) and
 * passes `a` through unprefixed -- confirm that callers supply an operand
 * that is valid without a prefix.
 */
#define AAA aaa |
#define AAD aad |
#define AAM aam |
#define AAS aas |
#define ADC_L(a, b) adc L_(b), L_(a) |
#define ADC_W(a, b) adc W_(b), W_(a) |
#define ADC_B(a, b) adc B_(b), B_(a) |
#define ADD_L(a, b) add L_(b), L_(a) |
#define ADD_W(a, b) add W_(b), W_(a) |
#define ADD_B(a, b) add B_(b), B_(a) |
#define AND_L(a, b) and L_(b), L_(a) |
#define AND_W(a, b) and W_(b), W_(a) |
#define AND_B(a, b) and B_(b), B_(a) |
#define ARPL(a,b) arpl W_(b), a |
#define BOUND_L(a, b) bound L_(b), L_(a) |
#define BOUND_W(a, b) bound W_(b), W_(a) |
#define BSF_L(a, b) bsf L_(b), L_(a) |
#define BSF_W(a, b) bsf W_(b), W_(a) |
#define BSR_L(a, b) bsr L_(b), L_(a) |
#define BSR_W(a, b) bsr W_(b), W_(a) |
#define BT_L(a, b) bt L_(b), L_(a) |
#define BT_W(a, b) bt W_(b), W_(a) |
#define BTC_L(a, b) btc L_(b), L_(a) |
#define BTC_W(a, b) btc W_(b), W_(a) |
#define BTR_L(a, b) btr L_(b), L_(a) |
#define BTR_W(a, b) btr W_(b), W_(a) |
#define BTS_L(a, b) bts L_(b), L_(a) |
#define BTS_W(a, b) bts W_(b), W_(a) |
/* Intel-syntax wrappers, CALL through IMUL_B.
 * CALL passes its target through unchanged; CALLF emits `call far s:a`.
 * Two-operand macros take (a, b) and emit `b` first, `a` second, each pasted
 * with the size prefix of the macro; ENTER likewise emits `b, a` but without
 * prefixes. The long string compare CMPS_L is spelled `cmpsd`.
 * IMUL_B is the one-operand form; IMUL_L/IMUL_W are two-operand.
 */
#define CALL(a) call a |
#define CALLF(s,a) call far s:a |
#define CBW cbw |
#define CWDE cwde |
#define CLC clc |
#define CLD cld |
#define CLI cli |
#define CLTS clts |
#define CMC cmc |
#define CMP_L(a, b) cmp L_(b), L_(a) |
#define CMP_W(a, b) cmp W_(b), W_(a) |
#define CMP_B(a, b) cmp B_(b), B_(a) |
#define CMPS_L cmpsd |
#define CMPS_W cmpsw |
#define CMPS_B cmpsb |
#define CPUID cpuid |
#define CWD cwd |
#define CDQ cdq |
#define DAA daa |
#define DAS das |
#define DEC_L(a) dec L_(a) |
#define DEC_W(a) dec W_(a) |
#define DEC_B(a) dec B_(a) |
#define DIV_L(a) div L_(a) |
#define DIV_W(a) div W_(a) |
#define DIV_B(a) div B_(a) |
#define ENTER(a,b) enter b, a |
#define HLT hlt |
#define IDIV_L(a) idiv L_(a) |
#define IDIV_W(a) idiv W_(a) |
#define IDIV_B(a) idiv B_(a) |
#define IMUL_L(a, b) imul L_(b), L_(a) |
#define IMUL_W(a, b) imul W_(b), W_(a) |
#define IMUL_B(a) imul B_(a) |
#define IN_L in EAX, DX |
#define IN_W in AX, DX |
#define IN_B in AL, DX |
#define IN1_L(a) in1 L_(a) |
#define IN1_W(a) in1 W_(a) |
#define IN1_B(a) in1 B_(a) |
#define INC_L(a) inc L_(a) |
#define INC_W(a) inc W_(a) |
#define INC_B(a) inc B_(a) |
#define INS_L ins |
#define INS_W ins |
#define INS_B ins |
/* Interrupt, conditional jump and unconditional jump wrappers for the
 * Intel-syntax branch.
 * INT pastes the B_ prefix onto its operand. Every conditional jump inserts
 * NEAR between the mnemonic and the target; NEAR is `near` in the NASM
 * configuration and empty in the MASM one. JMP passes its target through
 * unchanged and JMPF emits `jmp far s:a`.
 */
#define INT(a) int B_(a) |
#define INT3 int3 |
#define INTO into |
#define IRET iret |
#define IRETD iretd |
#define JA(a) ja NEAR a |
#define JAE(a) jae NEAR a |
#define JB(a) jb NEAR a |
#define JBE(a) jbe NEAR a |
#define JC(a) jc NEAR a |
#define JE(a) je NEAR a |
#define JG(a) jg NEAR a |
#define JGE(a) jge NEAR a |
#define JL(a) jl NEAR a |
#define JLE(a) jle NEAR a |
#define JNA(a) jna NEAR a |
#define JNAE(a) jnae NEAR a |
#define JNB(a) jnb NEAR a |
#define JNBE(a) jnbe NEAR a |
#define JNC(a) jnc NEAR a |
#define JNE(a) jne NEAR a |
#define JNG(a) jng NEAR a |
#define JNGE(a) jnge NEAR a |
#define JNL(a) jnl NEAR a |
#define JNLE(a) jnle NEAR a |
#define JNO(a) jno NEAR a |
#define JNP(a) jnp NEAR a |
#define JNS(a) jns NEAR a |
#define JNZ(a) jnz NEAR a |
#define JO(a) jo NEAR a |
#define JP(a) jp NEAR a |
#define JPE(a) jpe NEAR a |
#define JPO(a) jpo NEAR a |
#define JS(a) js NEAR a |
#define JZ(a) jz NEAR a |
#define JMP(a) jmp a |
#define JMPF(s,a) jmp far s:a |
/* Intel-syntax wrappers, LAHF through MOVS.
 * Two-operand macros take (a, b) and emit `b` first, `a` second.
 * LEA_L and LEA_W have identical expansions: both paste the P_ prefix onto
 * both operands, so no size keyword is attached to the memory operand.
 * LDS/LES/LFS/LGS/LSS prefix only the source `a` (with P_) and pass the
 * destination `b` through unprefixed; LAR and LSL prefix neither operand.
 * MOV_SR uses the SR_ helper, which maps onto the W_ names.
 * The long string forms LODS_L and MOVS_L are spelled lodsd / movsd.
 */
#define LAHF lahf |
#define LAR(a, b) lar b, a |
#define LEA_L(a, b) lea P_(b), P_(a) |
#define LEA_W(a, b) lea P_(b), P_(a) |
#define LEAVE leave |
#define LGDT(a) lgdt a |
#define LIDT(a) lidt a |
#define LDS(a, b) lds b, P_(a) |
#define LES(a, b) les b, P_(a) |
#define LFS(a, b) lfs b, P_(a) |
#define LGS(a, b) lgs b, P_(a) |
#define LSS(a, b) lss b, P_(a) |
#define LLDT(a) lldt a |
#define LMSW(a) lmsw a |
#define LOCK lock |
#define LODS_L lodsd |
#define LODS_W lodsw |
#define LODS_B lodsb |
#define LOOP(a) loop a |
#define LOOPE(a) loope a |
#define LOOPZ(a) loopz a |
#define LOOPNE(a) loopne a |
#define LOOPNZ(a) loopnz a |
#define LSL(a, b) lsl b, a |
#define LTR(a) ltr a |
#define MOV_SR(a, b) mov SR_(b), SR_(a) |
#define MOV_L(a, b) mov L_(b), L_(a) |
#define MOV_W(a, b) mov W_(b), W_(a) |
#define MOV_B(a, b) mov B_(b), B_(a) |
#define MOVS_L movsd |
#define MOVS_W movsw |
#define MOVS_B movsb |
#define MOVSX_BL(a, b) movsx B_(b), B_(a) |
#define MOVSX_BW(a, b) movsx B_(b), B_(a) |
#define MOVSX_WL(a, b) movsx W_(b), W_(a) |
#define MOVZX_BL(a, b) movzx B_(b), B_(a) |
#define MOVZX_BW(a, b) movzx B_(b), B_(a) |
#define MOVZX_WL(a, b) movzx W_(b), W_(a) |
/* Intel-syntax wrappers for MUL, NEG, NOP, NOT and OR.
 * One-operand macros paste the size prefix of the macro (L_, W_ or B_) onto
 * their operand; the two-operand OR macros take (a, b) and emit `b` first,
 * `a` second.
 */
#define MUL_L(a) mul L_(a) |
#define MUL_W(a) mul W_(a) |
#define MUL_B(a) mul B_(a) |
#define NEG_L(a) neg L_(a) |
#define NEG_W(a) neg W_(a) |
#define NEG_B(a) neg B_(a) |
#define NOP nop |
#define NOT_L(a) not L_(a) |
#define NOT_W(a) not W_(a) |
#define NOT_B(a) not B_(a) |
#define OR_L(a,b) or L_(b), L_(a) |
#define OR_W(a,b) or W_(b), W_(a) |
#define OR_B(a,b) or B_(b), B_(a) |
#define OUT_L out DX, EAX |
#define OUT_W out DX, AX |
#define OUT_B out DX, AL |
#define OUT1_L(a) out1 L_(a) |
#define OUT1_W(a) out1 W_(a) |
#define OUT1_B(a) out1 B_(a) |
#define OUTS_L outsd |
#define OUTS_W outsw |
#define OUTS_B outsb |
/* Stack wrappers for the Intel-syntax branch.
 * POP/PUSH paste the size prefix of the macro onto their operand; the _SR
 * forms use SR_, which maps onto the W_ names. The long "all registers" and
 * "flags" forms use the d-suffixed mnemonics (popad, popfd, pushad, pushfd),
 * the word forms use the unsuffixed ones.
 */
#define POP_SR(a) pop SR_(a) |
#define POP_L(a) pop L_(a) |
#define POP_W(a) pop W_(a) |
#define POPA_L popad |
#define POPA_W popa |
#define POPF_L popfd |
#define POPF_W popf |
#define PUSH_SR(a) push SR_(a) |
#define PUSH_L(a) push L_(a) |
#define PUSH_W(a) push W_(a) |
#define PUSH_B(a) push B_(a) |
#define PUSHA_L pushad |
#define PUSHA_W pusha |
#define PUSHF_L pushfd |
#define PUSHF_W pushf |
#define RCL_L(a, b) rcl L_(b), L_(a) |
#define RCL_W(a, b) rcl W_(b), W_(a) |
#define RCL_B(a, b) rcl B_(b), B_(a) |
#define RCR_L(a, b) rcr L_(b), L_(a) |
#define RCR_W(a, b) rcr W_(b), W_(a) |
#define RCR_B(a, b) rcr B_(b), B_(a) |
#define RDTSC rdtsc |
#define ROL_L(a, b) rol L_(b), L_(a) |
#define ROL_W(a, b) rol W_(b), W_(a) |
#define ROL_B(a, b) rol B_(b), B_(a) |
#define ROR_L(a, b) ror L_(b), L_(a) |
#define ROR_W(a, b) ror W_(b), W_(a) |
#define ROR_B(a, b) ror B_(b), B_(a) |
/* Repeat prefixes, RET, SAHF, arithmetic shifts and SBB for the
 * Intel-syntax branch.
 * REPNZ and REPZ are aliases for REPNE and REPE.
 * SAL/SAR take (a, b) = (count, destination): the destination gets the size
 * prefix of the macro while the count is always pasted with B_.
 * SBB takes (a, b) and emits `b` first, `a` second, both with the size
 * prefix of the macro.
 */
#define REP rep |
#define REPE repe |
#define REPNE repne |
#define REPNZ REPNE |
#define REPZ REPE |
#define RET ret |
#define SAHF sahf |
#define SAL_L(a, b) sal L_(b), B_(a) |
#define SAL_W(a, b) sal W_(b), B_(a) |
#define SAL_B(a, b) sal B_(b), B_(a) |
#define SAR_L(a, b) sar L_(b), B_(a) |
#define SAR_W(a, b) sar W_(b), B_(a) |
#define SAR_B(a, b) sar B_(b), B_(a) |
#define SBB_L(a, b) sbb L_(b), L_(a) |
#define SBB_W(a, b) sbb W_(b), W_(a) |
#define SBB_B(a, b) sbb B_(b), B_(a) |
/* String scan for the Intel-syntax branch. All three widths used to expand
 * to a bare `scas`, which carries no operand size and so cannot distinguish
 * the long, word and byte forms. They now use the size-suffixed mnemonics,
 * matching how CMPS, LODS, MOVS, OUTS and STOS are spelled in this branch.
 */
#define SCAS_L scasd |
#define SCAS_W scasw |
#define SCAS_B scasb |
/* Set-byte-on-condition (SETcc) wrappers plus SGDT and SIDT for the
 * Intel-syntax branch. Every macro emits the mnemonic of the same name and
 * passes operand `a` through unchanged, without pasting a size prefix.
 */
#define SETA(a) seta a |
#define SETAE(a) setae a |
#define SETB(a) setb a |
#define SETBE(a) setbe a |
#define SETC(a) setc a |
#define SETE(a) sete a |
#define SETG(a) setg a |
#define SETGE(a) setge a |
#define SETL(a) setl a |
#define SETLE(a) setle a |
#define SETNA(a) setna a |
#define SETNAE(a) setnae a |
#define SETNB(a) setnb a |
#define SETNBE(a) setnbe a |
#define SETNC(a) setnc a |
#define SETNE(a) setne a |
#define SETNG(a) setng a |
#define SETNGE(a) setnge a |
#define SETNL(a) setnl a |
#define SETNLE(a) setnle a |
#define SETNO(a) setno a |
#define SETNP(a) setnp a |
#define SETNS(a) setns a |
#define SETNZ(a) setnz a |
#define SETO(a) seto a |
#define SETP(a) setp a |
#define SETPE(a) setpe a |
#define SETPO(a) setpo a |
#define SETS(a) sets a |
#define SETZ(a) setz a |
#define SGDT(a) sgdt a |
#define SIDT(a) sidt a |
#define SHL_L(a, b) shl L_(b), B_(a) |
#define SHL_W(a, b) shl W_(b), B_(a) |
#define SHL_B(a, b) shl B_(b), B_(a) |
#define SHLD_L(a,b,c) shld |
#define SHLD2_L(a,b) shld L_(b), L_(a) |
#define SHLD_W(a,b,c) shld |
#define SHLD2_W(a,b) shld W_(b), W_(a) |
#define SHR_L(a, b) shr L_(b), B_(a) |
#define SHR_W(a, b) shr W_(b), B_(a) |
#define SHR_B(a, b) shr B_(b), B_(a) |
#define SHRD_L(a,b,c) shrd |
#define SHRD2_L(a,b) shrd L_(b), L_(a) |
#define SHRD_W(a,b,c) shrd |
#define SHRD2_W(a,b) shrd W_(b), W_(a) |
/* Intel-syntax wrappers, SLDT through XOR.
 * SLDT, SMSW, STR, VERR and VERW pass their operand through unprefixed.
 * The long string store STOS_L is spelled `stosd`.
 * The two-operand SUB, TEST, XCHG and XOR macros take (a, b) and emit `b`
 * first, `a` second, both pasted with the size prefix of the macro.
 */
#define SLDT(a) sldt a |
#define SMSW(a) smsw a |
#define STC stc |
#define STD std |
#define STI sti |
#define STOS_L stosd |
#define STOS_W stosw |
#define STOS_B stosb |
#define STR(a) str a |
#define SUB_L(a, b) sub L_(b), L_(a) |
#define SUB_W(a, b) sub W_(b), W_(a) |
#define SUB_B(a, b) sub B_(b), B_(a) |
#define TEST_L(a, b) test L_(b), L_(a) |
#define TEST_W(a, b) test W_(b), W_(a) |
#define TEST_B(a, b) test B_(b), B_(a) |
#define VERR(a) verr a |
#define VERW(a) verw a |
#define WAIT wait |
#define XCHG_L(a, b) xchg L_(b), L_(a) |
#define XCHG_W(a, b) xchg W_(b), W_(a) |
#define XCHG_B(a, b) xchg B_(b), B_(a) |
#define XLAT xlat |
#define XOR_L(a, b) xor L_(b), L_(a) |
#define XOR_W(a, b) xor W_(b), W_(a) |
#define XOR_B(a, b) xor B_(b), B_(a) |
/* Floating Point Instructions */ |
/* x87 wrappers for the Intel-syntax branch, first part: F2XM1 to FNINIT.
 * The mnemonic carries no size suffix here; the operand size comes from the
 * prefix pasted onto the operand instead:
 *   _S  uses S_ (which maps onto the L_ names),
 *   _D and _Q use D_,
 *   _L  uses L_,  _W uses W_.
 * Two-operand (2 / P) macros take (a, b) and emit `b, a` without prefixes.
 * Macros without a size suffix pass their operand through unchanged.
 */
#define F2XM1 f2xm1 |
#define FABS fabs |
#define FADD_D(a) fadd D_(a) |
#define FADD_S(a) fadd S_(a) |
#define FADD2(a, b) fadd b, a |
#define FADDP(a, b) faddp b, a |
#define FIADD_L(a) fiadd L_(a) |
#define FIADD_W(a) fiadd W_(a) |
#define FBLD(a) fbld a |
#define FBSTP(a) fbstp a |
#define FCHS fchs |
#define FCLEX fclex |
#define FNCLEX fnclex |
#define FCOM(a) fcom a |
#define FCOM_D(a) fcom D_(a) |
#define FCOM_S(a) fcom S_(a) |
#define FCOMP(a) fcomp a |
#define FCOMP_D(a) fcomp D_(a) |
#define FCOMP_S(a) fcomp S_(a) |
#define FCOMPP fcompp |
#define FCOS fcos |
#define FDECSTP fdecstp |
#define FDIV_D(a) fdiv D_(a) |
#define FDIV_S(a) fdiv S_(a) |
#define FDIV2(a, b) fdiv b, a |
#define FDIVP(a, b) fdivp b, a |
#define FIDIV_L(a) fidiv L_(a) |
#define FIDIV_W(a) fidiv W_(a) |
#define FDIVR_D(a) fdivr D_(a) |
#define FDIVR_S(a) fdivr S_(a) |
#define FDIVR2(a, b) fdivr b, a |
#define FDIVRP(a, b) fdivrp b, a |
#define FIDIVR_L(a) fidivr L_(a) |
#define FIDIVR_W(a) fidivr W_(a) |
#define FFREE(a) ffree a |
#define FICOM_L(a) ficom L_(a) |
#define FICOM_W(a) ficom W_(a) |
#define FICOMP_L(a) ficomp L_(a) |
#define FICOMP_W(a) ficomp W_(a) |
#define FILD_Q(a) fild D_(a) |
#define FILD_L(a) fild L_(a) |
#define FILD_W(a) fild W_(a) |
#define FINCSTP fincstp |
#define FINIT finit |
#define FNINIT fninit |
/* x87 wrappers for the Intel-syntax branch, second part: FIST to FSQRT.
 * As in the rest of this branch the mnemonic carries no size suffix; the
 * operand size comes from the prefix pasted onto the operand: _X uses X_,
 * _D and _Q use D_, _S uses S_ (which maps onto the L_ names), _L uses L_
 * and _W uses W_. Two-operand (2 / P) macros take (a, b) and emit `b, a`
 * without prefixes. FLDCW, FLDENV, FRSTOR, FSAVE and FNSAVE pass their
 * operand through unchanged.
 */
#define FIST_L(a) fist L_(a) |
#define FIST_W(a) fist W_(a) |
#define FISTP_Q(a) fistp D_(a) |
#define FISTP_L(a) fistp L_(a) |
#define FISTP_W(a) fistp W_(a) |
#define FLD_X(a) fld X_(a) |
#define FLD_D(a) fld D_(a) |
#define FLD_S(a) fld S_(a) |
#define FLD1 fld1 |
#define FLDL2T fldl2t |
#define FLDL2E fldl2e |
#define FLDPI fldpi |
#define FLDLG2 fldlg2 |
#define FLDLN2 fldln2 |
#define FLDZ fldz |
#define FLDCW(a) fldcw a |
#define FLDENV(a) fldenv a |
#define FMUL_S(a) fmul S_(a) |
#define FMUL_D(a) fmul D_(a) |
#define FMUL2(a, b) fmul b, a |
#define FMULP(a, b) fmulp b, a |
#define FIMUL_L(a) fimul L_(a) |
#define FIMUL_W(a) fimul W_(a) |
#define FNOP fnop |
#define FPATAN fpatan |
#define FPREM fprem |
#define FPREM1 fprem1 |
#define FPTAN fptan |
#define FRNDINT frndint |
#define FRSTOR(a) frstor a |
#define FSAVE(a) fsave a |
#define FNSAVE(a) fnsave a |
#define FSCALE fscale |
#define FSIN fsin |
#define FSINCOS fsincos |
#define FSQRT fsqrt |
/* x87 wrappers for the Intel-syntax branch, third part: FST to FYL2XP1.
 * The mnemonic carries no size suffix; the operand size comes from the
 * prefix pasted onto the operand: _X uses X_, _D uses D_, _S uses S_ (which
 * maps onto the L_ names), _L uses L_ and _W uses W_. Two-operand (2 / P)
 * macros take (a, b) and emit `b, a` without prefixes. The control, status
 * and environment stores pass their operand through unchanged, and the
 * waiting / non-waiting pairs (FSTCW/FNSTCW, FSTENV/FNSTENV, FSTSW/FNSTSW)
 * simply use the two distinct mnemonics. FWAIT is spelled `fwait` here.
 */
#define FST_D(a) fst D_(a) |
#define FST_S(a) fst S_(a) |
#define FSTP_X(a) fstp X_(a) |
#define FSTP_D(a) fstp D_(a) |
#define FSTP_S(a) fstp S_(a) |
#define FSTP(a) fstp a |
#define FSTCW(a) fstcw a |
#define FNSTCW(a) fnstcw a |
#define FSTENV(a) fstenv a |
#define FNSTENV(a) fnstenv a |
#define FSTSW(a) fstsw a |
#define FNSTSW(a) fnstsw a |
#define FSUB_S(a) fsub S_(a) |
#define FSUB_D(a) fsub D_(a) |
#define FSUB2(a, b) fsub b, a |
#define FSUBP(a, b) fsubp b, a |
#define FISUB_L(a) fisub L_(a) |
#define FISUB_W(a) fisub W_(a) |
#define FSUBR_S(a) fsubr S_(a) |
#define FSUBR_D(a) fsubr D_(a) |
#define FSUBR2(a, b) fsubr b, a |
#define FSUBRP(a, b) fsubrp b, a |
#define FISUBR_L(a) fisubr L_(a) |
#define FISUBR_W(a) fisubr W_(a) |
#define FTST ftst |
#define FUCOM(a) fucom a |
#define FUCOMP(a) fucomp a |
#define FUCOMPP fucompp |
#define FWAIT fwait |
#define FXAM fxam |
#define FXCH(a) fxch a |
#define FXTRACT fxtract |
#define FYL2X fyl2x |
#define FYL2XP1 fyl2xp1 |
#endif /* NASM_ASSEMBLER, MASM_ASSEMBLER */ |
/****************************************/ |
/* */ |
/* Extensions to x86 insn set - */ |
/* MMX, 3DNow! */ |
/* */ |
/****************************************/ |
/*
 * Operand-list helpers used by the MMX / 3DNow! / SSE instruction macros.
 *
 * When building for NASM or MASM, every operand is token-pasted onto a
 * P_ prefix and the operand order is reversed.  For any other assembler
 * the operands are emitted unchanged, in the order they were given.
 */
#if defined(NASM_ASSEMBLER) || defined(MASM_ASSEMBLER) |
#define P_ARG1(a) P_ ## a |
#define P_ARG2(a, b) P_ ## b, P_ ## a |
#define P_ARG3(a, b, c) P_ ## c, P_ ## b, P_ ## a |
#else |
#define P_ARG1(a) a |
#define P_ARG2(a, b) a, b |
#define P_ARG3(a, b, c) a, b, c |
#endif |
/* MMX
 *
 * One macro per instruction.  Each two-operand macro expands to the
 * lower-case mnemonic followed by its operands routed through P_ARG2,
 * so the operand order/prefixing follows the selected assembler.
 * EMMS takes no operands.
 */ |
#define MOVD(a, b) movd P_ARG2(a, b) |
#define MOVQ(a, b) movq P_ARG2(a, b) |
#define PADDB(a, b) paddb P_ARG2(a, b) |
#define PADDW(a, b) paddw P_ARG2(a, b) |
#define PADDD(a, b) paddd P_ARG2(a, b) |
#define PADDSB(a, b) paddsb P_ARG2(a, b) |
#define PADDSW(a, b) paddsw P_ARG2(a, b) |
#define PADDUSB(a, b) paddusb P_ARG2(a, b) |
#define PADDUSW(a, b) paddusw P_ARG2(a, b) |
#define PSUBB(a, b) psubb P_ARG2(a, b) |
#define PSUBW(a, b) psubw P_ARG2(a, b) |
#define PSUBD(a, b) psubd P_ARG2(a, b) |
#define PSUBSB(a, b) psubsb P_ARG2(a, b) |
#define PSUBSW(a, b) psubsw P_ARG2(a, b) |
#define PSUBUSB(a, b) psubusb P_ARG2(a, b) |
#define PSUBUSW(a, b) psubusw P_ARG2(a, b) |
#define PCMPEQB(a, b) pcmpeqb P_ARG2(a, b) |
#define PCMPEQW(a, b) pcmpeqw P_ARG2(a, b) |
#define PCMPEQD(a, b) pcmpeqd P_ARG2(a, b) |
#define PCMPGTB(a, b) pcmpgtb P_ARG2(a, b) |
#define PCMPGTW(a, b) pcmpgtw P_ARG2(a, b) |
#define PCMPGTD(a, b) pcmpgtd P_ARG2(a, b) |
#define PMULHW(a, b) pmulhw P_ARG2(a, b) |
#define PMULLW(a, b) pmullw P_ARG2(a, b) |
#define PMADDWD(a, b) pmaddwd P_ARG2(a, b) |
#define PAND(a, b) pand P_ARG2(a, b) |
#define PANDN(a, b) pandn P_ARG2(a, b) |
#define POR(a, b) por P_ARG2(a, b) |
#define PXOR(a, b) pxor P_ARG2(a, b) |
#define PSRAW(a, b) psraw P_ARG2(a, b) |
#define PSRAD(a, b) psrad P_ARG2(a, b) |
#define PSRLW(a, b) psrlw P_ARG2(a, b) |
#define PSRLD(a, b) psrld P_ARG2(a, b) |
#define PSRLQ(a, b) psrlq P_ARG2(a, b) |
#define PSLLW(a, b) psllw P_ARG2(a, b) |
#define PSLLD(a, b) pslld P_ARG2(a, b) |
#define PSLLQ(a, b) psllq P_ARG2(a, b) |
#define PACKSSWB(a, b) packsswb P_ARG2(a, b) |
#define PACKSSDW(a, b) packssdw P_ARG2(a, b) |
#define PACKUSWB(a, b) packuswb P_ARG2(a, b) |
#define PUNPCKHBW(a, b) punpckhbw P_ARG2(a, b) |
#define PUNPCKHWD(a, b) punpckhwd P_ARG2(a, b) |
#define PUNPCKHDQ(a, b) punpckhdq P_ARG2(a, b) |
#define PUNPCKLBW(a, b) punpcklbw P_ARG2(a, b) |
#define PUNPCKLWD(a, b) punpcklwd P_ARG2(a, b) |
#define PUNPCKLDQ(a, b) punpckldq P_ARG2(a, b) |
#define EMMS emms |
/* AMD 3DNow!
 *
 * Same scheme as the MMX macros: mnemonic plus operands routed through
 * P_ARG2 (two operands) or P_ARG1 (the prefetch forms).  FEMMS takes no
 * operands.
 */ |
#define PAVGUSB(a, b) pavgusb P_ARG2(a, b) |
#define PFADD(a, b) pfadd P_ARG2(a, b) |
#define PFSUB(a, b) pfsub P_ARG2(a, b) |
#define PFSUBR(a, b) pfsubr P_ARG2(a, b) |
#define PFACC(a, b) pfacc P_ARG2(a, b) |
#define PFCMPGE(a, b) pfcmpge P_ARG2(a, b) |
#define PFCMPGT(a, b) pfcmpgt P_ARG2(a, b) |
#define PFCMPEQ(a, b) pfcmpeq P_ARG2(a, b) |
#define PFMIN(a, b) pfmin P_ARG2(a, b) |
#define PFMAX(a, b) pfmax P_ARG2(a, b) |
#define PI2FD(a, b) pi2fd P_ARG2(a, b) |
#define PF2ID(a, b) pf2id P_ARG2(a, b) |
#define PFRCP(a, b) pfrcp P_ARG2(a, b) |
#define PFRSQRT(a, b) pfrsqrt P_ARG2(a, b) |
#define PFMUL(a, b) pfmul P_ARG2(a, b) |
#define PFRCPIT1(a, b) pfrcpit1 P_ARG2(a, b) |
#define PFRSQIT1(a, b) pfrsqit1 P_ARG2(a, b) |
#define PFRCPIT2(a, b) pfrcpit2 P_ARG2(a, b) |
#define PMULHRW(a, b) pmulhrw P_ARG2(a, b) |
#define FEMMS femms |
#define PREFETCH(a) prefetch P_ARG1(a) |
#define PREFETCHW(a) prefetchw P_ARG1(a) |
/* Intel SSE
 *
 * Same scheme as the MMX macros.  One-operand instructions go through
 * P_ARG1, two-operand ones through P_ARG2, and SHUFPS (which carries a
 * third operand) through P_ARG3.  SFENCE takes no operands.
 */ |
#define ADDPS(a, b) addps P_ARG2(a, b) |
#define ADDSS(a, b) addss P_ARG2(a, b) |
#define ANDNPS(a, b) andnps P_ARG2(a, b) |
#define ANDPS(a, b) andps P_ARG2(a, b) |
/* NASM only knows the pseudo ops for these. |
#define CMPPS(a, b, c) cmpps P_ARG3(a, b, c) |
#define CMPSS(a, b, c) cmpss P_ARG3(a, b, c) |
*/ |
#define CMPEQPS(a, b) cmpeqps P_ARG2(a, b) |
#define CMPLTPS(a, b) cmpltps P_ARG2(a, b) |
#define CMPLEPS(a, b) cmpleps P_ARG2(a, b) |
#define CMPUNORDPS(a, b) cmpunordps P_ARG2(a, b) |
#define CMPNEQPS(a, b) cmpneqps P_ARG2(a, b) |
#define CMPNLTPS(a, b) cmpnltps P_ARG2(a, b) |
#define CMPNLEPS(a, b) cmpnleps P_ARG2(a, b) |
#define CMPORDPS(a, b) cmpordps P_ARG2(a, b) |
#define CMPEQSS(a, b) cmpeqss P_ARG2(a, b) |
#define CMPLTSS(a, b) cmpltss P_ARG2(a, b) |
#define CMPLESS(a, b) cmpless P_ARG2(a, b) |
#define CMPUNORDSS(a, b) cmpunordss P_ARG2(a, b) |
#define CMPNEQSS(a, b) cmpneqss P_ARG2(a, b) |
#define CMPNLTSS(a, b) cmpnltss P_ARG2(a, b) |
#define CMPNLESS(a, b) cmpnless P_ARG2(a, b) |
#define CMPORDSS(a, b) cmpordss P_ARG2(a, b) |
#define COMISS(a, b) comiss P_ARG2(a, b) |
#define CVTPI2PS(a, b) cvtpi2ps P_ARG2(a, b) |
#define CVTPS2PI(a, b) cvtps2pi P_ARG2(a, b) |
#define CVTSI2SS(a, b) cvtsi2ss P_ARG2(a, b) |
#define CVTSS2SI(a, b) cvtss2si P_ARG2(a, b) |
#define CVTTPS2PI(a, b) cvttps2pi P_ARG2(a, b) |
#define CVTTSS2SI(a, b) cvttss2si P_ARG2(a, b) |
#define DIVPS(a, b) divps P_ARG2(a, b) |
#define DIVSS(a, b) divss P_ARG2(a, b) |
#define FXRSTOR(a) fxrstor P_ARG1(a) |
#define FXSAVE(a) fxsave P_ARG1(a) |
#define LDMXCSR(a) ldmxcsr P_ARG1(a) |
#define MAXPS(a, b) maxps P_ARG2(a, b) |
#define MAXSS(a, b) maxss P_ARG2(a, b) |
#define MINPS(a, b) minps P_ARG2(a, b) |
#define MINSS(a, b) minss P_ARG2(a, b) |
#define MOVAPS(a, b) movaps P_ARG2(a, b) |
#define MOVHLPS(a, b) movhlps P_ARG2(a, b) |
#define MOVHPS(a, b) movhps P_ARG2(a, b) |
#define MOVLHPS(a, b) movlhps P_ARG2(a, b) |
#define MOVLPS(a, b) movlps P_ARG2(a, b) |
#define MOVMSKPS(a, b) movmskps P_ARG2(a, b) |
#define MOVNTPS(a, b) movntps P_ARG2(a, b) |
#define MOVNTQ(a, b) movntq P_ARG2(a, b) |
#define MOVSS(a, b) movss P_ARG2(a, b) |
#define MOVUPS(a, b) movups P_ARG2(a, b) |
#define MULPS(a, b) mulps P_ARG2(a, b) |
#define MULSS(a, b) mulss P_ARG2(a, b) |
#define ORPS(a, b) orps P_ARG2(a, b) |
#define RCPPS(a, b) rcpps P_ARG2(a, b) |
#define RCPSS(a, b) rcpss P_ARG2(a, b) |
#define RSQRTPS(a, b) rsqrtps P_ARG2(a, b) |
#define RSQRTSS(a, b) rsqrtss P_ARG2(a, b) |
#define SHUFPS(a, b, c) shufps P_ARG3(a, b, c) |
#define SQRTPS(a, b) sqrtps P_ARG2(a, b) |
#define SQRTSS(a, b) sqrtss P_ARG2(a, b) |
#define STMXCSR(a) stmxcsr P_ARG1(a) |
#define SUBPS(a, b) subps P_ARG2(a, b) |
#define UCOMISS(a, b) ucomiss P_ARG2(a, b) |
#define UNPCKHPS(a, b) unpckhps P_ARG2(a, b) |
#define UNPCKLPS(a, b) unpcklps P_ARG2(a, b) |
#define XORPS(a, b) xorps P_ARG2(a, b) |
#define PREFETCHNTA(a) prefetchnta P_ARG1(a) |
#define PREFETCHT0(a) prefetcht0 P_ARG1(a) |
#define PREFETCHT1(a) prefetcht1 P_ARG1(a) |
#define PREFETCHT2(a) prefetcht2 P_ARG1(a) |
#define SFENCE sfence |
/* Added by BrianP for FreeBSD (per David Dawes)
 *
 * Local-label helpers.  Unless building for NASM, MASM or __bsdi__, the
 * label name is pasted onto a ".L" prefix; otherwise the name (or the
 * concatenation of both parts for LLBL2) is used as-is.  CONCAT and
 * CONCAT3 are defined elsewhere in this header.
 */ |
#if !defined(NASM_ASSEMBLER) && !defined(MASM_ASSEMBLER) && !defined(__bsdi__) |
#define LLBL(a) CONCAT(.L,a) |
#define LLBL2(a,b) CONCAT3(.L,a,b) |
#else |
#define LLBL(a) a |
#define LLBL2(a,b) CONCAT(a,b) |
#endif |
/* Segment overrides
 *
 * Each macro emits one byte through D_BYTE (defined elsewhere in this
 * header; presumably a data-byte directive).  The decimal values are
 * 0x2E, 0x3E, 0x26, 0x64 and 0x65 -- the x86 CS, DS, ES, FS and GS
 * segment-override prefix bytes.
 */ |
#define SEGCS D_BYTE 46 |
#define SEGDS D_BYTE 62 |
#define SEGES D_BYTE 38 |
#define SEGFS D_BYTE 100 |
#define SEGGS D_BYTE 101 |
/* Temporary labels: valid until next non-local label
 *
 * NASM: the name is prefixed with '.'; every other assembler: the name is
 * suffixed with '$'.
 */ |
#ifdef NASM_ASSEMBLER |
#define TLBL(a) CONCAT(.,a) |
#else |
#define TLBL(a) CONCAT(a,$) |
#endif |
/* Hidden symbol visibility support.
 * If we build with gcc's -fvisibility=hidden flag, we'll need to change
 * the symbol visibility mode to 'default'.
 *
 * As defined here the macro expands to nothing, so HIDDEN(x) in the
 * assembly sources currently emits no directive at all.
 */ |
#define HIDDEN(x) |
#endif /* __ASSYNTAX_H__ */ |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/clip_args.h |
---|
0,0 → 1,59 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* |
* Clip test function interface for assembly code. Simply define |
* FRAME_OFFSET to the number of bytes pushed onto the stack before |
* using the ARG_* argument macros. |
* |
* Gareth Hughes |
*/ |
#ifndef __CLIP_ARGS_H__ |
#define __CLIP_ARGS_H__ |
/* |
* Offsets for clip_func arguments |
* |
* typedef GLvector4f *(*clip_func)( GLvector4f *clip_vec, |
* GLvector4f *proj_vec, |
* GLubyte clipMask[], |
* GLubyte *orMask, |
* GLubyte *andMask ); |
*/ |
/*
 * Byte offsets of the five arguments, in declaration order and 4 bytes
 * apart.  The first one sits at offset 4 (presumably just past the return
 * address on entry -- confirm against the calling convention in use).
 */
#define OFFSET_SOURCE 4 |
#define OFFSET_DEST 8 |
#define OFFSET_CLIP 12 |
#define OFFSET_OR 16 |
#define OFFSET_AND 20 |
/*
 * ESP-relative operands for each argument.  FRAME_OFFSET is not defined
 * here: the including assembly file must define it to the number of bytes
 * it has pushed onto the stack before any ARG_* macro is used.
 */
#define ARG_SOURCE REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP) |
#define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP) |
#define ARG_CLIP REGOFF(FRAME_OFFSET+OFFSET_CLIP, ESP) |
#define ARG_OR REGOFF(FRAME_OFFSET+OFFSET_OR, ESP) |
#define ARG_AND REGOFF(FRAME_OFFSET+OFFSET_AND, ESP) |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/common_x86.c |
---|
0,0 → 1,336 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2006 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/** |
* \file common_x86.c |
* |
* Check CPU capabilities & initialize optimized functions for this particular |
* processor. |
* |
* Changed by Andre Werthmann for using the new SSE functions. |
* |
* \author Holger Waechtler <holger@akaflieg.extern.tu-berlin.de> |
* \author Andre Werthmann <wertmann@cs.uni-potsdam.de> |
*/ |
/* XXX these includes should probably go into imports.h or glheader.h */ |
#if defined(USE_SSE_ASM) && defined(__linux__) |
#include <linux/version.h> |
#endif |
#if defined(USE_SSE_ASM) && defined(__FreeBSD__) |
#include <sys/types.h> |
#include <sys/sysctl.h> |
#endif |
#if defined(USE_SSE_ASM) && defined(__OpenBSD__) |
#include <sys/param.h> |
#include <sys/sysctl.h> |
#include <machine/cpu.h> |
#endif |
#include "main/imports.h" |
#include "common_x86_asm.h" |
/** Bitmask of X86_FEATURE_x bits */ |
int _mesa_x86_cpu_features = 0x0; |
/* When non-zero, the detection code in this file logs what it finds via
 * _mesa_debug().  It is initialized to GL_FALSE and never assigned in the
 * code visible here.
 */
static int detection_debug = GL_FALSE; |
/* No reason for this to be public.
 *
 * Prototypes of the CPUID helpers implemented in assembly.
 * _mesa_x86_cpuid() executes CPUID for 'op' and stores all four result
 * registers through the given pointers; the _eax/_ebx/_ecx/_edx variants
 * return just that one register.
 */ |
extern GLuint _ASMAPI _mesa_x86_has_cpuid(void); |
extern void _ASMAPI _mesa_x86_cpuid(GLuint op, GLuint *reg_eax, GLuint *reg_ebx, GLuint *reg_ecx, GLuint *reg_edx); |
extern GLuint _ASMAPI _mesa_x86_cpuid_eax(GLuint op); |
extern GLuint _ASMAPI _mesa_x86_cpuid_ebx(GLuint op); |
extern GLuint _ASMAPI _mesa_x86_cpuid_ecx(GLuint op); |
extern GLuint _ASMAPI _mesa_x86_cpuid_edx(GLuint op); |
#if defined(USE_SSE_ASM) |
/* |
* We must verify that the Streaming SIMD Extensions are truly supported |
* on this processor before we go ahead and hook out the optimized code. |
* |
* However, I have been told by Alan Cox that all 2.4 (and later) Linux |
* kernels provide full SSE support on all processors that expose SSE via |
* the CPUID mechanism. |
*/ |
/* These are assembly functions:
 * the first executes a single SSE instruction, the second performs an
 * unmasked SSE divide-by-zero.  Both are expected to fault when the OS
 * lacks the corresponding support, so a handler must be installed first.
 * NOTE(review): unlike the CPUID helpers these prototypes carry no
 * _ASMAPI qualifier -- confirm that is intentional.
 */ |
extern void _mesa_test_os_sse_support( void ); |
extern void _mesa_test_os_sse_exception_support( void ); |
#if defined(_WIN32)
#ifndef STATUS_FLOAT_MULTIPLE_TRAPS
# define STATUS_FLOAT_MULTIPLE_TRAPS (0xC00002B5L)
#endif

/**
 * Unhandled-exception filter used while probing OS support for SSE.
 *
 * - EXCEPTION_ILLEGAL_INSTRUCTION: clears X86_FEATURE_XMM from
 *   _mesa_x86_cpu_features, then resumes after the faulting instruction.
 * - STATUS_FLOAT_MULTIPLE_TRAPS: expected result of the unmasked
 *   divide-by-zero probe; just resumes after the faulting instruction.
 * - Anything else, or a context record without control registers: logs
 *   the problem and returns EXCEPTION_EXECUTE_HANDLER.
 *
 * \param exp  exception record and thread context supplied by Windows.
 * \return EXCEPTION_CONTINUE_EXECUTION to resume the probe, otherwise
 *         EXCEPTION_EXECUTE_HANDLER.
 */
static LONG WINAPI ExceptionFilter(LPEXCEPTION_POINTERS exp)
{
   PEXCEPTION_RECORD rec = exp->ExceptionRecord;
   PCONTEXT ctx = exp->ContextRecord;

   if ( rec->ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION ) {
      _mesa_debug(NULL, "EXCEPTION_ILLEGAL_INSTRUCTION\n" );
      _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
   } else if ( rec->ExceptionCode == STATUS_FLOAT_MULTIPLE_TRAPS ) {
      _mesa_debug(NULL, "STATUS_FLOAT_MULTIPLE_TRAPS\n");
      /* Windows seems to clear the exception flag itself, we just have to increment Eip */
   } else {
      /* The format string has a %08x conversion, so the exception code
       * must actually be passed to it.
       */
      _mesa_debug(NULL, "UNEXPECTED EXCEPTION (0x%08x), terminating!\n",
                  (unsigned int) rec->ExceptionCode );
      return EXCEPTION_EXECUTE_HANDLER;
   }

   if ( (ctx->ContextFlags & CONTEXT_CONTROL) != CONTEXT_CONTROL ) {
      _mesa_debug(NULL, "Context does not contain control registers, terminating!\n");
      return EXCEPTION_EXECUTE_HANDLER;
   }

   /* Step over the faulting instruction (presumably the 3-byte SSE probe
    * instruction issued by the assembly test routines).
    */
   ctx->Eip += 3;

   return EXCEPTION_CONTINUE_EXECUTION;
}
#endif /* _WIN32 */
/** |
* Check if SSE is supported. |
* If not, turn off the X86_FEATURE_XMM flag in _mesa_x86_cpu_features. |
*/ |
void _mesa_check_os_sse_support( void ) |
{ |
#if defined(__FreeBSD__) |
{ |
int ret, enabled; |
unsigned int len; |
len = sizeof(enabled); |
ret = sysctlbyname("hw.instruction_sse", &enabled, &len, NULL, 0); |
if (ret || !enabled) |
_mesa_x86_cpu_features &= ~(X86_FEATURE_XMM); |
} |
#elif defined (__NetBSD__) |
{ |
int ret, enabled; |
size_t len = sizeof(enabled); |
ret = sysctlbyname("machdep.sse", &enabled, &len, (void *)NULL, 0); |
if (ret || !enabled) |
_mesa_x86_cpu_features &= ~(X86_FEATURE_XMM); |
} |
#elif defined(__OpenBSD__) |
{ |
int mib[2]; |
int ret, enabled; |
size_t len = sizeof(enabled); |
mib[0] = CTL_MACHDEP; |
mib[1] = CPU_SSE; |
ret = sysctl(mib, 2, &enabled, &len, NULL, 0); |
if (ret || !enabled) |
_mesa_x86_cpu_features &= ~(X86_FEATURE_XMM); |
} |
#elif defined(_WIN32) |
LPTOP_LEVEL_EXCEPTION_FILTER oldFilter; |
/* Install our ExceptionFilter */ |
oldFilter = SetUnhandledExceptionFilter( ExceptionFilter ); |
if ( cpu_has_xmm ) { |
_mesa_debug(NULL, "Testing OS support for SSE...\n"); |
_mesa_test_os_sse_support(); |
if ( cpu_has_xmm ) { |
_mesa_debug(NULL, "Yes.\n"); |
} else { |
_mesa_debug(NULL, "No!\n"); |
} |
} |
if ( cpu_has_xmm ) { |
_mesa_debug(NULL, "Testing OS support for SSE unmasked exceptions...\n"); |
_mesa_test_os_sse_exception_support(); |
if ( cpu_has_xmm ) { |
_mesa_debug(NULL, "Yes.\n"); |
} else { |
_mesa_debug(NULL, "No!\n"); |
} |
} |
/* Restore previous exception filter */ |
SetUnhandledExceptionFilter( oldFilter ); |
if ( cpu_has_xmm ) { |
_mesa_debug(NULL, "Tests of OS support for SSE passed.\n"); |
} else { |
_mesa_debug(NULL, "Tests of OS support for SSE failed!\n"); |
} |
#else |
/* Do nothing on other platforms for now. |
*/ |
if (detection_debug) |
_mesa_debug(NULL, "Not testing OS support for SSE, leaving enabled.\n"); |
#endif /* __FreeBSD__ */ |
} |
#endif /* USE_SSE_ASM */ |
/** |
* Initialize the _mesa_x86_cpu_features bitfield. |
* This is a no-op if called more than once. |
*/ |
void |
_mesa_get_x86_features(void) |
{ |
static int called = 0; |
if (called) |
return; |
called = 1; |
#ifdef USE_X86_ASM |
_mesa_x86_cpu_features = 0x0; |
if (_mesa_getenv( "MESA_NO_ASM")) { |
return; |
} |
if (!_mesa_x86_has_cpuid()) { |
_mesa_debug(NULL, "CPUID not detected\n"); |
} |
else { |
GLuint cpu_features; |
GLuint cpu_ext_features; |
GLuint cpu_ext_info; |
char cpu_vendor[13]; |
GLuint result; |
/* get vendor name */ |
_mesa_x86_cpuid(0, &result, (GLuint *)(cpu_vendor + 0), (GLuint *)(cpu_vendor + 8), (GLuint *)(cpu_vendor + 4)); |
cpu_vendor[12] = '\0'; |
if (detection_debug) |
_mesa_debug(NULL, "CPU vendor: %s\n", cpu_vendor); |
/* get cpu features */ |
cpu_features = _mesa_x86_cpuid_edx(1); |
if (cpu_features & X86_CPU_FPU) |
_mesa_x86_cpu_features |= X86_FEATURE_FPU; |
if (cpu_features & X86_CPU_CMOV) |
_mesa_x86_cpu_features |= X86_FEATURE_CMOV; |
#ifdef USE_MMX_ASM |
if (cpu_features & X86_CPU_MMX) |
_mesa_x86_cpu_features |= X86_FEATURE_MMX; |
#endif |
#ifdef USE_SSE_ASM |
if (cpu_features & X86_CPU_XMM) |
_mesa_x86_cpu_features |= X86_FEATURE_XMM; |
if (cpu_features & X86_CPU_XMM2) |
_mesa_x86_cpu_features |= X86_FEATURE_XMM2; |
#endif |
/* query extended cpu features */ |
if ((cpu_ext_info = _mesa_x86_cpuid_eax(0x80000000)) > 0x80000000) { |
if (cpu_ext_info >= 0x80000001) { |
cpu_ext_features = _mesa_x86_cpuid_edx(0x80000001); |
if (cpu_features & X86_CPU_MMX) { |
#ifdef USE_3DNOW_ASM |
if (cpu_ext_features & X86_CPUEXT_3DNOW) |
_mesa_x86_cpu_features |= X86_FEATURE_3DNOW; |
if (cpu_ext_features & X86_CPUEXT_3DNOW_EXT) |
_mesa_x86_cpu_features |= X86_FEATURE_3DNOWEXT; |
#endif |
#ifdef USE_MMX_ASM |
if (cpu_ext_features & X86_CPUEXT_MMX_EXT) |
_mesa_x86_cpu_features |= X86_FEATURE_MMXEXT; |
#endif |
} |
} |
/* query cpu name */ |
if (cpu_ext_info >= 0x80000002) { |
GLuint ofs; |
char cpu_name[49]; |
for (ofs = 0; ofs < 3; ofs++) |
_mesa_x86_cpuid(0x80000002+ofs, (GLuint *)(cpu_name + (16*ofs)+0), (GLuint *)(cpu_name + (16*ofs)+4), (GLuint *)(cpu_name + (16*ofs)+8), (GLuint *)(cpu_name + (16*ofs)+12)); |
cpu_name[48] = '\0'; /* the name should be NULL terminated, but just to be sure */ |
if (detection_debug) |
_mesa_debug(NULL, "CPU name: %s\n", cpu_name); |
} |
} |
} |
#ifdef USE_MMX_ASM |
if ( cpu_has_mmx ) { |
if ( _mesa_getenv( "MESA_NO_MMX" ) == 0 ) { |
if (detection_debug) |
_mesa_debug(NULL, "MMX cpu detected.\n"); |
} else { |
_mesa_x86_cpu_features &= ~(X86_FEATURE_MMX); |
} |
} |
#endif |
#ifdef USE_3DNOW_ASM |
if ( cpu_has_3dnow ) { |
if ( _mesa_getenv( "MESA_NO_3DNOW" ) == 0 ) { |
if (detection_debug) |
_mesa_debug(NULL, "3DNow! cpu detected.\n"); |
} else { |
_mesa_x86_cpu_features &= ~(X86_FEATURE_3DNOW); |
} |
} |
#endif |
#ifdef USE_SSE_ASM |
if ( cpu_has_xmm ) { |
if ( _mesa_getenv( "MESA_NO_SSE" ) == 0 ) { |
if (detection_debug) |
_mesa_debug(NULL, "SSE cpu detected.\n"); |
if ( _mesa_getenv( "MESA_FORCE_SSE" ) == 0 ) { |
_mesa_check_os_sse_support(); |
} |
} else { |
_mesa_debug(NULL, "SSE cpu detected, but switched off by user.\n"); |
_mesa_x86_cpu_features &= ~(X86_FEATURE_XMM); |
} |
} |
#endif |
#endif /* USE_X86_ASM */ |
(void) detection_debug; |
} |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/common_x86_asm.S |
---|
0,0 → 1,220 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2004 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* |
* Check extended CPU capabilities. Now just returns the raw CPUID |
* feature information, allowing the higher level code to interpret the |
* results. |
* |
* Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de> |
* |
* Cleaned up and simplified by Gareth Hughes <gareth@valinux.com> |
* |
*/ |
/* |
* NOTE: Avoid using spaces in between '(' ')' and arguments, especially |
* with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces |
* in there will break the build on some platforms. |
*/ |
#include "matypes.h" |
#include "assyntax.h" |
#include "common_x86_features.h" |
SEG_TEXT

	ALIGNTEXT4
/*
 * GLuint _mesa_x86_has_cpuid(void)
 *
 * Returns 1 in EAX when the CPUID instruction is available, 0 otherwise.
 * Clobbers ECX and EFLAGS (the toggled ID bit is left as written).
 */
GLOBL GLNAME(_mesa_x86_has_cpuid)
HIDDEN(_mesa_x86_has_cpuid)
GLNAME(_mesa_x86_has_cpuid):

	/* Test for the CPUID command.  If the ID Flag bit in EFLAGS
	 * (bit 21) is writable, the CPUID command is present */
	PUSHF_L
	POP_L	(EAX)			/* EAX = current EFLAGS */
	MOV_L	(EAX, ECX)		/* ECX = reference copy */
	XOR_L	(CONST(0x00200000), EAX)	/* toggle the ID bit */
	PUSH_L	(EAX)
	POPF_L				/* try to write it back */
	PUSHF_L
	POP_L	(EAX)			/* EAX = EFLAGS after the write */

	/* Verify the ID Flag bit has been written. */
	CMP_L	(ECX, EAX)
	SETNE	(AL)			/* AL = 1 if the bit changed */

	/* Keep only AL.  The upper bytes of EAX still hold EFLAGS bits, and
	 * XOR-ing with 0xff (as previously done) left the low byte at 0xfe
	 * or 0xff, so the result was non-zero even without CPUID.
	 */
	AND_L	(CONST(0xff), EAX)
	RET
ALIGNTEXT4 |
/*
 * void _mesa_x86_cpuid(op, *eax, *ebx, *ecx, *edx)
 *
 * Executes CPUID with EAX = op (first stack argument) and stores the four
 * result registers through the four pointer arguments.  EDI and EBX are
 * saved and restored; after those two pushes the pointer arguments are
 * found at 16..28(ESP).  ECX is not initialized before CPUID.
 */
GLOBL GLNAME(_mesa_x86_cpuid) |
HIDDEN(_mesa_x86_cpuid) |
GLNAME(_mesa_x86_cpuid): |
MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */ |
PUSH_L (EDI) |
PUSH_L (EBX) |
CPUID |
MOV_L (REGOFF(16, ESP), EDI) /* *eax */ |
MOV_L (EAX, REGIND(EDI)) |
MOV_L (REGOFF(20, ESP), EDI) /* *ebx */ |
MOV_L (EBX, REGIND(EDI)) |
MOV_L (REGOFF(24, ESP), EDI) /* *ecx */ |
MOV_L (ECX, REGIND(EDI)) |
MOV_L (REGOFF(28, ESP), EDI) /* *edx */ |
MOV_L (EDX, REGIND(EDI)) |
POP_L (EBX) |
POP_L (EDI) |
RET |
ALIGNTEXT4 |
/*
 * GLuint _mesa_x86_cpuid_eax(op)
 *
 * Executes CPUID with EAX = op and returns the resulting EAX (it is
 * already in the return register).  EBX is saved and restored around
 * the instruction.
 */
GLOBL GLNAME(_mesa_x86_cpuid_eax) |
HIDDEN(_mesa_x86_cpuid_eax) |
GLNAME(_mesa_x86_cpuid_eax): |
MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */ |
PUSH_L (EBX) |
CPUID |
POP_L (EBX) |
RET |
ALIGNTEXT4 |
/*
 * GLuint _mesa_x86_cpuid_ebx(op)
 *
 * Executes CPUID with EAX = op and returns the resulting EBX.  The
 * result is copied to EAX before the caller's EBX is restored.
 */
GLOBL GLNAME(_mesa_x86_cpuid_ebx) |
HIDDEN(_mesa_x86_cpuid_ebx) |
GLNAME(_mesa_x86_cpuid_ebx): |
MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */ |
PUSH_L (EBX) |
CPUID |
MOV_L (EBX, EAX) /* return EBX */ |
POP_L (EBX) |
RET |
ALIGNTEXT4 |
/*
 * GLuint _mesa_x86_cpuid_ecx(op)
 *
 * Executes CPUID with EAX = op and returns the resulting ECX.  EBX is
 * saved and restored around the instruction.
 */
GLOBL GLNAME(_mesa_x86_cpuid_ecx) |
HIDDEN(_mesa_x86_cpuid_ecx) |
GLNAME(_mesa_x86_cpuid_ecx): |
MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */ |
PUSH_L (EBX) |
CPUID |
MOV_L (ECX, EAX) /* return ECX */ |
POP_L (EBX) |
RET |
ALIGNTEXT4 |
/*
 * GLuint _mesa_x86_cpuid_edx(op)
 *
 * Executes CPUID with EAX = op and returns the resulting EDX.  EBX is
 * saved and restored around the instruction.
 */
GLOBL GLNAME(_mesa_x86_cpuid_edx) |
HIDDEN(_mesa_x86_cpuid_edx) |
GLNAME(_mesa_x86_cpuid_edx): |
MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */ |
PUSH_L (EBX) |
CPUID |
MOV_L (EDX, EAX) /* return EDX */ |
POP_L (EBX) |
RET |
#ifdef USE_SSE_ASM |
/* Execute an SSE instruction to see if the operating system correctly |
* supports SSE. A signal handler for SIGILL should have been set |
* before calling this function, otherwise this could kill the client |
* application. |
* |
* -----> !!!! ATTENTION DEVELOPERS !!!! <----- |
* |
* If you're debugging with gdb and you get stopped in this function, |
* just type 'continue'! Execution will proceed normally. |
* See freedesktop.org bug #1709 for more info. |
*/ |
ALIGNTEXT4 |
/*
 * void _mesa_test_os_sse_support(void)
 *
 * The whole probe is the single XORPS below; the function has no other
 * effect than clearing XMM0.
 */
GLOBL GLNAME( _mesa_test_os_sse_support ) |
HIDDEN(_mesa_test_os_sse_support) |
GLNAME( _mesa_test_os_sse_support ): |
XORPS ( XMM0, XMM0 ) |
RET |
/* Perform an SSE divide-by-zero to see if the operating system
 * correctly supports unmasked SIMD FPU exceptions. Signal handlers for
 * SIGILL and SIGFPE should have been set before calling this function,
 * otherwise this could kill the client application.
 *
 * Stack frame: -4(EBP) holds the caller's MXCSR, -8(EBP) the modified
 * copy that is loaded for the test.
 */ |
ALIGNTEXT4 |
GLOBL GLNAME( _mesa_test_os_sse_exception_support ) |
HIDDEN(_mesa_test_os_sse_exception_support) |
GLNAME( _mesa_test_os_sse_exception_support ): |
PUSH_L ( EBP ) |
MOV_L ( ESP, EBP ) |
SUB_L ( CONST( 8 ), ESP ) |
/* Save the original MXCSR register value. |
*/ |
STMXCSR ( REGOFF( -4, EBP ) ) |
/* Unmask the divide-by-zero exception and perform one.
 * The AND mask 0xfffffdff clears bit 9 of the stored MXCSR copy.
 */ |
STMXCSR ( REGOFF( -8, EBP ) ) |
AND_L ( CONST( 0xfffffdff ), REGOFF( -8, EBP ) ) |
LDMXCSR ( REGOFF( -8, EBP ) ) |
/* XMM0 = 0; XMM1 = four copies of 0x3f800000 (1.0f) loaded from the
 * stack; then XMM1 is divided by XMM0.
 */
XORPS ( XMM0, XMM0 ) |
PUSH_L ( CONST( 0x3f800000 ) ) |
PUSH_L ( CONST( 0x3f800000 ) ) |
PUSH_L ( CONST( 0x3f800000 ) ) |
PUSH_L ( CONST( 0x3f800000 ) ) |
MOVUPS ( REGIND( ESP ), XMM1 ) |
DIVPS ( XMM0, XMM1 ) |
/* Restore the original MXCSR register value.
 * LEAVE then discards the locals and the four pushed constants.
 */ |
LDMXCSR ( REGOFF( -4, EBP ) ) |
LEAVE |
RET |
#endif |
/* On ELF/Linux builds emit an empty .note.GNU-stack section; by GNU
 * toolchain convention this marks the object as not needing an
 * executable stack.
 */
#if defined (__ELF__) && defined (__linux__) |
.section .note.GNU-stack,"",%progbits |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/common_x86_asm.h |
---|
0,0 → 1,53 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* |
* Check CPU capabilities & initialize optimized functions for this particular |
* processor. |
* |
* Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de> |
* Changed by Andre Werthmann <wertmann@cs.uni-potsdam.de> for using the |
* new SSE functions |
* |
* Reimplemented by Gareth Hughes in a more |
* future-proof manner, based on code in the Linux kernel. |
*/ |
#ifndef __COMMON_X86_ASM_H__ |
#define __COMMON_X86_ASM_H__ |
/* Do not reference mtypes.h from this file. |
*/ |
#include "common_x86_features.h" |
/* Bitmask of X86_FEATURE_* bits; tested through the cpu_has_* macros. */
extern int _mesa_x86_cpu_features; |
/* Fills in _mesa_x86_cpu_features; only the first call does any work. */
extern void _mesa_get_x86_features(void); |
/* Clears X86_FEATURE_XMM when the OS lacks SSE support.
 * NOTE(review): the definition in common_x86.c is only compiled when
 * USE_SSE_ASM is defined, while this prototype is unconditional.
 */
extern void _mesa_check_os_sse_support(void); |
/* Defined outside this header's companion sources shown here; presumably
 * installs the optimized transform routines -- confirm at the definition.
 */
extern void _mesa_init_all_x86_transform_asm( void ); |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/common_x86_features.h |
---|
0,0 → 1,67 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* |
* x86 CPUID feature information. The raw data is gathered by |
* _mesa_get_x86_features() into _mesa_x86_cpu_features and interpreted |
* with the cpu_has_* helper macros. |
* |
* Gareth Hughes |
*/ |
#ifndef __COMMON_X86_FEATURES_H__
#define __COMMON_X86_FEATURES_H__

/* Bits of the _mesa_x86_cpu_features bitmask.  This is Mesa's own
 * numbering, independent of the CPUID register layout below.
 */
#define X86_FEATURE_FPU		(1<<0)
#define X86_FEATURE_CMOV	(1<<1)
#define X86_FEATURE_MMXEXT	(1<<2)
#define X86_FEATURE_MMX		(1<<3)
#define X86_FEATURE_FXSR	(1<<4)
#define X86_FEATURE_XMM		(1<<5)
#define X86_FEATURE_XMM2	(1<<6)
#define X86_FEATURE_3DNOWEXT	(1<<7)
#define X86_FEATURE_3DNOW	(1<<8)

/* standard X86 CPU features (tested against EDX of CPUID leaf 1) */
#define X86_CPU_FPU		(1<<0)
#define X86_CPU_CMOV		(1<<15)
#define X86_CPU_MMX		(1<<23)
#define X86_CPU_XMM		(1<<25)
#define X86_CPU_XMM2		(1<<26)

/* extended X86 CPU features (tested against EDX of CPUID leaf 0x80000001) */
#define X86_CPUEXT_MMX_EXT	(1<<22)
#define X86_CPUEXT_3DNOW_EXT	(1<<30)
/* Bit 31.  Written as a hex constant because (1<<31) shifts into the sign
 * bit of int, which is undefined in C and yields a negative value.
 */
#define X86_CPUEXT_3DNOW	0x80000000

/* Non-zero when the corresponding bit is set in _mesa_x86_cpu_features */
#define cpu_has_mmx		(_mesa_x86_cpu_features & X86_FEATURE_MMX)
#define cpu_has_mmxext		(_mesa_x86_cpu_features & X86_FEATURE_MMXEXT)
#define cpu_has_xmm		(_mesa_x86_cpu_features & X86_FEATURE_XMM)
#define cpu_has_xmm2		(_mesa_x86_cpu_features & X86_FEATURE_XMM2)
#define cpu_has_3dnow		(_mesa_x86_cpu_features & X86_FEATURE_3DNOW)
#define cpu_has_3dnowext	(_mesa_x86_cpu_features & X86_FEATURE_3DNOWEXT)

#endif
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/gen_matypes.c |
---|
0,0 → 1,263 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2006 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Gareth Hughes |
*/ |
/* |
* This generates an asm version of mtypes.h (called matypes.h), so that |
* Mesa's x86 assembly code can access the internal structures easily. |
* This will be particularly useful when developing new x86 asm code for |
* Mesa, including lighting, clipping, texture image conversion etc. |
*/ |
/* Defined before including <inttypes.h>; some C++ toolchains only expose
 * the PRI* format macros (PRIx64 is used below) when this is set.
 */
#ifndef __STDC_FORMAT_MACROS |
#define __STDC_FORMAT_MACROS |
#endif |
#include <inttypes.h> |
#include "main/glheader.h" |
#include "main/mtypes.h" |
#include "tnl/t_context.h" |
/* Replace any existing offsetof with the classic null-pointer form.
 * NOTE(review): presumably done so the result is usable as an immediate
 * operand in the ASM_OFFSETS build -- confirm before switching to the
 * <stddef.h> version.
 */
#undef offsetof |
#define offsetof( type, member ) ((size_t) &((type *)0)->member) |
/* Emit a banner comment "Offsets for <x>" into the generated header.
 * x must be a string literal: it is concatenated with the adjacent
 * literals.
 */
#define OFFSET_HEADER( x ) \ |
do { \ |
printf( "\n" ); \ |
printf( "\n" ); \ |
printf( "/* =====================================================" \ |
"========\n" ); \ |
printf( " * Offsets for " x "\n" ); \ |
printf( " */\n" ); \ |
printf( "\n" ); \ |
} while (0) |
/* Emit a smaller banner comment "Flags for <x>" into the generated
 * header.  x must be a string literal.
 */
#define DEFINE_HEADER( x ) \ |
do { \ |
printf( "\n" ); \ |
printf( "/*\n" ); \ |
printf( " * Flags for " x "\n" ); \ |
printf( " */\n" ); \ |
printf( "\n" ); \ |
} while (0) |
#ifdef ASM_OFFSETS

/*
 * Format the asm output in a special way that we can manipulate
 * after the fact and turn into the final header for the target.
 *
 * In this mode nothing is printed at run time: each macro plants a
 * "->name value" marker line in the compiler's assembly output, and
 * printf itself is redefined to plant its (literal) argument the same way.
 */
#define DEFINE_UL( s, ul )						\
   __asm__ __volatile__ ( "\n->" s " %0" : : "i" (ul) )

#define DEFINE( s, d )							\
   DEFINE_UL( s, d )

#define printf( x )							\
   __asm__ __volatile__ ( "\n->" x )

#else

/*
 * Run-time mode: print "#define <s> <value>" lines.  DEFINE_UL prints the
 * value in decimal, DEFINE in hexadecimal.  The trailing semicolon inside
 * the expansions is kept for compatibility with existing uses.  The value
 * argument is parenthesized so the cast applies to the whole expression.
 */
#define DEFINE_UL( s, ul )						\
   printf( "#define %s\t%lu\n", s, (unsigned long) (ul) );

#define DEFINE( s, d )							\
   printf( "#define %s\t0x%" PRIx64 "\n", s, (uint64_t) (d) );

#endif
/* |
 * OFFSET emits a define named s whose value is the byte offset of member m |
 * inside type t; SIZEOF emits a define named s whose value is sizeof(t). |
 * Both forward to DEFINE_UL. */
#define OFFSET( s, t, m ) \ |
DEFINE_UL( s, offsetof( t, m ) ) |
#define SIZEOF( s, t ) \ |
DEFINE_UL( s, sizeof(t) ) |
/* |
 * Emit the contents of the generated header: an __ASM_TYPES_H__ include |
 * guard, then, structure by structure, one define per member offset, |
 * structure size or flag value listed below, using the OFFSET, SIZEOF and |
 * DEFINE macros.  The order of the statements is the order of the output. |
 * argc and argv are not used.  Always returns 0. */
int main( int argc, char **argv ) |
{ |
printf( "/*\n" ); |
printf( " * This file is automatically generated from the Mesa internal type\n" ); |
printf( " * definitions. Do not edit directly.\n" ); |
printf( " */\n" ); |
printf( "\n" ); |
printf( "#ifndef __ASM_TYPES_H__\n" ); |
printf( "#define __ASM_TYPES_H__\n" ); |
printf( "\n" ); |
/* struct gl_context offsets: |
*/ |
OFFSET_HEADER( "struct gl_context" ); |
printf( "\n" ); |
OFFSET( "CTX_LIGHT_ENABLED ", struct gl_context, Light.Enabled ); |
OFFSET( "CTX_LIGHT_SHADE_MODEL ", struct gl_context, Light.ShadeModel ); |
OFFSET( "CTX_LIGHT_COLOR_MAT_FACE ", struct gl_context, Light.ColorMaterialFace ); |
OFFSET( "CTX_LIGHT_COLOR_MAT_MODE ", struct gl_context, Light.ColorMaterialMode ); |
OFFSET( "CTX_LIGHT_COLOR_MAT_MASK ", struct gl_context, Light._ColorMaterialBitmask ); |
OFFSET( "CTX_LIGHT_COLOR_MAT_ENABLED ", struct gl_context, Light.ColorMaterialEnabled ); |
OFFSET( "CTX_LIGHT_ENABLED_LIST ", struct gl_context, Light.EnabledList ); |
OFFSET( "CTX_LIGHT_NEED_VERTS ", struct gl_context, Light._NeedVertices ); |
OFFSET( "CTX_LIGHT_BASE_COLOR ", struct gl_context, Light._BaseColor ); |
/* struct vertex_buffer offsets: |
*/ |
OFFSET_HEADER( "struct vertex_buffer" ); |
OFFSET( "VB_SIZE ", struct vertex_buffer, Size ); |
OFFSET( "VB_COUNT ", struct vertex_buffer, Count ); |
printf( "\n" ); |
OFFSET( "VB_ELTS ", struct vertex_buffer, Elts ); |
OFFSET( "VB_OBJ_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_POS] ); |
OFFSET( "VB_EYE_PTR ", struct vertex_buffer, EyePtr ); |
OFFSET( "VB_CLIP_PTR ", struct vertex_buffer, ClipPtr ); |
OFFSET( "VB_PROJ_CLIP_PTR ", struct vertex_buffer, NdcPtr ); |
OFFSET( "VB_CLIP_OR_MASK ", struct vertex_buffer, ClipOrMask ); |
OFFSET( "VB_CLIP_MASK ", struct vertex_buffer, ClipMask ); |
OFFSET( "VB_NORMAL_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_NORMAL] ); |
OFFSET( "VB_EDGE_FLAG ", struct vertex_buffer, EdgeFlag ); |
OFFSET( "VB_TEX0_COORD_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_TEX0] ); |
OFFSET( "VB_TEX1_COORD_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_TEX1] ); |
OFFSET( "VB_TEX2_COORD_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_TEX2] ); |
OFFSET( "VB_TEX3_COORD_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_TEX3] ); |
OFFSET( "VB_INDEX_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_COLOR_INDEX] ); |
OFFSET( "VB_COLOR_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_COLOR0] ); |
OFFSET( "VB_SECONDARY_COLOR_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_COLOR1] ); |
OFFSET( "VB_FOG_COORD_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_FOG] ); |
OFFSET( "VB_PRIMITIVE ", struct vertex_buffer, Primitive ); |
printf( "\n" ); |
DEFINE_HEADER( "struct vertex_buffer" ); |
/* XXX use new labels here someday after vertex program is done */ |
DEFINE( "VERT_BIT_OBJ ", VERT_BIT_POS ); |
DEFINE( "VERT_BIT_NORM ", VERT_BIT_NORMAL ); |
DEFINE( "VERT_BIT_RGBA ", VERT_BIT_COLOR0 ); |
DEFINE( "VERT_BIT_SPEC_RGB ", VERT_BIT_COLOR1 ); |
DEFINE( "VERT_BIT_FOG_COORD ", VERT_BIT_FOG ); |
DEFINE( "VERT_BIT_TEX0 ", VERT_BIT_TEX0 ); |
DEFINE( "VERT_BIT_TEX1 ", VERT_BIT_TEX1 ); |
DEFINE( "VERT_BIT_TEX2 ", VERT_BIT_TEX2 ); |
DEFINE( "VERT_BIT_TEX3 ", VERT_BIT_TEX3 ); |
/* GLvector4f offsets: |
*/ |
OFFSET_HEADER( "GLvector4f" ); |
OFFSET( "V4F_DATA ", GLvector4f, data ); |
OFFSET( "V4F_START ", GLvector4f, start ); |
OFFSET( "V4F_COUNT ", GLvector4f, count ); |
OFFSET( "V4F_STRIDE ", GLvector4f, stride ); |
OFFSET( "V4F_SIZE ", GLvector4f, size ); |
OFFSET( "V4F_FLAGS ", GLvector4f, flags ); |
DEFINE_HEADER( "GLvector4f" ); |
DEFINE( "VEC_MALLOC ", VEC_MALLOC ); |
DEFINE( "VEC_NOT_WRITEABLE ", VEC_NOT_WRITEABLE ); |
DEFINE( "VEC_BAD_STRIDE ", VEC_BAD_STRIDE ); |
printf( "\n" ); |
DEFINE( "VEC_SIZE_1 ", VEC_SIZE_1 ); |
DEFINE( "VEC_SIZE_2 ", VEC_SIZE_2 ); |
DEFINE( "VEC_SIZE_3 ", VEC_SIZE_3 ); |
DEFINE( "VEC_SIZE_4 ", VEC_SIZE_4 ); |
/* GLmatrix offsets: |
*/ |
OFFSET_HEADER( "GLmatrix" ); |
OFFSET( "MATRIX_DATA ", GLmatrix, m ); |
OFFSET( "MATRIX_INV ", GLmatrix, inv ); |
OFFSET( "MATRIX_FLAGS ", GLmatrix, flags ); |
OFFSET( "MATRIX_TYPE ", GLmatrix, type ); |
/* struct gl_light offsets: |
*/ |
OFFSET_HEADER( "struct gl_light" ); |
OFFSET( "LIGHT_NEXT ", struct gl_light, next ); |
OFFSET( "LIGHT_PREV ", struct gl_light, prev ); |
printf( "\n" ); |
OFFSET( "LIGHT_AMBIENT ", struct gl_light, Ambient ); |
OFFSET( "LIGHT_DIFFUSE ", struct gl_light, Diffuse ); |
OFFSET( "LIGHT_SPECULAR ", struct gl_light, Specular ); |
OFFSET( "LIGHT_EYE_POSITION ", struct gl_light, EyePosition ); |
OFFSET( "LIGHT_SPOT_DIRECTION ", struct gl_light, SpotDirection ); |
OFFSET( "LIGHT_SPOT_EXPONENT ", struct gl_light, SpotExponent ); |
OFFSET( "LIGHT_SPOT_CUTOFF ", struct gl_light, SpotCutoff ); |
OFFSET( "LIGHT_COS_CUTOFF ", struct gl_light, _CosCutoff ); |
OFFSET( "LIGHT_CONST_ATTEN ", struct gl_light, ConstantAttenuation ); |
OFFSET( "LIGHT_LINEAR_ATTEN ", struct gl_light, LinearAttenuation ); |
OFFSET( "LIGHT_QUADRATIC_ATTEN ", struct gl_light, QuadraticAttenuation ); |
OFFSET( "LIGHT_ENABLED ", struct gl_light, Enabled ); |
printf( "\n" ); |
OFFSET( "LIGHT_FLAGS ", struct gl_light, _Flags ); |
printf( "\n" ); |
OFFSET( "LIGHT_POSITION ", struct gl_light, _Position ); |
OFFSET( "LIGHT_VP_INF_NORM ", struct gl_light, _VP_inf_norm ); |
OFFSET( "LIGHT_H_INF_NORM ", struct gl_light, _h_inf_norm ); |
OFFSET( "LIGHT_NORM_DIRECTION ", struct gl_light, _NormSpotDirection ); |
OFFSET( "LIGHT_VP_INF_SPOT_ATTEN ", struct gl_light, _VP_inf_spot_attenuation ); |
printf( "\n" ); |
OFFSET( "LIGHT_MAT_AMBIENT ", struct gl_light, _MatAmbient ); |
OFFSET( "LIGHT_MAT_DIFFUSE ", struct gl_light, _MatDiffuse ); |
OFFSET( "LIGHT_MAT_SPECULAR ", struct gl_light, _MatSpecular ); |
printf( "\n" ); |
SIZEOF( "SIZEOF_GL_LIGHT ", struct gl_light ); |
DEFINE_HEADER( "struct gl_light" ); |
DEFINE( "LIGHT_SPOT ", LIGHT_SPOT ); |
DEFINE( "LIGHT_LOCAL_VIEWER ", LIGHT_LOCAL_VIEWER ); |
DEFINE( "LIGHT_POSITIONAL ", LIGHT_POSITIONAL ); |
printf( "\n" ); |
DEFINE( "LIGHT_NEED_VERTICES ", LIGHT_NEED_VERTICES ); |
/* struct gl_lightmodel offsets: |
*/ |
OFFSET_HEADER( "struct gl_lightmodel" ); |
OFFSET( "LIGHT_MODEL_AMBIENT ", struct gl_lightmodel, Ambient ); |
OFFSET( "LIGHT_MODEL_LOCAL_VIEWER ", struct gl_lightmodel, LocalViewer ); |
OFFSET( "LIGHT_MODEL_TWO_SIDE ", struct gl_lightmodel, TwoSide ); |
OFFSET( "LIGHT_MODEL_COLOR_CONTROL ", struct gl_lightmodel, ColorControl ); |
printf( "\n" ); |
printf( "\n" ); |
printf( "#endif /* __ASM_TYPES_H__ */\n" ); |
return 0; |
} |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/matypes.h |
---|
0,0 → 1,159 |
/* |
* This file is automatically generated from the Mesa internal type |
* definitions. Do not edit directly. |
*/ |
#ifndef __ASM_TYPES_H__ |
#define __ASM_TYPES_H__ |
/* ============================================================= |
* Offsets for struct gl_context |
*/ |
#define CTX_LIGHT_ENABLED 7116 |
#define CTX_LIGHT_SHADE_MODEL 7120 |
#define CTX_LIGHT_COLOR_MAT_FACE 7128 |
#define CTX_LIGHT_COLOR_MAT_MODE 7132 |
#define CTX_LIGHT_COLOR_MAT_MASK 7136 |
#define CTX_LIGHT_COLOR_MAT_ENABLED 7140 |
#define CTX_LIGHT_ENABLED_LIST 7152 |
#define CTX_LIGHT_NEED_VERTS 7405 |
#define CTX_LIGHT_BASE_COLOR 7408 |
/* ============================================================= |
* Offsets for struct vertex_buffer |
*/ |
#define VB_SIZE 0 |
#define VB_COUNT 4 |
#define VB_ELTS 8 |
#define VB_OBJ_PTR 60 |
#define VB_EYE_PTR 12 |
#define VB_CLIP_PTR 16 |
#define VB_PROJ_CLIP_PTR 20 |
#define VB_CLIP_OR_MASK 24 |
#define VB_CLIP_MASK 28 |
#define VB_NORMAL_PTR 68 |
#define VB_EDGE_FLAG 36 |
#define VB_TEX0_COORD_PTR 92 |
#define VB_TEX1_COORD_PTR 96 |
#define VB_TEX2_COORD_PTR 100 |
#define VB_TEX3_COORD_PTR 104 |
#define VB_INDEX_PTR 84 |
#define VB_COLOR_PTR 72 |
#define VB_SECONDARY_COLOR_PTR 76 |
#define VB_FOG_COORD_PTR 80 |
#define VB_PRIMITIVE 52 |
/* |
* Flags for struct vertex_buffer |
*/ |
#define VERT_BIT_OBJ 0x1 |
#define VERT_BIT_NORM 0x4 |
#define VERT_BIT_RGBA 0x8 |
#define VERT_BIT_SPEC_RGB 0x10 |
#define VERT_BIT_FOG_COORD 0x20 |
#define VERT_BIT_TEX0 0x100 |
#define VERT_BIT_TEX1 0x200 |
#define VERT_BIT_TEX2 0x400 |
#define VERT_BIT_TEX3 0x800 |
/* ============================================================= |
* Offsets for GLvector4f |
*/ |
#define V4F_DATA 0 |
#define V4F_START 4 |
#define V4F_COUNT 8 |
#define V4F_STRIDE 12 |
#define V4F_SIZE 16 |
#define V4F_FLAGS 20 |
/* |
* Flags for GLvector4f |
*/ |
#define VEC_MALLOC 0x10 |
#define VEC_NOT_WRITEABLE 0x40 |
#define VEC_BAD_STRIDE 0x100 |
#define VEC_SIZE_1 0x1 |
#define VEC_SIZE_2 0x3 |
#define VEC_SIZE_3 0x7 |
#define VEC_SIZE_4 0xf |
/* ============================================================= |
* Offsets for GLmatrix |
*/ |
#define MATRIX_DATA 0 |
#define MATRIX_INV 4 |
#define MATRIX_FLAGS 8 |
#define MATRIX_TYPE 12 |
/* ============================================================= |
* Offsets for struct gl_light |
*/ |
#define LIGHT_NEXT 0 |
#define LIGHT_PREV 4 |
#define LIGHT_AMBIENT 8 |
#define LIGHT_DIFFUSE 24 |
#define LIGHT_SPECULAR 40 |
#define LIGHT_EYE_POSITION 56 |
#define LIGHT_SPOT_DIRECTION 72 |
#define LIGHT_SPOT_EXPONENT 88 |
#define LIGHT_SPOT_CUTOFF 92 |
#define LIGHT_COS_CUTOFF 96 |
#define LIGHT_CONST_ATTEN 100 |
#define LIGHT_LINEAR_ATTEN 104 |
#define LIGHT_QUADRATIC_ATTEN 108 |
#define LIGHT_ENABLED 112 |
#define LIGHT_FLAGS 116 |
#define LIGHT_POSITION 120 |
#define LIGHT_VP_INF_NORM 136 |
#define LIGHT_H_INF_NORM 148 |
#define LIGHT_NORM_DIRECTION 160 |
#define LIGHT_VP_INF_SPOT_ATTEN 176 |
#define LIGHT_MAT_AMBIENT 180 |
#define LIGHT_MAT_DIFFUSE 204 |
#define LIGHT_MAT_SPECULAR 228 |
#define SIZEOF_GL_LIGHT 252 |
/* |
* Flags for struct gl_light |
*/ |
#define LIGHT_SPOT 0x1 |
#define LIGHT_LOCAL_VIEWER 0x2 |
#define LIGHT_POSITIONAL 0x4 |
#define LIGHT_NEED_VERTICES 0x6 |
/* ============================================================= |
* Offsets for struct gl_lightmodel |
*/ |
#define LIGHT_MODEL_AMBIENT 0 |
#define LIGHT_MODEL_LOCAL_VIEWER 16 |
#define LIGHT_MODEL_TWO_SIDE 17 |
#define LIGHT_MODEL_COLOR_CONTROL 20 |
#endif /* __ASM_TYPES_H__ */ |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/mmx.h |
---|
0,0 → 1,59 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2006 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#ifndef ASM_MMX_H |
#define ASM_MMX_H |
#include "main/compiler.h" |
#include "main/glheader.h" |
struct gl_context; |
/* |
 * C prototypes for the MMX blend routines, one per blend mode |
 * (transparency, add, min, max, modulate).  All five take the same argument |
 * list and return nothing. |
 * NOTE(review): the assembly template mmx_blendtmp.h loads only n, mask, |
 * rgba and dest from the stack and stores its results through rgba; confirm |
 * that ctx and chanType are intentionally unused there. */
extern void _ASMAPI |
_mesa_mmx_blend_transparency( struct gl_context *ctx, GLuint n, const GLubyte mask[], |
GLvoid *rgba, const GLvoid *dest, |
GLenum chanType ); |
extern void _ASMAPI |
_mesa_mmx_blend_add( struct gl_context *ctx, GLuint n, const GLubyte mask[], |
GLvoid *rgba, const GLvoid *dest, |
GLenum chanType ); |
extern void _ASMAPI |
_mesa_mmx_blend_min( struct gl_context *ctx, GLuint n, const GLubyte mask[], |
GLvoid *rgba, const GLvoid *dest, |
GLenum chanType ); |
extern void _ASMAPI |
_mesa_mmx_blend_max( struct gl_context *ctx, GLuint n, const GLubyte mask[], |
GLvoid *rgba, const GLvoid *dest, |
GLenum chanType ); |
extern void _ASMAPI |
_mesa_mmx_blend_modulate( struct gl_context *ctx, GLuint n, const GLubyte mask[], |
GLvoid *rgba, const GLvoid *dest, |
GLenum chanType ); |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/mmx_blend.S |
---|
0,0 → 1,402 |
; |
/* |
* Written by José Fonseca <j_r_fonseca@yahoo.co.uk> |
*/ |
#ifdef USE_MMX_ASM |
#include "assyntax.h" |
#include "matypes.h" |
/* integer multiplication - alpha plus one |
* |
* makes the following approximation to the division (Sree) |
* |
* rgb*a/255 ~= (rgb*(a+1)) >> 8 |
* |
* which is the fastest method that satisfies the following OpenGL criteria |
* |
* 0*0 = 0 and 255*255 = 255 |
* |
* note that MX1 is a register holding the constant 0xffffffffffffffff, which can easily be obtained with |
* |
* PCMPEQW ( MX1, MX1 ) |
* |
* Operands are written source first, destination second; the products are |
* left in MA1 (and MA2).  Instructions wrapped in TWO() are emitted only when |
* the including template defines TWO(x) as x. |
*/ |
#define GMB_MULT_AP1( MP1, MA1, MP2, MA2, MX1 ) \ |
PSUBW ( MX1, MA1 ) /* a1 + 1 | a1 + 1 | a1 + 1 | a1 + 1 */ ;\ |
PMULLW ( MP1, MA1 ) /* t1 = p1*(a1 + 1) */ ;\ |
;\ |
TWO(PSUBW ( MX1, MA2 )) /* a2 + 1 | a2 + 1 | a2 + 1 | a2 + 1 */ ;\ |
TWO(PMULLW ( MP2, MA2 )) /* t2 = p2*(a2 + 1) */ ;\ |
;\ |
PSRLW ( CONST(8), MA1 ) /* t1 >> 8 ~= t1/255 */ ;\ |
TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 ~= t2/255 */ |
/* integer multiplication - geometric series |
* |
* takes the geometric series approximation to the division |
* |
* t/255 = (t >> 8) + (t >> 16) + (t >> 24) .. |
* |
* in this case just the first two terms to fit in 16bit arithmetic |
* |
* t/255 ~= (t + (t >> 8)) >> 8 |
* |
* note that by itself this does not satisfy the OpenGL criteria, as 255*255 = 254, |
* so the special case a = 255 must be accounted for or roundoff must be used |
* |
* The results are left in MA1 (and MA2); MP1 (and MP2) are overwritten with |
* a copy of the intermediate product. |
*/ |
#define GMB_MULT_GS( MP1, MA1, MP2, MA2 ) \ |
PMULLW ( MP1, MA1 ) /* t1 = p1*a1 */ ;\ |
TWO(PMULLW ( MP2, MA2 )) /* t2 = p2*a2 */ ;\ |
;\ |
MOVQ ( MA1, MP1 ) ;\ |
PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\ |
;\ |
TWO(MOVQ ( MA2, MP2 )) ;\ |
TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\ |
;\ |
PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\ |
PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\ |
;\ |
TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\ |
TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */ |
/* integer multiplication - geometric series plus rounding |
* |
* when using a geometric series division, instead of truncating the result |
* use roundoff in the approximation (Jim Blinn) |
* |
* t = rgb*a + 0x80 |
* |
* achieving the exact results |
* |
* note that M80 is a register with the 0x0080008000800080 constant |
* |
* The results are left in MA1 (and MA2); MP1 (and MP2) are overwritten with |
* a copy of the biased product. |
*/ |
#define GMB_MULT_GSR( MP1, MA1, MP2, MA2, M80 ) \ |
PMULLW ( MP1, MA1 ) /* t1 = p1*a1 */ ;\ |
PADDW ( M80, MA1 ) /* t1 += 0x80 */ ;\ |
;\ |
TWO(PMULLW ( MP2, MA2 )) /* t2 = p2*a2 */ ;\ |
TWO(PADDW ( M80, MA2 )) /* t2 += 0x80 */ ;\ |
;\ |
MOVQ ( MA1, MP1 ) ;\ |
PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\ |
;\ |
TWO(MOVQ ( MA2, MP2 )) ;\ |
TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\ |
;\ |
PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\ |
PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\ |
;\ |
TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\ |
TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */ |
/* linear interpolation - geometric series |
* |
* MP1 is first replaced by the difference p1 - q1, which is multiplied by |
* the alpha in MA1; q1 is pre-shifted left by 8 and added to the scaled |
* product before the final shift.  The result is left in MA1 (and MA2); |
* MP1/MQ1 (and MP2/MQ2) are overwritten. |
*/ |
#define GMB_LERP_GS( MP1, MQ1, MA1, MP2, MQ2, MA2) \ |
PSUBW ( MQ1, MP1 ) /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\ |
PSLLW ( CONST(8), MQ1 ) /* q1 << 8 */ ;\ |
PMULLW ( MP1, MA1 ) /* t1 = (p1 - q1)*pa1 */ ;\ |
;\ |
TWO(PSUBW ( MQ2, MP2 )) /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\ |
TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8 */ ;\ |
TWO(PMULLW ( MP2, MA2 )) /* t2 = (p2 - q2)*pa2 */ ;\ |
;\ |
MOVQ ( MA1, MP1 ) ;\ |
PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\ |
;\ |
TWO(MOVQ ( MA2, MP2 )) ;\ |
TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\ |
;\ |
PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\ |
TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\ |
;\ |
PADDW ( MQ1, MA1 ) /* (t1/255 + q1) << 8 */ ;\ |
TWO(PADDW ( MQ2, MA2 )) /* (t2/255 + q2) << 8 */ ;\ |
;\ |
PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\ |
TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */ |
/* linear interpolation - geometric series with roundoff |
* |
* this is a generalization of Blinn's formula to signed arithmetic |
* |
* note that M80 is a register with the 0x0080008000800080 constant |
* |
* The sign bit of the difference p - q is extracted and turned into a 0x100 |
* correction that is subtracted from the product when q > p, before the |
* 0x80 rounding bias is added.  The result is left in MA1 (and MA2). |
*/ |
#define GMB_LERP_GSR( MP1, MQ1, MA1, MP2, MQ2, MA2, M80) \ |
PSUBW ( MQ1, MP1 ) /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\ |
PSLLW ( CONST(8), MQ1 ) /* q1 << 8 */ ;\ |
PMULLW ( MP1, MA1 ) /* t1 = (p1 - q1)*pa1 */ ;\ |
;\ |
TWO(PSUBW ( MQ2, MP2 )) /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\ |
TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8 */ ;\ |
TWO(PMULLW ( MP2, MA2 )) /* t2 = (p2 - q2)*pa2 */ ;\ |
;\ |
PSRLW ( CONST(15), MP1 ) /* q1 > p1 ? 1 : 0 */ ;\ |
TWO(PSRLW ( CONST(15), MP2 )) /* q2 > p2 ? 1 : 0 */ ;\ |
;\ |
PSLLW ( CONST(8), MP1 ) /* q1 > p1 ? 0x100 : 0 */ ;\ |
TWO(PSLLW ( CONST(8), MP2 )) /* q2 > p2 ? 0x100 : 0 */ ;\ |
;\ |
PSUBW ( MP1, MA1 ) /* t1 -=? 0x100 */ ;\ |
TWO(PSUBW ( MP2, MA2 )) /* t2 -=? 0x100 */ ;\ |
;\ |
PADDW ( M80, MA1 ) /* t1 += 0x80 */ ;\ |
TWO(PADDW ( M80, MA2 )) /* t2 += 0x80 */ ;\ |
;\ |
MOVQ ( MA1, MP1 ) ;\ |
PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\ |
;\ |
TWO(MOVQ ( MA2, MP2 )) ;\ |
TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\ |
;\ |
PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\ |
TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\ |
;\ |
PADDW ( MQ1, MA1 ) /* (t1/255 + q1) << 8 */ ;\ |
TWO(PADDW ( MQ2, MA2 )) /* (t2/255 + q2) << 8 */ ;\ |
;\ |
PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\ |
TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */ |
/* linear interpolation - geometric series with correction |
* |
* instead of the roundoff this adds a small correction to satisfy the OpenGL criteria |
* |
* t/255 ~= (t + (t >> 8) + (t >> 15)) >> 8 |
* |
* note that although this is faster than rounding off, it does not always give the exact results |
* |
* The result is left in MA1 (and MA2); MP1/MQ1 (and MP2/MQ2) are overwritten. |
*/ |
#define GMB_LERP_GSC( MP1, MQ1, MA1, MP2, MQ2, MA2) \ |
PSUBW ( MQ1, MP1 ) /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\ |
PSLLW ( CONST(8), MQ1 ) /* q1 << 8 */ ;\ |
PMULLW ( MP1, MA1 ) /* t1 = (p1 - q1)*pa1 */ ;\ |
;\ |
TWO(PSUBW ( MQ2, MP2 )) /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\ |
TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8 */ ;\ |
TWO(PMULLW ( MP2, MA2 )) /* t2 = (p2 - q2)*pa2 */ ;\ |
;\ |
MOVQ ( MA1, MP1 ) ;\ |
PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\ |
;\ |
TWO(MOVQ ( MA2, MP2 )) ;\ |
TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\ |
;\ |
PADDW ( MA1, MP1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\ |
PSRLW ( CONST(7), MA1 ) /* (t1 >> 8) >> 7 = t1 >> 15 */ ;\ |
;\ |
TWO(PADDW ( MA2, MP2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\ |
TWO(PSRLW ( CONST(7), MA2 )) /* (t2 >> 8) >> 7 = t2 >> 15 */ ;\ |
;\ |
PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) + (t1 >>15) ~= (t1/255) << 8 */ ;\ |
TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) + (t2 >>15) ~= (t2/255) << 8 */ ;\ |
;\ |
PADDW ( MQ1, MA1 ) /* (t1/255 + q1) << 8 */ ;\ |
TWO(PADDW ( MQ2, MA2 )) /* (t2/255 + q2) << 8 */ ;\ |
;\ |
PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\ |
TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */ |
/* common blending setup code |
* |
* note that M00 is a register holding the constant 0x0000000000000000, which can easily be obtained with |
* |
* PXOR ( M00, M00 ) |
* |
* Throughout these macros p names the pixel read from rgba and q the pixel |
* read from dest.  Lines wrapped in ONE() are emitted for the single-pixel |
* expansion of the template and lines wrapped in TWO() for the two-pixel |
* expansion. |
* |
* GMB_LOAD   - load one (MOVD) or two (MOVQ) packed pixels from rgba into |
*              MPP and from dest into MQQ. |
* GMB_UNPACK - widen the bytes of pixel 1 to words in MP1/MQ1 and, in the |
*              two-pixel case, those of pixel 2 in MP2/MQ2, using M00 as zero. |
* GMB_ALPHA  - replicate the alpha word of MP1 (MP2) into all four words of |
*              MA1 (MA2). |
* GMB_PACK   - pack the words of MS1 and MS2 back to bytes, result in MS1. |
*              The expansion ends without a line continuation so that the |
*              definition cannot swallow the line that follows it. |
* GMB_STORE  - store one (MOVD) or two (MOVQ) packed pixels from MSS to rgba. |
*/ |
#define GMB_LOAD(rgba, dest, MPP, MQQ) \ |
ONE(MOVD ( REGIND(rgba), MPP )) /* | | | | pa1 | pb1 | pg1 | pr1 */ ;\ |
ONE(MOVD ( REGIND(dest), MQQ )) /* | | | | qa1 | qb1 | qg1 | qr1 */ ;\ |
;\ |
TWO(MOVQ ( REGIND(rgba), MPP )) /* pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1 */ ;\ |
TWO(MOVQ ( REGIND(dest), MQQ )) /* qa2 | qb2 | qg2 | qr2 | qa1 | qb1 | qg1 | qr1 */ |
#define GMB_UNPACK(MP1, MQ1, MP2, MQ2, M00) \ |
TWO(MOVQ ( MP1, MP2 )) ;\ |
TWO(MOVQ ( MQ1, MQ2 )) ;\ |
;\ |
PUNPCKLBW ( M00, MQ1 ) /* qa1 | qb1 | qg1 | qr1 */ ;\ |
TWO(PUNPCKHBW ( M00, MQ2 )) /* qa2 | qb2 | qg2 | qr2 */ ;\ |
PUNPCKLBW ( M00, MP1 ) /* pa1 | pb1 | pg1 | pr1 */ ;\ |
TWO(PUNPCKHBW ( M00, MP2 )) /* pa2 | pb2 | pg2 | pr2 */ |
#define GMB_ALPHA(MP1, MA1, MP2, MA2) \ |
MOVQ ( MP1, MA1 ) ;\ |
TWO(MOVQ ( MP2, MA2 )) ;\ |
;\ |
PUNPCKHWD ( MA1, MA1 ) /* pa1 | pa1 | | */ ;\ |
TWO(PUNPCKHWD ( MA2, MA2 )) /* pa2 | pa2 | | */ ;\ |
PUNPCKHDQ ( MA1, MA1 ) /* pa1 | pa1 | pa1 | pa1 */ ;\ |
TWO(PUNPCKHDQ ( MA2, MA2 )) /* pa2 | pa2 | pa2 | pa2 */ |
#define GMB_PACK( MS1, MS2 ) \ |
PACKUSWB ( MS2, MS1 ) /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */ |
#define GMB_STORE(rgba, MSS ) \ |
ONE(MOVD ( MSS, REGIND(rgba) )) /* | | | | sa1 | sb1 | sg1 | sr1 */ ;\ |
TWO(MOVQ ( MSS, REGIND(rgba) )) /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */ |
/* Kevin F. Quinn <kevquinn@gentoo.org> 2 July 2006 |
* Replace data segment constants with text-segment |
* constants (via pushl/movq) |
* |
* The former data-segment definitions are kept below for reference only: |
* |
SEG_DATA |
ALIGNDATA8 |
const_0080: |
D_LONG 0x00800080, 0x00800080 |
const_80: |
D_LONG 0x80808080, 0x80808080 |
* |
* Each 64-bit constant is now given as two 32-bit immediates; the _l and _h |
* suffixes presumably name the low and high halves (both halves are equal). |
*/ |
#define const_0080_l 0x00800080 |
#define const_0080_h 0x00800080 |
#define const_80_l 0x80808080 |
#define const_80_h 0x80808080 |
SEG_TEXT |
/* Blend transparency function |
* |
* TAG and LLTAG append _transparency to the symbol and label names used by |
* mmx_blendtmp.h.  INIT clears MM0, which GMB_UNPACK uses as its zero |
* register.  MAIN loads rgba and dest, widens the bytes to words, replicates |
* the rgba alpha and interpolates with GMB_LERP_GSC; the packed result is |
* stored back to rgba. |
*/ |
#define TAG(x) CONCAT(x,_transparency) |
#define LLTAG(x) LLBL2(x,_transparency) |
#define INIT \ |
PXOR ( MM0, MM0 ) /* 0x0000 | 0x0000 | 0x0000 | 0x0000 */ |
#define MAIN( rgba, dest ) \ |
GMB_LOAD( rgba, dest, MM1, MM2 ) ;\ |
GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 ) ;\ |
GMB_ALPHA( MM1, MM3, MM4, MM6 ) ;\ |
GMB_LERP_GSC( MM1, MM2, MM3, MM4, MM5, MM6 ) ;\ |
GMB_PACK( MM3, MM6 ) ;\ |
GMB_STORE( rgba, MM3 ) |
#include "mmx_blendtmp.h" |
/* Blend add function |
* |
* FIXME: Add some loop unrolling here... |
* |
* TAG and LLTAG append _add to the names used by mmx_blendtmp.h; INIT is |
* empty.  MAIN adds the dest bytes to the rgba bytes with PADDUSB and writes |
* the sum back to rgba.  p labels the rgba pixel and q the dest pixel. |
*/ |
#define TAG(x) CONCAT(x,_add) |
#define LLTAG(x) LLBL2(x,_add) |
#define INIT |
#define MAIN( rgba, dest ) \ |
ONE(MOVD ( REGIND(rgba), MM1 )) /* | | | | pa1 | pb1 | pg1 | pr1 */ ;\ |
ONE(MOVD ( REGIND(dest), MM2 )) /* | | | | qa1 | qb1 | qg1 | qr1 */ ;\ |
ONE(PADDUSB ( MM2, MM1 )) ;\ |
ONE(MOVD ( MM1, REGIND(rgba) )) /* | | | | sa1 | sb1 | sg1 | sr1 */ ;\ |
;\ |
TWO(MOVQ ( REGIND(rgba), MM1 )) /* pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1 */ ;\ |
TWO(PADDUSB ( REGIND(dest), MM1 )) /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */ ;\ |
TWO(MOVQ ( MM1, REGIND(rgba) )) |
#include "mmx_blendtmp.h" |
/* Blend min function |
* |
* TAG and LLTAG append _min to the names used by mmx_blendtmp.h.  INIT loads |
* the 0x80 byte pattern into MM7; MAIN XORs copies of both pixels with it so |
* that the byte compare PCMPGTB can be applied to unsigned values, then uses |
* the compare mask to select, per byte, the smaller of p (rgba) and q (dest). |
*/ |
#define TAG(x) CONCAT(x,_min) |
#define LLTAG(x) LLBL2(x,_min) |
/* Kevin F. Quinn 2nd July 2006 |
* Replace data segment constants with text-segment instructions |
#define INIT \ |
MOVQ ( CONTENT(const_80), MM7 ) |
*/ |
#define INIT \ |
PUSH_L ( CONST(const_80_h) ) /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/ ;\ |
PUSH_L ( CONST(const_80_l) ) ;\ |
MOVQ ( REGIND(ESP), MM7 ) ;\ |
ADD_L ( CONST(8), ESP) |
#define MAIN( rgba, dest ) \ |
GMB_LOAD( rgba, dest, MM1, MM2 ) ;\ |
MOVQ ( MM1, MM3 ) ;\ |
MOVQ ( MM2, MM4 ) ;\ |
PXOR ( MM7, MM3 ) /* unsigned -> signed */ ;\ |
PXOR ( MM7, MM4 ) /* unsigned -> signed */ ;\ |
PCMPGTB ( MM3, MM4 ) /* q > p ? 0xff : 0x00 */ ;\ |
PAND ( MM4, MM1 ) /* q > p ? p : 0 */ ;\ |
PANDN ( MM2, MM4 ) /* q > p ? 0 : q */ ;\ |
POR ( MM1, MM4 ) /* q > p ? p : q */ ;\ |
GMB_STORE( rgba, MM4 ) |
#include "mmx_blendtmp.h" |
/* Blend max function |
* |
* TAG and LLTAG append _max to the names used by mmx_blendtmp.h.  INIT loads |
* the 0x80 byte pattern into MM7; MAIN XORs copies of both pixels with it so |
* that the byte compare PCMPGTB can be applied to unsigned values, then uses |
* the compare mask to select, per byte, the larger of p (rgba) and q (dest). |
* |
* The high half of the constant is pushed first so that the low half ends |
* up at the lower address read by MOVQ, the same order the min function |
* uses.  Both halves are currently equal, so the loaded value is unchanged. |
*/ |
#define TAG(x) CONCAT(x,_max) |
#define LLTAG(x) LLBL2(x,_max) |
/* Kevin F. Quinn 2nd July 2006 |
* Replace data segment constants with text-segment instructions |
#define INIT \ |
MOVQ ( CONTENT(const_80), MM7 ) |
*/ |
#define INIT \ |
PUSH_L ( CONST(const_80_h) ) /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/ ;\ |
PUSH_L ( CONST(const_80_l) ) ;\ |
MOVQ ( REGIND(ESP), MM7 ) ;\ |
ADD_L ( CONST(8), ESP) |
#define MAIN( rgba, dest ) \ |
GMB_LOAD( rgba, dest, MM1, MM2 ) ;\ |
MOVQ ( MM1, MM3 ) ;\ |
MOVQ ( MM2, MM4 ) ;\ |
PXOR ( MM7, MM3 ) /* unsigned -> signed */ ;\ |
PXOR ( MM7, MM4 ) /* unsigned -> signed */ ;\ |
PCMPGTB ( MM3, MM4 ) /* q > p ? 0xff : 0x00 */ ;\ |
PAND ( MM4, MM2 ) /* q > p ? q : 0 */ ;\ |
PANDN ( MM1, MM4 ) /* q > p ? 0 : p */ ;\ |
POR ( MM2, MM4 ) /* q > p ? q : p */ ;\ |
GMB_STORE( rgba, MM4 ) |
#include "mmx_blendtmp.h" |
/* Blend modulate function |
* |
* TAG and LLTAG append _modulate to the names used by mmx_blendtmp.h.  INIT |
* clears MM0 (the zero register for GMB_UNPACK) and loads the 0x0080 word |
* pattern into MM7 (the rounding bias for GMB_MULT_GSR).  MAIN widens both |
* pixels to words, multiplies them with GMB_MULT_GSR and stores the packed |
* product back to rgba. |
* |
* The high half of the constant is pushed first so that the low half ends |
* up at the lower address read by MOVQ, the same order the min function |
* uses.  Both halves are currently equal, so the loaded value is unchanged. |
*/ |
#define TAG(x) CONCAT(x,_modulate) |
#define LLTAG(x) LLBL2(x,_modulate) |
/* Kevin F. Quinn 2nd July 2006 |
* Replace data segment constants with text-segment instructions |
#define INIT \ |
MOVQ ( CONTENT(const_0080), MM7 ) |
*/ |
#define INIT \ |
PXOR ( MM0, MM0 ) /* 0x0000 | 0x0000 | 0x0000 | 0x0000 */ ;\ |
PUSH_L ( CONST(const_0080_h) ) /* 0x0080 | 0x0080 | 0x0080 | 0x0080 */ ;\ |
PUSH_L ( CONST(const_0080_l) ) ;\ |
MOVQ ( REGIND(ESP), MM7 ) ;\ |
ADD_L ( CONST(8), ESP) |
#define MAIN( rgba, dest ) \ |
GMB_LOAD( rgba, dest, MM1, MM2 ) ;\ |
GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 ) ;\ |
GMB_MULT_GSR( MM1, MM2, MM4, MM5, MM7 ) ;\ |
GMB_PACK( MM2, MM5 ) ;\ |
GMB_STORE( rgba, MM2 ) |
#include "mmx_blendtmp.h" |
#endif |
#if defined (__ELF__) && defined (__linux__) |
.section .note.GNU-stack,"",%progbits |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/mmx_blendtmp.h |
---|
0,0 → 1,114 |
/* |
* Written by José Fonseca <j_r_fonseca@yahoo.co.uk> |
*/ |
/* |
* void _mesa_mmx_blend( struct gl_context *ctx, |
* GLuint n, |
* const GLubyte mask[], |
* GLchan rgba[][4], |
* CONST GLchan dest[][4] ) |
* |
* Function template.  The including file must define TAG and LLTAG (symbol |
* and local-label naming), INIT (one-time register setup) and MAIN (the |
* per-pixel work); all four are #undef'ed at the end of this file. |
* |
* n, mask, rgba and dest are read from 12, 16, 20 and 24(EBP); no other |
* argument slot is loaded here.  Nothing is done when n is 0. |
* |
* MAIN is expanded three times: with ONE(x) = x for a single pixel when bit 2 |
* of the rgba address is set (run-in) and for a final odd pixel (run-out), |
* and with TWO(x) = x for the two-pixel main loop.  A single pixel is skipped |
* when its mask byte is 0; in the main loop a pair is skipped only when both |
* mask bytes are 0. |
* |
*/ |
ALIGNTEXT16 |
GLOBL GLNAME( TAG(_mesa_mmx_blend) ) |
HIDDEN( TAG(_mesa_mmx_blend) ) |
GLNAME( TAG(_mesa_mmx_blend) ): |
PUSH_L ( EBP ) |
MOV_L ( ESP, EBP ) |
PUSH_L ( ESI ) |
PUSH_L ( EDI ) |
PUSH_L ( EBX ) |
MOV_L ( REGOFF(12, EBP), ECX ) /* n */ |
CMP_L ( CONST(0), ECX) |
JE ( LLTAG(GMB_return) ) |
MOV_L ( REGOFF(16, EBP), EBX ) /* mask */ |
MOV_L ( REGOFF(20, EBP), EDI ) /* rgba */ |
MOV_L ( REGOFF(24, EBP), ESI ) /* dest */ |
INIT |
TEST_L ( CONST(4), EDI ) /* align rgba on an 8-byte boundary */ |
JZ ( LLTAG(GMB_align_end) ) |
CMP_B ( CONST(0), REGIND(EBX) ) /* *mask == 0 */ |
JE ( LLTAG(GMB_align_continue) ) |
/* runin */ |
#define ONE(x) x |
#define TWO(x) |
MAIN ( EDI, ESI ) |
#undef ONE |
#undef TWO |
LLTAG(GMB_align_continue): |
DEC_L ( ECX ) /* n -= 1 */ |
INC_L ( EBX ) /* mask += 1 */ |
ADD_L ( CONST(4), EDI ) /* rgba += 1 */ |
ADD_L ( CONST(4), ESI ) /* dest += 1 */ |
LLTAG(GMB_align_end): |
CMP_L ( CONST(2), ECX) |
JB ( LLTAG(GMB_loop_end) ) |
ALIGNTEXT16 |
LLTAG(GMB_loop_begin): |
CMP_W ( CONST(0), REGIND(EBX) ) /* *mask == 0 && *(mask + 1) == 0 */ |
JE ( LLTAG(GMB_loop_continue) ) |
/* main loop */ |
#define ONE(x) |
#define TWO(x) x |
MAIN ( EDI, ESI ) |
#undef ONE |
#undef TWO |
LLTAG(GMB_loop_continue): |
DEC_L ( ECX ) |
DEC_L ( ECX ) /* n -= 2 */ |
ADD_L ( CONST(2), EBX ) /* mask += 2 */ |
ADD_L ( CONST(8), EDI ) /* rgba += 2 */ |
ADD_L ( CONST(8), ESI ) /* dest += 2 */ |
CMP_L ( CONST(2), ECX ) |
JAE ( LLTAG(GMB_loop_begin) ) |
LLTAG(GMB_loop_end): |
CMP_L ( CONST(1), ECX ) |
JB ( LLTAG(GMB_done) ) |
CMP_B ( CONST(0), REGIND(EBX) ) /* *mask == 0 */ |
JE ( LLTAG(GMB_done) ) |
/* runout */ |
#define ONE(x) x |
#define TWO(x) |
MAIN ( EDI, ESI ) |
#undef ONE |
#undef TWO |
LLTAG(GMB_done): |
EMMS |
LLTAG(GMB_return): |
POP_L ( EBX ) |
POP_L ( EDI ) |
POP_L ( ESI ) |
MOV_L ( EBP, ESP ) |
POP_L ( EBP ) |
RET |
#undef TAG |
#undef LLTAG |
#undef INIT |
#undef MAIN |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/norm_args.h |
---|
0,0 → 1,57 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* |
* Normal transform function interface for assembly code. Simply define |
* FRAME_OFFSET to the number of bytes pushed onto the stack before |
* using the ARG_* argument macros. |
* |
* Each ARG_* macro expands to an ESP-relative operand whose displacement is |
* FRAME_OFFSET plus the matching OFFSET_* value, so FRAME_OFFSET must be |
* defined by the including file before any ARG_* macro is used. |
* |
* Gareth Hughes |
*/ |
#ifndef __NORM_ARGS_H__ |
#define __NORM_ARGS_H__ |
/* Offsets for normal_func arguments |
* |
* typedef void (*normal_func)( const GLmatrix *mat, |
* GLfloat scale, |
* const GLvector4f *in, |
* const GLfloat lengths[], |
* GLvector4f *dest ); |
* |
* The offsets advance in 4-byte steps starting at 4 - presumably the slot at |
* offset 0 holds the return address (TODO confirm against the callers). |
*/ |
#define OFFSET_MAT 4 |
#define OFFSET_SCALE 8 |
#define OFFSET_IN 12 |
#define OFFSET_LENGTHS 16 |
#define OFFSET_DEST 20 |
#define ARG_MAT REGOFF(FRAME_OFFSET+OFFSET_MAT, ESP) |
#define ARG_SCALE REGOFF(FRAME_OFFSET+OFFSET_SCALE, ESP) |
#define ARG_IN REGOFF(FRAME_OFFSET+OFFSET_IN, ESP) |
#define ARG_LENGTHS REGOFF(FRAME_OFFSET+OFFSET_LENGTHS, ESP) |
#define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP) |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/read_rgba_span_x86.S |
---|
0,0 → 1,686 |
/* |
* (C) Copyright IBM Corporation 2004 |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/** |
* \file read_rgba_span_x86.S |
* Optimized routines to transfer pixel data from the framebuffer to a |
* buffer in main memory. |
* |
* \author Ian Romanick <idr@us.ibm.com> |
*/ |
.file "read_rgba_span_x86.S" |
#if !defined(__DJGPP__) && !defined(__MINGW32__) && !defined(__APPLE__) /* this one cries for assyntax.h */ |
/* Kevin F. Quinn 2nd July 2006 |
* Replaced data segment constants with text-segment instructions. |
* |
* LOAD_MASK(mvins,m1,m2) pushes four copies of 0xff00ff00 and loads them from |
* the top of the stack into m1 with the move instruction mvins, does the same |
* with 0x00ff0000 for m2, then adds 32 to %esp to release the eight pushed |
* values.  The stack pointer is therefore unchanged after the macro. |
*/ |
#define LOAD_MASK(mvins,m1,m2) \ |
pushl $0xff00ff00 ;\ |
pushl $0xff00ff00 ;\ |
pushl $0xff00ff00 ;\ |
pushl $0xff00ff00 ;\ |
mvins (%esp), m1 ;\ |
pushl $0x00ff0000 ;\ |
pushl $0x00ff0000 ;\ |
pushl $0x00ff0000 ;\ |
pushl $0x00ff0000 ;\ |
mvins (%esp), m2 ;\ |
addl $32, %esp |
/* I implemented these as macros because they appear in several places, |
* and I've tweaked them a number of times. I got tired of changing every |
* place they appear. :) |
* |
* DO_ONE_PIXEL() reads one 32-bit pixel from (%ebx), reorders its bytes with |
* bswap followed by an 8-bit rotate right, writes it to (%ecx) and advances |
* both pointers by 4.  %eax is used as scratch. |
*/ |
#define DO_ONE_PIXEL() \ |
movl (%ebx), %eax ; \ |
addl $4, %ebx ; \ |
bswap %eax /* ARGB -> BGRA */ ; \ |
rorl $8, %eax /* BGRA -> ABGR */ ; \ |
movl %eax, (%ecx) /* ABGR -> R, G, B, A */ ; \ |
addl $4, %ecx |
/* DO_ONE_LAST_PIXEL(): same conversion as DO_ONE_PIXEL(), intended for the
 * final pixel of a span: the source (%ebx) and destination (%ecx) pointers
 * are NOT advanced.  Clobbers %eax.
 *
 * The last line deliberately has no trailing backslash, so the macro body
 * ends here regardless of what follows the definition.
 */
#define DO_ONE_LAST_PIXEL() \
	movl	(%ebx), %eax ; \
	bswap	%eax          /* ARGB -> BGRA */ ; \
	rorl	$8, %eax      /* BGRA -> ABGR */ ; \
	movl	%eax, (%ecx)  /* ABGR -> R, G, B, A */
/** |
* MMX optimized version of the BGRA8888_REV to RGBA copy routine. |
* |
* Reads three stack arguments: source pointer, destination pointer and
* number of pixels to copy.  Returns without converting anything when the
* count is <= 0.  Uses %eax, %ecx, %edx and %mm0-%mm4; %ebx is saved on
* entry and restored before returning.
*
* \warning |
* This function assumes that the caller will issue the EMMS instruction |
* at the correct places. |
*/ |
.globl _generic_read_RGBA_span_BGRA8888_REV_MMX |
#ifndef USE_DRICORE |
.hidden _generic_read_RGBA_span_BGRA8888_REV_MMX |
#endif |
.type _generic_read_RGBA_span_BGRA8888_REV_MMX, @function |
_generic_read_RGBA_span_BGRA8888_REV_MMX: |
/* Preserve %ebx; it is popped again at .L20. */
pushl %ebx |
#ifdef USE_INNER_EMMS |
emms |
#endif |
/* %mm1 <- 0xff00ff00 per dword, %mm2 <- 0x00ff0000 per dword. */
LOAD_MASK(movq,%mm1,%mm2) |
/* One register was pushed above, so the first argument is at 8(%esp). */
movl 8(%esp), %ebx /* source pointer */ |
movl 16(%esp), %edx /* number of pixels to copy */ |
movl 12(%esp), %ecx /* destination pointer */ |
testl %edx, %edx |
jle .L20 /* Bail if there's nothing to do. */ |
/* %eax = ((-source) >> 2) & 1.  When it is 1, one pixel is converted on
 * its own before the two-pixel loop (presumably so the 8-byte movq loads
 * start on an 8-byte boundary -- confirm).
 */
movl %ebx, %eax |
negl %eax |
sarl $2, %eax |
andl $1, %eax |
je .L17 |
subl %eax, %edx |
DO_ONE_PIXEL() |
.L17: |
/* Would it be faster to unroll this loop once and process 4 pixels |
* per pass, instead of just two? |
*/ |
/* %eax = remaining pixels / 2 = number of two-pixel iterations.  The
 * flags produced by shrl are what the jne at .L18 tests on loop entry.
 */
movl %edx, %eax |
shrl %eax |
jmp .L18 |
.L19: |
movq (%ebx), %mm0 |
addl $8, %ebx |
/* These 9 instructions do what PSHUFB (if there were such an |
* instruction) could do in 1. :( |
*/ |
movq %mm0, %mm3 |
movq %mm0, %mm4 |
pand %mm2, %mm3 |
psllq $16, %mm4 |
psrlq $16, %mm3 |
pand %mm2, %mm4 |
pand %mm1, %mm0 |
por %mm4, %mm3 |
por %mm3, %mm0 |
movq %mm0, (%ecx) |
addl $8, %ecx |
subl $1, %eax |
.L18: |
/* Loop while the iteration counter (flags from shrl or subl) is non-zero. */
jne .L19 |
#ifdef USE_INNER_EMMS |
emms |
#endif |
/* At this point there are either 1 or 0 pixels remaining to be |
* converted. Convert the last pixel, if needed. |
*/ |
testl $1, %edx |
je .L20 |
DO_ONE_LAST_PIXEL() |
.L20: |
popl %ebx |
ret |
.size _generic_read_RGBA_span_BGRA8888_REV_MMX, .-_generic_read_RGBA_span_BGRA8888_REV_MMX |
/** |
* SSE optimized version of the BGRA8888_REV to RGBA copy routine. SSE |
* instructions are only actually used to read data from the framebuffer. |
* In practice, the speed-up is pretty small. |
* |
* Reads three stack arguments: source pointer, destination pointer and
* number of pixels to copy.  Returns without converting anything when the
* count is <= 0.  %esi, %ebx and %ebp are saved on entry and restored
* before returning.
*
* \todo |
* Do some more testing and determine if there's any reason to have this |
* function in addition to the MMX version. |
* |
* \warning |
* This function assumes that the caller will issue the EMMS instruction |
* at the correct places. |
*/ |
.globl _generic_read_RGBA_span_BGRA8888_REV_SSE |
#ifndef USE_DRICORE |
.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE |
#endif |
.type _generic_read_RGBA_span_BGRA8888_REV_SSE, @function |
_generic_read_RGBA_span_BGRA8888_REV_SSE: |
pushl %esi |
pushl %ebx |
pushl %ebp |
#ifdef USE_INNER_EMMS |
emms |
#endif |
/* %mm1 <- 0xff00ff00 per dword, %mm2 <- 0x00ff0000 per dword. */
LOAD_MASK(movq,%mm1,%mm2) |
/* Three registers were pushed above, so the first argument is at 16(%esp). */
movl 16(%esp), %ebx /* source pointer */ |
movl 24(%esp), %edx /* number of pixels to copy */ |
movl 20(%esp), %ecx /* destination pointer */ |
testl %edx, %edx |
jle .L35 /* Bail if there's nothing to do. */ |
/* Remember the current %esp in %ebp, then carve out a 16-byte scratch
 * area whose address is rounded down to a multiple of 16.  The scratch
 * area is the bounce buffer used by the movaps store inside the loop.
 */
movl %esp, %ebp |
subl $16, %esp |
andl $0xfffffff0, %esp |
/* %esi = min(((-source) & 15) >> 2, count): pixels converted before the
 * 16-byte loop.  %edx is reduced to the pixels left after that prefix.
 */
movl %ebx, %eax |
movl %edx, %esi |
negl %eax |
andl $15, %eax |
sarl $2, %eax |
cmpl %edx, %eax |
cmovle %eax, %esi |
subl %esi, %edx |
/* Prefix: one pixel if bit 0 of %esi is set ... */
testl $1, %esi |
je .L32 |
DO_ONE_PIXEL() |
.L32: |
/* ... and two more pixels (one movq) if bit 1 of %esi is set. */
testl $2, %esi |
je .L31 |
movq (%ebx), %mm0 |
addl $8, %ebx |
movq %mm0, %mm3 |
movq %mm0, %mm4 |
pand %mm2, %mm3 |
psllq $16, %mm4 |
psrlq $16, %mm3 |
pand %mm2, %mm4 |
pand %mm1, %mm0 |
por %mm4, %mm3 |
por %mm3, %mm0 |
movq %mm0, (%ecx) |
addl $8, %ecx |
.L31: |
/* %eax = remaining pixels / 4 = number of four-pixel iterations.  The
 * flags produced by shrl are what the jne at .L33 tests on loop entry.
 */
movl %edx, %eax |
shrl $2, %eax |
jmp .L33 |
.L34: |
movaps (%ebx), %xmm0 |
addl $16, %ebx |
/* This would be so much better if we could just move directly from |
* an SSE register to an MMX register. Unfortunately, that |
* functionality wasn't introduced until SSE2 with the MOVDQ2Q |
* instruction. |
*/ |
movaps %xmm0, (%esp) |
movq (%esp), %mm0 |
movq 8(%esp), %mm5 |
/* Two interleaved copies of the two-pixel shuffle: %mm0/%mm3/%mm4 handle
 * the low 8 bytes, %mm5/%mm6/%mm7 the high 8 bytes.
 */
movq %mm0, %mm3 |
movq %mm0, %mm4 |
movq %mm5, %mm6 |
movq %mm5, %mm7 |
pand %mm2, %mm3 |
pand %mm2, %mm6 |
psllq $16, %mm4 |
psllq $16, %mm7 |
psrlq $16, %mm3 |
psrlq $16, %mm6 |
pand %mm2, %mm4 |
pand %mm2, %mm7 |
pand %mm1, %mm0 |
pand %mm1, %mm5 |
por %mm4, %mm3 |
por %mm7, %mm6 |
por %mm3, %mm0 |
por %mm6, %mm5 |
movq %mm0, (%ecx) |
movq %mm5, 8(%ecx) |
addl $16, %ecx |
subl $1, %eax |
.L33: |
/* Loop while the iteration counter (flags from shrl or subl) is non-zero. */
jne .L34 |
#ifdef USE_INNER_EMMS |
emms |
#endif |
/* Release the scratch area: restore the %esp value saved in %ebp. */
movl %ebp, %esp |
/* At this point there are either [0, 3] pixels remaining to be |
* converted. |
*/ |
testl $2, %edx |
je .L36 |
movq (%ebx), %mm0 |
addl $8, %ebx |
movq %mm0, %mm3 |
movq %mm0, %mm4 |
pand %mm2, %mm3 |
psllq $16, %mm4 |
psrlq $16, %mm3 |
pand %mm2, %mm4 |
pand %mm1, %mm0 |
por %mm4, %mm3 |
por %mm3, %mm0 |
movq %mm0, (%ecx) |
addl $8, %ecx |
.L36: |
testl $1, %edx |
je .L35 |
DO_ONE_LAST_PIXEL() |
.L35: |
/* Pop in the reverse of the push order used on entry. */
popl %ebp |
popl %ebx |
popl %esi |
ret |
.size _generic_read_RGBA_span_BGRA8888_REV_SSE, .-_generic_read_RGBA_span_BGRA8888_REV_SSE |
/** |
* SSE2 optimized version of the BGRA8888_REV to RGBA copy routine. |
*
* Reads three stack arguments: source pointer, destination pointer and
* number of pixels to copy.  Returns without converting anything when the
* count is <= 0.  %esi and %ebx are saved on entry and restored before
* returning.  No MMX registers are used in this variant.
*/ |
.text |
.globl _generic_read_RGBA_span_BGRA8888_REV_SSE2 |
#ifndef USE_DRICORE |
.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE2 |
#endif |
.type _generic_read_RGBA_span_BGRA8888_REV_SSE2, @function |
_generic_read_RGBA_span_BGRA8888_REV_SSE2: |
pushl %esi |
pushl %ebx |
/* %xmm1 <- 0xff00ff00 per dword, %xmm2 <- 0x00ff0000 per dword. */
LOAD_MASK(movdqu,%xmm1,%xmm2) |
/* Two registers were pushed above, so the first argument is at 12(%esp). */
movl 12(%esp), %ebx /* source pointer */ |
movl 20(%esp), %edx /* number of pixels to copy */ |
movl 16(%esp), %ecx /* destination pointer */ |
movl %ebx, %eax |
movl %edx, %esi |
testl %edx, %edx |
jle .L46 /* Bail if there's nothing to do. */ |
/* If the source pointer isn't a multiple of 16 we have to process |
* a few pixels the "slow" way to get the address aligned for |
* the SSE fetch instructions. |
*/ |
/* %esi = min(((-source) & 15) >> 2, count): pixels converted before the
 * 16-byte loop.  %edx is reduced to the pixels left after that prefix.
 */
negl %eax |
andl $15, %eax |
sarl $2, %eax |
cmpl %edx, %eax |
cmovbe %eax, %esi |
subl %esi, %edx |
testl $1, %esi |
je .L41 |
DO_ONE_PIXEL() |
.L41: |
testl $2, %esi |
je .L40 |
/* Two pixels: 8-byte load, shuffle with byte-granular 128-bit shifts
 * (pslldq/psrldq by 2 bytes) and the two masks, 8-byte store.
 */
movq (%ebx), %xmm0 |
addl $8, %ebx |
movdqa %xmm0, %xmm3 |
movdqa %xmm0, %xmm4 |
andps %xmm1, %xmm0 |
andps %xmm2, %xmm3 |
pslldq $2, %xmm4 |
psrldq $2, %xmm3 |
andps %xmm2, %xmm4 |
orps %xmm4, %xmm3 |
orps %xmm3, %xmm0 |
movq %xmm0, (%ecx) |
addl $8, %ecx |
.L40: |
/* Would it be worth having a specialized version of this loop for |
* the case where the destination is 16-byte aligned? That version |
* would be identical except that it could use movdqa instead of |
* movdqu. |
*/ |
/* %eax = remaining pixels / 4 = number of four-pixel iterations.  The
 * flags produced by shrl are what the jne at .L42 tests on loop entry.
 */
movl %edx, %eax |
shrl $2, %eax |
jmp .L42 |
.L43: |
/* Aligned 16-byte load from the source, unaligned 16-byte store below. */
movdqa (%ebx), %xmm0 |
addl $16, %ebx |
movdqa %xmm0, %xmm3 |
movdqa %xmm0, %xmm4 |
andps %xmm1, %xmm0 |
andps %xmm2, %xmm3 |
pslldq $2, %xmm4 |
psrldq $2, %xmm3 |
andps %xmm2, %xmm4 |
orps %xmm4, %xmm3 |
orps %xmm3, %xmm0 |
movdqu %xmm0, (%ecx) |
addl $16, %ecx |
subl $1, %eax |
.L42: |
/* Loop while the iteration counter (flags from shrl or subl) is non-zero. */
jne .L43 |
/* There may be up to 3 pixels remaining to be copied. Take care |
* of them now. We do the 2 pixel case first because the data |
* will be aligned. |
*/ |
testl $2, %edx |
je .L47 |
movq (%ebx), %xmm0 |
addl $8, %ebx |
movdqa %xmm0, %xmm3 |
movdqa %xmm0, %xmm4 |
andps %xmm1, %xmm0 |
andps %xmm2, %xmm3 |
pslldq $2, %xmm4 |
psrldq $2, %xmm3 |
andps %xmm2, %xmm4 |
orps %xmm4, %xmm3 |
orps %xmm3, %xmm0 |
movq %xmm0, (%ecx) |
addl $8, %ecx |
.L47: |
testl $1, %edx |
je .L46 |
DO_ONE_LAST_PIXEL() |
.L46: |
/* Pop in the reverse of the push order used on entry. */
popl %ebx |
popl %esi |
ret |
.size _generic_read_RGBA_span_BGRA8888_REV_SSE2, .-_generic_read_RGBA_span_BGRA8888_REV_SSE2 |
/* Each *_L / *_H pair below is the low / high dword of one 64-bit constant:
 * the RGB565 routine pushes the _H half, then the _L half, and loads the
 * resulting quadword with a single movq.
 */
/* Per-word mask applied (pand) to the replicated RGB565 pixel. */
#define MASK_565_L 0x07e0f800 |
#define MASK_565_H 0x0000001f |
/* Setting SCALE_ADJUST to 5 gives a perfect match with the |
* classic C implementation in Mesa. Setting SCALE_ADJUST |
* to 0 is slightly faster but at a small cost to accuracy. |
*/ |
#define SCALE_ADJUST 5 |
/* PRESCALE_* is the pmullw multiplier and SCALE_* the pmulhuw multiplier;
 * both sets depend on the chosen SCALE_ADJUST.
 */
#if SCALE_ADJUST == 5 |
#define PRESCALE_L 0x00100001 |
#define PRESCALE_H 0x00000200 |
#define SCALE_L 0x40C620E8 |
#define SCALE_H 0x0000839d |
#elif SCALE_ADJUST == 0 |
#define PRESCALE_L 0x00200001 |
#define PRESCALE_H 0x00000800 |
#define SCALE_L 0x01040108 |
#define SCALE_H 0x00000108 |
#else |
#error SCALE_ADJUST must either be 5 or 0. |
#endif |
/* OR-ed into every converted pixel; only the highest word is non-zero. */
#define ALPHA_L 0x00000000 |
#define ALPHA_H 0x00ff0000 |
/**
 * MMX optimized version of the RGB565 to RGBA copy routine.
 *
 * Reads three stack arguments: source pointer, destination pointer and
 * number of pixels to copy.  Four pixels are converted per loop iteration;
 * a tail of two and/or one pixel is handled afterwards.  Nothing is written
 * when the count, viewed as a signed value, is negative.
 *
 * Register use: %eax source cursor, %edx destination cursor, %ecx counter,
 * %mm5 component mask, %mm6 pre-scale factors, %mm7 scale factors,
 * %mm3 alpha, %mm4 fetched pixels, %mm0/%mm2 work registers.
 */
	.text
	.globl _generic_read_RGBA_span_RGB565_MMX
#ifndef USE_DRICORE
	.hidden _generic_read_RGBA_span_RGB565_MMX
#endif
	.type	_generic_read_RGBA_span_RGB565_MMX, @function

_generic_read_RGBA_span_RGB565_MMX:

#ifdef USE_INNER_EMMS
	emms
#endif

	movl	4(%esp), %eax	/* source pointer */
	movl	8(%esp), %edx	/* destination pointer */
	movl	12(%esp), %ecx	/* number of pixels to copy */

	/* Build the four 64-bit constants on the stack (high dword pushed
	 * first) and load each one as soon as it is complete.
	 */
	pushl	$MASK_565_H
	pushl	$MASK_565_L
	movq	(%esp), %mm5
	pushl	$PRESCALE_H
	pushl	$PRESCALE_L
	movq	(%esp), %mm6
	pushl	$SCALE_H
	pushl	$SCALE_L
	movq	(%esp), %mm7
	pushl	$ALPHA_H
	pushl	$ALPHA_L
	movq	(%esp), %mm3
	addl	$32,%esp

	/* %ecx = count / 4 = number of four-pixel iterations.  Only SF and ZF
	 * are relied upon below: OF is undefined after a shift by more than
	 * one bit, so the sign is tested with js rather than jl.
	 */
	sarl	$2, %ecx
	js	.L01		/* Bail early if the count is negative. */
	jmp	.L02

.L03:
	/* Fetch 4 RGB565 pixels into %mm4.  Distribute the first and
	 * second pixels into the four words of %mm0 and %mm2.
	 */

	movq	(%eax), %mm4
	addl	$8, %eax

	pshufw	$0x00, %mm4, %mm0
	pshufw	$0x55, %mm4, %mm2


	/* Mask the pixels so that each word of each register contains only
	 * one color component.
	 */

	pand	%mm5, %mm0
	pand	%mm5, %mm2


	/* Adjust the component values so that they are as small as possible,
	 * but large enough so that we can multiply them by an unsigned 16-bit
	 * number and get a value as large as 0x00ff0000.
	 */

	pmullw	%mm6, %mm0
	pmullw	%mm6, %mm2
#if SCALE_ADJUST > 0
	psrlw	$SCALE_ADJUST, %mm0
	psrlw	$SCALE_ADJUST, %mm2
#endif

	/* Scale the input component values to be on the range
	 * [0, 0x00ff0000].  This is the real magic of the whole routine.
	 */

	pmulhuw	%mm7, %mm0
	pmulhuw	%mm7, %mm2


	/* Always set the alpha value to 0xff.
	 */

 	por %mm3, %mm0
 	por %mm3, %mm2


	/* Pack the 16-bit values to 8-bit values and store the converted
	 * pixel data.
	 */

	packuswb	%mm2, %mm0
	movq	%mm0, (%edx)
	addl	$8, %edx

	/* Same sequence for the third and fourth pixels of %mm4. */
	pshufw	$0xaa, %mm4, %mm0
	pshufw	$0xff, %mm4, %mm2

	pand	%mm5, %mm0
	pand	%mm5, %mm2
	pmullw	%mm6, %mm0
	pmullw	%mm6, %mm2
#if SCALE_ADJUST > 0
	psrlw	$SCALE_ADJUST, %mm0
	psrlw	$SCALE_ADJUST, %mm2
#endif
	pmulhuw	%mm7, %mm0
	pmulhuw	%mm7, %mm2

 	por %mm3, %mm0
 	por %mm3, %mm2

	packuswb	%mm2, %mm0

	movq	%mm0, (%edx)
	addl	$8, %edx

	subl	$1, %ecx
.L02:
	/* Loop while the iteration counter (flags from sarl or subl) is
	 * non-zero.
	 */
	jne	.L03


	/* At this point there can be at most 3 pixels left to process.  If
	 * there is either 2 or 3 left, process 2.
	 */

	/* %esp is back at its entry value, so the count is at 12(%esp) again. */
	movl	12(%esp), %ecx
	testl	$0x02, %ecx
	je	.L04

	movd	(%eax), %mm4
	addl	$4, %eax

	pshufw	$0x00, %mm4, %mm0
	pshufw	$0x55, %mm4, %mm2

	pand	%mm5, %mm0
	pand	%mm5, %mm2
	pmullw	%mm6, %mm0
	pmullw	%mm6, %mm2
#if SCALE_ADJUST > 0
	psrlw	$SCALE_ADJUST, %mm0
	psrlw	$SCALE_ADJUST, %mm2
#endif
	pmulhuw	%mm7, %mm0
	pmulhuw	%mm7, %mm2

 	por %mm3, %mm0
 	por %mm3, %mm2

	packuswb	%mm2, %mm0

	movq	%mm0, (%edx)
	addl	$8, %edx

.L04:
	/* At this point there can be at most 1 pixel left to process.
	 * Process it if needed.
	 */

	testl	$0x01, %ecx
	je	.L01

	/* Zero-extend the single 16-bit pixel; %ecx is free to reuse here. */
	movzwl	(%eax), %ecx
	movd	%ecx, %mm4

	pshufw	$0x00, %mm4, %mm0

	pand	%mm5, %mm0
	pmullw	%mm6, %mm0
#if SCALE_ADJUST > 0
	psrlw	$SCALE_ADJUST, %mm0
#endif
	pmulhuw	%mm7, %mm0

 	por %mm3, %mm0

	packuswb	%mm0, %mm0

	movd	%mm0, (%edx)

.L01:
#ifdef USE_INNER_EMMS
	emms
#endif
	ret
	.size	_generic_read_RGBA_span_RGB565_MMX, .-_generic_read_RGBA_span_RGB565_MMX
#endif /* !defined(__DJGPP__) && !defined(__MINGW32__) && !defined(__APPLE__) */ |
#if defined (__ELF__) && defined (__linux__) |
.section .note.GNU-stack,"",%progbits |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/read_rgba_span_x86.h |
---|
0,0 → 1,56 |
/* |
* (C) Copyright IBM Corporation 2004 |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/** |
* \file read_rgba_span_x86.h |
* |
* \author Ian Romanick <idr@us.ibm.com> |
*/ |
#ifndef READ_RGBA_SPAN_X86_H
#define READ_RGBA_SPAN_X86_H

#if defined(USE_SSE_ASM) || defined(USE_MMX_ASM)
#include "x86/common_x86_asm.h"
#endif

/* Every routine takes the source pixel pointer, the destination RGBA
 * pointer and the number of pixels to convert, in that order.
 */

#if defined(USE_SSE_ASM)
extern void _generic_read_RGBA_span_BGRA8888_REV_SSE2(const unsigned char *src,
                                                      unsigned char *dst,
                                                      unsigned count);

extern void _generic_read_RGBA_span_BGRA8888_REV_SSE(const unsigned char *src,
                                                     unsigned char *dst,
                                                     unsigned count);
#endif

#if defined(USE_MMX_ASM)
extern void _generic_read_RGBA_span_BGRA8888_REV_MMX(const unsigned char *src,
                                                     unsigned char *dst,
                                                     unsigned count);

extern void _generic_read_RGBA_span_RGB565_MMX(const unsigned char *src,
                                               unsigned char *dst,
                                               unsigned count);
#endif

#endif /* READ_RGBA_SPAN_X86_H */
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/rtasm/x86sse.c |
---|
0,0 → 1,1203 |
#ifdef USE_X86_ASM |
#if defined(__i386__) || defined(__386__) |
#include "main/imports.h" |
#include "x86sse.h" |
#define DISASSEM 0 |
#define X86_TWOB 0x0f |
#if 0
/* Disabled helper: reinterpret a function pointer as a byte pointer. */
static unsigned char *cptr( void (*label)() )
{
   unsigned long address = (unsigned long)label;

   return (unsigned char *)address;
}
#endif
static void do_realloc( struct x86_function *p ) |
{ |
if (p->size == 0) { |
p->size = 1024; |
p->store = _mesa_exec_malloc(p->size); |
p->csr = p->store; |
} |
else { |
unsigned used = p->csr - p->store; |
unsigned char *tmp = p->store; |
p->size *= 2; |
p->store = _mesa_exec_malloc(p->size); |
memcpy(p->store, tmp, used); |
p->csr = p->store + used; |
_mesa_exec_free(tmp); |
} |
} |
/* Emit bytes to the instruction stream: |
*/ |
static unsigned char *reserve( struct x86_function *p, int bytes ) |
{ |
if (p->csr + bytes - p->store > p->size) |
do_realloc(p); |
{ |
unsigned char *csr = p->csr; |
p->csr += bytes; |
return csr; |
} |
} |
/* Append one (signed) byte to the instruction stream. */
static void emit_1b( struct x86_function *p, char b0 )
{
   char *out = (char *)reserve(p, 1);

   out[0] = b0;
}
/* Append one int (in host byte order) to the instruction stream.
 *
 * The cursor returned by reserve() can sit at any byte offset, so the value
 * is copied with memcpy() instead of being stored through an `int *` cast,
 * which would be a misaligned access as far as the C language is concerned.
 */
static void emit_1i( struct x86_function *p, int i0 )
{
   unsigned char *out = reserve(p, sizeof(i0));

   memcpy(out, &i0, sizeof(i0));
}
/* Append one unsigned byte to the instruction stream. */
static void emit_1ub( struct x86_function *p, unsigned char b0 )
{
   unsigned char *out = reserve(p, 1);

   out[0] = b0;
}
/* Append two unsigned bytes, b0 first, to the instruction stream. */
static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 )
{
   unsigned char *out = reserve(p, 2);

   out[0] = b0;
   out[1] = b1;
}
/* Append three unsigned bytes, b0 first, to the instruction stream. */
static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 )
{
   unsigned char *out = reserve(p, 3);

   out[0] = b0;
   out[1] = b1;
   out[2] = b2;
}
/* Build a modRM byte + possible displacement. No treatment of SIB |
* indexing. BZZT - no way to encode an absolute address. |
*/ |
static void emit_modrm( struct x86_function *p, |
struct x86_reg reg, |
struct x86_reg regmem ) |
{ |
unsigned char val = 0; |
assert(reg.mod == mod_REG); |
val |= regmem.mod << 6; /* mod field */ |
val |= reg.idx << 3; /* reg field */ |
val |= regmem.idx; /* r/m field */ |
emit_1ub(p, val); |
/* Oh-oh we've stumbled into the SIB thing. |
*/ |
if (regmem.file == file_REG32 && |
regmem.idx == reg_SP) { |
emit_1ub(p, 0x24); /* simplistic! */ |
} |
switch (regmem.mod) { |
case mod_REG: |
case mod_INDIRECT: |
break; |
case mod_DISP8: |
emit_1b(p, regmem.disp); |
break; |
case mod_DISP32: |
emit_1i(p, regmem.disp); |
break; |
default: |
assert(0); |
break; |
} |
} |
static void emit_modrm_noreg( struct x86_function *p, |
unsigned op, |
struct x86_reg regmem ) |
{ |
struct x86_reg dummy = x86_make_reg(file_REG32, op); |
emit_modrm(p, dummy, regmem); |
} |
/* Many x86 instructions have two opcodes to cope with the situations |
* where the destination is a register or memory reference |
* respectively. This function selects the correct opcode based on |
* the arguments presented. |
*/ |
static void emit_op_modrm( struct x86_function *p, |
unsigned char op_dst_is_reg, |
unsigned char op_dst_is_mem, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
switch (dst.mod) { |
case mod_REG: |
emit_1ub(p, op_dst_is_reg); |
emit_modrm(p, dst, src); |
break; |
case mod_INDIRECT: |
case mod_DISP32: |
case mod_DISP8: |
assert(src.mod == mod_REG); |
emit_1ub(p, op_dst_is_mem); |
emit_modrm(p, src, dst); |
break; |
default: |
assert(0); |
break; |
} |
} |
/* Create and manipulate registers and regmem values: |
*/ |
struct x86_reg x86_make_reg( enum x86_reg_file file, |
enum x86_reg_name idx ) |
{ |
struct x86_reg reg; |
reg.file = file; |
reg.idx = idx; |
reg.mod = mod_REG; |
reg.disp = 0; |
return reg; |
} |
struct x86_reg x86_make_disp( struct x86_reg reg, |
int disp ) |
{ |
assert(reg.file == file_REG32); |
if (reg.mod == mod_REG) |
reg.disp = disp; |
else |
reg.disp += disp; |
if (reg.disp == 0) |
reg.mod = mod_INDIRECT; |
else if (reg.disp <= 127 && reg.disp >= -128) |
reg.mod = mod_DISP8; |
else |
reg.mod = mod_DISP32; |
return reg; |
} |
/* Memory operand addressed by `reg`, with no extra displacement added. */
struct x86_reg x86_deref( struct x86_reg reg )
{
   const int no_offset = 0;

   return x86_make_disp(reg, no_offset);
}
struct x86_reg x86_get_base_reg( struct x86_reg reg ) |
{ |
return x86_make_reg( reg.file, reg.idx ); |
} |
unsigned char *x86_get_label( struct x86_function *p ) |
{ |
return p->csr; |
} |
/*********************************************************************** |
* x86 instructions |
*/ |
void x86_jcc( struct x86_function *p, |
enum x86_cc cc, |
unsigned char *label ) |
{ |
int offset = label - (x86_get_label(p) + 2); |
if (offset <= 127 && offset >= -128) { |
emit_1ub(p, 0x70 + cc); |
emit_1b(p, (char) offset); |
} |
else { |
offset = label - (x86_get_label(p) + 6); |
emit_2ub(p, 0x0f, 0x80 + cc); |
emit_1i(p, offset); |
} |
} |
/* Always use a 32bit offset for forward jumps: |
*/ |
unsigned char *x86_jcc_forward( struct x86_function *p, |
enum x86_cc cc ) |
{ |
emit_2ub(p, 0x0f, 0x80 + cc); |
emit_1i(p, 0); |
return x86_get_label(p); |
} |
/* Emit JMP rel32 (0xe9) with a zero placeholder offset; returns the
 * position just past the instruction for a later fixup.
 */
unsigned char *x86_jmp_forward( struct x86_function *p )
{
   const int placeholder = 0;

   emit_1ub(p, 0xe9);
   emit_1i(p, placeholder);
   return x86_get_label(p);
}
/* Emit CALL rel32 (0xe8) with a zero placeholder offset; returns the
 * position just past the instruction for a later fixup.
 */
unsigned char *x86_call_forward( struct x86_function *p )
{
   const int placeholder = 0;

   emit_1ub(p, 0xe8);
   emit_1i(p, placeholder);
   return x86_get_label(p);
}
/* Fixup offset from forward jump: |
*/ |
void x86_fixup_fwd_jump( struct x86_function *p, |
unsigned char *fixup ) |
{ |
*(int *)(fixup - 4) = x86_get_label(p) - fixup; |
} |
/* Emit JMP rel32 (0xe9) to the already-known address `label`. */
void x86_jmp( struct x86_function *p, unsigned char *label )
{
   emit_1ub(p, 0xe9);

   /* The cursor now points at the 4-byte offset field; the offset is
    * relative to the end of that field.
    */
   emit_1i(p, label - x86_get_label(p) - 4);
}
#if 0
/* This doesn't work once we start reallocating & copying the
 * generated code on buffer fills, because the call is relative to the
 * current pc.
 */
void x86_call( struct x86_function *p, void (*label)() )
{
   emit_1ub(p, 0xe8);
   emit_1i(p, cptr(label) - x86_get_label(p) - 4);
}
#else
/* Emit an indirect call through `reg` (opcode 0xff with extension /2). */
void x86_call( struct x86_function *p, struct x86_reg reg )
{
   const unsigned opcode_ext = 2;

   emit_1ub(p, 0xff);
   emit_modrm_noreg(p, opcode_ext, reg);
}
#endif
/* michal: |
* Temporary. As I need immediate operands, and dont want to mess with the codegen, |
* I load the immediate into general purpose register and use it. |
*/ |
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) |
{ |
assert(dst.mod == mod_REG); |
emit_1ub(p, 0xb8 + dst.idx); |
emit_1i(p, imm); |
} |
void x86_push( struct x86_function *p, |
struct x86_reg reg ) |
{ |
assert(reg.mod == mod_REG); |
emit_1ub(p, 0x50 + reg.idx); |
p->stack_offset += 4; |
} |
void x86_pop( struct x86_function *p, |
struct x86_reg reg ) |
{ |
assert(reg.mod == mod_REG); |
emit_1ub(p, 0x58 + reg.idx); |
p->stack_offset -= 4; |
} |
void x86_inc( struct x86_function *p, |
struct x86_reg reg ) |
{ |
assert(reg.mod == mod_REG); |
emit_1ub(p, 0x40 + reg.idx); |
} |
void x86_dec( struct x86_function *p, |
struct x86_reg reg ) |
{ |
assert(reg.mod == mod_REG); |
emit_1ub(p, 0x48 + reg.idx); |
} |
/* Emit RET (0xc3). */
void x86_ret( struct x86_function *p )
{
   const unsigned char opcode = 0xc3;

   emit_1ub(p, opcode);
}
/* Emit SAHF (0x9e). */
void x86_sahf( struct x86_function *p )
{
   const unsigned char opcode = 0x9e;

   emit_1ub(p, opcode);
}
void x86_mov( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_op_modrm( p, 0x8b, 0x89, dst, src ); |
} |
void x86_xor( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_op_modrm( p, 0x33, 0x31, dst, src ); |
} |
void x86_cmp( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_op_modrm( p, 0x3b, 0x39, dst, src ); |
} |
void x86_lea( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_1ub(p, 0x8d); |
emit_modrm( p, dst, src ); |
} |
void x86_test( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_1ub(p, 0x85); |
emit_modrm( p, dst, src ); |
} |
void x86_add( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_op_modrm(p, 0x03, 0x01, dst, src ); |
} |
void x86_mul( struct x86_function *p, |
struct x86_reg src ) |
{ |
assert (src.file == file_REG32 && src.mod == mod_REG); |
emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src ); |
} |
void x86_sub( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_op_modrm(p, 0x2b, 0x29, dst, src ); |
} |
void x86_or( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_op_modrm( p, 0x0b, 0x09, dst, src ); |
} |
void x86_and( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_op_modrm( p, 0x23, 0x21, dst, src ); |
} |
/*********************************************************************** |
* SSE instructions |
*/ |
void sse_movss( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_2ub(p, 0xF3, X86_TWOB); |
emit_op_modrm( p, 0x10, 0x11, dst, src ); |
} |
void sse_movaps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_1ub(p, X86_TWOB); |
emit_op_modrm( p, 0x28, 0x29, dst, src ); |
} |
void sse_movups( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_1ub(p, X86_TWOB); |
emit_op_modrm( p, 0x10, 0x11, dst, src ); |
} |
void sse_movhps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
assert(dst.mod != mod_REG || src.mod != mod_REG); |
emit_1ub(p, X86_TWOB); |
emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */ |
} |
void sse_movlps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
assert(dst.mod != mod_REG || src.mod != mod_REG); |
emit_1ub(p, X86_TWOB); |
emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */ |
} |
void sse_maxps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_2ub(p, X86_TWOB, 0x5F); |
emit_modrm( p, dst, src ); |
} |
void sse_maxss( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_3ub(p, 0xF3, X86_TWOB, 0x5F); |
emit_modrm( p, dst, src ); |
} |
void sse_divss( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_3ub(p, 0xF3, X86_TWOB, 0x5E); |
emit_modrm( p, dst, src ); |
} |
void sse_minps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_2ub(p, X86_TWOB, 0x5D); |
emit_modrm( p, dst, src ); |
} |
void sse_subps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_2ub(p, X86_TWOB, 0x5C); |
emit_modrm( p, dst, src ); |
} |
void sse_mulps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_2ub(p, X86_TWOB, 0x59); |
emit_modrm( p, dst, src ); |
} |
void sse_mulss( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_3ub(p, 0xF3, X86_TWOB, 0x59); |
emit_modrm( p, dst, src ); |
} |
void sse_addps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_2ub(p, X86_TWOB, 0x58); |
emit_modrm( p, dst, src ); |
} |
void sse_addss( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_3ub(p, 0xF3, X86_TWOB, 0x58); |
emit_modrm( p, dst, src ); |
} |
void sse_andnps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_2ub(p, X86_TWOB, 0x55); |
emit_modrm( p, dst, src ); |
} |
void sse_andps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_2ub(p, X86_TWOB, 0x54); |
emit_modrm( p, dst, src ); |
} |
void sse_rsqrtps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_2ub(p, X86_TWOB, 0x52); |
emit_modrm( p, dst, src ); |
} |
void sse_rsqrtss( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_3ub(p, 0xF3, X86_TWOB, 0x52); |
emit_modrm( p, dst, src ); |
} |
void sse_movhlps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
assert(dst.mod == mod_REG && src.mod == mod_REG); |
emit_2ub(p, X86_TWOB, 0x12); |
emit_modrm( p, dst, src ); |
} |
void sse_movlhps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
assert(dst.mod == mod_REG && src.mod == mod_REG); |
emit_2ub(p, X86_TWOB, 0x16); |
emit_modrm( p, dst, src ); |
} |
void sse_orps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_2ub(p, X86_TWOB, 0x56); |
emit_modrm( p, dst, src ); |
} |
void sse_xorps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_2ub(p, X86_TWOB, 0x57); |
emit_modrm( p, dst, src ); |
} |
void sse_cvtps2pi( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
assert(dst.file == file_MMX && |
(src.file == file_XMM || src.mod != mod_REG)); |
p->need_emms = 1; |
emit_2ub(p, X86_TWOB, 0x2d); |
emit_modrm( p, dst, src ); |
} |
/* Shufps can also be used to implement a reduced swizzle when dest == |
* arg0. |
*/ |
void sse_shufps( struct x86_function *p, |
struct x86_reg dest, |
struct x86_reg arg0, |
unsigned char shuf) |
{ |
emit_2ub(p, X86_TWOB, 0xC6); |
emit_modrm(p, dest, arg0); |
emit_1ub(p, shuf); |
} |
void sse_cmpps( struct x86_function *p, |
struct x86_reg dest, |
struct x86_reg arg0, |
unsigned char cc) |
{ |
emit_2ub(p, X86_TWOB, 0xC2); |
emit_modrm(p, dest, arg0); |
emit_1ub(p, cc); |
} |
void sse_pmovmskb( struct x86_function *p, |
struct x86_reg dest, |
struct x86_reg src) |
{ |
emit_3ub(p, 0x66, X86_TWOB, 0xD7); |
emit_modrm(p, dest, src); |
} |
/*********************************************************************** |
* SSE2 instructions |
*/ |
/** |
* Perform a reduced swizzle: |
*/ |
void sse2_pshufd( struct x86_function *p, |
struct x86_reg dest, |
struct x86_reg arg0, |
unsigned char shuf) |
{ |
emit_3ub(p, 0x66, X86_TWOB, 0x70); |
emit_modrm(p, dest, arg0); |
emit_1ub(p, shuf); |
} |
void sse2_cvttps2dq( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); |
emit_modrm( p, dst, src ); |
} |
void sse2_cvtps2dq( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_3ub(p, 0x66, X86_TWOB, 0x5B); |
emit_modrm( p, dst, src ); |
} |
void sse2_packssdw( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_3ub(p, 0x66, X86_TWOB, 0x6B); |
emit_modrm( p, dst, src ); |
} |
void sse2_packsswb( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_3ub(p, 0x66, X86_TWOB, 0x63); |
emit_modrm( p, dst, src ); |
} |
void sse2_packuswb( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_3ub(p, 0x66, X86_TWOB, 0x67); |
emit_modrm( p, dst, src ); |
} |
void sse2_rcpps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_2ub(p, X86_TWOB, 0x53); |
emit_modrm( p, dst, src ); |
} |
void sse2_rcpss( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_3ub(p, 0xF3, X86_TWOB, 0x53); |
emit_modrm( p, dst, src ); |
} |
void sse2_movd( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
emit_2ub(p, 0x66, X86_TWOB); |
emit_op_modrm( p, 0x6e, 0x7e, dst, src ); |
} |
/*********************************************************************** |
* x87 instructions |
*/ |
/* Emit FIST to `dst` (opcode 0xdb, extension /2). */
void x87_fist( struct x86_function *p, struct x86_reg dst )
{
   const unsigned opcode_ext = 2;

   emit_1ub(p, 0xdb);
   emit_modrm_noreg(p, opcode_ext, dst);
}
/* Emit FISTP to `dst` (opcode 0xdb, extension /3). */
void x87_fistp( struct x86_function *p, struct x86_reg dst )
{
   const unsigned opcode_ext = 3;

   emit_1ub(p, 0xdb);
   emit_modrm_noreg(p, opcode_ext, dst);
}
/* Emit FILD from `arg` (opcode 0xdf, extension /0).
 *
 * NOTE(review): x87_fist()/x87_fistp() in this file use opcode 0xdb while
 * this loader uses 0xdf; confirm against the Intel SDM that the operand
 * width selected by 0xdf /0 is the one callers expect.
 */
void x87_fild( struct x86_function *p, struct x86_reg arg )
{
   const unsigned opcode_ext = 0;

   emit_1ub(p, 0xdf);
   emit_modrm_noreg(p, opcode_ext, arg);
}
/* Emit FLDZ (D9 EE). */
void x87_fldz( struct x86_function *p )
{
   const unsigned char second = 0xee;

   emit_2ub(p, 0xd9, second);
}
void x87_fldcw( struct x86_function *p, struct x86_reg arg ) |
{ |
assert(arg.file == file_REG32); |
assert(arg.mod != mod_REG); |
emit_1ub(p, 0xd9); |
emit_modrm_noreg(p, 5, arg); |
} |
/* Emit FLD1 (D9 E8). */
void x87_fld1( struct x86_function *p )
{
   const unsigned char second = 0xe8;

   emit_2ub(p, 0xd9, second);
}
/* Emit FLDL2E (D9 EA). */
void x87_fldl2e( struct x86_function *p )
{
   const unsigned char second = 0xea;

   emit_2ub(p, 0xd9, second);
}
/* Emit FLDLN2 (D9 ED). */
void x87_fldln2( struct x86_function *p )
{
   const unsigned char second = 0xed;

   emit_2ub(p, 0xd9, second);
}
/* Emit FWAIT (0x9b). */
void x87_fwait( struct x86_function *p )
{
   const unsigned char opcode = 0x9b;

   emit_1ub(p, opcode);
}
/* Emit FNCLEX (DB E2). */
void x87_fnclex( struct x86_function *p )
{
   const unsigned char second = 0xe2;

   emit_2ub(p, 0xdb, second);
}
/* Emit FCLEX as the two-instruction sequence FWAIT, FNCLEX. */
void x87_fclex( struct x86_function *p )
{
   x87_fwait(p);    /* wait first ...            */
   x87_fnclex(p);   /* ... then clear exceptions */
}
static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg, |
unsigned char dst0ub0, |
unsigned char dst0ub1, |
unsigned char arg0ub0, |
unsigned char arg0ub1, |
unsigned char argmem_noreg) |
{ |
assert(dst.file == file_x87); |
if (arg.file == file_x87) { |
if (dst.idx == 0) |
emit_2ub(p, dst0ub0, dst0ub1+arg.idx); |
else if (arg.idx == 0) |
emit_2ub(p, arg0ub0, arg0ub1+arg.idx); |
else |
assert(0); |
} |
else if (dst.idx == 0) { |
assert(arg.file == file_REG32); |
emit_1ub(p, 0xd8); |
emit_modrm_noreg(p, argmem_noreg, arg); |
} |
else |
assert(0); |
} |
/* Emit FMUL dst, arg.
 *
 * Register forms: D8 C8+i (destination st(0)) and DC C8+i (source st(0)).
 * Memory form: D8 /1.  The opcode extension must be 1 for a multiply;
 * /4 is the extension x87_fsub() uses, so passing 4 here would emit a
 * subtraction for memory operands.
 */
void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   x87_arith_op(p, dst, arg,
		0xd8, 0xc8,
		0xdc, 0xc8,
		1);
}
/* Emit FSUB dst, arg: D8 E0+i (destination st(0)), DC E8+i (source st(0)),
 * memory form D8 /4.
 */
void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   const unsigned char mem_ext = 4;

   x87_arith_op(p, dst, arg, 0xd8, 0xe0, 0xdc, 0xe8, mem_ext);
}
/* Emit FSUBR dst, arg: D8 E8+i (destination st(0)), DC E0+i (source st(0)),
 * memory form D8 /5.
 */
void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   const unsigned char mem_ext = 5;

   x87_arith_op(p, dst, arg, 0xd8, 0xe8, 0xdc, 0xe0, mem_ext);
}
/* Emit FADD dst, arg: D8 C0+i (destination st(0)), DC C0+i (source st(0)),
 * memory form D8 /0.
 */
void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   const unsigned char mem_ext = 0;

   x87_arith_op(p, dst, arg, 0xd8, 0xc0, 0xdc, 0xc0, mem_ext);
}
/* Emit FDIV dst, arg: D8 F0+i (destination st(0)), DC F8+i (source st(0)),
 * memory form D8 /6.
 */
void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   const unsigned char mem_ext = 6;

   x87_arith_op(p, dst, arg, 0xd8, 0xf0, 0xdc, 0xf8, mem_ext);
}
/* Emit FDIVR dst, arg: D8 F8+i (destination st(0)), DC F0+i (source st(0)),
 * memory form D8 /7.
 */
void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   const unsigned char mem_ext = 7;

   x87_arith_op(p, dst, arg, 0xd8, 0xf8, 0xdc, 0xf0, mem_ext);
}
void x87_fmulp( struct x86_function *p, struct x86_reg dst ) |
{ |
assert(dst.file == file_x87); |
assert(dst.idx >= 1); |
emit_2ub(p, 0xde, 0xc8+dst.idx); |
} |
void x87_fsubp( struct x86_function *p, struct x86_reg dst ) |
{ |
assert(dst.file == file_x87); |
assert(dst.idx >= 1); |
emit_2ub(p, 0xde, 0xe8+dst.idx); |
} |
void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) |
{ |
assert(dst.file == file_x87); |
assert(dst.idx >= 1); |
emit_2ub(p, 0xde, 0xe0+dst.idx); |
} |
void x87_faddp( struct x86_function *p, struct x86_reg dst ) |
{ |
assert(dst.file == file_x87); |
assert(dst.idx >= 1); |
emit_2ub(p, 0xde, 0xc0+dst.idx); |
} |
void x87_fdivp( struct x86_function *p, struct x86_reg dst ) |
{ |
assert(dst.file == file_x87); |
assert(dst.idx >= 1); |
emit_2ub(p, 0xde, 0xf8+dst.idx); |
} |
void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) |
{ |
assert(dst.file == file_x87); |
assert(dst.idx >= 1); |
emit_2ub(p, 0xde, 0xf0+dst.idx); |
} |
void x87_fucom( struct x86_function *p, struct x86_reg arg ) |
{ |
assert(arg.file == file_x87); |
emit_2ub(p, 0xdd, 0xe0+arg.idx); |
} |
void x87_fucomp( struct x86_function *p, struct x86_reg arg ) |
{ |
assert(arg.file == file_x87); |
emit_2ub(p, 0xdd, 0xe8+arg.idx); |
} |
/* Emit FUCOMPP (DA E9). */
void x87_fucompp( struct x86_function *p )
{
   const unsigned char escape = 0xda;
   const unsigned char op = 0xe9;

   emit_2ub(p, escape, op);
}
void x87_fxch( struct x86_function *p, struct x86_reg arg ) |
{ |
assert(arg.file == file_x87); |
emit_2ub(p, 0xd9, 0xc8+arg.idx); |
} |
/* Emit FABS (D9 E1). */
void x87_fabs( struct x86_function *p )
{
   const unsigned char escape = 0xd9;
   const unsigned char op = 0xe1;

   emit_2ub(p, escape, op);
}
/* Emit FCHS (D9 E0). */
void x87_fchs( struct x86_function *p )
{
   const unsigned char escape = 0xd9;
   const unsigned char op = 0xe0;

   emit_2ub(p, escape, op);
}
/* Emit FCOS (D9 FF). */
void x87_fcos( struct x86_function *p )
{
   const unsigned char escape = 0xd9;
   const unsigned char op = 0xff;

   emit_2ub(p, escape, op);
}
/* Emit FRNDINT (D9 FC). */
void x87_fprndint( struct x86_function *p )
{
   const unsigned char escape = 0xd9;
   const unsigned char op = 0xfc;

   emit_2ub(p, escape, op);
}
/* Emit FSCALE (D9 FD). */
void x87_fscale( struct x86_function *p )
{
   const unsigned char escape = 0xd9;
   const unsigned char op = 0xfd;

   emit_2ub(p, escape, op);
}
/* Emit FSIN (D9 FE). */
void x87_fsin( struct x86_function *p )
{
   const unsigned char escape = 0xd9;
   const unsigned char op = 0xfe;

   emit_2ub(p, escape, op);
}
/* Emit FSINCOS (D9 FB). */
void x87_fsincos( struct x86_function *p )
{
   const unsigned char escape = 0xd9;
   const unsigned char op = 0xfb;

   emit_2ub(p, escape, op);
}
/* Emit FSQRT (D9 FA). */
void x87_fsqrt( struct x86_function *p )
{
   const unsigned char escape = 0xd9;
   const unsigned char op = 0xfa;

   emit_2ub(p, escape, op);
}
/* Emit FXTRACT (D9 F4). */
void x87_fxtract( struct x86_function *p )
{
   const unsigned char escape = 0xd9;
   const unsigned char op = 0xf4;

   emit_2ub(p, escape, op);
}
/* Emit F2XM1 (D9 F0):
 *
 *    st0 = (2^st0) - 1
 *
 * Restriction on the input: -1.0 <= st0 <= 1.0
 */
void x87_f2xm1( struct x86_function *p )
{
   const unsigned char escape = 0xd9;
   const unsigned char op = 0xf0;

   emit_2ub(p, escape, op);
}
/* Emit FYL2X (D9 F1):
 *
 *    st1 = st1 * log2(st0), then the stack is popped.
 */
void x87_fyl2x( struct x86_function *p )
{
   const unsigned char escape = 0xd9;
   const unsigned char op = 0xf1;

   emit_2ub(p, escape, op);
}
/* Emit FYL2XP1 (D9 F9):
 *
 *    st1 = st1 * log2(st0 + 1.0), then the stack is popped.
 *
 * A fast operation, but restricted to -.29 < st0 < .29
 */
void x87_fyl2xp1( struct x86_function *p )
{
   const unsigned char escape = 0xd9;
   const unsigned char op = 0xf9;

   emit_2ub(p, escape, op);
}
void x87_fld( struct x86_function *p, struct x86_reg arg ) |
{ |
if (arg.file == file_x87) |
emit_2ub(p, 0xd9, 0xc0 + arg.idx); |
else { |
emit_1ub(p, 0xd9); |
emit_modrm_noreg(p, 0, arg); |
} |
} |
void x87_fst( struct x86_function *p, struct x86_reg dst ) |
{ |
if (dst.file == file_x87) |
emit_2ub(p, 0xdd, 0xd0 + dst.idx); |
else { |
emit_1ub(p, 0xd9); |
emit_modrm_noreg(p, 2, dst); |
} |
} |
void x87_fstp( struct x86_function *p, struct x86_reg dst ) |
{ |
if (dst.file == file_x87) |
emit_2ub(p, 0xdd, 0xd8 + dst.idx); |
else { |
emit_1ub(p, 0xd9); |
emit_modrm_noreg(p, 3, dst); |
} |
} |
void x87_fcom( struct x86_function *p, struct x86_reg dst ) |
{ |
if (dst.file == file_x87) |
emit_2ub(p, 0xd8, 0xd0 + dst.idx); |
else { |
emit_1ub(p, 0xd8); |
emit_modrm_noreg(p, 2, dst); |
} |
} |
void x87_fcomp( struct x86_function *p, struct x86_reg dst ) |
{ |
if (dst.file == file_x87) |
emit_2ub(p, 0xd8, 0xd8 + dst.idx); |
else { |
emit_1ub(p, 0xd8); |
emit_modrm_noreg(p, 3, dst); |
} |
} |
void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) |
{ |
assert(dst.file == file_REG32); |
if (dst.idx == reg_AX && |
dst.mod == mod_REG) |
emit_2ub(p, 0xdf, 0xe0); |
else { |
emit_1ub(p, 0xdd); |
emit_modrm_noreg(p, 7, dst); |
} |
} |
/*********************************************************************** |
* MMX instructions |
*/ |
void mmx_emms( struct x86_function *p ) |
{ |
assert(p->need_emms); |
emit_2ub(p, 0x0f, 0x77); |
p->need_emms = 0; |
} |
void mmx_packssdw( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
assert(dst.file == file_MMX && |
(src.file == file_MMX || src.mod != mod_REG)); |
p->need_emms = 1; |
emit_2ub(p, X86_TWOB, 0x6b); |
emit_modrm( p, dst, src ); |
} |
void mmx_packuswb( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
assert(dst.file == file_MMX && |
(src.file == file_MMX || src.mod != mod_REG)); |
p->need_emms = 1; |
emit_2ub(p, X86_TWOB, 0x67); |
emit_modrm( p, dst, src ); |
} |
void mmx_movd( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
p->need_emms = 1; |
emit_1ub(p, X86_TWOB); |
emit_op_modrm( p, 0x6e, 0x7e, dst, src ); |
} |
void mmx_movq( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
p->need_emms = 1; |
emit_1ub(p, X86_TWOB); |
emit_op_modrm( p, 0x6f, 0x7f, dst, src ); |
} |
/*********************************************************************** |
* Helper functions |
*/ |
/* Retreive a reference to one of the function arguments, taking into |
* account any push/pop activity: |
*/ |
struct x86_reg x86_fn_arg( struct x86_function *p, |
unsigned arg ) |
{ |
return x86_make_disp(x86_make_reg(file_REG32, reg_SP), |
p->stack_offset + arg * 4); /* ??? */ |
} |
void x86_init_func( struct x86_function *p ) |
{ |
p->size = 0; |
p->store = NULL; |
p->csr = p->store; |
} |
int x86_init_func_size( struct x86_function *p, unsigned code_size ) |
{ |
p->size = code_size; |
p->store = _mesa_exec_malloc(code_size); |
p->csr = p->store; |
return p->store != NULL; |
} |
void x86_release_func( struct x86_function *p ) |
{ |
_mesa_exec_free(p->store); |
p->store = NULL; |
p->csr = NULL; |
p->size = 0; |
} |
void (*x86_get_func( struct x86_function *p ))(void) |
{ |
if (DISASSEM && p->store) |
printf("disassemble %p %p\n", p->store, p->csr); |
return (void (*)(void)) (unsigned long) p->store; |
} |
#else |
/* Placeholder defined when the real implementation is compiled out. */
void x86sse_dummy( void )
{
   /* intentionally empty */
}
#endif |
#else /* USE_X86_ASM */ |
/* Present only to silence a compiler warning when USE_X86_ASM is off. */
int x86sse_c_dummy_var;
#endif /* USE_X86_ASM */ |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/rtasm/x86sse.h |
---|
0,0 → 1,256 |
#ifndef _X86SSE_H_ |
#define _X86SSE_H_ |
#if defined(__i386__) || defined(__386__) |
/* It is up to the caller to ensure that instructions issued are |
* suitable for the host cpu. There are no checks made in this module |
* for mmx/sse/sse2 support on the cpu. |
*/ |
/**
 * Packed description of one instruction operand.
 *
 * disp is declared explicitly signed: whether a plain "int" bit-field is
 * signed is implementation-defined, and displacements must be able to
 * hold negative offsets.
 */
struct x86_reg {
   unsigned file:3;      /* register file, see enum x86_reg_file */
   unsigned idx:3;       /* register index within the file */
   unsigned mod:2;       /* mod_REG if this is just a register */
   signed int disp:24;   /* only +/- 23bits of offset - should be enough... */
};
/* Book-keeping for one function being assembled. */
struct x86_function {
   unsigned       size;           /* byte count passed to x86_init_func_size() */
   unsigned char *store;          /* start of the code buffer, NULL when empty */
   unsigned char *csr;            /* starts equal to store; presumably the emit cursor - TODO confirm */
   unsigned       stack_offset;   /* added to argument offsets by x86_fn_arg() */
   int            need_emms;      /* set by the MMX emitters, cleared by mmx_emms() */
   const char    *fn;             /* not used by the code visible here - TODO confirm purpose */
};
/* Register files an x86_reg can refer to. */
enum x86_reg_file {
   file_REG32 = 0,
   file_MMX   = 1,
   file_XMM   = 2,
   file_x87   = 3
};
/* Values for the mod field of the modr/m byte.
 */
enum x86_reg_mod {
   mod_INDIRECT = 0,
   mod_DISP8    = 1,
   mod_DISP32   = 2,
   mod_REG      = 3
};
/* General-purpose register indices. */
enum x86_reg_name {
   reg_AX = 0,
   reg_CX = 1,
   reg_DX = 2,
   reg_BX = 3,
   reg_SP = 4,
   reg_BP = 5,
   reg_SI = 6,
   reg_DI = 7
};
/* Condition codes accepted by the conditional-jump emitters. */
enum x86_cc {
   cc_O   = 0,   /* overflow */
   cc_NO  = 1,   /* not overflow */
   cc_NAE = 2,   /* not above or equal / carry */
   cc_AE  = 3,   /* above or equal / not carry */
   cc_E   = 4,   /* equal / zero */
   cc_NE  = 5    /* not equal / not zero */
};
/* Comparison predicates passed as the cc argument of sse_cmpps(). */
enum sse_cc {
   cc_Equal            = 0,
   cc_LessThan         = 1,
   cc_LessThanEqual    = 2,
   cc_Unordered        = 3,
   cc_NotEqual         = 4,
   cc_NotLessThan      = 5,
   cc_NotLessThanEqual = 6,
   cc_Ordered          = 7
};
#define cc_Z cc_E |
#define cc_NZ cc_NE |
/* Begin/end/retrieve function creation: |
*/ |
void x86_init_func( struct x86_function *p ); |
int x86_init_func_size( struct x86_function *p, unsigned code_size ); |
void x86_release_func( struct x86_function *p ); |
void (*x86_get_func( struct x86_function *p ))( void ); |
/* Create and manipulate registers and regmem values: |
*/ |
struct x86_reg x86_make_reg( enum x86_reg_file file, |
enum x86_reg_name idx ); |
struct x86_reg x86_make_disp( struct x86_reg reg, |
int disp ); |
struct x86_reg x86_deref( struct x86_reg reg ); |
struct x86_reg x86_get_base_reg( struct x86_reg reg ); |
/* Labels, jumps and fixup: |
*/ |
unsigned char *x86_get_label( struct x86_function *p ); |
void x86_jcc( struct x86_function *p, |
enum x86_cc cc, |
unsigned char *label ); |
unsigned char *x86_jcc_forward( struct x86_function *p, |
enum x86_cc cc ); |
unsigned char *x86_jmp_forward( struct x86_function *p); |
unsigned char *x86_call_forward( struct x86_function *p); |
void x86_fixup_fwd_jump( struct x86_function *p, |
unsigned char *fixup ); |
void x86_jmp( struct x86_function *p, unsigned char *label ); |
/* void x86_call( struct x86_function *p, void (*label)() ); */ |
void x86_call( struct x86_function *p, struct x86_reg reg); |
/* michal: |
* Temporary. As I need immediate operands, and don't want to mess with the codegen, |
* I load the immediate into a general purpose register and use it. |
*/ |
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); |
/* Macros for the shuffle byte of sse_shufps() and sse2_pshufd():
 *
 * SHUF(x,y,z,w)      packs four 2-bit selectors, x in the lowest bits.
 * SHUF_NOOP          the identity selection; it used to expand to the
 *                    undefined RSW() and so could not be compiled.
 * GET_SHUF(swz,idx)  extracts selector number idx (0..3) from swz.
 */
#define SHUF(_x,_y,_z,_w)   (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6))
#define SHUF_NOOP           SHUF(0,1,2,3)
#define GET_SHUF(swz, idx)  (((swz) >> ((idx)*2)) & 0x3)
void mmx_emms( struct x86_function *p ); |
void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, |
unsigned char shuf ); |
void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, |
unsigned char cc ); |
void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, |
unsigned char shuf ); |
void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src ); |
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_dec( struct x86_function *p, struct x86_reg reg ); |
void x86_inc( struct x86_function *p, struct x86_reg reg ); |
void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_mul( struct x86_function *p, struct x86_reg src ); |
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_pop( struct x86_function *p, struct x86_reg reg ); |
void x86_push( struct x86_function *p, struct x86_reg reg ); |
void x86_ret( struct x86_function *p ); |
void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_sahf( struct x86_function *p ); |
void x87_f2xm1( struct x86_function *p ); |
void x87_fabs( struct x86_function *p ); |
void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); |
void x87_faddp( struct x86_function *p, struct x86_reg dst ); |
void x87_fchs( struct x86_function *p ); |
void x87_fclex( struct x86_function *p ); |
void x87_fcom( struct x86_function *p, struct x86_reg dst ); |
void x87_fcomp( struct x86_function *p, struct x86_reg dst ); |
void x87_fcos( struct x86_function *p ); |
void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); |
void x87_fdivp( struct x86_function *p, struct x86_reg dst ); |
void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); |
void x87_fdivrp( struct x86_function *p, struct x86_reg dst ); |
void x87_fild( struct x86_function *p, struct x86_reg arg ); |
void x87_fist( struct x86_function *p, struct x86_reg dst ); |
void x87_fistp( struct x86_function *p, struct x86_reg dst ); |
void x87_fld( struct x86_function *p, struct x86_reg arg ); |
void x87_fld1( struct x86_function *p ); |
void x87_fldcw( struct x86_function *p, struct x86_reg arg ); |
void x87_fldl2e( struct x86_function *p ); |
void x87_fldln2( struct x86_function *p ); |
void x87_fldz( struct x86_function *p ); |
void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); |
void x87_fmulp( struct x86_function *p, struct x86_reg dst ); |
void x87_fnclex( struct x86_function *p ); |
void x87_fprndint( struct x86_function *p ); |
void x87_fscale( struct x86_function *p ); |
void x87_fsin( struct x86_function *p ); |
void x87_fsincos( struct x86_function *p ); |
void x87_fsqrt( struct x86_function *p ); |
void x87_fst( struct x86_function *p, struct x86_reg dst ); |
void x87_fstp( struct x86_function *p, struct x86_reg dst ); |
void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); |
void x87_fsubp( struct x86_function *p, struct x86_reg dst ); |
void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); |
void x87_fsubrp( struct x86_function *p, struct x86_reg dst ); |
void x87_fxch( struct x86_function *p, struct x86_reg dst ); |
void x87_fxtract( struct x86_function *p ); |
void x87_fyl2x( struct x86_function *p ); |
void x87_fyl2xp1( struct x86_function *p ); |
void x87_fwait( struct x86_function *p ); |
void x87_fnstsw( struct x86_function *p, struct x86_reg dst ); |
void x87_fucompp( struct x86_function *p ); |
void x87_fucomp( struct x86_function *p, struct x86_reg arg ); |
void x87_fucom( struct x86_function *p, struct x86_reg arg ); |
/* Retrieve a reference to one of the function arguments, taking into |
* account any push/pop activity. Note - doesn't track explicit |
* manipulation of ESP by other instructions. |
*/ |
struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg ); |
#endif |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/sse.c |
---|
0,0 → 1,123 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2004 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* |
* PentiumIII-SIMD (SSE) optimizations contributed by |
* Andre Werthmann <wertmann@cs.uni-potsdam.de> |
*/ |
#include "main/glheader.h" |
#include "main/context.h" |
#include "math/m_xform.h" |
#include "tnl/t_context.h" |
#include "sse.h" |
#include "x86_xform.h" |
#ifdef DEBUG_MATH |
#include "math/m_debug.h" |
#endif |
#ifdef USE_SSE_ASM |
DECLARE_XFORM_GROUP( sse, 2 ) |
DECLARE_XFORM_GROUP( sse, 3 ) |
#if 1 |
/* Some functions are not written in SSE-assembly, because the fpu ones are faster */ |
extern void _ASMAPI _mesa_sse_transform_normals_no_rot( NORM_ARGS ); |
extern void _ASMAPI _mesa_sse_transform_rescale_normals( NORM_ARGS ); |
extern void _ASMAPI _mesa_sse_transform_rescale_normals_no_rot( NORM_ARGS ); |
extern void _ASMAPI _mesa_sse_transform_points4_general( XFORM_ARGS ); |
extern void _ASMAPI _mesa_sse_transform_points4_3d( XFORM_ARGS ); |
/* XXX this function segfaults, see below */ |
extern void _ASMAPI _mesa_sse_transform_points4_identity( XFORM_ARGS ); |
/* XXX this one works, see below */ |
extern void _ASMAPI _mesa_x86_transform_points4_identity( XFORM_ARGS ); |
#else |
DECLARE_NORM_GROUP( sse ) |
#endif |
extern void _ASMAPI |
_mesa_v16_sse_general_xform( GLfloat *first_vert, |
const GLfloat *m, |
const GLfloat *src, |
GLuint src_stride, |
GLuint count ); |
extern void _ASMAPI |
_mesa_sse_project_vertices( GLfloat *first, |
GLfloat *last, |
const GLfloat *m, |
GLuint stride ); |
extern void _ASMAPI |
_mesa_sse_project_clipped_vertices( GLfloat *first, |
GLfloat *last, |
const GLfloat *m, |
GLuint stride, |
const GLubyte *clipmask ); |
#endif |
/**
 * Install the SSE assembly routines into the transform and normal
 * dispatch tables.  Does nothing unless USE_SSE_ASM is defined.
 */
void _mesa_init_sse_transform_asm( void )
{
#ifdef USE_SSE_ASM
   ASSIGN_XFORM_GROUP( sse, 2 );
   ASSIGN_XFORM_GROUP( sse, 3 );

#if 1
   /* TODO: Finish these off.
    */
   _mesa_transform_tab[4][MATRIX_GENERAL] = _mesa_sse_transform_points4_general;
   _mesa_transform_tab[4][MATRIX_3D] = _mesa_sse_transform_points4_3d;

   /* XXX NOTE: _mesa_sse_transform_points4_identity segfaults with the
    * conformance tests, so the x86 version is installed instead.
    */
   _mesa_transform_tab[4][MATRIX_IDENTITY] = _mesa_x86_transform_points4_identity;

   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT] = _mesa_sse_transform_normals_no_rot;
   _mesa_normal_tab[NORM_TRANSFORM | NORM_RESCALE] = _mesa_sse_transform_rescale_normals;
   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE] = _mesa_sse_transform_rescale_normals_no_rot;
#else
   ASSIGN_XFORM_GROUP( sse, 4 );
   ASSIGN_NORM_GROUP( sse );
#endif

#ifdef DEBUG_MATH
   _math_test_all_transform_functions( "SSE" );
   _math_test_all_normal_transform_functions( "SSE" );
#endif
#endif
}
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/sse.h |
---|
0,0 → 1,36 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* |
* PentiumIII-SIMD (SSE) optimizations contributed by |
* Andre Werthmann <wertmann@cs.uni-potsdam.de> |
*/ |
#ifndef __SSE_H__ |
#define __SSE_H__ |
void _mesa_init_sse_transform_asm( void ); |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/sse_normal.S |
---|
0,0 → 1,261 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/** TODO: |
* - insert PREFETCH instructions to avoid cache-misses ! |
* - some more optimizations are possible... |
* - for 40-50% more performance in the SSE-functions, the |
* data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! |
*/ |
#ifdef USE_SSE_ASM |
#include "assyntax.h" |
#include "matypes.h" |
#include "norm_args.h" |
SEG_TEXT |
#define M(i) REGOFF(i * 4, EDX) |
#define S(i) REGOFF(i * 4, ESI) |
#define D(i) REGOFF(i * 4, EDI) |
#define STRIDE REGOFF(12, ESI) |
/*
 * _mesa_sse_transform_rescale_normals_no_rot
 *
 * Multiplies every source normal component-wise by the diagonal of
 * matrix->inv (elements 0, 5 and 10), each pre-multiplied by the scale
 * argument:
 *
 *    dst = ( ux*m0*scale, uy*m5*scale, uz*m10*scale )
 *
 * The three scaled factors are computed once before the loop. Destination
 * elements are written 16 bytes apart; the source pointer advances by the
 * stride read through STRIDE. The loop ends when the destination pointer
 * reaches dest start + count*16.
 *
 * When the source count is zero the routine returns immediately, before the
 * destination count is written.
 *
 * ESI and EDI are pushed on entry and popped on exit. FRAME_OFFSET is 8
 * while the body is assembled, presumably to account for those two pushes
 * in the ARG_* offsets - confirm against norm_args.h.
 */
ALIGNTEXT16 |
GLOBL GLNAME(_mesa_sse_transform_rescale_normals_no_rot) |
HIDDEN(_mesa_sse_transform_rescale_normals_no_rot) |
GLNAME(_mesa_sse_transform_rescale_normals_no_rot): |
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) |
PUSH_L ( EDI ) |
MOV_L ( ARG_IN, ESI ) /* ptr to source GLvector3f */ |
MOV_L ( ARG_DEST, EDI ) /* ptr to dest GLvector3f */ |
MOV_L ( ARG_MAT, EDX ) /* ptr to matrix */ |
MOV_L ( REGOFF(MATRIX_INV, EDX), EDX) /* matrix->inv */ |
MOV_L ( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L ( ECX, ECX ) |
JZ( LLBL(K_G3TRNNRR_finish) ) /* count was zero; go to finish */ |
MOV_L ( STRIDE, EAX ) /* stride */ |
MOV_L ( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest-count */ |
IMUL_L( CONST(16), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
ALIGNTEXT32 |
MOVSS ( M(0), XMM1 ) /* m0 */ |
MOVSS ( M(5), XMM2 ) /* m5 */ |
UNPCKLPS( XMM2, XMM1 ) /* m5 | m0 */ |
MOVSS ( ARG_SCALE, XMM0 ) /* scale */ |
SHUFPS ( CONST(0x0), XMM0, XMM0 ) /* scale | scale */ |
MULPS ( XMM0, XMM1 ) /* m5*scale | m0*scale */ |
MULSS ( M(10), XMM0 ) /* m10*scale */ |
ALIGNTEXT32 |
LLBL(K_G3TRNNRR_top): |
MOVLPS ( S(0), XMM2 ) /* uy | ux */ |
MULPS ( XMM1, XMM2 ) /* uy*m5*scale | ux*m0*scale */ |
MOVLPS ( XMM2, D(0) ) /* ->D(1) | D(0) */ |
MOVSS ( S(2), XMM2 ) /* uz */ |
MULSS ( XMM0, XMM2 ) /* uz*m10*scale */ |
MOVSS ( XMM2, D(2) ) /* ->D(2) */ |
LLBL(K_G3TRNNRR_skip): |
ADD_L ( CONST(16), EDI ) |
ADD_L ( EAX, ESI ) |
CMP_L ( ECX, EDI ) |
JNE ( LLBL(K_G3TRNNRR_top) ) |
LLBL(K_G3TRNNRR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_rescale_normals
 *
 * Transforms every source normal by the upper-left 3x3 part of matrix->inv
 * with every matrix element multiplied by the scale argument:
 *
 *    dst.x = ( ux*m0 + uy*m1 + uz*m2  ) * scale
 *    dst.y = ( ux*m4 + uy*m5 + uz*m6  ) * scale
 *    dst.z = ( ux*m8 + uy*m9 + uz*m10 ) * scale
 *
 * The scaled pairs (m4|m0), (m5|m1), (m6|m2) and the scalars m8, m9 are set
 * up once in XMM0-XMM2 and XMM6-XMM7. m10*scale is reloaded and recomputed
 * inside the loop.
 *
 * Destination elements are written 16 bytes apart; the source pointer
 * advances by the stride read through STRIDE. When the source count is zero
 * the routine returns immediately, before the destination count is written.
 *
 * ESI and EDI are pushed on entry and popped on exit. FRAME_OFFSET is 8
 * while the body is assembled, presumably to account for those two pushes
 * in the ARG_* offsets - confirm against norm_args.h.
 */
ALIGNTEXT16 |
GLOBL GLNAME(_mesa_sse_transform_rescale_normals) |
HIDDEN(_mesa_sse_transform_rescale_normals) |
GLNAME(_mesa_sse_transform_rescale_normals): |
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) |
PUSH_L ( EDI ) |
MOV_L ( ARG_IN, ESI ) /* ptr to source GLvector3f */ |
MOV_L ( ARG_DEST, EDI ) /* ptr to dest GLvector3f */ |
MOV_L ( ARG_MAT, EDX ) /* ptr to matrix */ |
MOV_L ( REGOFF(MATRIX_INV, EDX), EDX) /* matrix->inv */ |
MOV_L ( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L ( ECX, ECX ) |
JZ( LLBL(K_G3TRNR_finish) ) /* count was zero; go to finish */ |
MOV_L ( STRIDE, EAX ) /* stride */ |
MOV_L ( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest-count */ |
IMUL_L( CONST(16), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
ALIGNTEXT32 |
MOVSS ( M(0), XMM0 ) /* m0 */ |
MOVSS ( M(4), XMM1 ) /* m4 */ |
UNPCKLPS( XMM1, XMM0 ) /* m4 | m0 */ |
MOVSS ( ARG_SCALE, XMM4 ) /* scale */ |
SHUFPS ( CONST(0x0), XMM4, XMM4 ) /* scale | scale */ |
MULPS ( XMM4, XMM0 ) /* m4*scale | m0*scale */ |
MOVSS ( M(1), XMM1 ) /* m1 */ |
MOVSS ( M(5), XMM2 ) /* m5 */ |
UNPCKLPS( XMM2, XMM1 ) /* m5 | m1 */ |
MULPS ( XMM4, XMM1 ) /* m5*scale | m1*scale */ |
MOVSS ( M(2), XMM2 ) /* m2 */ |
MOVSS ( M(6), XMM3 ) /* m6 */ |
UNPCKLPS( XMM3, XMM2 ) /* m6 | m2 */ |
MULPS ( XMM4, XMM2 ) /* m6*scale | m2*scale */ |
MOVSS ( M(8), XMM6 ) /* m8 */ |
MULSS ( ARG_SCALE, XMM6 ) /* m8*scale */ |
MOVSS ( M(9), XMM7 ) /* m9 */ |
MULSS ( ARG_SCALE, XMM7 ) /* m9*scale */ |
ALIGNTEXT32 |
LLBL(K_G3TRNR_top): |
MOVSS ( S(0), XMM3 ) /* ux */ |
SHUFPS ( CONST(0x0), XMM3, XMM3 ) /* ux | ux */ |
MULPS ( XMM0, XMM3 ) /* ux*m4*scale | ux*m0*scale */ |
MOVSS ( S(1), XMM4 ) /* uy */ |
SHUFPS ( CONST(0x0), XMM4, XMM4 ) /* uy | uy */ |
MULPS ( XMM1, XMM4 ) /* uy*m5*scale | uy*m1*scale */ |
MOVSS ( S(2), XMM5 ) /* uz */ |
SHUFPS ( CONST(0x0), XMM5, XMM5 ) /* uz | uz */ |
MULPS ( XMM2, XMM5 ) /* uz*m6*scale | uz*m2*scale */ |
ADDPS ( XMM4, XMM3 ) |
ADDPS ( XMM5, XMM3 ) |
MOVLPS ( XMM3, D(0) ) /* ->D(1) | D(0) */ |
MOVSS ( M(10), XMM3 ) /* m10 */ |
MULSS ( ARG_SCALE, XMM3 ) /* m10*scale */ |
MULSS ( S(2), XMM3 ) /* m10*scale*uz */ |
MOVSS ( S(1), XMM4 ) /* uy */ |
MULSS ( XMM7, XMM4 ) /* uy*m9*scale */ |
MOVSS ( S(0), XMM5 ) /* ux */ |
MULSS ( XMM6, XMM5 ) /* ux*m8*scale */ |
ADDSS ( XMM4, XMM3 ) |
ADDSS ( XMM5, XMM3 ) |
MOVSS ( XMM3, D(2) ) /* ->D(2) */ |
LLBL(K_G3TRNR_skip): |
ADD_L ( CONST(16), EDI ) |
ADD_L ( EAX, ESI ) |
CMP_L ( ECX, EDI ) |
JNE ( LLBL(K_G3TRNR_top) ) |
LLBL(K_G3TRNR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_normals_no_rot
 *
 * Multiplies every source normal component-wise by the diagonal of
 * matrix->inv (elements 0, 5 and 10), without any extra scale factor:
 *
 *    dst = ( ux*m0, uy*m5, uz*m10 )
 *
 * The pair (m5|m0) is kept in XMM0 and m10 in XMM1 for the whole loop.
 * Destination elements are written 16 bytes apart; the source pointer
 * advances by the stride read through STRIDE. When the source count is zero
 * the routine returns immediately, before the destination count is written.
 *
 * ESI and EDI are pushed on entry and popped on exit. FRAME_OFFSET is 8
 * while the body is assembled, presumably to account for those two pushes
 * in the ARG_* offsets - confirm against norm_args.h.
 */
ALIGNTEXT16 |
GLOBL GLNAME(_mesa_sse_transform_normals_no_rot) |
HIDDEN(_mesa_sse_transform_normals_no_rot) |
GLNAME(_mesa_sse_transform_normals_no_rot): |
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) |
PUSH_L ( EDI ) |
MOV_L ( ARG_IN, ESI ) /* ptr to source GLvector3f */ |
MOV_L ( ARG_DEST, EDI ) /* ptr to dest GLvector3f */ |
MOV_L ( ARG_MAT, EDX ) /* ptr to matrix */ |
MOV_L ( REGOFF(MATRIX_INV, EDX), EDX) /* matrix->inv */ |
MOV_L ( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L ( ECX, ECX ) |
JZ( LLBL(K_G3TNNRR_finish) ) /* count was zero; go to finish */ |
MOV_L ( STRIDE, EAX ) /* stride */ |
MOV_L ( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest-count */ |
IMUL_L( CONST(16), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
ALIGNTEXT32 |
MOVSS( M(0), XMM0 ) /* m0 */ |
MOVSS( M(5), XMM1 ) /* m5 */ |
UNPCKLPS( XMM1, XMM0 ) /* m5 | m0 */ |
MOVSS( M(10), XMM1 ) /* m10 */ |
ALIGNTEXT32 |
LLBL(K_G3TNNRR_top): |
MOVLPS( S(0), XMM2 ) /* uy | ux */ |
MULPS( XMM0, XMM2 ) /* uy*m5 | ux*m0 */ |
MOVLPS( XMM2, D(0) ) /* ->D(1) | D(0) */ |
MOVSS( S(2), XMM2 ) /* uz */ |
MULSS( XMM1, XMM2 ) /* uz*m10 */ |
MOVSS( XMM2, D(2) ) /* ->D(2) */ |
LLBL(K_G3TNNRR_skip): |
ADD_L ( CONST(16), EDI ) |
ADD_L ( EAX, ESI ) |
CMP_L ( ECX, EDI ) |
JNE ( LLBL(K_G3TNNRR_top) ) |
LLBL(K_G3TNNRR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
#endif |
#if defined (__ELF__) && defined (__linux__) |
.section .note.GNU-stack,"",%progbits |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/sse_xform1.S |
---|
0,0 → 1,446 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/** TODO: |
* - insert PREFETCH instructions to avoid cache-misses ! |
* - some more optimizations are possible... |
* - for 40-50% more performance in the SSE-functions, the |
* data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! |
*/ |
#ifdef USE_SSE_ASM |
#include "assyntax.h" |
#include "matypes.h" |
#include "xform_args.h" |
SEG_TEXT |
#define S(i) REGOFF(i * 4, ESI) |
#define D(i) REGOFF(i * 4, EDI) |
#define M(i) REGOFF(i * 4, EDX) |
/*
 * _mesa_sse_transform_points1_general
 *
 * Transforms one-component source points by a general matrix. Each point
 * contributes only its first component ox, and the four results are
 *
 *    dst[0..3] = ox * (m0, m1, m2, m3) + (m12, m13, m14, m15)
 *
 * Before the loop the destination vector gets the VEC_SIZE_4 flag OR-ed
 * in, its count set to the source count and its size set to 4. When the
 * source count is zero the routine returns without touching the
 * destination.
 *
 * Destination elements are written 16 bytes apart with MOVUPS; the source
 * pointer advances by the source stride.
 *
 * NOTE(review): the two matrix rows are loaded with MOVAPS, so the matrix
 * is presumably expected to be 16-byte aligned - confirm with the callers.
 *
 * ESI and EDI are pushed on entry and popped on exit; the source and dest
 * arguments are fetched with an explicit +8 on their OFFSET_* constants,
 * matching FRAME_OFFSET 8.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points1_general) |
HIDDEN( _mesa_sse_transform_points1_general ) |
GLNAME( _mesa_sse_transform_points1_general ): |
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) |
PUSH_L ( EDI ) |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
CMP_L( CONST(0), ECX ) /* count == 0 ? */ |
JE( LLBL(K_GTP1GR_finish) ) /* yes -> nothing to do. */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
ALIGNTEXT32 |
MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ |
MOVAPS( M(12), XMM1 ) /* m15 | m14 | m13 | m12 */ |
ALIGNTEXT32 |
LLBL(K_GTP1GR_top): |
MOVSS( S(0), XMM2 ) /* ox */ |
SHUFPS( CONST(0x0), XMM2, XMM2 ) /* ox | ox | ox | ox */ |
MULPS( XMM0, XMM2 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ |
ADDPS( XMM1, XMM2 ) /* + | + | + | + */ |
MOVUPS( XMM2, D(0) ) /* ->D(3) | D(2) | D(1) | D(0) */ |
LLBL(K_GTP1GR_skip): |
ADD_L ( CONST(16), EDI ) |
ADD_L ( EAX, ESI ) |
CMP_L ( ECX, EDI ) |
JNE ( LLBL(K_GTP1GR_top) ) |
LLBL(K_GTP1GR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points1_identity
 *
 * Copies the first 32-bit component of every source vertex to the matching
 * destination vertex unchanged (via EDX); no matrix is read.
 * The destination vector is flagged VEC_SIZE_1, given size 1 and the source
 * count. A zero source count returns before any destination field is written.
 * If the source and destination data pointers are equal, the copy loop is
 * skipped (the destination fields have already been written by then).
 * NOTE(review): OFFSET_SOURCE and OFFSET_DEST come from xform_args.h (not
 * visible here); the "+8" presumably mirrors FRAME_OFFSET, i.e. the two
 * registers pushed on entry - confirm.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points1_identity) |
HIDDEN(_mesa_sse_transform_points1_identity) |
GLNAME( _mesa_sse_transform_points1_identity ): |
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) /* saved here, restored at the finish label */ |
PUSH_L ( EDI ) /* saved here, restored at the finish label */ |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP1IR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(1), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
CMP_L( ESI, EDI ) /* same data pointer for source and dest? */ |
JE( LLBL(K_GTP1IR_finish) ) /* yes -> nothing to copy */ |
ALIGNTEXT32 |
LLBL(K_GTP1IR_top): |
MOV_L( S(0), EDX ) /* load source component 0 */ |
MOV_L( EDX, D(0) ) /* store it to dest component 0 */ |
/* The _skip label below is not jumped to anywhere in this routine. */
LLBL(K_GTP1IR_skip): |
ADD_L ( CONST(16), EDI ) /* next dest vertex (16 bytes) */ |
ADD_L ( EAX, ESI ) /* next source vertex (+= stride) */ |
CMP_L ( ECX, EDI ) /* reached the end address? */ |
JNE ( LLBL(K_GTP1IR_top) ) /* no -> next vertex */ |
LLBL(K_GTP1IR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points1_3d_no_rot
 *
 * Per vertex, reads one source float (ox) and stores three floats:
 *   D[0] = ox * m0 + m12
 *   D[1] = m13
 *   D[2] = m14
 * The destination vector is flagged VEC_SIZE_3, given size 3 and the source
 * count. A zero source count returns before any destination field is written.
 * NOTE(review): OFFSET_SOURCE, OFFSET_DEST and ARG_MATRIX come from
 * xform_args.h (not visible here); the "+8" presumably mirrors FRAME_OFFSET,
 * i.e. the two registers pushed on entry - confirm.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points1_3d_no_rot) |
HIDDEN(_mesa_sse_transform_points1_3d_no_rot) |
GLNAME(_mesa_sse_transform_points1_3d_no_rot): |
#define FRAME_OFFSET 8 |
PUSH_L( ESI ) /* saved here, restored at the finish label */ |
PUSH_L( EDI ) /* saved here, restored at the finish label */ |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP13DNRR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
/* ECX is now the end-of-destination address used as the loop bound. */
ALIGNTEXT32 |
MOVSS( M(0), XMM0 ) /* m0 */ |
MOVSS( M(12), XMM1 ) /* m12 */ |
MOVSS( M(13), XMM2 ) /* m13 */ |
MOVSS( M(14), XMM3 ) /* m14 */ |
ALIGNTEXT32 |
LLBL(K_GTP13DNRR_top): |
MOVSS( S(0), XMM4 ) /* ox */ |
MULSS( XMM0, XMM4 ) /* ox*m0 */ |
ADDSS( XMM1, XMM4 ) /* ox*m0+m12 */ |
MOVSS( XMM4, D(0) ) /* ->D(0) */ |
MOVSS( XMM2, D(1) ) /* m13->D(1) */ |
MOVSS( XMM3, D(2) ) /* m14->D(2) */ |
/* The _skip label below is not jumped to anywhere in this routine. */
LLBL(K_GTP13DNRR_skip): |
ADD_L ( CONST(16), EDI ) /* next dest vertex (16 bytes) */ |
ADD_L ( EAX, ESI ) /* next source vertex (+= stride) */ |
CMP_L ( ECX, EDI ) /* reached the end address? */ |
JNE ( LLBL(K_GTP13DNRR_top) ) /* no -> next vertex */ |
LLBL(K_GTP13DNRR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points1_perspective
 *
 * Per vertex, reads one source float (ox) and stores four floats:
 *   D[0] = ox * m0
 *   D[1] = 0
 *   D[2] = m14
 *   D[3] = 0
 * The destination vector is flagged VEC_SIZE_4, given size 4 and the source
 * count. A zero source count returns before any destination field is written.
 * NOTE(review): OFFSET_SOURCE, OFFSET_DEST and ARG_MATRIX come from
 * xform_args.h (not visible here); the "+8" presumably mirrors FRAME_OFFSET,
 * i.e. the two registers pushed on entry - confirm.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points1_perspective) |
HIDDEN(_mesa_sse_transform_points1_perspective) |
GLNAME(_mesa_sse_transform_points1_perspective): |
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) /* saved here, restored at the finish label */ |
PUSH_L ( EDI ) /* saved here, restored at the finish label */ |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP13PR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
/* ECX is now the end-of-destination address used as the loop bound. */
ALIGNTEXT32 |
XORPS( XMM0, XMM0 ) /* 0 | 0 | 0 | 0 */ |
MOVSS( M(0), XMM1 ) /* m0 */ |
MOVSS( M(14), XMM2 ) /* m14 */ |
ALIGNTEXT32 |
LLBL(K_GTP13PR_top): |
MOVSS( S(0), XMM3 ) /* ox */ |
MULSS( XMM1, XMM3 ) /* ox*m0 */ |
MOVSS( XMM3, D(0) ) /* ox*m0->D(0) */ |
MOVSS( XMM2, D(2) ) /* m14->D(2) */ |
MOVSS( XMM0, D(1) ) /* 0->D(1) */ |
MOVSS( XMM0, D(3) ) /* 0->D(3) */ |
/* The _skip label below is not jumped to anywhere in this routine. */
LLBL(K_GTP13PR_skip): |
ADD_L( CONST(16), EDI ) /* next dest vertex (16 bytes) */ |
ADD_L( EAX, ESI ) /* next source vertex (+= stride) */ |
CMP_L( ECX, EDI ) /* reached the end address? */ |
JNE( LLBL(K_GTP13PR_top) ) /* no -> next vertex */ |
LLBL(K_GTP13PR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points1_2d
 *
 * Per vertex, reads one source float (ox) and stores two floats:
 *   D[0] = ox * m0 + m12
 *   D[1] = ox * m1 + m13
 * The destination vector is flagged VEC_SIZE_2, given size 2 and the source
 * count. A zero source count returns before any destination field is written.
 * MOVLPS fills only the low two lanes of XMM0/XMM1, so the upper lanes hold
 * whatever was in the registers on entry; the packed multiply/add still runs
 * on them, but only the low two lanes are ever stored.
 * NOTE(review): OFFSET_SOURCE, OFFSET_DEST and ARG_MATRIX come from
 * xform_args.h (not visible here); the "+8" presumably mirrors FRAME_OFFSET,
 * i.e. the two registers pushed on entry - confirm.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points1_2d) |
HIDDEN(_mesa_sse_transform_points1_2d) |
GLNAME(_mesa_sse_transform_points1_2d): |
#define FRAME_OFFSET 8 |
PUSH_L( ESI ) /* saved here, restored at the finish label */ |
PUSH_L( EDI ) /* saved here, restored at the finish label */ |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP13P2DR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
/* ECX is now the end-of-destination address used as the loop bound. */
ALIGNTEXT32 |
MOVLPS( M(0), XMM0 ) /* m1 | m0 */ |
MOVLPS( M(12), XMM1 ) /* m13 | m12 */ |
ALIGNTEXT32 |
LLBL(K_GTP13P2DR_top): |
MOVSS( S(0), XMM2 ) /* ox */ |
SHUFPS( CONST(0x0), XMM2, XMM2 ) /* ox | ox | ox | ox */ |
MULPS( XMM0, XMM2 ) /* - | - | ox*m1 | ox*m0 */ |
ADDPS( XMM1, XMM2 ) /* - | - | ox*m1+m13 | ox*m0+m12 */ |
MOVLPS( XMM2, D(0) ) /* ->D(1) | ->D(0) */ |
/* The _skip label below is not jumped to anywhere in this routine. */
LLBL(K_GTP13P2DR_skip): |
ADD_L ( CONST(16), EDI ) /* next dest vertex (16 bytes) */ |
ADD_L ( EAX, ESI ) /* next source vertex (+= stride) */ |
CMP_L ( ECX, EDI ) /* reached the end address? */ |
JNE ( LLBL(K_GTP13P2DR_top) ) /* no -> next vertex */ |
LLBL(K_GTP13P2DR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points1_2d_no_rot
 *
 * Per vertex, reads one source float (ox) and stores two floats:
 *   D[0] = ox * m0 + m12
 *   D[1] = m13
 * The destination vector is flagged VEC_SIZE_2, given size 2 and the source
 * count. A zero source count returns before any destination field is written.
 * NOTE(review): OFFSET_SOURCE, OFFSET_DEST and ARG_MATRIX come from
 * xform_args.h (not visible here); the "+8" presumably mirrors FRAME_OFFSET,
 * i.e. the two registers pushed on entry - confirm.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points1_2d_no_rot) |
HIDDEN(_mesa_sse_transform_points1_2d_no_rot) |
GLNAME(_mesa_sse_transform_points1_2d_no_rot): |
#define FRAME_OFFSET 8 |
PUSH_L( ESI ) /* saved here, restored at the finish label */ |
PUSH_L( EDI ) /* saved here, restored at the finish label */ |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP13P2DNRR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
/* ECX is now the end-of-destination address used as the loop bound. */
ALIGNTEXT32 |
MOVSS( M(0), XMM0 ) /* m0 */ |
MOVSS( M(12), XMM1 ) /* m12 */ |
MOVSS( M(13), XMM2 ) /* m13 */ |
ALIGNTEXT32 |
LLBL(K_GTP13P2DNRR_top): |
MOVSS( S(0), XMM3 ) /* ox */ |
MULSS( XMM0, XMM3 ) /* ox*m0 */ |
ADDSS( XMM1, XMM3 ) /* ox*m0+m12 */ |
MOVSS( XMM3, D(0) ) /* ->D(0) */ |
MOVSS( XMM2, D(1) ) /* m13->D(1) */ |
/* The _skip label below is not jumped to anywhere in this routine. */
LLBL(K_GTP13P2DNRR_skip): |
ADD_L( CONST(16), EDI ) /* next dest vertex (16 bytes) */ |
ADD_L( EAX, ESI ) /* next source vertex (+= stride) */ |
CMP_L( ECX, EDI ) /* reached the end address? */ |
JNE( LLBL(K_GTP13P2DNRR_top) ) /* no -> next vertex */ |
LLBL(K_GTP13P2DNRR_finish): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points1_3d
 *
 * Per vertex, reads one source float (ox) and stores three floats:
 *   D[0..2] = ox * m[0..2] + m[12..14]
 * All four lanes are computed, but only lanes 0-2 are stored.
 * The destination vector is flagged VEC_SIZE_3, given size 3 and the source
 * count. A zero source count returns before any destination field is written.
 * The matrix rows are loaded with MOVAPS, which needs a 16-byte aligned
 * address.
 * NOTE(review): OFFSET_SOURCE, OFFSET_DEST and ARG_MATRIX come from
 * xform_args.h (not visible here); the "+8" presumably mirrors FRAME_OFFSET,
 * i.e. the two registers pushed on entry - confirm.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points1_3d) |
HIDDEN(_mesa_sse_transform_points1_3d) |
GLNAME(_mesa_sse_transform_points1_3d): |
#define FRAME_OFFSET 8 |
PUSH_L( ESI ) /* saved here, restored at the finish label */ |
PUSH_L( EDI ) /* saved here, restored at the finish label */ |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP13P3DR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
/* ECX is now the end-of-destination address used as the loop bound. */
ALIGNTEXT32 |
MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ |
MOVAPS( M(12), XMM1 ) /* m15 | m14 | m13 | m12 */ |
ALIGNTEXT32 |
LLBL(K_GTP13P3DR_top): |
MOVSS( S(0), XMM2 ) /* ox */ |
SHUFPS( CONST(0x0), XMM2, XMM2 ) /* ox | ox | ox | ox */ |
MULPS( XMM0, XMM2 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ |
ADDPS( XMM1, XMM2 ) /* +m15 | +m14 | +m13 | +m12 */ |
MOVLPS( XMM2, D(0) ) /* - | - | ->D(1)| ->D(0)*/ |
UNPCKHPS( XMM2, XMM2 ) /* ox*m3+m15 | ox*m3+m15 | ox*m2+m14 | ox*m2+m14 */ |
MOVSS( XMM2, D(2) ) /* lane 0 now holds ox*m2+m14 ->D(2) */ |
/* The _skip label below is not jumped to anywhere in this routine. */
LLBL(K_GTP13P3DR_skip): |
ADD_L( CONST(16), EDI ) /* next dest vertex (16 bytes) */ |
ADD_L( EAX, ESI ) /* next source vertex (+= stride) */ |
CMP_L( ECX, EDI ) /* reached the end address? */ |
JNE( LLBL(K_GTP13P3DR_top) ) /* no -> next vertex */ |
LLBL(K_GTP13P3DR_finish): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
#endif |
/* On ELF/Linux builds, emit an empty .note.GNU-stack section; GNU toolchains
 * read this marker as "this object does not need an executable stack". */
#if defined (__ELF__) && defined (__linux__) |
.section .note.GNU-stack,"",%progbits |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/sse_xform2.S |
---|
0,0 → 1,466 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/** TODO: |
* - insert PREFETCH instructions to avoid cache-misses ! |
* - some more optimizations are possible... |
* - for 40-50% more performance in the SSE-functions, the |
* data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! |
*/ |
#ifdef USE_SSE_ASM |
#include "assyntax.h" |
#include "matypes.h" |
#include "xform_args.h" |
SEG_TEXT |
/*
 * Operand shorthands: element i, at byte offset i*4, relative to
 *   S(i) - ESI (current source vertex)
 *   D(i) - EDI (current destination vertex)
 *   M(i) - EDX (matrix pointer, in the routines that load it)
 * The argument is not parenthesised, so only plain literals should be passed.
 * NOTE(review): REGOFF is defined in assyntax.h (not visible here); presumably
 * it builds a displacement+base memory operand - confirm.
 */
#define S(i) REGOFF(i * 4, ESI) |
#define D(i) REGOFF(i * 4, EDI) |
#define M(i) REGOFF(i * 4, EDX) |
/*
 * _mesa_sse_transform_points2_general
 *
 * Per vertex, reads two source floats (ox, oy) and stores four floats:
 *   D[0..3] = ox * m[0..3] + oy * m[4..7] + m[12..15]
 * The destination vector is flagged VEC_SIZE_4, given size 4 and the source
 * count. A zero source count returns before any destination field is written.
 * Both the matrix loads and the result store use MOVAPS, which needs a
 * 16-byte aligned address, so the matrix and every destination vertex must be
 * 16-byte aligned.
 * NOTE(review): OFFSET_SOURCE, OFFSET_DEST and ARG_MATRIX come from
 * xform_args.h (not visible here); the "+8" presumably mirrors FRAME_OFFSET,
 * i.e. the two registers pushed on entry - confirm.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points2_general) |
HIDDEN (_mesa_sse_transform_points2_general) |
GLNAME( _mesa_sse_transform_points2_general ): |
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) /* saved here, restored at the finish label */ |
PUSH_L ( EDI ) /* saved here, restored at the finish label */ |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX ) |
JZ( LLBL(K_GTP2GR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
/* ECX is now the end-of-destination address used as the loop bound. */
ALIGNTEXT32 |
MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ |
MOVAPS( M(4), XMM1 ) /* m7 | m6 | m5 | m4 */ |
MOVAPS( M(12), XMM2 ) /* m15 | m14 | m13 | m12 */ |
ALIGNTEXT32 |
LLBL(K_GTP2GR_top): |
MOVSS( S(0), XMM3 ) /* ox */ |
SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ox | ox | ox | ox */ |
MULPS( XMM0, XMM3 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ |
MOVSS( S(1), XMM4 ) /* oy */ |
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* oy | oy | oy | oy */ |
MULPS( XMM1, XMM4 ) /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */ |
ADDPS( XMM4, XMM3 ) /* ox terms + oy terms */ |
ADDPS( XMM2, XMM3 ) /* + m15 | m14 | m13 | m12 */ |
MOVAPS( XMM3, D(0) ) /* aligned store of all four results */ |
/* The _skip label below is not jumped to anywhere in this routine. */
LLBL(K_GTP2GR_skip): |
ADD_L ( CONST(16), EDI ) /* next dest vertex (16 bytes) */ |
ADD_L ( EAX, ESI ) /* next source vertex (+= stride) */ |
CMP_L ( ECX, EDI ) /* reached the end address? */ |
JNE ( LLBL(K_GTP2GR_top) ) /* no -> next vertex */ |
LLBL(K_GTP2GR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points2_identity
 *
 * Copies the first two 32-bit components of every source vertex to the
 * matching destination vertex unchanged (via EDX); no matrix is read.
 * The destination vector is flagged VEC_SIZE_2, given size 2 and the source
 * count. A zero source count returns before any destination field is written.
 * If the source and destination data pointers are equal, the copy loop is
 * skipped (the destination fields have already been written by then).
 * NOTE(review): OFFSET_SOURCE and OFFSET_DEST come from xform_args.h (not
 * visible here); the "+8" presumably mirrors FRAME_OFFSET, i.e. the two
 * registers pushed on entry - confirm.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points2_identity) |
HIDDEN(_mesa_sse_transform_points2_identity) |
GLNAME( _mesa_sse_transform_points2_identity ): |
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) /* saved here, restored at the finish label */ |
PUSH_L ( EDI ) /* saved here, restored at the finish label */ |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP2IR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
CMP_L( ESI, EDI ) /* same data pointer for source and dest? */ |
JE( LLBL(K_GTP2IR_finish) ) /* yes -> nothing to copy */ |
ALIGNTEXT32 |
LLBL(K_GTP2IR_top): |
MOV_L ( S(0), EDX ) /* load source component 0 */ |
MOV_L ( EDX, D(0) ) /* store it to dest component 0 */ |
MOV_L ( S(1), EDX ) /* load source component 1 */ |
MOV_L ( EDX, D(1) ) /* store it to dest component 1 */ |
/* The _skip label below is not jumped to anywhere in this routine. */
LLBL(K_GTP2IR_skip): |
ADD_L ( CONST(16), EDI ) /* next dest vertex (16 bytes) */ |
ADD_L ( EAX, ESI ) /* next source vertex (+= stride) */ |
CMP_L ( ECX, EDI ) /* reached the end address? */ |
JNE ( LLBL(K_GTP2IR_top) ) /* no -> next vertex */ |
LLBL(K_GTP2IR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points2_3d_no_rot
 *
 * Per vertex, reads two source floats (ox, oy) and stores three floats:
 *   D[0] = ox * m0 + m12
 *   D[1] = oy * m5 + m13
 *   D[2] = m14
 * The destination vector is flagged VEC_SIZE_3, given size 3 and the source
 * count. A zero source count returns before any destination field is written.
 * NOTE(review): OFFSET_SOURCE, OFFSET_DEST and ARG_MATRIX come from
 * xform_args.h (not visible here); the "+8" presumably mirrors FRAME_OFFSET,
 * i.e. the two registers pushed on entry - confirm.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points2_3d_no_rot) |
HIDDEN(_mesa_sse_transform_points2_3d_no_rot) |
GLNAME(_mesa_sse_transform_points2_3d_no_rot): |
#define FRAME_OFFSET 8 |
PUSH_L( ESI ) /* saved here, restored at the finish label */ |
PUSH_L( EDI ) /* saved here, restored at the finish label */ |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP23DNRR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
/* ECX is now the end-of-destination address used as the loop bound. */
XORPS( XMM0, XMM0 ) /* clean the working register */ |
ALIGNTEXT32 |
MOVSS ( M(0), XMM1 ) /* - | - | - | m0 */ |
MOVSS ( M(5), XMM2 ) /* - | - | - | m5 */ |
UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */ |
MOVLPS ( M(12), XMM2 ) /* - | - | m13 | m12 */ |
MOVSS ( M(14), XMM3 ) /* - | - | - | m14 */ |
ALIGNTEXT32 |
LLBL(K_GTP23DNRR_top): |
MOVLPS ( S(0), XMM0 ) /* - | - | oy | ox */ |
MULPS ( XMM1, XMM0 ) /* - | - | oy*m5 | ox*m0 */ |
ADDPS ( XMM2, XMM0 ) /* - | - | +m13 | +m12 */ |
MOVLPS ( XMM0, D(0) ) /* -> D(1) | -> D(0) */ |
MOVSS ( XMM3, D(2) ) /* -> D(2) */ |
/* The _skip label below is not jumped to anywhere in this routine. */
LLBL(K_GTP23DNRR_skip): |
ADD_L ( CONST(16), EDI ) /* next dest vertex (16 bytes) */ |
ADD_L ( EAX, ESI ) /* next source vertex (+= stride) */ |
CMP_L ( ECX, EDI ) /* reached the end address? */ |
JNE ( LLBL(K_GTP23DNRR_top) ) /* no -> next vertex */ |
LLBL(K_GTP23DNRR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points2_perspective
 *
 * Per vertex, reads two source floats (ox, oy) and stores four floats:
 *   D[0] = ox * m0
 *   D[1] = oy * m5
 *   D[2] = m14
 *   D[3] = 0
 * The destination vector is flagged VEC_SIZE_4, given size 4 and the source
 * count. A zero source count returns before any destination field is written.
 * XMM4 is only loaded in its low two lanes (MOVLPS); its upper lanes are
 * multiplied too, but never stored.
 * NOTE(review): OFFSET_SOURCE, OFFSET_DEST and ARG_MATRIX come from
 * xform_args.h (not visible here); the "+8" presumably mirrors FRAME_OFFSET,
 * i.e. the two registers pushed on entry - confirm.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points2_perspective) |
HIDDEN(_mesa_sse_transform_points2_perspective) |
GLNAME(_mesa_sse_transform_points2_perspective): |
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) /* saved here, restored at the finish label */ |
PUSH_L ( EDI ) /* saved here, restored at the finish label */ |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP23PR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
/* ECX is now the end-of-destination address used as the loop bound. */
ALIGNTEXT32 |
MOVSS ( M(0), XMM1 ) /* - | - | - | m0 */ |
MOVSS ( M(5), XMM2 ) /* - | - | - | m5 */ |
UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */ |
MOVSS ( M(14), XMM3 ) /* m14 */ |
XORPS ( XMM0, XMM0 ) /* 0 | 0 | 0 | 0 */ |
ALIGNTEXT32 |
LLBL(K_GTP23PR_top): |
MOVLPS( S(0), XMM4 ) /* oy | ox */ |
MULPS( XMM1, XMM4 ) /* oy*m5 | ox*m0 */ |
MOVLPS( XMM4, D(0) ) /* ->D(1) | ->D(0) */ |
MOVSS( XMM3, D(2) ) /* ->D(2) */ |
MOVSS( XMM0, D(3) ) /* ->D(3) */ |
/* The _skip label below is not jumped to anywhere in this routine. */
LLBL(K_GTP23PR_skip): |
ADD_L( CONST(16), EDI ) /* next dest vertex (16 bytes) */ |
ADD_L( EAX, ESI ) /* next source vertex (+= stride) */ |
CMP_L( ECX, EDI ) /* reached the end address? */ |
JNE( LLBL(K_GTP23PR_top) ) /* no -> next vertex */ |
LLBL(K_GTP23PR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points2_2d
 *
 * Per vertex, reads two source floats (ox, oy) and stores two floats:
 *   D[0] = ox * m0 + oy * m4 + m12
 *   D[1] = ox * m1 + oy * m5 + m13
 * The destination vector is flagged VEC_SIZE_2, given size 2 and the source
 * count. A zero source count returns before any destination field is written.
 * MOVLPS fills only the low two lanes of XMM0-XMM2, so their upper lanes hold
 * whatever was in the registers on entry; the packed arithmetic still runs on
 * them, but only the low two lanes are ever stored.
 * NOTE(review): OFFSET_SOURCE, OFFSET_DEST and ARG_MATRIX come from
 * xform_args.h (not visible here); the "+8" presumably mirrors FRAME_OFFSET,
 * i.e. the two registers pushed on entry - confirm.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points2_2d) |
HIDDEN(_mesa_sse_transform_points2_2d) |
GLNAME(_mesa_sse_transform_points2_2d): |
#define FRAME_OFFSET 8 |
PUSH_L( ESI ) /* saved here, restored at the finish label */ |
PUSH_L( EDI ) /* saved here, restored at the finish label */ |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP23P2DR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
/* ECX is now the end-of-destination address used as the loop bound. */
ALIGNTEXT32 |
MOVLPS( M(0), XMM0 ) /* m1 | m0 */ |
MOVLPS( M(4), XMM1 ) /* m5 | m4 */ |
MOVLPS( M(12), XMM2 ) /* m13 | m12 */ |
ALIGNTEXT32 |
LLBL(K_GTP23P2DR_top): |
MOVSS( S(0), XMM3 ) /* ox */ |
SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ox | ox */ |
MULPS( XMM0, XMM3 ) /* ox*m1 | ox*m0 */ |
MOVSS( S(1), XMM4 ) /* oy */ |
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* oy | oy */ |
MULPS( XMM1, XMM4 ) /* oy*m5 | oy*m4 */ |
ADDPS( XMM4, XMM3 ) /* ox terms + oy terms */ |
ADDPS( XMM2, XMM3 ) /* + m13 | m12 */ |
MOVLPS( XMM3, D(0) ) /* ->D(1) | ->D(0) */ |
/* The _skip label below is not jumped to anywhere in this routine. */
LLBL(K_GTP23P2DR_skip): |
ADD_L ( CONST(16), EDI ) /* next dest vertex (16 bytes) */ |
ADD_L ( EAX, ESI ) /* next source vertex (+= stride) */ |
CMP_L ( ECX, EDI ) /* reached the end address? */ |
JNE ( LLBL(K_GTP23P2DR_top) ) /* no -> next vertex */ |
LLBL(K_GTP23P2DR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points2_2d_no_rot
 *
 * Per vertex, reads two source floats (ox, oy) and stores two floats:
 *   D[0] = ox * m0 + m12
 *   D[1] = oy * m5 + m13
 * The destination vector is flagged VEC_SIZE_2, given size 2 and the source
 * count. A zero source count returns before any destination field is written.
 * XMM0 is only loaded in its low two lanes (MOVLPS); its upper lanes take part
 * in the packed arithmetic, but are never stored.
 * NOTE(review): OFFSET_SOURCE, OFFSET_DEST and ARG_MATRIX come from
 * xform_args.h (not visible here); the "+8" presumably mirrors FRAME_OFFSET,
 * i.e. the two registers pushed on entry - confirm.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points2_2d_no_rot) |
HIDDEN(_mesa_sse_transform_points2_2d_no_rot) |
GLNAME(_mesa_sse_transform_points2_2d_no_rot): |
#define FRAME_OFFSET 8 |
PUSH_L( ESI ) /* saved here, restored at the finish label */ |
PUSH_L( EDI ) /* saved here, restored at the finish label */ |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP23P2DNRR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
/* ECX is now the end-of-destination address used as the loop bound. */
ALIGNTEXT32 |
MOVSS ( M(0), XMM1 ) /* m0 */ |
MOVSS ( M(5), XMM2 ) /* m5 */ |
UNPCKLPS ( XMM2, XMM1 ) /* m5 | m0 */ |
MOVLPS ( M(12), XMM2 ) /* m13 | m12 */ |
ALIGNTEXT32 |
LLBL(K_GTP23P2DNRR_top): |
MOVLPS( S(0), XMM0 ) /* oy | ox */ |
MULPS( XMM1, XMM0 ) /* oy*m5 | ox*m0 */ |
ADDPS( XMM2, XMM0 ) /* +m13 | +m12 */ |
MOVLPS( XMM0, D(0) ) /* ->D(1) | ->D(0) */ |
/* The _skip label below is not jumped to anywhere in this routine. */
LLBL(K_GTP23P2DNRR_skip): |
ADD_L( CONST(16), EDI ) /* next dest vertex (16 bytes) */ |
ADD_L( EAX, ESI ) /* next source vertex (+= stride) */ |
CMP_L( ECX, EDI ) /* reached the end address? */ |
JNE( LLBL(K_GTP23P2DNRR_top) ) /* no -> next vertex */ |
LLBL(K_GTP23P2DNRR_finish): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points2_3d
 *
 * Per vertex, reads two source floats (ox, oy) and stores three floats:
 *   D[0..2] = ox * m[0..2] + oy * m[4..6] + m[12..14]
 * All four lanes are computed, but only lanes 0-2 are stored.
 * The destination vector is flagged VEC_SIZE_3, given size 3 and the source
 * count. A zero source count returns before any destination field is written.
 * The matrix rows are loaded with MOVAPS, which needs a 16-byte aligned
 * address.
 * NOTE(review): OFFSET_SOURCE, OFFSET_DEST and ARG_MATRIX come from
 * xform_args.h (not visible here); the "+8" presumably mirrors FRAME_OFFSET,
 * i.e. the two registers pushed on entry - confirm.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points2_3d) |
HIDDEN(_mesa_sse_transform_points2_3d) |
GLNAME(_mesa_sse_transform_points2_3d): |
#define FRAME_OFFSET 8 |
PUSH_L( ESI ) /* saved here, restored at the finish label */ |
PUSH_L( EDI ) /* saved here, restored at the finish label */ |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP23P3DR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
/* ECX is now the end-of-destination address used as the loop bound. */
ALIGNTEXT32 |
MOVAPS( M(0), XMM0 ) /* m2 | m1 | m0 */ |
MOVAPS( M(4), XMM1 ) /* m6 | m5 | m4 */ |
MOVAPS( M(12), XMM2 ) /* m14 | m13 | m12 */ |
ALIGNTEXT32 |
LLBL(K_GTP23P3DR_top): |
MOVSS( S(0), XMM3 ) /* ox */ |
SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ox | ox | ox */ |
MULPS( XMM0, XMM3 ) /* ox*m2 | ox*m1 | ox*m0 */ |
MOVSS( S(1), XMM4 ) /* oy */ |
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* oy | oy | oy */ |
MULPS( XMM1, XMM4 ) /* oy*m6 | oy*m5 | oy*m4 */ |
ADDPS( XMM4, XMM3 ) /* ox terms + oy terms */ |
ADDPS( XMM2, XMM3 ) /* + m14 | m13 | m12 */ |
MOVLPS( XMM3, D(0) ) /* ->D(1) | ->D(0) */ |
UNPCKHPS( XMM3, XMM3 ) /* move lane 2 down into lane 0 */ |
MOVSS( XMM3, D(2) ) /* ->D(2) */ |
/* The _skip label below is not jumped to anywhere in this routine. */
LLBL(K_GTP23P3DR_skip): |
ADD_L( CONST(16), EDI ) /* next dest vertex (16 bytes) */ |
ADD_L( EAX, ESI ) /* next source vertex (+= stride) */ |
CMP_L( ECX, EDI ) /* reached the end address? */ |
JNE( LLBL(K_GTP23P3DR_top) ) /* no -> next vertex */ |
LLBL(K_GTP23P3DR_finish): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
#endif |
/* On ELF/Linux builds, emit an empty .note.GNU-stack section; GNU toolchains
 * read this marker as "this object does not need an executable stack". */
#if defined (__ELF__) && defined (__linux__) |
.section .note.GNU-stack,"",%progbits |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/sse_xform3.S |
---|
0,0 → 1,512 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/** TODO: |
* - insert PREFETCH instructions to avoid cache-misses ! |
* - some more optimizations are possible... |
* - for 40-50% more performance in the SSE-functions, the |
* data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! |
*/ |
#ifdef USE_SSE_ASM |
#include "assyntax.h" |
#include "matypes.h" |
#include "xform_args.h" |
SEG_TEXT |
/*
 * Operand shorthands: element i, at byte offset i*4, relative to
 *   S(i) - ESI (current source vertex)
 *   D(i) - EDI (current destination vertex)
 *   M(i) - EDX (matrix pointer, in the routines that load it)
 * The argument is not parenthesised, so only plain literals should be passed.
 * NOTE(review): REGOFF is defined in assyntax.h (not visible here); presumably
 * it builds a displacement+base memory operand - confirm.
 */
#define S(i) REGOFF(i * 4, ESI) |
#define D(i) REGOFF(i * 4, EDI) |
#define M(i) REGOFF(i * 4, EDX) |
/*
 * _mesa_sse_transform_points3_general
 *
 * Per vertex, reads three source floats (ox, oy, oz) and stores four floats:
 *   D[0..3] = ox * m[0..3] + oy * m[4..7] + oz * m[8..11] + m[12..15]
 * The destination vector is flagged VEC_SIZE_4, given size 4 and the source
 * count. A zero source count returns before any destination field is written.
 * Both the matrix loads and the result store use MOVAPS, which needs a
 * 16-byte aligned address, so the matrix and every destination vertex must be
 * 16-byte aligned.
 * This routine spells its operands with REGOFF directly instead of the
 * S/D/M shorthands; the byte offsets are the same.
 * NOTE(review): OFFSET_SOURCE, OFFSET_DEST and ARG_MATRIX come from
 * xform_args.h (not visible here); the "+8" presumably mirrors FRAME_OFFSET,
 * i.e. the two registers pushed on entry - confirm.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points3_general) |
HIDDEN(_mesa_sse_transform_points3_general) |
GLNAME( _mesa_sse_transform_points3_general ): |
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) /* saved here, restored at the finish label */ |
PUSH_L ( EDI ) /* saved here, restored at the finish label */ |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
CMP_L ( CONST(0), ECX ) /* count == 0 ? */ |
JE ( LLBL(K_GTPGR_finish) ) /* yes -> nothing to do. */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
/* ECX is now the end-of-destination address used as the loop bound. */
ALIGNTEXT32 |
MOVAPS ( REGOFF(0, EDX), XMM0 ) /* m3 | m2 | m1 | m0 */ |
MOVAPS ( REGOFF(16, EDX), XMM1 ) /* m7 | m6 | m5 | m4 */ |
MOVAPS ( REGOFF(32, EDX), XMM2 ) /* m11 | m10 | m9 | m8 */ |
MOVAPS ( REGOFF(48, EDX), XMM3 ) /* m15 | m14 | m13 | m12 */ |
ALIGNTEXT32 |
LLBL(K_GTPGR_top): |
MOVSS ( REGOFF(0, ESI), XMM4 ) /* | | | ox */ |
SHUFPS ( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox | ox */ |
MOVSS ( REGOFF(4, ESI), XMM5 ) /* | | | oy */ |
SHUFPS ( CONST(0x0), XMM5, XMM5 ) /* oy | oy | oy | oy */ |
MOVSS ( REGOFF(8, ESI), XMM6 ) /* | | | oz */ |
SHUFPS ( CONST(0x0), XMM6, XMM6 ) /* oz | oz | oz | oz */ |
MULPS ( XMM0, XMM4 ) /* m3*ox | m2*ox | m1*ox | m0*ox */ |
MULPS ( XMM1, XMM5 ) /* m7*oy | m6*oy | m5*oy | m4*oy */ |
MULPS ( XMM2, XMM6 ) /* m11*oz | m10*oz | m9*oz | m8*oz */ |
ADDPS ( XMM5, XMM4 ) /* ox terms + oy terms */ |
ADDPS ( XMM6, XMM4 ) /* + oz terms */ |
ADDPS ( XMM3, XMM4 ) /* + m15 | m14 | m13 | m12 */ |
MOVAPS ( XMM4, REGOFF(0, EDI) ) /* aligned store of all four results */ |
/* The _skip label below is not jumped to anywhere in this routine. */
LLBL(K_GTPGR_skip): |
ADD_L ( CONST(16), EDI ) /* next dest vertex (16 bytes) */ |
ADD_L ( EAX, ESI ) /* next source vertex (+= stride) */ |
CMP_L ( ECX, EDI ) /* reached the end address? */ |
JNE ( LLBL(K_GTPGR_top) ) /* no -> next vertex */ |
LLBL(K_GTPGR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points3_identity
 *
 * Copies the first three 32-bit components of every source vertex to the
 * matching destination vertex unchanged, using XMM0 as the temporary (two
 * components with MOVLPS, the third with MOVSS); no matrix is read.
 * The destination vector is flagged VEC_SIZE_3, given size 3 and the source
 * count. A zero source count returns before any destination field is written.
 * If the source and destination data pointers are equal, the copy loop is
 * skipped (the destination fields have already been written by then).
 * NOTE(review): OFFSET_SOURCE and OFFSET_DEST come from xform_args.h (not
 * visible here); the "+8" presumably mirrors FRAME_OFFSET, i.e. the two
 * registers pushed on entry - confirm.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points3_identity) |
HIDDEN(_mesa_sse_transform_points3_identity) |
GLNAME( _mesa_sse_transform_points3_identity ): |
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) /* saved here, restored at the finish label */ |
PUSH_L ( EDI ) /* saved here, restored at the finish label */ |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTPIR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
CMP_L( ESI, EDI ) /* same data pointer for source and dest? */ |
JE( LLBL(K_GTPIR_finish) ) /* yes -> nothing to copy */ |
ALIGNTEXT32 |
LLBL(K_GTPIR_top): |
MOVLPS ( S(0), XMM0 ) /* load source components 0 and 1 */ |
MOVLPS ( XMM0, D(0) ) /* ->D(1) | ->D(0) */ |
MOVSS ( S(2), XMM0 ) /* load source component 2 */ |
MOVSS ( XMM0, D(2) ) /* ->D(2) */ |
/* The _skip label below is not jumped to anywhere in this routine. */
LLBL(K_GTPIR_skip): |
ADD_L ( CONST(16), EDI ) /* next dest vertex (16 bytes) */ |
ADD_L ( EAX, ESI ) /* next source vertex (+= stride) */ |
CMP_L ( ECX, EDI ) /* reached the end address? */ |
JNE ( LLBL(K_GTPIR_top) ) /* no -> next vertex */ |
LLBL(K_GTPIR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points3_3d_no_rot
 *
 * Per vertex, reads three source floats (s0, s1, sz) and stores three floats:
 *   D[0] = s0 * m0  + m12
 *   D[1] = s1 * m5  + m13
 *   D[2] = sz * m10 + m14
 * The destination vector is flagged VEC_SIZE_3, given size 3 and the source
 * count. A zero source count returns before any destination field is written.
 * NOTE(review): OFFSET_SOURCE, OFFSET_DEST and ARG_MATRIX come from
 * xform_args.h (not visible here); the "+8" presumably mirrors FRAME_OFFSET,
 * i.e. the two registers pushed on entry - confirm.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points3_3d_no_rot) |
HIDDEN(_mesa_sse_transform_points3_3d_no_rot) |
GLNAME(_mesa_sse_transform_points3_3d_no_rot): |
#define FRAME_OFFSET 8 |
PUSH_L( ESI ) /* saved here, restored at the finish label */ |
PUSH_L( EDI ) /* saved here, restored at the finish label */ |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP3DNRR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
/* ECX is now the end-of-destination address used as the loop bound. */
XORPS( XMM0, XMM0 ) /* clean the working register */ |
ALIGNTEXT32 |
MOVSS ( M(0), XMM1 ) /* - | - | - | m0 */ |
MOVSS ( M(5), XMM2 ) /* - | - | - | m5 */ |
UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */ |
MOVLPS ( M(12), XMM2 ) /* - | - | m13 | m12 */ |
MOVSS ( M(10), XMM3 ) /* - | - | - | m10 */ |
MOVSS ( M(14), XMM4 ) /* - | - | - | m14 */ |
ALIGNTEXT32 |
LLBL(K_GTP3DNRR_top): |
MOVLPS ( S(0), XMM0 ) /* - | - | s1 | s0 */ |
MULPS ( XMM1, XMM0 ) /* - | - | s1*m5 | s0*m0 */ |
ADDPS ( XMM2, XMM0 ) /* - | - | +m13 | +m12 */ |
MOVLPS ( XMM0, D(0) ) /* -> D(1) | -> D(0) */ |
MOVSS ( S(2), XMM0 ) /* sz */ |
MULSS ( XMM3, XMM0 ) /* sz*m10 */ |
ADDSS ( XMM4, XMM0 ) /* +m14 */ |
MOVSS ( XMM0, D(2) ) /* -> D(2) */ |
/* The _skip label below is not jumped to anywhere in this routine. */
LLBL(K_GTP3DNRR_skip): |
ADD_L ( CONST(16), EDI ) /* next dest vertex (16 bytes) */ |
ADD_L ( EAX, ESI ) /* next source vertex (+= stride) */ |
CMP_L ( ECX, EDI ) /* reached the end address? */ |
JNE ( LLBL(K_GTP3DNRR_top) ) /* no -> next vertex */ |
LLBL(K_GTP3DNRR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points3_perspective
 *
 * Per vertex, reads three source floats (ox, oy, oz) and stores four floats:
 *   D[0] = ox * m0  + oz * m8
 *   D[1] = oy * m5  + oz * m9
 *   D[2] = oz * m10 + m14
 *   D[3] = 0 - oz
 * The destination vector is flagged VEC_SIZE_4, given size 4 and the source
 * count. A zero source count returns before any destination field is written.
 * The negation uses SUBPS (a packed subtract) after a register-to-register
 * MOVSS that only replaces lane 0 of XMM5; the upper lanes of that result are
 * never stored.
 * NOTE(review): OFFSET_SOURCE, OFFSET_DEST and ARG_MATRIX come from
 * xform_args.h (not visible here); the "+8" presumably mirrors FRAME_OFFSET,
 * i.e. the two registers pushed on entry - confirm.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points3_perspective) |
HIDDEN(_mesa_sse_transform_points3_perspective) |
GLNAME(_mesa_sse_transform_points3_perspective): |
#define FRAME_OFFSET 8 |
PUSH_L ( ESI ) /* saved here, restored at the finish label */ |
PUSH_L ( EDI ) /* saved here, restored at the finish label */ |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP3PR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr */ |
/* ECX is now the end-of-destination address used as the loop bound. */
ALIGNTEXT32 |
MOVSS ( M(0), XMM1 ) /* - | - | - | m0 */ |
MOVSS ( M(5), XMM2 ) /* - | - | - | m5 */ |
UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */ |
MOVLPS ( M(8), XMM2 ) /* - | - | m9 | m8 */ |
MOVSS ( M(10), XMM3 ) /* m10 */ |
MOVSS ( M(14), XMM4 ) /* m14 */ |
XORPS ( XMM6, XMM6 ) /* 0 */ |
ALIGNTEXT32 |
LLBL(K_GTP3PR_top): |
MOVLPS ( S(0), XMM0 ) /* oy | ox */ |
MULPS ( XMM1, XMM0 ) /* oy*m5 | ox*m0 */ |
MOVSS ( S(2), XMM5 ) /* oz */ |
SHUFPS ( CONST(0x0), XMM5, XMM5 ) /* oz | oz */ |
MULPS ( XMM2, XMM5 ) /* oz*m9 | oz*m8 */ |
ADDPS ( XMM5, XMM0 ) /* +oy*m5 | +ox*m0 */ |
MOVLPS ( XMM0, D(0) ) /* ->D(1) | ->D(0) */ |
MOVSS ( S(2), XMM0 ) /* oz */ |
MULSS ( XMM3, XMM0 ) /* oz*m10 */ |
ADDSS ( XMM4, XMM0 ) /* +m14 */ |
MOVSS ( XMM0, D(2) ) /* ->D(2) */ |
MOVSS ( S(2), XMM0 ) /* oz */ |
MOVSS ( XMM6, XMM5 ) /* 0 */ |
SUBPS ( XMM0, XMM5 ) /* -oz */ |
MOVSS ( XMM5, D(3) ) /* ->D(3) */ |
/* The _skip label below is not jumped to anywhere in this routine. */
LLBL(K_GTP3PR_skip): |
ADD_L( CONST(16), EDI ) /* next dest vertex (16 bytes) */ |
ADD_L( EAX, ESI ) /* next source vertex (+= stride) */ |
CMP_L( ECX, EDI ) /* reached the end address? */ |
JNE( LLBL(K_GTP3PR_top) ) /* no -> next vertex */ |
LLBL(K_GTP3PR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points3_2d
 *
 * Per-vertex loop that transforms x and y and copies z through:
 *   D(0) = ox*m0 + oy*m4 + m12
 *   D(1) = ox*m1 + oy*m5 + m13
 *   D(2) = S(2)
 * The dest GLvector4f gets count = source count, size = 3 and VEC_SIZE_3
 * OR-ed into its flags.  A zero source count skips all dest updates.
 *
 * NOTE(review): M(), S() and D() are defined earlier in this file, outside
 * this excerpt; presumably they address element i through EDX, ESI and EDI.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points3_2d) |
HIDDEN(_mesa_sse_transform_points3_2d) |
GLNAME(_mesa_sse_transform_points3_2d): |
#define FRAME_OFFSET 8 /* two registers (8 bytes) are pushed below */ |
PUSH_L( ESI ) /* saved here, restored before RET */ |
PUSH_L( EDI ) /* saved here, restored before RET */ |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP3P2DR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride, added to ESI once per vertex */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 (EDI advances 16 bytes per vertex) */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr: ECX is now the loop end address */ |
ALIGNTEXT32 |
MOVLPS( M(0), XMM0 ) /* m1 | m0 */ |
MOVLPS( M(4), XMM1 ) /* m5 | m4 */ |
MOVLPS( M(12), XMM2 ) /* m13 | m12 */ |
ALIGNTEXT32 |
LLBL(K_GTP3P2DR_top): |
MOVSS ( S(0), XMM3 ) /* ox */ |
SHUFPS ( CONST(0x0), XMM3, XMM3 ) /* ox | ox */ |
MULPS ( XMM0, XMM3 ) /* ox*m1 | ox*m0 */ |
MOVSS ( S(1), XMM4 ) /* oy */ |
SHUFPS ( CONST(0x0), XMM4, XMM4 ) /* oy | oy */ |
MULPS ( XMM1, XMM4 ) /* oy*m5 | oy*m4 */ |
ADDPS ( XMM4, XMM3 ) /* ox*m1+oy*m5 | ox*m0+oy*m4 */ |
ADDPS ( XMM2, XMM3 ) /* +m13 | +m12 */ |
MOVLPS ( XMM3, D(0) ) /* ->D(1) | ->D(0) */ |
MOVSS ( S(2), XMM3 ) /* oz */ |
MOVSS ( XMM3, D(2) ) /* ->D(2), copied unchanged */ |
LLBL(K_GTP3P2DR_skip): /* no jump in this routine targets this label */ |
ADD_L ( CONST(16), EDI ) /* next dest vertex */ |
ADD_L ( EAX, ESI ) /* next source vertex (by stride) */ |
CMP_L ( ECX, EDI ) |
JNE ( LLBL(K_GTP3P2DR_top) ) /* loop until EDI reaches the end address */ |
LLBL(K_GTP3P2DR_finish): |
POP_L ( EDI ) |
POP_L ( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points3_2d_no_rot
 *
 * Per-vertex loop that scales and translates x and y and copies z through:
 *   D(0) = ox*m0 + m12
 *   D(1) = oy*m5 + m13
 *   D(2) = S(2)
 * The dest GLvector4f gets count = source count, size = 3 and VEC_SIZE_3
 * OR-ed into its flags.  A zero source count skips all dest updates.
 *
 * NOTE(review): M(), S() and D() are defined earlier in this file, outside
 * this excerpt; presumably they address element i through EDX, ESI and EDI.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points3_2d_no_rot) |
HIDDEN(_mesa_sse_transform_points3_2d_no_rot) |
GLNAME(_mesa_sse_transform_points3_2d_no_rot): |
#define FRAME_OFFSET 8 /* two registers (8 bytes) are pushed below */ |
PUSH_L( ESI ) /* saved here, restored before RET */ |
PUSH_L( EDI ) /* saved here, restored before RET */ |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP3P2DNRR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride, added to ESI once per vertex */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 (EDI advances 16 bytes per vertex) */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr: ECX is now the loop end address */ |
ALIGNTEXT32 |
MOVSS ( M(0), XMM1 ) /* m0 */ |
MOVSS ( M(5), XMM2 ) /* m5 */ |
UNPCKLPS ( XMM2, XMM1 ) /* m5 | m0 */ |
MOVLPS ( M(12), XMM2 ) /* m13 | m12 */ |
ALIGNTEXT32 |
LLBL(K_GTP3P2DNRR_top): |
MOVLPS( S(0), XMM0 ) /* oy | ox */ |
MULPS( XMM1, XMM0 ) /* oy*m5 | ox*m0 */ |
ADDPS( XMM2, XMM0 ) /* +m13 | +m12 */ |
MOVLPS( XMM0, D(0) ) /* ->D(1) | ->D(0) */ |
MOVSS( S(2), XMM0 ) /* oz */ |
MOVSS( XMM0, D(2) ) /* ->D(2), copied unchanged */ |
LLBL(K_GTP3P2DNRR_skip): /* no jump in this routine targets this label */ |
ADD_L( CONST(16), EDI ) /* next dest vertex */ |
ADD_L( EAX, ESI ) /* next source vertex (by stride) */ |
CMP_L( ECX, EDI ) |
JNE( LLBL(K_GTP3P2DNRR_top) ) /* loop until EDI reaches the end address */ |
LLBL(K_GTP3P2DNRR_finish): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_sse_transform_points3_3d
 *
 * Per-vertex loop computing, for i = 0..2:
 *   D(i) = ox*m[i] + oy*m[4+i] + oz*m[8+i] + m[12+i]
 * The four matrix rows are fetched with MOVAPS, which faults on memory
 * operands that are not 16-byte aligned, so the matrix must be aligned.
 * The dest GLvector4f gets count = source count, size = 3 and VEC_SIZE_3
 * OR-ed into its flags.  A zero source count skips all dest updates.
 *
 * NOTE(review): M(), S() and D() are defined earlier in this file, outside
 * this excerpt; presumably they address element i through EDX, ESI and EDI.
 */
ALIGNTEXT4 |
GLOBL GLNAME(_mesa_sse_transform_points3_3d) |
HIDDEN(_mesa_sse_transform_points3_3d) |
GLNAME(_mesa_sse_transform_points3_3d): |
#define FRAME_OFFSET 8 /* two registers (8 bytes) are pushed below */ |
PUSH_L( ESI ) /* saved here, restored before RET */ |
PUSH_L( EDI ) /* saved here, restored before RET */ |
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ |
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX) |
JZ( LLBL(K_GTP3P3DR_finish) ) /* count was zero; go to finish */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride, added to ESI once per vertex */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
SHL_L( CONST(4), ECX ) /* count *= 16 (EDI advances 16 bytes per vertex) */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ADD_L( EDI, ECX ) /* count += dest ptr: ECX is now the loop end address */ |
ALIGNTEXT32 |
MOVAPS( M(0), XMM0 ) /* m2 | m1 | m0 */ |
MOVAPS( M(4), XMM1 ) /* m6 | m5 | m4 */ |
MOVAPS( M(8), XMM2 ) /* m10 | m9 | m8 */ |
MOVAPS( M(12), XMM3 ) /* m14 | m13 | m12 */ |
ALIGNTEXT32 |
LLBL(K_GTP3P3DR_top): |
MOVSS( S(0), XMM4 ) /* ox */ |
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox */ |
MULPS( XMM0, XMM4 ) /* ox*m2 | ox*m1 | ox*m0 */ |
MOVSS( S(1), XMM5 ) /* oy */ |
SHUFPS( CONST(0x0), XMM5, XMM5 ) /* oy | oy | oy */ |
MULPS( XMM1, XMM5 ) /* oy*m6 | oy*m5 | oy*m4 */ |
MOVSS( S(2), XMM6 ) /* oz */ |
SHUFPS( CONST(0x0), XMM6, XMM6 ) /* oz | oz | oz */ |
MULPS( XMM2, XMM6 ) /* oz*m10 | oz*m9 | oz*m8 */ |
ADDPS( XMM5, XMM4 ) /* + | + | + */ |
ADDPS( XMM6, XMM4 ) /* + | + | + */ |
ADDPS( XMM3, XMM4 ) /* + | + | + */ |
MOVLPS( XMM4, D(0) ) /* => D(1) | => D(0) */ |
UNPCKHPS( XMM4, XMM4 ) /* bring lane 2 (the z result) down to lane 0 */ |
MOVSS( XMM4, D(2) ) /* => D(2) */ |
LLBL(K_GTP3P3DR_skip): /* no jump in this routine targets this label */ |
ADD_L( CONST(16), EDI ) /* next dest vertex */ |
ADD_L( EAX, ESI ) /* next source vertex (by stride) */ |
CMP_L( ECX, EDI ) |
JNE( LLBL(K_GTP3P3DR_top) ) /* loop until EDI reaches the end address */ |
LLBL(K_GTP3P3DR_finish): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
#endif |
#if defined (__ELF__) && defined (__linux__) |
.section .note.GNU-stack,"",%progbits |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/sse_xform4.S |
---|
0,0 → 1,235 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#ifdef USE_SSE_ASM |
#include "assyntax.h" |
#include "matypes.h" |
#include "xform_args.h" |
SEG_TEXT |
/*
 * Addressing helpers for the routines in this file.  Every routine below
 * pushes ESI and EDI (8 bytes) before reading its arguments.
 * NOTE(review): FRAME_OFFSET is presumably consumed by the ARG_* macros
 * from xform_args.h to skip those pushes - confirm against that header.
 */
#define FRAME_OFFSET 8 |
#define SRC(i) REGOFF(i * 4, ESI) /* i-th 4-byte element at ESI (source vertex) */ |
#define DST(i) REGOFF(i * 4, EDI) /* i-th 4-byte element at EDI (dest vertex) */ |
#define MAT(i) REGOFF(i * 4, EDX) /* i-th 4-byte element at EDX (matrix) */ |
/* Packs four selectors into one immediate: r0*64 + r1*16 + r2*4 + r3,
 * i.e. two bits each when every argument is in 0..3.  Not used by the
 * routines visible in this file. */
#define SELECT(r0, r1, r2, r3) CONST( r0 * 64 + r1 * 16 + r2 * 4 + r3 ) |
/*
 * _mesa_sse_transform_points4_general
 *
 * Full four-component transform.  For each source vertex (ox, oy, oz, ow)
 * and i = 0..3:
 *   D(i) = ox*m[i] + oy*m[4+i] + oz*m[8+i] + ow*m[12+i]
 * The matrix rows are loaded and each result is stored with MOVAPS, which
 * faults on memory operands that are not 16-byte aligned, so both the
 * matrix and the dest vertices must be aligned.
 * The dest GLvector4f gets count = source count, size = 4 and VEC_SIZE_4
 * OR-ed into its flags.  A zero source count skips all dest updates.
 * ECX is used as a down-counter of remaining vertices.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_sse_transform_points4_general ) |
HIDDEN(_mesa_sse_transform_points4_general) |
GLNAME( _mesa_sse_transform_points4_general ): |
PUSH_L( ESI ) /* saved here, restored before RET */ |
PUSH_L( EDI ) /* saved here, restored before RET */ |
MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ |
MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX ) /* verify non-zero count */ |
JE( LLBL( sse_general_done ) ) |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )/* set dest size */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
PREFETCHT0( REGIND(ESI) ) /* prefetch hint for the first source vertex */ |
MOVAPS( MAT(0), XMM4 ) /* m3 | m2 | m1 | m0 */ |
MOVAPS( MAT(4), XMM5 ) /* m7 | m6 | m5 | m4 */ |
MOVAPS( MAT(8), XMM6 ) /* m11 | m10 | m9 | m8 */ |
MOVAPS( MAT(12), XMM7 ) /* m15 | m14 | m13 | m12 */ |
ALIGNTEXT16 |
LLBL( sse_general_loop ): |
MOVSS( SRC(0), XMM0 ) /* ox */ |
SHUFPS( CONST(0x0), XMM0, XMM0 ) /* ox | ox | ox | ox */ |
MULPS( XMM4, XMM0 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ |
MOVSS( SRC(1), XMM1 ) /* oy */ |
SHUFPS( CONST(0x0), XMM1, XMM1 ) /* oy | oy | oy | oy */ |
MULPS( XMM5, XMM1 ) /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */ |
MOVSS( SRC(2), XMM2 ) /* oz */ |
SHUFPS( CONST(0x0), XMM2, XMM2 ) /* oz | oz | oz | oz */ |
MULPS( XMM6, XMM2 ) /* oz*m11 | oz*m10 | oz*m9 | oz*m8 */ |
MOVSS( SRC(3), XMM3 ) /* ow */ |
SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ow | ow | ow | ow */ |
MULPS( XMM7, XMM3 ) /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */ |
ADDPS( XMM1, XMM0 ) /* ox*m3+oy*m7 | ... */ |
ADDPS( XMM2, XMM0 ) /* ox*m3+oy*m7+oz*m11 | ... */ |
ADDPS( XMM3, XMM0 ) /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */ |
MOVAPS( XMM0, DST(0) ) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */ |
ADD_L( CONST(16), EDI ) /* next dest vertex */ |
ADD_L( EAX, ESI ) /* next source vertex (by stride) */ |
DEC_L( ECX ) /* one fewer vertex remaining */ |
JNZ( LLBL( sse_general_loop ) ) |
LLBL( sse_general_done ): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
/*
 * _mesa_sse_transform_points4_3d
 *
 * Transforms four-component vertices (ox, oy, oz, ow).  For i = 0..2:
 *   D(i) = ox*m[i] + oy*m[4+i] + oz*m[8+i] + ow*m[12+i]
 * and D(3) is set to the source w (the computed fourth lane is overwritten).
 *
 * Because all four components of every dest vertex are written and w is
 * carried over from the source, the dest vector is marked as a
 * 4-component vector (VEC_SIZE_4 / size 4), matching the other
 * 4-component routines in this file.  Marking it as size 3 would make
 * consumers ignore the w that is stored here.
 *
 * The matrix rows are loaded and each result is stored with MOVAPS, which
 * faults on memory operands that are not 16-byte aligned.
 * A zero source count skips all dest updates.
 */
ALIGNTEXT4
GLOBL GLNAME( _mesa_sse_transform_points4_3d )
HIDDEN(_mesa_sse_transform_points4_3d)
GLNAME( _mesa_sse_transform_points4_3d ):
    PUSH_L( ESI )                               /* saved here, restored before RET */
    PUSH_L( EDI )                               /* saved here, restored before RET */
    MOV_L( ARG_SOURCE, ESI )                    /* ptr to source GLvector4f */
    MOV_L( ARG_DEST, EDI )                      /* ptr to dest GLvector4f */
    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */
    TEST_L( ECX, ECX)
    JZ( LLBL(K_GTP43P3DR_finish) )              /* count was zero; go to finish */
    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* stride */
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags: w is valid */
    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )    /* set dest size: x, y, z, w */
    SHL_L( CONST(4), ECX )                      /* count *= 16 (bytes per dest vertex) */
    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */
    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
    ADD_L( EDI, ECX )                           /* ECX = loop end address in dest */
    MOVAPS( MAT(0), XMM0 )                      /* m3 | m2 | m1 | m0 */
    MOVAPS( MAT(4), XMM1 )                      /* m7 | m6 | m5 | m4 */
    MOVAPS( MAT(8), XMM2 )                      /* m11 | m10 | m9 | m8 */
    MOVAPS( MAT(12), XMM3 )                     /* m15 | m14 | m13 | m12 */
ALIGNTEXT32
LLBL( K_GTP43P3DR_top ):
    MOVSS( SRC(0), XMM4 )                       /* ox */
    SHUFPS( CONST(0x0), XMM4, XMM4 )            /* ox | ox | ox | ox */
    MULPS( XMM0, XMM4 )                         /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
    MOVSS( SRC(1), XMM5 )                       /* oy */
    SHUFPS( CONST(0x0), XMM5, XMM5 )            /* oy | oy | oy | oy */
    MULPS( XMM1, XMM5 )                         /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */
    MOVSS( SRC(2), XMM6 )                       /* oz */
    SHUFPS( CONST(0x0), XMM6, XMM6 )            /* oz | oz | oz | oz */
    MULPS( XMM2, XMM6 )                         /* oz*m11 | oz*m10 | oz*m9 | oz*m8 */
    MOVSS( SRC(3), XMM7 )                       /* ow */
    SHUFPS( CONST(0x0), XMM7, XMM7 )            /* ow | ow | ow | ow */
    MULPS( XMM3, XMM7 )                         /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */
    ADDPS( XMM5, XMM4 )                         /* ox*m3+oy*m7 | ... */
    ADDPS( XMM6, XMM4 )                         /* ox*m3+oy*m7+oz*m11 | ... */
    ADDPS( XMM7, XMM4 )                         /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
    MOVAPS( XMM4, DST(0) )                      /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
    MOVSS( SRC(3), XMM4 )                       /* ow */
    MOVSS( XMM4, DST(3) )                       /* ->D(3): w is passed through unchanged */
LLBL( K_GTP43P3DR_skip ):
    ADD_L( CONST(16), EDI )                     /* next dest vertex */
    ADD_L( EAX, ESI )                           /* next source vertex (by stride) */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP43P3DR_top) )                /* loop until EDI reaches the end address */
LLBL( K_GTP43P3DR_finish ):
    POP_L( EDI )
    POP_L( ESI )
    RET
/*
 * _mesa_sse_transform_points4_identity
 *
 * Copies each 16-byte source vertex to the dest unchanged, stepping the
 * source by its stride and the dest by 16 bytes.  Both the load and the
 * store use MOVAPS, which faults on memory operands that are not 16-byte
 * aligned, so every source and dest vertex address must be aligned.
 * The dest GLvector4f gets count = source count, size = 4 and VEC_SIZE_4
 * OR-ed into its flags.  A zero source count skips all dest updates.
 * The matrix pointer is loaded into EDX but never read afterwards.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_sse_transform_points4_identity ) |
HIDDEN(_mesa_sse_transform_points4_identity) |
GLNAME( _mesa_sse_transform_points4_identity ): |
PUSH_L( ESI ) /* saved here, restored before RET */ |
PUSH_L( EDI ) /* saved here, restored before RET */ |
MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ |
MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ |
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix (unused below) */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
TEST_L( ECX, ECX ) /* verify non-zero count */ |
JE( LLBL( sse_identity_done ) ) |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )/* set dest size */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ |
ALIGNTEXT16 |
LLBL( sse_identity_loop ): |
PREFETCHNTA( REGOFF(32, ESI) ) /* prefetch hint 32 bytes past the current source vertex */ |
MOVAPS( REGIND(ESI), XMM0 ) /* load the whole source vertex */ |
ADD_L( EAX, ESI ) /* next source vertex (by stride) */ |
MOVAPS( XMM0, REGIND(EDI) ) /* store it to the dest */ |
ADD_L( CONST(16), EDI ) /* next dest vertex */ |
DEC_L( ECX ) /* one fewer vertex remaining */ |
JNZ( LLBL( sse_identity_loop ) ) |
LLBL( sse_identity_done ): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#endif |
#if defined (__ELF__) && defined (__linux__) |
.section .note.GNU-stack,"",%progbits |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/x86_cliptest.S |
---|
0,0 → 1,407 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* |
* NOTE: Avoid using spaces in between '(' ')' and arguments, especially |
* with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces |
* in there will break the build on some platforms. |
*/ |
#include "assyntax.h" |
#include "matypes.h" |
#include "clip_args.h" |
/* Memory operands for 4-byte elements 0..3 at ESI (source vertex),
 * EDI (dest vertex) and EDX.  Note that the cliptest loops in this file
 * keep the clipmask pointer in EDX, so MATn is not used by them. */
#define SRC0 REGOFF(0, ESI) |
#define SRC1 REGOFF(4, ESI) |
#define SRC2 REGOFF(8, ESI) |
#define SRC3 REGOFF(12, ESI) |
#define DST0 REGOFF(0, EDI) |
#define DST1 REGOFF(4, EDI) |
#define DST2 REGOFF(8, EDI) |
#define DST3 REGOFF(12, EDI) |
#define MAT0 REGOFF(0, EDX) |
#define MAT1 REGOFF(4, EDX) |
#define MAT2 REGOFF(8, EDX) |
#define MAT3 REGOFF(12, EDX) |
/* |
* Table for clip test. |
* |
* 128 one-byte clip masks (16 rows of 8), indexed by the 7-bit value the |
* cliptest loops below accumulate in ECX.  The index bits are: |
* |
* bit6 = SRC3 < 0 |
* bit5 = SRC2 < 0 |
* bit4 = abs(S(2)) > abs(S(3)) |
* bit3 = SRC1 < 0 |
* bit2 = abs(S(1)) > abs(S(3)) |
* bit1 = SRC0 < 0 |
* bit0 = abs(S(0)) > abs(S(3)) |
* |
* S(n) and SRCn both refer to component n of the source vertex. |
*/ |
SEG_DATA |
clip_table: |
D_BYTE 0x00, 0x01, 0x00, 0x02, 0x04, 0x05, 0x04, 0x06 |
D_BYTE 0x00, 0x01, 0x00, 0x02, 0x08, 0x09, 0x08, 0x0a |
D_BYTE 0x20, 0x21, 0x20, 0x22, 0x24, 0x25, 0x24, 0x26 |
D_BYTE 0x20, 0x21, 0x20, 0x22, 0x28, 0x29, 0x28, 0x2a |
D_BYTE 0x00, 0x01, 0x00, 0x02, 0x04, 0x05, 0x04, 0x06 |
D_BYTE 0x00, 0x01, 0x00, 0x02, 0x08, 0x09, 0x08, 0x0a |
D_BYTE 0x10, 0x11, 0x10, 0x12, 0x14, 0x15, 0x14, 0x16 |
D_BYTE 0x10, 0x11, 0x10, 0x12, 0x18, 0x19, 0x18, 0x1a |
D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x37, 0x35, 0x37, 0x36 |
D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x3b, 0x39, 0x3b, 0x3a |
D_BYTE 0x2f, 0x2d, 0x2f, 0x2e, 0x27, 0x25, 0x27, 0x26 |
D_BYTE 0x2f, 0x2d, 0x2f, 0x2e, 0x2b, 0x29, 0x2b, 0x2a |
D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x37, 0x35, 0x37, 0x36 |
D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x3b, 0x39, 0x3b, 0x3a |
D_BYTE 0x1f, 0x1d, 0x1f, 0x1e, 0x17, 0x15, 0x17, 0x16 |
D_BYTE 0x1f, 0x1d, 0x1f, 0x1e, 0x1b, 0x19, 0x1b, 0x1a |
SEG_TEXT |
/* |
* _mesa_x86_cliptest_points4 |
* |
* For every source vertex: builds a 7-bit index from the sign bits of the |
* four components and from integer compares of the doubled bit patterns of |
* components 0..2 against component 3, looks the clip mask up in |
* clip_table, stores it to clipmask[] and folds it into the running OR |
* (AL) and AND (AH) masks.  A vertex whose mask is zero is written to the |
* dest as (S0/S3, S1/S3, S2/S3, 1/S3); any other vertex is written as |
* (0, 0, 0, 1.0).  The OR/AND masks are read through ARG_OR / ARG_AND at |
* entry and written back after the loop; with a zero count the loop and |
* the write-back are both skipped.  ARG_DEST is returned in EAX. |
* |
* During the loop the ARG_SOURCE stack slot holds the source stride and |
* the ARG_CLIP slot holds the end address (clipmask + count). |
* |
* AL: ormask |
* AH: andmask |
* EBX: temp0 |
* ECX: temp1 |
* EDX: clipmask[] |
* ESI: clip[] |
* EDI: proj[] |
* EBP: temp2 |
*/ |
#if defined(__ELF__) && defined(__PIC__) && defined(GNU_ASSEMBLER) && !defined(ELFPIC) |
#define ELFPIC |
#endif |
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_x86_cliptest_points4 ) |
HIDDEN(_mesa_x86_cliptest_points4) |
GLNAME( _mesa_x86_cliptest_points4 ): |
/* Four registers are pushed (16 bytes); the ELFPIC build pushes the
 * clip_table address as well (20 bytes). */
#ifdef ELFPIC |
#define FRAME_OFFSET 20 |
#else |
#define FRAME_OFFSET 16 |
#endif |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
PUSH_L( EBP ) |
PUSH_L( EBX ) |
#ifdef ELFPIC |
/* store pointer to clip_table on stack */ |
CALL( LLBL(ctp4_get_eip) ) /* EBX = address of the next instruction */ |
ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX ) |
MOV_L( REGOFF(clip_table@GOT, EBX), EBX ) /* EBX = address of clip_table, read from the GOT */ |
PUSH_L( EBX ) |
JMP( LLBL(ctp4_clip_table_ready) ) |
LLBL(ctp4_get_eip): |
/* store eip in ebx */ |
MOV_L( REGIND(ESP), EBX ) |
RET |
LLBL(ctp4_clip_table_ready): |
#endif |
MOV_L( ARG_SOURCE, ESI ) |
MOV_L( ARG_DEST, EDI ) |
MOV_L( ARG_CLIP, EDX ) |
MOV_L( ARG_OR, EBX ) |
MOV_L( ARG_AND, EBP ) |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) |
MOV_L( REGOFF(V4F_START, ESI), ESI ) |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) |
MOV_L( EAX, ARG_SOURCE ) /* put stride in ARG_SOURCE */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) |
MOV_L( REGOFF(V4F_START, EDI), EDI ) |
ADD_L( EDX, ECX ) |
MOV_L( ECX, ARG_CLIP ) /* put clipmask + count in ARG_CLIP */ |
CMP_L( ECX, EDX ) /* equal when count is zero; the byte moves below leave the flags alone */ |
MOV_B( REGIND(EBX), AL ) /* AL = incoming *orMask */ |
MOV_B( REGIND(EBP), AH ) /* AH = incoming *andMask */ |
JZ( LLBL(ctp4_finish) ) |
ALIGNTEXT16 |
LLBL(ctp4_top): |
FLD1 /* F3 */ |
FDIV_S( SRC3 ) /* GH: don't care about div-by-zero */ |
MOV_L( SRC3, EBP ) |
MOV_L( SRC2, EBX ) |
XOR_L( ECX, ECX ) /* ECX collects the table index, one bit per ADC */ |
ADD_L( EBP, EBP ) /* ebp = abs(S(3))*2 ; carry = sign of S(3) */ |
ADC_L( ECX, ECX ) |
ADD_L( EBX, EBX ) /* ebx = abs(S(2))*2 ; carry = sign of S(2) */ |
ADC_L( ECX, ECX ) |
CMP_L( EBX, EBP ) /* carry = abs(S(2))*2 > abs(S(3))*2 */ |
ADC_L( ECX, ECX ) |
MOV_L( SRC1, EBX ) |
ADD_L( EBX, EBX ) /* ebx = abs(S(1))*2 ; carry = sign of S(1) */ |
ADC_L( ECX, ECX ) |
CMP_L( EBX, EBP ) /* carry = abs(S(1))*2 > abs(S(3))*2 */ |
ADC_L( ECX, ECX ) |
MOV_L( SRC0, EBX ) |
ADD_L( EBX, EBX ) /* ebx = abs(S(0))*2 ; carry = sign of S(0) */ |
ADC_L( ECX, ECX ) |
CMP_L( EBX, EBP ) /* carry = abs(S(0))*2 > abs(S(3))*2 */ |
ADC_L( ECX, ECX ) |
#ifdef ELFPIC |
MOV_L( REGIND(ESP), EBP ) /* clip_table */ |
MOV_B( REGBI(EBP, ECX), CL ) |
#else |
MOV_B( REGOFF(clip_table,ECX), CL ) |
#endif |
OR_B( CL, AL ) /* accumulate OR mask */ |
AND_B( CL, AH ) /* accumulate AND mask */ |
TEST_B( CL, CL ) /* zero mask -> project this vertex */ |
MOV_B( CL, REGIND(EDX) ) /* store the mask into clipmask[] */ |
JZ( LLBL(ctp4_proj) ) |
LLBL(ctp4_noproj): |
FSTP( ST(0) ) /* */ |
MOV_L( CONST(0), DST0 ) |
MOV_L( CONST(0), DST1 ) |
MOV_L( CONST(0), DST2 ) |
MOV_L( CONST(0x3f800000), DST3 ) /* 0x3f800000 is the bit pattern of 1.0f */ |
JMP( LLBL(ctp4_next) ) |
LLBL(ctp4_proj): |
FLD_S( SRC0 ) /* F0 F3 */ |
FMUL2( ST(1), ST0 ) |
FLD_S( SRC1 ) /* F1 F0 F3 */ |
FMUL2( ST(2), ST0 ) |
FLD_S( SRC2 ) /* F2 F1 F0 F3 */ |
FMUL2( ST(3), ST0 ) |
FXCH( ST(2) ) /* F0 F1 F2 F3 */ |
FSTP_S( DST0 ) /* F1 F2 F3 */ |
FSTP_S( DST1 ) /* F2 F3 */ |
FSTP_S( DST2 ) /* F3 */ |
FSTP_S( DST3 ) /* */ |
LLBL(ctp4_next): |
INC_L( EDX ) /* next clipmask byte */ |
ADD_L( CONST(16), EDI ) /* next dest vertex */ |
ADD_L( ARG_SOURCE, ESI ) /* next source vertex (stride kept in the ARG_SOURCE slot) */ |
CMP_L( EDX, ARG_CLIP ) /* reached clipmask + count? */ |
JNZ( LLBL(ctp4_top) ) |
MOV_L( ARG_OR, ECX ) |
MOV_L( ARG_AND, EDX ) |
MOV_B( AL, REGIND(ECX) ) /* write back the OR mask */ |
MOV_B( AH, REGIND(EDX) ) /* write back the AND mask */ |
LLBL(ctp4_finish): |
MOV_L( ARG_DEST, EAX ) /* return value: the dest vector pointer */ |
#ifdef ELFPIC |
POP_L( ESI ) /* discard ptr to clip_table */ |
#endif |
POP_L( EBX ) |
POP_L( EBP ) |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
/*
 * _mesa_x86_cliptest_points4_np
 *
 * Clip test without projection.  For every source vertex it builds the
 * same 7-bit clip_table index as _mesa_x86_cliptest_points4, stores the
 * looked-up mask to clipmask[] and folds it into the running OR (AL) and
 * AND (AH) masks.  The dest vector argument is never dereferenced: its
 * stack slot (ARG_DEST) is reused to hold the source stride, and EDI holds
 * the end address (clipmask + count).  The OR/AND masks are read through
 * ARG_OR / ARG_AND at entry and written back after the loop; with a zero
 * count the loop and the write-back are both skipped.  ARG_SOURCE is
 * returned in EAX.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_x86_cliptest_points4_np ) |
HIDDEN(_mesa_x86_cliptest_points4_np) |
GLNAME( _mesa_x86_cliptest_points4_np ): |
/* Four registers are pushed (16 bytes); the ELFPIC build pushes the
 * clip_table address as well (20 bytes). */
#ifdef ELFPIC |
#define FRAME_OFFSET 20 |
#else |
#define FRAME_OFFSET 16 |
#endif |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
PUSH_L( EBP ) |
PUSH_L( EBX ) |
#ifdef ELFPIC |
/* store pointer to clip_table on stack */ |
CALL( LLBL(ctp4_np_get_eip) ) /* EBX = address of the next instruction */ |
ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX ) |
MOV_L( REGOFF(clip_table@GOT, EBX), EBX ) /* EBX = address of clip_table, read from the GOT */ |
PUSH_L( EBX ) |
JMP( LLBL(ctp4_np_clip_table_ready) ) |
LLBL(ctp4_np_get_eip): |
/* store eip in ebx */ |
MOV_L( REGIND(ESP), EBX ) |
RET |
LLBL(ctp4_np_clip_table_ready): |
#endif |
MOV_L( ARG_SOURCE, ESI ) |
/* slot */ |
MOV_L( ARG_CLIP, EDX ) |
MOV_L( ARG_OR, EBX ) |
MOV_L( ARG_AND, EBP ) |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) |
MOV_L( REGOFF(V4F_START, ESI), ESI ) |
MOV_L( EAX, ARG_DEST ) /* put stride in ARG_DEST */ |
ADD_L( EDX, ECX ) |
MOV_L( ECX, EDI ) /* put clipmask + count in EDI */ |
CMP_L( ECX, EDX ) /* equal when count is zero; the byte moves below leave the flags alone */ |
MOV_B( REGIND(EBX), AL ) /* AL = incoming *orMask */ |
MOV_B( REGIND(EBP), AH ) /* AH = incoming *andMask */ |
JZ( LLBL(ctp4_np_finish) ) |
ALIGNTEXT16 |
LLBL(ctp4_np_top): |
MOV_L( SRC3, EBP ) |
MOV_L( SRC2, EBX ) |
XOR_L( ECX, ECX ) /* ECX collects the table index, one bit per ADC */ |
ADD_L( EBP, EBP ) /* ebp = abs(S(3))*2 ; carry = sign of S(3) */ |
ADC_L( ECX, ECX ) |
ADD_L( EBX, EBX ) /* ebx = abs(S(2))*2 ; carry = sign of S(2) */ |
ADC_L( ECX, ECX ) |
CMP_L( EBX, EBP ) /* carry = abs(S(2))*2 > abs(S(3))*2 */ |
ADC_L( ECX, ECX ) |
MOV_L( SRC1, EBX ) |
ADD_L( EBX, EBX ) /* ebx = abs(S(1))*2 ; carry = sign of S(1) */ |
ADC_L( ECX, ECX ) |
CMP_L( EBX, EBP ) /* carry = abs(S(1))*2 > abs(S(3))*2 */ |
ADC_L( ECX, ECX ) |
MOV_L( SRC0, EBX ) |
ADD_L( EBX, EBX ) /* ebx = abs(S(0))*2 ; carry = sign of S(0) */ |
ADC_L( ECX, ECX ) |
CMP_L( EBX, EBP ) /* carry = abs(S(0))*2 > abs(S(3))*2 */ |
ADC_L( ECX, ECX ) |
#ifdef ELFPIC |
MOV_L( REGIND(ESP), EBP ) /* clip_table */ |
MOV_B( REGBI(EBP, ECX), CL ) |
#else |
MOV_B( REGOFF(clip_table,ECX), CL ) |
#endif |
OR_B( CL, AL ) /* accumulate OR mask */ |
AND_B( CL, AH ) /* accumulate AND mask */ |
TEST_B( CL, CL ) /* result unused: INC/ADD/CMP below overwrite the flags before the branch */ |
MOV_B( CL, REGIND(EDX) ) /* store the mask into clipmask[] */ |
INC_L( EDX ) /* next clipmask byte */ |
/* slot */ |
ADD_L( ARG_DEST, ESI ) /* next source vertex (stride kept in the ARG_DEST slot) */ |
CMP_L( EDX, EDI ) /* reached clipmask + count? */ |
JNZ( LLBL(ctp4_np_top) ) |
MOV_L( ARG_OR, ECX ) |
MOV_L( ARG_AND, EDX ) |
MOV_B( AL, REGIND(ECX) ) /* write back the OR mask */ |
MOV_B( AH, REGIND(EDX) ) /* write back the AND mask */ |
LLBL(ctp4_np_finish): |
MOV_L( ARG_SOURCE, EAX ) /* return value: the source vector pointer */ |
#ifdef ELFPIC |
POP_L( ESI ) /* discard ptr to clip_table */ |
#endif |
POP_L( EBX ) |
POP_L( EBP ) |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#if defined (__ELF__) && defined (__linux__) |
.section .note.GNU-stack,"",%progbits |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/x86_xform.c |
---|
0,0 → 1,122 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* |
* Intel x86 assembly code by Josh Vanderhoof |
*/ |
#include "main/glheader.h" |
#include "main/context.h" |
#include "math/m_xform.h" |
#include "x86_xform.h" |
#include "common_x86_asm.h" |
#ifdef USE_X86_ASM |
#ifdef USE_3DNOW_ASM |
#include "3dnow.h" |
#endif |
#ifdef USE_SSE_ASM |
#include "sse.h" |
#endif |
#endif |
#ifdef DEBUG_MATH |
#include "math/m_debug.h" |
#endif |
/*
 * Prototypes for the routines implemented in the x86 assembly sources.
 * They are only declared when the build enables USE_X86_ASM.
 */
#ifdef USE_X86_ASM |
/* Transform routines for 2-, 3- and 4-component vectors; each group is
 * expanded by DECLARE_XFORM_GROUP from x86_xform.h. */
DECLARE_XFORM_GROUP( x86, 2 ) |
DECLARE_XFORM_GROUP( x86, 3 ) |
DECLARE_XFORM_GROUP( x86, 4 ) |
/* Clip tester installed into _mesa_clip_tab[4] by
 * _mesa_init_x86_transform_asm(). */
extern GLvector4f * _ASMAPI |
_mesa_x86_cliptest_points4( GLvector4f *clip_vec, |
GLvector4f *proj_vec, |
GLubyte clipMask[], |
GLubyte *orMask, |
GLubyte *andMask, |
GLboolean viewport_z_clip ); |
/* Clip tester installed into _mesa_clip_np_tab[4] by
 * _mesa_init_x86_transform_asm(). */
extern GLvector4f * _ASMAPI |
_mesa_x86_cliptest_points4_np( GLvector4f *clip_vec, |
GLvector4f *proj_vec, |
GLubyte clipMask[], |
GLubyte *orMask, |
GLubyte *andMask, |
GLboolean viewport_z_clip ); |
/* NOTE(review): the two _mesa_v16_* routines below are declared here but
 * are not referenced by the code visible in this file. */
extern void _ASMAPI |
_mesa_v16_x86_cliptest_points4( GLfloat *first_vert, |
GLfloat *last_vert, |
GLubyte *or_mask, |
GLubyte *and_mask, |
GLubyte *clip_mask, |
GLboolean viewport_z_clip ); |
extern void _ASMAPI |
_mesa_v16_x86_general_xform( GLfloat *dest, |
const GLfloat *m, |
const GLfloat *src, |
GLuint src_stride, |
GLuint count ); |
#endif |
#ifdef USE_X86_ASM
/*
 * Point the generic dispatch tables at the plain-x86 assembly routines:
 * the 4-component clip testers and the transform groups for 2-, 3- and
 * 4-component vectors.  With DEBUG_MATH defined, the installed routines
 * are then run through the math self-tests.
 */
static void _mesa_init_x86_transform_asm( void )
{
   /* Clip testers: non-projecting and projecting variants. */
   _mesa_clip_np_tab[4] = _mesa_x86_cliptest_points4_np;
   _mesa_clip_tab[4] = _mesa_x86_cliptest_points4;

   /* Each group fills a distinct _mesa_transform_tab[sz] row, so the
    * order of these three expansions does not matter. */
   ASSIGN_XFORM_GROUP( x86, 4 );
   ASSIGN_XFORM_GROUP( x86, 3 );
   ASSIGN_XFORM_GROUP( x86, 2 );

#ifdef DEBUG_MATH
   _math_test_all_transform_functions( "x86" );
   _math_test_all_cliptest_functions( "x86" );
#endif
}
#endif
/*
 * Detect the CPU's features, then install every assembly transform and
 * clip routine that both the build configuration and the running CPU
 * support.  Without USE_X86_ASM only the feature detection runs.
 */
void _mesa_init_all_x86_transform_asm( void )
{
   _mesa_get_x86_features();

#ifdef USE_X86_ASM
   if ( _mesa_x86_cpu_features ) {
      _mesa_init_x86_transform_asm();
   }

#ifdef USE_SSE_ASM
   /* "sse.h" is only included when USE_SSE_ASM is defined, so the SSE
    * initialiser must be guarded the same way; otherwise an x86-only
    * build would call a function that was never declared. */
   if ( cpu_has_xmm ) {
      _mesa_init_sse_transform_asm();
   }
#endif
#endif
}
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/x86_xform.h |
---|
0,0 → 1,106 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Gareth Hughes |
*/ |
#ifndef X86_XFORM_H |
#define X86_XFORM_H |
/* ============================================================= |
* Transformation function declarations: |
*/ |
/* Parameter list shared by every transform routine prototype below:
 * destination vector, a matrix passed as 16 floats, and source vector. */
#define XFORM_ARGS GLvector4f *to_vec, \ |
const GLfloat m[16], \ |
const GLvector4f *from_vec |
/* Declares the seven routines _mesa_<pfx>_transform_points<sz>_<kind>, one
 * per matrix kind (general, identity, 3d_no_rot, perspective, 2d,
 * 2d_no_rot, 3d).  The expansion already ends in ';', so it is used at
 * file scope without a trailing semicolon. */
#define DECLARE_XFORM_GROUP( pfx, sz ) \ |
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_general( XFORM_ARGS ); \ |
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_identity( XFORM_ARGS ); \ |
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d_no_rot( XFORM_ARGS ); \ |
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_perspective( XFORM_ARGS ); \ |
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d( XFORM_ARGS ); \ |
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d_no_rot( XFORM_ARGS ); \ |
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d( XFORM_ARGS ); |
/* Stores the seven _mesa_<pfx>_transform_points<sz>_<kind> routines into
 * row <sz> of _mesa_transform_tab, one entry per MATRIX_* kind.  The
 * expansion is a run of plain statements (not wrapped in do/while), so it
 * must not be used as the unbraced body of an if/else or loop. */
#define ASSIGN_XFORM_GROUP( pfx, sz ) \ |
_mesa_transform_tab[sz][MATRIX_GENERAL] = \ |
_mesa_##pfx##_transform_points##sz##_general; \ |
_mesa_transform_tab[sz][MATRIX_IDENTITY] = \ |
_mesa_##pfx##_transform_points##sz##_identity; \ |
_mesa_transform_tab[sz][MATRIX_3D_NO_ROT] = \ |
_mesa_##pfx##_transform_points##sz##_3d_no_rot; \ |
_mesa_transform_tab[sz][MATRIX_PERSPECTIVE] = \ |
_mesa_##pfx##_transform_points##sz##_perspective; \ |
_mesa_transform_tab[sz][MATRIX_2D] = \ |
_mesa_##pfx##_transform_points##sz##_2d; \ |
_mesa_transform_tab[sz][MATRIX_2D_NO_ROT] = \ |
_mesa_##pfx##_transform_points##sz##_2d_no_rot; \ |
_mesa_transform_tab[sz][MATRIX_3D] = \ |
_mesa_##pfx##_transform_points##sz##_3d; |
/* ============================================================= |
* Normal transformation function declarations: |
*/ |
/* Parameter list shared by every normal-transformation routine prototype
 * below: matrix, scale factor, input vector, lengths array and
 * destination vector. */
#define NORM_ARGS const GLmatrix *mat, \ |
GLfloat scale, \ |
const GLvector4f *in, \ |
const GLfloat *lengths, \ |
GLvector4f *dest |
/* Declares the eight normal routines _mesa_<pfx>_<kind>_normals[_no_rot].
 * The expansion already ends in ';', so it is used at file scope without
 * a trailing semicolon. */
#define DECLARE_NORM_GROUP( pfx ) \ |
extern void _ASMAPI _mesa_##pfx##_rescale_normals( NORM_ARGS ); \ |
extern void _ASMAPI _mesa_##pfx##_normalize_normals( NORM_ARGS ); \ |
extern void _ASMAPI _mesa_##pfx##_transform_normals( NORM_ARGS ); \ |
extern void _ASMAPI _mesa_##pfx##_transform_normals_no_rot( NORM_ARGS ); \ |
extern void _ASMAPI _mesa_##pfx##_transform_rescale_normals( NORM_ARGS ); \ |
extern void _ASMAPI _mesa_##pfx##_transform_rescale_normals_no_rot( NORM_ARGS ); \ |
extern void _ASMAPI _mesa_##pfx##_transform_normalize_normals( NORM_ARGS ); \ |
extern void _ASMAPI _mesa_##pfx##_transform_normalize_normals_no_rot( NORM_ARGS ); |
/* Stores the eight _mesa_<pfx>_* normal routines into _mesa_normal_tab,
 * indexed by NORM_* flags (some entries by the OR of two flags).  The
 * expansion is a run of plain statements (not wrapped in do/while), so it
 * must not be used as the unbraced body of an if/else or loop. */
#define ASSIGN_NORM_GROUP( pfx ) \ |
_mesa_normal_tab[NORM_RESCALE] = \ |
_mesa_##pfx##_rescale_normals; \ |
_mesa_normal_tab[NORM_NORMALIZE] = \ |
_mesa_##pfx##_normalize_normals; \ |
_mesa_normal_tab[NORM_TRANSFORM] = \ |
_mesa_##pfx##_transform_normals; \ |
_mesa_normal_tab[NORM_TRANSFORM_NO_ROT] = \ |
_mesa_##pfx##_transform_normals_no_rot; \ |
_mesa_normal_tab[NORM_TRANSFORM | NORM_RESCALE] = \ |
_mesa_##pfx##_transform_rescale_normals; \ |
_mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE] = \ |
_mesa_##pfx##_transform_rescale_normals_no_rot; \ |
_mesa_normal_tab[NORM_TRANSFORM | NORM_NORMALIZE] = \ |
_mesa_##pfx##_transform_normalize_normals; \ |
_mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE] = \ |
_mesa_##pfx##_transform_normalize_normals_no_rot; |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/x86_xform2.S |
---|
0,0 → 1,574 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* |
* NOTE: Avoid using spaces in between '(' ')' and arguments, especially |
* with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces |
* in there will break the build on some platforms. |
*/ |
#include "assyntax.h" |
#include "matypes.h" |
#include "xform_args.h" |
SEG_TEXT |
#define FP_ONE 1065353216 /* 0x3F800000, the IEEE-754 single-precision bit pattern of 1.0 */ |
#define FP_ZERO 0 /* all-zero word, the single-precision bit pattern of +0.0 */ |
#define SRC0 REGOFF(0, ESI) /* SRC0..SRC3: four 32-bit slots at byte offsets 0..12 from ESI (current source point) */ |
#define SRC1 REGOFF(4, ESI) |
#define SRC2 REGOFF(8, ESI) |
#define SRC3 REGOFF(12, ESI) |
#define DST0 REGOFF(0, EDI) /* DST0..DST3: four 32-bit slots at byte offsets 0..12 from EDI (current output point) */ |
#define DST1 REGOFF(4, EDI) |
#define DST2 REGOFF(8, EDI) |
#define DST3 REGOFF(12, EDI) |
#define MAT0 REGOFF(0, EDX) /* MAT0..MAT15: sixteen 32-bit slots at byte offsets 0..60 from EDX (matrix base) */ |
#define MAT1 REGOFF(4, EDX) |
#define MAT2 REGOFF(8, EDX) |
#define MAT3 REGOFF(12, EDX) |
#define MAT4 REGOFF(16, EDX) |
#define MAT5 REGOFF(20, EDX) |
#define MAT6 REGOFF(24, EDX) |
#define MAT7 REGOFF(28, EDX) |
#define MAT8 REGOFF(32, EDX) |
#define MAT9 REGOFF(36, EDX) |
#define MAT10 REGOFF(40, EDX) |
#define MAT11 REGOFF(44, EDX) |
#define MAT12 REGOFF(48, EDX) |
#define MAT13 REGOFF(52, EDX) |
#define MAT14 REGOFF(56, EDX) |
#define MAT15 REGOFF(60, EDX) |
ALIGNTEXT16 /* points2_general: for each source point (x, y) stores out[i] = x*m[i] + y*m[4+i] + m[12+i], i = 0..3 */ |
GLOBL GLNAME( _mesa_x86_transform_points2_general ) |
HIDDEN(_mesa_x86_transform_points2_general) |
GLNAME( _mesa_x86_transform_points2_general ): |
#define FRAME_OFFSET 8 /* matches the 8 bytes pushed below; presumably read by the ARG_* macros in xform_args.h -- confirm */ |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector struct */ |
MOV_L( ARG_DEST, EDI ) /* EDI = destination vector struct */ |
MOV_L( ARG_MATRIX, EDX ) /* EDX = matrix base used by MAT0..MAT15 */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */ |
TEST_L( ECX, ECX ) |
JZ( LLBL(x86_p2_gr_done) ) /* empty source: leave destination untouched */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride, added to ESI per point */ |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* OR VEC_SIZE_4 into the destination flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* destination size = 4 */ |
SHL_L( CONST(4), ECX ) /* count * 16 bytes per output point */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first source point */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */ |
ADD_L( EDI, ECX ) /* ECX = one-past-the-end output pointer */ |
ALIGNTEXT16 |
LLBL(x86_p2_gr_loop): |
FLD_S( SRC0 ) /* F4 */ |
FMUL_S( MAT0 ) /* F4 = x*m0 */ |
FLD_S( SRC0 ) /* F5 F4 */ |
FMUL_S( MAT1 ) /* F5 = x*m1 */ |
FLD_S( SRC0 ) /* F6 F5 F4 */ |
FMUL_S( MAT2 ) /* F6 = x*m2 */ |
FLD_S( SRC0 ) /* F7 F6 F5 F4 */ |
FMUL_S( MAT3 ) /* F7 = x*m3 */ |
FLD_S( SRC1 ) /* F0 F7 F6 F5 F4 */ |
FMUL_S( MAT4 ) /* F0 = y*m4 */ |
FLD_S( SRC1 ) /* F1 F0 F7 F6 F5 F4 */ |
FMUL_S( MAT5 ) /* F1 = y*m5 */ |
FLD_S( SRC1 ) /* F2 F1 F0 F7 F6 F5 F4 */ |
FMUL_S( MAT6 ) /* F2 = y*m6 */ |
FLD_S( SRC1 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ |
FMUL_S( MAT7 ) /* F3 = y*m7 */ |
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ |
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */ |
FXCH( ST(3) ) /* F4 F6 F5 F7 */ |
FADD_S( MAT12 ) /* F4 += m12 */ |
FXCH( ST(2) ) /* F5 F6 F4 F7 */ |
FADD_S( MAT13 ) /* F5 += m13 */ |
FXCH( ST(1) ) /* F6 F5 F4 F7 */ |
FADD_S( MAT14 ) /* F6 += m14 */ |
FXCH( ST(3) ) /* F7 F5 F4 F6 */ |
FADD_S( MAT15 ) /* F7 += m15 */ |
FXCH( ST(2) ) /* F4 F5 F7 F6 */ |
FSTP_S( DST0 ) /* F5 F7 F6 */ |
FSTP_S( DST1 ) /* F7 F6 */ |
FXCH( ST(1) ) /* F6 F7 */ |
FSTP_S( DST2 ) /* F7 */ |
FSTP_S( DST3 ) /* */ |
LLBL(x86_p2_gr_skip): |
ADD_L( CONST(16), EDI ) /* advance output by one 16-byte point */ |
ADD_L( EAX, ESI ) /* advance source by its stride */ |
CMP_L( ECX, EDI ) |
JNE( LLBL(x86_p2_gr_loop) ) /* loop until the output pointer reaches the end */ |
LLBL(x86_p2_gr_done): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
ALIGNTEXT16 /* points2_perspective: for each source point (x, y) stores (x*m0, y*m5, m14, 0) */ |
GLOBL GLNAME( _mesa_x86_transform_points2_perspective ) |
HIDDEN(_mesa_x86_transform_points2_perspective) |
GLNAME( _mesa_x86_transform_points2_perspective ): |
#define FRAME_OFFSET 12 /* matches the 12 bytes pushed below; presumably read by the ARG_* macros in xform_args.h -- confirm */ |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
PUSH_L( EBX ) |
MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector struct */ |
MOV_L( ARG_DEST, EDI ) /* EDI = destination vector struct */ |
MOV_L( ARG_MATRIX, EDX ) /* EDX = matrix base used by MATn */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */ |
TEST_L( ECX, ECX ) |
JZ( LLBL(x86_p2_pr_done) ) /* empty source: leave destination untouched */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride */ |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* OR VEC_SIZE_4 into the destination flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* destination size = 4 */ |
SHL_L( CONST(4), ECX ) /* count * 16 bytes per output point */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first source point */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */ |
ADD_L( EDI, ECX ) /* ECX = one-past-the-end output pointer */ |
MOV_L( MAT14, EBX ) /* EBX = raw 32-bit word of m14, loaded once outside the loop */ |
ALIGNTEXT16 |
LLBL(x86_p2_pr_loop): |
FLD_S( SRC0 ) /* F4 */ |
FMUL_S( MAT0 ) /* F4 = x*m0 */ |
FLD_S( SRC1 ) /* F1 F4 */ |
FMUL_S( MAT5 ) /* F1 = y*m5 */ |
FXCH( ST(1) ) /* F4 F1 */ |
FSTP_S( DST0 ) /* F1 */ |
FSTP_S( DST1 ) /* */ |
MOV_L( EBX, DST2 ) /* out.z = m14, copied as an integer word */ |
MOV_L( CONST(FP_ZERO), DST3 ) /* out.w = 0.0 */ |
LLBL(x86_p2_pr_skip): |
ADD_L( CONST(16), EDI ) /* advance output by one 16-byte point */ |
ADD_L( EAX, ESI ) /* advance source by its stride */ |
CMP_L( ECX, EDI ) |
JNE( LLBL(x86_p2_pr_loop) ) /* loop until the output pointer reaches the end */ |
LLBL(x86_p2_pr_done): |
POP_L( EBX ) |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
ALIGNTEXT16 /* points2_3d: for each source point (x, y) stores out[i] = x*m[i] + y*m[4+i] + m[12+i], i = 0..2; the fourth output slot is not written */ |
GLOBL GLNAME( _mesa_x86_transform_points2_3d ) |
HIDDEN(_mesa_x86_transform_points2_3d) |
GLNAME( _mesa_x86_transform_points2_3d ): |
#define FRAME_OFFSET 8 /* matches the 8 bytes pushed below; presumably read by the ARG_* macros in xform_args.h -- confirm */ |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector struct */ |
MOV_L( ARG_DEST, EDI ) /* EDI = destination vector struct */ |
MOV_L( ARG_MATRIX, EDX ) /* EDX = matrix base used by MATn */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */ |
TEST_L( ECX, ECX ) |
JZ( LLBL(x86_p2_3dr_done) ) /* empty source: leave destination untouched */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* OR VEC_SIZE_3 into the destination flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* destination size = 3 */ |
SHL_L( CONST(4), ECX ) /* count * 16 bytes per output point */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first source point */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */ |
ADD_L( EDI, ECX ) /* ECX = one-past-the-end output pointer */ |
ALIGNTEXT16 |
LLBL(x86_p2_3dr_loop): |
FLD_S( SRC0 ) /* F4 */ |
FMUL_S( MAT0 ) /* F4 = x*m0 */ |
FLD_S( SRC0 ) /* F5 F4 */ |
FMUL_S( MAT1 ) /* F5 = x*m1 */ |
FLD_S( SRC0 ) /* F6 F5 F4 */ |
FMUL_S( MAT2 ) /* F6 = x*m2 */ |
FLD_S( SRC1 ) /* F0 F6 F5 F4 */ |
FMUL_S( MAT4 ) /* F0 = y*m4 */ |
FLD_S( SRC1 ) /* F1 F0 F6 F5 F4 */ |
FMUL_S( MAT5 ) /* F1 = y*m5 */ |
FLD_S( SRC1 ) /* F2 F1 F0 F6 F5 F4 */ |
FMUL_S( MAT6 ) /* F2 = y*m6 */ |
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ |
FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */ |
FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */ |
FADDP( ST0, ST(1) ) /* F6 F5 F4 */ |
FXCH( ST(2) ) /* F4 F5 F6 */ |
FADD_S( MAT12 ) /* F4 += m12 */ |
FXCH( ST(1) ) /* F5 F4 F6 */ |
FADD_S( MAT13 ) /* F5 += m13 */ |
FXCH( ST(2) ) /* F6 F4 F5 */ |
FADD_S( MAT14 ) /* F6 += m14 */ |
FXCH( ST(1) ) /* F4 F6 F5 */ |
FSTP_S( DST0 ) /* F6 F5 */ |
FXCH( ST(1) ) /* F5 F6 */ |
FSTP_S( DST1 ) /* F6 */ |
FSTP_S( DST2 ) /* */ |
LLBL(x86_p2_3dr_skip): |
ADD_L( CONST(16), EDI ) /* advance output by one 16-byte point */ |
ADD_L( EAX, ESI ) /* advance source by its stride */ |
CMP_L( ECX, EDI ) |
JNE( LLBL(x86_p2_3dr_loop) ) /* loop until the output pointer reaches the end */ |
LLBL(x86_p2_3dr_done): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
ALIGNTEXT16 /* points2_3d_no_rot: for each source point (x, y) stores (x*m0 + m12, y*m5 + m13, m14); the fourth output slot is not written */ |
GLOBL GLNAME( _mesa_x86_transform_points2_3d_no_rot ) |
HIDDEN(_mesa_x86_transform_points2_3d_no_rot) |
GLNAME( _mesa_x86_transform_points2_3d_no_rot ): |
#define FRAME_OFFSET 12 /* matches the 12 bytes pushed below; presumably read by the ARG_* macros in xform_args.h -- confirm */ |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
PUSH_L( EBX ) |
MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector struct */ |
MOV_L( ARG_DEST, EDI ) /* EDI = destination vector struct */ |
MOV_L( ARG_MATRIX, EDX ) /* EDX = matrix base used by MATn */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */ |
TEST_L( ECX, ECX ) |
JZ( LLBL(x86_p2_3dnrr_done) ) /* empty source: leave destination untouched */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* OR VEC_SIZE_3 into the destination flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* destination size = 3 */ |
SHL_L( CONST(4), ECX ) /* count * 16 bytes per output point */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first source point */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */ |
ADD_L( EDI, ECX ) /* ECX = one-past-the-end output pointer */ |
MOV_L( MAT14, EBX ) /* EBX = raw 32-bit word of m14, loaded once outside the loop */ |
ALIGNTEXT16 |
LLBL(x86_p2_3dnrr_loop): |
FLD_S( SRC0 ) /* F4 */ |
FMUL_S( MAT0 ) /* F4 = x*m0 */ |
FLD_S( SRC1 ) /* F1 F4 */ |
FMUL_S( MAT5 ) /* F1 = y*m5 */ |
FXCH( ST(1) ) /* F4 F1 */ |
FADD_S( MAT12 ) /* F4 += m12 */ |
FLD_S( MAT13 ) /* F5 F4 F1 */ |
FXCH( ST(2) ) /* F1 F4 F5 */ |
FADDP( ST0, ST(2) ) /* F4 F5 */ |
FSTP_S( DST0 ) /* F5 */ |
FSTP_S( DST1 ) /* */ |
MOV_L( EBX, DST2 ) /* out.z = m14, copied as an integer word */ |
LLBL(x86_p2_3dnrr_skip): |
ADD_L( CONST(16), EDI ) /* advance output by one 16-byte point */ |
ADD_L( EAX, ESI ) /* advance source by its stride */ |
CMP_L( ECX, EDI ) |
JNE( LLBL(x86_p2_3dnrr_loop) ) /* loop until the output pointer reaches the end */ |
LLBL(x86_p2_3dnrr_done): |
POP_L( EBX ) |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
ALIGNTEXT16 /* points2_2d: for each source point (x, y) stores (x*m0 + y*m4 + m12, x*m1 + y*m5 + m13); output slots 2 and 3 are not written */ |
GLOBL GLNAME( _mesa_x86_transform_points2_2d ) |
HIDDEN(_mesa_x86_transform_points2_2d) |
GLNAME( _mesa_x86_transform_points2_2d ): |
#define FRAME_OFFSET 8 /* matches the 8 bytes pushed below; presumably read by the ARG_* macros in xform_args.h -- confirm */ |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector struct */ |
MOV_L( ARG_DEST, EDI ) /* EDI = destination vector struct */ |
MOV_L( ARG_MATRIX, EDX ) /* EDX = matrix base used by MATn */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */ |
TEST_L( ECX, ECX ) |
JZ( LLBL(x86_p2_2dr_done) ) /* empty source: leave destination untouched */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride */ |
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* OR VEC_SIZE_2 into the destination flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */ |
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* destination size = 2 */ |
SHL_L( CONST(4), ECX ) /* count * 16 bytes per output point */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first source point */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */ |
ADD_L( EDI, ECX ) /* ECX = one-past-the-end output pointer */ |
ALIGNTEXT16 |
LLBL(x86_p2_2dr_loop): |
FLD_S( SRC0 ) /* F4 */ |
FMUL_S( MAT0 ) /* F4 = x*m0 */ |
FLD_S( SRC0 ) /* F5 F4 */ |
FMUL_S( MAT1 ) /* F5 = x*m1 */ |
FLD_S( SRC1 ) /* F0 F5 F4 */ |
FMUL_S( MAT4 ) /* F0 = y*m4 */ |
FLD_S( SRC1 ) /* F1 F0 F5 F4 */ |
FMUL_S( MAT5 ) /* F1 = y*m5 */ |
FXCH( ST(1) ) /* F0 F1 F5 F4 */ |
FADDP( ST0, ST(3) ) /* F1 F5 F4 */ |
FADDP( ST0, ST(1) ) /* F5 F4 */ |
FXCH( ST(1) ) /* F4 F5 */ |
FADD_S( MAT12 ) /* F4 += m12 */ |
FXCH( ST(1) ) /* F5 F4 */ |
FADD_S( MAT13 ) /* F5 += m13 */ |
FXCH( ST(1) ) /* F4 F5 */ |
FSTP_S( DST0 ) /* F5 */ |
FSTP_S( DST1 ) /* */ |
LLBL(x86_p2_2dr_skip): |
ADD_L( CONST(16), EDI ) /* advance output by one 16-byte point */ |
ADD_L( EAX, ESI ) /* advance source by its stride */ |
CMP_L( ECX, EDI ) |
JNE( LLBL(x86_p2_2dr_loop) ) /* loop until the output pointer reaches the end */ |
LLBL(x86_p2_2dr_done): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
ALIGNTEXT16 /* points2_2d_no_rot: for each source point (x, y) stores (x*m0 + m12, y*m5 + m13); output slots 2 and 3 are not written. Entry alignment matches the sibling transform routines. */ |
GLOBL GLNAME( _mesa_x86_transform_points2_2d_no_rot ) |
HIDDEN(_mesa_x86_transform_points2_2d_no_rot) |
GLNAME( _mesa_x86_transform_points2_2d_no_rot ): |
#define FRAME_OFFSET 8 /* matches the 8 bytes pushed below; presumably read by the ARG_* macros in xform_args.h -- confirm */ |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector struct */ |
MOV_L( ARG_DEST, EDI ) /* EDI = destination vector struct */ |
MOV_L( ARG_MATRIX, EDX ) /* EDX = matrix base used by MATn */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */ |
TEST_L( ECX, ECX ) |
JZ( LLBL(x86_p2_2dnrr_done) ) /* empty source: leave destination untouched */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride */ |
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* OR VEC_SIZE_2 into the destination flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */ |
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* destination size = 2 */ |
SHL_L( CONST(4), ECX ) /* count * 16 bytes per output point */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first source point */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */ |
ADD_L( EDI, ECX ) /* ECX = one-past-the-end output pointer */ |
ALIGNTEXT16 |
LLBL(x86_p2_2dnrr_loop): |
FLD_S( SRC0 ) /* F4 */ |
FMUL_S( MAT0 ) /* F4 = x*m0 */ |
FLD_S( SRC1 ) /* F1 F4 */ |
FMUL_S( MAT5 ) /* F1 = y*m5 */ |
FXCH( ST(1) ) /* F4 F1 */ |
FADD_S( MAT12 ) /* F4 += m12 */ |
FLD_S( MAT13 ) /* F5 F4 F1 */ |
FXCH( ST(2) ) /* F1 F4 F5 */ |
FADDP( ST0, ST(2) ) /* F4 F5 */ |
FSTP_S( DST0 ) /* F5 */ |
FSTP_S( DST1 ) /* */ |
LLBL(x86_p2_2dnrr_skip): |
ADD_L( CONST(16), EDI ) /* advance output by one 16-byte point */ |
ADD_L( EAX, ESI ) /* advance source by its stride */ |
CMP_L( ECX, EDI ) |
JNE( LLBL(x86_p2_2dnrr_loop) ) /* loop until the output pointer reaches the end */ |
LLBL(x86_p2_2dnrr_done): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
ALIGNTEXT16 /* points2_identity: copies the first two 32-bit words of each source point to the output unchanged; the copy is skipped when source and output data start at the same address */ |
GLOBL GLNAME( _mesa_x86_transform_points2_identity ) |
HIDDEN(_mesa_x86_transform_points2_identity) |
GLNAME( _mesa_x86_transform_points2_identity ): |
#define FRAME_OFFSET 12 /* matches the 12 bytes pushed below; presumably read by the ARG_* macros in xform_args.h -- confirm */ |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
PUSH_L( EBX ) |
MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector struct */ |
MOV_L( ARG_DEST, EDI ) /* EDI = destination vector struct */ |
MOV_L( ARG_MATRIX, EDX ) /* NOTE(review): this value is never read; EDX is overwritten as a copy scratch register in the loop */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */ |
TEST_L( ECX, ECX ) |
JZ( LLBL(x86_p2_ir_done) ) /* empty source: leave destination untouched */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride */ |
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* OR VEC_SIZE_2 into the destination flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */ |
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* destination size = 2 */ |
SHL_L( CONST(4), ECX ) /* count * 16 bytes per output point */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first source point */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */ |
ADD_L( EDI, ECX ) /* ECX = one-past-the-end output pointer */ |
CMP_L( ESI, EDI ) |
JE( LLBL(x86_p2_ir_done) ) /* same start address: nothing to copy (flags, count and size were already updated above) */ |
ALIGNTEXT16 |
LLBL(x86_p2_ir_loop): |
MOV_L( SRC0, EBX ) /* integer moves: the words are copied bit-for-bit */ |
MOV_L( SRC1, EDX ) |
MOV_L( EBX, DST0 ) |
MOV_L( EDX, DST1 ) |
LLBL(x86_p2_ir_skip): |
ADD_L( CONST(16), EDI ) /* advance output by one 16-byte point */ |
ADD_L( EAX, ESI ) /* advance source by its stride */ |
CMP_L( ECX, EDI ) |
JNE( LLBL(x86_p2_ir_loop) ) /* loop until the output pointer reaches the end */ |
LLBL(x86_p2_ir_done): |
POP_L( EBX ) |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
#if defined (__ELF__) && defined (__linux__) /* only emitted for ELF objects built for Linux */ |
.section .note.GNU-stack,"",%progbits /* empty .note.GNU-stack section; by GNU toolchain convention this presumably marks the object as not requiring an executable stack -- confirm against binutils docs */ |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/x86_xform3.S |
---|
0,0 → 1,644 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* |
* NOTE: Avoid using spaces in between '(' ')' and arguments, especially |
* with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces |
* in there will break the build on some platforms. |
*/ |
#include "assyntax.h" |
#include "matypes.h" |
#include "xform_args.h" |
SEG_TEXT |
#define FP_ONE 1065353216 /* 0x3F800000, the IEEE-754 single-precision bit pattern of 1.0 */ |
#define FP_ZERO 0 /* all-zero word, the single-precision bit pattern of +0.0 */ |
#define SRC0 REGOFF(0, ESI) /* SRC0..SRC3: four 32-bit slots at byte offsets 0..12 from ESI (current source point) */ |
#define SRC1 REGOFF(4, ESI) |
#define SRC2 REGOFF(8, ESI) |
#define SRC3 REGOFF(12, ESI) |
#define DST0 REGOFF(0, EDI) /* DST0..DST3: four 32-bit slots at byte offsets 0..12 from EDI (current output point) */ |
#define DST1 REGOFF(4, EDI) |
#define DST2 REGOFF(8, EDI) |
#define DST3 REGOFF(12, EDI) |
#define MAT0 REGOFF(0, EDX) /* MAT0..MAT15: sixteen 32-bit slots at byte offsets 0..60 from EDX (matrix base) */ |
#define MAT1 REGOFF(4, EDX) |
#define MAT2 REGOFF(8, EDX) |
#define MAT3 REGOFF(12, EDX) |
#define MAT4 REGOFF(16, EDX) |
#define MAT5 REGOFF(20, EDX) |
#define MAT6 REGOFF(24, EDX) |
#define MAT7 REGOFF(28, EDX) |
#define MAT8 REGOFF(32, EDX) |
#define MAT9 REGOFF(36, EDX) |
#define MAT10 REGOFF(40, EDX) |
#define MAT11 REGOFF(44, EDX) |
#define MAT12 REGOFF(48, EDX) |
#define MAT13 REGOFF(52, EDX) |
#define MAT14 REGOFF(56, EDX) |
#define MAT15 REGOFF(60, EDX) |
ALIGNTEXT16 /* points3_general: for each source point (x, y, z) stores out[i] = x*m[i] + y*m[4+i] + z*m[8+i] + m[12+i], i = 0..3 */ |
GLOBL GLNAME( _mesa_x86_transform_points3_general ) |
HIDDEN(_mesa_x86_transform_points3_general) |
GLNAME( _mesa_x86_transform_points3_general ): |
#define FRAME_OFFSET 8 /* matches the 8 bytes pushed below; presumably read by the ARG_* macros in xform_args.h -- confirm */ |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector struct */ |
MOV_L( ARG_DEST, EDI ) /* EDI = destination vector struct */ |
MOV_L( ARG_MATRIX, EDX ) /* EDX = matrix base used by MATn */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */ |
TEST_L( ECX, ECX ) |
JZ( LLBL(x86_p3_gr_done) ) /* empty source: leave destination untouched */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride */ |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* OR VEC_SIZE_4 into the destination flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* destination size = 4 */ |
SHL_L( CONST(4), ECX ) /* count * 16 bytes per output point */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first source point */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */ |
ADD_L( EDI, ECX ) /* ECX = one-past-the-end output pointer */ |
ALIGNTEXT16 |
LLBL(x86_p3_gr_loop): |
FLD_S( SRC0 ) /* F4 */ |
FMUL_S( MAT0 ) /* F4 = x*m0 */ |
FLD_S( SRC0 ) /* F5 F4 */ |
FMUL_S( MAT1 ) /* F5 = x*m1 */ |
FLD_S( SRC0 ) /* F6 F5 F4 */ |
FMUL_S( MAT2 ) /* F6 = x*m2 */ |
FLD_S( SRC0 ) /* F7 F6 F5 F4 */ |
FMUL_S( MAT3 ) /* F7 = x*m3 */ |
FLD_S( SRC1 ) /* F0 F7 F6 F5 F4 */ |
FMUL_S( MAT4 ) /* F0 = y*m4 */ |
FLD_S( SRC1 ) /* F1 F0 F7 F6 F5 F4 */ |
FMUL_S( MAT5 ) /* F1 = y*m5 */ |
FLD_S( SRC1 ) /* F2 F1 F0 F7 F6 F5 F4 */ |
FMUL_S( MAT6 ) /* F2 = y*m6 */ |
FLD_S( SRC1 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ |
FMUL_S( MAT7 ) /* F3 = y*m7 */ |
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ |
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */ |
FLD_S( SRC2 ) /* F0 F7 F6 F5 F4 */ |
FMUL_S( MAT8 ) /* F0 = z*m8 */ |
FLD_S( SRC2 ) /* F1 F0 F7 F6 F5 F4 */ |
FMUL_S( MAT9 ) /* F1 = z*m9 */ |
FLD_S( SRC2 ) /* F2 F1 F0 F7 F6 F5 F4 */ |
FMUL_S( MAT10 ) /* F2 = z*m10 */ |
FLD_S( SRC2 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ |
FMUL_S( MAT11 ) /* F3 = z*m11 */ |
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ |
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */ |
FXCH( ST(3) ) /* F4 F6 F5 F7 */ |
FADD_S( MAT12 ) /* F4 += m12 */ |
FXCH( ST(2) ) /* F5 F6 F4 F7 */ |
FADD_S( MAT13 ) /* F5 += m13 */ |
FXCH( ST(1) ) /* F6 F5 F4 F7 */ |
FADD_S( MAT14 ) /* F6 += m14 */ |
FXCH( ST(3) ) /* F7 F5 F4 F6 */ |
FADD_S( MAT15 ) /* F7 += m15 */ |
FXCH( ST(2) ) /* F4 F5 F7 F6 */ |
FSTP_S( DST0 ) /* F5 F7 F6 */ |
FSTP_S( DST1 ) /* F7 F6 */ |
FXCH( ST(1) ) /* F6 F7 */ |
FSTP_S( DST2 ) /* F7 */ |
FSTP_S( DST3 ) /* */ |
LLBL(x86_p3_gr_skip): |
ADD_L( CONST(16), EDI ) /* advance output by one 16-byte point */ |
ADD_L( EAX, ESI ) /* advance source by its stride */ |
CMP_L( ECX, EDI ) |
JNE( LLBL(x86_p3_gr_loop) ) /* loop until the output pointer reaches the end */ |
LLBL(x86_p3_gr_done): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
ALIGNTEXT16 /* points3_perspective: for each source point (x, y, z) stores (x*m0 + z*m8, y*m5 + z*m9, z*m10 + m14, -z) */ |
GLOBL GLNAME( _mesa_x86_transform_points3_perspective ) |
HIDDEN(_mesa_x86_transform_points3_perspective) |
GLNAME( _mesa_x86_transform_points3_perspective ): |
#define FRAME_OFFSET 12 /* matches the 12 bytes pushed below; presumably read by the ARG_* macros in xform_args.h -- confirm */ |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
PUSH_L( EBX ) |
MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector struct */ |
MOV_L( ARG_DEST, EDI ) /* EDI = destination vector struct */ |
MOV_L( ARG_MATRIX, EDX ) /* EDX = matrix base used by MATn */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */ |
TEST_L( ECX, ECX ) |
JZ( LLBL(x86_p3_pr_done) ) /* empty source: leave destination untouched */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride */ |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* OR VEC_SIZE_4 into the destination flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* destination size = 4 */ |
SHL_L( CONST(4), ECX ) /* count * 16 bytes per output point */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first source point */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */ |
ADD_L( EDI, ECX ) /* ECX = one-past-the-end output pointer */ |
ALIGNTEXT16 |
LLBL(x86_p3_pr_loop): |
FLD_S( SRC0 ) /* F4 */ |
FMUL_S( MAT0 ) /* F4 = x*m0 */ |
FLD_S( SRC1 ) /* F5 F4 */ |
FMUL_S( MAT5 ) /* F5 = y*m5 */ |
FLD_S( SRC2 ) /* F0 F5 F4 */ |
FMUL_S( MAT8 ) /* F0 = z*m8 */ |
FLD_S( SRC2 ) /* F1 F0 F5 F4 */ |
FMUL_S( MAT9 ) /* F1 = z*m9 */ |
FLD_S( SRC2 ) /* F2 F1 F0 F5 F4 */ |
FMUL_S( MAT10 ) /* F2 = z*m10 */ |
FXCH( ST(2) ) /* F0 F1 F2 F5 F4 */ |
FADDP( ST0, ST(4) ) /* F1 F2 F5 F4 */ |
FADDP( ST0, ST(2) ) /* F2 F5 F4 */ |
FLD_S( MAT14 ) /* F6 F2 F5 F4 */ |
FXCH( ST(1) ) /* F2 F6 F5 F4 */ |
FADDP( ST0, ST(1) ) /* F6 F5 F4 */ |
MOV_L( SRC2, EBX ) /* EBX = raw bits of z, read before any output store */ |
XOR_L( CONST(-2147483648), EBX )/* change sign */ |
FXCH( ST(2) ) /* F4 F5 F6 */ |
FSTP_S( DST0 ) /* F5 F6 */ |
FSTP_S( DST1 ) /* F6 */ |
FSTP_S( DST2 ) /* */ |
MOV_L( EBX, DST3 ) /* out.w = -z (sign bit flipped above) */ |
LLBL(x86_p3_pr_skip): |
ADD_L( CONST(16), EDI ) /* advance output by one 16-byte point */ |
ADD_L( EAX, ESI ) /* advance source by its stride */ |
CMP_L( ECX, EDI ) |
JNE( LLBL(x86_p3_pr_loop) ) /* loop until the output pointer reaches the end */ |
LLBL(x86_p3_pr_done): |
POP_L( EBX ) |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
ALIGNTEXT16 /* points3_3d: for each source point (x, y, z) stores out[i] = x*m[i] + y*m[4+i] + z*m[8+i] + m[12+i], i = 0..2; the fourth output slot is not written */ |
GLOBL GLNAME( _mesa_x86_transform_points3_3d ) |
HIDDEN(_mesa_x86_transform_points3_3d) |
GLNAME( _mesa_x86_transform_points3_3d ): |
#define FRAME_OFFSET 8 /* matches the 8 bytes pushed below; presumably read by the ARG_* macros in xform_args.h -- confirm */ |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector struct */ |
MOV_L( ARG_DEST, EDI ) /* EDI = destination vector struct */ |
MOV_L( ARG_MATRIX, EDX ) /* EDX = matrix base used by MATn */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */ |
TEST_L( ECX, ECX ) |
JZ( LLBL(x86_p3_3dr_done) ) /* empty source: leave destination untouched */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* OR VEC_SIZE_3 into the destination flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* destination size = 3 */ |
SHL_L( CONST(4), ECX ) /* count * 16 bytes per output point */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first source point */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */ |
ADD_L( EDI, ECX ) /* ECX = one-past-the-end output pointer */ |
ALIGNTEXT16 |
LLBL(x86_p3_3dr_loop): |
FLD_S( SRC0 ) /* F4 */ |
FMUL_S( MAT0 ) /* F4 = x*m0 */ |
FLD_S( SRC0 ) /* F5 F4 */ |
FMUL_S( MAT1 ) /* F5 = x*m1 */ |
FLD_S( SRC0 ) /* F6 F5 F4 */ |
FMUL_S( MAT2 ) /* F6 = x*m2 */ |
FLD_S( SRC1 ) /* F0 F6 F5 F4 */ |
FMUL_S( MAT4 ) /* F0 = y*m4 */ |
FLD_S( SRC1 ) /* F1 F0 F6 F5 F4 */ |
FMUL_S( MAT5 ) /* F1 = y*m5 */ |
FLD_S( SRC1 ) /* F2 F1 F0 F6 F5 F4 */ |
FMUL_S( MAT6 ) /* F2 = y*m6 */ |
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ |
FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */ |
FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */ |
FADDP( ST0, ST(1) ) /* F6 F5 F4 */ |
FLD_S( SRC2 ) /* F0 F6 F5 F4 */ |
FMUL_S( MAT8 ) /* F0 = z*m8 */ |
FLD_S( SRC2 ) /* F1 F0 F6 F5 F4 */ |
FMUL_S( MAT9 ) /* F1 = z*m9 */ |
FLD_S( SRC2 ) /* F2 F1 F0 F6 F5 F4 */ |
FMUL_S( MAT10 ) /* F2 = z*m10 */ |
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ |
FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */ |
FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */ |
FADDP( ST0, ST(1) ) /* F6 F5 F4 */ |
FXCH( ST(2) ) /* F4 F5 F6 */ |
FADD_S( MAT12 ) /* F4 += m12 */ |
FXCH( ST(1) ) /* F5 F4 F6 */ |
FADD_S( MAT13 ) /* F5 += m13 */ |
FXCH( ST(2) ) /* F6 F4 F5 */ |
FADD_S( MAT14 ) /* F6 += m14 */ |
FXCH( ST(1) ) /* F4 F6 F5 */ |
FSTP_S( DST0 ) /* F6 F5 */ |
FXCH( ST(1) ) /* F5 F6 */ |
FSTP_S( DST1 ) /* F6 */ |
FSTP_S( DST2 ) /* */ |
LLBL(x86_p3_3dr_skip): |
ADD_L( CONST(16), EDI ) /* advance output by one 16-byte point */ |
ADD_L( EAX, ESI ) /* advance source by its stride */ |
CMP_L( ECX, EDI ) |
JNE( LLBL(x86_p3_3dr_loop) ) /* loop until the output pointer reaches the end */ |
LLBL(x86_p3_3dr_done): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
ALIGNTEXT16 /* points3_3d_no_rot: for each source point (x, y, z) stores (x*m0 + m12, y*m5 + m13, z*m10 + m14); the fourth output slot is not written */ |
GLOBL GLNAME( _mesa_x86_transform_points3_3d_no_rot ) |
HIDDEN(_mesa_x86_transform_points3_3d_no_rot) |
GLNAME( _mesa_x86_transform_points3_3d_no_rot ): |
#define FRAME_OFFSET 8 /* matches the 8 bytes pushed below; presumably read by the ARG_* macros in xform_args.h -- confirm */ |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector struct */ |
MOV_L( ARG_DEST, EDI ) /* EDI = destination vector struct */ |
MOV_L( ARG_MATRIX, EDX ) /* EDX = matrix base used by MATn */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */ |
TEST_L( ECX, ECX ) |
JZ( LLBL(x86_p3_3dnrr_done) ) /* empty source: leave destination untouched */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* OR VEC_SIZE_3 into the destination flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* destination size = 3 */ |
SHL_L( CONST(4), ECX ) /* count * 16 bytes per output point */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first source point */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */ |
ADD_L( EDI, ECX ) /* ECX = one-past-the-end output pointer */ |
ALIGNTEXT16 |
LLBL(x86_p3_3dnrr_loop): |
FLD_S( SRC0 ) /* F4 */ |
FMUL_S( MAT0 ) /* F4 = x*m0 */ |
FLD_S( SRC1 ) /* F1 F4 */ |
FMUL_S( MAT5 ) /* F1 = y*m5 */ |
FLD_S( SRC2 ) /* F2 F1 F4 */ |
FMUL_S( MAT10 ) /* F2 = z*m10 */ |
FXCH( ST(2) ) /* F4 F1 F2 */ |
FADD_S( MAT12 ) /* F4 += m12 */ |
FLD_S( MAT13 ) /* F5 F4 F1 F2 */ |
FXCH( ST(2) ) /* F1 F4 F5 F2 */ |
FADDP( ST0, ST(2) ) /* F4 F5 F2 */ |
FLD_S( MAT14 ) /* F6 F4 F5 F2 */ |
FXCH( ST(3) ) /* F2 F4 F5 F6 */ |
FADDP( ST0, ST(3) ) /* F4 F5 F6 */ |
FSTP_S( DST0 ) /* F5 F6 */ |
FSTP_S( DST1 ) /* F6 */ |
FSTP_S( DST2 ) /* */ |
LLBL(x86_p3_3dnrr_skip): |
ADD_L( CONST(16), EDI ) /* advance output by one 16-byte point */ |
ADD_L( EAX, ESI ) /* advance source by its stride */ |
CMP_L( ECX, EDI ) |
JNE( LLBL(x86_p3_3dnrr_loop) ) /* loop until the output pointer reaches the end */ |
LLBL(x86_p3_3dnrr_done): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
ALIGNTEXT16 /* points3_2d: for each source point (x, y, z) stores (x*m0 + y*m4 + m12, x*m1 + y*m5 + m13, z); z is copied unchanged and the fourth output slot is not written */ |
GLOBL GLNAME( _mesa_x86_transform_points3_2d ) |
HIDDEN(_mesa_x86_transform_points3_2d) |
GLNAME( _mesa_x86_transform_points3_2d ): |
#define FRAME_OFFSET 12 /* matches the 12 bytes pushed below; presumably read by the ARG_* macros in xform_args.h -- confirm */ |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
PUSH_L( EBX ) |
MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector struct */ |
MOV_L( ARG_DEST, EDI ) /* EDI = destination vector struct */ |
MOV_L( ARG_MATRIX, EDX ) /* EDX = matrix base used by MATn */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */ |
TEST_L( ECX, ECX ) |
JZ( LLBL(x86_p3_2dr_done) ) /* empty source: leave destination untouched */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* OR VEC_SIZE_3 into the destination flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* destination size = 3 */ |
SHL_L( CONST(4), ECX ) /* count * 16 bytes per output point */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first source point */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */ |
ADD_L( EDI, ECX ) /* ECX = one-past-the-end output pointer */ |
ALIGNTEXT16 |
LLBL(x86_p3_2dr_loop): |
FLD_S( SRC0 ) /* F4 */ |
FMUL_S( MAT0 ) /* F4 = x*m0 */ |
FLD_S( SRC0 ) /* F5 F4 */ |
FMUL_S( MAT1 ) /* F5 = x*m1 */ |
FLD_S( SRC1 ) /* F0 F5 F4 */ |
FMUL_S( MAT4 ) /* F0 = y*m4 */ |
FLD_S( SRC1 ) /* F1 F0 F5 F4 */ |
FMUL_S( MAT5 ) /* F1 = y*m5 */ |
FXCH( ST(1) ) /* F0 F1 F5 F4 */ |
FADDP( ST0, ST(3) ) /* F1 F5 F4 */ |
FADDP( ST0, ST(1) ) /* F5 F4 */ |
FXCH( ST(1) ) /* F4 F5 */ |
FADD_S( MAT12 ) /* F4 += m12 */ |
FXCH( ST(1) ) /* F5 F4 */ |
FADD_S( MAT13 ) /* F5 += m13 */ |
MOV_L( SRC2, EBX ) /* EBX = raw bits of z, read before any output store */ |
FXCH( ST(1) ) /* F4 F5 */ |
FSTP_S( DST0 ) /* F5 */ |
FSTP_S( DST1 ) /* */ |
MOV_L( EBX, DST2 ) /* out.z = z, copied as an integer word */ |
LLBL(x86_p3_2dr_skip): |
ADD_L( CONST(16), EDI ) /* advance output by one 16-byte point */ |
ADD_L( EAX, ESI ) /* advance source by its stride */ |
CMP_L( ECX, EDI ) |
JNE( LLBL(x86_p3_2dr_loop) ) /* loop until the output pointer reaches the end */ |
LLBL(x86_p3_2dr_done): |
POP_L( EBX ) |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
ALIGNTEXT16 /* points3_2d_no_rot: for each source point (x, y, z) stores (x*m0 + m12, y*m5 + m13, z); z is copied unchanged and the fourth output slot is not written */ |
GLOBL GLNAME( _mesa_x86_transform_points3_2d_no_rot ) |
HIDDEN(_mesa_x86_transform_points3_2d_no_rot) |
GLNAME( _mesa_x86_transform_points3_2d_no_rot ): |
#define FRAME_OFFSET 12 /* matches the 12 bytes pushed below; presumably read by the ARG_* macros in xform_args.h -- confirm */ |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
PUSH_L( EBX ) |
MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector struct */ |
MOV_L( ARG_DEST, EDI ) /* EDI = destination vector struct */ |
MOV_L( ARG_MATRIX, EDX ) /* EDX = matrix base used by MATn */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */ |
TEST_L( ECX, ECX ) |
JZ( LLBL(x86_p3_2dnrr_done) ) /* empty source: leave destination untouched */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* OR VEC_SIZE_3 into the destination flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* destination size = 3 */ |
SHL_L( CONST(4), ECX ) /* count * 16 bytes per output point */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first source point */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */ |
ADD_L( EDI, ECX ) /* ECX = one-past-the-end output pointer */ |
ALIGNTEXT16 |
LLBL(x86_p3_2dnrr_loop): |
FLD_S( SRC0 ) /* F4 */ |
FMUL_S( MAT0 ) /* F4 = x*m0 */ |
FLD_S( SRC1 ) /* F1 F4 */ |
FMUL_S( MAT5 ) /* F1 = y*m5 */ |
FXCH( ST(1) ) /* F4 F1 */ |
FADD_S( MAT12 ) /* F4 += m12 */ |
FLD_S( MAT13 ) /* F5 F4 F1 */ |
FXCH( ST(2) ) /* F1 F4 F5 */ |
FADDP( ST0, ST(2) ) /* F4 F5 */ |
MOV_L( SRC2, EBX ) /* EBX = raw bits of z, read before any output store */ |
FSTP_S( DST0 ) /* F5 */ |
FSTP_S( DST1 ) /* */ |
MOV_L( EBX, DST2 ) /* out.z = z, copied as an integer word */ |
LLBL(x86_p3_2dnrr_skip): |
ADD_L( CONST(16), EDI ) /* advance output by one 16-byte point */ |
ADD_L( EAX, ESI ) /* advance source by its stride */ |
CMP_L( ECX, EDI ) |
JNE( LLBL(x86_p3_2dnrr_loop) ) /* loop until the output pointer reaches the end */ |
LLBL(x86_p3_2dnrr_done): |
POP_L( EBX ) |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
ALIGNTEXT16 /* points3_identity: copies the first three 32-bit words of each source point to the output unchanged; the copy is skipped when source and output data start at the same address */ |
GLOBL GLNAME( _mesa_x86_transform_points3_identity ) |
HIDDEN(_mesa_x86_transform_points3_identity) |
GLNAME(_mesa_x86_transform_points3_identity ): |
#define FRAME_OFFSET 16 /* matches the 16 bytes pushed below; presumably read by the ARG_* macros in xform_args.h -- confirm */ |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
PUSH_L( EBX ) |
PUSH_L( EBP ) |
MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector struct */ |
MOV_L( ARG_DEST, EDI ) /* EDI = destination vector struct */ |
MOV_L( ARG_MATRIX, EDX ) /* NOTE(review): this value is never read; EDX is overwritten as a copy scratch register in the loop */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */ |
TEST_L( ECX, ECX ) |
JZ( LLBL(x86_p3_ir_done) ) /* empty source: leave destination untouched */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride */ |
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* OR VEC_SIZE_3 into the destination flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */ |
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* destination size = 3 */ |
SHL_L( CONST(4), ECX ) /* count * 16 bytes per output point */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first source point */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */ |
ADD_L( EDI, ECX ) /* ECX = one-past-the-end output pointer */ |
CMP_L( ESI, EDI ) |
JE( LLBL(x86_p3_ir_done) ) /* same start address: nothing to copy (flags, count and size were already updated above) */ |
ALIGNTEXT16 |
LLBL(x86_p3_ir_loop): |
#if 1 /* active variant: integer register copy, bit-for-bit */ |
MOV_L( SRC0, EBX ) |
MOV_L( SRC1, EBP ) |
MOV_L( SRC2, EDX ) |
MOV_L( EBX, DST0 ) |
MOV_L( EBP, DST1 ) |
MOV_L( EDX, DST2 ) |
#else /* disabled variant: the same copy routed through the x87 stack */ |
FLD_S( SRC0 ) |
FLD_S( SRC1 ) |
FLD_S( SRC2 ) |
FSTP_S( DST2 ) |
FSTP_S( DST1 ) |
FSTP_S( DST0 ) |
#endif |
LLBL(x86_p3_ir_skip): |
ADD_L( CONST(16), EDI ) /* advance output by one 16-byte point */ |
ADD_L( EAX, ESI ) /* advance source by its stride */ |
CMP_L( ECX, EDI ) |
JNE( LLBL(x86_p3_ir_loop) ) /* loop until the output pointer reaches the end */ |
LLBL(x86_p3_ir_done): |
POP_L( EBP ) |
POP_L( EBX ) |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
#if defined (__ELF__) && defined (__linux__) /* only emitted for ELF objects built for Linux */ |
.section .note.GNU-stack,"",%progbits /* empty .note.GNU-stack section; by GNU toolchain convention this presumably marks the object as not requiring an executable stack -- confirm against binutils docs */ |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/x86_xform4.S |
---|
0,0 → 1,677 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* |
* NOTE: Avoid using spaces in between '(' ')' and arguments, especially |
* with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces |
* in there will break the build on some platforms. |
*/ |
#include "assyntax.h" |
#include "matypes.h" |
#include "xform_args.h" |
SEG_TEXT |
#define FP_ONE 1065353216 /* 0x3F800000, the IEEE-754 single-precision bit pattern of 1.0 */ |
#define FP_ZERO 0 /* all-zero word, the single-precision bit pattern of +0.0 */ |
#define SRC0 REGOFF(0, ESI) /* SRC0..SRC3: four 32-bit slots at byte offsets 0..12 from ESI (current source point) */ |
#define SRC1 REGOFF(4, ESI) |
#define SRC2 REGOFF(8, ESI) |
#define SRC3 REGOFF(12, ESI) |
#define DST0 REGOFF(0, EDI) /* DST0..DST3: four 32-bit slots at byte offsets 0..12 from EDI (current output point) */ |
#define DST1 REGOFF(4, EDI) |
#define DST2 REGOFF(8, EDI) |
#define DST3 REGOFF(12, EDI) |
#define MAT0 REGOFF(0, EDX) /* MAT0..MAT15: sixteen 32-bit slots at byte offsets 0..60 from EDX (matrix base) */ |
#define MAT1 REGOFF(4, EDX) |
#define MAT2 REGOFF(8, EDX) |
#define MAT3 REGOFF(12, EDX) |
#define MAT4 REGOFF(16, EDX) |
#define MAT5 REGOFF(20, EDX) |
#define MAT6 REGOFF(24, EDX) |
#define MAT7 REGOFF(28, EDX) |
#define MAT8 REGOFF(32, EDX) |
#define MAT9 REGOFF(36, EDX) |
#define MAT10 REGOFF(40, EDX) |
#define MAT11 REGOFF(44, EDX) |
#define MAT12 REGOFF(48, EDX) |
#define MAT13 REGOFF(52, EDX) |
#define MAT14 REGOFF(56, EDX) |
#define MAT15 REGOFF(60, EDX) |
ALIGNTEXT16 /* points4_general: for each source point (x, y, z, w) stores out[i] = x*m[i] + y*m[4+i] + z*m[8+i] + w*m[12+i], i = 0..3 */ |
GLOBL GLNAME( _mesa_x86_transform_points4_general ) |
HIDDEN(_mesa_x86_transform_points4_general) |
GLNAME( _mesa_x86_transform_points4_general ): |
#define FRAME_OFFSET 8 /* matches the 8 bytes pushed below; presumably read by the ARG_* macros in xform_args.h -- confirm */ |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector struct */ |
MOV_L( ARG_DEST, EDI ) /* EDI = destination vector struct */ |
MOV_L( ARG_MATRIX, EDX ) /* EDX = matrix base used by MATn */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */ |
TEST_L( ECX, ECX ) |
JZ( LLBL(x86_p4_gr_done) ) /* empty source: leave destination untouched */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride */ |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* OR VEC_SIZE_4 into the destination flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* destination size = 4 */ |
SHL_L( CONST(4), ECX ) /* count * 16 bytes per output point */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first source point */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */ |
ADD_L( EDI, ECX ) /* ECX = one-past-the-end output pointer */ |
ALIGNTEXT16 |
LLBL(x86_p4_gr_loop): |
FLD_S( SRC0 ) /* F4 */ |
FMUL_S( MAT0 ) /* F4 = x*m0 */ |
FLD_S( SRC0 ) /* F5 F4 */ |
FMUL_S( MAT1 ) /* F5 = x*m1 */ |
FLD_S( SRC0 ) /* F6 F5 F4 */ |
FMUL_S( MAT2 ) /* F6 = x*m2 */ |
FLD_S( SRC0 ) /* F7 F6 F5 F4 */ |
FMUL_S( MAT3 ) /* F7 = x*m3 */ |
FLD_S( SRC1 ) /* F0 F7 F6 F5 F4 */ |
FMUL_S( MAT4 ) /* F0 = y*m4 */ |
FLD_S( SRC1 ) /* F1 F0 F7 F6 F5 F4 */ |
FMUL_S( MAT5 ) /* F1 = y*m5 */ |
FLD_S( SRC1 ) /* F2 F1 F0 F7 F6 F5 F4 */ |
FMUL_S( MAT6 ) /* F2 = y*m6 */ |
FLD_S( SRC1 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ |
FMUL_S( MAT7 ) /* F3 = y*m7 */ |
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ |
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */ |
FLD_S( SRC2 ) /* F0 F7 F6 F5 F4 */ |
FMUL_S( MAT8 ) /* F0 = z*m8 */ |
FLD_S( SRC2 ) /* F1 F0 F7 F6 F5 F4 */ |
FMUL_S( MAT9 ) /* F1 = z*m9 */ |
FLD_S( SRC2 ) /* F2 F1 F0 F7 F6 F5 F4 */ |
FMUL_S( MAT10 ) /* F2 = z*m10 */ |
FLD_S( SRC2 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ |
FMUL_S( MAT11 ) /* F3 = z*m11 */ |
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ |
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */ |
FLD_S( SRC3 ) /* F0 F7 F6 F5 F4 */ |
FMUL_S( MAT12 ) /* F0 = w*m12 */ |
FLD_S( SRC3 ) /* F1 F0 F7 F6 F5 F4 */ |
FMUL_S( MAT13 ) /* F1 = w*m13 */ |
FLD_S( SRC3 ) /* F2 F1 F0 F7 F6 F5 F4 */ |
FMUL_S( MAT14 ) /* F2 = w*m14 */ |
FLD_S( SRC3 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ |
FMUL_S( MAT15 ) /* F3 = w*m15 */ |
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ |
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */ |
FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */ |
FXCH( ST(3) ) /* F4 F6 F5 F7 */ |
FSTP_S( DST0 ) /* F6 F5 F7 */ |
FXCH( ST(1) ) /* F5 F6 F7 */ |
FSTP_S( DST1 ) /* F6 F7 */ |
FSTP_S( DST2 ) /* F7 */ |
FSTP_S( DST3 ) /* */ |
LLBL(x86_p4_gr_skip): |
ADD_L( CONST(16), EDI ) /* advance output by one 16-byte point */ |
ADD_L( EAX, ESI ) /* advance source by its stride */ |
CMP_L( ECX, EDI ) |
JNE( LLBL(x86_p4_gr_loop) ) /* loop until the output pointer reaches the end */ |
LLBL(x86_p4_gr_done): |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
ALIGNTEXT16 /* points4_perspective: for each source point (x, y, z, w) stores (x*m0 + z*m8, y*m5 + z*m9, z*m10 + w*m14, -z) */ |
GLOBL GLNAME( _mesa_x86_transform_points4_perspective ) |
HIDDEN(_mesa_x86_transform_points4_perspective) |
GLNAME( _mesa_x86_transform_points4_perspective ): |
#define FRAME_OFFSET 12 /* matches the 12 bytes pushed below; presumably read by the ARG_* macros in xform_args.h -- confirm */ |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
PUSH_L( EBX ) |
MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector struct */ |
MOV_L( ARG_DEST, EDI ) /* EDI = destination vector struct */ |
MOV_L( ARG_MATRIX, EDX ) /* EDX = matrix base used by MATn */ |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source count */ |
TEST_L( ECX, ECX ) |
JZ( LLBL(x86_p4_pr_done) ) /* empty source: leave destination untouched */ |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride */ |
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* OR VEC_SIZE_4 into the destination flags */ |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */ |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* destination size = 4 */ |
SHL_L( CONST(4), ECX ) /* count * 16 bytes per output point */ |
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first source point */ |
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first output point */ |
ADD_L( EDI, ECX ) /* ECX = one-past-the-end output pointer */ |
ALIGNTEXT16 |
LLBL(x86_p4_pr_loop): |
FLD_S( SRC0 ) /* F4 */ |
FMUL_S( MAT0 ) /* F4 = x*m0 */ |
FLD_S( SRC1 ) /* F5 F4 */ |
FMUL_S( MAT5 ) /* F5 = y*m5 */ |
FLD_S( SRC2 ) /* F0 F5 F4 */ |
FMUL_S( MAT8 ) /* F0 = z*m8 */ |
FLD_S( SRC2 ) /* F1 F0 F5 F4 */ |
FMUL_S( MAT9 ) /* F1 = z*m9 */ |
FLD_S( SRC2 ) /* F6 F1 F0 F5 F4 */ |
FMUL_S( MAT10 ) /* F6 = z*m10 */ |
FXCH( ST(2) ) /* F0 F1 F6 F5 F4 */ |
FADDP( ST0, ST(4) ) /* F1 F6 F5 F4 */ |
FADDP( ST0, ST(2) ) /* F6 F5 F4 */ |
FLD_S( SRC3 ) /* F2 F6 F5 F4 */ |
FMUL_S( MAT14 ) /* F2 = w*m14 */ |
FADDP( ST0, ST(1) ) /* F6 F5 F4 */ |
MOV_L( SRC2, EBX ) /* EBX = raw bits of z, read before any output store */ |
XOR_L( CONST(-2147483648), EBX )/* change sign */ |
FXCH( ST(2) ) /* F4 F5 F6 */ |
FSTP_S( DST0 ) /* F5 F6 */ |
FSTP_S( DST1 ) /* F6 */ |
FSTP_S( DST2 ) /* */ |
MOV_L( EBX, DST3 ) /* out.w = -z (sign bit flipped above) */ |
LLBL(x86_p4_pr_skip): |
ADD_L( CONST(16), EDI ) /* advance output by one 16-byte point */ |
ADD_L( EAX, ESI ) /* advance source by its stride */ |
CMP_L( ECX, EDI ) |
JNE( LLBL(x86_p4_pr_loop) ) /* loop until the output pointer reaches the end */ |
LLBL(x86_p4_pr_done): |
POP_L( EBX ) |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_x86_transform_points4_3d
 *
 * x87 loop that transforms 4-component points using matrix elements
 * 0-2, 4-6, 8-10 and 12-14; the fourth component is copied through.
 *
 * FRAME_OFFSET is 12 because three registers (ESI, EDI, EBX) are pushed
 * before the ARG_ macros are used.
 *
 * Register roles after setup:
 *   ESI = current source vertex, advanced by the source stride (EAX, bytes)
 *   EDI = current destination vertex, advanced by a fixed 16 bytes
 *   EDX = matrix pointer
 *   ECX = end-of-destination pointer (dest start + count * 16)
 *
 * The destination vector gets VEC_SIZE_4 OR-ed into its flags, its count
 * set to the source count and its size set to 4.  A source count of zero
 * returns before any destination field is written.
 *
 * NOTE(review): SRCn / DSTn / MATn are defined outside this excerpt;
 * presumably they address element n relative to ESI / EDI / EDX - confirm.
 * Under that reading each vertex produces:
 *   DST0 = SRC0*MAT0 + SRC1*MAT4 + SRC2*MAT8  + SRC3*MAT12
 *   DST1 = SRC0*MAT1 + SRC1*MAT5 + SRC2*MAT9  + SRC3*MAT13
 *   DST2 = SRC0*MAT2 + SRC1*MAT6 + SRC2*MAT10 + SRC3*MAT14
 *   DST3 = SRC3 (raw 32-bit copy)
 *
 * The trailing stack comments list the x87 stack top-first.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_x86_transform_points4_3d ) |
HIDDEN(_mesa_x86_transform_points4_3d) |
GLNAME( _mesa_x86_transform_points4_3d ): |
#define FRAME_OFFSET 12 |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
PUSH_L( EBX ) |
MOV_L( ARG_SOURCE, ESI ) |
MOV_L( ARG_DEST, EDI ) |
MOV_L( ARG_MATRIX, EDX ) |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) |
/* Nothing to do for an empty source vector. */
TEST_L( ECX, ECX ) |
JZ( LLBL(x86_p4_3dr_done) ) |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) |
/* Publish the result layout in the destination vector header. */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) |
/* ECX = count * 16, then turned into the destination end pointer below. */
SHL_L( CONST(4), ECX ) |
/* Replace the vector-struct pointers with their data start pointers. */
MOV_L( REGOFF(V4F_START, ESI), ESI ) |
MOV_L( REGOFF(V4F_START, EDI), EDI ) |
ADD_L( EDI, ECX ) |
ALIGNTEXT16 |
LLBL(x86_p4_3dr_loop): |
/* SRC0 products start the three accumulators F4, F5, F6. */
FLD_S( SRC0 ) /* F4 */ |
FMUL_S( MAT0 ) |
FLD_S( SRC0 ) /* F5 F4 */ |
FMUL_S( MAT1 ) |
FLD_S( SRC0 ) /* F6 F5 F4 */ |
FMUL_S( MAT2 ) |
/* SRC1 products, then folded into F4, F5, F6 in that order. */
FLD_S( SRC1 ) /* F0 F6 F5 F4 */ |
FMUL_S( MAT4 ) |
FLD_S( SRC1 ) /* F1 F0 F6 F5 F4 */ |
FMUL_S( MAT5 ) |
FLD_S( SRC1 ) /* F2 F1 F0 F6 F5 F4 */ |
FMUL_S( MAT6 ) |
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ |
FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */ |
FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */ |
FADDP( ST0, ST(1) ) /* F6 F5 F4 */ |
/* SRC2 products, folded the same way. */
FLD_S( SRC2 ) /* F0 F6 F5 F4 */ |
FMUL_S( MAT8 ) |
FLD_S( SRC2 ) /* F1 F0 F6 F5 F4 */ |
FMUL_S( MAT9 ) |
FLD_S( SRC2 ) /* F2 F1 F0 F6 F5 F4 */ |
FMUL_S( MAT10 ) |
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ |
FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */ |
FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */ |
FADDP( ST0, ST(1) ) /* F6 F5 F4 */ |
/* SRC3 products, folded the same way. */
FLD_S( SRC3 ) /* F0 F6 F5 F4 */ |
FMUL_S( MAT12 ) |
FLD_S( SRC3 ) /* F1 F0 F6 F5 F4 */ |
FMUL_S( MAT13 ) |
FLD_S( SRC3 ) /* F2 F1 F0 F6 F5 F4 */ |
FMUL_S( MAT14 ) |
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ |
FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */ |
FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */ |
FADDP( ST0, ST(1) ) /* F6 F5 F4 */ |
/* SRC3 is read into EBX before any store and written unchanged to DST3. */
MOV_L( SRC3, EBX ) |
FXCH( ST(2) ) /* F4 F5 F6 */ |
FSTP_S( DST0 ) /* F5 F6 */ |
FSTP_S( DST1 ) /* F6 */ |
FSTP_S( DST2 ) /* */ |
MOV_L( EBX, DST3 ) |
/* Label is not referenced anywhere in this function. */
LLBL(x86_p4_3dr_skip): |
ADD_L( CONST(16), EDI ) |
ADD_L( EAX, ESI ) |
/* Loop until the destination pointer reaches the end pointer. */
CMP_L( ECX, EDI ) |
JNE( LLBL(x86_p4_3dr_loop) ) |
LLBL(x86_p4_3dr_done): |
/* Restore saved registers in reverse push order. */
POP_L( EBX ) |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_x86_transform_points4_3d_no_rot
 *
 * x87 loop that transforms 4-component points using only matrix elements
 * 0, 5, 10, 12, 13 and 14; the fourth component is copied through.
 *
 * FRAME_OFFSET is 12 because three registers (ESI, EDI, EBX) are pushed
 * before the ARG_ macros are used.
 *
 * Register roles after setup:
 *   ESI = current source vertex, advanced by the source stride (EAX, bytes)
 *   EDI = current destination vertex, advanced by a fixed 16 bytes
 *   EDX = matrix pointer
 *   ECX = end-of-destination pointer (dest start + count * 16)
 *
 * The destination vector gets VEC_SIZE_4 OR-ed into its flags, its count
 * set to the source count and its size set to 4.  A source count of zero
 * returns before any destination field is written.
 *
 * NOTE(review): SRCn / DSTn / MATn are defined outside this excerpt;
 * presumably they address element n relative to ESI / EDI / EDX - confirm.
 * Under that reading each vertex produces:
 *   DST0 = SRC0*MAT0  + SRC3*MAT12
 *   DST1 = SRC1*MAT5  + SRC3*MAT13
 *   DST2 = SRC2*MAT10 + SRC3*MAT14
 *   DST3 = SRC3 (raw 32-bit copy)
 *
 * The trailing stack comments list the x87 stack top-first.
 */
ALIGNTEXT16 |
GLOBL GLNAME(_mesa_x86_transform_points4_3d_no_rot) |
HIDDEN(_mesa_x86_transform_points4_3d_no_rot) |
GLNAME(_mesa_x86_transform_points4_3d_no_rot): |
#define FRAME_OFFSET 12 |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
PUSH_L( EBX ) |
MOV_L( ARG_SOURCE, ESI ) |
MOV_L( ARG_DEST, EDI ) |
MOV_L( ARG_MATRIX, EDX ) |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) |
/* Nothing to do for an empty source vector. */
TEST_L( ECX, ECX ) |
JZ( LLBL(x86_p4_3dnrr_done) ) |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) |
/* Publish the result layout in the destination vector header. */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) |
/* ECX = count * 16, then turned into the destination end pointer below. */
SHL_L( CONST(4), ECX ) |
/* Replace the vector-struct pointers with their data start pointers. */
MOV_L( REGOFF(V4F_START, ESI), ESI ) |
MOV_L( REGOFF(V4F_START, EDI), EDI ) |
ADD_L( EDI, ECX ) |
ALIGNTEXT16 |
LLBL(x86_p4_3dnrr_loop): |
/* Per-component scale terms start the accumulators F4, F5, F6. */
FLD_S( SRC0 ) /* F4 */ |
FMUL_S( MAT0 ) |
FLD_S( SRC1 ) /* F5 F4 */ |
FMUL_S( MAT5 ) |
FLD_S( SRC2 ) /* F6 F5 F4 */ |
FMUL_S( MAT10 ) |
/* SRC3 products, then folded into F4, F5, F6 in that order. */
FLD_S( SRC3 ) /* F0 F6 F5 F4 */ |
FMUL_S( MAT12 ) |
FLD_S( SRC3 ) /* F1 F0 F6 F5 F4 */ |
FMUL_S( MAT13 ) |
FLD_S( SRC3 ) /* F2 F1 F0 F6 F5 F4 */ |
FMUL_S( MAT14 ) |
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ |
FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */ |
FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */ |
FADDP( ST0, ST(1) ) /* F6 F5 F4 */ |
/* SRC3 is read into EBX before any store and written unchanged to DST3. */
MOV_L( SRC3, EBX ) |
FXCH( ST(2) ) /* F4 F5 F6 */ |
FSTP_S( DST0 ) /* F5 F6 */ |
FSTP_S( DST1 ) /* F6 */ |
FSTP_S( DST2 ) /* */ |
MOV_L( EBX, DST3 ) |
/* Label is not referenced anywhere in this function. */
LLBL(x86_p4_3dnrr_skip): |
ADD_L( CONST(16), EDI ) |
ADD_L( EAX, ESI ) |
/* Loop until the destination pointer reaches the end pointer. */
CMP_L( ECX, EDI ) |
JNE( LLBL(x86_p4_3dnrr_loop) ) |
LLBL(x86_p4_3dnrr_done): |
/* Restore saved registers in reverse push order. */
POP_L( EBX ) |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_x86_transform_points4_2d
 *
 * x87 loop that transforms 4-component points using only matrix elements
 * 0, 1, 4, 5, 12 and 13; the third and fourth components are copied through.
 *
 * FRAME_OFFSET is 16 because four registers (ESI, EDI, EBX, EBP) are
 * pushed before the ARG_ macros are used.
 *
 * Register roles after setup:
 *   ESI = current source vertex, advanced by the source stride (EAX, bytes)
 *   EDI = current destination vertex, advanced by a fixed 16 bytes
 *   EDX = matrix pointer
 *   ECX = end-of-destination pointer (dest start + count * 16)
 *   EBX, EBP = integer scratch for the two pass-through components
 *
 * The destination vector gets VEC_SIZE_4 OR-ed into its flags, its count
 * set to the source count and its size set to 4.  A source count of zero
 * returns before any destination field is written.
 *
 * NOTE(review): SRCn / DSTn / MATn are defined outside this excerpt;
 * presumably they address element n relative to ESI / EDI / EDX - confirm.
 * Under that reading each vertex produces:
 *   DST0 = SRC0*MAT0 + SRC1*MAT4 + SRC3*MAT12
 *   DST1 = SRC0*MAT1 + SRC1*MAT5 + SRC3*MAT13
 *   DST2 = SRC2 (raw 32-bit copy)
 *   DST3 = SRC3 (raw 32-bit copy)
 *
 * The trailing stack comments list the x87 stack top-first.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_x86_transform_points4_2d ) |
HIDDEN(_mesa_x86_transform_points4_2d) |
GLNAME( _mesa_x86_transform_points4_2d ): |
#define FRAME_OFFSET 16 |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
PUSH_L( EBX ) |
PUSH_L( EBP ) |
MOV_L( ARG_SOURCE, ESI ) |
MOV_L( ARG_DEST, EDI ) |
MOV_L( ARG_MATRIX, EDX ) |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) |
/* Nothing to do for an empty source vector. */
TEST_L( ECX, ECX ) |
JZ( LLBL(x86_p4_2dr_done) ) |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) |
/* Publish the result layout in the destination vector header. */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) |
/* ECX = count * 16, then turned into the destination end pointer below. */
SHL_L( CONST(4), ECX ) |
/* Replace the vector-struct pointers with their data start pointers. */
MOV_L( REGOFF(V4F_START, ESI), ESI ) |
MOV_L( REGOFF(V4F_START, EDI), EDI ) |
ADD_L( EDI, ECX ) |
ALIGNTEXT16 |
LLBL(x86_p4_2dr_loop): |
/* SRC0 products start the two accumulators F4 and F5. */
FLD_S( SRC0 ) /* F4 */ |
FMUL_S( MAT0 ) |
FLD_S( SRC0 ) /* F5 F4 */ |
FMUL_S( MAT1 ) |
/* SRC1 products, then folded into F4 and F5. */
FLD_S( SRC1 ) /* F0 F5 F4 */ |
FMUL_S( MAT4 ) |
FLD_S( SRC1 ) /* F1 F0 F5 F4 */ |
FMUL_S( MAT5 ) |
FXCH( ST(1) ) /* F0 F1 F5 F4 */ |
FADDP( ST0, ST(3) ) /* F1 F5 F4 */ |
FADDP( ST0, ST(1) ) /* F5 F4 */ |
/* SRC3 products, folded the same way. */
FLD_S( SRC3 ) /* F0 F5 F4 */ |
FMUL_S( MAT12 ) |
FLD_S( SRC3 ) /* F1 F0 F5 F4 */ |
FMUL_S( MAT13 ) |
FXCH( ST(1) ) /* F0 F1 F5 F4 */ |
FADDP( ST0, ST(3) ) /* F1 F5 F4 */ |
FADDP( ST0, ST(1) ) /* F5 F4 */ |
/* Read the pass-through components before any store to the destination. */
MOV_L( SRC2, EBX ) |
MOV_L( SRC3, EBP ) |
FXCH( ST(1) ) /* F4 F5 */ |
FSTP_S( DST0 ) /* F5 */ |
FSTP_S( DST1 ) /* */ |
MOV_L( EBX, DST2 ) |
MOV_L( EBP, DST3 ) |
/* Label is not referenced anywhere in this function. */
LLBL(x86_p4_2dr_skip): |
ADD_L( CONST(16), EDI ) |
ADD_L( EAX, ESI ) |
/* Loop until the destination pointer reaches the end pointer. */
CMP_L( ECX, EDI ) |
JNE( LLBL(x86_p4_2dr_loop) ) |
LLBL(x86_p4_2dr_done): |
/* Restore saved registers in reverse push order. */
POP_L( EBP ) |
POP_L( EBX ) |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_x86_transform_points4_2d_no_rot
 *
 * x87 loop that transforms 4-component points using only matrix elements
 * 0, 5, 12 and 13; the third and fourth components are copied through.
 *
 * FRAME_OFFSET is 16 because four registers (ESI, EDI, EBX, EBP) are
 * pushed before the ARG_ macros are used.
 *
 * Register roles after setup:
 *   ESI = current source vertex, advanced by the source stride (EAX, bytes)
 *   EDI = current destination vertex, advanced by a fixed 16 bytes
 *   EDX = matrix pointer
 *   ECX = end-of-destination pointer (dest start + count * 16)
 *   EBX, EBP = integer scratch for the two pass-through components
 *
 * The destination vector gets VEC_SIZE_4 OR-ed into its flags, its count
 * set to the source count and its size set to 4.  A source count of zero
 * returns before any destination field is written.
 *
 * NOTE(review): SRCn / DSTn / MATn are defined outside this excerpt;
 * presumably they address element n relative to ESI / EDI / EDX - confirm.
 * Under that reading each vertex produces:
 *   DST0 = SRC0*MAT0 + SRC3*MAT12
 *   DST1 = SRC1*MAT5 + SRC3*MAT13
 *   DST2 = SRC2 (raw 32-bit copy)
 *   DST3 = SRC3 (raw 32-bit copy)
 *
 * The trailing stack comments list the x87 stack top-first.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_x86_transform_points4_2d_no_rot ) |
HIDDEN(_mesa_x86_transform_points4_2d_no_rot) |
GLNAME( _mesa_x86_transform_points4_2d_no_rot ): |
#define FRAME_OFFSET 16 |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
PUSH_L( EBX ) |
PUSH_L( EBP ) |
MOV_L( ARG_SOURCE, ESI ) |
MOV_L( ARG_DEST, EDI ) |
MOV_L( ARG_MATRIX, EDX ) |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) |
/* Nothing to do for an empty source vector. */
TEST_L( ECX, ECX ) |
JZ( LLBL(x86_p4_2dnrr_done) ) |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) |
/* Publish the result layout in the destination vector header. */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) |
/* ECX = count * 16, then turned into the destination end pointer below. */
SHL_L( CONST(4), ECX ) |
/* Replace the vector-struct pointers with their data start pointers. */
MOV_L( REGOFF(V4F_START, ESI), ESI ) |
MOV_L( REGOFF(V4F_START, EDI), EDI ) |
ADD_L( EDI, ECX ) |
ALIGNTEXT16 |
LLBL(x86_p4_2dnrr_loop): |
/* Per-component scale terms start the two accumulators F4 and F5. */
FLD_S( SRC0 ) /* F4 */ |
FMUL_S( MAT0 ) |
FLD_S( SRC1 ) /* F5 F4 */ |
FMUL_S( MAT5 ) |
/* SRC3 products, then folded into F4 and F5. */
FLD_S( SRC3 ) /* F0 F5 F4 */ |
FMUL_S( MAT12 ) |
FLD_S( SRC3 ) /* F1 F0 F5 F4 */ |
FMUL_S( MAT13 ) |
FXCH( ST(1) ) /* F0 F1 F5 F4 */ |
FADDP( ST0, ST(3) ) /* F1 F5 F4 */ |
FADDP( ST0, ST(1) ) /* F5 F4 */ |
/* Read the pass-through components before any store to the destination. */
MOV_L( SRC2, EBX ) |
MOV_L( SRC3, EBP ) |
FXCH( ST(1) ) /* F4 F5 */ |
FSTP_S( DST0 ) /* F5 */ |
FSTP_S( DST1 ) /* */ |
MOV_L( EBX, DST2 ) |
MOV_L( EBP, DST3 ) |
/* Label is not referenced anywhere in this function. */
LLBL(x86_p4_2dnrr_skip): |
ADD_L( CONST(16), EDI ) |
ADD_L( EAX, ESI ) |
/* Loop until the destination pointer reaches the end pointer. */
CMP_L( ECX, EDI ) |
JNE( LLBL(x86_p4_2dnrr_loop) ) |
LLBL(x86_p4_2dnrr_done): |
/* Restore saved registers in reverse push order. */
POP_L( EBP ) |
POP_L( EBX ) |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
#undef FRAME_OFFSET |
/*
 * _mesa_x86_transform_points4_identity
 *
 * Copies 4-component points from the source vector to the destination
 * vector with integer moves; no matrix element is read.
 *
 * FRAME_OFFSET is 12 because three registers (ESI, EDI, EBX) are pushed
 * before the ARG_ macros are used.
 *
 * Register roles after setup:
 *   ESI = current source vertex, advanced by the source stride (EAX, bytes)
 *   EDI = current destination vertex, advanced by a fixed 16 bytes
 *   ECX = end-of-destination pointer (dest start + count * 16)
 *   EBX, EDX = copy scratch
 *
 * The destination vector gets VEC_SIZE_4 OR-ed into its flags, its count
 * set to the source count and its size set to 4.  A source count of zero
 * returns before any destination field is written.  When the source and
 * destination data start pointers are equal the copy loop is skipped, but
 * the destination header fields have already been updated by then.
 *
 * NOTE(review): SRCn / DSTn are defined outside this excerpt; presumably
 * they address element n relative to ESI / EDI - confirm.
 */
ALIGNTEXT16 |
GLOBL GLNAME( _mesa_x86_transform_points4_identity ) |
HIDDEN(_mesa_x86_transform_points4_identity) |
GLNAME( _mesa_x86_transform_points4_identity ): |
#define FRAME_OFFSET 12 |
PUSH_L( ESI ) |
PUSH_L( EDI ) |
PUSH_L( EBX ) |
MOV_L( ARG_SOURCE, ESI ) |
MOV_L( ARG_DEST, EDI ) |
/* The matrix pointer is loaded but never used: EDX is overwritten as copy scratch in the loop. */
MOV_L( ARG_MATRIX, EDX ) |
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) |
/* Nothing to do for an empty source vector. */
TEST_L( ECX, ECX ) |
JZ( LLBL(x86_p4_ir_done) ) |
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) |
/* Publish the result layout in the destination vector header. */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) |
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) |
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) |
/* ECX = count * 16, then turned into the destination end pointer below. */
SHL_L( CONST(4), ECX ) |
/* Replace the vector-struct pointers with their data start pointers. */
MOV_L( REGOFF(V4F_START, ESI), ESI ) |
MOV_L( REGOFF(V4F_START, EDI), EDI ) |
ADD_L( EDI, ECX ) |
/* Same data start for source and destination: skip the copy. */
CMP_L( ESI, EDI ) |
JE( LLBL(x86_p4_ir_done) ) |
ALIGNTEXT16 |
LLBL(x86_p4_ir_loop): |
/* Copy the four 32-bit components two at a time through EBX and EDX. */
MOV_L( SRC0, EBX ) |
MOV_L( SRC1, EDX ) |
MOV_L( EBX, DST0 ) |
MOV_L( EDX, DST1 ) |
MOV_L( SRC2, EBX ) |
MOV_L( SRC3, EDX ) |
MOV_L( EBX, DST2 ) |
MOV_L( EDX, DST3 ) |
/* Label is not referenced anywhere in this function. */
LLBL(x86_p4_ir_skip): |
ADD_L( CONST(16), EDI ) |
ADD_L( EAX, ESI ) |
/* Loop until the destination pointer reaches the end pointer. */
CMP_L( ECX, EDI ) |
JNE( LLBL(x86_p4_ir_loop) ) |
LLBL(x86_p4_ir_done): |
/* Restore saved registers in reverse push order. */
POP_L( EBX ) |
POP_L( EDI ) |
POP_L( ESI ) |
RET |
/*
 * On ELF/Linux builds, emit an empty .note.GNU-stack section.  By GNU
 * toolchain convention an empty section of this name with no flags tells
 * the linker that this object does not need an executable stack.
 */
#if defined (__ELF__) && defined (__linux__) |
.section .note.GNU-stack,"",%progbits |
#endif |
/contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/xform_args.h |
---|
0,0 → 1,51 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/* |
* Transform function interface for assembly code. Simply define |
* FRAME_OFFSET to the number of bytes pushed onto the stack before |
* using the ARG_* argument macros. |
* |
* Gareth Hughes |
*/ |
/* Include guard for the transform-function argument macros. */
#ifndef __XFORM_ARGS_H__ |
#define __XFORM_ARGS_H__ |
/* Offsets for transform_func arguments |
* |
* typedef void (*transform_func)( GLvector4f *to_vec, |
* const GLfloat m[16], |
* const GLvector4f *from_vec ); |
*/ |
/*
 * Byte offsets of the three arguments, in the order of the typedef above
 * (to_vec, m, from_vec), measured from the stack pointer at function entry.
 * NOTE(review): offset 0 is presumably the return address pushed by the
 * call on 32-bit x86 - confirm against the calling convention in use.
 */
#define OFFSET_DEST 4 |
#define OFFSET_MATRIX 8 |
#define OFFSET_SOURCE 12 |
/*
 * ESP-relative operands for the three arguments.  FRAME_OFFSET is not
 * defined in this header: because macros expand at the point of use, the
 * including file must define it (as the number of bytes it has pushed
 * since entry) before any ARG_ macro is expanded.
 * NOTE(review): REGOFF is defined elsewhere; presumably it builds an
 * offset(register) memory operand - confirm.
 */
#define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP) |
#define ARG_MATRIX REGOFF(FRAME_OFFSET+OFFSET_MATRIX, ESP) |
#define ARG_SOURCE REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP) |
#endif |