WebSVN – Kolibri OS – Path Comparison – / – /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/ Rev 5562 and /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/ Rev 5563

Regard whitespace Rev 5562 → Rev 5563

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/3dnow.c
 ,0 → 1,91
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2003  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/*
+ * 3DNow! optimizations contributed by
+ * Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
+ */
+#include "main/glheader.h"
+#include "main/context.h"
+#include "math/m_xform.h"
+#include "tnl/t_context.h"
+#include "3dnow.h"
+#include "x86_xform.h"
+#ifdef DEBUG_MATH
+#include "math/m_debug.h"
+#endif
+#ifdef USE_3DNOW_ASM /* everything up to the matching #endif exists only when the 3DNow! assembly is enabled */
+DECLARE_XFORM_GROUP( 3dnow, 2 ) /* paired with ASSIGN_XFORM_GROUP( 3dnow, 2 ) in the init function */
+DECLARE_XFORM_GROUP( 3dnow, 3 ) /* paired with ASSIGN_XFORM_GROUP( 3dnow, 3 ) in the init function */
+DECLARE_XFORM_GROUP( 3dnow, 4 ) /* paired with ASSIGN_XFORM_GROUP( 3dnow, 4 ) in the init function */
+DECLARE_NORM_GROUP( 3dnow ) /* declared only: the matching ASSIGN_NORM_GROUP call is disabled in the init function */
+extern void _ASMAPI /* NOTE(review): declared here but not referenced anywhere else in this file */
+_mesa_v16_3dnow_general_xform( GLfloat *first_vert,
+                               const GLfloat *m,
+                               const GLfloat *src,
+                               GLuint src_stride,
+                               GLuint count );
+extern void _ASMAPI /* NOTE(review): declared here but not referenced anywhere else in this file */
+_mesa_3dnow_project_vertices( GLfloat *first,
+                              GLfloat *last,
+                              const GLfloat *m,
+                              GLuint stride );
+extern void _ASMAPI /* NOTE(review): declared here but not referenced anywhere else in this file */
+_mesa_3dnow_project_clipped_vertices( GLfloat *first,
+                                      GLfloat *last,
+                                      const GLfloat *m,
+                                      GLuint stride,
+                                      const GLubyte *clipmask );
+#endif /* USE_3DNOW_ASM */
+/**
+ * Install the 3DNow! transform routines.
+ *
+ * The body is empty unless USE_3DNOW_ASM is defined.  When DEBUG_MATH is
+ * defined as well, the transform and normal-transform self tests are run
+ * after the groups have been assigned.
+ */
+void
+_mesa_init_3dnow_transform_asm( void )
+{
+#if defined( USE_3DNOW_ASM )
+   ASSIGN_XFORM_GROUP( 3dnow, 2 );
+   ASSIGN_XFORM_GROUP( 3dnow, 3 );
+   ASSIGN_XFORM_GROUP( 3dnow, 4 );
+
+   /* The normal group is left unassigned: there is a bug somewhere in
+    * 3dnow_normal.S that causes bad shading.  Disabled for now.
+    */
+#if 0
+   ASSIGN_NORM_GROUP( 3dnow );
+#endif
+
+#if defined( DEBUG_MATH )
+   _math_test_all_transform_functions( "3DNow!" );
+   _math_test_all_normal_transform_functions( "3DNow!" );
+#endif /* DEBUG_MATH */
+#endif /* USE_3DNOW_ASM */
+}

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/3dnow.h
 ,0 → 1,36
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/*
+ * 3DNow! optimizations contributed by
+ * Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
+ */
+#ifndef __3DNOW_H__
+#define __3DNOW_H__
+void _mesa_init_3dnow_transform_asm( void ); /* assigns the 3DNow! transform groups when USE_3DNOW_ASM is defined; otherwise does nothing */
+#endif /* __3DNOW_H__ */

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/3dnow_normal.S
 ,0 → 1,852
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2003  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/*
+ * 3Dnow assembly code by Holger Waechtler
+ */
+#ifdef USE_3DNOW_ASM
+#include "assyntax.h"
+#include "matypes.h"
+#include "norm_args.h"
+        SEG_TEXT
+#define M(i)    REGOFF(i * 4, ECX) /* operand at byte offset i*4 from ECX; the routines in this file load mat->inv into ECX */
+#define STRIDE  REGOFF(12, ESI) /* operand at byte offset 12 from ESI (the input vector); added to the source pointer to step to the next normal. NOTE(review): presumably the stride field - confirm 12 against matypes.h */
+ALIGNTEXT16
+/*
+ * _mesa_3dnow_transform_normalize_normals
+ *
+ * Arguments are read through the ARG_MAT, ARG_SCALE, ARG_IN, ARG_LENGTHS
+ * and ARG_DEST stack-slot macros.  dest->count is set to in->count; if the
+ * count is zero nothing else is done.
+ *
+ * Pass 1 (G3TN_transform): every input normal (x0,x1,x2) is multiplied by
+ * elements m0..m2, m4..m6, m8..m10 of mat->inv and the three results are
+ * written to dest at a fixed 16-byte spacing.
+ *
+ * Pass 2: the results are normalized in place, either by multiplying with
+ * the caller-supplied per-normal factor (lengths != 0, G3TN_norm_w_lengths)
+ * or by computing 1/sqrt(x0*x0+x1*x1+x2*x2) with PFRSQRT and one
+ * PFRSQIT1/PFRCPIT2 refinement (lengths == 0, G3TN_norm).
+ * NOTE(review): the computed-length path has no guard for a zero-length
+ * vector.
+ *
+ * Fix: the matrix is multiplied by 'scale' when lengths ARE supplied.  The
+ * previous branch was inverted: it scaled only when lengths == 0, where the
+ * following normalization cancels a uniform scale anyway, and skipped the
+ * scale on the precomputed-lengths path, where nothing else applies it.
+ *
+ * EDI, ESI and EBP are saved and restored; EAX, ECX, EDX and MM0-MM7 are
+ * overwritten.
+ */
+GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
+HIDDEN(_mesa_3dnow_transform_normalize_normals)
+GLNAME(_mesa_3dnow_transform_normalize_normals):
+#define FRAME_OFFSET 12
+    PUSH_L     ( EDI )
+    PUSH_L     ( ESI )
+    PUSH_L     ( EBP )
+    MOV_L      ( ARG_LENGTHS, EDI )
+    MOV_L      ( ARG_IN, ESI )
+    MOV_L      ( ARG_DEST, EAX )
+    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP ) /*  dest->count = in->count   */
+    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
+    MOV_L      ( REGOFF(V4F_START, ESI), EDX ) /*  in->start    */
+    MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /*  dest->start  */
+    MOV_L      ( ARG_MAT, ECX )
+    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /*  mat->inv     */
+    CMP_L      ( CONST(0), EBP )        /*  count == 0 ?  -> nothing to do */
+    JE         ( LLBL (G3TN_end) )
+    MOV_L      ( REGOFF (V4F_COUNT, ESI), EBP ) /*  reload of in->count      */
+    FEMMS
+    PUSH_L     ( EBP )
+    PUSH_L     ( EAX )
+    PUSH_L     ( EDX )                  /*  save counter & pointers for  */
+                                        /*  the normalize pass           */
+#undef  FRAME_OFFSET
+#define FRAME_OFFSET 24                 /*  three more dwords pushed     */
+    MOVQ       ( M(0), MM3 )            /*  m1              | m0         */
+    MOVQ       ( M(4), MM4 )            /*  m5              | m4         */
+    MOVD       ( M(2), MM5 )            /*                  | m2         */
+    PUNPCKLDQ  ( M(6), MM5 )            /*  m6              | m2         */
+    MOVQ       ( M(8), MM6 )            /*  m9              | m8         */
+    MOVQ       ( M(10), MM7 )           /*  m11 (unused)    | m10        */
+    CMP_L      ( CONST(0), EDI )        /*  lengths == 0 ?               */
+    JE         ( LLBL (G3TN_scale_end ) ) /* yes: normalization below    */
+                                        /*  makes the scale irrelevant   */
+    MOVD       ( ARG_SCALE, MM0 )       /*               | scale         */
+    PUNPCKLDQ  ( MM0, MM0 )             /* scale         | scale         */
+    PFMUL      ( MM0, MM3 )             /* scale * m1    | scale * m0    */
+    PFMUL      ( MM0, MM4 )             /* scale * m5    | scale * m4    */
+    PFMUL      ( MM0, MM5 )             /* scale * m6    | scale * m2    */
+    PFMUL      ( MM0, MM6 )             /* scale * m9    | scale * m8    */
+    PFMUL      ( MM0, MM7 )             /* (unused)      | scale * m10   */
+ALIGNTEXT32
+LLBL (G3TN_scale_end):
+LLBL (G3TN_transform):
+    MOVQ       ( REGIND (EDX), MM0 )    /*  x1              | x0           */
+    MOVD       ( REGOFF (8, EDX), MM2 ) /*                  | x2           */
+    MOVQ       ( MM0, MM1 )             /*  x1              | x0           */
+    PUNPCKLDQ  ( MM2, MM2 )             /*  x2              | x2           */
+    PFMUL      ( MM3, MM0 )             /*  x1*m1           | x0*m0        */
+    ADD_L      ( CONST(16), EAX )       /*  next r                         */
+    PREFETCHW  ( REGIND(EAX) )
+    PFMUL      ( MM4, MM1 )             /*  x1*m5           | x0*m4        */
+    PFACC      ( MM1, MM0 )             /*  x0*m4+x1*m5     | x0*m0+x1*m1  */
+    PFMUL      ( MM5, MM2 )             /*  x2*m6           | x2*m2        */
+    PFADD      ( MM2, MM0 )             /*  r1              | r0           */
+    MOVQ       ( REGIND (EDX), MM1 )    /*  x1              | x0           */
+    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write r0, r1                   */
+    PFMUL      ( MM6, MM1 )             /*  x1*m9           | x0*m8        */
+    MOVD       ( REGOFF (8, EDX), MM2 ) /*                  | x2           */
+    PFMUL      ( MM7, MM2 )             /*                  | x2*m10       */
+    PFACC      ( MM1, MM1 )             /*  *not used*      | x0*m8+x1*m9  */
+    PFADD      ( MM2, MM1 )             /*  *not used*      | r2           */
+    ADD_L      ( STRIDE, EDX )          /*  next normal                    */
+    PREFETCH   ( REGIND(EDX) )
+    MOVD       ( MM1, REGOFF(-8, EAX) ) /*  write r2                       */
+    SUB_L      ( CONST(1), EBP )        /*  decrement normal counter       */
+    JNZ        ( LLBL (G3TN_transform) )
+    POP_L      ( EDX )                  /*  end of transform ---           */
+    POP_L      ( EAX )                  /*    now normalizing ...          */
+    POP_L      ( EBP )
+    CMP_L      ( CONST(0), EDI )        /*  lengths == 0 ?                 */
+    JE         ( LLBL (G3TN_norm ) )    /*  calculate lengths              */
+ALIGNTEXT32
+LLBL (G3TN_norm_w_lengths):             /*  use precalculated lengths      */
+    PREFETCHW  ( REGOFF(12,EAX) )
+    MOVQ       ( REGIND(EAX), MM0 )     /*  x1              | x0           */
+    MOVD       ( REGOFF(8, EAX), MM1 )  /*                  | x2           */
+    MOVD       ( REGIND (EDI), MM3 )    /*                  | length (x)   */
+    PFMUL      ( MM3, MM1 )             /*                  | x2 (normalized) */
+    PUNPCKLDQ  ( MM3, MM3 )             /*  length (x)      | length (x)   */
+    PFMUL      ( MM3, MM0 )             /*  x1 (normalized) | x0 (normalized) */
+    ADD_L      ( STRIDE, EDX )          /*  EDX is not read in this loop   */
+    ADD_L      ( CONST(4), EDI )        /*  next length                    */
+    PREFETCH   ( REGIND(EDI) )
+    MOVQ       ( MM0, REGIND(EAX) )     /*  write new x0, x1               */
+    MOVD       ( MM1, REGOFF(8, EAX) )  /*  write new x2                   */
+    ADD_L      ( CONST(16), EAX )       /*  next r                         */
+    SUB_L      ( CONST(1), EBP )        /*  decrement normal counter       */
+    JNZ        ( LLBL (G3TN_norm_w_lengths) )
+    JMP        ( LLBL (G3TN_exit_3dnow) )
+ALIGNTEXT32
+LLBL (G3TN_norm):                       /*  need to calculate lengths      */
+    PREFETCHW  ( REGIND(EAX) )
+    MOVQ       ( REGIND (EAX), MM0 )    /*  x1              | x0           */
+    MOVD       ( REGOFF(8, EAX), MM1 )  /*                  | x2           */
+    MOVQ       ( MM0, MM3 )             /*  x1              | x0           */
+    MOVQ       ( MM1, MM4 )             /*                  | x2           */
+    PFMUL      ( MM0, MM3 )             /*  x1*x1           | x0*x0        */
+    ADD_L      ( CONST(16), EAX )       /*  next r                         */
+    PFMUL      ( MM1, MM4 )             /*                  | x2*x2        */
+    PFADD      ( MM4, MM3 )             /*  x1*x1           | x0*x0+x2*x2  */
+    PFACC      ( MM3, MM3 )             /*  *not used*      | x0*x0+x1*x1+x2*x2 */
+    PFRSQRT    ( MM3, MM5 )             /*  approx. 1/sqrt (x0*x0+x1*x1+x2*x2) */
+    MOVQ       ( MM5, MM4 )             /*  keep the approximation         */
+    PUNPCKLDQ  ( MM3, MM3 )             /*  squared length in both halves  */
+    SUB_L      ( CONST(1), EBP )        /*  decrement normal counter; the  */
+                                        /*  flags survive until the JNZ    */
+    PFMUL      ( MM5, MM5 )             /*  approximation squared          */
+    PFRSQIT1   ( MM3, MM5 )             /*  refinement, step 1             */
+    PFRCPIT2   ( MM4, MM5 )             /*  refinement, step 2             */
+    PFMUL      ( MM5, MM0 )             /*  x1 (normalized) | x0 (normalized) */
+    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1               */
+    PFMUL      ( MM5, MM1 )             /*                  | x2 (normalized) */
+    MOVD       ( MM1, REGOFF(-8, EAX) ) /*  write new x2                   */
+    JNZ        ( LLBL (G3TN_norm) )
+LLBL (G3TN_exit_3dnow):
+    FEMMS
+LLBL (G3TN_end):
+    POP_L      ( EBP )
+    POP_L      ( ESI )
+    POP_L      ( EDI )
+    RET
+ALIGNTEXT16
+/*
+ * _mesa_3dnow_transform_normalize_normals_no_rot
+ *
+ * Arguments are read through the ARG_MAT, ARG_SCALE, ARG_IN, ARG_LENGTHS
+ * and ARG_DEST stack-slot macros.  dest->count is set to in->count; if the
+ * count is zero nothing else is done.
+ *
+ * Every input normal (x0,x1,x2) is multiplied component-wise by elements
+ * m0, m5 and m10 of mat->inv, normalized, and written to dest at a fixed
+ * 16-byte spacing.  Normalization uses the caller-supplied per-normal
+ * factor when lengths != 0, and otherwise 1/sqrt of the squared length of
+ * the transformed vector (PFRSQRT plus one PFRSQIT1/PFRCPIT2 refinement).
+ * NOTE(review): the computed-length path has no guard for a zero-length
+ * vector.
+ *
+ * Fixes relative to the previous version:
+ *  - 'scale' is folded into the matrix when lengths ARE supplied.  The old
+ *    branch was inverted: it scaled only when lengths == 0, where the
+ *    normalization cancels a uniform scale anyway.
+ *  - the length factor is loaded at the top of each iteration.  The old
+ *    loop preloaded the next factor at the bottom and therefore read one
+ *    element past the end of 'lengths' after the last normal.
+ *  - the duplicated "lengths == 0" test is merged into a single branch.
+ *
+ * EDI, ESI and EBP are saved and restored; EAX, ECX, EDX and the MMX
+ * registers used below are overwritten.
+ */
+GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
+HIDDEN(_mesa_3dnow_transform_normalize_normals_no_rot)
+GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):
+#undef FRAME_OFFSET
+#define FRAME_OFFSET 12
+    PUSH_L     ( EDI )
+    PUSH_L     ( ESI )
+    PUSH_L     ( EBP )
+    MOV_L      ( ARG_LENGTHS, EDI )
+    MOV_L      ( ARG_IN, ESI )
+    MOV_L      ( ARG_DEST, EAX )
+    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP ) /*  dest->count = in->count   */
+    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
+    MOV_L      ( ARG_MAT, ECX )
+    MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /*  dest->start  */
+    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /*  mat->inv     */
+    MOV_L      ( REGOFF(V4F_START, ESI), EDX ) /*  in->start    */
+    CMP_L      ( CONST(0), EBP )        /* count == 0 ?  -> nothing to do     */
+    JE         ( LLBL (G3TNNR_end) )
+    FEMMS
+    MOVD       ( M(0), MM0 )            /*               | m0                 */
+    PUNPCKLDQ  ( M(5), MM0 )            /* m5            | m0                 */
+    MOVD       ( M(10), MM2 )           /*               | m10                */
+    PUNPCKLDQ  ( MM2, MM2 )             /* m10           | m10                */
+    CMP_L      ( CONST(0), EDI )        /* lengths == 0 ?                     */
+    JE         ( LLBL (G3TNNR_norm) )   /* yes: calculate lengths, no scale   */
+    MOVD       ( ARG_SCALE, MM7 )       /*               | scale              */
+    PUNPCKLDQ  ( MM7, MM7 )             /* scale         | scale              */
+    PFMUL      ( MM7, MM0 )             /* scale * m5    | scale * m0         */
+    PFMUL      ( MM7, MM2 )             /* scale * m10   | scale * m10        */
+ALIGNTEXT32
+LLBL (G3TNNR_norm_w_lengths):           /* use precalculated lengths          */
+    PREFETCHW  ( REGIND(EAX) )
+    MOVQ       ( REGIND(EDX), MM6 )     /* x1            | x0                 */
+    MOVD       ( REGOFF(8, EDX), MM7 )  /*               | x2                 */
+    MOVD       ( REGIND(EDI), MM3 )     /*               | length (x)         */
+    PFMUL      ( MM0, MM6 )             /* x1*m5         | x0*m0              */
+    ADD_L      ( STRIDE, EDX )          /* next normal                        */
+    PREFETCH   ( REGIND(EDX) )
+    PFMUL      ( MM2, MM7 )             /*               | x2*m10             */
+    ADD_L      ( CONST(16), EAX )       /* next r                             */
+    PFMUL      ( MM3, MM7 )             /*               | x2 (normalized)    */
+    PUNPCKLDQ  ( MM3, MM3 )             /* length (x)    | length (x)         */
+    ADD_L      ( CONST(4), EDI )        /* next length                        */
+    PFMUL      ( MM3, MM6 )             /* x1 (normalized) | x0 (normalized)  */
+    SUB_L      ( CONST(1), EBP )        /* decrement normal counter; the      */
+                                        /* stores below leave the flags alone */
+    MOVQ       ( MM6, REGOFF(-16, EAX) ) /* write r0, r1                      */
+    MOVD       ( MM7, REGOFF(-8, EAX) ) /* write r2                           */
+    JNZ        ( LLBL (G3TNNR_norm_w_lengths) )
+    JMP        ( LLBL (G3TNNR_exit_3dnow) )
+ALIGNTEXT32
+LLBL (G3TNNR_norm):                     /* need to calculate lengths          */
+    PREFETCHW  ( REGIND(EAX) )
+    MOVQ       ( REGIND(EDX), MM6 )     /* x1              | x0               */
+    MOVD       ( REGOFF(8, EDX), MM7 )  /*                 | x2               */
+    PFMUL      ( MM0, MM6 )             /* x1*m5           | x0*m0            */
+    ADD_L      ( CONST(16), EAX )       /* next r                             */
+    PFMUL      ( MM2, MM7 )             /*                 | x2*m10           */
+    MOVQ       ( MM6, MM3 )             /* x1 (transformed)| x0 (transformed) */
+    MOVQ       ( MM7, MM4 )             /*                 | x2 (transformed) */
+    PFMUL      ( MM6, MM3 )             /* x1*x1           | x0*x0            */
+    PFMUL      ( MM7, MM4 )             /*                 | x2*x2            */
+    PFACC      ( MM3, MM3 )             /* **not used**    | x0*x0+x1*x1      */
+    PFADD      ( MM4, MM3 )             /*                 | x0*x0+x1*x1+x2*x2*/
+    ADD_L      ( STRIDE, EDX )          /* next normal                        */
+    PREFETCH   ( REGIND(EDX) )
+    PFRSQRT    ( MM3, MM5 )             /* approx. 1/sqrt (x0*x0+x1*x1+x2*x2) */
+    MOVQ       ( MM5, MM4 )             /* keep the approximation             */
+    PUNPCKLDQ  ( MM3, MM3 )             /* squared length in both halves      */
+    PFMUL      ( MM5, MM5 )             /* approximation squared              */
+    PFRSQIT1   ( MM3, MM5 )             /* refinement, step 1                 */
+    SUB_L      ( CONST(1), EBP )        /* decrement normal counter           */
+    PFRCPIT2   ( MM4, MM5 )             /* refinement, step 2                 */
+    PFMUL      ( MM5, MM6 )             /* x1 (normalized) | x0 (normalized)  */
+    MOVQ       ( MM6, REGOFF(-16, EAX) ) /* write r0, r1                      */
+    PFMUL      ( MM5, MM7 )             /*                 | x2 (normalized)  */
+    MOVD       ( MM7, REGOFF(-8, EAX) ) /* write r2                           */
+    JNZ        ( LLBL (G3TNNR_norm) )
+LLBL (G3TNNR_exit_3dnow):
+    FEMMS
+LLBL (G3TNNR_end):
+    POP_L      ( EBP )
+    POP_L      ( ESI )
+    POP_L      ( EDI )
+    RET
+ALIGNTEXT16 /*
+ * _mesa_3dnow_transform_rescale_normals_no_rot
+ *
+ * Sets dest->count = in->count, then for every input normal (x0,x1,x2)
+ * writes (x0*scale*m0, x1*scale*m5, x2*scale*m10) to dest at a fixed
+ * 16-byte spacing, where m is mat->inv.  Returns right after the count
+ * copy when the count is zero.  EDI, ESI and EBP are saved and restored;
+ * EAX, ECX, EDX and the MMX registers used below are overwritten.
+ */
+GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
+HIDDEN(_mesa_3dnow_transform_rescale_normals_no_rot)
+GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):
+#undef FRAME_OFFSET
+#define FRAME_OFFSET 12
+    PUSH_L     ( EDI )                  /* saved/restored, otherwise unused here */
+    PUSH_L     ( ESI )
+    PUSH_L     ( EBP )
+    MOV_L      ( ARG_IN, EAX )
+    MOV_L      ( ARG_DEST, EDX )
+    MOV_L      ( REGOFF(V4F_COUNT, EAX), EBP ) /*  dest->count = in->count   */
+    MOV_L      ( EBP, REGOFF(V4F_COUNT, EDX) )
+    MOV_L      ( ARG_IN, ESI )          /* ESI is the base register of STRIDE */
+    MOV_L      ( ARG_MAT, ECX )
+    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /*  mat->inv     */
+    MOV_L      ( REGOFF(V4F_START, EDX), EAX ) /*  dest->start  */
+    MOV_L      ( REGOFF(V4F_START, ESI), EDX ) /*  in->start    */
+    CMP_L      ( CONST(0), EBP )        /* count == 0 ?  -> nothing to do     */
+    JE         ( LLBL (G3TRNR_end) )
+    FEMMS
+    MOVD       ( ARG_SCALE, MM6 )       /*               | scale              */
+    PUNPCKLDQ  ( MM6, MM6 )             /* scale         | scale              */
+    MOVD       ( REGIND(ECX), MM0 )     /*               | m0                 */
+    PUNPCKLDQ  ( REGOFF(20, ECX), MM0 ) /* m5            | m0                 */
+    PFMUL      ( MM6, MM0 )             /* scale*m5      | scale*m0           */
+    MOVD       ( REGOFF(40, ECX), MM2 ) /*               | m10                */
+    PFMUL      ( MM6, MM2 )             /*               | scale*m10          */
+ALIGNTEXT32
+LLBL (G3TRNR_rescale):
+    PREFETCHW  ( REGIND(EAX) )
+    MOVQ       ( REGIND(EDX), MM4 )     /* x1            | x0                 */
+    MOVD       ( REGOFF(8, EDX), MM5 )  /*               | x2                 */
+    PFMUL      ( MM0, MM4 )             /* x1*m5         | x0*m0              */
+    ADD_L      ( STRIDE, EDX )          /* next normal                        */
+    PREFETCH   ( REGIND(EDX) )
+    PFMUL      ( MM2, MM5 )             /*               | x2*m10             */
+    ADD_L      ( CONST(16), EAX )       /* next r                             */
+    SUB_L      ( CONST(1), EBP )        /* decrement normal counter           */
+    MOVQ       ( MM4, REGOFF(-16, EAX) ) /* write r0, r1                      */
+    MOVD       ( MM5, REGOFF(-8, EAX) ) /* write r2                           */
+    JNZ        ( LLBL (G3TRNR_rescale) ) /* cnt > 0 ? -> process next normal  */
+    FEMMS
+LLBL (G3TRNR_end):
+    POP_L      ( EBP )
+    POP_L      ( ESI )
+    POP_L      ( EDI )
+    RET
+ALIGNTEXT16 /*
+ * _mesa_3dnow_transform_rescale_normals
+ *
+ * Sets dest->count = in->count, then multiplies every input normal
+ * (x0,x1,x2) by elements m0..m2, m4..m6, m8..m10 of mat->inv, each
+ * pre-multiplied by 'scale', and writes the three results to dest at a
+ * fixed 16-byte spacing:
+ *   r0 = x0*m0 + x1*m1 + x2*m2
+ *   r1 = x0*m4 + x1*m5 + x2*m6
+ *   r2 = x0*m8 + x1*m9 + x2*m10
+ * Returns right after the count copy when the count is zero.  EDI and ESI
+ * are saved and restored; EAX, ECX, EDX and MM0-MM7 are overwritten.
+ */
+GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
+HIDDEN(_mesa_3dnow_transform_rescale_normals)
+GLNAME(_mesa_3dnow_transform_rescale_normals):
+#undef  FRAME_OFFSET
+#define FRAME_OFFSET 8
+    PUSH_L     ( EDI )
+    PUSH_L     ( ESI )
+    MOV_L      ( ARG_IN, ESI )
+    MOV_L      ( ARG_DEST, EAX )
+    MOV_L      ( ARG_MAT, ECX )
+    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI ) /*  dest->count = in->count   */
+    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )
+    MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /*  dest->start  */
+    MOV_L      ( REGOFF(V4F_START, ESI), EDX ) /*  in->start    */
+    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /*  mat->inv     */
+    CMP_L      ( CONST(0), EDI )        /* count == 0 ?  -> nothing to do     */
+    JE         ( LLBL (G3TR_end) )
+    FEMMS
+    MOVQ       ( REGIND(ECX), MM3 )     /* m1            | m0                 */
+    MOVQ       ( REGOFF(16,ECX), MM4 )  /* m5            | m4                 */
+    MOVD       ( ARG_SCALE, MM0 )       /*               | scale              */
+    MOVD       ( REGOFF(8,ECX), MM5 )   /*               | m2                 */
+    PUNPCKLDQ  ( MM0, MM0 )             /* scale         | scale              */
+    PUNPCKLDQ  ( REGOFF(24, ECX), MM5 ) /* m6            | m2                 */
+    PFMUL      ( MM0, MM3 )             /* scale*m1      | scale*m0           */
+    MOVQ       ( REGOFF(32, ECX), MM6 ) /* m9            | m8                 */
+    PFMUL      ( MM0, MM4 )             /* scale*m5      | scale*m4           */
+    MOVD       ( REGOFF(40, ECX), MM7 ) /*               | m10                */
+    PFMUL      ( MM0, MM5 )             /* scale*m6      | scale*m2           */
+    PFMUL      ( MM0, MM6 )             /* scale*m9      | scale*m8           */
+    PFMUL      ( MM0, MM7 )             /*               | scale*m10          */
+ALIGNTEXT32
+LLBL (G3TR_rescale):
+    PREFETCHW  ( REGIND(EAX) )
+    MOVQ       ( REGIND(EDX), MM0 )     /* x1            | x0                 */
+    MOVD       ( REGOFF(8, EDX), MM2 )  /*               | x2                 */
+    MOVQ       ( MM0, MM1 )             /* x1            | x0                 */
+    PUNPCKLDQ  ( MM2, MM2 )             /* x2            | x2                 */
+    PFMUL      ( MM3, MM0 )             /* x1*m1         | x0*m0              */
+    ADD_L      ( CONST(16), EAX )       /* next r                             */
+    PFMUL      ( MM4, MM1 )             /* x1*m5         | x0*m4              */
+    PFACC      ( MM1, MM0 )             /* x0*m4+x1*m5   | x0*m0+x1*m1        */
+    MOVQ       ( REGIND(EDX), MM1 )     /* x1            | x0                 */
+    PFMUL      ( MM5, MM2 )             /* x2*m6         | x2*m2              */
+    PFADD      ( MM2, MM0 )             /* x0*m4...+x2*m6| x0*m0+x1*m1+x2*m2  */
+    MOVD       ( REGOFF(8, EDX), MM2 )  /*               | x2                 */
+    ADD_L      ( STRIDE, EDX )          /* next normal (after the last read of the current one) */
+    PREFETCH   ( REGIND(EDX) )
+    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write r0, r1                      */
+    PFMUL      ( MM6, MM1 )             /* x1*m9         | x0*m8              */
+    PFMUL      ( MM7, MM2 )             /*               | x2*m10             */
+    PFACC      ( MM1, MM1 )             /* *not used*    | x0*m8+x1*m9        */
+    PFADD      ( MM2, MM1 )             /* *not used*    | x0*m8+x1*m9+x2*m10 */
+    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write r2                           */
+    SUB_L      ( CONST(1), EDI )        /* decrement normal counter           */
+    JNZ        ( LLBL (G3TR_rescale) )
+    FEMMS
+LLBL (G3TR_end):
+    POP_L       ( ESI )
+    POP_L       ( EDI )
+    RET
+ALIGNTEXT16 /*
+ * _mesa_3dnow_transform_normals_no_rot
+ *
+ * Sets dest->count = in->count, then for every input normal (x0,x1,x2)
+ * writes (x0*m0, x1*m5, x2*m10) to dest at a fixed 16-byte spacing, where
+ * m is mat->inv.  No scale factor or length array is read here.  Returns
+ * right after the count copy when the count is zero.  EDI and ESI are
+ * saved and restored; EAX, ECX, EDX and the MMX registers used below are
+ * overwritten.
+ */
+GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
+HIDDEN(_mesa_3dnow_transform_normals_no_rot)
+GLNAME(_mesa_3dnow_transform_normals_no_rot):
+#undef  FRAME_OFFSET
+#define FRAME_OFFSET 8
+    PUSH_L     ( EDI )
+    PUSH_L     ( ESI )
+    MOV_L      ( ARG_IN, ESI )
+    MOV_L      ( ARG_DEST, EAX )
+    MOV_L      ( ARG_MAT, ECX )
+    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI ) /*  dest->count = in->count   */
+    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )
+    MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /*  dest->start  */
+    MOV_L      ( REGOFF(V4F_START, ESI), EDX ) /*  in->start    */
+    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /*  mat->inv     */
+    CMP_L      ( CONST(0), EDI )        /* count == 0 ?  -> nothing to do     */
+    JE         ( LLBL (G3TNR_end) )
+    FEMMS
+    MOVD       ( REGIND(ECX), MM0 )     /*               | m0                 */
+    PUNPCKLDQ  ( REGOFF(20, ECX), MM0 ) /* m5            | m0                 */
+    MOVD       ( REGOFF(40, ECX), MM2 ) /*               | m10                */
+    PUNPCKLDQ  ( MM2, MM2 )             /* m10           | m10                */
+ALIGNTEXT32
+LLBL (G3TNR_transform):
+    PREFETCHW  ( REGIND(EAX) )
+    MOVQ       ( REGIND(EDX), MM4 )     /* x1            | x0                 */
+    MOVD       ( REGOFF(8, EDX), MM5 )  /*               | x2                 */
+    PFMUL      ( MM0, MM4 )             /* x1*m5         | x0*m0              */
+    ADD_L      ( STRIDE, EDX)           /* next normal                        */
+    PREFETCH   ( REGIND(EDX) )
+    PFMUL      ( MM2, MM5 )             /*               | x2*m10             */
+    ADD_L      ( CONST(16), EAX )       /* next r                             */
+    SUB_L      ( CONST(1), EDI )        /* decrement normal counter           */
+    MOVQ       ( MM4, REGOFF(-16, EAX) ) /* write r0, r1                      */
+    MOVD       ( MM5, REGOFF(-8, EAX) ) /* write r2                           */
+    JNZ        ( LLBL (G3TNR_transform) ) /* loops on the SUB_L result above  */
+    FEMMS
+LLBL (G3TNR_end):
+    POP_L       ( ESI )
+    POP_L       ( EDI )
+    RET
+ALIGNTEXT16 /*
+ * _mesa_3dnow_transform_normals
+ *
+ * Sets dest->count = in->count, then multiplies every input normal
+ * (x0,x1,x2) by elements m0..m2, m4..m6, m8..m10 of mat->inv and writes
+ * the three results to dest at a fixed 16-byte spacing:
+ *   r0 = x0*m0 + x1*m1 + x2*m2
+ *   r1 = x0*m4 + x1*m5 + x2*m6
+ *   r2 = x0*m8 + x1*m9 + x2*m10
+ * No scale factor or length array is read here.  Returns right after the
+ * count copy when the count is zero.  EDI and ESI are saved and restored;
+ * EAX, ECX, EDX and MM0-MM7 are overwritten.
+ */
+GLOBL GLNAME(_mesa_3dnow_transform_normals)
+HIDDEN(_mesa_3dnow_transform_normals)
+GLNAME(_mesa_3dnow_transform_normals):
+#undef  FRAME_OFFSET
+#define FRAME_OFFSET 8
+    PUSH_L     ( EDI )
+    PUSH_L     ( ESI )
+    MOV_L      ( ARG_IN, ESI )
+    MOV_L      ( ARG_DEST, EAX )
+    MOV_L      ( ARG_MAT, ECX )
+    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI ) /*  dest->count = in->count   */
+    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )
+    MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /*  dest->start  */
+    MOV_L      ( REGOFF(V4F_START, ESI), EDX ) /*  in->start    */
+    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /*  mat->inv     */
+    CMP_L      ( CONST(0), EDI )        /* count == 0 ?  -> nothing to do     */
+    JE         ( LLBL (G3T_end) )
+    FEMMS
+    MOVQ       ( REGIND(ECX), MM3 )     /* m1            | m0                 */
+    MOVQ       ( REGOFF(16, ECX), MM4 ) /* m5            | m4                 */
+    MOVD       ( REGOFF(8, ECX), MM5 )  /*               | m2                 */
+    PUNPCKLDQ  ( REGOFF(24, ECX), MM5 ) /* m6            | m2                 */
+    MOVQ       ( REGOFF(32, ECX), MM6 ) /* m9            | m8                 */
+    MOVD       ( REGOFF(40, ECX), MM7 ) /*               | m10                */
+ALIGNTEXT32
+LLBL (G3T_transform):
+    PREFETCHW  ( REGIND(EAX) )
+    MOVQ       ( REGIND(EDX), MM0 )     /* x1            | x0                 */
+    MOVD       ( REGOFF(8, EDX), MM2 )  /*               | x2                 */
+    MOVQ       ( MM0, MM1 )             /* x1            | x0                 */
+    PUNPCKLDQ  ( MM2, MM2 )             /* x2            | x2                 */
+    PFMUL      ( MM3, MM0 )             /* x1*m1         | x0*m0              */
+    ADD_L      ( CONST(16), EAX )       /* next r                             */
+    PFMUL      ( MM4, MM1 )             /* x1*m5         | x0*m4              */
+    PFACC      ( MM1, MM0 )             /* x0*m4+x1*m5   | x0*m0+x1*m1        */
+    PFMUL      ( MM5, MM2 )             /* x2*m6         | x2*m2              */
+    PFADD      ( MM2, MM0 )             /* x0*m4...+x2*m6| x0*m0+x1*m1+x2*m2  */
+    MOVQ       ( REGIND(EDX), MM1 )     /* x1            | x0                 */
+    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write r0, r1                      */
+    PFMUL      ( MM6, MM1 )             /* x1*m9         | x0*m8              */
+    MOVD       ( REGOFF(8, EDX), MM2 )  /*               | x2                 */
+    PFMUL      ( MM7, MM2 )             /*               | x2*m10             */
+    ADD_L      ( STRIDE, EDX )          /* next normal (after the last read of the current one) */
+    PREFETCH   ( REGIND(EDX) )
+    PFACC      ( MM1, MM1 )             /* *not used*    | x0*m8+x1*m9        */
+    PFADD      ( MM2, MM1 )             /* *not used*    | x0*m8+x1*m9+x2*m10 */
+    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write r2                           */
+    SUB_L      ( CONST(1), EDI )        /* decrement normal counter           */
+    JNZ        ( LLBL (G3T_transform) )
+    FEMMS
+LLBL (G3T_end):
+    POP_L  ( ESI )
+    POP_L  ( EDI )
+    RET
+ALIGNTEXT16 /*
+ * _mesa_3dnow_normalize_normals
+ *
+ * Sets dest->count = in->count, then writes a normalized copy of every
+ * input normal (x0,x1,x2) to dest at a fixed 16-byte spacing.  No matrix
+ * is read here.  When lengths != 0 each normal is multiplied by the
+ * matching entry of the lengths array (G3N_norm1); otherwise it is
+ * multiplied by 1/sqrt(x0*x0+x1*x1+x2*x2), computed with PFRSQRT plus one
+ * PFRSQIT1/PFRCPIT2 refinement (G3N_norm2).
+ * NOTE(review): the computed-length path has no guard for a zero-length
+ * vector.
+ * Returns right after the count copy when the count is zero.  EDI, ESI and
+ * EBP are saved and restored; EAX, ECX, EDX and the MMX registers used
+ * below are overwritten.
+ */
+GLOBL GLNAME(_mesa_3dnow_normalize_normals)
+HIDDEN(_mesa_3dnow_normalize_normals)
+GLNAME(_mesa_3dnow_normalize_normals):
+#undef  FRAME_OFFSET
+#define FRAME_OFFSET 12
+    PUSH_L     ( EDI )                  /* saved/restored, otherwise unused here */
+    PUSH_L     ( ESI )
+    PUSH_L     ( EBP )
+    MOV_L      ( ARG_IN, ESI )
+    MOV_L      ( ARG_DEST, EAX )
+    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP ) /*  dest->count = in->count   */
+    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
+    MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /*  dest->start  */
+    MOV_L      ( REGOFF(V4F_START, ESI), ECX ) /*  in->start    */
+    MOV_L      ( ARG_LENGTHS, EDX )
+    CMP_L      ( CONST(0), EBP )        /* count == 0 ?  -> nothing to do     */
+    JE         ( LLBL (G3N_end) )
+    FEMMS
+    CMP_L      ( CONST(0), EDX )        /* lengths == 0 ?                     */
+    JE         ( LLBL (G3N_norm2) )     /* calculate lengths                  */
+ALIGNTEXT32
+LLBL (G3N_norm1):                       /* use precalculated lengths          */
+    PREFETCH   ( REGIND(EAX) )          /* NOTE(review): the other loops in this file use PREFETCHW for the destination */
+    MOVQ       ( REGIND(ECX), MM0 )     /* x1              | x0               */
+    MOVD       ( REGOFF(8, ECX), MM1 )  /*                 | x2               */
+    MOVD       ( REGIND(EDX), MM3 )     /*                 | length (x)       */
+    PFMUL      ( MM3, MM1 )             /*                 | x2 (normalized)  */
+    PUNPCKLDQ  ( MM3, MM3 )             /* length (x)      | length (x)       */
+    ADD_L      ( STRIDE, ECX )          /* next normal                        */
+    PREFETCH   ( REGIND(ECX) )
+    PFMUL      ( MM3, MM0 )             /* x1 (normalized) | x0 (normalized)  */
+    MOVQ       ( MM0, REGIND(EAX) )     /* write new x0, x1                   */
+    MOVD       ( MM1, REGOFF(8, EAX) )  /* write new x2                       */
+    ADD_L      ( CONST(16), EAX )       /* next r                             */
+    ADD_L      ( CONST(4), EDX )        /* next length                        */
+    SUB_L      ( CONST(1), EBP )        /* decrement normal counter           */
+    JNZ        ( LLBL (G3N_norm1) )
+    JMP        ( LLBL (G3N_end1) )
+ALIGNTEXT32
+LLBL (G3N_norm2):                       /* need to calculate lengths          */
+    PREFETCHW  ( REGIND(EAX) )
+    PREFETCH   ( REGIND(ECX) )
+    MOVQ       ( REGIND(ECX), MM0 )     /* x1              | x0               */
+    MOVD       ( REGOFF(8, ECX), MM1 )  /*                 | x2               */
+    MOVQ       ( MM0, MM3 )             /* x1              | x0               */
+    ADD_L      ( STRIDE, ECX )          /* next normal                        */
+    PFMUL      ( MM0, MM3 )             /* x1*x1           | x0*x0            */
+    MOVQ       ( MM1, MM4 )             /*                 | x2               */
+    ADD_L      ( CONST(16), EAX )       /* next r                             */
+    PFMUL      ( MM1, MM4 )             /*                 | x2*x2            */
+    PFADD      ( MM4, MM3 )             /* x1*x1           | x0*x0+x2*x2      */
+    PFACC      ( MM3, MM3 )             /* x0*x0+...+x2*x2 | x0*x0+x1*x1+x2*x2*/
+    PFRSQRT    ( MM3, MM5 )             /* approx. 1/sqrt (x0*x0+x1*x1+x2*x2) */
+    MOVQ       ( MM5, MM4 )             /* keep the approximation             */
+    PUNPCKLDQ  ( MM3, MM3 )             /* squared length in both halves      */
+    PFMUL      ( MM5, MM5 )             /* approximation squared              */
+    PFRSQIT1   ( MM3, MM5 )             /* refinement, step 1                 */
+    SUB_L      ( CONST(1), EBP )        /* decrement normal counter           */
+    PFRCPIT2   ( MM4, MM5 )             /* refinement, step 2                 */
+    PFMUL      ( MM5, MM0 )             /* x1 (normalized) | x0 (normalized)  */
+    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1                  */
+    PFMUL      ( MM5, MM1 )             /*                 | x2 (normalized)  */
+    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write new x2                       */
+    JNZ        ( LLBL (G3N_norm2) )     /* loops on the SUB_L result above    */
+LLBL (G3N_end1):
+    FEMMS
+LLBL (G3N_end):
+    POP_L      ( EBP )
+    POP_L      ( ESI )
+    POP_L      ( EDI )
+    RET
+ALIGNTEXT16
+GLOBL GLNAME(_mesa_3dnow_rescale_normals)      /* Multiply every 3-component input normal by one scalar */
+HIDDEN(_mesa_3dnow_rescale_normals)            /* (ARG_SCALE) and store the results 16 bytes apart in dest. */
+GLNAME(_mesa_3dnow_rescale_normals):
+#undef  FRAME_OFFSET
+#define FRAME_OFFSET 8                         /* two PUSH_L below = 8 bytes; presumably used by the ARG_* macros */
+    PUSH_L     ( EDI )
+    PUSH_L     ( ESI )
+    MOV_L      ( ARG_IN, ESI )
+    MOV_L      ( ARG_DEST, EAX )
+    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDX ) /*  dest->count = in->count   */
+    MOV_L      ( EDX, REGOFF(V4F_COUNT, EAX) )
+    MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /*  dest->start  */
+    MOV_L      ( REGOFF(V4F_START, ESI), ECX ) /*  in->start    */
+    CMP_L      ( CONST(0), EDX )        /* count == 0 ?                       */
+    JE         ( LLBL (G3R_end) )       /* nothing to do, skip the MMX section */
+    FEMMS
+    MOVD       ( ARG_SCALE, MM0 )       /* scale                              */
+    PUNPCKLDQ  ( MM0, MM0 )             /* scale         | scale              */
+ALIGNTEXT32
+LLBL (G3R_rescale):
+    PREFETCHW  ( REGIND(EAX) )
+    MOVQ       ( REGIND(ECX), MM1 )     /* x1            | x0                 */
+    MOVD       ( REGOFF(8, ECX), MM2 )  /*               | x2                 */
+    PFMUL      ( MM0, MM1 )             /* x1*scale      | x0*scale           */
+    ADD_L      ( STRIDE, ECX )          /* next normal                  */
+    PREFETCH   ( REGIND(ECX) )
+    PFMUL      ( MM0, MM2 )             /*               | x2*scale           */
+    ADD_L      ( CONST(16), EAX )       /* next r                             */
+    MOVQ       ( MM1, REGOFF(-16, EAX) ) /* write r0, r1 (EAX already advanced) */
+    MOVD       ( MM2, REGOFF(-8, EAX) ) /* write r2                           */
+    SUB_L      ( CONST(1), EDX )        /* decrement normal counter           */
+    JNZ        ( LLBL (G3R_rescale) )   /* loop until the counter reaches 0   */
+    FEMMS
+LLBL (G3R_end):
+    POP_L      ( ESI )
+    POP_L      ( EDI )
+    RET
+#endif
+#if defined (__ELF__) && defined (__linux__)
+        .section .note.GNU-stack,"",%progbits
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/3dnow_xform1.S
 ,0 → 1,437
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifdef USE_3DNOW_ASM
+#include "assyntax.h"
+#include "matypes.h"
+#include "xform_args.h"
+    SEG_TEXT
+#define FRAME_OFFSET    4
+/*
+ * Transform 1-component points by a general matrix:
+ *   (r0, r1, r2, r3) = x0 * (m00, m01, m02, m03) + (m30, m31, m32, m33)
+ * The destination is marked as size 4 and receives the source count; one
+ * 16-byte result is written per source vertex, and the source pointer is
+ * advanced by the source stride.
+ */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points1_general )
+HIDDEN(_mesa_3dnow_transform_points1_general)
+GLNAME( _mesa_3dnow_transform_points1_general ):
+    PUSH_L    ( ESI )
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )       /* dest->size = 4        */
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )   /* dest->count = source->count   */
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+    PUSH_L    ( EDI )
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* EDX = dest->start             */
+    MOV_L     ( ESI, ECX )                      /* ECX = matrix                  */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* ESI = vertex counter          */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* EDI = source stride           */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* EAX = source->start           */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPGR_3 ) )      /* count == 0 -> nothing to transform    */
+    MOVQ      ( REGIND(ECX), MM0 )      /* m01             | m00             */
+    MOVQ      ( REGOFF(8, ECX), MM1 )   /* m03             | m02             */
+    MOVQ      ( REGOFF(48, ECX), MM2 )  /* m31             | m30             */
+    MOVQ      ( REGOFF(56, ECX), MM3 )  /* m33             | m32             */
+ALIGNTEXT16
+LLBL( G3TPGR_2 ):
+    MOVD      ( REGIND(EAX), MM4 )      /*                 | x0              */
+    PUNPCKLDQ ( MM4, MM4 )              /* x0              | x0              */
+    MOVQ      ( MM4, MM5 )              /* x0              | x0              */
+    PFMUL     ( MM0, MM4 )              /* x0*m01          | x0*m00          */
+    PFMUL     ( MM1, MM5 )              /* x0*m03          | x0*m02          */
+    PFADD     ( MM2, MM4 )              /* x0*m01+m31      | x0*m00+m30      */
+    PFADD     ( MM3, MM5 )              /* x0*m03+m33      | x0*m02+m32      */
+    MOVQ      ( MM4, REGIND(EDX) )      /* write r1, r0                      */
+    MOVQ      ( MM5, REGOFF(8, EDX) )   /* write r3, r2                      */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TPGR_2 ) )      /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TPGR_3 ):
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+/*
+ * Identity transform of 1-component points: r0 = x0.
+ * The destination is marked as size 1 and receives the source count; only
+ * the first float of each 16-byte destination slot is written.  The matrix
+ * pointer is loaded but never dereferenced by this routine.
+ */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points1_identity )
+HIDDEN(_mesa_3dnow_transform_points1_identity)
+GLNAME( _mesa_3dnow_transform_points1_identity ):
+    PUSH_L    ( ESI )
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(1), REGOFF(V4F_SIZE, ECX) )       /* dest->size = 1        */
+    OR_B      ( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )   /* dest->count = source->count   */
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+    PUSH_L    ( EDI )
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* EDX = dest->start             */
+    MOV_L     ( ESI, ECX )                      /* ECX = matrix (unused below)   */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* ESI = vertex counter          */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* EDI = source stride           */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* EAX = source->start           */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPIR_4) )       /* count == 0 -> nothing to copy         */
+ALIGNTEXT16
+LLBL( G3TPIR_3 ):
+    MOVD      ( REGIND(EAX), MM0 )      /*                 | x0              */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    MOVD      ( MM0, REGIND(EDX) )      /*                 | r0              */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TPIR_3 ) )      /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TPIR_4 ):
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+/*
+ * Transform 1-component points by a 3D matrix without rotation:
+ *   r0 = x0*m00 + m30,  r1 = m31,  r2 = m32
+ * The destination is marked as size 3 and receives the source count; one
+ * 16-byte destination slot is used per source vertex.
+ */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points1_3d_no_rot )
+HIDDEN(_mesa_3dnow_transform_points1_3d_no_rot)
+GLNAME( _mesa_3dnow_transform_points1_3d_no_rot ):
+    PUSH_L    ( ESI )
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )       /* dest->size = 3        */
+    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )   /* dest->count = source->count   */
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+    PUSH_L    ( EDI )
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* EDX = dest->start             */
+    MOV_L     ( ESI, ECX )                      /* ECX = matrix                  */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* ESI = vertex counter          */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* EDI = source stride           */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* EAX = source->start           */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP3NRR_3 ) )    /* count == 0 -> nothing to transform    */
+    MOVD      ( REGIND(ECX), MM0 )      /*                 | m00             */
+    MOVQ      ( REGOFF(48, ECX), MM2 )  /* m31             | m30             */
+    MOVD      ( REGOFF(56, ECX), MM3 )  /*                 | m32             */
+ALIGNTEXT16
+LLBL( G3TP3NRR_2 ):
+    MOVD      ( REGIND(EAX), MM4 )      /*                 | x0              */
+    PFMUL     ( MM0, MM4 )              /*                 | x0*m00          */
+    PFADD     ( MM2, MM4 )              /* m31             | x0*m00+m30      */
+    MOVQ      ( MM4, REGIND(EDX) )      /* write r1, r0                      */
+    MOVD      ( MM3, REGOFF(8, EDX) )   /* write r2                          */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP3NRR_2 ) )    /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TP3NRR_3 ):
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+/*
+ * Transform 1-component points by a perspective matrix:
+ *   r0 = x0*m00,  r1 = 0,  r2 = m32,  r3 = 0
+ * The destination is marked as size 4 and receives the source count; one
+ * 16-byte result is written per source vertex.
+ */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points1_perspective )
+HIDDEN(_mesa_3dnow_transform_points1_perspective)
+GLNAME( _mesa_3dnow_transform_points1_perspective ):
+    PUSH_L    ( ESI )
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )       /* dest->size = 4        */
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )   /* dest->count = source->count   */
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+    PUSH_L    ( EDI )
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* EDX = dest->start             */
+    MOV_L     ( ESI, ECX )                      /* ECX = matrix                  */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* ESI = vertex counter          */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* EDI = source stride           */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* EAX = source->start           */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPPR_3 ) )      /* count == 0 -> nothing to transform    */
+    MOVD      ( REGIND(ECX), MM0 )      /* 0               | m00             */
+    MOVD      ( REGOFF(56, ECX), MM3 )  /* 0               | m32             */
+ALIGNTEXT16
+LLBL( G3TPPR_2 ):
+    MOVD      ( REGIND(EAX), MM4 )      /* 0               | x0              */
+    PFMUL     ( MM0, MM4 )              /* 0               | x0*m00          */
+    MOVQ      ( MM4, REGIND(EDX) )      /* write r1 (=0), r0                 */
+    MOVQ      ( MM3, REGOFF(8, EDX) )   /* write r2  (=m32), r3 (=0)         */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TPPR_2 ) )      /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TPPR_3 ):
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+/*
+ * Transform 1-component points by a 2D matrix:
+ *   r0 = x0*m00 + m30,  r1 = x0*m01 + m31
+ * The destination is marked as size 2 and receives the source count; the
+ * first 8 bytes of each 16-byte destination slot are written.
+ */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points1_2d )
+HIDDEN(_mesa_3dnow_transform_points1_2d)
+GLNAME( _mesa_3dnow_transform_points1_2d ):
+    PUSH_L    ( ESI )
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )       /* dest->size = 2        */
+    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )   /* dest->count = source->count   */
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+    PUSH_L    ( EDI )
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* EDX = dest->start             */
+    MOV_L     ( ESI, ECX )                      /* ECX = matrix                  */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* ESI = vertex counter          */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* EDI = source stride           */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* EAX = source->start           */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP2R_3 ) )      /* count == 0 -> nothing to transform    */
+    MOVQ      ( REGIND(ECX), MM0 )      /* m01             | m00             */
+    MOVQ      ( REGOFF(48, ECX), MM2 )  /* m31             | m30             */
+ALIGNTEXT16
+LLBL( G3TP2R_2 ):
+    MOVD      ( REGIND(EAX), MM4 )      /*                 | x0              */
+    PUNPCKLDQ ( MM4, MM4 )              /* x0              | x0              */
+    PFMUL     ( MM0, MM4 )              /* x0*m01          | x0*m00          */
+    PFADD     ( MM2, MM4 )              /* x0*m01+m31      | x0*m00+m30      */
+    MOVQ      ( MM4, REGIND(EDX) )      /* write r1, r0                      */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP2R_2 ) )      /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TP2R_3 ):
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+/*
+ * Transform 1-component points by a 2D matrix without rotation:
+ *   r0 = x0*m00 + m30,  r1 = m31
+ * The destination is marked as size 2 and receives the source count; the
+ * first 8 bytes of each 16-byte destination slot are written.
+ */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points1_2d_no_rot )
+HIDDEN(_mesa_3dnow_transform_points1_2d_no_rot)
+GLNAME( _mesa_3dnow_transform_points1_2d_no_rot ):
+    PUSH_L    ( ESI )
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )       /* dest->size = 2        */
+    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )   /* dest->count = source->count   */
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+    PUSH_L    ( EDI )
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* EDX = dest->start             */
+    MOV_L     ( ESI, ECX )                      /* ECX = matrix                  */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* ESI = vertex counter          */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* EDI = source stride           */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* EAX = source->start           */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP2NRR_3 ) )    /* count == 0 -> nothing to transform    */
+    MOVD      ( REGIND(ECX), MM0 )      /*                 | m00             */
+    MOVQ      ( REGOFF(48, ECX), MM2 )  /* m31             | m30             */
+ALIGNTEXT16
+LLBL( G3TP2NRR_2 ):
+    MOVD      ( REGIND(EAX), MM4 )      /*                 | x0              */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    PFMUL     ( MM0, MM4 )              /*                 | x0*m00          */
+    PFADD     ( MM2, MM4 )              /* m31             | x0*m00+m30      */
+    MOVQ      ( MM4, REGIND(EDX) )      /* write r1, r0                      */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP2NRR_2 ) )    /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TP2NRR_3 ):
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+/*
+ * Transform 1-component points by a 3D matrix:
+ *   r0 = x0*m00 + m30,  r1 = x0*m01 + m31,  r2 = x0*m02 + m32
+ * The destination is marked as size 3 and receives the source count; the
+ * first 12 bytes of each 16-byte destination slot are written.
+ */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points1_3d )
+HIDDEN(_mesa_3dnow_transform_points1_3d)
+GLNAME( _mesa_3dnow_transform_points1_3d ):
+    PUSH_L    ( ESI )
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )       /* dest->size = 3        */
+    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )   /* dest->count = source->count   */
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+    PUSH_L    ( EDI )
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* EDX = dest->start             */
+    MOV_L     ( ESI, ECX )                      /* ECX = matrix                  */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* ESI = vertex counter          */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* EDI = source stride           */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* EAX = source->start           */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP3R_3 ) )      /* count == 0 -> nothing to transform    */
+    MOVQ      ( REGIND(ECX), MM0 )      /* m01             | m00             */
+    MOVD      ( REGOFF(8, ECX), MM1 )   /*                 | m02             */
+    MOVQ      ( REGOFF(48, ECX), MM2 )  /* m31             | m30             */
+    MOVD      ( REGOFF(56, ECX), MM3 )  /*                 | m32             */
+ALIGNTEXT16
+LLBL( G3TP3R_2 ):
+    MOVD      ( REGIND(EAX), MM4 )      /*                 | x0              */
+    PUNPCKLDQ ( MM4, MM4 )              /* x0              | x0              */
+    MOVQ      ( MM4, MM5 )              /* x0              | x0              */
+    PFMUL     ( MM0, MM4 )              /* x0*m01          | x0*m00          */
+    PFMUL     ( MM1, MM5 )              /*                 | x0*m02          */
+    PFADD     ( MM2, MM4 )              /* x0*m01+m31      | x0*m00+m30      */
+    PFADD     ( MM3, MM5 )              /*                 | x0*m02+m32      */
+    MOVQ      ( MM4, REGIND(EDX) )      /* write r1, r0                      */
+    MOVD      ( MM5, REGOFF(8, EDX) )   /* write r2                          */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP3R_2 ) )      /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TP3R_3 ):
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+#endif
+#if defined (__ELF__) && defined (__linux__)
+        .section .note.GNU-stack,"",%progbits
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/3dnow_xform2.S
 ,0 → 1,477
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifdef USE_3DNOW_ASM
+#include "assyntax.h"
+#include "matypes.h"
+#include "xform_args.h"
+    SEG_TEXT
+#define FRAME_OFFSET    4
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points2_general )  /* 2-component points through a general matrix: */
+HIDDEN(_mesa_3dnow_transform_points2_general)          /* rN = x0*m0N + x1*m1N + m3N  for N = 0..3     */
+GLNAME( _mesa_3dnow_transform_points2_general ):
+    PUSH_L    ( ESI )
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )       /* dest->size = 4        */
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )   /* dest->count = source->count   */
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+    PUSH_L    ( EDI )
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* EDX = dest->start             */
+    MOV_L     ( ESI, ECX )                      /* ECX = matrix                  */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* ESI = vertex counter          */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* EDI = source stride           */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* EAX = source->start           */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPGR_3 ) )      /* count == 0 -> nothing to transform    */
+    MOVD      ( REGIND(ECX), MM0 )      /*                 | m00             */
+    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )  /* m10             | m00             */
+    MOVD      ( REGOFF(4, ECX), MM1 )   /*                 | m01             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )  /* m11             | m01             */
+    MOVD      ( REGOFF(8, ECX), MM2 )   /*                 | m02             */
+    PUNPCKLDQ ( REGOFF(24, ECX), MM2 )  /* m12             | m02             */
+    MOVD      ( REGOFF(12, ECX), MM3 )  /*                 | m03             */
+    PUNPCKLDQ ( REGOFF(28, ECX), MM3 )  /* m13             | m03             */
+    MOVQ      ( REGOFF(48, ECX), MM4 )  /* m31             | m30             */
+    MOVQ      ( REGOFF(56, ECX), MM5 )  /* m33             | m32             */
+ALIGNTEXT16
+LLBL( G3TPGR_2 ):
+    MOVQ      ( REGIND(EAX), MM6 )      /* x1              | x0              */
+    MOVQ      ( MM6, MM7 )              /* x1              | x0              */
+    PFMUL     ( MM0, MM6 )              /* x1*m10          | x0*m00          */
+    PFMUL     ( MM1, MM7 )              /* x1*m11          | x0*m01          */
+    PFACC     ( MM7, MM6 )              /* x0*m01+x1*m11   | x0*m00+x1*m10   */
+    PFADD     ( MM4, MM6 )              /* x0*...*m11+m31  | x0*...*m10+m30  */
+    MOVQ      ( MM6, REGIND(EDX) )      /* write r1, r0                      */
+    MOVQ      ( REGIND(EAX), MM6 )      /* x1              | x0   (reloaded) */
+    MOVQ      ( MM6, MM7 )              /* x1              | x0              */
+    PFMUL     ( MM2, MM6 )              /* x1*m12          | x0*m02          */
+    PFMUL     ( MM3, MM7 )              /* x1*m13          | x0*m03          */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    PFACC     ( MM7, MM6 )              /* x0*m03+x1*m13   | x0*m02+x1*m12   */
+    PFADD     ( MM5, MM6 )              /* x0*...*m13+m33  | x0*...*m12+m32  */
+    MOVQ      ( MM6, REGOFF(8, EDX) )   /* write r3, r2                      */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TPGR_2 ) )      /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TPGR_3 ):
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points2_perspective )  /* 2-component points, perspective matrix: */
+HIDDEN(_mesa_3dnow_transform_points2_perspective)          /* r0 = x0*m00, r1 = x1*m11, r2 = m32, r3 = 0 */
+GLNAME( _mesa_3dnow_transform_points2_perspective ):
+    PUSH_L    ( ESI )
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )       /* dest->size = 4        */
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )   /* dest->count = source->count   */
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+    PUSH_L    ( EDI )
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* EDX = dest->start             */
+    MOV_L     ( ESI, ECX )                      /* ECX = matrix                  */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* ESI = vertex counter          */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* EDI = source stride           */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* EAX = source->start           */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPPR_3 ) )      /* count == 0 -> nothing to transform    */
+    MOVD      ( REGIND(ECX), MM0 )      /*                 | m00             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )  /* m11             | m00             */
+    MOVD      ( REGOFF(56, ECX), MM3 )  /* 0               | m32             */
+ALIGNTEXT16
+LLBL( G3TPPR_2 ):
+    MOVQ      ( REGIND(EAX), MM4 )      /* x1              | x0              */
+    PFMUL     ( MM0, MM4 )              /* x1*m11          | x0*m00          */
+    MOVQ      ( MM4, REGIND(EDX) )      /* write r1, r0                      */
+    MOVQ      ( MM3, REGOFF(8, EDX) )   /* write r2  (=m32), r3 (=0)         */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TPPR_2 ) )      /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TPPR_3 ):
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points2_3d )       /* 2-component points through a 3D matrix:     */
+HIDDEN(_mesa_3dnow_transform_points2_3d)               /* rN = x0*m0N + x1*m1N + m3N  for N = 0..2    */
+GLNAME( _mesa_3dnow_transform_points2_3d ):
+    PUSH_L    ( ESI )
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )       /* dest->size = 3        */
+    OR_B      ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )   /* dest->count = source->count   */
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+    PUSH_L    ( EDI )
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* EDX = dest->start             */
+    MOV_L     ( ESI, ECX )                      /* ECX = matrix                  */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* ESI = vertex counter          */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* EDI = source stride           */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* EAX = source->start           */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP3R_3 ) )      /* count == 0 -> nothing to transform    */
+    MOVD      ( REGIND(ECX), MM0 )      /*                 | m00             */
+    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )  /* m10             | m00             */
+    MOVD      ( REGOFF(4, ECX), MM1 )   /*                 | m01             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )  /* m11             | m01             */
+    MOVD      ( REGOFF(8, ECX), MM2 )   /*                 | m02             */
+    PUNPCKLDQ ( REGOFF(24, ECX), MM2 )  /* m12             | m02             */
+    MOVQ      ( REGOFF(48, ECX), MM4 )  /* m31             | m30             */
+    MOVD      ( REGOFF(56, ECX), MM5 )  /*                 | m32             */
+ALIGNTEXT16
+LLBL( G3TP3R_2 ):
+    MOVQ      ( REGIND(EAX), MM6 )      /* x1              | x0              */
+    MOVQ      ( MM6, MM7 )              /* x1              | x0              */
+    PFMUL     ( MM0, MM6 )              /* x1*m10          | x0*m00          */
+    PFMUL     ( MM1, MM7 )              /* x1*m11          | x0*m01          */
+    PFACC     ( MM7, MM6 )              /* x0*m01+x1*m11   | x0*m00+x1*m10   */
+    PFADD     ( MM4, MM6 )              /* x0*...*m11+m31  | x0*...*m10+m30  */
+    MOVQ      ( MM6, REGIND(EDX) )      /* write r1, r0                      */
+    MOVQ      ( REGIND(EAX), MM6 )      /* x1              | x0   (reloaded) */
+    MOVQ      ( MM6, MM7 )              /* x1 | x0; only feeds the trash half */
+    PFMUL     ( MM2, MM6 )              /* x1*m12          | x0*m02          */
+    PFACC     ( MM7, MM6 )              /* ***trash***     | x0*m02+x1*m12   */
+    PFADD     ( MM5, MM6 )              /* ***trash***     | x0*...*m12+m32  */
+    MOVD      ( MM6, REGOFF(8, EDX) )   /* write r2 (low half only)          */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP3R_2 ) )      /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TP3R_3 ):
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points2_3d_no_rot )    /* 2-component points, 3D matrix without rotation: */
+HIDDEN(_mesa_3dnow_transform_points2_3d_no_rot)            /* r0 = x0*m00+m30, r1 = x1*m11+m31, r2 = m32      */
+GLNAME( _mesa_3dnow_transform_points2_3d_no_rot ):
+    PUSH_L    ( ESI )
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )       /* dest->size = 3        */
+    OR_B      ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )   /* dest->count = source->count   */
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+    PUSH_L    ( EDI )
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* EDX = dest->start             */
+    MOV_L     ( ESI, ECX )                      /* ECX = matrix                  */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* ESI = vertex counter          */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* EDI = source stride           */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* EAX = source->start           */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP3NRR_3 ) )    /* count == 0 -> nothing to transform    */
+    MOVD      ( REGIND(ECX), MM0 )      /*                 | m00             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )  /* m11             | m00             */
+    MOVQ      ( REGOFF(48, ECX), MM2 )  /* m31             | m30             */
+    MOVD      ( REGOFF(56, ECX), MM3 )  /*                 | m32             */
+ALIGNTEXT16
+LLBL( G3TP3NRR_2 ):
+    MOVQ      ( REGIND(EAX), MM4 )      /* x1              | x0              */
+    PFMUL     ( MM0, MM4 )              /* x1*m11          | x0*m00          */
+    PFADD     ( MM2, MM4 )              /* x1*m11+m31      | x0*m00+m30      */
+    MOVQ      ( MM4, REGIND(EDX) )      /* write r1, r0                      */
+    MOVD      ( MM3, REGOFF(8, EDX) )   /* write r2 (= m32)                  */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP3NRR_2 ) )    /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TP3NRR_3 ):
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points2_2d )       /* 2-component points through a 2D matrix:     */
+HIDDEN(_mesa_3dnow_transform_points2_2d)               /* rN = x0*m0N + x1*m1N + m3N  for N = 0..1    */
+GLNAME( _mesa_3dnow_transform_points2_2d ):
+    PUSH_L    ( ESI )
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )       /* dest->size = 2        */
+    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )   /* dest->count = source->count   */
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+    PUSH_L    ( EDI )
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* EDX = dest->start             */
+    MOV_L     ( ESI, ECX )                      /* ECX = matrix                  */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* ESI = vertex counter          */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* EDI = source stride           */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* EAX = source->start           */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP2R_3 ) )      /* count == 0 -> nothing to transform    */
+    MOVQ      ( REGIND(ECX), MM0 )      /* m01             | m00             */
+    MOVQ      ( REGOFF(16, ECX), MM1 )  /* m11             | m10             */
+    MOVQ      ( REGOFF(48, ECX), MM2 )  /* m31             | m30             */
+ALIGNTEXT16
+LLBL( G3TP2R_2 ):
+    MOVD      ( REGIND(EAX), MM4 )      /*                 | x0              */
+    MOVD      ( REGOFF(4, EAX), MM5 )   /*                 | x1              */
+    PUNPCKLDQ ( MM4, MM4 )              /* x0              | x0              */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    PFMUL     ( MM0, MM4 )              /* x0*m01          | x0*m00          */
+    PUNPCKLDQ ( MM5, MM5 )              /* x1              | x1              */
+    PFMUL     ( MM1, MM5 )              /* x1*m11          | x1*m10          */
+    PFADD     ( MM2, MM4 )              /* x0*m01+m31      | x0*m00+m30      */
+    PFADD     ( MM5, MM4 )              /* x0*..*m11+m31   | x0*..*m10+m30   */
+    MOVQ      ( MM4, REGIND(EDX) )      /* write r1, r0                      */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP2R_2 ) )      /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TP2R_3 ):
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points2_2d_no_rot )    /* 2-component points, 2D matrix without rotation: */
+HIDDEN(_mesa_3dnow_transform_points2_2d_no_rot)            /* r0 = x0*m00 + m30,  r1 = x1*m11 + m31           */
+GLNAME( _mesa_3dnow_transform_points2_2d_no_rot ):
+    PUSH_L    ( ESI )
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )       /* dest->size = 2        */
+    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )   /* dest->count = source->count   */
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+    PUSH_L    ( EDI )
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* EDX = dest->start             */
+    MOV_L     ( ESI, ECX )                      /* ECX = matrix                  */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* ESI = vertex counter          */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* EDI = source stride           */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* EAX = source->start           */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP2NRR_3 ) )    /* count == 0 -> nothing to transform    */
+    MOVD      ( REGIND(ECX), MM0 )      /*                 | m00             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )  /* m11             | m00             */
+    MOVQ      ( REGOFF(48, ECX), MM2 )  /* m31             | m30             */
+ALIGNTEXT16
+LLBL( G3TP2NRR_2 ):
+    MOVQ      ( REGIND(EAX), MM4 )      /* x1              | x0              */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    PFMUL     ( MM0, MM4 )              /* x1*m11          | x0*m00          */
+    PFADD     ( MM2, MM4 )              /* x1*m11+m31      | x0*m00+m30      */
+    MOVQ      ( MM4, REGIND(EDX) )      /* write r1, r0                      */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP2NRR_2 ) )    /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TP2NRR_3 ):
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points2_identity )
+HIDDEN(_mesa_3dnow_transform_points2_identity)
+GLNAME( _mesa_3dnow_transform_points2_identity ): /*
+ * Identity transform for 2-component vertices: copies x0, x1 of every
+ * source vertex to the dest vector unchanged (r0 = x0, r1 = x1).  The
+ * matrix pointer is loaded but never dereferenced.  The input is walked
+ * with the source stride, the output is written at a fixed 16-byte pitch,
+ * and the dest vector gets size 2 and the source's vertex count.
+ *
+ * An empty source (count == 0) must branch past the copy loop to the
+ * exit label: entering the loop with a zero counter would make DEC_L
+ * wrap to 0xFFFFFFFF and copy ~2^32 vertices.
+ */
+    PUSH_L    ( ESI )                   /* preserve ESI for the caller       */
+    MOV_L     ( ARG_DEST, ECX )         /* ECX = dest vector                 */
+    MOV_L     ( ARG_MATRIX, ESI )       /* ESI = matrix (moved to ECX below) */
+    MOV_L     ( ARG_SOURCE, EAX )       /* EAX = source vector               */
+    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest size = 2 */
+    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_2 flag bits */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
+    PUSH_L    ( EDI )                   /* preserve EDI for the caller       */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer */
+    MOV_L     ( ESI, ECX )              /* ECX = matrix (unused here)        */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPIR_4 ) )      /* count == 0 -> skip the copy loop  */
+ALIGNTEXT16
+LLBL( G3TPIR_3 ):
+    MOVQ      ( REGIND(EAX), MM0 )      /* x1              | x0              */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    MOVQ      ( MM0, REGIND(EDX) )      /* r1              | r0              */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TPIR_3 ) )      /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TPIR_4 ):
+    FEMMS                               /* leave MMX/3DNow! state            */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+#endif
+#if defined (__ELF__) && defined (__linux__)
+        .section .note.GNU-stack,"",%progbits
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/3dnow_xform3.S
 ,0 → 1,561
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifdef USE_3DNOW_ASM
+#include "assyntax.h"
+#include "matypes.h"
+#include "xform_args.h"
+    SEG_TEXT
+#define FRAME_OFFSET    4
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points3_general )
+HIDDEN(_mesa_3dnow_transform_points3_general)
+GLNAME( _mesa_3dnow_transform_points3_general ): /*
+ * Transforms 3-component vertices by a full 4x4 matrix, producing
+ * 4-component results:
+ *   r[i] = x0*m[i] + x1*m[4+i] + x2*m[8+i] + m[12+i]      (i = 0..3)
+ * where mN is the N-th float of the matrix (byte offset 4*N).  The matrix
+ * rows are used straight from memory inside the loop.  The input is
+ * walked with the source stride, the output is written at a fixed
+ * 16-byte pitch, and the dest vector gets size 4 and the source's count.
+ */
+    PUSH_L    ( ESI )                   /* preserve ESI for the caller       */
+    MOV_L     ( ARG_DEST, ECX )         /* ECX = dest vector                 */
+    MOV_L     ( ARG_MATRIX, ESI )       /* ESI = matrix (moved to ECX below) */
+    MOV_L     ( ARG_SOURCE, EAX )       /* EAX = source vector               */
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_4 flag bits */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
+    PUSH_L    ( EDI )                   /* preserve EDI for the caller       */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer */
+    MOV_L     ( ESI, ECX )              /* ECX = matrix                      */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPGR_2 ) )      /* count == 0 -> nothing to do       */
+    PREFETCHW ( REGIND(EDX) )           /* warm first output line for write  */
+ALIGNTEXT16
+LLBL( G3TPGR_1 ):
+    PREFETCHW ( REGOFF(32, EDX) )       /* prefetch 2 vertices ahead         */
+    MOVQ      ( REGIND(EAX), MM0 )      /* x1              | x0              */
+    MOVD      ( REGOFF(8, EAX), MM2 )   /*                 | x2              */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    PREFETCH  ( REGIND(EAX) )           /* warm next input vertex            */
+    MOVQ      ( MM0, MM1 )              /* x1              | x0              */
+    PUNPCKLDQ ( MM2, MM2 )              /* x2              | x2              */
+    PUNPCKLDQ ( MM0, MM0 )              /* x0              | x0              */
+    MOVQ      ( MM2, MM5 )              /* x2              | x2              */
+    PUNPCKHDQ ( MM1, MM1 )              /* x1              | x1              */
+    PFMUL     ( REGOFF(32, ECX), MM2 )  /* x2*m9           | x2*m8           */
+    MOVQ      ( MM0, MM3 )              /* x0              | x0              */
+    PFMUL     ( REGOFF(40, ECX), MM5 )  /* x2*m11          | x2*m10          */
+    MOVQ      ( MM1, MM4 )              /* x1              | x1              */
+    PFMUL     ( REGIND(ECX), MM0 )      /* x0*m1           | x0*m0           */
+    PFADD     ( REGOFF(48, ECX), MM2 )  /* x2*m9+m13       | x2*m8+m12       */
+    PFMUL     ( REGOFF(16, ECX), MM1 )  /* x1*m5           | x1*m4           */
+    PFADD     ( REGOFF(56, ECX), MM5 )  /* x2*m11+m15      | x2*m10+m14      */
+    PFADD     ( MM0, MM1 )              /* x0*m1+x1*m5     | x0*m0+x1*m4     */
+    PFMUL     ( REGOFF(8, ECX), MM3 )   /* x0*m3           | x0*m2           */
+    PFADD     ( MM1, MM2 )              /* r1              | r0              */
+    PFMUL     ( REGOFF(24, ECX), MM4 )  /* x1*m7           | x1*m6           */
+    ADD_L     ( CONST(16), EDX )        /* next output vertex                */
+    PFADD     ( MM3, MM4 )              /* x0*m3+x1*m7     | x0*m2+x1*m6     */
+    MOVQ      ( MM2, REGOFF(-16, EDX) ) /* write r0, r1                      */
+    PFADD     ( MM4, MM5 )              /* r3              | r2              */
+    MOVQ      ( MM5, REGOFF(-8, EDX) )  /* write r2, r3                      */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TPGR_1 ) )      /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TPGR_2 ):
+    FEMMS                               /* leave MMX/3DNow! state            */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points3_perspective )
+HIDDEN(_mesa_3dnow_transform_points3_perspective)
+GLNAME( _mesa_3dnow_transform_points3_perspective ): /*
+ * Transforms 3-component vertices by a perspective-shaped matrix,
+ * producing 4-component results:
+ *   r0 = x0*m00 + x2*m20        r1 = x1*m11 + x2*m21
+ *   r2 = x2*m22 + m32           r3 = -x2
+ * Only the matrix floats at byte offsets 0 (m00), 20 (m11), 32/36
+ * (m20, m21), 40 (m22) and 56 (m32) are read; they are kept in MM0-MM3
+ * across the loop.  The input is walked with the source stride, the
+ * output is written at a fixed 16-byte pitch, and the dest vector gets
+ * size 4 and the source's vertex count.
+ */
+    PUSH_L    ( ESI )                   /* preserve ESI for the caller       */
+    MOV_L     ( ARG_DEST, ECX )         /* ECX = dest vector                 */
+    MOV_L     ( ARG_MATRIX, ESI )       /* ESI = matrix (moved to ECX below) */
+    MOV_L     ( ARG_SOURCE, EAX )       /* EAX = source vector               */
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_4 flag bits */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
+    PUSH_L    ( EDI )                   /* preserve EDI for the caller       */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer */
+    MOV_L     ( ESI, ECX )              /* ECX = matrix                      */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPPR_2 ) )      /* count == 0 -> nothing to do       */
+    PREFETCH  ( REGIND(EAX) )           /* warm first input vertex           */
+    PREFETCHW ( REGIND(EDX) )           /* warm first output line for write  */
+    MOVD      ( REGIND(ECX), MM0 )      /*                 | m00             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )  /* m11             | m00             */
+    MOVQ      ( REGOFF(32, ECX), MM1 )  /* m21             | m20             */
+    MOVD      ( REGOFF(40, ECX), MM2 )  /*                 | m22             */
+    MOVD      ( REGOFF(56, ECX), MM3 )  /*                 | m32             */
+ALIGNTEXT16
+LLBL( G3TPPR_1 ):
+    PREFETCHW ( REGOFF(32, EDX) )       /* prefetch 2 vertices ahead         */
+    MOVD      ( REGOFF(8, EAX), MM5 )   /*                 | x2              */
+    MOVQ      ( REGIND(EAX), MM4 )      /* x1              | x0              */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    PREFETCH  ( REGIND(EAX) )           /* warm next input vertex            */
+    PXOR      ( MM7, MM7 )              /* 0               | 0               */
+    MOVQ      ( MM5, MM6 )              /*                 | x2              */
+    PFMUL     ( MM0, MM4 )              /* x1*m11          | x0*m00          */
+    PFSUB     ( MM5, MM7 )              /*                 | -x2             */
+    PFMUL     ( MM2, MM6 )              /*                 | x2*m22          */
+    PUNPCKLDQ ( MM5, MM5 )              /* x2              | x2              */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    PFMUL     ( MM1, MM5 )              /* x2*m21          | x2*m20          */
+    PFADD     ( MM3, MM6 )              /*                 | x2*m22+m32      */
+    PFADD     ( MM4, MM5 )              /* x1*m11+x2*m21   | x0*m00+x2*m20   */
+    MOVQ      ( MM5, REGOFF(-16, EDX) ) /* write r0, r1                      */
+    MOVD      ( MM6, REGOFF(-8, EDX) )  /* write r2                          */
+    MOVD      ( MM7, REGOFF(-4, EDX) )  /* write r3                          */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TPPR_1 ) )      /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TPPR_2 ):
+    FEMMS                               /* leave MMX/3DNow! state            */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points3_3d )
+HIDDEN(_mesa_3dnow_transform_points3_3d)
+GLNAME( _mesa_3dnow_transform_points3_3d ): /*
+ * Transforms 3-component vertices by a 3D (affine) matrix, producing
+ * 3-component results:
+ *   r[i] = x0*m[i] + x1*m[4+i] + x2*m[8+i] + m[12+i]      (i = 0..2)
+ * where mN is the N-th float of the matrix (byte offset 4*N).  m2 and m6
+ * are kept packed in MM7 across the loop; r2 is formed with two PFACC
+ * horizontal adds.  The input is walked with the source stride, the
+ * output is written at a fixed 16-byte pitch (the 4th float of each
+ * output vertex is not written), and the dest vector gets size 3 and the
+ * source's vertex count.
+ */
+    PUSH_L    ( ESI )                   /* preserve ESI for the caller       */
+    MOV_L     ( ARG_DEST, ECX )         /* ECX = dest vector                 */
+    MOV_L     ( ARG_MATRIX, ESI )       /* ESI = matrix (moved to ECX below) */
+    MOV_L     ( ARG_SOURCE, EAX )       /* EAX = source vector               */
+    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */
+    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_3 flag bits */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
+    PUSH_L    ( EDI )                   /* preserve EDI for the caller       */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer */
+    MOV_L     ( ESI, ECX )              /* ECX = matrix                      */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP3R_2 ) )      /* count == 0 -> nothing to do       */
+    PREFETCH  ( REGIND(EAX) )           /* warm first input vertex           */
+    PREFETCHW ( REGIND(EDX) )           /* output is only written: use the   */
+                                        /* write-intent hint like the loop   */
+    MOVD      ( REGOFF(8, ECX), MM7 )   /*                 | m2              */
+    PUNPCKLDQ ( REGOFF(24, ECX), MM7 )  /* m6              | m2              */
+ALIGNTEXT16
+LLBL( G3TP3R_1 ):
+    PREFETCHW ( REGOFF(32, EDX) )       /* prefetch 2 vertices ahead         */
+    MOVQ      ( REGIND(EAX), MM0 )      /* x1              | x0              */
+    MOVD      ( REGOFF(8, EAX), MM1 )   /*                 | x2              */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    PREFETCH  ( REGIND(EAX) )           /* warm next input vertex            */
+    MOVQ      ( MM0, MM2 )              /* x1              | x0              */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    PUNPCKLDQ ( MM2, MM2 )              /* x0              | x0              */
+    MOVQ      ( MM0, MM3 )              /* x1              | x0              */
+    PFMUL     ( REGIND(ECX), MM2 )      /* x0*m1           | x0*m0           */
+    PUNPCKHDQ ( MM3, MM3 )              /* x1              | x1              */
+    MOVQ      ( MM1, MM4 )              /*                 | x2              */
+    PFMUL     ( REGOFF(16, ECX), MM3 )  /* x1*m5           | x1*m4           */
+    PUNPCKLDQ ( MM4, MM4 )              /* x2              | x2              */
+    PFADD     ( MM2, MM3 )              /* x0*m1+x1*m5     | x0*m0+x1*m4     */
+    PFMUL     ( REGOFF(32, ECX), MM4 )  /* x2*m9           | x2*m8           */
+    PFADD     ( REGOFF(48, ECX), MM3 )  /* x0*m1+...+m13   | x0*m0+x1*m4+m12 */
+    PFMUL     ( MM7, MM0 )              /* x1*m6           | x0*m2           */
+    PFADD     ( MM4, MM3 )              /* r1              | r0              */
+    PFMUL     ( REGOFF(40, ECX), MM1 )  /*                 | x2*m10          */
+    PUNPCKLDQ ( REGOFF(56, ECX), MM1 )  /* m14             | x2*m10          */
+    PFACC     ( MM0, MM1 )              /* x0*m2+x1*m6     | x2*m10+m14      */
+    MOVQ      ( MM3, REGOFF(-16, EDX) ) /* write r0, r1                      */
+    PFACC     ( MM1, MM1 )              /*                 | r2              */
+    MOVD      ( MM1, REGOFF(-8, EDX) )  /* write r2                          */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP3R_1 ) )      /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TP3R_2 ):
+    FEMMS                               /* leave MMX/3DNow! state            */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points3_3d_no_rot )
+HIDDEN(_mesa_3dnow_transform_points3_3d_no_rot)
+GLNAME( _mesa_3dnow_transform_points3_3d_no_rot ): /*
+ * Transforms 3-component vertices by a scale-and-translate (no rotation)
+ * 3D matrix, producing 3-component results:
+ *   r0 = x0*m00 + m30    r1 = x1*m11 + m31    r2 = x2*m22 + m32
+ * Only the matrix floats at byte offsets 0 (m00), 20 (m11), 40 (m22) and
+ * 48/52/56 (m30, m31, m32) are read; they are kept in MM0-MM3 across the
+ * loop.  The input is walked with the source stride, the output is
+ * written at a fixed 16-byte pitch (the 4th float of each output vertex
+ * is not written), and the dest vector gets size 3 and the source's
+ * vertex count.
+ */
+    PUSH_L    ( ESI )                   /* preserve ESI for the caller       */
+    MOV_L     ( ARG_DEST, ECX )         /* ECX = dest vector                 */
+    MOV_L     ( ARG_MATRIX, ESI )       /* ESI = matrix (moved to ECX below) */
+    MOV_L     ( ARG_SOURCE, EAX )       /* EAX = source vector               */
+    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */
+    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_3 flag bits */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
+    PUSH_L    ( EDI )                   /* preserve EDI for the caller       */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer */
+    MOV_L     ( ESI, ECX )              /* ECX = matrix                      */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP3NRR_2 ) )    /* count == 0 -> nothing to do       */
+    PREFETCH  ( REGIND(EAX) )           /* warm first input vertex           */
+    PREFETCHW ( REGIND(EDX) )           /* warm first output line for write  */
+    MOVD      ( REGIND(ECX), MM0 )      /*                 | m00             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )  /* m11             | m00             */
+    MOVD      ( REGOFF(40, ECX), MM2 )  /*                 | m22             */
+    PUNPCKLDQ ( MM2, MM2 )              /* m22             | m22             */
+    MOVQ      ( REGOFF(48, ECX), MM1 )  /* m31             | m30             */
+    MOVD      ( REGOFF(56, ECX), MM3 )  /*                 | m32             */
+    PUNPCKLDQ ( MM3, MM3 )              /* m32             | m32             */
+ALIGNTEXT16
+LLBL( G3TP3NRR_1 ):
+    PREFETCHW ( REGOFF(32, EDX) )       /* prefetch 2 vertices ahead         */
+    MOVQ      ( REGIND(EAX), MM4 )      /* x1              | x0              */
+    MOVD      ( REGOFF(8, EAX), MM5 )   /*                 | x2              */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    PREFETCH  ( REGIND(EAX) )           /* input is only read: plain hint    */
+    PFMUL     ( MM0, MM4 )              /* x1*m11          | x0*m00          */
+    PFADD     ( MM1, MM4 )              /* x1*m11+m31      | x0*m00+m30      */
+    PFMUL     ( MM2, MM5 )              /*                 | x2*m22          */
+    PFADD     ( MM3, MM5 )              /*                 | x2*m22+m32      */
+    MOVQ      ( MM4, REGIND(EDX) )      /* write r0, r1                      */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    MOVD      ( MM5, REGOFF(-8, EDX) )  /* write r2                          */
+    JNZ       ( LLBL( G3TP3NRR_1 ) )    /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TP3NRR_2 ):
+    FEMMS                               /* leave MMX/3DNow! state            */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points3_2d )
+HIDDEN(_mesa_3dnow_transform_points3_2d)
+GLNAME( _mesa_3dnow_transform_points3_2d ): /*
+ * Transforms 3-component vertices by a 2D matrix, producing 3-component
+ * results; x2 is passed through untouched:
+ *   r0 = x0*m00 + x1*m10 + m30
+ *   r1 = x0*m01 + x1*m11 + m31
+ *   r2 = x2
+ * Matrix floats are read from byte offsets 0 (m00), 4 (m01), 16 (m10),
+ * 20 (m11) and 48/52 (m30, m31) and kept in MM0-MM2 across the loop.
+ * The input is walked with the source stride, the output is written at a
+ * fixed 16-byte pitch, and the dest vector gets size 3 and the source's
+ * vertex count.
+ */
+    PUSH_L    ( ESI )                   /* preserve ESI for the caller       */
+    MOV_L     ( ARG_DEST, ECX )         /* ECX = dest vector                 */
+    MOV_L     ( ARG_MATRIX, ESI )       /* ESI = matrix (moved to ECX below) */
+    MOV_L     ( ARG_SOURCE, EAX )       /* EAX = source vector               */
+    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */
+    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_3 flag bits */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
+    PUSH_L    ( EDI )                   /* preserve EDI for the caller       */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer */
+    MOV_L     ( ESI, ECX )              /* ECX = matrix                      */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP2R_3) )       /* count == 0 -> nothing to do       */
+    PREFETCH  ( REGIND(EAX) )           /* warm first input vertex           */
+    PREFETCHW ( REGIND(EDX) )           /* warm first output line for write  */
+    MOVD      ( REGIND(ECX), MM0 )      /*                 | m00             */
+    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )  /* m10             | m00             */
+    MOVD      ( REGOFF(4, ECX), MM1 )   /*                 | m01             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )  /* m11             | m01             */
+    MOVQ      ( REGOFF(48, ECX), MM2 )  /* m31             | m30             */
+ALIGNTEXT16
+LLBL( G3TP2R_2 ):
+    PREFETCHW ( REGOFF(32, EDX) )       /* prefetch 2 vertices ahead         */
+    MOVQ      ( REGIND(EAX), MM3 )      /* x1              | x0              */
+    MOVD      ( REGOFF(8, EAX), MM5 )   /*                 | x2              */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    PREFETCH  ( REGIND(EAX) )           /* warm next input vertex            */
+    MOVQ      ( MM3, MM4 )              /* x1              | x0              */
+    PFMUL     ( MM0, MM3 )              /* x1*m10          | x0*m00          */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    PFMUL     ( MM1, MM4 )              /* x1*m11          | x0*m01          */
+    PFACC     ( MM4, MM3 )              /* x0*m01+x1*m11   | x0*m00+x1*m10   */
+    MOVD      ( MM5, REGOFF(-8, EDX) )  /* write r2 (=x2)                    */
+    PFADD     ( MM2, MM3 )              /* x0*...*m11+m31  | x0*...*m10+m30  */
+    MOVQ      ( MM3, REGOFF(-16, EDX) ) /* write r0, r1                      */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP2R_2 ) )      /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TP2R_3 ):
+    FEMMS                               /* leave MMX/3DNow! state            */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points3_2d_no_rot )
+HIDDEN(_mesa_3dnow_transform_points3_2d_no_rot)
+GLNAME( _mesa_3dnow_transform_points3_2d_no_rot ): /*
+ * Transforms 3-component vertices by a scale-and-translate (no rotation)
+ * 2D matrix, producing 3-component results; x2 is passed through:
+ *   r0 = x0*m00 + m30    r1 = x1*m11 + m31    r2 = x2
+ * Only the matrix floats at byte offsets 0 (m00), 20 (m11) and 48/52
+ * (m30, m31) are read; they are kept in MM0/MM1 across the loop.  The
+ * input is walked with the source stride, the output is written at a
+ * fixed 16-byte pitch, and the dest vector gets size 3 and the source's
+ * vertex count.
+ */
+    PUSH_L    ( ESI )                   /* preserve ESI for the caller       */
+    MOV_L     ( ARG_DEST, ECX )         /* ECX = dest vector                 */
+    MOV_L     ( ARG_MATRIX, ESI )       /* ESI = matrix (moved to ECX below) */
+    MOV_L     ( ARG_SOURCE, EAX )       /* EAX = source vector               */
+    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */
+    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_3 flag bits */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
+    PUSH_L    ( EDI )                   /* preserve EDI for the caller       */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer */
+    MOV_L     ( ESI, ECX )              /* ECX = matrix                      */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP2NRR_2 ) )    /* count == 0 -> nothing to do       */
+    PREFETCH  ( REGIND(EAX) )           /* warm first input vertex           */
+    PREFETCHW ( REGIND(EDX) )           /* warm first output line for write  */
+    MOVD      ( REGIND(ECX), MM0 )      /*                 | m00             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )  /* m11             | m00             */
+    MOVQ      ( REGOFF(48, ECX), MM1 )  /* m31             | m30             */
+ALIGNTEXT16
+LLBL( G3TP2NRR_1 ):
+    PREFETCHW ( REGOFF(32, EDX) )       /* prefetch 2 vertices ahead         */
+    MOVQ      ( REGIND(EAX), MM4 )      /* x1              | x0              */
+    MOVD      ( REGOFF(8, EAX), MM5 )   /*                 | x2              */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    PREFETCH  ( REGIND(EAX) )           /* warm next input vertex            */
+    PFMUL     ( MM0, MM4 )              /* x1*m11          | x0*m00          */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    PFADD     ( MM1, MM4 )              /* x1*m11+m31      | x0*m00+m30      */
+    MOVQ      ( MM4, REGOFF(-16, EDX) ) /* write r0, r1                      */
+    MOVD      ( MM5, REGOFF(-8, EDX) )  /* write r2 (=x2)                    */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP2NRR_1 ) )    /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TP2NRR_2 ):
+    FEMMS                               /* leave MMX/3DNow! state            */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points3_identity )
+HIDDEN(_mesa_3dnow_transform_points3_identity)
+GLNAME( _mesa_3dnow_transform_points3_identity ): /*
+ * Identity transform for 3-component vertices: copies x0, x1, x2 of
+ * every source vertex to the dest vector unchanged.  The matrix pointer
+ * is loaded but never dereferenced.  The input is walked with the source
+ * stride, the output is written at a fixed 16-byte pitch (the 4th float
+ * of each output vertex is not written), and the dest vector gets size 3
+ * and the source's vertex count.
+ */
+    PUSH_L    ( ESI )                   /* preserve ESI for the caller       */
+    MOV_L     ( ARG_DEST, ECX )         /* ECX = dest vector                 */
+    MOV_L     ( ARG_MATRIX, ESI )       /* ESI = matrix (moved to ECX below) */
+    MOV_L     ( ARG_SOURCE, EAX )       /* EAX = source vector               */
+    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */
+    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_3 flag bits */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
+    PUSH_L    ( EDI )                   /* preserve EDI for the caller       */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer */
+    MOV_L     ( ESI, ECX )              /* ECX = matrix (unused here)        */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPIR_2 ) )      /* count == 0 -> nothing to do       */
+    PREFETCHW ( REGIND(EDX) )           /* warm first output line for write  */
+ALIGNTEXT16
+LLBL( G3TPIR_1 ):
+    PREFETCHW ( REGOFF(32, EDX) )       /* prefetch 2 vertices ahead         */
+    MOVQ      ( REGIND(EAX), MM0 )      /* x1              | x0              */
+    MOVD      ( REGOFF(8, EAX), MM1 )   /*                 | x2              */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    MOVQ      ( MM0, REGOFF(-16, EDX) ) /* r1              | r0              */
+    MOVD      ( MM1, REGOFF(-8, EDX) )  /*                 | r2              */
+    JNZ       ( LLBL( G3TPIR_1 ) )      /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TPIR_2 ):
+    FEMMS                               /* leave MMX/3DNow! state            */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+#endif
+#if defined (__ELF__) && defined (__linux__)
+        .section .note.GNU-stack,"",%progbits
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/3dnow_xform4.S
 ,0 → 1,570
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifdef USE_3DNOW_ASM
+#include "assyntax.h"
+#include "matypes.h"
+#include "xform_args.h"
+    SEG_TEXT
+#define FRAME_OFFSET    4
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points4_general )
+HIDDEN(_mesa_3dnow_transform_points4_general)
+GLNAME( _mesa_3dnow_transform_points4_general ): /*
+ * Transforms 4-component vertices by a full 4x4 matrix:
+ *   r[i] = x0*m[i] + x1*m[4+i] + x2*m[8+i] + x3*m[12+i]   (i = 0..3)
+ * where mN is the N-th float of the matrix (byte offset 4*N).  The matrix
+ * is used straight from memory inside the loop.  The input is walked
+ * with the source stride, the output is written at a fixed 16-byte
+ * pitch, and the dest vector gets size 4 and the source's vertex count.
+ */
+    PUSH_L    ( ESI )                   /* preserve ESI for the caller       */
+    MOV_L     ( ARG_DEST, ECX )         /* ECX = dest vector                 */
+    MOV_L     ( ARG_MATRIX, ESI )       /* ESI = matrix (moved to ECX below) */
+    MOV_L     ( ARG_SOURCE, EAX )       /* EAX = source vector               */
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_4 flag bits */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
+    PUSH_L    ( EDI )                   /* preserve EDI for the caller       */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer */
+    MOV_L     ( ESI, ECX )              /* ECX = matrix                      */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPGR_2 ) )      /* count == 0 -> nothing to do       */
+    PREFETCHW ( REGIND(EDX) )           /* warm first output line for write  */
+ALIGNTEXT16
+LLBL( G3TPGR_1 ):
+    PREFETCHW ( REGOFF(32, EDX) )       /* prefetch 2 vertices ahead         */
+    MOVQ      ( REGIND(EAX), MM0 )      /* x1              | x0              */
+    MOVQ      ( REGOFF(8, EAX), MM4 )   /* x3              | x2              */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    PREFETCH  ( REGIND(EAX) )           /* warm next input vertex            */
+    MOVQ      ( MM0, MM2 )              /* x1              | x0              */
+    MOVQ      ( MM4, MM6 )              /* x3              | x2              */
+    PUNPCKLDQ ( MM0, MM0 )              /* x0              | x0              */
+    PUNPCKHDQ ( MM2, MM2 )              /* x1              | x1              */
+    MOVQ      ( MM0, MM1 )              /* x0              | x0              */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    PFMUL     ( REGIND(ECX), MM0 )      /* x0*m1           | x0*m0           */
+    MOVQ      ( MM2, MM3 )              /* x1              | x1              */
+    PFMUL     ( REGOFF(8, ECX), MM1 )   /* x0*m3           | x0*m2           */
+    PUNPCKLDQ ( MM4, MM4 )              /* x2              | x2              */
+    PFMUL     ( REGOFF(16, ECX), MM2 )  /* x1*m5           | x1*m4           */
+    MOVQ      ( MM4, MM5 )              /* x2              | x2              */
+    PFMUL     ( REGOFF(24, ECX), MM3 )  /* x1*m7           | x1*m6           */
+    PUNPCKHDQ ( MM6, MM6 )              /* x3              | x3              */
+    PFMUL     ( REGOFF(32, ECX), MM4 )  /* x2*m9           | x2*m8           */
+    MOVQ      ( MM6, MM7 )              /* x3              | x3              */
+    PFMUL     ( REGOFF(40, ECX), MM5 )  /* x2*m11          | x2*m10          */
+    PFADD     ( MM0, MM2 )              /* x0*m1+x1*m5     | x0*m0+x1*m4     */
+    PFMUL     ( REGOFF(48, ECX), MM6 )  /* x3*m13          | x3*m12          */
+    PFADD     ( MM1, MM3 )              /* x0*m3+x1*m7     | x0*m2+x1*m6     */
+    PFMUL     ( REGOFF(56, ECX), MM7 )  /* x3*m15          | x3*m14          */
+    PFADD     ( MM4, MM6 )              /* x2*m9+x3*m13    | x2*m8+x3*m12    */
+    PFADD     ( MM5, MM7 )              /* x2*m11+x3*m15   | x2*m10+x3*m14   */
+    PFADD     ( MM2, MM6 )              /* r1              | r0              */
+    PFADD     ( MM3, MM7 )              /* r3              | r2              */
+    MOVQ      ( MM6, REGOFF(-16, EDX) ) /* write r0, r1                      */
+    MOVQ      ( MM7, REGOFF(-8, EDX) )  /* write r2, r3                      */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TPGR_1 ) )      /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TPGR_2 ):
+    FEMMS                               /* leave MMX/3DNow! state            */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points4_perspective )
+HIDDEN(_mesa_3dnow_transform_points4_perspective)
+GLNAME( _mesa_3dnow_transform_points4_perspective ): /*
+ * Transforms 4-component vertices by a perspective-shaped matrix:
+ *   r0 = x0*m00 + x2*m20        r1 = x1*m11 + x2*m21
+ *   r2 = x2*m22 + x3*m32        r3 = -x2
+ * Only the matrix floats at byte offsets 0 (m00), 20 (m11), 32/36
+ * (m20, m21), 40 (m22) and 56 (m32) are read; they are kept in MM0-MM2
+ * across the loop, and MM7 holds a constant zero used to negate x2.
+ * The input is walked with the source stride, the output is written at
+ * a fixed 16-byte pitch, and the dest vector gets size 4 and the
+ * source's vertex count.
+ */
+    PUSH_L    ( ESI )                   /* preserve ESI for the caller       */
+    MOV_L     ( ARG_DEST, ECX )         /* ECX = dest vector                 */
+    MOV_L     ( ARG_MATRIX, ESI )       /* ESI = matrix (moved to ECX below) */
+    MOV_L     ( ARG_SOURCE, EAX )       /* EAX = source vector               */
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* set VEC_SIZE_4 flag bits */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
+    PUSH_L    ( EDI )                   /* preserve EDI for the caller       */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX ) /* EDX = output pointer */
+    MOV_L     ( ESI, ECX )              /* ECX = matrix                      */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride in bytes */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX ) /* EAX = input pointer */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPPR_2 ) )      /* count == 0 -> nothing to do       */
+    PREFETCH  ( REGIND(EAX) )           /* warm first input vertex           */
+    PREFETCHW ( REGIND(EDX) )           /* warm first output line for write  */
+    MOVD      ( REGIND(ECX), MM0 )      /*                 | m00             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )  /* m11             | m00             */
+    MOVD      ( REGOFF(40, ECX), MM1 )  /*                 | m22             */
+    PUNPCKLDQ ( REGOFF(56, ECX), MM1 )  /* m32             | m22             */
+    MOVQ      ( REGOFF(32, ECX), MM2 )  /* m21             | m20             */
+    PXOR      ( MM7, MM7 )              /* 0               | 0               */
+ALIGNTEXT16
+LLBL( G3TPPR_1 ):
+    PREFETCHW ( REGOFF(32, EDX) )       /* prefetch 2 vertices ahead         */
+    MOVQ      ( REGIND(EAX), MM4 )      /* x1              | x0              */
+    MOVQ      ( REGOFF(8, EAX), MM5 )   /* x3              | x2              */
+    MOVD      ( REGOFF(8, EAX), MM3 )   /*                 | x2              */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    PREFETCH  ( REGOFF(32, EAX) )       /* 32 bytes past the next vertex     */
+    MOVQ      ( MM5, MM6 )              /* x3              | x2              */
+    PFMUL     ( MM0, MM4 )              /* x1*m11          | x0*m00          */
+    PUNPCKLDQ ( MM5, MM5 )              /* x2              | x2              */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    PFMUL     ( MM2, MM5 )              /* x2*m21          | x2*m20          */
+    PFSUBR    ( MM7, MM3 )              /*                 | -x2             */
+    PFMUL     ( MM1, MM6 )              /* x3*m32          | x2*m22          */
+    PFADD     ( MM4, MM5 )              /* x1*m11+x2*m21   | x0*m00+x2*m20   */
+    PFACC     ( MM3, MM6 )              /* -x2             | x2*m22+x3*m32   */
+    MOVQ      ( MM5, REGOFF(-16, EDX) ) /* write r0, r1                      */
+    MOVQ      ( MM6, REGOFF(-8, EDX) )  /* write r2, r3                      */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TPPR_1 ) )      /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TPPR_2 ):
+    FEMMS                               /* leave MMX/3DNow! state            */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+ALIGNTEXT16
+/*
+ * _mesa_3dnow_transform_points4_3d
+ *
+ * Transforms an array of 4-component float vertices, one vertex per loop
+ * iteration.  For each input (x0, x1, x2, x3) it writes 16 bytes:
+ *     r0 = x0*m0 + x1*m4 + x2*m8  + x3*m12
+ *     r1 = x0*m1 + x1*m5 + x2*m9  + x3*m13
+ *     r2 = x0*m2 + x1*m6 + x2*m10 + x3*m14
+ *     r3 = x3
+ * where mN is the float at byte offset 4*N from the matrix pointer.
+ *
+ * Arguments are fetched through the ARG_DEST, ARG_MATRIX and ARG_SOURCE
+ * macros and the vector fields through the V4F_* offsets; both are defined
+ * outside this file section (NOTE(review): presumably stack slots and
+ * generated struct offsets -- confirm against the headers).
+ *
+ * Side effects on the destination vector: size is set to 4, VEC_SIZE_4 is
+ * OR-ed into flags, and count is copied from the source vector.
+ *
+ * Register use inside the loop:
+ *     EAX = current source vertex     EDI = source stride in bytes
+ *     EDX = current output position   ESI = remaining vertex count
+ *     ECX = matrix pointer            MM6/MM7 = loop-invariant m2,m6 / m10,m14
+ * ESI and EDI are saved on entry and restored before RET.
+ */
+GLOBL GLNAME( _mesa_3dnow_transform_points4_3d )
+HIDDEN(_mesa_3dnow_transform_points4_3d)
+GLNAME( _mesa_3dnow_transform_points4_3d ):
+    PUSH_L    ( ESI )
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+/* Update destination bookkeeping: size = 4, flags |= VEC_SIZE_4, count = source count. */
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+    PUSH_L    ( EDI )
+/* From here on ECX holds the matrix pointer (the destination struct is no longer needed). */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )
+    MOV_L     ( ESI, ECX )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )
+/* Nothing to do for an empty source vector: jump straight to the epilogue. */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP3R_2 ) )
+/* Pre-pack the third-row coefficients once; they are reused by every iteration. */
+    MOVD      ( REGOFF(8, ECX), MM6 )   /*                 | m2              */
+    PUNPCKLDQ ( REGOFF(24, ECX), MM6 )  /* m6              | m2              */
+    MOVD      ( REGOFF(40, ECX), MM7 )  /*                 | m10             */
+    PUNPCKLDQ ( REGOFF(56, ECX), MM7 )  /* m14             | m10             */
+ALIGNTEXT16
+LLBL( G3TP3R_1 ):
+    PREFETCHW ( REGOFF(32, EDX) )       /* prefetch 2 vertices ahead         */
+    PREFETCH  ( REGOFF(32, EAX) )       /* hopefully array is tightly packed */
+    MOVQ      ( REGIND(EAX), MM2 )      /* x1              | x0              */
+    MOVQ      ( REGOFF(8, EAX), MM3 )   /* x3              | x2              */
+    MOVQ      ( MM2, MM0 )              /* x1              | x0              */
+    MOVQ      ( MM3, MM4 )              /* x3              | x2              */
+    MOVQ      ( MM0, MM1 )              /* x1              | x0              */
+    MOVQ      ( MM4, MM5 )              /* x3              | x2              */
+    PUNPCKLDQ ( MM0, MM0 )              /* x0              | x0              */
+    PUNPCKHDQ ( MM1, MM1 )              /* x1              | x1              */
+    PFMUL     ( REGIND(ECX), MM0 )      /* x0*m1           | x0*m0           */
+    PUNPCKLDQ ( MM3, MM3 )              /* x2              | x2              */
+    PFMUL     ( REGOFF(16, ECX), MM1 )  /* x1*m5           | x1*m4           */
+    PUNPCKHDQ ( MM4, MM4 )              /* x3              | x3              */
+    PFMUL     ( MM6, MM2 )              /* x1*m6           | x0*m2           */
+    PFADD     ( MM0, MM1 )              /* x0*m1+x1*m5     | x0*m0+x1*m4     */
+    PFMUL     ( REGOFF(32, ECX), MM3 )  /* x2*m9           | x2*m8           */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    PFMUL     ( REGOFF(48, ECX), MM4 )  /* x3*m13          | x3*m12          */
+    PFADD     ( MM1, MM3 )              /* x0*m1+..+x2*m9  | x0*m0+...+x2*m8 */
+    PFMUL     ( MM7, MM5 )              /* x3*m14          | x2*m10          */
+    PFADD     ( MM3, MM4 )              /* r1              | r0              */
+    PFACC     ( MM2, MM5 )              /* x0*m2+x1*m6     | x2*m10+x3*m14   */
+/* x3 is reloaded alone (upper half zero) so the PFACC below yields r3 = x3 unchanged. */
+/* This load must stay ahead of the stride add, which moves EAX to the next vertex.    */
+    MOVD      ( REGOFF(12, EAX), MM0 )  /*                 | x3              */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+    PFACC     ( MM0, MM5 )              /* r3              | r2              */
+/* EDX was already advanced by 16, hence the negative offsets for the stores. */
+    MOVQ      ( MM4, REGOFF(-16, EDX) ) /* write r0, r1                      */
+    MOVQ      ( MM5, REGOFF(-8, EDX) )  /* write r2, r3                      */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP3R_1 ) )      /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TP3R_2 ):
+/* Common epilogue: leave MMX/3DNow! state, restore saved registers. */
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+ALIGNTEXT16
+/*
+ * _mesa_3dnow_transform_points4_3d_no_rot
+ *
+ * Transforms an array of 4-component float vertices using only the
+ * diagonal and last-row matrix entries.  For each input (x0, x1, x2, x3)
+ * it writes 16 bytes:
+ *     r0 = x0*m00 + x3*m30
+ *     r1 = x1*m11 + x3*m31
+ *     r2 = x2*m22 + x3*m32
+ *     r3 = x3
+ * In this routine's comments m00, m11, m22 are the floats at matrix byte
+ * offsets 0, 20, 40 and m30, m31, m32 those at offsets 48, 52, 56.
+ *
+ * Arguments come from the ARG_DEST, ARG_MATRIX and ARG_SOURCE macros and
+ * vector fields from the V4F_* offsets, all defined outside this file
+ * section (NOTE(review): confirm their definitions in the headers).
+ *
+ * Side effects on the destination vector: size is set to 4, VEC_SIZE_4 is
+ * OR-ed into flags, and count is copied from the source vector.
+ *
+ * Register use inside the loop:
+ *     EAX = source pointer    EDI = source stride in bytes
+ *     EDX = output pointer    ESI = remaining vertex count
+ *     MM0 = m11|m00   MM1 = m31|m30   MM2 = m32|m22  (loop-invariant)
+ * ESI and EDI are saved on entry and restored before RET.
+ */
+GLOBL GLNAME( _mesa_3dnow_transform_points4_3d_no_rot )
+HIDDEN(_mesa_3dnow_transform_points4_3d_no_rot)
+GLNAME( _mesa_3dnow_transform_points4_3d_no_rot ):
+    PUSH_L    ( ESI )
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+/* Update destination bookkeeping: size = 4, flags |= VEC_SIZE_4, count = source count. */
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+    PUSH_L    ( EDI )
+/* From here on ECX holds the matrix pointer. */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )
+    MOV_L     ( ESI, ECX )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )
+/* Nothing to do for an empty source vector: jump straight to the epilogue. */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP3NRR_2 ) )
+/* Load every matrix coefficient the loop needs into MM0-MM2 up front. */
+    MOVD      ( REGIND(ECX), MM0 )      /*                 | m00             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )  /* m11             | m00             */
+    MOVD      ( REGOFF(40, ECX), MM2 )  /*                 | m22             */
+    PUNPCKLDQ ( REGOFF(56, ECX), MM2 )  /* m32             | m22             */
+    MOVQ      ( REGOFF(48, ECX), MM1 )  /* m31             | m30             */
+ALIGNTEXT16
+LLBL( G3TP3NRR_1 ):
+    PREFETCHW ( REGOFF(32, EDX) )       /* prefetch 2 vertices ahead         */
+    MOVQ      ( REGIND(EAX), MM4 )      /* x1              | x0              */
+    MOVQ      ( REGOFF(8, EAX), MM5 )   /* x3              | x2              */
+/* x3 alone (upper half zero); used below so PFACC passes x3 through as r3. */
+    MOVD      ( REGOFF(12, EAX), MM7 )  /*                 | x3              */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+/* EAX already points at the next vertex here, so this prefetches 32 bytes beyond it. */
+    PREFETCH  ( REGOFF(32, EAX) )       /* hopefully stride is zero          */
+    MOVQ      ( MM5, MM6 )              /* x3              | x2              */
+    PFMUL     ( MM0, MM4 )              /* x1*m11          | x0*m00          */
+    PUNPCKHDQ ( MM6, MM6 )              /* x3              | x3              */
+    PFMUL     ( MM2, MM5 )              /* x3*m32          | x2*m22          */
+    PFMUL     ( MM1, MM6 )              /* x3*m31          | x3*m30          */
+    PFACC     ( MM7, MM5 )              /* x3              | x2*m22+x3*m32   */
+    PFADD     ( MM6, MM4 )              /* x1*m11+x3*m31   | x0*m00+x3*m30   */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+/* EDX was already advanced by 16, hence the negative offsets for the stores. */
+    MOVQ      ( MM4, REGOFF(-16, EDX) ) /* write r0, r1                      */
+    MOVQ      ( MM5, REGOFF(-8, EDX) )  /* write r2, r3                      */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP3NRR_1 ) )    /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TP3NRR_2 ):
+/* Common epilogue: leave MMX/3DNow! state, restore saved registers. */
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+ALIGNTEXT16
+/*
+ * _mesa_3dnow_transform_points4_2d
+ *
+ * Transforms only the first two components of each 4-component float
+ * vertex; the last two are copied through.  For each input
+ * (x0, x1, x2, x3) it writes 16 bytes:
+ *     r0 = x0*m00 + x1*m10 + x3*m30
+ *     r1 = x0*m01 + x1*m11 + x3*m31
+ *     r2 = x2
+ *     r3 = x3
+ * In this routine's comments m00, m01 are the floats at matrix byte
+ * offsets 0, 4; m10, m11 those at 16, 20; m30, m31 those at 48, 52.
+ *
+ * Arguments come from the ARG_DEST, ARG_MATRIX and ARG_SOURCE macros and
+ * vector fields from the V4F_* offsets, all defined outside this file
+ * section (NOTE(review): confirm their definitions in the headers).
+ *
+ * Side effects on the destination vector: size is set to 4, VEC_SIZE_4 is
+ * OR-ed into flags, and count is copied from the source vector.
+ *
+ * Register use inside the loop:
+ *     EAX = source pointer    EDI = source stride in bytes
+ *     EDX = output pointer    ESI = remaining vertex count
+ *     MM0 = m10|m00   MM1 = m11|m01   MM2 = m31|m30  (loop-invariant)
+ * ESI and EDI are saved on entry and restored before RET.
+ */
+GLOBL GLNAME( _mesa_3dnow_transform_points4_2d )
+HIDDEN(_mesa_3dnow_transform_points4_2d)
+GLNAME( _mesa_3dnow_transform_points4_2d ):
+    PUSH_L    ( ESI )
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+/* Update destination bookkeeping: size = 4, flags |= VEC_SIZE_4, count = source count. */
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+    PUSH_L    ( EDI )
+/* From here on ECX holds the matrix pointer. */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )
+    MOV_L     ( ESI, ECX )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )
+/* Nothing to do for an empty source vector: jump straight to the epilogue. */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP2R_2 ) )
+/* Gather the coefficients per output component so one PFACC can sum each dot product. */
+    MOVD      ( REGIND(ECX), MM0 )      /*                 | m00             */
+    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )  /* m10             | m00             */
+    MOVD      ( REGOFF(4, ECX), MM1 )   /*                 | m01             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )  /* m11             | m01             */
+    MOVQ      ( REGOFF(48, ECX), MM2 )  /* m31             | m30             */
+ALIGNTEXT16
+LLBL( G3TP2R_1 ):
+    PREFETCHW ( REGOFF(32, EDX) )       /* prefetch 2 vertices ahead         */
+    MOVQ      ( REGIND(EAX), MM3 )      /* x1              | x0              */
+    MOVQ      ( REGOFF(8, EAX), MM5 )   /* x3              | x2              */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+/* EAX now points at the next vertex; start fetching it. */
+    PREFETCH  ( REGIND(EAX) )
+    MOVQ      ( MM3, MM4 )              /* x1              | x0              */
+    MOVQ      ( MM5, MM6 )              /* x3              | x2              */
+    PFMUL     ( MM1, MM4 )              /* x1*m11          | x0*m01          */
+    PUNPCKHDQ ( MM6, MM6 )              /* x3              | x3              */
+    PFMUL     ( MM0, MM3 )              /* x1*m10          | x0*m00          */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    PFACC     ( MM4, MM3 )              /* x0*m01+x1*m11   | x0*m00+x1*m10   */
+    PFMUL     ( MM2, MM6 )              /* x3*m31          | x3*m30          */
+    PFADD     ( MM6, MM3 )              /* r1              | r0              */
+/* MM5 still holds the untouched input pair, so r2 = x2 and r3 = x3. */
+    MOVQ      ( MM5, REGOFF(-8, EDX) )  /* write r2, r3                      */
+    MOVQ      ( MM3, REGOFF(-16, EDX) ) /* write r0, r1                      */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP2R_1 ) )      /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TP2R_2 ):
+/* Common epilogue: leave MMX/3DNow! state, restore saved registers. */
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+ALIGNTEXT16
+/*
+ * _mesa_3dnow_transform_points4_2d_no_rot
+ *
+ * Scales and offsets the first two components of each 4-component float
+ * vertex; the last two are copied through.  For each input
+ * (x0, x1, x2, x3) it writes 16 bytes:
+ *     r0 = x0*m00 + x3*m30
+ *     r1 = x1*m11 + x3*m31
+ *     r2 = x2
+ *     r3 = x3
+ * In this routine's comments m00, m11 are the floats at matrix byte
+ * offsets 0, 20 and m30, m31 those at offsets 48, 52.
+ *
+ * Arguments come from the ARG_DEST, ARG_MATRIX and ARG_SOURCE macros and
+ * vector fields from the V4F_* offsets, all defined outside this file
+ * section (NOTE(review): confirm their definitions in the headers).
+ *
+ * Side effects on the destination vector: size is set to 4, VEC_SIZE_4 is
+ * OR-ed into flags, and count is copied from the source vector.
+ *
+ * Register use inside the loop:
+ *     EAX = source pointer    EDI = source stride in bytes
+ *     EDX = output pointer    ESI = remaining vertex count
+ *     MM0 = m11|m00   MM1 = m31|m30  (loop-invariant)
+ * ESI and EDI are saved on entry and restored before RET.
+ */
+GLOBL GLNAME( _mesa_3dnow_transform_points4_2d_no_rot )
+HIDDEN(_mesa_3dnow_transform_points4_2d_no_rot)
+GLNAME( _mesa_3dnow_transform_points4_2d_no_rot ):
+    PUSH_L    ( ESI )
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+/* Update destination bookkeeping: size = 4, flags |= VEC_SIZE_4, count = source count. */
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+    PUSH_L    ( EDI )
+/* From here on ECX holds the matrix pointer. */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )
+    MOV_L     ( ESI, ECX )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )
+/* Nothing to do for an empty source vector: jump straight to the epilogue. */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP2NRR_3 ) )
+/* Load the two scale factors and the two offsets once, outside the loop. */
+    MOVD      ( REGIND(ECX), MM0 )      /*                 | m00             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )  /* m11             | m00             */
+    MOVQ      ( REGOFF(48, ECX), MM1 )  /* m31             | m30             */
+ALIGNTEXT16
+LLBL( G3TP2NRR_2 ):
+    PREFETCHW ( REGOFF(32, EDX) )       /* prefetch 2 vertices ahead         */
+    MOVQ      ( REGIND(EAX), MM4 )      /* x1              | x0              */
+    MOVQ      ( REGOFF(8, EAX), MM5 )   /* x3              | x2              */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+/* EAX now points at the next vertex; start fetching it. */
+    PREFETCH  ( REGIND(EAX) )
+    PFMUL     ( MM0, MM4 )              /* x1*m11          | x0*m00          */
+    MOVQ      ( MM5, MM6 )              /* x3              | x2              */
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+    PUNPCKHDQ ( MM6, MM6 )              /* x3              | x3              */
+    PFMUL     ( MM1, MM6 )              /* x3*m31          | x3*m30          */
+    PFADD     ( MM4, MM6 )              /* x1*m11+x3*m31   | x0*m00+x3*m30   */
+    MOVQ      ( MM6, REGOFF(-16, EDX) ) /* write r0, r1                      */
+/* MM5 still holds the untouched input pair, so r2 = x2 and r3 = x3. */
+    MOVQ      ( MM5, REGOFF(-8, EDX) )  /* write r2, r3                      */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP2NRR_2 ) )    /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TP2NRR_3 ):
+/* Common epilogue: leave MMX/3DNow! state, restore saved registers. */
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+ALIGNTEXT16
+/*
+ * _mesa_3dnow_transform_points4_identity
+ *
+ * Copies each 4-component float vertex (16 bytes) from the source vector
+ * to the destination unchanged, stepping the source by its stride and the
+ * destination by 16 bytes per vertex.
+ *
+ * Arguments come from the ARG_DEST, ARG_MATRIX and ARG_SOURCE macros and
+ * vector fields from the V4F_* offsets, all defined outside this file
+ * section (NOTE(review): confirm their definitions in the headers).
+ * The matrix pointer is loaded into ECX like in the sibling routines but
+ * is never dereferenced here.
+ *
+ * Side effects on the destination vector: size is set to 4, VEC_SIZE_4 is
+ * OR-ed into flags, and count is copied from the source vector.
+ *
+ * Register use inside the loop:
+ *     EAX = source pointer    EDI = source stride in bytes
+ *     EDX = output pointer    ESI = remaining vertex count
+ * ESI and EDI are saved on entry and restored before RET.
+ */
+GLOBL GLNAME( _mesa_3dnow_transform_points4_identity )
+HIDDEN(_mesa_3dnow_transform_points4_identity)
+GLNAME( _mesa_3dnow_transform_points4_identity ):
+    PUSH_L    ( ESI )
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+/* Update destination bookkeeping: size = 4, flags |= VEC_SIZE_4, count = source count. */
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+    PUSH_L    ( EDI )
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )
+    MOV_L     ( ESI, ECX )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )
+/* Nothing to do for an empty source vector: jump straight to the epilogue. */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPIR_2 ) )
+ALIGNTEXT16
+LLBL( G3TPIR_1 ):
+    PREFETCHW ( REGOFF(32, EDX) )       /* prefetch 2 vertices ahead         */
+    MOVQ      ( REGIND(EAX), MM0 )      /* x1              | x0              */
+    MOVQ      ( REGOFF(8, EAX), MM1 )   /* x3              | x2              */
+    ADD_L     ( EDI, EAX )              /* next vertex                       */
+/* EAX now points at the next vertex; start fetching it. */
+    PREFETCH  ( REGIND(EAX) )
+    ADD_L     ( CONST(16), EDX )        /* next r                            */
+/* EDX was already advanced by 16, hence the negative offsets for the stores. */
+    MOVQ      ( MM0, REGOFF(-16, EDX) ) /* r1              | r0              */
+    MOVQ      ( MM1, REGOFF(-8, EDX) )  /* r3              | r2              */
+    DEC_L     ( ESI )                   /* decrement vertex counter          */
+    JNZ       ( LLBL( G3TPIR_1 ) )      /* cnt > 0 ? -> process next vertex  */
+LLBL( G3TPIR_2 ):
+/* Common epilogue: leave MMX/3DNow! state, restore saved registers. */
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+#endif
+/* On ELF/Linux builds, emit an empty .note.GNU-stack section.               */
+/* NOTE(review): presumably this marks the object as not requiring an        */
+/* executable stack -- confirm against the GNU assembler/linker documentation. */
+#if defined (__ELF__) && defined (__linux__)
+        .section .note.GNU-stack,"",%progbits
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/Makefile.am
 ,0 → 1,49
+# Copyright © 2012 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+# Everything below applies only when the HAVE_X86_ASM conditional is true.
+if HAVE_X86_ASM
+# Preprocessor flags: project include directories plus $(DEFINES).
+AM_CPPFLAGS = \
+        -I$(top_srcdir)/include \
+        -I$(top_srcdir)/src/mesa \
+        -I$(top_srcdir)/src/mapi \
+        $(DEFINES)
+# gen_matypes is a build-time-only program (noinst: it is never installed).
+noinst_PROGRAMS = gen_matypes
+gen_matypes_SOURCES = gen_matypes.c
+# matypes.h is generated: listed in BUILT_SOURCES and removed by "make clean".
+BUILT_SOURCES = matypes.h
+CLEANFILES = matypes.h
+# Two ways to produce matypes.h, selected by the GEN_ASM_OFFSETS conditional:
+#  - true:  compile gen_matypes.c to assembly (-S) with -DASM_OFFSETS and
+#           filter the output through sed.  sed prints only lines starting
+#           with "->", strips that marker, and on lines containing a "$"
+#           prepends "#define " and removes the first "$".
+#  - false: build the gen_matypes program and redirect its standard output
+#           into matypes.h.
+if GEN_ASM_OFFSETS
+matypes.h: $(gen_matypes_SOURCES)
+        $(AM_V_GEN)$(COMPILE) $< -DASM_OFFSETS -S -o - | \
+                sed -n '/^->/{s:^->::;/[$$]/{s:^:#define :;s:[$$]::};p}' > $@
+else
+matypes.h: gen_matypes
+        $(AM_V_GEN)./gen_matypes > $@
+endif
+endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/Makefile.in
 ,0 → 1,784
+# Makefile.in generated by automake 1.14 from Makefile.am.
+# @configure_input@
+# Copyright (C) 1994-2013 Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+@SET_MAKE@
+# Copyright © 2012 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+VPATH = @srcdir@
+am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \   ]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs   ]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+@HAVE_X86_ASM_TRUE@noinst_PROGRAMS = gen_matypes$(EXEEXT)
+subdir = src/mesa/x86
+DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
+        $(top_srcdir)/bin/depcomp
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_prog_bison.m4 \
+        $(top_srcdir)/m4/ax_prog_cc_for_build.m4 \
+        $(top_srcdir)/m4/ax_prog_cxx_for_build.m4 \
+        $(top_srcdir)/m4/ax_prog_flex.m4 \
+        $(top_srcdir)/m4/ax_pthread.m4 \
+        $(top_srcdir)/m4/ax_python_module.m4 \
+        $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
+        $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+        $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+        $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+PROGRAMS = $(noinst_PROGRAMS)
+am__gen_matypes_SOURCES_DIST = gen_matypes.c
+@HAVE_X86_ASM_TRUE@am_gen_matypes_OBJECTS = gen_matypes.$(OBJEXT)
+gen_matypes_OBJECTS = $(am_gen_matypes_OBJECTS)
+gen_matypes_LDADD = $(LDADD)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp = $(SHELL) $(top_srcdir)/bin/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+        $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+        $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+        $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+        $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 =
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+        $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+        $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 =
+SOURCES = $(gen_matypes_SOURCES)
+DIST_SOURCES = $(am__gen_matypes_SOURCES_DIST)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BUILD_EXEEXT = @BUILD_EXEEXT@
+BUILD_OBJEXT = @BUILD_OBJEXT@
+CC = @CC@
+CCAS = @CCAS@
+CCASDEPMODE = @CCASDEPMODE@
+CCASFLAGS = @CCASFLAGS@
+CCDEPMODE = @CCDEPMODE@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@
+CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@
+CLOCK_LIB = @CLOCK_LIB@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPPFLAGS_FOR_BUILD = @CPPFLAGS_FOR_BUILD@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXCPPFLAGS_FOR_BUILD = @CXXCPPFLAGS_FOR_BUILD@
+CXXCPP_FOR_BUILD = @CXXCPP_FOR_BUILD@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CXXFLAGS_FOR_BUILD = @CXXFLAGS_FOR_BUILD@
+CXX_FOR_BUILD = @CXX_FOR_BUILD@
+CYGPATH_W = @CYGPATH_W@
+DEFINES = @DEFINES@
+DEFINES_FOR_BUILD = @DEFINES_FOR_BUILD@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DLOPEN_LIBS = @DLOPEN_LIBS@
+DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
+DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
+DRIGL_CFLAGS = @DRIGL_CFLAGS@
+DRIGL_LIBS = @DRIGL_LIBS@
+DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
+DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@
+DRI_LIB_DEPS = @DRI_LIB_DEPS@
+DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGL_CFLAGS = @EGL_CFLAGS@
+EGL_CLIENT_APIS = @EGL_CLIENT_APIS@
+EGL_DRIVER_INSTALL_DIR = @EGL_DRIVER_INSTALL_DIR@
+EGL_LIB_DEPS = @EGL_LIB_DEPS@
+EGL_LIB_GLOB = @EGL_LIB_GLOB@
+EGL_LIB_NAME = @EGL_LIB_NAME@
+EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
+EGL_PLATFORMS = @EGL_PLATFORMS@
+EGREP = @EGREP@
+ELF_LIB = @ELF_LIB@
+EXEEXT = @EXEEXT@
+EXPAT_INCLUDES = @EXPAT_INCLUDES@
+FGREP = @FGREP@
+FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
+FREEDRENO_LIBS = @FREEDRENO_LIBS@
+GALLIUM_DRI_LIB_DEPS = @GALLIUM_DRI_LIB_DEPS@
+GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
+GALLIUM_PIPE_LOADER_LIBS = @GALLIUM_PIPE_LOADER_LIBS@
+GALLIUM_PIPE_LOADER_XCB_CFLAGS = @GALLIUM_PIPE_LOADER_XCB_CFLAGS@
+GALLIUM_PIPE_LOADER_XCB_LIBS = @GALLIUM_PIPE_LOADER_XCB_LIBS@
+GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
+GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
+GLAPI_LIB_GLOB = @GLAPI_LIB_GLOB@
+GLAPI_LIB_NAME = @GLAPI_LIB_NAME@
+GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@
+GLESv1_CM_LIB_GLOB = @GLESv1_CM_LIB_GLOB@
+GLESv1_CM_LIB_NAME = @GLESv1_CM_LIB_NAME@
+GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@
+GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@
+GLESv2_LIB_GLOB = @GLESv2_LIB_GLOB@
+GLESv2_LIB_NAME = @GLESv2_LIB_NAME@
+GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@
+GLPROTO_CFLAGS = @GLPROTO_CFLAGS@
+GLPROTO_LIBS = @GLPROTO_LIBS@
+GLX_TLS = @GLX_TLS@
+GL_LIB = @GL_LIB@
+GL_LIB_DEPS = @GL_LIB_DEPS@
+GL_LIB_GLOB = @GL_LIB_GLOB@
+GL_LIB_NAME = @GL_LIB_NAME@
+GL_PC_CFLAGS = @GL_PC_CFLAGS@
+GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
+GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
+GREP = @GREP@
+HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
+INDENT = @INDENT@
+INDENT_FLAGS = @INDENT_FLAGS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+INTEL_CFLAGS = @INTEL_CFLAGS@
+INTEL_LIBS = @INTEL_LIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LDFLAGS_FOR_BUILD = @LDFLAGS_FOR_BUILD@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@
+LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@
+LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
+LIBDRM_LIBS = @LIBDRM_LIBS@
+LIBDRM_XORG_CFLAGS = @LIBDRM_XORG_CFLAGS@
+LIBDRM_XORG_LIBS = @LIBDRM_XORG_LIBS@
+LIBKMS_XORG_CFLAGS = @LIBKMS_XORG_CFLAGS@
+LIBKMS_XORG_LIBS = @LIBKMS_XORG_LIBS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@
+LIBUDEV_LIBS = @LIBUDEV_LIBS@
+LIB_DIR = @LIB_DIR@
+LIPO = @LIPO@
+LLVM_BINDIR = @LLVM_BINDIR@
+LLVM_CFLAGS = @LLVM_CFLAGS@
+LLVM_CONFIG = @LLVM_CONFIG@
+LLVM_CPPFLAGS = @LLVM_CPPFLAGS@
+LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
+LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
+LLVM_LDFLAGS = @LLVM_LDFLAGS@
+LLVM_LIBDIR = @LLVM_LIBDIR@
+LLVM_LIBS = @LLVM_LIBS@
+LLVM_VERSION = @LLVM_VERSION@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKE = @MAKE@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MESA_LLVM = @MESA_LLVM@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@
+NOUVEAU_LIBS = @NOUVEAU_LIBS@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OPENCL_LIB_INSTALL_DIR = @OPENCL_LIB_INSTALL_DIR@
+OSMESA_LIB = @OSMESA_LIB@
+OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
+OSMESA_LIB_NAME = @OSMESA_LIB_NAME@
+OSMESA_MESA_DEPS = @OSMESA_MESA_DEPS@
+OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
+OSMESA_PC_REQ = @OSMESA_PC_REQ@
+OSMESA_VERSION = @OSMESA_VERSION@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PERL = @PERL@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+POSIX_SHELL = @POSIX_SHELL@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+PYTHON2 = @PYTHON2@
+RADEON_CFLAGS = @RADEON_CFLAGS@
+RADEON_LIBS = @RADEON_LIBS@
+RANLIB = @RANLIB@
+SED = @SED@
+SELINUX_LIBS = @SELINUX_LIBS@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VDPAU_CFLAGS = @VDPAU_CFLAGS@
+VDPAU_LIBS = @VDPAU_LIBS@
+VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
+VDPAU_MAJOR = @VDPAU_MAJOR@
+VDPAU_MINOR = @VDPAU_MINOR@
+VERSION = @VERSION@
+VG_LIB_DEPS = @VG_LIB_DEPS@
+VG_LIB_GLOB = @VG_LIB_GLOB@
+VG_LIB_NAME = @VG_LIB_NAME@
+VG_PC_LIB_PRIV = @VG_PC_LIB_PRIV@
+VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
+VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
+WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
+WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_SCANNER = @WAYLAND_SCANNER@
+X11_INCLUDES = @X11_INCLUDES@
+XA_MAJOR = @XA_MAJOR@
+XA_MINOR = @XA_MINOR@
+XA_TINY = @XA_TINY@
+XA_VERSION = @XA_VERSION@
+XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
+XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
+XEXT_CFLAGS = @XEXT_CFLAGS@
+XEXT_LIBS = @XEXT_LIBS@
+XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
+XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
+XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
+XLIBGL_LIBS = @XLIBGL_LIBS@
+XORG_CFLAGS = @XORG_CFLAGS@
+XORG_DRIVER_INSTALL_DIR = @XORG_DRIVER_INSTALL_DIR@
+XORG_LIBS = @XORG_LIBS@
+XVMC_CFLAGS = @XVMC_CFLAGS@
+XVMC_LIBS = @XVMC_LIBS@
+XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
+XVMC_MAJOR = @XVMC_MAJOR@
+XVMC_MINOR = @XVMC_MINOR@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CC_FOR_BUILD = @ac_ct_CC_FOR_BUILD@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_CXX_FOR_BUILD = @ac_ct_CXX_FOR_BUILD@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+ax_pthread_config = @ax_pthread_config@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+@HAVE_X86_ASM_TRUE@AM_CPPFLAGS = \
+@HAVE_X86_ASM_TRUE@     -I$(top_srcdir)/include \
+@HAVE_X86_ASM_TRUE@     -I$(top_srcdir)/src/mesa \
+@HAVE_X86_ASM_TRUE@     -I$(top_srcdir)/src/mapi \
+@HAVE_X86_ASM_TRUE@     $(DEFINES)
+@HAVE_X86_ASM_TRUE@gen_matypes_SOURCES = gen_matypes.c
+@HAVE_X86_ASM_TRUE@BUILT_SOURCES = matypes.h
+@HAVE_X86_ASM_TRUE@CLEANFILES = matypes.h
+all: $(BUILT_SOURCES)
+        $(MAKE) $(AM_MAKEFLAGS) all-am
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
+        @for dep in $?; do \
+          case '$(am__configure_deps)' in \
+            *$$dep*) \
+              ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+                && { if test -f $@; then exit 0; else break; fi; }; \
+              exit 1;; \
+          esac; \
+        done; \
+        echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/mesa/x86/Makefile'; \
+        $(am__cd) $(top_srcdir) && \
+          $(AUTOMAKE) --foreign src/mesa/x86/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+        @case '$?' in \
+          *config.status*) \
+            cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+          *) \
+            echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+            cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+        esac;
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+        cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(top_srcdir)/configure:  $(am__configure_deps)
+        cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
+        cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+clean-noinstPROGRAMS:
+        @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \
+        echo " rm -f" $$list; \
+        rm -f $$list || exit $$?; \
+        test -n "$(EXEEXT)" || exit 0; \
+        list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+        echo " rm -f" $$list; \
+        rm -f $$list
+gen_matypes$(EXEEXT): $(gen_matypes_OBJECTS) $(gen_matypes_DEPENDENCIES) $(EXTRA_gen_matypes_DEPENDENCIES)
+        @rm -f gen_matypes$(EXEEXT)
+        $(AM_V_CCLD)$(LINK) $(gen_matypes_OBJECTS) $(gen_matypes_LDADD) $(LIBS)
+mostlyclean-compile:
+        -rm -f *.$(OBJEXT)
+distclean-compile:
+        -rm -f *.tab.c
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen_matypes.Po@am__quote@
+.c.o:
+@am__fastdepCC_TRUE@    $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@    $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@       $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@       DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@   $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+.c.obj:
+@am__fastdepCC_TRUE@    $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCC_TRUE@    $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@       $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@       DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@   $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+.c.lo:
+@am__fastdepCC_TRUE@    $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@    $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@       $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@       DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@   $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
+mostlyclean-libtool:
+        -rm -f *.lo
+clean-libtool:
+        -rm -rf .libs _libs
+ID: $(am__tagged_files)
+        $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+        set x; \
+        here=`pwd`; \
+        $(am__define_uniq_tagged_files); \
+        shift; \
+        if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+          test -n "$$unique" || unique=$$empty_fix; \
+          if test $$# -gt 0; then \
+            $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+              "$$@" $$unique; \
+          else \
+            $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+              $$unique; \
+          fi; \
+        fi
+ctags: ctags-am
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+        $(am__define_uniq_tagged_files); \
+        test -z "$(CTAGS_ARGS)$$unique" \
+          || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+             $$unique
+GTAGS:
+        here=`$(am__cd) $(top_builddir) && pwd` \
+          && $(am__cd) $(top_srcdir) \
+          && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+cscopelist-am: $(am__tagged_files)
+        list='$(am__tagged_files)'; \
+        case "$(srcdir)" in \
+          [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+          *) sdir=$(subdir)/$(srcdir) ;; \
+        esac; \
+        for i in $$list; do \
+          if test -f "$$i"; then \
+            echo "$(subdir)/$$i"; \
+          else \
+            echo "$$sdir/$$i"; \
+          fi; \
+        done >> $(top_builddir)/cscope.files
+distclean-tags:
+        -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+distdir: $(DISTFILES)
+        @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+        topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+        list='$(DISTFILES)'; \
+          dist_files=`for file in $$list; do echo $$file; done | \
+          sed -e "s|^$$srcdirstrip/||;t" \
+              -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+        case $$dist_files in \
+          */*) $(MKDIR_P) `echo "$$dist_files" | \
+                           sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                           sort -u` ;; \
+        esac; \
+        for file in $$dist_files; do \
+          if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+          if test -d $$d/$$file; then \
+            dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+            if test -d "$(distdir)/$$file"; then \
+              find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+            fi; \
+            if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+              cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+              find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+            fi; \
+            cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+          else \
+            test -f "$(distdir)/$$file" \
+            || cp -p $$d/$$file "$(distdir)/$$file" \
+            || exit 1; \
+          fi; \
+        done
+check-am: all-am
+check: $(BUILT_SOURCES)
+        $(MAKE) $(AM_MAKEFLAGS) check-am
+all-am: Makefile $(PROGRAMS)
+installdirs:
+install: $(BUILT_SOURCES)
+        $(MAKE) $(AM_MAKEFLAGS) install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+install-am: all-am
+        @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+installcheck: installcheck-am
+install-strip:
+        if test -z '$(STRIP)'; then \
+          $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+            install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+              install; \
+        else \
+          $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+            install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+            "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+        fi
+mostlyclean-generic:
+clean-generic:
+        -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
+distclean-generic:
+        -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+        -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+maintainer-clean-generic:
+        @echo "This command is intended for maintainers to use"
+        @echo "it deletes files that may require special tools to rebuild."
+        -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES)
+clean: clean-am
+clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \
+        mostlyclean-am
+distclean: distclean-am
+        -rm -rf ./$(DEPDIR)
+        -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+        distclean-tags
+dvi: dvi-am
+dvi-am:
+html: html-am
+html-am:
+info: info-am
+info-am:
+install-data-am:
+install-dvi: install-dvi-am
+install-dvi-am:
+install-exec-am:
+install-html: install-html-am
+install-html-am:
+install-info: install-info-am
+install-info-am:
+install-man:
+install-pdf: install-pdf-am
+install-pdf-am:
+install-ps: install-ps-am
+install-ps-am:
+installcheck-am:
+maintainer-clean: maintainer-clean-am
+        -rm -rf ./$(DEPDIR)
+        -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+mostlyclean: mostlyclean-am
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+        mostlyclean-libtool
+pdf: pdf-am
+pdf-am:
+ps: ps-am
+ps-am:
+uninstall-am:
+.MAKE: all check install install-am install-strip
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
+        clean-libtool clean-noinstPROGRAMS cscopelist-am ctags \
+        ctags-am distclean distclean-compile distclean-generic \
+        distclean-libtool distclean-tags distdir dvi dvi-am html \
+        html-am info info-am install install-am install-data \
+        install-data-am install-dvi install-dvi-am install-exec \
+        install-exec-am install-html install-html-am install-info \
+        install-info-am install-man install-pdf install-pdf-am \
+        install-ps install-ps-am install-strip installcheck \
+        installcheck-am installdirs maintainer-clean \
+        maintainer-clean-generic mostlyclean mostlyclean-compile \
+        mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+        tags tags-am uninstall uninstall-am
+# matypes.h generation; both variants are active only under HAVE_X86_ASM.
+# With GEN_ASM_OFFSETS: compile the first gen_matypes source to assembly on
+# stdout with -DASM_OFFSETS, keep only the lines starting with "->", strip
+# that marker, and for lines containing "$" prepend "#define " and drop the
+# first "$".
+@GEN_ASM_OFFSETS_TRUE@@HAVE_X86_ASM_TRUE@matypes.h: $(gen_matypes_SOURCES)
+@GEN_ASM_OFFSETS_TRUE@@HAVE_X86_ASM_TRUE@       $(AM_V_GEN)$(COMPILE) $< -DASM_OFFSETS -S -o - | \
+@GEN_ASM_OFFSETS_TRUE@@HAVE_X86_ASM_TRUE@               sed -n '/^->/{s:^->::;/[$$]/{s:^:#define :;s:[$$]::};p}' > $@
+# Without GEN_ASM_OFFSETS: run the built gen_matypes program and capture its
+# standard output as the header.
+@GEN_ASM_OFFSETS_FALSE@@HAVE_X86_ASM_TRUE@matypes.h: gen_matypes
+@GEN_ASM_OFFSETS_FALSE@@HAVE_X86_ASM_TRUE@      $(AM_V_GEN)./gen_matypes > $@
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/assyntax.h
 ,0 → 1,1728
+#ifndef __ASSYNTAX_H__
+#define __ASSYNTAX_H__
+/*
+ * Copyright 1992 Vrije Universiteit, The Netherlands
+ *
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for any purpose and without fee is hereby granted, provided
+ * that the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of the Vrije Universiteit not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  The Vrije Universiteit makes no
+ * representations about the suitability of this software for any purpose.
+ * It is provided "as is" without express or implied warranty.
+ *
+ * The Vrije Universiteit DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
+ * IN NO EVENT SHALL The Vrije Universiteit BE LIABLE FOR ANY SPECIAL,
+ * INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+/*
+ * assyntax.h
+ *
+ * Select the syntax appropriate to the 386 assembler being used
+ * To add support for more assemblers add more columns to the CHOICE
+ * macro.  Note that register names must also have uppercase names
+ * to avoid macro recursion. e.g., #define ah %ah recurses!
+ *
+ * NB 1.  Some of the macros for certain assemblers imply that the code is to
+ *        run in protected mode!!  Caveat emptor.
+ *
+ * NB 2.  486 specific instructions are not included.  This is to discourage
+ *        their accidental use in code that is intended to run on 386 and 486
+ *        systems.
+ *
+ * Supported assemblers:
+ *
+ * (a) AT&T SysVr4 as(1):       define ATT_ASSEMBLER
+ * (b) GNU Assembler gas:       define GNU_ASSEMBLER (default)
+ * (c) Amsterdam Compiler kit:  define ACK_ASSEMBLER
+ * (d) The Netwide Assembler:   define NASM_ASSEMBLER
+ * (e) Microsoft Assembler:     define MASM_ASSEMBLER (UNTESTED!)
+ *
+ * The following naming conventions have been used to identify the various
+ * data types:
+ *              _SR = segment register version
+ *      Integer:
+ *              _Q = quadword   = 64 bits
+ *              _L = long       = 32 bits
+ *              _W = short      = 16 bits
+ *              _B = byte       =  8 bits
+ *      Floating-point:
+ *              _X = m80real    = 80 bits
+ *              _D = double     = 64 bits
+ *              _S = single     = 32 bits
+ *
+ * Author: Gregory J. Sharp, Sept 1992
+ *         Vrije Universiteit, Amsterdam, The Netherlands
+ *
+ *         [support for Intel syntax added by Josh Vanderhoof, 1999]
+ */
+#if !(defined(NASM_ASSEMBLER) || defined(MASM_ASSEMBLER))
+/* Default to ATT_ASSEMBLER when SVR4 or SYSV are defined */
+#if (defined(SVR4) || defined(SYSV)) && !defined(GNU_ASSEMBLER)
+#define ATT_ASSEMBLER
+#endif
+#if !defined(ATT_ASSEMBLER) && !defined(GNU_ASSEMBLER) && !defined(ACK_ASSEMBLER)
+#define GNU_ASSEMBLER
+#endif
+/* Token pasting helpers.  With an ANSI preprocessor use the ## operator;
+ * otherwise fall back to an empty comment between the arguments
+ * (presumably relies on traditional preprocessors deleting comments without
+ * leaving whitespace behind -- confirm before porting to a new cpp).
+ */
+#if (defined(__STDC__) && !defined(UNIXCPP)) || (defined (sun) && defined (i386) && defined (SVR4) && defined (__STDC__) && !defined (__GNUC__))
+#define CONCAT(x, y)            x ## y
+#define CONCAT3(x, y, z)        x ## y ## z
+#else
+#define CONCAT(x, y)            x/**/y
+#define CONCAT3(x, y, z)        x/**/y/**/z
+#endif
+#ifdef ACK_ASSEMBLER
+/* Assume we write code for 32-bit protected mode! */
+/* Redefine register names for the ACK assembler */
+#define AL              al
+#define AH              ah
+#define AX              ax
+#define EAX             ax
+#define BL              bl
+#define BH              bh
+#define BX              bx
+#define EBX             bx
+#define CL              cl
+#define CH              ch
+#define CX              cx
+#define ECX             cx
+#define DL              dl
+#define DH              dh
+#define DX              dx
+#define EDX             dx
+#define BP              bp
+#define EBP             bp
+#define SI              si
+#define ESI             si
+#define DI              di
+#define EDI             di
+#define SP              sp
+#define ESP             sp
+#define CS              cs
+#define SS              ss
+#define DS              ds
+#define ES              es
+#define FS              fs
+#define GS              gs
+/* Control Registers */
+#define CR0             cr0
+#define CR1             cr1
+#define CR2             cr2
+#define CR3             cr3
+/* Debug Registers */
+#define DR0             dr0
+#define DR1             dr1
+#define DR2             dr2
+#define DR3             dr3
+#define DR4             dr4
+#define DR5             dr5
+#define DR6             dr6
+#define DR7             dr7
+/* Floating-point Stack */
+#define ST              st
+#define AS_BEGIN        .sect .text; .sect .rom; .sect .data; .sect .bss; .sect .text
+#define _WTOG           o16     /* word toggle for _W instructions */
+#define _LTOG                   /* long toggle for _L instructions */
+#define ADDR_TOGGLE     a16
+#define OPSZ_TOGGLE     o16
+#define USE16           .use16
+#define USE32           .use32
+#define CHOICE(a,b,c)   c
+#else /* AT&T or GAS */
+/* Redefine register names for GAS & AT&T assemblers */
+#define AL              %al
+#define AH              %ah
+#define AX              %ax
+#define EAX             %eax
+#define BL              %bl
+#define BH              %bh
+#define BX              %bx
+#define EBX             %ebx
+#define CL              %cl
+#define CH              %ch
+#define CX              %cx
+#define ECX             %ecx
+#define DL              %dl
+#define DH              %dh
+#define DX              %dx
+#define EDX             %edx
+#define BP              %bp
+#define EBP             %ebp
+#define SI              %si
+#define ESI             %esi
+#define DI              %di
+#define EDI             %edi
+#define SP              %sp
+#define ESP             %esp
+#define CS              %cs
+#define SS              %ss
+#define DS              %ds
+#define ES              %es
+#define FS              %fs
+#define GS              %gs
+/* Control Registers */
+#define CR0             %cr0
+#define CR1             %cr1
+#define CR2             %cr2
+#define CR3             %cr3
+/* Debug Registers */
+#define DR0             %db0
+#define DR1             %db1
+#define DR2             %db2
+#define DR3             %db3
+#define DR4             %db4
+#define DR5             %db5
+#define DR6             %db6
+#define DR7             %db7
+/* Floating-point Stack */
+/* One helper per x87 stack slot; these are only reached through ST(x). */
+#define _STX0           %st(0)
+#define _STX1           %st(1)
+#define _STX2           %st(2)
+#define _STX3           %st(3)
+#define _STX4           %st(4)
+#define _STX5           %st(5)
+#define _STX6           %st(6)
+#define _STX7           %st(7)
+/* ST(x) pastes x onto _STX, so ST(3) selects _STX3, i.e. %st(3). */
+#define ST(x)           CONCAT(_STX,x)
+/* Top of stack: spelled %st(0) for GNU as and plain %st otherwise. */
+#ifdef GNU_ASSEMBLER
+#define ST0             %st(0)
+#else
+#define ST0             %st
+#endif
+/* MMX Registers */
+#define MM0             %mm0
+#define MM1             %mm1
+#define MM2             %mm2
+#define MM3             %mm3
+#define MM4             %mm4
+#define MM5             %mm5
+#define MM6             %mm6
+#define MM7             %mm7
+/* SSE Registers */
+#define XMM0            %xmm0
+#define XMM1            %xmm1
+#define XMM2            %xmm2
+#define XMM3            %xmm3
+#define XMM4            %xmm4
+#define XMM5            %xmm5
+#define XMM6            %xmm6
+#define XMM7            %xmm7
+#define AS_BEGIN
+#define USE16
+#define USE32
+/* CHOICE(a,b,c) picks one of three spellings of a directive or instruction:
+ * b for GNU as, a for AT&T as.  (The ACK branch defines it to pick c.)
+ * ADDR_TOGGLE and OPSZ_TOGGLE name the address-size and operand-size
+ * prefixes in the selected assembler's spelling.
+ */
+#ifdef GNU_ASSEMBLER
+#define ADDR_TOGGLE     aword
+#define OPSZ_TOGGLE     word
+#define CHOICE(a,b,c)   b
+#else
+/*
+ * AT&T ASSEMBLER SYNTAX
+ * *********************
+ */
+#define CHOICE(a,b,c)   a
+#define ADDR_TOGGLE     addr16
+#define OPSZ_TOGGLE     data16
+#endif /* GNU_ASSEMBLER */
+#endif /* ACK_ASSEMBLER */
+#define GLNAME(a)   CONCAT(_,a)
+        /****************************************/
+        /*                                      */
+        /*      Select the various choices      */
+        /*                                      */
+        /****************************************/
+/* Redefine assembler directives */
+/*********************************/
+#define GLOBL           CHOICE(.globl, .globl, .extern)
+#define GLOBAL          GLOBL
+#define EXTERN          GLOBL
+#ifndef __AOUT__
+#define ALIGNTEXT32     CHOICE(.align 32, .balign 32, .align 32)
+#define ALIGNTEXT16     CHOICE(.align 16, .balign 16, .align 16)
+#define ALIGNTEXT8      CHOICE(.align 8, .balign 8, .align 8)
+#define ALIGNTEXT4      CHOICE(.align 4, .balign 4, .align 4)
+#define ALIGNTEXT2      CHOICE(.align 2, .balign 2, .align 2)
+/* ALIGNTEXT4ifNOP is the same as ALIGNTEXT4, but only if the space is
+ * guaranteed to be filled with NOPs.  Otherwise it does nothing.
+ */
+#define ALIGNTEXT32ifNOP        CHOICE(.align 32, .balign ARG2(32,0x90), /*can't do it*/)
+#define ALIGNTEXT16ifNOP        CHOICE(.align 16, .balign ARG2(16,0x90), /*can't do it*/)
+#define ALIGNTEXT8ifNOP CHOICE(.align 8, .balign ARG2(8,0x90), /*can't do it*/)
+#define ALIGNTEXT4ifNOP CHOICE(.align 4, .balign ARG2(4,0x90), /*can't do it*/)
+#define ALIGNDATA32     CHOICE(.align 32, .balign ARG2(32,0x0), .align 32)
+#define ALIGNDATA16     CHOICE(.align 16, .balign ARG2(16,0x0), .align 16)
+#define ALIGNDATA8      CHOICE(.align 8, .balign ARG2(8,0x0), .align 8)
+#define ALIGNDATA4      CHOICE(.align 4, .balign ARG2(4,0x0), .align 4)
+#define ALIGNDATA2      CHOICE(.align 2, .balign ARG2(2,0x0), .align 2)
+#else
+/* 'as -aout' on FreeBSD doesn't have .balign */
+#define ALIGNTEXT32     CHOICE(.align 32, .align ARG2(5,0x90), .align 32)
+#define ALIGNTEXT16     CHOICE(.align 16, .align ARG2(4,0x90), .align 16)
+#define ALIGNTEXT8      CHOICE(.align 8, .align ARG2(3,0x90), .align 8)
+#define ALIGNTEXT4      CHOICE(.align 4, .align ARG2(2,0x90), .align 4)
+#define ALIGNTEXT2      CHOICE(.align 2, .align ARG2(1,0x90), .align 2)
+/* ALIGNTEXT4ifNOP is the same as ALIGNTEXT4, but only if the space is
+ * guaranteed to be filled with NOPs.  Otherwise it does nothing.
+ */
+#define ALIGNTEXT32ifNOP        CHOICE(.align 32, .align ARG2(5,0x90), /*can't do it*/)
+#define ALIGNTEXT16ifNOP        CHOICE(.align 16, .align ARG2(4,0x90), /*can't do it*/)
+#define ALIGNTEXT8ifNOP CHOICE(.align 8, .align ARG2(3,0x90), /*can't do it*/)
+#define ALIGNTEXT4ifNOP CHOICE(.align 4, .align ARG2(2,0x90), /*can't do it*/)
+#define ALIGNDATA32     CHOICE(.align 32, .align ARG2(5,0x0), .align 32)
+#define ALIGNDATA16     CHOICE(.align 16, .align ARG2(4,0x0), .align 16)
+#define ALIGNDATA8      CHOICE(.align 8, .align ARG2(3,0x0), .align 8)
+#define ALIGNDATA4      CHOICE(.align 4, .align ARG2(2,0x0), .align 4)
+#define ALIGNDATA2      CHOICE(.align 2, .align ARG2(1,0x0), .align 2)
+#endif /* __AOUT__ */
+#define FILE(s)         CHOICE(.file s, .file s, .file s)
+#define STRING(s)       CHOICE(.string s, .asciz s, .asciz s)
+#define D_LONG          CHOICE(.long, .long, .data4)
+#define D_WORD          CHOICE(.value, .short, .data2)
+#define D_BYTE          CHOICE(.byte, .byte, .data1)
+#define SPACE           CHOICE(.comm, .space, .space)
+#define COMM            CHOICE(.comm, .comm, .comm)
+#define SEG_DATA        CHOICE(.data, .data, .sect .data)
+#define SEG_TEXT        CHOICE(.text, .text, .sect .text)
+#define SEG_BSS         CHOICE(.bss, .bss, .sect .bss)
+#ifdef GNU_ASSEMBLER
+#define D_SPACE(n)      . = . + n
+#else
+#define D_SPACE(n)      .space n
+#endif
+/* Addressing Modes */
+/* Immediate Mode */
+#define ADDR(a)         CHOICE(CONCAT($,a), $a, a)
+#define CONST(a)        CHOICE(CONCAT($,a), $a, a)
+/* Indirect Mode */
+#define CONTENT(a)      CHOICE(a, a, (a))        /* take contents of variable */
+#define REGIND(a)       CHOICE((a), (a), (a))    /* Register a indirect */
+/* Register b indirect plus displacement a */
+#define REGOFF(a, b)    CHOICE(a(b), a(b), a(b))
+/* Reg indirect Base + Index + Displacement  - this is mainly for 16-bit mode
+ * which has no scaling
+ */
+#define REGBID(b,i,d)   CHOICE(d(b,i), d(b,i), d(b)(i))
+/* Reg indirect Base + (Index * Scale) */
+#define REGBIS(b,i,s)   CHOICE((b,i,s), (b,i,s), (b)(i*s))
+/* Reg indirect Base + (Index * Scale) + Displacement */
+#define REGBISD(b,i,s,d) CHOICE(d(b,i,s), d(b,i,s), d(b)(i*s))
+/* Displaced Scaled Index: */
+#define REGDIS(d,i,s)   CHOICE(d(,i,s), d(,i,s), d(i * s))
+/* Indexed Base: */
+#define REGBI(b,i)      CHOICE((b,i), (b,i), (b)(i))
+/* Displaced Base: */
+#define REGDB(d,b)      CHOICE(d(b), d(b), d(b))
+/* Variable indirect: */
+#define VARINDIRECT(var) CHOICE(*var, *var, (var))
+/* Use register contents as jump/call target: */
+#define CODEPTR(reg)    CHOICE(*reg, *reg, reg)
+/* For expressions requiring bracketing
+ * eg. (CRT0_PM | CRT_EM)
+ */
+#define EXPR(a)         CHOICE([a], (a), [a])
+#define ENOT(a)         CHOICE(0!a, ~a, ~a)
+#define EMUL(a,b)       CHOICE(a\*b, a*b, a*b)
+#define EDIV(a,b)       CHOICE(a\/b, a/b, a/b)
+/*
+ * We have to beat the problem of commas within arguments to choice.
+ * eg. choice (add a,b, add b,a) will get argument mismatch.  Luckily ANSI
+ * and other known cpp definitions evaluate arguments before substitution
+ * so the following works.
+ */
+#define ARG2(a, b)      a,b
+#define ARG3(a,b,c)     a,b,c
+/* Redefine assembler commands */
+#define AAA             CHOICE(aaa, aaa, aaa)
+#define AAD             CHOICE(aad, aad, aad)
+#define AAM             CHOICE(aam, aam, aam)
+#define AAS             CHOICE(aas, aas, aas)
+#define ADC_L(a, b)     CHOICE(adcl ARG2(a,b), adcl ARG2(a,b), _LTOG adc ARG2(b,a))
+#define ADC_W(a, b)     CHOICE(adcw ARG2(a,b), adcw ARG2(a,b), _WTOG adc ARG2(b,a))
+#define ADC_B(a, b)     CHOICE(adcb ARG2(a,b), adcb ARG2(a,b), adcb ARG2(b,a))
+#define ADD_L(a, b)     CHOICE(addl ARG2(a,b), addl ARG2(a,b), _LTOG add ARG2(b,a))
+#define ADD_W(a, b)     CHOICE(addw ARG2(a,b), addw ARG2(a,b), _WTOG add ARG2(b,a))
+#define ADD_B(a, b)     CHOICE(addb ARG2(a,b), addb ARG2(a,b), addb ARG2(b,a))
+#define AND_L(a, b)     CHOICE(andl ARG2(a,b), andl ARG2(a,b), _LTOG and ARG2(b,a))
+#define AND_W(a, b)     CHOICE(andw ARG2(a,b), andw ARG2(a,b), _WTOG and ARG2(b,a))
+#define AND_B(a, b)     CHOICE(andb ARG2(a,b), andb ARG2(a,b), andb ARG2(b,a))
+#define ARPL(a,b)       CHOICE(arpl ARG2(a,b), arpl ARG2(a,b), arpl ARG2(b,a))
+#define BOUND_L(a, b)   CHOICE(boundl ARG2(a,b), boundl ARG2(b,a), _LTOG bound ARG2(b,a))
+#define BOUND_W(a, b)   CHOICE(boundw ARG2(a,b), boundw ARG2(b,a), _WTOG bound ARG2(b,a))
+#define BSF_L(a, b)     CHOICE(bsfl ARG2(a,b), bsfl ARG2(a,b), _LTOG bsf ARG2(b,a))
+#define BSF_W(a, b)     CHOICE(bsfw ARG2(a,b), bsfw ARG2(a,b), _WTOG bsf ARG2(b,a))
+#define BSR_L(a, b)     CHOICE(bsrl ARG2(a,b), bsrl ARG2(a,b), _LTOG bsr ARG2(b,a))
+#define BSR_W(a, b)     CHOICE(bsrw ARG2(a,b), bsrw ARG2(a,b), _WTOG bsr ARG2(b,a))
+#define BT_L(a, b)      CHOICE(btl ARG2(a,b), btl ARG2(a,b), _LTOG bt ARG2(b,a))
+#define BT_W(a, b)      CHOICE(btw ARG2(a,b), btw ARG2(a,b), _WTOG bt ARG2(b,a))
+#define BTC_L(a, b)     CHOICE(btcl ARG2(a,b), btcl ARG2(a,b), _LTOG btc ARG2(b,a))
+#define BTC_W(a, b)     CHOICE(btcw ARG2(a,b), btcw ARG2(a,b), _WTOG btc ARG2(b,a))
+#define BTR_L(a, b)     CHOICE(btrl ARG2(a,b), btrl ARG2(a,b), _LTOG btr ARG2(b,a))
+#define BTR_W(a, b)     CHOICE(btrw ARG2(a,b), btrw ARG2(a,b), _WTOG btr ARG2(b,a))
+#define BTS_L(a, b)     CHOICE(btsl ARG2(a,b), btsl ARG2(a,b), _LTOG bts ARG2(b,a))
+#define BTS_W(a, b)     CHOICE(btsw ARG2(a,b), btsw ARG2(a,b), _WTOG bts ARG2(b,a))
+#define CALL(a)         CHOICE(call a, call a, call a)
+#define CALLF(s,a)      CHOICE(lcall ARG2(s,a), lcall ARG2(s,a), callf s:a)
+/* Sign extension within the accumulator, AT&T vs. Intel naming:
+ *   cbtw = cbw  (AL -> AX)
+ *   cwtl = cwde (AX -> EAX)
+ * Note that cwtd is the AT&T name of cwd (AX -> DX:AX), not of cwde.
+ */
+#define CBW             CHOICE(cbtw, cbw, cbw)
+#define CWDE            CHOICE(cwtl, cwde, cwde)
+#define CLC             CHOICE(clc, clc, clc)
+#define CLD             CHOICE(cld, cld, cld)
+#define CLI             CHOICE(cli, cli, cli)
+#define CLTS            CHOICE(clts, clts, clts)
+#define CMC             CHOICE(cmc, cmc, cmc)
+#define CMP_L(a, b)     CHOICE(cmpl ARG2(a,b), cmpl ARG2(a,b), _LTOG cmp ARG2(b,a))
+#define CMP_W(a, b)     CHOICE(cmpw ARG2(a,b), cmpw ARG2(a,b), _WTOG cmp ARG2(b,a))
+#define CMP_B(a, b)     CHOICE(cmpb ARG2(a,b), cmpb ARG2(a,b), cmpb ARG2(b,a))
+#define CMPS_L          CHOICE(cmpsl, cmpsl, _LTOG cmps)
+#define CMPS_W          CHOICE(cmpsw, cmpsw, _WTOG cmps)
+#define CMPS_B          CHOICE(cmpsb, cmpsb, cmpsb)
+/* Sign extension into the DX/EDX register pair, AT&T vs. Intel naming:
+ *   cwtd = cwd (AX -> DX:AX)
+ *   cltd = cdq (EAX -> EDX:EAX)
+ * Note that cwtl is the AT&T name of cwde (AX -> EAX), not of cwd.
+ */
+#define CWD             CHOICE(cwtd, cwd, cwd)
+#define CDQ             CHOICE(cltd, cdq, cdq)
+#define DAA             CHOICE(daa, daa, daa)
+#define DAS             CHOICE(das, das, das)
+#define DEC_L(a)        CHOICE(decl a, decl a, _LTOG dec a)
+#define DEC_W(a)        CHOICE(decw a, decw a, _WTOG dec a)
+#define DEC_B(a)        CHOICE(decb a, decb a, decb a)
+/* Unsigned divide.  The third (ACK) column carries the _LTOG/_WTOG
+ * operand-size toggles, as IDIV_x and MUL_x do; without them DIV_W and
+ * DIV_L would expand to the same text under ACK.
+ */
+#define DIV_L(a)        CHOICE(divl a, divl a, _LTOG div a)
+#define DIV_W(a)        CHOICE(divw a, divw a, _WTOG div a)
+#define DIV_B(a)        CHOICE(divb a, divb a, divb a)
+#define ENTER(a,b)      CHOICE(enter ARG2(a,b), enter ARG2(a,b), enter ARG2(b,a))
+#define HLT             CHOICE(hlt, hlt, hlt)
+#define IDIV_L(a)       CHOICE(idivl a, idivl a, _LTOG idiv a)
+#define IDIV_W(a)       CHOICE(idivw a, idivw a, _WTOG idiv a)
+#define IDIV_B(a)       CHOICE(idivb a, idivb a, idivb a)
+/* More forms than this for imul!! */
+#define IMUL_L(a, b)    CHOICE(imull ARG2(a,b), imull ARG2(a,b), _LTOG imul ARG2(b,a))
+#define IMUL_W(a, b)    CHOICE(imulw ARG2(a,b), imulw ARG2(a,b), _WTOG imul ARG2(b,a))
+#define IMUL_B(a)       CHOICE(imulb a, imulb a, imulb a)
+#define IN_L            CHOICE(inl (DX), inl ARG2(DX,EAX), _LTOG in DX)
+#define IN_W            CHOICE(inw (DX), inw ARG2(DX,AX), _WTOG in DX)
+#define IN_B            CHOICE(inb (DX), inb ARG2(DX,AL), inb DX)
+/* Note to assembly writers: use the IN1_x macros ONLY with an immediate
+ * port number below 256.  Never write IN1_W(DX), for instance, however
+ * neat it looks; use IN_W for a port addressed through DX.
+ */
+#if defined (sun)
+#define IN1_L(a)        CHOICE(inl (a), inl ARG2(a,EAX), _LTOG in a)
+#define IN1_W(a)        CHOICE(inw (a), inw ARG2(a,AX), _WTOG in a)
+#define IN1_B(a)        CHOICE(inb (a), inb ARG2(a,AL), inb a)
+#else
+#define IN1_L(a)        CHOICE(inl a, inl ARG2(a,EAX), _LTOG in a)
+#define IN1_W(a)        CHOICE(inw a, inw ARG2(a,AX), _WTOG in a)
+#define IN1_B(a)        CHOICE(inb a, inb ARG2(a,AL), inb a)
+#endif
+#define INC_L(a)        CHOICE(incl a, incl a, _LTOG inc a)
+#define INC_W(a)        CHOICE(incw a, incw a, _WTOG inc a)
+#define INC_B(a)        CHOICE(incb a, incb a, incb a)
+#define INS_L           CHOICE(insl, insl, _LTOG ins)
+#define INS_W           CHOICE(insw, insw, _WTOG ins)
+#define INS_B           CHOICE(insb, insb, insb)
+#define INT(a)          CHOICE(int a, int a, int a)
+#define INT3            CHOICE(int CONST(3), int3, int CONST(3))
+#define INTO            CHOICE(into, into, into)
+#define IRET            CHOICE(iret, iret, iret)
+#define IRETD           CHOICE(iret, iret, iretd)
+#define JA(a)           CHOICE(ja a, ja a, ja a)
+#define JAE(a)          CHOICE(jae a, jae a, jae a)
+#define JB(a)           CHOICE(jb a, jb a, jb a)
+#define JBE(a)          CHOICE(jbe a, jbe a, jbe a)
+#define JC(a)           CHOICE(jc a, jc a, jc a)
+#define JE(a)           CHOICE(je a, je a, je a)
+#define JG(a)           CHOICE(jg a, jg a, jg a)
+#define JGE(a)          CHOICE(jge a, jge a, jge a)
+#define JL(a)           CHOICE(jl a, jl a, jl a)
+#define JLE(a)          CHOICE(jle a, jle a, jle a)
+#define JNA(a)          CHOICE(jna a, jna a, jna a)
+#define JNAE(a)         CHOICE(jnae a, jnae a, jnae a)
+#define JNB(a)          CHOICE(jnb a, jnb a, jnb a)
+#define JNBE(a)         CHOICE(jnbe a, jnbe a, jnbe a)
+#define JNC(a)          CHOICE(jnc a, jnc a, jnc a)
+#define JNE(a)          CHOICE(jne a, jne a, jne a)
+#define JNG(a)          CHOICE(jng a, jng a, jng a)
+#define JNGE(a)         CHOICE(jnge a, jnge a, jnge a)
+#define JNL(a)          CHOICE(jnl a, jnl a, jnl a)
+#define JNLE(a)         CHOICE(jnle a, jnle a, jnle a)
+#define JNO(a)          CHOICE(jno a, jno a, jno a)
+#define JNP(a)          CHOICE(jnp a, jnp a, jnp a)
+#define JNS(a)          CHOICE(jns a, jns a, jns a)
+#define JNZ(a)          CHOICE(jnz a, jnz a, jnz a)
+#define JO(a)           CHOICE(jo a, jo a, jo a)
+#define JP(a)           CHOICE(jp a, jp a, jp a)
+#define JPE(a)          CHOICE(jpe a, jpe a, jpe a)
+#define JPO(a)          CHOICE(jpo a, jpo a, jpo a)
+#define JS(a)           CHOICE(js a, js a, js a)
+#define JZ(a)           CHOICE(jz a, jz a, jz a)
+#define JMP(a)          CHOICE(jmp a, jmp a, jmp a)
+#define JMPF(s,a)       CHOICE(ljmp ARG2(s,a), ljmp ARG2(s,a), jmpf s:a)
+#define LAHF            CHOICE(lahf, lahf, lahf)
+#if !defined(_REAL_MODE) && !defined(_V86_MODE)
+#define LAR(a, b)       CHOICE(lar ARG2(a, b), lar ARG2(a, b), lar ARG2(b, a))
+#endif
+#define LEA_L(a, b)     CHOICE(leal ARG2(a,b), leal ARG2(a,b), _LTOG lea ARG2(b,a))
+#define LEA_W(a, b)     CHOICE(leaw ARG2(a,b), leaw ARG2(a,b), _WTOG lea ARG2(b,a))
+#define LEAVE           CHOICE(leave, leave, leave)
+#define LGDT(a)         CHOICE(lgdt a, lgdt a, lgdt a)
+#define LIDT(a)         CHOICE(lidt a, lidt a, lidt a)
+#define LDS(a, b)       CHOICE(ldsl ARG2(a,b), lds ARG2(a,b), lds ARG2(b,a))
+#define LES(a, b)       CHOICE(lesl ARG2(a,b), les ARG2(a,b), les ARG2(b,a))
+#define LFS(a, b)       CHOICE(lfsl ARG2(a,b), lfs ARG2(a,b), lfs ARG2(b,a))
+#define LGS(a, b)       CHOICE(lgsl ARG2(a,b), lgs ARG2(a,b), lgs ARG2(b,a))
+#define LSS(a, b)       CHOICE(lssl ARG2(a,b), lss ARG2(a,b), lss ARG2(b,a))
+#define LLDT(a)         CHOICE(lldt a, lldt a, lldt a)
+#define LMSW(a)         CHOICE(lmsw a, lmsw a, lmsw a)
+#define LOCK            CHOICE(lock, lock, lock)
+#define LODS_L          CHOICE(lodsl, lodsl, _LTOG lods)
+#define LODS_W          CHOICE(lodsw, lodsw, _WTOG lods)
+#define LODS_B          CHOICE(lodsb, lodsb, lodsb)
+#define LOOP(a)         CHOICE(loop a, loop a, loop a)
+#define LOOPE(a)        CHOICE(loope a, loope a, loope a)
+#define LOOPZ(a)        CHOICE(loopz a, loopz a, loopz a)
+#define LOOPNE(a)       CHOICE(loopne a, loopne a, loopne a)
+#define LOOPNZ(a)       CHOICE(loopnz a, loopnz a, loopnz a)
+#if !defined(_REAL_MODE) && !defined(_V86_MODE)
+#define LSL(a, b)       CHOICE(lsl ARG2(a,b), lsl ARG2(a,b), lsl ARG2(b,a))
+#endif
+#define LTR(a)          CHOICE(ltr a, ltr a, ltr a)
+#define MOV_SR(a, b)    CHOICE(movw ARG2(a,b), mov ARG2(a,b), mov ARG2(b,a))
+#define MOV_L(a, b)     CHOICE(movl ARG2(a,b), movl ARG2(a,b), _LTOG mov ARG2(b,a))
+#define MOV_W(a, b)     CHOICE(movw ARG2(a,b), movw ARG2(a,b), _WTOG mov ARG2(b,a))
+#define MOV_B(a, b)     CHOICE(movb ARG2(a,b), movb ARG2(a,b), movb ARG2(b,a))
+#define MOVS_L          CHOICE(movsl, movsl, _LTOG movs)
+#define MOVS_W          CHOICE(movsw, movsw, _WTOG movs)
+#define MOVS_B          CHOICE(movsb, movsb, movsb)
+#define MOVSX_BL(a, b)  CHOICE(movsbl ARG2(a,b), movsbl ARG2(a,b), movsx ARG2(b,a))
+#define MOVSX_BW(a, b)  CHOICE(movsbw ARG2(a,b), movsbw ARG2(a,b), movsx ARG2(b,a))
+#define MOVSX_WL(a, b)  CHOICE(movswl ARG2(a,b), movswl ARG2(a,b), movsx ARG2(b,a))
+#define MOVZX_BL(a, b)  CHOICE(movzbl ARG2(a,b), movzbl ARG2(a,b), movzx ARG2(b,a))
+#define MOVZX_BW(a, b)  CHOICE(movzbw ARG2(a,b), movzbw ARG2(a,b), movzx ARG2(b,a))
+#define MOVZX_WL(a, b)  CHOICE(movzwl ARG2(a,b), movzwl ARG2(a,b), movzx ARG2(b,a))
+#define MUL_L(a)        CHOICE(mull a, mull a, _LTOG mul a)
+#define MUL_W(a)        CHOICE(mulw a, mulw a, _WTOG mul a)
+#define MUL_B(a)        CHOICE(mulb a, mulb a, mulb a)
+#define NEG_L(a)        CHOICE(negl a, negl a, _LTOG neg a)
+#define NEG_W(a)        CHOICE(negw a, negw a, _WTOG neg a)
+#define NEG_B(a)        CHOICE(negb a, negb a, negb a)
+#define NOP             CHOICE(nop, nop, nop)
+#define NOT_L(a)        CHOICE(notl a, notl a, _LTOG not a)
+#define NOT_W(a)        CHOICE(notw a, notw a, _WTOG not a)
+#define NOT_B(a)        CHOICE(notb a, notb a, notb a)
+#define OR_L(a,b)       CHOICE(orl ARG2(a,b), orl ARG2(a,b), _LTOG or ARG2(b,a))
+#define OR_W(a,b)       CHOICE(orw ARG2(a,b), orw ARG2(a,b), _WTOG or ARG2(b,a))
+#define OR_B(a,b)       CHOICE(orb ARG2(a,b), orb ARG2(a,b), orb ARG2(b,a))
+#define OUT_L           CHOICE(outl (DX), outl ARG2(EAX,DX), _LTOG out DX)
+#define OUT_W           CHOICE(outw (DX), outw ARG2(AX,DX), _WTOG out DX)
+#define OUT_B           CHOICE(outb (DX), outb ARG2(AL,DX), outb DX)
+/* Note to assembly authors: use the OUT1_ macros below ONLY with an
+ * immediate port number below 256. Do not write OUT1_W(DX), for instance,
+ * even if it looks nicer; the OUT_ macros above are the ones that use DX.
+ */
+#define OUT1_L(a)       CHOICE(outl (a), outl ARG2(EAX,a), _LTOG out a)
+#define OUT1_W(a)       CHOICE(outw (a), outw ARG2(AX,a), _WTOG out a)
+#define OUT1_B(a)       CHOICE(outb (a), outb ARG2(AL,a), outb a)
+#define OUTS_L          CHOICE(outsl, outsl, _LTOG outs)
+#define OUTS_W          CHOICE(outsw, outsw, _WTOG outs)
+#define OUTS_B          CHOICE(outsb, outsb, outsb)
+#define POP_SR(a)       CHOICE(pop a, pop a, pop a)
+#define POP_L(a)        CHOICE(popl a, popl a, _LTOG pop a)
+#define POP_W(a)        CHOICE(popw a, popw a, _WTOG pop a)
+#define POPA_L          CHOICE(popal, popal, _LTOG popa)
+#define POPA_W          CHOICE(popaw, popaw, _WTOG popa)
+#define POPF_L          CHOICE(popfl, popfl, _LTOG popf)
+#define POPF_W          CHOICE(popfw, popfw, _WTOG popf)
+#define PUSH_SR(a)      CHOICE(push a, push a, push a)
+#define PUSH_L(a)       CHOICE(pushl a, pushl a, _LTOG push a)
+#define PUSH_W(a)       CHOICE(pushw a, pushw a, _WTOG push a)
+#define PUSH_B(a)       CHOICE(push a, pushb a, push a)
+#define PUSHA_L         CHOICE(pushal, pushal, _LTOG pusha)
+#define PUSHA_W         CHOICE(pushaw, pushaw, _WTOG pusha)
+#define PUSHF_L         CHOICE(pushfl, pushfl, _LTOG pushf)
+#define PUSHF_W         CHOICE(pushfw, pushfw, _WTOG pushf)
+#define RCL_L(a, b)     CHOICE(rcll ARG2(a,b), rcll ARG2(a,b), _LTOG rcl ARG2(b,a))
+#define RCL_W(a, b)     CHOICE(rclw ARG2(a,b), rclw ARG2(a,b), _WTOG rcl ARG2(b,a))
+#define RCL_B(a, b)     CHOICE(rclb ARG2(a,b), rclb ARG2(a,b), rclb ARG2(b,a))
+#define RCR_L(a, b)     CHOICE(rcrl ARG2(a,b), rcrl ARG2(a,b), _LTOG rcr ARG2(b,a))
+#define RCR_W(a, b)     CHOICE(rcrw ARG2(a,b), rcrw ARG2(a,b), _WTOG rcr ARG2(b,a))
+#define RCR_B(a, b)     CHOICE(rcrb ARG2(a,b), rcrb ARG2(a,b), rcrb ARG2(b,a))
+#define ROL_L(a, b)     CHOICE(roll ARG2(a,b), roll ARG2(a,b), _LTOG rol ARG2(b,a))
+#define ROL_W(a, b)     CHOICE(rolw ARG2(a,b), rolw ARG2(a,b), _WTOG rol ARG2(b,a))
+#define ROL_B(a, b)     CHOICE(rolb ARG2(a,b), rolb ARG2(a,b), rolb ARG2(b,a))
+#define ROR_L(a, b)     CHOICE(rorl ARG2(a,b), rorl ARG2(a,b), _LTOG ror ARG2(b,a))
+#define ROR_W(a, b)     CHOICE(rorw ARG2(a,b), rorw ARG2(a,b), _WTOG ror ARG2(b,a))
+#define ROR_B(a, b)     CHOICE(rorb ARG2(a,b), rorb ARG2(a,b), rorb ARG2(b,a))
+#define REP             CHOICE(rep ;, rep ;, repe)
+#define REPE            CHOICE(repz ;, repe ;, repe)
+#define REPNE           CHOICE(repnz ;, repne ;, repne)
+#define REPNZ           REPNE
+#define REPZ            REPE
+#define RET             CHOICE(ret, ret, ret)
+#define SAHF            CHOICE(sahf, sahf, sahf)
+#define SAL_L(a, b)     CHOICE(sall ARG2(a,b), sall ARG2(a,b), _LTOG sal ARG2(b,a))
+#define SAL_W(a, b)     CHOICE(salw ARG2(a,b), salw ARG2(a,b), _WTOG sal ARG2(b,a))
+#define SAL_B(a, b)     CHOICE(salb ARG2(a,b), salb ARG2(a,b), salb ARG2(b,a))
+#define SAR_L(a, b)     CHOICE(sarl ARG2(a,b), sarl ARG2(a,b), _LTOG sar ARG2(b,a))
+#define SAR_W(a, b)     CHOICE(sarw ARG2(a,b), sarw ARG2(a,b), _WTOG sar ARG2(b,a))
+#define SAR_B(a, b)     CHOICE(sarb ARG2(a,b), sarb ARG2(a,b), sarb ARG2(b,a))
+#define SBB_L(a, b)     CHOICE(sbbl ARG2(a,b), sbbl ARG2(a,b), _LTOG sbb ARG2(b,a))
+#define SBB_W(a, b)     CHOICE(sbbw ARG2(a,b), sbbw ARG2(a,b), _WTOG sbb ARG2(b,a))
+#define SBB_B(a, b)     CHOICE(sbbb ARG2(a,b), sbbb ARG2(a,b), sbbb ARG2(b,a))
+#define SCAS_L          CHOICE(scasl, scasl, _LTOG scas)
+#define SCAS_W          CHOICE(scasw, scasw, _WTOG scas)
+#define SCAS_B          CHOICE(scasb, scasb, scasb)
+/* SETcc: store the result of a condition test into byte operand a.
+ * Every row uses the same condition in all three CHOICE alternatives;
+ * SETC/SETNC are spelled setb/setnb in the second and third alternatives,
+ * which test the same (carry) flag.
+ */
+#define SETA(a)         CHOICE(seta a, seta a, seta a)
+#define SETAE(a)        CHOICE(setae a, setae a, setae a)
+#define SETB(a)         CHOICE(setb a, setb a, setb a)
+#define SETBE(a)        CHOICE(setbe a, setbe a, setbe a)
+#define SETC(a)         CHOICE(setc a, setb a, setb a)
+#define SETE(a)         CHOICE(sete a, sete a, sete a)
+#define SETG(a)         CHOICE(setg a, setg a, setg a)
+#define SETGE(a)        CHOICE(setge a, setge a, setge a)
+#define SETL(a)         CHOICE(setl a, setl a, setl a)
+#define SETLE(a)        CHOICE(setle a, setle a, setle a)
+#define SETNA(a)        CHOICE(setna a, setna a, setna a)
+#define SETNAE(a)       CHOICE(setnae a, setnae a, setnae a)
+#define SETNB(a)        CHOICE(setnb a, setnb a, setnb a)
+#define SETNBE(a)       CHOICE(setnbe a, setnbe a, setnbe a)
+#define SETNC(a)        CHOICE(setnc a, setnb a, setnb a)
+#define SETNE(a)        CHOICE(setne a, setne a, setne a)
+#define SETNG(a)        CHOICE(setng a, setng a, setng a)
+#define SETNGE(a)       CHOICE(setnge a, setnge a, setnge a)
+#define SETNL(a)        CHOICE(setnl a, setnl a, setnl a)
+#define SETNLE(a)       CHOICE(setnle a, setnle a, setnle a)
+#define SETNO(a)        CHOICE(setno a, setno a, setno a)
+#define SETNP(a)        CHOICE(setnp a, setnp a, setnp a)
+/* Sign-flag test: was "setna" (not above) in the third alternative. */
+#define SETNS(a)        CHOICE(setns a, setns a, setns a)
+#define SETNZ(a)        CHOICE(setnz a, setnz a, setnz a)
+#define SETO(a)         CHOICE(seto a, seto a, seto a)
+#define SETP(a)         CHOICE(setp a, setp a, setp a)
+#define SETPE(a)        CHOICE(setpe a, setpe a, setpe a)
+#define SETPO(a)        CHOICE(setpo a, setpo a, setpo a)
+/* Sign-flag test: was "seta" (above) in the third alternative. */
+#define SETS(a)         CHOICE(sets a, sets a, sets a)
+#define SETZ(a)         CHOICE(setz a, setz a, setz a)
+#define SGDT(a)         CHOICE(sgdt a, sgdt a, sgdt a)
+#define SIDT(a)         CHOICE(sidt a, sidt a, sidt a)
+#define SHL_L(a, b)     CHOICE(shll ARG2(a,b), shll ARG2(a,b), _LTOG shl ARG2(b,a))
+#define SHL_W(a, b)     CHOICE(shlw ARG2(a,b), shlw ARG2(a,b), _WTOG shl ARG2(b,a))
+#define SHL_B(a, b)     CHOICE(shlb ARG2(a,b), shlb ARG2(a,b), shlb ARG2(b,a))
+#define SHLD_L(a,b,c)   CHOICE(shldl ARG3(a,b,c), shldl ARG3(a,b,c), _LTOG shld ARG3(c,b,a))
+#define SHLD2_L(a,b)    CHOICE(shldl ARG2(a,b), shldl ARG3(CL,a,b), _LTOG shld ARG3(b,a,CL))
+#define SHLD_W(a,b,c)   CHOICE(shldw ARG3(a,b,c), shldw ARG3(a,b,c), _WTOG shld ARG3(c,b,a))
+#define SHLD2_W(a,b)    CHOICE(shldw ARG2(a,b), shldw ARG3(CL,a,b), _WTOG shld ARG3(b,a,CL))
+#define SHR_L(a, b)     CHOICE(shrl ARG2(a,b), shrl ARG2(a,b), _LTOG shr ARG2(b,a))
+#define SHR_W(a, b)     CHOICE(shrw ARG2(a,b), shrw ARG2(a,b), _WTOG shr ARG2(b,a))
+#define SHR_B(a, b)     CHOICE(shrb ARG2(a,b), shrb ARG2(a,b), shrb ARG2(b,a))
+#define SHRD_L(a,b,c)   CHOICE(shrdl ARG3(a,b,c), shrdl ARG3(a,b,c), _LTOG shrd ARG3(c,b,a))
+#define SHRD2_L(a,b)    CHOICE(shrdl ARG2(a,b), shrdl ARG3(CL,a,b), _LTOG shrd ARG3(b,a,CL))
+#define SHRD_W(a,b,c)   CHOICE(shrdw ARG3(a,b,c), shrdw ARG3(a,b,c), _WTOG shrd ARG3(c,b,a))
+#define SHRD2_W(a,b)    CHOICE(shrdw ARG2(a,b), shrdw ARG3(CL,a,b), _WTOG shrd ARG3(b,a,CL))
+#define SLDT(a)         CHOICE(sldt a, sldt a, sldt a)
+#define SMSW(a)         CHOICE(smsw a, smsw a, smsw a)
+#define STC             CHOICE(stc, stc, stc)
+#define STD             CHOICE(std, std, std)
+#define STI             CHOICE(sti, sti, sti)
+#define STOS_L          CHOICE(stosl, stosl, _LTOG stos)
+#define STOS_W          CHOICE(stosw, stosw, _WTOG stos)
+#define STOS_B          CHOICE(stosb, stosb, stosb)
+#define STR(a)          CHOICE(str a, str a, str a)
+#define SUB_L(a, b)     CHOICE(subl ARG2(a,b), subl ARG2(a,b), _LTOG sub ARG2(b,a))
+#define SUB_W(a, b)     CHOICE(subw ARG2(a,b), subw ARG2(a,b), _WTOG sub ARG2(b,a))
+#define SUB_B(a, b)     CHOICE(subb ARG2(a,b), subb ARG2(a,b), subb ARG2(b,a))
+#define TEST_L(a, b)    CHOICE(testl ARG2(a,b), testl ARG2(a,b), _LTOG test ARG2(b,a))
+#define TEST_W(a, b)    CHOICE(testw ARG2(a,b), testw ARG2(a,b), _WTOG test ARG2(b,a))
+#define TEST_B(a, b)    CHOICE(testb ARG2(a,b), testb ARG2(a,b), testb ARG2(b,a))
+#define VERR(a)         CHOICE(verr a, verr a, verr a)
+#define VERW(a)         CHOICE(verw a, verw a, verw a)
+#define WAIT            CHOICE(wait, wait, wait)
+#define XCHG_L(a, b)    CHOICE(xchgl ARG2(a,b), xchgl ARG2(a,b), _LTOG xchg ARG2(b,a))
+#define XCHG_W(a, b)    CHOICE(xchgw ARG2(a,b), xchgw ARG2(a,b), _WTOG xchg ARG2(b,a))
+#define XCHG_B(a, b)    CHOICE(xchgb ARG2(a,b), xchgb ARG2(a,b), xchgb ARG2(b,a))
+#define XLAT            CHOICE(xlat, xlat, xlat)
+#define XOR_L(a, b)     CHOICE(xorl ARG2(a,b), xorl ARG2(a,b), _LTOG xor ARG2(b,a))
+#define XOR_W(a, b)     CHOICE(xorw ARG2(a,b), xorw ARG2(a,b), _WTOG xor ARG2(b,a))
+#define XOR_B(a, b)     CHOICE(xorb ARG2(a,b), xorb ARG2(a,b), xorb ARG2(b,a))
+/* Floating Point Instructions */
+#define F2XM1           CHOICE(f2xm1, f2xm1, f2xm1)
+#define FABS            CHOICE(fabs, fabs, fabs)
+#define FADD_D(a)       CHOICE(faddl a, faddl a, faddd a)
+#define FADD_S(a)       CHOICE(fadds a, fadds a, fadds a)
+#define FADD2(a, b)     CHOICE(fadd ARG2(a,b), fadd ARG2(a,b), fadd ARG2(b,a))
+#define FADDP(a, b)     CHOICE(faddp ARG2(a,b), faddp ARG2(a,b), faddp ARG2(b,a))
+#define FIADD_L(a)      CHOICE(fiaddl a, fiaddl a, fiaddl a)
+#define FIADD_W(a)      CHOICE(fiadd a, fiadds a, fiadds a)
+#define FBLD(a)         CHOICE(fbld a, fbld a, fbld a)
+#define FBSTP(a)        CHOICE(fbstp a, fbstp a, fbstp a)
+#define FCHS            CHOICE(fchs, fchs, fchs)
+#define FCLEX           CHOICE(fclex, wait; fnclex, wait; fclex)
+#define FNCLEX          CHOICE(fnclex, fnclex, fclex)
+#define FCOM(a)         CHOICE(fcom a, fcom a, fcom a)
+#define FCOM_D(a)       CHOICE(fcoml a, fcoml a, fcomd a)
+#define FCOM_S(a)       CHOICE(fcoms a, fcoms a, fcoms a)
+#define FCOMP(a)        CHOICE(fcomp a, fcomp a, fcomp a)
+#define FCOMP_D(a)      CHOICE(fcompl a, fcompl a, fcompd a)
+#define FCOMP_S(a)      CHOICE(fcomps a, fcomps a, fcomps a)
+#define FCOMPP          CHOICE(fcompp, fcompp, fcompp)
+#define FCOS            CHOICE(fcos, fcos, fcos)
+#define FDECSTP         CHOICE(fdecstp, fdecstp, fdecstp)
+#define FDIV_D(a)       CHOICE(fdivl a, fdivl a, fdivd a)
+#define FDIV_S(a)       CHOICE(fdivs a, fdivs a, fdivs a)
+#define FDIV2(a, b)     CHOICE(fdiv ARG2(a,b), fdiv ARG2(a,b), fdiv ARG2(b,a))
+#define FDIVP(a, b)     CHOICE(fdivp ARG2(a,b), fdivp ARG2(a,b), fdivp ARG2(b,a))
+#define FIDIV_L(a)      CHOICE(fidivl a, fidivl a, fidivl a)
+#define FIDIV_W(a)      CHOICE(fidiv a, fidivs a, fidivs a)
+#define FDIVR_D(a)      CHOICE(fdivrl a, fdivrl a, fdivrd a)
+#define FDIVR_S(a)      CHOICE(fdivrs a, fdivrs a, fdivrs a)
+#define FDIVR2(a, b)    CHOICE(fdivr ARG2(a,b), fdivr ARG2(a,b), fdivr ARG2(b,a))
+#define FDIVRP(a, b)    CHOICE(fdivrp ARG2(a,b), fdivrp ARG2(a,b), fdivrp ARG2(b,a))
+#define FIDIVR_L(a)     CHOICE(fidivrl a, fidivrl a, fidivrl a)
+#define FIDIVR_W(a)     CHOICE(fidivr a, fidivrs a, fidivrs a)
+#define FFREE(a)        CHOICE(ffree a, ffree a, ffree a)
+#define FICOM_L(a)      CHOICE(ficoml a, ficoml a, ficoml a)
+#define FICOM_W(a)      CHOICE(ficom a, ficoms a, ficoms a)
+#define FICOMP_L(a)     CHOICE(ficompl a, ficompl a, ficompl a)
+#define FICOMP_W(a)     CHOICE(ficomp a, ficomps a, ficomps a)
+#define FILD_Q(a)       CHOICE(fildll a, fildq a, fildq a)
+#define FILD_L(a)       CHOICE(fildl a, fildl a, fildl a)
+#define FILD_W(a)       CHOICE(fild a, filds a, filds a)
+#define FINCSTP         CHOICE(fincstp, fincstp, fincstp)
+#define FINIT           CHOICE(finit, wait; fninit, wait; finit)
+#define FNINIT          CHOICE(fninit, fninit, finit)
+#define FIST_L(a)       CHOICE(fistl a, fistl a, fistl a)
+#define FIST_W(a)       CHOICE(fist a, fists a, fists a)
+#define FISTP_Q(a)      CHOICE(fistpll a, fistpq a, fistpq a)
+#define FISTP_L(a)      CHOICE(fistpl a, fistpl a, fistpl a)
+#define FISTP_W(a)      CHOICE(fistp a, fistps a, fistps a)
+#define FLD_X(a)        CHOICE(fldt a, fldt a, fldx a) /* 80 bit data type! */
+#define FLD_D(a)        CHOICE(fldl a, fldl a, fldd a)
+#define FLD_S(a)        CHOICE(flds a, flds a, flds a)
+#define FLD1            CHOICE(fld1, fld1, fld1)
+#define FLDL2T          CHOICE(fldl2t, fldl2t, fldl2t)
+#define FLDL2E          CHOICE(fldl2e, fldl2e, fldl2e)
+#define FLDPI           CHOICE(fldpi, fldpi, fldpi)
+#define FLDLG2          CHOICE(fldlg2, fldlg2, fldlg2)
+#define FLDLN2          CHOICE(fldln2, fldln2, fldln2)
+#define FLDZ            CHOICE(fldz, fldz, fldz)
+#define FLDCW(a)        CHOICE(fldcw a, fldcw a, fldcw a)
+#define FLDENV(a)       CHOICE(fldenv a, fldenv a, fldenv a)
+#define FMUL_S(a)       CHOICE(fmuls a, fmuls a, fmuls a)
+#define FMUL_D(a)       CHOICE(fmull a, fmull a, fmuld a)
+#define FMUL2(a, b)     CHOICE(fmul ARG2(a,b), fmul ARG2(a,b), fmul ARG2(b,a))
+#define FMULP(a, b)     CHOICE(fmulp ARG2(a,b), fmulp ARG2(a,b), fmulp ARG2(b,a))
+#define FIMUL_L(a)      CHOICE(fimull a, fimull a, fimull a)
+#define FIMUL_W(a)      CHOICE(fimul a, fimuls a, fimuls a)
+#define FNOP            CHOICE(fnop, fnop, fnop)
+#define FPATAN          CHOICE(fpatan, fpatan, fpatan)
+#define FPREM           CHOICE(fprem, fprem, fprem)
+#define FPREM1          CHOICE(fprem1, fprem1, fprem1)
+#define FPTAN           CHOICE(fptan, fptan, fptan)
+#define FRNDINT         CHOICE(frndint, frndint, frndint)
+#define FRSTOR(a)       CHOICE(frstor a, frstor a, frstor a)
+#define FSAVE(a)        CHOICE(fsave a, wait; fnsave a, wait; fsave a)
+#define FNSAVE(a)       CHOICE(fnsave a, fnsave a, fsave a)
+#define FSCALE          CHOICE(fscale, fscale, fscale)
+#define FSIN            CHOICE(fsin, fsin, fsin)
+#define FSINCOS         CHOICE(fsincos, fsincos, fsincos)
+#define FSQRT           CHOICE(fsqrt, fsqrt, fsqrt)
+#define FST_D(a)        CHOICE(fstl a, fstl a, fstd a)
+#define FST_S(a)        CHOICE(fsts a, fsts a, fsts a)
+#define FSTP_X(a)       CHOICE(fstpt a, fstpt a, fstpx a)
+#define FSTP_D(a)       CHOICE(fstpl a, fstpl a, fstpd a)
+#define FSTP_S(a)       CHOICE(fstps a, fstps a, fstps a)
+#define FSTP(a)         CHOICE(fstp a, fstp a, fstp a)
+#define FSTCW(a)        CHOICE(fstcw a, wait; fnstcw a, wait; fstcw a)
+#define FNSTCW(a)       CHOICE(fnstcw a, fnstcw a, fstcw a)
+#define FSTENV(a)       CHOICE(fstenv a, wait; fnstenv a, fstenv a)
+#define FNSTENV(a)      CHOICE(fnstenv a, fnstenv a, fstenv a)
+#define FSTSW(a)        CHOICE(fstsw a, wait; fnstsw a, wait; fstsw a)
+#define FNSTSW(a)       CHOICE(fnstsw a, fnstsw a, fstsw a)
+#define FSUB_S(a)       CHOICE(fsubs a, fsubs a, fsubs a)
+#define FSUB_D(a)       CHOICE(fsubl a, fsubl a, fsubd a)
+#define FSUB2(a, b)     CHOICE(fsub ARG2(a,b), fsub ARG2(a,b), fsub ARG2(b,a))
+#define FSUBP(a, b)     CHOICE(fsubp ARG2(a,b), fsubp ARG2(a,b), fsubp ARG2(b,a))
+#define FISUB_L(a)      CHOICE(fisubl a, fisubl a, fisubl a)
+#define FISUB_W(a)      CHOICE(fisub a, fisubs a, fisubs a)
+#define FSUBR_S(a)      CHOICE(fsubrs a, fsubrs a, fsubrs a)
+#define FSUBR_D(a)      CHOICE(fsubrl a, fsubrl a, fsubrd a)
+#define FSUBR2(a, b)    CHOICE(fsubr ARG2(a,b), fsubr ARG2(a,b), fsubr ARG2(b,a))
+#define FSUBRP(a, b)    CHOICE(fsubrp ARG2(a,b), fsubrp ARG2(a,b), fsubrp ARG2(b,a))
+#define FISUBR_L(a)     CHOICE(fisubrl a, fisubrl a, fisubrl a)
+#define FISUBR_W(a)     CHOICE(fisubr a, fisubrs a, fisubrs a)
+#define FTST            CHOICE(ftst, ftst, ftst)
+#define FUCOM(a)        CHOICE(fucom a, fucom a, fucom a)
+#define FUCOMP(a)       CHOICE(fucomp a, fucomp a, fucomp a)
+#define FUCOMPP         CHOICE(fucompp, fucompp, fucompp)
+#define FWAIT           CHOICE(wait, wait, wait)
+#define FXAM            CHOICE(fxam, fxam, fxam)
+#define FXCH(a)         CHOICE(fxch a, fxch a, fxch a)
+#define FXTRACT         CHOICE(fxtract, fxtract, fxtract)
+#define FYL2X           CHOICE(fyl2x, fyl2x, fyl2x)
+#define FYL2XP1         CHOICE(fyl2xp1, fyl2xp1, fyl2xp1)
+/* New instructions */
+#define CPUID           CHOICE(D_BYTE ARG2(15, 162), cpuid, D_BYTE ARG2(15, 162))
+#define RDTSC           CHOICE(D_BYTE ARG2(15, 49), rdtsc, D_BYTE ARG2(15, 49))
+#else /* NASM_ASSEMBLER || MASM_ASSEMBLER is defined */
+        /****************************************/
+        /*                                      */
+        /*      Intel style assemblers.         */
+        /*      (NASM and MASM)                 */
+        /*                                      */
+        /****************************************/
+#define P_EAX           EAX
+#define L_EAX           EAX
+#define W_AX            AX
+#define B_AH            AH
+#define B_AL            AL
+#define P_EBX           EBX
+#define L_EBX           EBX
+#define W_BX            BX
+#define B_BH            BH
+#define B_BL            BL
+#define P_ECX           ECX
+#define L_ECX           ECX
+#define W_CX            CX
+#define B_CH            CH
+#define B_CL            CL
+#define P_EDX           EDX
+#define L_EDX           EDX
+#define W_DX            DX
+#define B_DH            DH
+#define B_DL            DL
+#define P_EBP           EBP
+#define L_EBP           EBP
+#define W_BP            BP
+#define P_ESI           ESI
+#define L_ESI           ESI
+#define W_SI            SI
+#define P_EDI           EDI
+#define L_EDI           EDI
+#define W_DI            DI
+#define P_ESP           ESP
+#define L_ESP           ESP
+#define W_SP            SP
+#define W_CS            CS
+#define W_SS            SS
+#define W_DS            DS
+#define W_ES            ES
+#define W_FS            FS
+#define W_GS            GS
+#define X_ST            ST
+#define D_ST            ST
+#define L_ST            ST
+#define P_MM0           mm0
+#define P_MM1           mm1
+#define P_MM2           mm2
+#define P_MM3           mm3
+#define P_MM4           mm4
+#define P_MM5           mm5
+#define P_MM6           mm6
+#define P_MM7           mm7
+#define P_XMM0          xmm0
+#define P_XMM1          xmm1
+#define P_XMM2          xmm2
+#define P_XMM3          xmm3
+#define P_XMM4          xmm4
+#define P_XMM5          xmm5
+#define P_XMM6          xmm6
+#define P_XMM7          xmm7
+#define CONCAT(x, y)            x ## y
+#define CONCAT3(x, y, z)        x ## y ## z
+#if defined(NASM_ASSEMBLER)
+#define ST(n)           st ## n
+#define ST0             st0
+#define TBYTE_PTR       tword
+#define QWORD_PTR       qword
+#define DWORD_PTR       dword
+#define WORD_PTR        word
+#define BYTE_PTR        byte
+#define OFFSET
+#define GLOBL                   GLOBAL
+#define ALIGNTEXT32             ALIGN 32
+#define ALIGNTEXT16             ALIGN 16
+#define ALIGNTEXT8              ALIGN 8
+#define ALIGNTEXT4              ALIGN 4
+#define ALIGNTEXT2              ALIGN 2
+#define ALIGNTEXT32ifNOP        ALIGN 32
+#define ALIGNTEXT16ifNOP        ALIGN 16
+#define ALIGNTEXT8ifNOP         ALIGN 8
+#define ALIGNTEXT4ifNOP         ALIGN 4
+#define ALIGNDATA32             ALIGN 32
+#define ALIGNDATA16             ALIGN 16
+#define ALIGNDATA8              ALIGN 8
+#define ALIGNDATA4              ALIGN 4
+#define ALIGNDATA2              ALIGN 2
+#define FILE(s)
+#define STRING(s)       db s
+#define D_LONG          dd
+#define D_WORD          dw
+#define D_BYTE          db
+/* #define SPACE */
+/* #define COMM */
+#if defined(__WATCOMC__)
+SECTION _TEXT public align=16 class=CODE use32 flat
+SECTION _DATA public align=16 class=DATA use32 flat
+#define SEG_TEXT        SECTION _TEXT
+#define SEG_DATA        SECTION _DATA
+#define SEG_BSS         SECTION .bss
+#else
+#define SEG_DATA        SECTION .data
+#define SEG_TEXT        SECTION .text
+#define SEG_BSS         SECTION .bss
+#endif
+/* Reserve n zero-filled bytes; NASM repeats a data directive with TIMES. */
+#define D_SPACE(n)      times n db 0
+#define AS_BEGIN
+/* Jcc's should be handled better than this... */
+#define NEAR            near
+#else /* MASM */
+#define TBYTE_PTR       tbyte ptr
+#define QWORD_PTR       qword ptr
+#define DWORD_PTR       dword ptr
+#define WORD_PTR        word ptr
+#define BYTE_PTR        byte ptr
+#define OFFSET          offset
+#define GLOBL                   GLOBAL
+#define ALIGNTEXT32             ALIGN 32
+#define ALIGNTEXT16             ALIGN 16
+#define ALIGNTEXT8              ALIGN 8
+#define ALIGNTEXT4              ALIGN 4
+#define ALIGNTEXT2              ALIGN 2
+#define ALIGNTEXT32ifNOP        ALIGN 32
+#define ALIGNTEXT16ifNOP        ALIGN 16
+#define ALIGNTEXT8ifNOP         ALIGN 8
+#define ALIGNTEXT4ifNOP         ALIGN 4
+#define ALIGNDATA32             ALIGN 32
+#define ALIGNDATA16             ALIGN 16
+#define ALIGNDATA8              ALIGN 8
+#define ALIGNDATA4              ALIGN 4
+#define ALIGNDATA2              ALIGN 2
+#define FILE(s)
+#define STRING(s)       db s
+#define D_LONG          dd
+#define D_WORD          dw
+#define D_BYTE          db
+/* #define SPACE */
+/* #define COMM */
+#define SEG_DATA        .DATA
+#define SEG_TEXT        .CODE
+#define SEG_BSS         .DATA
+/* Reserve n zero-filled bytes; MASM repeats an initializer with DUP. */
+#define D_SPACE(n)      db n DUP (0)
+#define AS_BEGIN
+#define NEAR
+#endif
+#define GLNAME(a)       CONCAT(_, a)
+/*
+ *      Addressing Modes
+ */
+/* Immediate Mode
+ * <prefix>_ADDR(a) expands to "OFFSET a", where OFFSET is whatever the
+ * assembler-specific section above defined it as; <prefix>_CONST(a) expands
+ * to a unchanged. The size prefix makes no difference to either expansion.
+ */
+#define P_ADDR(a)               OFFSET a
+#define X_ADDR(a)               OFFSET a
+#define D_ADDR(a)               OFFSET a
+#define L_ADDR(a)               OFFSET a
+#define W_ADDR(a)               OFFSET a
+#define B_ADDR(a)               OFFSET a
+#define P_CONST(a)              a
+#define X_CONST(a)              a
+#define D_CONST(a)              a
+#define L_CONST(a)              a
+#define W_CONST(a)              a
+#define B_CONST(a)              a
+/* Indirect Mode
+ * <prefix>_CONTENT(a) names the memory at a. With NASM_ASSEMBLER defined the
+ * operand is wrapped in brackets; otherwise it is emitted bare. In both cases
+ * the X_/D_/L_/W_/B_ variants put the matching *_PTR size keyword in front,
+ * while the P_ variant adds no size keyword.
+ */
+#ifdef NASM_ASSEMBLER
+#define P_CONTENT(a)            [a]
+#define X_CONTENT(a)            TBYTE_PTR [a]
+#define D_CONTENT(a)            QWORD_PTR [a]
+#define L_CONTENT(a)            DWORD_PTR [a]
+#define W_CONTENT(a)            WORD_PTR [a]
+#define B_CONTENT(a)            BYTE_PTR [a]
+#else
+#define P_CONTENT(a)            a
+#define X_CONTENT(a)            TBYTE_PTR a
+#define D_CONTENT(a)            QWORD_PTR a
+#define L_CONTENT(a)            DWORD_PTR a
+#define W_CONTENT(a)            WORD_PTR a
+#define B_CONTENT(a)            BYTE_PTR a
+#endif
+/* Register a indirect: expands to [a]. The prefix picks the size keyword
+ * placed in front (P_ none, X_ TBYTE_PTR, D_ QWORD_PTR, L_ DWORD_PTR,
+ * W_ WORD_PTR, B_ BYTE_PTR).
+ */
+#define P_REGIND(a)             [a]
+#define X_REGIND(a)             TBYTE_PTR [a]
+#define D_REGIND(a)             QWORD_PTR [a]
+#define L_REGIND(a)             DWORD_PTR [a]
+#define W_REGIND(a)             WORD_PTR [a]
+#define B_REGIND(a)             BYTE_PTR [a]
+/* Register b indirect plus displacement a: expands to [b + a].
+ * Note the argument order: the displacement comes first, the register second.
+ */
+#define P_REGOFF(a, b)          [b + a]
+#define X_REGOFF(a, b)          TBYTE_PTR [b + a]
+#define D_REGOFF(a, b)          QWORD_PTR [b + a]
+#define L_REGOFF(a, b)          DWORD_PTR [b + a]
+#define W_REGOFF(a, b)          WORD_PTR [b + a]
+#define B_REGOFF(a, b)          BYTE_PTR [b + a]
+/* Reg indirect Base + Index + Displacement  - this is mainly for 16-bit mode
+ * which has no scaling. Arguments are (base, index, displacement) and the
+ * expansion is [b + i + d].
+ */
+#define P_REGBID(b, i, d)       [b + i + d]
+#define X_REGBID(b, i, d)       TBYTE_PTR [b + i + d]
+#define D_REGBID(b, i, d)       QWORD_PTR [b + i + d]
+#define L_REGBID(b, i, d)       DWORD_PTR [b + i + d]
+#define W_REGBID(b, i, d)       WORD_PTR [b + i + d]
+#define B_REGBID(b, i, d)       BYTE_PTR [b + i + d]
+/* Reg indirect Base + (Index * Scale): arguments are (base, index, scale)
+ * and the expansion is [b + i * s].
+ */
+#define P_REGBIS(b, i, s)       [b + i * s]
+#define X_REGBIS(b, i, s)       TBYTE_PTR [b + i * s]
+#define D_REGBIS(b, i, s)       QWORD_PTR [b + i * s]
+#define L_REGBIS(b, i, s)       DWORD_PTR [b + i * s]
+#define W_REGBIS(b, i, s)       WORD_PTR [b + i * s]
+#define B_REGBIS(b, i, s)       BYTE_PTR [b + i * s]
+/* Reg indirect Base + (Index * Scale) + Displacement: arguments are
+ * (base, index, scale, displacement) and the expansion is [b + i * s + d].
+ */
+#define P_REGBISD(b, i, s, d)   [b + i * s + d]
+#define X_REGBISD(b, i, s, d)   TBYTE_PTR [b + i * s + d]
+#define D_REGBISD(b, i, s, d)   QWORD_PTR [b + i * s + d]
+#define L_REGBISD(b, i, s, d)   DWORD_PTR [b + i * s + d]
+#define W_REGBISD(b, i, s, d)   WORD_PTR [b + i * s + d]
+#define B_REGBISD(b, i, s, d)   BYTE_PTR [b + i * s + d]
+/* Displaced Scaled Index: arguments are (displacement, index, scale), with
+ * no base register; the expansion is [i * s + d].
+ */
+#define P_REGDIS(d, i, s)       [i * s + d]
+#define X_REGDIS(d, i, s)       TBYTE_PTR [i * s + d]
+#define D_REGDIS(d, i, s)       QWORD_PTR [i * s + d]
+#define L_REGDIS(d, i, s)       DWORD_PTR [i * s + d]
+#define W_REGDIS(d, i, s)       WORD_PTR [i * s + d]
+#define B_REGDIS(d, i, s)       BYTE_PTR [i * s + d]
+/* Indexed Base: arguments are (base, index), unscaled and without a
+ * displacement; the expansion is [b + i].
+ */
+#define P_REGBI(b, i)           [b + i]
+#define X_REGBI(b, i)           TBYTE_PTR [b + i]
+#define D_REGBI(b, i)           QWORD_PTR [b + i]
+#define L_REGBI(b, i)           DWORD_PTR [b + i]
+#define W_REGBI(b, i)           WORD_PTR [b + i]
+#define B_REGBI(b, i)           BYTE_PTR [b + i]
+/* Displaced Base: arguments are (displacement, base); the expansion is
+ * [b + d], i.e. the same text the REGOFF macros produce.
+ */
+#define P_REGDB(d, b)           [b + d]
+#define X_REGDB(d, b)           TBYTE_PTR [b + d]
+#define D_REGDB(d, b)           QWORD_PTR [b + d]
+#define L_REGDB(d, b)           DWORD_PTR [b + d]
+#define W_REGDB(d, b)           WORD_PTR [b + d]
+#define B_REGDB(d, b)           BYTE_PTR [b + d]
+/* Variable indirect: wraps the variable name in brackets, with no size
+ * keyword.
+ */
+#define VARINDIRECT(var)        [var]
+/* Use register contents as jump/call target: applies the P_ prefix helper
+ * (defined further down) to reg, so the P_<reg> alias is what gets emitted.
+ */
+#define CODEPTR(reg)            P_(reg)
+/*
+ *      Redefine assembler commands
+ *
+ * The helpers below paste a size prefix onto the first token of their
+ * argument, so an operand written as EAX or REGIND(x) becomes L_EAX or
+ * L_REGIND(x) and is then expanded by the size-specific macro of that name.
+ * SR_ pastes the W_ prefix and S_ pastes the L_ prefix.
+ */
+#define P_(a)                   P_ ## a
+#define X_(a)                   X_ ## a
+#define D_(a)                   D_ ## a
+#define SR_(a)                  W_ ## a
+#define S_(a)                   L_ ## a
+#define L_(a)                   L_ ## a
+#define W_(a)                   W_ ## a
+#define B_(a)                   B_ ## a
+#define AAA                     aaa
+#define AAD                     aad
+#define AAM                     aam
+#define AAS                     aas
+#define ADC_L(a, b)             adc L_(b), L_(a)
+#define ADC_W(a, b)             adc W_(b), W_(a)
+#define ADC_B(a, b)             adc B_(b), B_(a)
+#define ADD_L(a, b)             add L_(b), L_(a)
+#define ADD_W(a, b)             add W_(b), W_(a)
+#define ADD_B(a, b)             add B_(b), B_(a)
+#define AND_L(a, b)             and L_(b), L_(a)
+#define AND_W(a, b)             and W_(b), W_(a)
+#define AND_B(a, b)             and B_(b), B_(a)
+#define ARPL(a,b)               arpl W_(b), a
+#define BOUND_L(a, b)           bound L_(b), L_(a)
+#define BOUND_W(a, b)           bound W_(b), W_(a)
+#define BSF_L(a, b)             bsf L_(b), L_(a)
+#define BSF_W(a, b)             bsf W_(b), W_(a)
+#define BSR_L(a, b)             bsr L_(b), L_(a)
+#define BSR_W(a, b)             bsr W_(b), W_(a)
+#define BT_L(a, b)              bt L_(b), L_(a)
+#define BT_W(a, b)              bt W_(b), W_(a)
+#define BTC_L(a, b)             btc L_(b), L_(a)
+#define BTC_W(a, b)             btc W_(b), W_(a)
+#define BTR_L(a, b)             btr L_(b), L_(a)
+#define BTR_W(a, b)             btr W_(b), W_(a)
+#define BTS_L(a, b)             bts L_(b), L_(a)
+#define BTS_W(a, b)             bts W_(b), W_(a)
+#define CALL(a)                 call a
+#define CALLF(s,a)              call far s:a
+#define CBW                     cbw
+#define CWDE                    cwde
+#define CLC                     clc
+#define CLD                     cld
+#define CLI                     cli
+#define CLTS                    clts
+#define CMC                     cmc
+#define CMP_L(a, b)             cmp L_(b), L_(a)
+#define CMP_W(a, b)             cmp W_(b), W_(a)
+#define CMP_B(a, b)             cmp B_(b), B_(a)
+#define CMPS_L                  cmpsd
+#define CMPS_W                  cmpsw
+#define CMPS_B                  cmpsb
+#define CPUID                   cpuid
+#define CWD                     cwd
+#define CDQ                     cdq
+#define DAA                     daa
+#define DAS                     das
+#define DEC_L(a)                dec L_(a)
+#define DEC_W(a)                dec W_(a)
+#define DEC_B(a)                dec B_(a)
+#define DIV_L(a)                div L_(a)
+#define DIV_W(a)                div W_(a)
+#define DIV_B(a)                div B_(a)
+#define ENTER(a,b)              enter b, a
+#define HLT                     hlt
+#define IDIV_L(a)               idiv L_(a)
+#define IDIV_W(a)               idiv W_(a)
+#define IDIV_B(a)               idiv B_(a)
+#define IMUL_L(a, b)            imul L_(b), L_(a)
+#define IMUL_W(a, b)            imul W_(b), W_(a)
+#define IMUL_B(a)               imul B_(a)
+/* Port input. IN_* read from the port held in DX; IN1_*(a) read from the
+ * port given by operand a. The accumulator (EAX/AX/AL) is always the
+ * destination, and its width selects the transfer size.
+ */
+#define IN_L                    in EAX, DX
+#define IN_W                    in AX, DX
+#define IN_B                    in AL, DX
+/* Previously expanded to the non-existent mnemonic "in1". */
+#define IN1_L(a)                in EAX, L_(a)
+#define IN1_W(a)                in AX, W_(a)
+#define IN1_B(a)                in AL, B_(a)
+#define INC_L(a)                inc L_(a)
+#define INC_W(a)                inc W_(a)
+#define INC_B(a)                inc B_(a)
+/* String input: use the size-suffixed mnemonics, like OUTS_* (outsd/outsw/
+ * outsb) below; a bare "ins" carries no operand size.
+ */
+#define INS_L                   insd
+#define INS_W                   insw
+#define INS_B                   insb
+#define INT(a)                  int B_(a)
+#define INT3                    int3
+#define INTO                    into
+#define IRET                    iret
+#define IRETD                   iretd
+#define JA(a)                   ja NEAR a
+#define JAE(a)                  jae NEAR a
+#define JB(a)                   jb NEAR a
+#define JBE(a)                  jbe NEAR a
+#define JC(a)                   jc NEAR a
+#define JE(a)                   je NEAR a
+#define JG(a)                   jg NEAR a
+#define JGE(a)                  jge NEAR a
+#define JL(a)                   jl NEAR a
+#define JLE(a)                  jle NEAR a
+#define JNA(a)                  jna NEAR a
+#define JNAE(a)                 jnae NEAR a
+#define JNB(a)                  jnb NEAR a
+#define JNBE(a)                 jnbe NEAR a
+#define JNC(a)                  jnc NEAR a
+#define JNE(a)                  jne NEAR a
+#define JNG(a)                  jng NEAR a
+#define JNGE(a)                 jnge NEAR a
+#define JNL(a)                  jnl NEAR a
+#define JNLE(a)                 jnle NEAR a
+#define JNO(a)                  jno NEAR a
+#define JNP(a)                  jnp NEAR a
+#define JNS(a)                  jns NEAR a
+#define JNZ(a)                  jnz NEAR a
+#define JO(a)                   jo NEAR a
+#define JP(a)                   jp NEAR a
+#define JPE(a)                  jpe NEAR a
+#define JPO(a)                  jpo NEAR a
+#define JS(a)                   js NEAR a
+#define JZ(a)                   jz NEAR a
+#define JMP(a)                  jmp a
+#define JMPF(s,a)               jmp far s:a
+#define LAHF                    lahf
+#define LAR(a, b)               lar b, a
+#define LEA_L(a, b)             lea P_(b), P_(a)
+#define LEA_W(a, b)             lea P_(b), P_(a)
+#define LEAVE                   leave
+#define LGDT(a)                 lgdt a
+#define LIDT(a)                 lidt a
+#define LDS(a, b)               lds b, P_(a)
+#define LES(a, b)               les b, P_(a)
+#define LFS(a, b)               lfs b, P_(a)
+#define LGS(a, b)               lgs b, P_(a)
+#define LSS(a, b)               lss b, P_(a)
+#define LLDT(a)                 lldt a
+#define LMSW(a)                 lmsw a
+#define LOCK                    lock
+#define LODS_L                  lodsd
+#define LODS_W                  lodsw
+#define LODS_B                  lodsb
+#define LOOP(a)                 loop a
+#define LOOPE(a)                loope a
+#define LOOPZ(a)                loopz a
+#define LOOPNE(a)               loopne a
+#define LOOPNZ(a)               loopnz a
+#define LSL(a, b)               lsl b, a
+#define LTR(a)                  ltr a
+#define MOV_SR(a, b)            mov SR_(b), SR_(a)
+#define MOV_L(a, b)             mov L_(b), L_(a)
+#define MOV_W(a, b)             mov W_(b), W_(a)
+#define MOV_B(a, b)             mov B_(b), B_(a)
+#define MOVS_L                  movsd
+#define MOVS_W                  movsw
+#define MOVS_B                  movsb
+/* Sign/zero extension.  The suffix names the source width then the
+ * destination width (BL = byte to long, BW = byte to word, WL = word to
+ * long), so the destination (b) takes the wider operand decorator and the
+ * source (a) the narrower one. */
+#define MOVSX_BL(a, b)          movsx L_(b), B_(a)
+#define MOVSX_BW(a, b)          movsx W_(b), B_(a)
+#define MOVSX_WL(a, b)          movsx L_(b), W_(a)
+#define MOVZX_BL(a, b)          movzx L_(b), B_(a)
+#define MOVZX_BW(a, b)          movzx W_(b), B_(a)
+#define MOVZX_WL(a, b)          movzx L_(b), W_(a)
+#define MUL_L(a)                mul L_(a)
+#define MUL_W(a)                mul W_(a)
+#define MUL_B(a)                mul B_(a)
+#define NEG_L(a)                neg L_(a)
+#define NEG_W(a)                neg W_(a)
+#define NEG_B(a)                neg B_(a)
+#define NOP                     nop
+#define NOT_L(a)                not L_(a)
+#define NOT_W(a)                not W_(a)
+#define NOT_B(a)                not B_(a)
+#define OR_L(a,b)               or L_(b), L_(a)
+#define OR_W(a,b)               or W_(b), W_(a)
+#define OR_B(a,b)               or B_(b), B_(a)
+#define OUT_L                   out DX, EAX
+#define OUT_W                   out DX, AX
+#define OUT_B                   out DX, AL
+#define OUT1_L(a)               out1 L_(a)
+#define OUT1_W(a)               out1 W_(a)
+#define OUT1_B(a)               out1 B_(a)
+#define OUTS_L                  outsd
+#define OUTS_W                  outsw
+#define OUTS_B                  outsb
+#define POP_SR(a)               pop SR_(a)
+#define POP_L(a)                pop L_(a)
+#define POP_W(a)                pop W_(a)
+#define POPA_L                  popad
+#define POPA_W                  popa
+#define POPF_L                  popfd
+#define POPF_W                  popf
+#define PUSH_SR(a)              push SR_(a)
+#define PUSH_L(a)               push L_(a)
+#define PUSH_W(a)               push W_(a)
+#define PUSH_B(a)               push B_(a)
+#define PUSHA_L                 pushad
+#define PUSHA_W                 pusha
+#define PUSHF_L                 pushfd
+#define PUSHF_W                 pushf
+#define RCL_L(a, b)             rcl L_(b), L_(a)
+#define RCL_W(a, b)             rcl W_(b), W_(a)
+#define RCL_B(a, b)             rcl B_(b), B_(a)
+#define RCR_L(a, b)             rcr L_(b), L_(a)
+#define RCR_W(a, b)             rcr W_(b), W_(a)
+#define RCR_B(a, b)             rcr B_(b), B_(a)
+#define RDTSC                   rdtsc
+#define ROL_L(a, b)             rol L_(b), L_(a)
+#define ROL_W(a, b)             rol W_(b), W_(a)
+#define ROL_B(a, b)             rol B_(b), B_(a)
+#define ROR_L(a, b)             ror L_(b), L_(a)
+#define ROR_W(a, b)             ror W_(b), W_(a)
+#define ROR_B(a, b)             ror B_(b), B_(a)
+#define REP                     rep
+#define REPE                    repe
+#define REPNE                   repne
+#define REPNZ                   REPNE
+#define REPZ                    REPE
+#define RET                     ret
+#define SAHF                    sahf
+#define SAL_L(a, b)             sal L_(b), B_(a)
+#define SAL_W(a, b)             sal W_(b), B_(a)
+#define SAL_B(a, b)             sal B_(b), B_(a)
+#define SAR_L(a, b)             sar L_(b), B_(a)
+#define SAR_W(a, b)             sar W_(b), B_(a)
+#define SAR_B(a, b)             sar B_(b), B_(a)
+#define SBB_L(a, b)             sbb L_(b), L_(a)
+#define SBB_W(a, b)             sbb W_(b), W_(a)
+#define SBB_B(a, b)             sbb B_(b), B_(a)
+/* String scan: spell the operand size out in the mnemonic, the same way the
+ * LODS_x, MOVS_x and STOS_x macros do; a bare "scas" names no size. */
+#define SCAS_L                  scasd
+#define SCAS_W                  scasw
+#define SCAS_B                  scasb
+#define SETA(a)                 seta a
+#define SETAE(a)                setae a
+#define SETB(a)                 setb a
+#define SETBE(a)                setbe a
+#define SETC(a)                 setc a
+#define SETE(a)                 sete a
+#define SETG(a)                 setg a
+#define SETGE(a)                setge a
+#define SETL(a)                 setl a
+#define SETLE(a)                setle a
+#define SETNA(a)                setna a
+#define SETNAE(a)               setnae a
+#define SETNB(a)                setnb a
+#define SETNBE(a)               setnbe a
+#define SETNC(a)                setnc a
+#define SETNE(a)                setne a
+#define SETNG(a)                setng a
+#define SETNGE(a)               setnge a
+#define SETNL(a)                setnl a
+#define SETNLE(a)               setnle a
+#define SETNO(a)                setno a
+#define SETNP(a)                setnp a
+#define SETNS(a)                setns a
+#define SETNZ(a)                setnz a
+#define SETO(a)                 seto a
+#define SETP(a)                 setp a
+#define SETPE(a)                setpe a
+#define SETPO(a)                setpo a
+#define SETS(a)                 sets a
+#define SETZ(a)                 setz a
+#define SGDT(a)                 sgdt a
+#define SIDT(a)                 sidt a
+/* Shifts: the count (a) is always byte sized; the shifted operand comes last
+ * in the macro argument list and first in the emitted instruction. */
+#define SHL_L(a, b)             shl L_(b), B_(a)
+#define SHL_W(a, b)             shl W_(b), B_(a)
+#define SHL_B(a, b)             shl B_(b), B_(a)
+/* Double shifts.  The three-argument forms follow the same reversed-operand
+ * convention as every other macro here: a = count, b = source register,
+ * c = destination.  They used to expand to the bare mnemonic and drop all
+ * three operands. */
+#define SHLD_L(a,b,c)           shld L_(c), L_(b), B_(a)
+#define SHLD2_L(a,b)            shld L_(b), L_(a)
+#define SHLD_W(a,b,c)           shld W_(c), W_(b), B_(a)
+#define SHLD2_W(a,b)            shld W_(b), W_(a)
+#define SHR_L(a, b)             shr L_(b), B_(a)
+#define SHR_W(a, b)             shr W_(b), B_(a)
+#define SHR_B(a, b)             shr B_(b), B_(a)
+#define SHRD_L(a,b,c)           shrd L_(c), L_(b), B_(a)
+#define SHRD2_L(a,b)            shrd L_(b), L_(a)
+#define SHRD_W(a,b,c)           shrd W_(c), W_(b), B_(a)
+#define SHRD2_W(a,b)            shrd W_(b), W_(a)
+#define SLDT(a)                 sldt a
+#define SMSW(a)                 smsw a
+#define STC                     stc
+#define STD                     std
+#define STI                     sti
+#define STOS_L                  stosd
+#define STOS_W                  stosw
+#define STOS_B                  stosb
+#define STR(a)                  str a
+#define SUB_L(a, b)             sub L_(b), L_(a)
+#define SUB_W(a, b)             sub W_(b), W_(a)
+#define SUB_B(a, b)             sub B_(b), B_(a)
+#define TEST_L(a, b)            test L_(b), L_(a)
+#define TEST_W(a, b)            test W_(b), W_(a)
+#define TEST_B(a, b)            test B_(b), B_(a)
+#define VERR(a)                 verr a
+#define VERW(a)                 verw a
+#define WAIT                    wait
+#define XCHG_L(a, b)            xchg L_(b), L_(a)
+#define XCHG_W(a, b)            xchg W_(b), W_(a)
+#define XCHG_B(a, b)            xchg B_(b), B_(a)
+#define XLAT                    xlat
+#define XOR_L(a, b)             xor L_(b), L_(a)
+#define XOR_W(a, b)             xor W_(b), W_(a)
+#define XOR_B(a, b)             xor B_(b), B_(a)
+/* Floating Point Instructions
+ *
+ * x87 mnemonics for the NASM/MASM branch.  A _S, _D or _X suffix applies the
+ * S_, D_ or X_ operand decorator and _L or _W applies L_ or W_; note that
+ * FILD_Q and FISTP_Q use D_.  The two-argument forms (FADD2, FADDP, ...)
+ * emit their operands as "b, a", i.e. in the reverse of the macro argument
+ * order.  Single-argument forms without a size suffix pass the operand
+ * through untouched.
+ */
+#define F2XM1                   f2xm1
+#define FABS                    fabs
+#define FADD_D(a)               fadd D_(a)
+#define FADD_S(a)               fadd S_(a)
+#define FADD2(a, b)             fadd b, a
+#define FADDP(a, b)             faddp b, a
+#define FIADD_L(a)              fiadd L_(a)
+#define FIADD_W(a)              fiadd W_(a)
+#define FBLD(a)                 fbld a
+#define FBSTP(a)                fbstp a
+#define FCHS                    fchs
+#define FCLEX                   fclex
+#define FNCLEX                  fnclex
+#define FCOM(a)                 fcom a
+#define FCOM_D(a)               fcom D_(a)
+#define FCOM_S(a)               fcom S_(a)
+#define FCOMP(a)                fcomp a
+#define FCOMP_D(a)              fcomp D_(a)
+#define FCOMP_S(a)              fcomp S_(a)
+#define FCOMPP                  fcompp
+#define FCOS                    fcos
+#define FDECSTP                 fdecstp
+#define FDIV_D(a)               fdiv D_(a)
+#define FDIV_S(a)               fdiv S_(a)
+#define FDIV2(a, b)             fdiv b, a
+#define FDIVP(a, b)             fdivp b, a
+#define FIDIV_L(a)              fidiv L_(a)
+#define FIDIV_W(a)              fidiv W_(a)
+#define FDIVR_D(a)              fdivr D_(a)
+#define FDIVR_S(a)              fdivr S_(a)
+#define FDIVR2(a, b)            fdivr b, a
+#define FDIVRP(a, b)            fdivrp b, a
+#define FIDIVR_L(a)             fidivr L_(a)
+#define FIDIVR_W(a)             fidivr W_(a)
+#define FFREE(a)                ffree a
+#define FICOM_L(a)              ficom L_(a)
+#define FICOM_W(a)              ficom W_(a)
+#define FICOMP_L(a)             ficomp L_(a)
+#define FICOMP_W(a)             ficomp W_(a)
+#define FILD_Q(a)               fild D_(a)
+#define FILD_L(a)               fild L_(a)
+#define FILD_W(a)               fild W_(a)
+#define FINCSTP                 fincstp
+#define FINIT                   finit
+#define FNINIT                  fninit
+#define FIST_L(a)               fist L_(a)
+#define FIST_W(a)               fist W_(a)
+#define FISTP_Q(a)              fistp D_(a)
+#define FISTP_L(a)              fistp L_(a)
+#define FISTP_W(a)              fistp W_(a)
+#define FLD_X(a)                fld X_(a)
+#define FLD_D(a)                fld D_(a)
+#define FLD_S(a)                fld S_(a)
+#define FLD1                    fld1
+#define FLDL2T                  fldl2t
+#define FLDL2E                  fldl2e
+#define FLDPI                   fldpi
+#define FLDLG2                  fldlg2
+#define FLDLN2                  fldln2
+#define FLDZ                    fldz
+#define FLDCW(a)                fldcw a
+#define FLDENV(a)               fldenv a
+#define FMUL_S(a)               fmul S_(a)
+#define FMUL_D(a)               fmul D_(a)
+#define FMUL2(a, b)             fmul b, a
+#define FMULP(a, b)             fmulp b, a
+#define FIMUL_L(a)              fimul L_(a)
+#define FIMUL_W(a)              fimul W_(a)
+#define FNOP                    fnop
+#define FPATAN                  fpatan
+#define FPREM                   fprem
+#define FPREM1                  fprem1
+#define FPTAN                   fptan
+#define FRNDINT                 frndint
+#define FRSTOR(a)               frstor a
+#define FSAVE(a)                fsave a
+#define FNSAVE(a)               fnsave a
+#define FSCALE                  fscale
+#define FSIN                    fsin
+#define FSINCOS                 fsincos
+#define FSQRT                   fsqrt
+#define FST_D(a)                fst D_(a)
+#define FST_S(a)                fst S_(a)
+#define FSTP_X(a)               fstp X_(a)
+#define FSTP_D(a)               fstp D_(a)
+#define FSTP_S(a)               fstp S_(a)
+#define FSTP(a)                 fstp a
+#define FSTCW(a)                fstcw a
+#define FNSTCW(a)               fnstcw a
+#define FSTENV(a)               fstenv a
+#define FNSTENV(a)              fnstenv a
+#define FSTSW(a)                fstsw a
+#define FNSTSW(a)               fnstsw a
+#define FSUB_S(a)               fsub S_(a)
+#define FSUB_D(a)               fsub D_(a)
+#define FSUB2(a, b)             fsub b, a
+#define FSUBP(a, b)             fsubp b, a
+#define FISUB_L(a)              fisub L_(a)
+#define FISUB_W(a)              fisub W_(a)
+#define FSUBR_S(a)              fsubr S_(a)
+#define FSUBR_D(a)              fsubr D_(a)
+#define FSUBR2(a, b)            fsubr b, a
+#define FSUBRP(a, b)            fsubrp b, a
+#define FISUBR_L(a)             fisubr L_(a)
+#define FISUBR_W(a)             fisubr W_(a)
+#define FTST                    ftst
+#define FUCOM(a)                fucom a
+#define FUCOMP(a)               fucomp a
+#define FUCOMPP                 fucompp
+#define FWAIT                   fwait
+#define FXAM                    fxam
+#define FXCH(a)                 fxch a
+#define FXTRACT                 fxtract
+#define FYL2X                   fyl2x
+#define FYL2XP1                 fyl2xp1
+#endif /* NASM_ASSEMBLER, MASM_ASSEMBLER */
+        /****************************************/
+        /*                                      */
+        /*      Extensions to x86 insn set -    */
+        /*      MMX, 3DNow!, SSE                */
+        /*                                      */
+        /****************************************/
+#if defined(NASM_ASSEMBLER) || defined(MASM_ASSEMBLER) /* paste a P_ prefix onto every operand and emit the operands in reverse order */
+#define P_ARG1(a)               P_ ## a
+#define P_ARG2(a, b)            P_ ## b, P_ ## a
+#define P_ARG3(a, b, c)         P_ ## c, P_ ## b, P_ ## a
+#else /* every other assembler: operands are emitted unchanged, in the order given */
+#define P_ARG1(a)               a
+#define P_ARG2(a, b)            a, b
+#define P_ARG3(a, b, c)         a, b, c
+#endif /* NASM_ASSEMBLER || MASM_ASSEMBLER */
+/* MMX
+ *
+ * Every two-operand macro forwards (a, b) through P_ARG2, which picks the
+ * operand order and prefixing for the assembler in use; EMMS takes no
+ * operand.
+ */
+#define MOVD(a, b)              movd P_ARG2(a, b)
+#define MOVQ(a, b)              movq P_ARG2(a, b)
+#define PADDB(a, b)             paddb P_ARG2(a, b)
+#define PADDW(a, b)             paddw P_ARG2(a, b)
+#define PADDD(a, b)             paddd P_ARG2(a, b)
+#define PADDSB(a, b)            paddsb P_ARG2(a, b)
+#define PADDSW(a, b)            paddsw P_ARG2(a, b)
+#define PADDUSB(a, b)           paddusb P_ARG2(a, b)
+#define PADDUSW(a, b)           paddusw P_ARG2(a, b)
+#define PSUBB(a, b)             psubb P_ARG2(a, b)
+#define PSUBW(a, b)             psubw P_ARG2(a, b)
+#define PSUBD(a, b)             psubd P_ARG2(a, b)
+#define PSUBSB(a, b)            psubsb P_ARG2(a, b)
+#define PSUBSW(a, b)            psubsw P_ARG2(a, b)
+#define PSUBUSB(a, b)           psubusb P_ARG2(a, b)
+#define PSUBUSW(a, b)           psubusw P_ARG2(a, b)
+#define PCMPEQB(a, b)           pcmpeqb P_ARG2(a, b)
+#define PCMPEQW(a, b)           pcmpeqw P_ARG2(a, b)
+#define PCMPEQD(a, b)           pcmpeqd P_ARG2(a, b)
+#define PCMPGTB(a, b)           pcmpgtb P_ARG2(a, b)
+#define PCMPGTW(a, b)           pcmpgtw P_ARG2(a, b)
+#define PCMPGTD(a, b)           pcmpgtd P_ARG2(a, b)
+#define PMULHW(a, b)            pmulhw P_ARG2(a, b)
+#define PMULLW(a, b)            pmullw P_ARG2(a, b)
+#define PMADDWD(a, b)           pmaddwd P_ARG2(a, b)
+#define PAND(a, b)              pand P_ARG2(a, b)
+#define PANDN(a, b)             pandn P_ARG2(a, b)
+#define POR(a, b)               por P_ARG2(a, b)
+#define PXOR(a, b)              pxor P_ARG2(a, b)
+#define PSRAW(a, b)             psraw P_ARG2(a, b)
+#define PSRAD(a, b)             psrad P_ARG2(a, b)
+#define PSRLW(a, b)             psrlw P_ARG2(a, b)
+#define PSRLD(a, b)             psrld P_ARG2(a, b)
+#define PSRLQ(a, b)             psrlq P_ARG2(a, b)
+#define PSLLW(a, b)             psllw P_ARG2(a, b)
+#define PSLLD(a, b)             pslld P_ARG2(a, b)
+#define PSLLQ(a, b)             psllq P_ARG2(a, b)
+#define PACKSSWB(a, b)          packsswb P_ARG2(a, b)
+#define PACKSSDW(a, b)          packssdw P_ARG2(a, b)
+#define PACKUSWB(a, b)          packuswb P_ARG2(a, b)
+#define PUNPCKHBW(a, b)         punpckhbw P_ARG2(a, b)
+#define PUNPCKHWD(a, b)         punpckhwd P_ARG2(a, b)
+#define PUNPCKHDQ(a, b)         punpckhdq P_ARG2(a, b)
+#define PUNPCKLBW(a, b)         punpcklbw P_ARG2(a, b)
+#define PUNPCKLWD(a, b)         punpcklwd P_ARG2(a, b)
+#define PUNPCKLDQ(a, b)         punpckldq P_ARG2(a, b)
+#define EMMS                    emms
+/* AMD 3DNow!
+ *
+ * Two-operand macros go through P_ARG2 and the single-operand PREFETCH /
+ * PREFETCHW through P_ARG1, so operand order and prefixing follow the
+ * assembler in use; FEMMS takes no operand.
+ */
+#define PAVGUSB(a, b)           pavgusb P_ARG2(a, b)
+#define PFADD(a, b)             pfadd P_ARG2(a, b)
+#define PFSUB(a, b)             pfsub P_ARG2(a, b)
+#define PFSUBR(a, b)            pfsubr P_ARG2(a, b)
+#define PFACC(a, b)             pfacc P_ARG2(a, b)
+#define PFCMPGE(a, b)           pfcmpge P_ARG2(a, b)
+#define PFCMPGT(a, b)           pfcmpgt P_ARG2(a, b)
+#define PFCMPEQ(a, b)           pfcmpeq P_ARG2(a, b)
+#define PFMIN(a, b)             pfmin P_ARG2(a, b)
+#define PFMAX(a, b)             pfmax P_ARG2(a, b)
+#define PI2FD(a, b)             pi2fd P_ARG2(a, b)
+#define PF2ID(a, b)             pf2id P_ARG2(a, b)
+#define PFRCP(a, b)             pfrcp P_ARG2(a, b)
+#define PFRSQRT(a, b)           pfrsqrt P_ARG2(a, b)
+#define PFMUL(a, b)             pfmul P_ARG2(a, b)
+#define PFRCPIT1(a, b)          pfrcpit1 P_ARG2(a, b)
+#define PFRSQIT1(a, b)          pfrsqit1 P_ARG2(a, b)
+#define PFRCPIT2(a, b)          pfrcpit2 P_ARG2(a, b)
+#define PMULHRW(a, b)           pmulhrw P_ARG2(a, b)
+#define FEMMS                   femms
+#define PREFETCH(a)             prefetch P_ARG1(a)
+#define PREFETCHW(a)            prefetchw P_ARG1(a)
+/* Intel SSE
+ *
+ * Operands are routed through P_ARG1 / P_ARG2 / P_ARG3 according to their
+ * count, so operand order and prefixing follow the assembler in use.  The
+ * generic three-operand CMPPS / CMPSS are deliberately left commented out
+ * below; only the per-predicate CMPxxPS / CMPxxSS forms are provided.
+ * SFENCE takes no operand.
+ */
+#define ADDPS(a, b)             addps P_ARG2(a, b)
+#define ADDSS(a, b)             addss P_ARG2(a, b)
+#define ANDNPS(a, b)            andnps P_ARG2(a, b)
+#define ANDPS(a, b)             andps P_ARG2(a, b)
+/* NASM only knows the pseudo ops for these.
+#define CMPPS(a, b, c)          cmpps P_ARG3(a, b, c)
+#define CMPSS(a, b, c)          cmpss P_ARG3(a, b, c)
+*/
+#define CMPEQPS(a, b)           cmpeqps P_ARG2(a, b)
+#define CMPLTPS(a, b)           cmpltps P_ARG2(a, b)
+#define CMPLEPS(a, b)           cmpleps P_ARG2(a, b)
+#define CMPUNORDPS(a, b)        cmpunordps P_ARG2(a, b)
+#define CMPNEQPS(a, b)          cmpneqps P_ARG2(a, b)
+#define CMPNLTPS(a, b)          cmpnltps P_ARG2(a, b)
+#define CMPNLEPS(a, b)          cmpnleps P_ARG2(a, b)
+#define CMPORDPS(a, b)          cmpordps P_ARG2(a, b)
+#define CMPEQSS(a, b)           cmpeqss P_ARG2(a, b)
+#define CMPLTSS(a, b)           cmpltss P_ARG2(a, b)
+#define CMPLESS(a, b)           cmpless P_ARG2(a, b)
+#define CMPUNORDSS(a, b)        cmpunordss P_ARG2(a, b)
+#define CMPNEQSS(a, b)          cmpneqss P_ARG2(a, b)
+#define CMPNLTSS(a, b)          cmpnltss P_ARG2(a, b)
+#define CMPNLESS(a, b)          cmpnless P_ARG2(a, b)
+#define CMPORDSS(a, b)          cmpordss P_ARG2(a, b)
+#define COMISS(a, b)            comiss P_ARG2(a, b)
+#define CVTPI2PS(a, b)          cvtpi2ps P_ARG2(a, b)
+#define CVTPS2PI(a, b)          cvtps2pi P_ARG2(a, b)
+#define CVTSI2SS(a, b)          cvtsi2ss P_ARG2(a, b)
+#define CVTSS2SI(a, b)          cvtss2si P_ARG2(a, b)
+#define CVTTPS2PI(a, b)         cvttps2pi P_ARG2(a, b)
+#define CVTTSS2SI(a, b)         cvttss2si P_ARG2(a, b)
+#define DIVPS(a, b)             divps P_ARG2(a, b)
+#define DIVSS(a, b)             divss P_ARG2(a, b)
+#define FXRSTOR(a)              fxrstor P_ARG1(a)
+#define FXSAVE(a)               fxsave P_ARG1(a)
+#define LDMXCSR(a)              ldmxcsr P_ARG1(a)
+#define MAXPS(a, b)             maxps P_ARG2(a, b)
+#define MAXSS(a, b)             maxss P_ARG2(a, b)
+#define MINPS(a, b)             minps P_ARG2(a, b)
+#define MINSS(a, b)             minss P_ARG2(a, b)
+#define MOVAPS(a, b)            movaps P_ARG2(a, b)
+#define MOVHLPS(a, b)           movhlps P_ARG2(a, b)
+#define MOVHPS(a, b)            movhps P_ARG2(a, b)
+#define MOVLHPS(a, b)           movlhps P_ARG2(a, b)
+#define MOVLPS(a, b)            movlps P_ARG2(a, b)
+#define MOVMSKPS(a, b)          movmskps P_ARG2(a, b)
+#define MOVNTPS(a, b)           movntps P_ARG2(a, b)
+#define MOVNTQ(a, b)            movntq P_ARG2(a, b)
+#define MOVSS(a, b)             movss P_ARG2(a, b)
+#define MOVUPS(a, b)            movups P_ARG2(a, b)
+#define MULPS(a, b)             mulps P_ARG2(a, b)
+#define MULSS(a, b)             mulss P_ARG2(a, b)
+#define ORPS(a, b)              orps P_ARG2(a, b)
+#define RCPPS(a, b)             rcpps P_ARG2(a, b)
+#define RCPSS(a, b)             rcpss P_ARG2(a, b)
+#define RSQRTPS(a, b)           rsqrtps P_ARG2(a, b)
+#define RSQRTSS(a, b)           rsqrtss P_ARG2(a, b)
+#define SHUFPS(a, b, c)         shufps P_ARG3(a, b, c)
+#define SQRTPS(a, b)            sqrtps P_ARG2(a, b)
+#define SQRTSS(a, b)            sqrtss P_ARG2(a, b)
+#define STMXCSR(a)              stmxcsr P_ARG1(a)
+#define SUBPS(a, b)             subps P_ARG2(a, b)
+#define UCOMISS(a, b)           ucomiss P_ARG2(a, b)
+#define UNPCKHPS(a, b)          unpckhps P_ARG2(a, b)
+#define UNPCKLPS(a, b)          unpcklps P_ARG2(a, b)
+#define XORPS(a, b)             xorps P_ARG2(a, b)
+#define PREFETCHNTA(a)          prefetchnta P_ARG1(a)
+#define PREFETCHT0(a)           prefetcht0 P_ARG1(a)
+#define PREFETCHT1(a)           prefetcht1 P_ARG1(a)
+#define PREFETCHT2(a)           prefetcht2 P_ARG1(a)
+#define SFENCE                  sfence
+/* Added by BrianP for FreeBSD (per David Dawes)
+ *
+ * Local label helpers.  Unless the build is for NASM, MASM or __bsdi__, the
+ * label name is handed to CONCAT / CONCAT3 together with a ".L" prefix;
+ * otherwise LLBL passes the name through and LLBL2 joins its two parts with
+ * CONCAT.  NOTE(review): CONCAT/CONCAT3 are defined outside this excerpt,
+ * presumably as token pasting - confirm.
+ */
+#if !defined(NASM_ASSEMBLER) && !defined(MASM_ASSEMBLER) && !defined(__bsdi__)
+#define LLBL(a)         CONCAT(.L,a)
+#define LLBL2(a,b)      CONCAT3(.L,a,b)
+#else
+#define LLBL(a)         a
+#define LLBL2(a,b)      CONCAT(a,b)
+#endif
+/* Segment overrides: each macro hands one decimal byte value to D_BYTE;
+ * the hexadecimal equivalent is noted next to each. */
+#define SEGCS           D_BYTE  46      /* 0x2E */
+#define SEGDS           D_BYTE  62      /* 0x3E */
+#define SEGES           D_BYTE  38      /* 0x26 */
+#define SEGFS           D_BYTE  100     /* 0x64 */
+#define SEGGS           D_BYTE  101     /* 0x65 */
+/* Temporary labels: valid until next non-local label
+ *
+ * For NASM the name is combined with a leading "." and for every other
+ * assembler with a trailing "$", in both cases through CONCAT.
+ */
+#ifdef NASM_ASSEMBLER
+#define TLBL(a)         CONCAT(.,a)
+#else
+#define TLBL(a)         CONCAT(a,$)
+#endif
+/* Hidden symbol visibility support.
+ * If we build with gcc's -fvisibility=hidden flag, we'll need to change
+ * the symbol visibility mode to 'default'.
+ *
+ * In this header HIDDEN(x) is defined with an empty expansion, so using it
+ * emits no directive at all.
+ */
+#define HIDDEN(x)
+#endif /* __ASSYNTAX_H__ */

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/clip_args.h
 ,0 → 1,59
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/*
+ * Clip test function interface for assembly code.  Simply define
+ * FRAME_OFFSET to the number of bytes pushed onto the stack before
+ * using the ARG_* argument macros.
+ *
+ * Gareth Hughes
+ */
+#ifndef __CLIP_ARGS_H__
+#define __CLIP_ARGS_H__
+/*
+ * Offsets for clip_func arguments
+ *
+ * typedef GLvector4f *(*clip_func)( GLvector4f *clip_vec,
+ *                                   GLvector4f *proj_vec,
+ *                                   GLubyte clipMask[],
+ *                                   GLubyte *orMask,
+ *                                   GLubyte *andMask );
+ *
+ * The OFFSET_* values are spaced 4 apart, one per parameter above and in the
+ * same order (SOURCE, DEST, CLIP, OR, AND).  NOTE(review): they start at 4,
+ * presumably to step over the return address - confirm against the callers.
+ * Each ARG_* macro wraps its offset, plus the FRAME_OFFSET that the
+ * including file must define, in an ESP-relative REGOFF operand.
+ */
+#define OFFSET_SOURCE   4
+#define OFFSET_DEST     8
+#define OFFSET_CLIP     12
+#define OFFSET_OR       16
+#define OFFSET_AND      20
+#define ARG_SOURCE      REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP)
+#define ARG_DEST        REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP)
+#define ARG_CLIP        REGOFF(FRAME_OFFSET+OFFSET_CLIP, ESP)
+#define ARG_OR          REGOFF(FRAME_OFFSET+OFFSET_OR, ESP)
+#define ARG_AND         REGOFF(FRAME_OFFSET+OFFSET_AND, ESP)
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/common_x86.c
 ,0 → 1,336
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/**
+ * \file common_x86.c
+ *
+ * Check CPU capabilities & initialize optimized functions for this particular
+ * processor.
+ *
+ * Changed by Andre Werthmann for using the new SSE functions.
+ *
+ * \author Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
+ * \author Andre Werthmann <wertmann@cs.uni-potsdam.de>
+ */
+/* XXX these includes should probably go into imports.h or glheader.h */
+#if defined(USE_SSE_ASM) && defined(__linux__)
+#include <linux/version.h>
+#endif
+#if defined(USE_SSE_ASM) && defined(__FreeBSD__)
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#endif
+#if defined(USE_SSE_ASM) && defined(__OpenBSD__)
+#include <sys/param.h>
+#include <sys/sysctl.h>
+#include <machine/cpu.h>
+#endif
+#include "main/imports.h"
+#include "common_x86_asm.h"
+/** Bitmask of X86_FEATURE_x bits; reset and filled in by _mesa_get_x86_features() */
+int _mesa_x86_cpu_features = 0x0;
+static int detection_debug = GL_FALSE; /* when non-zero, the optional detection diagnostics are printed through _mesa_debug() */
+/* No reason for this to be public.
+ *
+ * CPUID helpers used by _mesa_get_x86_features().  They are only declared
+ * in this file.  NOTE(review): presumably implemented in assembly
+ * (common_x86_asm.S) - confirm.
+ */
+extern GLuint   _ASMAPI _mesa_x86_has_cpuid(void);
+extern void     _ASMAPI _mesa_x86_cpuid(GLuint op, GLuint *reg_eax, GLuint *reg_ebx, GLuint *reg_ecx, GLuint *reg_edx);
+extern GLuint   _ASMAPI _mesa_x86_cpuid_eax(GLuint op);
+extern GLuint   _ASMAPI _mesa_x86_cpuid_ebx(GLuint op);
+extern GLuint   _ASMAPI _mesa_x86_cpuid_ecx(GLuint op);
+extern GLuint   _ASMAPI _mesa_x86_cpuid_edx(GLuint op);
+#if defined(USE_SSE_ASM)
+/*
+ * We must verify that the Streaming SIMD Extensions are truly supported
+ * on this processor before we go ahead and hook out the optimized code.
+ *
+ * However, I have been told by Alan Cox that all 2.4 (and later) Linux
+ * kernels provide full SSE support on all processors that expose SSE via
+ * the CPUID mechanism.
+ */
+/* These are assembly functions:
+ * within this file they are called only from the _WIN32 branch of
+ * _mesa_check_os_sse_support(), while ExceptionFilter is installed. */
+extern void _mesa_test_os_sse_support( void );
+extern void _mesa_test_os_sse_exception_support( void );
+#if defined(_WIN32)
+#ifndef STATUS_FLOAT_MULTIPLE_TRAPS
+# define STATUS_FLOAT_MULTIPLE_TRAPS (0xC00002B5L)
+#endif
+/**
+ * Unhandled-exception filter that is installed while the SSE probes run.
+ *
+ * An illegal-instruction exception clears X86_FEATURE_XMM; a
+ * STATUS_FLOAT_MULTIPLE_TRAPS exception is only logged.  In both cases
+ * execution resumes past the faulting instruction.  Any other exception, or
+ * a context record without control registers, hands control to the handler.
+ *
+ * \param exp  exception record and thread context supplied by Windows.
+ * \return EXCEPTION_CONTINUE_EXECUTION when the probe may carry on,
+ *         EXCEPTION_EXECUTE_HANDLER otherwise.
+ */
+static LONG WINAPI ExceptionFilter(LPEXCEPTION_POINTERS exp)
+{
+   PEXCEPTION_RECORD rec = exp->ExceptionRecord;
+   PCONTEXT ctx = exp->ContextRecord;
+   if ( rec->ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION ) {
+      _mesa_debug(NULL, "EXCEPTION_ILLEGAL_INSTRUCTION\n" );
+      _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
+   } else if ( rec->ExceptionCode == STATUS_FLOAT_MULTIPLE_TRAPS ) {
+      _mesa_debug(NULL, "STATUS_FLOAT_MULTIPLE_TRAPS\n");
+      /* Windows seems to clear the exception flag itself, we just have to increment Eip */
+   } else {
+      /* The format string has a %08x conversion, so the exception code must
+       * actually be passed; the cast matches it to the conversion. */
+      _mesa_debug(NULL, "UNEXPECTED EXCEPTION (0x%08x), terminating!\n",
+                  (unsigned int) rec->ExceptionCode );
+      return EXCEPTION_EXECUTE_HANDLER;
+   }
+   if ( (ctx->ContextFlags & CONTEXT_CONTROL) != CONTEXT_CONTROL ) {
+      _mesa_debug(NULL, "Context does not contain control registers, terminating!\n");
+      return EXCEPTION_EXECUTE_HANDLER;
+   }
+   /* NOTE(review): assumes the faulting probe instruction is 3 bytes long -
+    * confirm against the assembly probes. */
+   ctx->Eip += 3;
+   return EXCEPTION_CONTINUE_EXECUTION;
+}
+#endif /* _WIN32 */
+/**
+ * Check if SSE is supported.
+ * If not, turn off the X86_FEATURE_XMM flag in _mesa_x86_cpu_features.
+ *
+ * FreeBSD, NetBSD and OpenBSD are asked through sysctl; a failed query is
+ * treated the same as "disabled".  On Windows the two assembly probes are
+ * run with ExceptionFilter installed, and the previous filter is restored
+ * afterwards.  Every other platform leaves the flag untouched.
+ */
+void _mesa_check_os_sse_support( void )
+{
+#if defined(__FreeBSD__)
+   {
+      int ret, enabled;
+      /* sysctlbyname() takes the buffer length as size_t *, exactly as the
+       * NetBSD and OpenBSD branches below already pass it. */
+      size_t len = sizeof(enabled);
+      ret = sysctlbyname("hw.instruction_sse", &enabled, &len, NULL, 0);
+      if (ret || !enabled)
+         _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
+   }
+#elif defined (__NetBSD__)
+   {
+      int ret, enabled;
+      size_t len = sizeof(enabled);
+      ret = sysctlbyname("machdep.sse", &enabled, &len, (void *)NULL, 0);
+      if (ret || !enabled)
+         _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
+   }
+#elif defined(__OpenBSD__)
+   {
+      int mib[2];
+      int ret, enabled;
+      size_t len = sizeof(enabled);
+      mib[0] = CTL_MACHDEP;
+      mib[1] = CPU_SSE;
+      ret = sysctl(mib, 2, &enabled, &len, NULL, 0);
+      if (ret || !enabled)
+         _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
+   }
+#elif defined(_WIN32)
+   LPTOP_LEVEL_EXCEPTION_FILTER oldFilter;
+   /* Install our ExceptionFilter */
+   oldFilter = SetUnhandledExceptionFilter( ExceptionFilter );
+   /* cpu_has_xmm is re-read after each probe because ExceptionFilter may
+    * have cleared X86_FEATURE_XMM in the meantime. */
+   if ( cpu_has_xmm ) {
+      _mesa_debug(NULL, "Testing OS support for SSE...\n");
+      _mesa_test_os_sse_support();
+      if ( cpu_has_xmm ) {
+         _mesa_debug(NULL, "Yes.\n");
+      } else {
+         _mesa_debug(NULL, "No!\n");
+      }
+   }
+   if ( cpu_has_xmm ) {
+      _mesa_debug(NULL, "Testing OS support for SSE unmasked exceptions...\n");
+      _mesa_test_os_sse_exception_support();
+      if ( cpu_has_xmm ) {
+         _mesa_debug(NULL, "Yes.\n");
+      } else {
+         _mesa_debug(NULL, "No!\n");
+      }
+   }
+   /* Restore previous exception filter */
+   SetUnhandledExceptionFilter( oldFilter );
+   if ( cpu_has_xmm ) {
+      _mesa_debug(NULL, "Tests of OS support for SSE passed.\n");
+   } else {
+      _mesa_debug(NULL, "Tests of OS support for SSE failed!\n");
+   }
+#else
+   /* Do nothing on other platforms for now.
+    */
+   if (detection_debug)
+      _mesa_debug(NULL, "Not testing OS support for SSE, leaving enabled.\n");
+#endif /* __FreeBSD__ */
+}
+#endif /* USE_SSE_ASM */
+/**
+ * Initialize the _mesa_x86_cpu_features bitfield.
+ * This is a no-op if called more than once.
+ *
+ * Without USE_X86_ASM nothing is detected.  With it, the bitfield is reset
+ * and, unless MESA_NO_ASM is set in the environment, filled from CPUID.
+ * MESA_NO_MMX, MESA_NO_3DNOW and MESA_NO_SSE each switch the matching
+ * feature bit back off; MESA_FORCE_SSE skips the operating-system check for
+ * SSE.
+ */
+void
+_mesa_get_x86_features(void)
+{
+   static int called = 0;
+   if (called)
+      return;
+   called = 1;
+#ifdef USE_X86_ASM
+   _mesa_x86_cpu_features = 0x0;
+   if (_mesa_getenv( "MESA_NO_ASM")) {
+      return;
+   }
+   if (!_mesa_x86_has_cpuid()) {
+       _mesa_debug(NULL, "CPUID not detected\n");
+   }
+   else {
+       GLuint cpu_features;
+       GLuint cpu_ext_features;
+       GLuint cpu_ext_info;
+       char cpu_vendor[13];
+       GLuint result;
+       /* get vendor name: the string is laid out as EBX, EDX, ECX, hence the
+        * +0 / +8 / +4 destinations for the ebx / ecx / edx arguments */
+       _mesa_x86_cpuid(0, &result, (GLuint *)(cpu_vendor + 0), (GLuint *)(cpu_vendor + 8), (GLuint *)(cpu_vendor + 4));
+       cpu_vendor[12] = '\0';
+       if (detection_debug)
+          _mesa_debug(NULL, "CPU vendor: %s\n", cpu_vendor);
+       /* get cpu features */
+       cpu_features = _mesa_x86_cpuid_edx(1);
+       if (cpu_features & X86_CPU_FPU)
+           _mesa_x86_cpu_features |= X86_FEATURE_FPU;
+       if (cpu_features & X86_CPU_CMOV)
+           _mesa_x86_cpu_features |= X86_FEATURE_CMOV;
+#ifdef USE_MMX_ASM
+       if (cpu_features & X86_CPU_MMX)
+           _mesa_x86_cpu_features |= X86_FEATURE_MMX;
+#endif
+#ifdef USE_SSE_ASM
+       if (cpu_features & X86_CPU_XMM)
+           _mesa_x86_cpu_features |= X86_FEATURE_XMM;
+       if (cpu_features & X86_CPU_XMM2)
+           _mesa_x86_cpu_features |= X86_FEATURE_XMM2;
+#endif
+       /* query extended cpu features */
+       if ((cpu_ext_info = _mesa_x86_cpuid_eax(0x80000000)) > 0x80000000) {
+           if (cpu_ext_info >= 0x80000001) {
+               cpu_ext_features = _mesa_x86_cpuid_edx(0x80000001);
+               /* the extended bits are only honoured on MMX-capable CPUs */
+               if (cpu_features & X86_CPU_MMX) {
+#ifdef USE_3DNOW_ASM
+                   if (cpu_ext_features & X86_CPUEXT_3DNOW)
+                       _mesa_x86_cpu_features |= X86_FEATURE_3DNOW;
+                   if (cpu_ext_features & X86_CPUEXT_3DNOW_EXT)
+                       _mesa_x86_cpu_features |= X86_FEATURE_3DNOWEXT;
+#endif
+#ifdef USE_MMX_ASM
+                   if (cpu_ext_features & X86_CPUEXT_MMX_EXT)
+                       _mesa_x86_cpu_features |= X86_FEATURE_MMXEXT;
+#endif
+               }
+           }
+           /* query cpu name: the loop below reads leaves 0x80000002 through
+            * 0x80000004, so all three have to be supported, not just the
+            * first one */
+           if (cpu_ext_info >= 0x80000004) {
+               GLuint ofs;
+               char cpu_name[49];
+               for (ofs = 0; ofs < 3; ofs++)
+                   _mesa_x86_cpuid(0x80000002+ofs, (GLuint *)(cpu_name + (16*ofs)+0), (GLuint *)(cpu_name + (16*ofs)+4), (GLuint *)(cpu_name + (16*ofs)+8), (GLuint *)(cpu_name + (16*ofs)+12));
+               cpu_name[48] = '\0'; /* the name should be NUL terminated, but just to be sure */
+               if (detection_debug)
+                  _mesa_debug(NULL, "CPU name: %s\n", cpu_name);
+           }
+       }
+   }
+#ifdef USE_MMX_ASM
+   if ( cpu_has_mmx ) {
+      if ( _mesa_getenv( "MESA_NO_MMX" ) == 0 ) {
+         if (detection_debug)
+            _mesa_debug(NULL, "MMX cpu detected.\n");
+      } else {
+         _mesa_x86_cpu_features &= ~(X86_FEATURE_MMX);
+      }
+   }
+#endif
+#ifdef USE_3DNOW_ASM
+   if ( cpu_has_3dnow ) {
+      if ( _mesa_getenv( "MESA_NO_3DNOW" ) == 0 ) {
+         if (detection_debug)
+            _mesa_debug(NULL, "3DNow! cpu detected.\n");
+      } else {
+         _mesa_x86_cpu_features &= ~(X86_FEATURE_3DNOW);
+      }
+   }
+#endif
+#ifdef USE_SSE_ASM
+   if ( cpu_has_xmm ) {
+      if ( _mesa_getenv( "MESA_NO_SSE" ) == 0 ) {
+         if (detection_debug)
+            _mesa_debug(NULL, "SSE cpu detected.\n");
+         /* the CPU advertises SSE; unless forced, also ask the OS */
+         if ( _mesa_getenv( "MESA_FORCE_SSE" ) == 0 ) {
+            _mesa_check_os_sse_support();
+         }
+      } else {
+         _mesa_debug(NULL, "SSE cpu detected, but switched off by user.\n");
+         _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
+      }
+   }
+#endif
+#endif /* USE_X86_ASM */
+   /* keeps the variable referenced when none of the blocks above is compiled */
+   (void) detection_debug;
+}

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/common_x86_asm.S
 ,0 → 1,220
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/*
+ * Check extended CPU capabilities.  Now just returns the raw CPUID
+ * feature information, allowing the higher level code to interpret the
+ * results.
+ *
+ * Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
+ *
+ * Cleaned up and simplified by Gareth Hughes <gareth@valinux.com>
+ *
+ */
+/*
+ * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
+ * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
+ * in there will break the build on some platforms.
+ */
+#include "matypes.h"
+#include "assyntax.h"
+#include "common_x86_features.h"
+        SEG_TEXT
+/*
+ * Report whether the CPUID instruction is available.
+ *
+ * Takes no arguments.  Returns 1 in EAX when the ID flag (bit 21) of
+ * EFLAGS can be toggled, 0 when it cannot.  Uses EAX and ECX as scratch.
+ */
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_x86_has_cpuid)
+HIDDEN(_mesa_x86_has_cpuid)
+GLNAME(_mesa_x86_has_cpuid):
+        /* Test for the CPUID command.  If the ID Flag bit in EFLAGS
+         * (bit 21) is writable, the CPUID command is present */
+        PUSHF_L
+        POP_L   (EAX)
+        MOV_L   (EAX, ECX)                      /* keep the original EFLAGS */
+        XOR_L   (CONST(0x00200000), EAX)        /* toggle the ID bit */
+        PUSH_L  (EAX)
+        POPF_L
+        PUSHF_L
+        POP_L   (EAX)
+        /* Verify the ID Flag bit has been written. */
+        CMP_L   (ECX, EAX)
+        SETNE   (AL)
+        /* Keep only the SETNE result in AL.  The upper 24 bits of EAX
+         * still hold EFLAGS bits, so mask them off; XOR-ing with 0xff
+         * here would leave EAX non-zero in both cases. */
+        AND_L   (CONST(0xff), EAX)
+        RET
+/*
+ * Execute CPUID and store all four result registers.
+ *
+ * Stack arguments, in order: the CPUID function number, then four
+ * pointers receiving EAX, EBX, ECX and EDX.  EDI and EBX are saved on
+ * entry and restored before returning.
+ */
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_x86_cpuid)
+HIDDEN(_mesa_x86_cpuid)
+GLNAME(_mesa_x86_cpuid):
+        MOV_L   (REGOFF(4, ESP), EAX)           /* cpuid op */
+        PUSH_L  (EDI)
+        PUSH_L  (EBX)
+        CPUID
+        /* The two pushes above moved every argument 8 bytes further
+         * from ESP, so the first pointer is now at 16(ESP). */
+        MOV_L   (REGOFF(16, ESP), EDI)  /* *eax */
+        MOV_L   (EAX, REGIND(EDI))
+        MOV_L   (REGOFF(20, ESP), EDI)  /* *ebx */
+        MOV_L   (EBX, REGIND(EDI))
+        MOV_L   (REGOFF(24, ESP), EDI)  /* *ecx */
+        MOV_L   (ECX, REGIND(EDI))
+        MOV_L   (REGOFF(28, ESP), EDI)  /* *edx */
+        MOV_L   (EDX, REGIND(EDI))
+        POP_L   (EBX)
+        POP_L   (EDI)
+        RET
+/*
+ * Execute CPUID for the function number passed on the stack and return
+ * the resulting EAX.  EBX is saved and restored around the instruction.
+ */
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_x86_cpuid_eax)
+HIDDEN(_mesa_x86_cpuid_eax)
+GLNAME(_mesa_x86_cpuid_eax):
+        MOV_L   (REGOFF(4, ESP), EAX)           /* cpuid op */
+        PUSH_L  (EBX)
+        CPUID                                   /* result already in EAX */
+        POP_L   (EBX)
+        RET
+/*
+ * Execute CPUID for the function number passed on the stack and return
+ * the resulting EBX.  The caller's EBX is saved and restored; the result
+ * is copied to EAX before the restore overwrites it.
+ */
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_x86_cpuid_ebx)
+HIDDEN(_mesa_x86_cpuid_ebx)
+GLNAME(_mesa_x86_cpuid_ebx):
+        MOV_L   (REGOFF(4, ESP), EAX)           /* cpuid op */
+        PUSH_L  (EBX)
+        CPUID
+        MOV_L   (EBX, EAX)                      /* return EBX */
+        POP_L   (EBX)
+        RET
+/*
+ * Execute CPUID for the function number passed on the stack and return
+ * the resulting ECX.  EBX is saved and restored around the instruction.
+ */
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_x86_cpuid_ecx)
+HIDDEN(_mesa_x86_cpuid_ecx)
+GLNAME(_mesa_x86_cpuid_ecx):
+        MOV_L   (REGOFF(4, ESP), EAX)           /* cpuid op */
+        PUSH_L  (EBX)
+        CPUID
+        MOV_L   (ECX, EAX)                      /* return ECX */
+        POP_L   (EBX)
+        RET
+/*
+ * Execute CPUID for the function number passed on the stack and return
+ * the resulting EDX.  EBX is saved and restored around the instruction.
+ */
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_x86_cpuid_edx)
+HIDDEN(_mesa_x86_cpuid_edx)
+GLNAME(_mesa_x86_cpuid_edx):
+        MOV_L   (REGOFF(4, ESP), EAX)           /* cpuid op */
+        PUSH_L  (EBX)
+        CPUID
+        MOV_L   (EDX, EAX)                      /* return EDX */
+        POP_L   (EBX)
+        RET
+#ifdef USE_SSE_ASM
+/* Execute an SSE instruction to see if the operating system correctly
+ * supports SSE.  A signal handler for SIGILL should have been set
+ * before calling this function, otherwise this could kill the client
+ * application.
+ *
+ *        -----> !!!! ATTENTION DEVELOPERS !!!! <-----
+ *
+ * If you're debugging with gdb and you get stopped in this function,
+ * just type 'continue'!  Execution will proceed normally.
+ * See freedesktop.org bug #1709 for more info.
+ */
+ALIGNTEXT4
+GLOBL GLNAME( _mesa_test_os_sse_support )
+HIDDEN(_mesa_test_os_sse_support)
+GLNAME( _mesa_test_os_sse_support ):
+        /* The only effect of this instruction is to zero XMM0; it is
+         * executed purely as a probe for whether an SSE instruction
+         * can run. */
+        XORPS   ( XMM0, XMM0 )
+        RET
+/* Perform an SSE divide-by-zero to see if the operating system
+ * correctly supports unmasked SIMD FPU exceptions.  Signal handlers for
+ * SIGILL and SIGFPE should have been set before calling this function,
+ * otherwise this could kill the client application.
+ */
+ALIGNTEXT4
+GLOBL GLNAME( _mesa_test_os_sse_exception_support )
+HIDDEN(_mesa_test_os_sse_exception_support)
+GLNAME( _mesa_test_os_sse_exception_support ):
+        /* Build a frame with 8 bytes of locals: -4(EBP) keeps the
+         * original MXCSR, -8(EBP) the modified copy.
+         */
+        PUSH_L  ( EBP )
+        MOV_L   ( ESP, EBP )
+        SUB_L   ( CONST( 8 ), ESP )
+        /* Save the original MXCSR register value.
+         */
+        STMXCSR ( REGOFF( -4, EBP ) )
+        /* Unmask the divide-by-zero exception and perform one.
+         * The AND clears bit 9 (0x200) of the MXCSR copy.
+         */
+        STMXCSR ( REGOFF( -8, EBP ) )
+        AND_L   ( CONST( 0xfffffdff ), REGOFF( -8, EBP ) )
+        LDMXCSR ( REGOFF( -8, EBP ) )
+        /* XMM0 = four zeros (the divisor). */
+        XORPS   ( XMM0, XMM0 )
+        /* Push four copies of 0x3f800000 (1.0f in IEEE-754 single
+         * precision) and load them, unaligned, as the dividend.
+         */
+        PUSH_L  ( CONST( 0x3f800000 ) )
+        PUSH_L  ( CONST( 0x3f800000 ) )
+        PUSH_L  ( CONST( 0x3f800000 ) )
+        PUSH_L  ( CONST( 0x3f800000 ) )
+        MOVUPS  ( REGIND( ESP ), XMM1 )
+        DIVPS   ( XMM0, XMM1 )
+        /* Restore the original MXCSR register value.
+         */
+        LDMXCSR ( REGOFF( -4, EBP ) )
+        /* LEAVE reloads ESP from EBP, which also discards the locals
+         * and the 16 bytes pushed above.
+         */
+        LEAVE
+        RET
+#endif
+#if defined (__ELF__) && defined (__linux__)
+        .section .note.GNU-stack,"",%progbits
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/common_x86_asm.h
 ,0 → 1,53
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/*
+ * Check CPU capabilities & initialize optimized functions for this particular
+ * processor.
+ *
+ * Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
+ * Changed by Andre Werthmann <wertmann@cs.uni-potsdam.de> for using the
+ * new SSE functions
+ *
+ * Reimplemented by Gareth Hughes in a more
+ * future-proof manner, based on code in the Linux kernel.
+ */
+#ifndef __COMMON_X86_ASM_H__
+#define __COMMON_X86_ASM_H__
+/* Do not reference mtypes.h from this file.
+ */
+#include "common_x86_features.h"
+/* Bitmask of X86_FEATURE_* flags; read through the cpu_has_* macros. */
+extern int _mesa_x86_cpu_features;
+/* Sets X86_FEATURE_* bits in _mesa_x86_cpu_features from CPUID results,
+ * then clears MMX/3DNow!/SSE again when the MESA_NO_MMX, MESA_NO_3DNOW
+ * or MESA_NO_SSE environment variable is set.
+ */
+extern void _mesa_get_x86_features(void);
+/* Called by _mesa_get_x86_features() when SSE was detected and
+ * MESA_FORCE_SSE is not set.  NOTE(review): presumably verifies that the
+ * operating system supports SSE -- confirm against the implementation.
+ */
+extern void _mesa_check_os_sse_support(void);
+/* NOTE(review): judging by the name, installs the x86 assembly transform
+ * routines; the implementation is not part of this header -- confirm.
+ */
+extern void _mesa_init_all_x86_transform_asm( void );
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/common_x86_features.h
 ,0 → 1,67
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/*
+ * x86 CPUID feature information.  The detected features are kept in
+ * _mesa_x86_cpu_features (filled in by _mesa_get_x86_features()) and
+ * interpreted with the cpu_has_* helper macros.
+ *
+ * Gareth Hughes
+ */
+#ifndef __COMMON_X86_FEATURES_H__
+#define __COMMON_X86_FEATURES_H__
+/* Bits of the _mesa_x86_cpu_features mask.  This is Mesa's own numbering
+ * and is unrelated to the CPUID bit positions further down.
+ */
+#define X86_FEATURE_FPU         (1<<0)
+#define X86_FEATURE_CMOV        (1<<1)
+#define X86_FEATURE_MMXEXT      (1<<2)
+#define X86_FEATURE_MMX         (1<<3)
+#define X86_FEATURE_FXSR        (1<<4)
+#define X86_FEATURE_XMM         (1<<5)
+#define X86_FEATURE_XMM2        (1<<6)
+#define X86_FEATURE_3DNOWEXT    (1<<7)
+#define X86_FEATURE_3DNOW       (1<<8)
+/* standard X86 CPU features: bit positions in EDX of CPUID function 1 */
+#define X86_CPU_FPU             (1<<0)
+#define X86_CPU_CMOV            (1<<15)
+#define X86_CPU_MMX             (1<<23)
+#define X86_CPU_XMM             (1<<25)
+#define X86_CPU_XMM2            (1<<26)
+/* extended X86 CPU features: bit positions in EDX of CPUID function
+ * 0x80000001.
+ * NOTE(review): (1<<31) shifts a signed int into its sign bit, which is
+ * undefined behaviour in ISO C.  An unsigned constant would avoid that,
+ * but this header is also included from assembly sources -- confirm the
+ * assembler accepts the suffix before changing it.
+ */
+#define X86_CPUEXT_MMX_EXT      (1<<22)
+#define X86_CPUEXT_3DNOW_EXT    (1<<30)
+#define X86_CPUEXT_3DNOW        (1<<31)
+/* Non-zero when the corresponding X86_FEATURE_* bit is set in
+ * _mesa_x86_cpu_features.  The value is the masked bit, not 0/1.
+ */
+#define cpu_has_mmx             (_mesa_x86_cpu_features & X86_FEATURE_MMX)
+#define cpu_has_mmxext          (_mesa_x86_cpu_features & X86_FEATURE_MMXEXT)
+#define cpu_has_xmm             (_mesa_x86_cpu_features & X86_FEATURE_XMM)
+#define cpu_has_xmm2            (_mesa_x86_cpu_features & X86_FEATURE_XMM2)
+#define cpu_has_3dnow           (_mesa_x86_cpu_features & X86_FEATURE_3DNOW)
+#define cpu_has_3dnowext        (_mesa_x86_cpu_features & X86_FEATURE_3DNOWEXT)
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/gen_matypes.c
 ,0 → 1,263
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Gareth Hughes
+ */
+/*
+ * This generates an asm version of mtypes.h (called matypes.h), so that
+ * Mesa's x86 assembly code can access the internal structures easily.
+ * This will be particularly useful when developing new x86 asm code for
+ * Mesa, including lighting, clipping, texture image conversion etc.
+ */
+#ifndef __STDC_FORMAT_MACROS
+#define __STDC_FORMAT_MACROS
+#endif
+#include <inttypes.h>
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "tnl/t_context.h"
+/* Replace any previously defined offsetof with a plain pointer-arithmetic
+ * version.  NOTE(review): presumably so the result can be used with
+ * members such as AttribPtr[...] and as an immediate operand in the
+ * ASM_OFFSETS build -- confirm before switching to <stddef.h> offsetof.
+ */
+#undef offsetof
+#define offsetof( type, member ) ((size_t) &((type *)0)->member)
+/* Emit the banner comment that introduces a group of offsets.  x must be
+ * a string literal, because it is pasted between adjacent literals.  Each
+ * line is a separate printf call, and printf may be redefined as a macro
+ * below (ASM_OFFSETS).
+ */
+#define OFFSET_HEADER( x )                                              \
+do {                                                                    \
+   printf( "\n" );                                                      \
+   printf( "\n" );                                                      \
+   printf( "/* ====================================================="   \
+           "========\n" );                                              \
+   printf( " * Offsets for " x "\n" );                                  \
+   printf( " */\n" );                                                   \
+   printf( "\n" );                                                      \
+} while (0)
+/* Emit the banner comment that introduces a group of flag values.  x must
+ * be a string literal, because it is pasted between adjacent literals.
+ */
+#define DEFINE_HEADER( x )                                              \
+do {                                                                    \
+   printf( "\n" );                                                      \
+   printf( "/*\n" );                                                    \
+   printf( " * Flags for " x "\n" );                                    \
+   printf( " */\n" );                                                   \
+   printf( "\n" );                                                      \
+} while (0)
+#ifdef ASM_OFFSETS
+/*
+ * Format the asm output in a special way that we can manipulate
+ * after the fact and turn into the final header for the target.
+ * Every value is emitted as an immediate operand on a line starting
+ * with "->"; printf itself is redefined so that plain text lines are
+ * emitted the same way.  s and x must be string literals.
+ */
+#define DEFINE_UL( s, ul )                                              \
+   __asm__ __volatile__ ( "\n->" s " %0" : : "i" (ul) )
+#define DEFINE( s, d )                                                  \
+   DEFINE_UL( s, d )
+#define printf( x )                                                     \
+   __asm__ __volatile__ ( "\n->" x )
+#else
+/*
+ * Print "#define <name> <value>" directly: DEFINE_UL in decimal, DEFINE
+ * in hexadecimal.  Like the ASM_OFFSETS variants, these expand to a
+ * single expression without a trailing semicolon (the caller supplies
+ * it), and the arguments are parenthesized so that an expression such as
+ * A | B is converted as a whole.
+ */
+#define DEFINE_UL( s, ul )                                              \
+   printf( "#define %s\t%lu\n", (s), (unsigned long) (ul) )
+#define DEFINE( s, d )                                                  \
+   printf( "#define %s\t0x%" PRIx64 "\n", (s), (uint64_t) (d) )
+#endif
+/* Emit the byte offset of member m within type t under the name s. */
+#define OFFSET( s, t, m )                                               \
+   DEFINE_UL( s, offsetof( t, m ) )
+/* Emit sizeof(t) under the name s. */
+#define SIZEOF( s, t )                                                  \
+   DEFINE_UL( s, sizeof(t) )
+/*
+ * Emit the contents of matypes.h: an include guard, then the offsets of
+ * selected members of struct gl_context, struct vertex_buffer, GLvector4f,
+ * GLmatrix, struct gl_light and struct gl_lightmodel, together with the
+ * flag values that belong to them.  Output goes through printf, which is
+ * either the real function or, with ASM_OFFSETS, the asm-emitting macro
+ * defined above.  argc and argv are not used.  Always returns 0.
+ */
+int main( int argc, char **argv )
+{
+   printf( "/*\n" );
+   printf( " * This file is automatically generated from the Mesa internal type\n" );
+   printf( " * definitions.  Do not edit directly.\n" );
+   printf( " */\n" );
+   printf( "\n" );
+   printf( "#ifndef __ASM_TYPES_H__\n" );
+   printf( "#define __ASM_TYPES_H__\n" );
+   printf( "\n" );
+   /* struct gl_context offsets:
+    */
+   OFFSET_HEADER( "struct gl_context" );
+   printf( "\n" );
+   OFFSET( "CTX_LIGHT_ENABLED           ", struct gl_context, Light.Enabled );
+   OFFSET( "CTX_LIGHT_SHADE_MODEL       ", struct gl_context, Light.ShadeModel );
+   OFFSET( "CTX_LIGHT_COLOR_MAT_FACE    ", struct gl_context, Light.ColorMaterialFace );
+   OFFSET( "CTX_LIGHT_COLOR_MAT_MODE    ", struct gl_context, Light.ColorMaterialMode );
+   OFFSET( "CTX_LIGHT_COLOR_MAT_MASK    ", struct gl_context, Light._ColorMaterialBitmask );
+   OFFSET( "CTX_LIGHT_COLOR_MAT_ENABLED ", struct gl_context, Light.ColorMaterialEnabled );
+   OFFSET( "CTX_LIGHT_ENABLED_LIST      ", struct gl_context, Light.EnabledList );
+   OFFSET( "CTX_LIGHT_NEED_VERTS        ", struct gl_context, Light._NeedVertices );
+   OFFSET( "CTX_LIGHT_BASE_COLOR        ", struct gl_context, Light._BaseColor );
+   /* struct vertex_buffer offsets:
+    */
+   OFFSET_HEADER( "struct vertex_buffer" );
+   OFFSET( "VB_SIZE                ", struct vertex_buffer, Size );
+   OFFSET( "VB_COUNT               ", struct vertex_buffer, Count );
+   printf( "\n" );
+   OFFSET( "VB_ELTS                ", struct vertex_buffer, Elts );
+   OFFSET( "VB_OBJ_PTR             ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_POS] );
+   OFFSET( "VB_EYE_PTR             ", struct vertex_buffer, EyePtr );
+   OFFSET( "VB_CLIP_PTR            ", struct vertex_buffer, ClipPtr );
+   OFFSET( "VB_PROJ_CLIP_PTR       ", struct vertex_buffer, NdcPtr );
+   OFFSET( "VB_CLIP_OR_MASK        ", struct vertex_buffer, ClipOrMask );
+   OFFSET( "VB_CLIP_MASK           ", struct vertex_buffer, ClipMask );
+   OFFSET( "VB_NORMAL_PTR          ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_NORMAL] );
+   OFFSET( "VB_EDGE_FLAG           ", struct vertex_buffer, EdgeFlag );
+   OFFSET( "VB_TEX0_COORD_PTR      ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_TEX0] );
+   OFFSET( "VB_TEX1_COORD_PTR      ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_TEX1] );
+   OFFSET( "VB_TEX2_COORD_PTR      ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_TEX2] );
+   OFFSET( "VB_TEX3_COORD_PTR      ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_TEX3] );
+   OFFSET( "VB_INDEX_PTR           ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_COLOR_INDEX] );
+   OFFSET( "VB_COLOR_PTR           ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_COLOR0] );
+   OFFSET( "VB_SECONDARY_COLOR_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_COLOR1] );
+   OFFSET( "VB_FOG_COORD_PTR       ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_FOG] );
+   OFFSET( "VB_PRIMITIVE           ", struct vertex_buffer, Primitive );
+   printf( "\n" );
+   DEFINE_HEADER( "struct vertex_buffer" );
+   /* XXX use new labels here someday after vertex program is done */
+   DEFINE( "VERT_BIT_OBJ           ", VERT_BIT_POS );
+   DEFINE( "VERT_BIT_NORM          ", VERT_BIT_NORMAL );
+   DEFINE( "VERT_BIT_RGBA          ", VERT_BIT_COLOR0 );
+   DEFINE( "VERT_BIT_SPEC_RGB      ", VERT_BIT_COLOR1 );
+   DEFINE( "VERT_BIT_FOG_COORD     ", VERT_BIT_FOG );
+   DEFINE( "VERT_BIT_TEX0          ", VERT_BIT_TEX0 );
+   DEFINE( "VERT_BIT_TEX1          ", VERT_BIT_TEX1 );
+   DEFINE( "VERT_BIT_TEX2          ", VERT_BIT_TEX2 );
+   DEFINE( "VERT_BIT_TEX3          ", VERT_BIT_TEX3 );
+   /* GLvector4f offsets:
+    */
+   OFFSET_HEADER( "GLvector4f" );
+   OFFSET( "V4F_DATA          ", GLvector4f, data );
+   OFFSET( "V4F_START         ", GLvector4f, start );
+   OFFSET( "V4F_COUNT         ", GLvector4f, count );
+   OFFSET( "V4F_STRIDE        ", GLvector4f, stride );
+   OFFSET( "V4F_SIZE          ", GLvector4f, size );
+   OFFSET( "V4F_FLAGS         ", GLvector4f, flags );
+   DEFINE_HEADER( "GLvector4f" );
+   DEFINE( "VEC_MALLOC        ", VEC_MALLOC );
+   DEFINE( "VEC_NOT_WRITEABLE ", VEC_NOT_WRITEABLE );
+   DEFINE( "VEC_BAD_STRIDE    ", VEC_BAD_STRIDE );
+   printf( "\n" );
+   DEFINE( "VEC_SIZE_1        ", VEC_SIZE_1 );
+   DEFINE( "VEC_SIZE_2        ", VEC_SIZE_2 );
+   DEFINE( "VEC_SIZE_3        ", VEC_SIZE_3 );
+   DEFINE( "VEC_SIZE_4        ", VEC_SIZE_4 );
+   /* GLmatrix offsets:
+    */
+   OFFSET_HEADER( "GLmatrix" );
+   OFFSET( "MATRIX_DATA   ", GLmatrix, m );
+   OFFSET( "MATRIX_INV    ", GLmatrix, inv );
+   OFFSET( "MATRIX_FLAGS  ", GLmatrix, flags );
+   OFFSET( "MATRIX_TYPE   ", GLmatrix, type );
+   /* struct gl_light offsets:
+    */
+   OFFSET_HEADER( "struct gl_light" );
+   OFFSET( "LIGHT_NEXT              ", struct gl_light, next );
+   OFFSET( "LIGHT_PREV              ", struct gl_light, prev );
+   printf( "\n" );
+   OFFSET( "LIGHT_AMBIENT           ", struct gl_light, Ambient );
+   OFFSET( "LIGHT_DIFFUSE           ", struct gl_light, Diffuse );
+   OFFSET( "LIGHT_SPECULAR          ", struct gl_light, Specular );
+   OFFSET( "LIGHT_EYE_POSITION      ", struct gl_light, EyePosition );
+   OFFSET( "LIGHT_SPOT_DIRECTION    ", struct gl_light, SpotDirection );
+   OFFSET( "LIGHT_SPOT_EXPONENT     ", struct gl_light, SpotExponent );
+   OFFSET( "LIGHT_SPOT_CUTOFF       ", struct gl_light, SpotCutoff );
+   OFFSET( "LIGHT_COS_CUTOFF        ", struct gl_light, _CosCutoff );
+   OFFSET( "LIGHT_CONST_ATTEN       ", struct gl_light, ConstantAttenuation );
+   OFFSET( "LIGHT_LINEAR_ATTEN      ", struct gl_light, LinearAttenuation );
+   OFFSET( "LIGHT_QUADRATIC_ATTEN   ", struct gl_light, QuadraticAttenuation );
+   OFFSET( "LIGHT_ENABLED           ", struct gl_light, Enabled );
+   printf( "\n" );
+   OFFSET( "LIGHT_FLAGS             ", struct gl_light, _Flags );
+   printf( "\n" );
+   OFFSET( "LIGHT_POSITION          ", struct gl_light, _Position );
+   OFFSET( "LIGHT_VP_INF_NORM       ", struct gl_light, _VP_inf_norm );
+   OFFSET( "LIGHT_H_INF_NORM        ", struct gl_light, _h_inf_norm );
+   OFFSET( "LIGHT_NORM_DIRECTION    ", struct gl_light, _NormSpotDirection );
+   OFFSET( "LIGHT_VP_INF_SPOT_ATTEN ", struct gl_light, _VP_inf_spot_attenuation );
+   printf( "\n" );
+   OFFSET( "LIGHT_MAT_AMBIENT       ", struct gl_light, _MatAmbient );
+   OFFSET( "LIGHT_MAT_DIFFUSE       ", struct gl_light, _MatDiffuse );
+   OFFSET( "LIGHT_MAT_SPECULAR      ", struct gl_light, _MatSpecular );
+   printf( "\n" );
+   SIZEOF( "SIZEOF_GL_LIGHT         ", struct gl_light );
+   DEFINE_HEADER( "struct gl_light" );
+   DEFINE( "LIGHT_SPOT              ", LIGHT_SPOT );
+   DEFINE( "LIGHT_LOCAL_VIEWER      ", LIGHT_LOCAL_VIEWER );
+   DEFINE( "LIGHT_POSITIONAL        ", LIGHT_POSITIONAL );
+   printf( "\n" );
+   DEFINE( "LIGHT_NEED_VERTICES     ", LIGHT_NEED_VERTICES );
+   /* struct gl_lightmodel offsets:
+    */
+   OFFSET_HEADER( "struct gl_lightmodel" );
+   OFFSET( "LIGHT_MODEL_AMBIENT       ", struct gl_lightmodel, Ambient );
+   OFFSET( "LIGHT_MODEL_LOCAL_VIEWER  ", struct gl_lightmodel, LocalViewer );
+   OFFSET( "LIGHT_MODEL_TWO_SIDE      ", struct gl_lightmodel, TwoSide );
+   OFFSET( "LIGHT_MODEL_COLOR_CONTROL ", struct gl_lightmodel, ColorControl );
+   printf( "\n" );
+   printf( "\n" );
+   printf( "#endif /* __ASM_TYPES_H__ */\n" );
+   return 0;
+}

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/matypes.h
 ,0 → 1,159
+/*
+ * This file is automatically generated from the Mesa internal type
+ * definitions.  Do not edit directly.
+ */
+#ifndef __ASM_TYPES_H__
+#define __ASM_TYPES_H__
+/* =============================================================
+ * Offsets for struct gl_context
+ */
+#define CTX_LIGHT_ENABLED               7116
+#define CTX_LIGHT_SHADE_MODEL           7120
+#define CTX_LIGHT_COLOR_MAT_FACE        7128
+#define CTX_LIGHT_COLOR_MAT_MODE        7132
+#define CTX_LIGHT_COLOR_MAT_MASK        7136
+#define CTX_LIGHT_COLOR_MAT_ENABLED     7140
+#define CTX_LIGHT_ENABLED_LIST          7152
+#define CTX_LIGHT_NEED_VERTS            7405
+#define CTX_LIGHT_BASE_COLOR            7408
+/* =============================================================
+ * Offsets for struct vertex_buffer
+ */
+#define VB_SIZE                 0
+#define VB_COUNT                4
+#define VB_ELTS                 8
+#define VB_OBJ_PTR              60
+#define VB_EYE_PTR              12
+#define VB_CLIP_PTR             16
+#define VB_PROJ_CLIP_PTR        20
+#define VB_CLIP_OR_MASK         24
+#define VB_CLIP_MASK            28
+#define VB_NORMAL_PTR           68
+#define VB_EDGE_FLAG            36
+#define VB_TEX0_COORD_PTR       92
+#define VB_TEX1_COORD_PTR       96
+#define VB_TEX2_COORD_PTR       100
+#define VB_TEX3_COORD_PTR       104
+#define VB_INDEX_PTR            84
+#define VB_COLOR_PTR            72
+#define VB_SECONDARY_COLOR_PTR  76
+#define VB_FOG_COORD_PTR        80
+#define VB_PRIMITIVE            52
+/*
+ * Flags for struct vertex_buffer
+ */
+#define VERT_BIT_OBJ            0x1
+#define VERT_BIT_NORM           0x4
+#define VERT_BIT_RGBA           0x8
+#define VERT_BIT_SPEC_RGB       0x10
+#define VERT_BIT_FOG_COORD      0x20
+#define VERT_BIT_TEX0           0x100
+#define VERT_BIT_TEX1           0x200
+#define VERT_BIT_TEX2           0x400
+#define VERT_BIT_TEX3           0x800
+/* =============================================================
+ * Offsets for GLvector4f
+ */
+#define V4F_DATA                0
+#define V4F_START               4
+#define V4F_COUNT               8
+#define V4F_STRIDE              12
+#define V4F_SIZE                16
+#define V4F_FLAGS               20
+/*
+ * Flags for GLvector4f
+ */
+#define VEC_MALLOC              0x10
+#define VEC_NOT_WRITEABLE       0x40
+#define VEC_BAD_STRIDE          0x100
+#define VEC_SIZE_1              0x1
+#define VEC_SIZE_2              0x3
+#define VEC_SIZE_3              0x7
+#define VEC_SIZE_4              0xf
+/* =============================================================
+ * Offsets for GLmatrix
+ */
+#define MATRIX_DATA     0
+#define MATRIX_INV      4
+#define MATRIX_FLAGS    8
+#define MATRIX_TYPE     12
+/* =============================================================
+ * Offsets for struct gl_light
+ */
+#define LIGHT_NEXT                      0
+#define LIGHT_PREV                      4
+#define LIGHT_AMBIENT                   8
+#define LIGHT_DIFFUSE                   24
+#define LIGHT_SPECULAR                  40
+#define LIGHT_EYE_POSITION              56
+#define LIGHT_SPOT_DIRECTION            72
+#define LIGHT_SPOT_EXPONENT             88
+#define LIGHT_SPOT_CUTOFF               92
+#define LIGHT_COS_CUTOFF                96
+#define LIGHT_CONST_ATTEN               100
+#define LIGHT_LINEAR_ATTEN              104
+#define LIGHT_QUADRATIC_ATTEN           108
+#define LIGHT_ENABLED                   112
+#define LIGHT_FLAGS                     116
+#define LIGHT_POSITION                  120
+#define LIGHT_VP_INF_NORM               136
+#define LIGHT_H_INF_NORM                148
+#define LIGHT_NORM_DIRECTION            160
+#define LIGHT_VP_INF_SPOT_ATTEN         176
+#define LIGHT_MAT_AMBIENT               180
+#define LIGHT_MAT_DIFFUSE               204
+#define LIGHT_MAT_SPECULAR              228
+#define SIZEOF_GL_LIGHT                 252
+/*
+ * Flags for struct gl_light
+ */
+#define LIGHT_SPOT                      0x1
+#define LIGHT_LOCAL_VIEWER              0x2
+#define LIGHT_POSITIONAL                0x4
+#define LIGHT_NEED_VERTICES             0x6
+/* =============================================================
+ * Offsets for struct gl_lightmodel
+ */
+#define LIGHT_MODEL_AMBIENT             0
+#define LIGHT_MODEL_LOCAL_VIEWER        16
+#define LIGHT_MODEL_TWO_SIDE            17
+#define LIGHT_MODEL_COLOR_CONTROL       20
+#endif /* __ASM_TYPES_H__ */

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/mmx.h
 ,0 → 1,59
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef ASM_MMX_H
+#define ASM_MMX_H
+#include "main/compiler.h"
+#include "main/glheader.h"
+/* Forward declaration: only pointers to the context are needed here. */
+struct gl_context;
+/*
+ * Prototypes of the MMX blend routines.  All five share one signature.
+ * NOTE(review): presumably n is the number of pixels, mask selects which
+ * of them are processed, rgba holds the incoming colors and receives the
+ * result, and dest holds the existing destination colors -- confirm
+ * against the implementation before relying on this.
+ */
+extern void _ASMAPI
+_mesa_mmx_blend_transparency( struct gl_context *ctx, GLuint n, const GLubyte mask[],
+                              GLvoid *rgba, const GLvoid *dest,
+                              GLenum chanType );
+extern void _ASMAPI
+_mesa_mmx_blend_add( struct gl_context *ctx, GLuint n, const GLubyte mask[],
+                     GLvoid *rgba, const GLvoid *dest,
+                     GLenum chanType );
+extern void _ASMAPI
+_mesa_mmx_blend_min( struct gl_context *ctx, GLuint n, const GLubyte mask[],
+                     GLvoid *rgba, const GLvoid *dest,
+                     GLenum chanType );
+extern void _ASMAPI
+_mesa_mmx_blend_max( struct gl_context *ctx, GLuint n, const GLubyte mask[],
+                     GLvoid *rgba, const GLvoid *dest,
+                     GLenum chanType );
+extern void _ASMAPI
+_mesa_mmx_blend_modulate( struct gl_context *ctx, GLuint n, const GLubyte mask[],
+                          GLvoid *rgba, const GLvoid *dest,
+                          GLenum chanType );
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/mmx_blend.S
 ,0 → 1,402
+        ;
+/*
+ * Written by José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+#ifdef USE_MMX_ASM
+#include "assyntax.h"
+#include "matypes.h"
+/* integer multiplication - alpha plus one
+ *
+ * makes the following approximation to the division (Sree)
+ *
+ *   rgb*a/255 ~= (rgb*(a+1)) >> 8
+ *
+ * which is the fastest method that satisfies the following OpenGL criteria
+ *
+ *   0*0 = 0 and 255*255 = 255
+ *
+ * note that MX1 is a register with 0xffffffffffffffff constant which can be easily obtained making
+ *
+ *   PCMPEQW    ( MX1, MX1 )
+ *
+ * Subtracting that all-ones value (-1 in every 16-bit word) is what adds
+ * one to each alpha.  MP1/MA1 are the first pixel/alpha pair and MP2/MA2
+ * the second; the results are left in MA1 and MA2.  The instructions for
+ * the second pair are wrapped in TWO(), which is defined outside this
+ * macro.
+ */
+#define GMB_MULT_AP1( MP1, MA1, MP2, MA2, MX1 ) \
+    PSUBW      ( MX1, MA1 )                     /*   a1 + 1  |   a1 + 1  |   a1 + 1  |   a1 + 1  */     ;\
+    PMULLW     ( MP1, MA1 )                     /*                  t1 = p1*a1                   */     ;\
+                                                                                                        ;\
+TWO(PSUBW      ( MX1, MA2 ))                    /*   a2 + 1  |   a2 + 1  |   a2 + 1  |   a2 + 1  */     ;\
+TWO(PMULLW     ( MP2, MA2 ))                    /*                  t2 = p2*a2                   */     ;\
+                                                                                                        ;\
+    PSRLW      ( CONST(8), MA1 )                /*               t1 >> 8 ~= t1/255               */     ;\
+TWO(PSRLW      ( CONST(8), MA2 ))               /*               t2 >> 8 ~= t2/255               */
+/* integer multiplication - geometric series
+ *
+ * takes the geometric series approximation to the division
+ *
+ *   t/255 = (t >> 8) + (t >> 16) + (t >> 24) ..
+ *
+ * in this case just the first two terms to fit in 16bit arithmetic
+ *
+ *   t/255 ~= (t + (t >> 8)) >> 8
+ *
+ * note that just by itself it doesn't satisfy the OpenGL criteria, as 255*255 = 254,
+ * so the special case a = 255 must be accounted for or roundoff must be used
+ *
+ * The results replace MA1 (and MA2).  MP1 (and MP2) are used as scratch for
+ * a copy of the product, so their input values are lost.
+ */
+#define GMB_MULT_GS( MP1, MA1, MP2, MA2 ) \
+    PMULLW     ( MP1, MA1 )                     /*                  t1 = p1*a1                   */     ;\
+TWO(PMULLW     ( MP2, MA2 ))                    /*                  t2 = p2*a2                   */     ;\
+                                                                                                        ;\
+    MOVQ       ( MA1, MP1 )                     /*              keep a copy of t1                */     ;\
+    PSRLW      ( CONST(8), MA1 )                /*                    t1 >> 8                    */     ;\
+                                                                                                        ;\
+TWO(MOVQ       ( MA2, MP2 ))                    /*              keep a copy of t2                */     ;\
+TWO(PSRLW      ( CONST(8), MA2 ))               /*                    t2 >> 8                    */     ;\
+                                                                                                        ;\
+    PADDW      ( MP1, MA1 )                     /*        t1 + (t1 >> 8) ~= (t1/255) << 8        */     ;\
+    PSRLW      ( CONST(8), MA1 )                /*    sa1    |    sb1    |    sg1    |    sr1    */     ;\
+                                                                                                        ;\
+TWO(PADDW      ( MP2, MA2 ))                    /*        t2 + (t2 >> 8) ~= (t2/255) << 8        */     ;\
+TWO(PSRLW      ( CONST(8), MA2 ))               /*    sa2    |    sb2    |    sg2    |    sr2    */
+/* integer multiplication - geometric series plus rounding
+ *
+ * when using a geometric series division instead of truncating the result
+ * use roundoff in the approximation (Jim Blinn)
+ *
+ *   t = rgb*a + 0x80
+ *
+ * achieving the exact results
+ *
+ * note that M80 is a register with the 0x0080008000800080 constant
+ *
+ * The results replace MA1 (and MA2).  MP1 (and MP2) are overwritten with a
+ * copy of the biased product; M80 is only read.
+ */
+#define GMB_MULT_GSR( MP1, MA1, MP2, MA2, M80 ) \
+    PMULLW     ( MP1, MA1 )                     /*                  t1 = p1*a1                   */     ;\
+    PADDW      ( M80, MA1 )                     /*                 t1 += 0x80                    */     ;\
+                                                                                                        ;\
+TWO(PMULLW     ( MP2, MA2 ))                    /*                  t2 = p2*a2                   */     ;\
+TWO(PADDW      ( M80, MA2 ))                    /*                 t2 += 0x80                    */     ;\
+                                                                                                        ;\
+    MOVQ       ( MA1, MP1 )                     /*              keep a copy of t1                */     ;\
+    PSRLW      ( CONST(8), MA1 )                /*                    t1 >> 8                    */     ;\
+                                                                                                        ;\
+TWO(MOVQ       ( MA2, MP2 ))                    /*              keep a copy of t2                */     ;\
+TWO(PSRLW      ( CONST(8), MA2 ))               /*                    t2 >> 8                    */     ;\
+                                                                                                        ;\
+    PADDW      ( MP1, MA1 )                     /*        t1 + (t1 >> 8) ~= (t1/255) << 8        */     ;\
+    PSRLW      ( CONST(8), MA1 )                /*    sa1    |    sb1    |    sg1    |    sr1    */     ;\
+                                                                                                        ;\
+TWO(PADDW      ( MP2, MA2 ))                    /*        t2 + (t2 >> 8) ~= (t2/255) << 8        */     ;\
+TWO(PSRLW      ( CONST(8), MA2 ))               /*    sa2    |    sb2    |    sg2    |    sr2    */
+/* linear interpolation - geometric series
+ *
+ * computes, per 16-bit channel,
+ *
+ *   s = q + (p - q)*a/255
+ *
+ * using the two-term geometric series for the division.  On entry MP holds p,
+ * MQ holds q and MA holds the weight a; the result is left in MA1 (and MA2).
+ * MP and MQ are both overwritten (MP with a copy of the product, MQ with q << 8).
+ */
+#define GMB_LERP_GS( MP1, MQ1, MA1, MP2, MQ2, MA2) \
+    PSUBW      ( MQ1, MP1 )                     /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */     ;\
+    PSLLW      ( CONST(8), MQ1 )                /*                    q1 << 8                    */     ;\
+    PMULLW     ( MP1, MA1 )                     /*              t1 = (p1 - q1)*pa1               */     ;\
+                                                                                                        ;\
+TWO(PSUBW      ( MQ2, MP2 ))                    /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */     ;\
+TWO(PSLLW      ( CONST(8), MQ2 ))               /*                    q2 << 8                    */     ;\
+TWO(PMULLW     ( MP2, MA2 ))                    /*              t2 = (p2 - q2)*pa2               */     ;\
+                                                                                                        ;\
+    MOVQ       ( MA1, MP1 )                     /*              keep a copy of t1                */     ;\
+    PSRLW      ( CONST(8), MA1 )                /*                    t1 >> 8                    */     ;\
+                                                                                                        ;\
+TWO(MOVQ       ( MA2, MP2 ))                    /*              keep a copy of t2                */     ;\
+TWO(PSRLW      ( CONST(8), MA2 ))               /*                    t2 >> 8                    */     ;\
+                                                                                                        ;\
+    PADDW      ( MP1, MA1 )                     /*        t1 + (t1 >> 8) ~= (t1/255) << 8        */     ;\
+TWO(PADDW      ( MP2, MA2 ))                    /*        t2 + (t2 >> 8) ~= (t2/255) << 8        */     ;\
+                                                                                                        ;\
+    PADDW      ( MQ1, MA1 )                     /*              (t1/255 + q1) << 8               */     ;\
+TWO(PADDW      ( MQ2, MA2 ))                    /*              (t2/255 + q2) << 8               */     ;\
+                                                                                                        ;\
+    PSRLW      ( CONST(8), MA1 )                /*    sa1    |    sb1    |    sg1    |    sr1    */     ;\
+TWO(PSRLW      ( CONST(8), MA2 ))               /*    sa2    |    sb2    |    sg2    |    sr2    */
+/* linear interpolation - geometric series with roundoff
+ *
+ * this is a generalization of Blinn's formula to signed arithmetic
+ *
+ * note that M80 is a register with the 0x0080008000800080 constant
+ *
+ * Same contract as the plain geometric-series lerp: MP holds p, MQ holds q,
+ * MA holds the weight, and s = q + (p - q)*a/255 is left in MA1 (and MA2).
+ * Before the 0x80 rounding bias is added, 0x100 is subtracted from the product
+ * in every channel where p - q is negative (its sign bit is isolated with a
+ * shift by 15).  MP and MQ are overwritten; M80 is only read.
+ */
+#define GMB_LERP_GSR( MP1, MQ1, MA1, MP2, MQ2, MA2, M80) \
+    PSUBW      ( MQ1, MP1 )                     /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */     ;\
+    PSLLW      ( CONST(8), MQ1 )                /*                    q1 << 8                    */     ;\
+    PMULLW     ( MP1, MA1 )                     /*              t1 = (p1 - q1)*pa1               */     ;\
+                                                                                                        ;\
+TWO(PSUBW      ( MQ2, MP2 ))                    /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */     ;\
+TWO(PSLLW      ( CONST(8), MQ2 ))               /*                    q2 << 8                    */     ;\
+TWO(PMULLW     ( MP2, MA2 ))                    /*              t2 = (p2 - q2)*pa2               */     ;\
+                                                                                                        ;\
+    PSRLW      ( CONST(15), MP1 )               /*                 q1 > p1 ? 1 : 0               */     ;\
+TWO(PSRLW      ( CONST(15), MP2 ))              /*                 q2 > p2 ? 1 : 0               */     ;\
+                                                                                                        ;\
+    PSLLW      ( CONST(8), MP1 )                /*             q1 > p1 ? 0x100 : 0               */     ;\
+TWO(PSLLW      ( CONST(8), MP2 ))               /*             q2 > p2 ? 0x100 : 0               */     ;\
+                                                                                                        ;\
+    PSUBW      ( MP1, MA1 )                     /*                  t1 -=? 0x100                 */     ;\
+TWO(PSUBW      ( MP2, MA2 ))                    /*                  t2 -=? 0x100                 */     ;\
+                                                                                                        ;\
+    PADDW      ( M80, MA1 )                     /*                 t1 += 0x80                    */     ;\
+TWO(PADDW      ( M80, MA2 ))                    /*                 t2 += 0x80                    */     ;\
+                                                                                                        ;\
+    MOVQ       ( MA1, MP1 )                     /*              keep a copy of t1                */     ;\
+    PSRLW      ( CONST(8), MA1 )                /*                    t1 >> 8                    */     ;\
+                                                                                                        ;\
+TWO(MOVQ       ( MA2, MP2 ))                    /*              keep a copy of t2                */     ;\
+TWO(PSRLW      ( CONST(8), MA2 ))               /*                    t2 >> 8                    */     ;\
+                                                                                                        ;\
+    PADDW      ( MP1, MA1 )                     /*        t1 + (t1 >> 8) ~= (t1/255) << 8        */     ;\
+TWO(PADDW      ( MP2, MA2 ))                    /*        t2 + (t2 >> 8) ~= (t2/255) << 8        */     ;\
+                                                                                                        ;\
+    PADDW      ( MQ1, MA1 )                     /*              (t1/255 + q1) << 8               */     ;\
+TWO(PADDW      ( MQ2, MA2 ))                    /*              (t2/255 + q2) << 8               */     ;\
+                                                                                                        ;\
+    PSRLW      ( CONST(8), MA1 )                /*    sa1    |    sb1    |    sg1    |    sr1    */     ;\
+TWO(PSRLW      ( CONST(8), MA2 ))               /*    sa2    |    sb2    |    sg2    |    sr2    */
+/* linear interpolation - geometric series with correction
+ *
+ * instead of the roundoff this adds a small correction to satisfy the OpenGL criteria
+ *
+ *   t/255 ~= (t + (t >> 8) + (t >> 15)) >> 8
+ *
+ * note that although it is faster than rounding off it doesn't always give the exact results
+ *
+ * MP holds p, MQ holds q and MA holds the weight; s = q + (p - q)*a/255 is
+ * left in MA1 (and MA2).  MP and MQ are overwritten.  The t >> 15 term is
+ * obtained by shifting the already computed t >> 8 right by a further 7 bits.
+ */
+#define GMB_LERP_GSC( MP1, MQ1, MA1, MP2, MQ2, MA2) \
+    PSUBW      ( MQ1, MP1 )                     /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */     ;\
+    PSLLW      ( CONST(8), MQ1 )                /*                    q1 << 8                    */     ;\
+    PMULLW     ( MP1, MA1 )                     /*              t1 = (p1 - q1)*pa1               */     ;\
+                                                                                                        ;\
+TWO(PSUBW      ( MQ2, MP2 ))                    /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */     ;\
+TWO(PSLLW      ( CONST(8), MQ2 ))               /*                    q2 << 8                    */     ;\
+TWO(PMULLW     ( MP2, MA2 ))                    /*              t2 = (p2 - q2)*pa2               */     ;\
+                                                                                                        ;\
+    MOVQ       ( MA1, MP1 )                     /*              keep a copy of t1                */     ;\
+    PSRLW      ( CONST(8), MA1 )                /*                    t1 >> 8                    */     ;\
+                                                                                                        ;\
+TWO(MOVQ       ( MA2, MP2 ))                    /*              keep a copy of t2                */     ;\
+TWO(PSRLW      ( CONST(8), MA2 ))               /*                    t2 >> 8                    */     ;\
+                                                                                                        ;\
+    PADDW      ( MA1, MP1 )                     /*        t1 + (t1 >> 8) ~= (t1/255) << 8        */     ;\
+    PSRLW      ( CONST(7), MA1 )                /*                    t1 >> 15                   */     ;\
+                                                                                                        ;\
+TWO(PADDW      ( MA2, MP2 ))                    /*        t2 + (t2 >> 8) ~= (t2/255) << 8        */     ;\
+TWO(PSRLW      ( CONST(7), MA2 ))               /*                    t2 >> 15                   */     ;\
+                                                                                                        ;\
+    PADDW      ( MP1, MA1 )                     /*  t1 + (t1 >> 8) + (t1 >>15) ~= (t1/255) << 8  */     ;\
+TWO(PADDW      ( MP2, MA2 ))                    /*  t2 + (t2 >> 8) + (t2 >>15) ~= (t2/255) << 8  */     ;\
+                                                                                                        ;\
+    PADDW      ( MQ1, MA1 )                     /*              (t1/255 + q1) << 8               */     ;\
+TWO(PADDW      ( MQ2, MA2 ))                    /*              (t2/255 + q2) << 8               */     ;\
+                                                                                                        ;\
+    PSRLW      ( CONST(8), MA1 )                /*    sa1    |    sb1    |    sg1    |    sr1    */     ;\
+TWO(PSRLW      ( CONST(8), MA2 ))               /*    sa2    |    sb2    |    sg2    |    sr2    */
+/* common blending setup code
+ *
+ * note that M00 is a register with 0x0000000000000000 constant which can be easily obtained making
+ *
+ *   PXOR      ( M00, M00 )
+ *
+ * GMB_LOAD reads packed 8-bit pixels: p from the address in rgba into MPP and
+ * q from the address in dest into MQQ.  The ONE() form loads a single pixel
+ * (32 bits, MOVD); the TWO() form loads two adjacent pixels (64 bits, MOVQ).
+ */
+#define GMB_LOAD(rgba, dest, MPP, MQQ) \
+ONE(MOVD       ( REGIND(rgba), MPP ))           /*     |     |     |     | pa1 | pb1 | pg1 | pr1 */     ;\
+ONE(MOVD       ( REGIND(dest), MQQ ))           /*     |     |     |     | qa1 | qb1 | qg1 | qr1 */     ;\
+                                                                                                        ;\
+TWO(MOVQ       ( REGIND(rgba), MPP ))           /* pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1 */     ;\
+TWO(MOVQ       ( REGIND(dest), MQQ ))           /* qa2 | qb2 | qg2 | qr2 | qa1 | qb1 | qg1 | qr1 */
+#define GMB_UNPACK(MP1, MQ1, MP2, MQ2, M00) \
+TWO(MOVQ       ( MP1, MP2 ))                    /*     copy packed p so pixel 2 can be split     */     ;\
+TWO(MOVQ       ( MQ1, MQ2 ))                    /*     copy packed q so pixel 2 can be split     */     ;\
+                                                                                                        ;\
+    PUNPCKLBW  ( M00, MQ1 )                     /*    qa1    |    qb1    |    qg1    |    qr1    */     ;\
+TWO(PUNPCKHBW  ( M00, MQ2 ))                    /*    qa2    |    qb2    |    qg2    |    qr2    */     ;\
+    PUNPCKLBW  ( M00, MP1 )                     /*    pa1    |    pb1    |    pg1    |    pr1    */     ;\
+TWO(PUNPCKHBW  ( M00, MP2 ))                    /*    pa2    |    pb2    |    pg2    |    pr2    */
+#define GMB_ALPHA(MP1, MA1, MP2, MA2) \
+    MOVQ       ( MP1, MA1 )                     /*  start from the unpacked pixel (MP1 is kept)  */     ;\
+TWO(MOVQ       ( MP2, MA2 ))                    /*  start from the unpacked pixel (MP2 is kept)  */     ;\
+                                                                                                        ;\
+    PUNPCKHWD  ( MA1, MA1 )                     /*    pa1    |    pa1    |           |           */     ;\
+TWO(PUNPCKHWD  ( MA2, MA2 ))                    /*    pa2    |    pa2    |           |           */     ;\
+    PUNPCKHDQ  ( MA1, MA1 )                     /*    pa1    |    pa1    |    pa1    |    pa1    */     ;\
+TWO(PUNPCKHDQ  ( MA2, MA2 ))                    /*    pa2    |    pa2    |    pa2    |    pa2    */
+/* Pack the 16-bit channels of MS1 (pixel 1) and MS2 (pixel 2) back into bytes
+ * with unsigned saturation; the packed result is left in MS1.
+ * The body must not end in a line continuation: a trailing backslash would
+ * splice the next source line (another #define) into this macro.  Callers
+ * supply their own statement separator after the invocation.
+ */
+#define GMB_PACK( MS1, MS2 ) \
+    PACKUSWB   ( MS2, MS1 )                     /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */
+#define GMB_STORE(rgba, MSS ) /* write packed result MSS to the address in rgba: 32 bits for ONE, 64 bits for TWO */ \
+ONE(MOVD       ( MSS, REGIND(rgba) ))           /*     |     |     |     | sa1 | sb1 | sg1 | sr1 */     ;\
+TWO(MOVQ       ( MSS, REGIND(rgba) ))           /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */
+/* Kevin F. Quinn <kevquinn@gentoo.org> 2 July 2006
+ * Replace data segment constants with text-segment
+ * constants (via pushl/movq)
+ *
+ * The data-segment definitions that were replaced, kept for reference:
+ *
+    SEG_DATA
+ALIGNDATA8
+const_0080:
+    D_LONG 0x00800080, 0x00800080
+const_80:
+    D_LONG 0x80808080, 0x80808080
+ *
+ * Each 64-bit constant is now given as two 32-bit immediates (_l = low
+ * dword, _h = high dword) that the INIT macros push and then load with MOVQ.
+*/
+#define const_0080_l 0x00800080   /* 0x0080 in each 16-bit lane: rounding bias for the modulate blend */
+#define const_0080_h 0x00800080
+#define const_80_l 0x80808080     /* 0x80 in each byte: flips the sign bit for the min/max compares */
+#define const_80_h 0x80808080
+    SEG_TEXT
+/* Blend transparency function
+ *
+ * Instantiates the mmx_blendtmp.h template with the _transparency suffix.
+ * Per channel the result stored back into rgba is
+ *
+ *   dest + (rgba - dest)*alpha/255
+ *
+ * where alpha is the alpha channel of the rgba pixel, using the geometric
+ * series with correction for the division.  MM0 is zeroed once in INIT and
+ * used as the zero operand when unpacking bytes to 16-bit words.
+ */
+#define TAG(x) CONCAT(x,_transparency)
+#define LLTAG(x) LLBL2(x,_transparency)
+#define INIT \
+    PXOR       ( MM0, MM0 )                     /*   0x0000  |   0x0000  |   0x0000  |   0x0000  */
+#define MAIN( rgba, dest ) \
+    GMB_LOAD( rgba, dest, MM1, MM2 )                                                                    ;\
+    GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 )                                                               ;\
+    GMB_ALPHA( MM1, MM3, MM4, MM6 )                                                                     ;\
+    GMB_LERP_GSC( MM1, MM2, MM3, MM4, MM5, MM6 )                                                        ;\
+    GMB_PACK( MM3, MM6 )                                                                                ;\
+    GMB_STORE( rgba, MM3 )
+#include "mmx_blendtmp.h"
+/* Blend add function
+ *
+ * Instantiates the mmx_blendtmp.h template with the _add suffix.  Each byte
+ * of rgba is replaced by rgba + dest with unsigned saturation (PADDUSB).
+ * No register setup is needed, so INIT is empty.
+ *
+ * FIXME: Add some loop unrolling here...
+ */
+#define TAG(x) CONCAT(x,_add)
+#define LLTAG(x) LLBL2(x,_add)
+#define INIT
+#define MAIN( rgba, dest ) \
+ONE(MOVD       ( REGIND(rgba), MM1 ))           /*     |     |     |     | pa1 | pb1 | pg1 | pr1 */     ;\
+ONE(MOVD       ( REGIND(dest), MM2 ))           /*     |     |     |     | qa1 | qb1 | qg1 | qr1 */     ;\
+ONE(PADDUSB    ( MM2, MM1 ))                    /*            saturating p + q per byte          */     ;\
+ONE(MOVD       ( MM1, REGIND(rgba) ))           /*     |     |     |     | sa1 | sb1 | sg1 | sr1 */     ;\
+                                                                                                        ;\
+TWO(MOVQ       ( REGIND(rgba), MM1 ))           /* pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1 */     ;\
+TWO(PADDUSB    ( REGIND(dest), MM1 ))           /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */     ;\
+TWO(MOVQ       ( MM1, REGIND(rgba) ))
+#include "mmx_blendtmp.h"
+/* Blend min function
+ *
+ * Instantiates the mmx_blendtmp.h template with the _min suffix.  Each byte
+ * of rgba is replaced by the smaller of the rgba byte (p) and the dest byte
+ * (q).  PCMPGTB is a signed compare, so copies of both operands first have
+ * their top bit flipped with the 0x80 constant in MM7; the resulting mask
+ * then selects from the original, unmodified values.
+ */
+#define TAG(x) CONCAT(x,_min)
+#define LLTAG(x) LLBL2(x,_min)
+/* Kevin F. Quinn 2nd July 2006
+ * Replace data segment constants with text-segment instructions
+#define INIT \
+    MOVQ       ( CONTENT(const_80), MM7 )
+ */
+#define INIT \
+    PUSH_L     ( CONST(const_80_h) )            /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/     ;\
+    PUSH_L     ( CONST(const_80_l) )            /*     low dword pushed last -> lowest address   */     ;\
+    MOVQ       ( REGIND(ESP), MM7 )             /*      load the 64-bit constant from stack      */     ;\
+    ADD_L      ( CONST(8), ESP)                 /* release the two pushed dwords */
+#define MAIN( rgba, dest ) \
+    GMB_LOAD( rgba, dest, MM1, MM2 )                                                                    ;\
+    MOVQ       ( MM1, MM3 )                     /*            working copy of p                  */     ;\
+    MOVQ       ( MM2, MM4 )                     /*            working copy of q                  */     ;\
+    PXOR       ( MM7, MM3 )                     /*              unsigned -> signed               */     ;\
+    PXOR       ( MM7, MM4 )                     /*              unsigned -> signed               */     ;\
+    PCMPGTB    ( MM3, MM4 )                     /*                 q > p ? 0xff : 0x00           */     ;\
+    PAND       ( MM4, MM1 )                     /*                 q > p ? p : 0                 */     ;\
+    PANDN      ( MM2, MM4 )                     /*                 q > p ? 0 : q                 */     ;\
+    POR        ( MM1, MM4 )                     /*                 q > p ? p : q                 */     ;\
+    GMB_STORE( rgba, MM4 )
+#include "mmx_blendtmp.h"
+/* Blend max function
+ *
+ * Instantiates the mmx_blendtmp.h template with the _max suffix.  Each byte
+ * of rgba is replaced by the larger of the rgba byte (p) and the dest byte
+ * (q).  PCMPGTB is a signed compare, so copies of both operands first have
+ * their top bit flipped with the 0x80 constant in MM7; the resulting mask
+ * then selects from the original, unmodified values.
+ */
+#define TAG(x) CONCAT(x,_max)
+#define LLTAG(x) LLBL2(x,_max)
+/* Kevin F. Quinn 2nd July 2006
+ * Replace data segment constants with text-segment instructions
+#define INIT \
+    MOVQ       ( CONTENT(const_80), MM7 )
+ */
+/* The high dword is pushed first so that the low dword ends up at the lower
+ * address, which is where MOVQ takes the low half of MM7 from (same order as
+ * the min function).
+ */
+#define INIT \
+    PUSH_L     ( CONST(const_80_h) )            /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/     ;\
+    PUSH_L     ( CONST(const_80_l) )                                                                    ;\
+    MOVQ       ( REGIND(ESP), MM7 )                                                                     ;\
+    ADD_L      ( CONST(8), ESP)
+#define MAIN( rgba, dest ) \
+    GMB_LOAD( rgba, dest, MM1, MM2 )                                                                    ;\
+    MOVQ       ( MM1, MM3 )                                                                             ;\
+    MOVQ       ( MM2, MM4 )                                                                             ;\
+    PXOR       ( MM7, MM3 )                     /*              unsigned -> signed               */     ;\
+    PXOR       ( MM7, MM4 )                     /*              unsigned -> signed               */     ;\
+    PCMPGTB    ( MM3, MM4 )                     /*                 q > p ? 0xff : 0x00           */     ;\
+    PAND       ( MM4, MM2 )                     /*                 q > p ? q : 0                 */     ;\
+    PANDN      ( MM1, MM4 )                     /*                 q > p ? 0 : p                 */     ;\
+    POR        ( MM2, MM4 )                     /*                 q > p ? q : p                 */     ;\
+    GMB_STORE( rgba, MM4 )
+#include "mmx_blendtmp.h"
+/* Blend modulate function
+ *
+ * Instantiates the mmx_blendtmp.h template with the _modulate suffix.  Each
+ * channel of rgba is replaced by rgba*dest/255, computed with the rounded
+ * geometric-series multiply.  MM0 holds zero (for unpacking bytes to words)
+ * and MM7 holds 0x0080 in every 16-bit lane (the rounding bias).
+ */
+#define TAG(x) CONCAT(x,_modulate)
+#define LLTAG(x) LLBL2(x,_modulate)
+/* Kevin F. Quinn 2nd July 2006
+ * Replace data segment constants with text-segment instructions
+#define INIT \
+    MOVQ       ( CONTENT(const_0080), MM7 )
+ */
+/* The high dword is pushed first so that the low dword ends up at the lower
+ * address, which is where MOVQ takes the low half of MM7 from (same order as
+ * the min function).
+ */
+#define INIT \
+    PXOR       ( MM0, MM0 )                     /*   0x0000  |   0x0000  |   0x0000  |   0x0000  */     ;\
+    PUSH_L     ( CONST(const_0080_h) )          /*   0x0080  |   0x0080  |   0x0080  |   0x0080  */     ;\
+    PUSH_L     ( CONST(const_0080_l) )                                                                  ;\
+    MOVQ       ( REGIND(ESP), MM7 )                                                                     ;\
+    ADD_L      ( CONST(8), ESP)
+#define MAIN( rgba, dest ) \
+    GMB_LOAD( rgba, dest, MM1, MM2 )                                                                    ;\
+    GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 )                                                               ;\
+    GMB_MULT_GSR( MM1, MM2, MM4, MM5, MM7 )                                                             ;\
+    GMB_PACK( MM2, MM5 )                                                                                ;\
+    GMB_STORE( rgba, MM2 )
+#include "mmx_blendtmp.h"
+#endif
+#if defined (__ELF__) && defined (__linux__)
+        .section .note.GNU-stack,"",%progbits
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/mmx_blendtmp.h
 ,0 → 1,114
+/*
+ * Written by José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+/*
+ * void _mesa_mmx_blend( struct gl_context *ctx,
+ *                       GLuint n,
+ *                       const GLubyte mask[],
+ *                       GLchan rgba[][4],
+ *                       CONST GLchan dest[][4] )
+ *
+ * Template body for the MMX blend routines.  Before each inclusion the
+ * including file defines
+ *
+ *   TAG(x), LLTAG(x)   - build the per-variant function name and local labels
+ *   INIT               - one-time register setup, run once per call
+ *   MAIN(rgba, dest)   - the per-pixel work, written with ONE()/TWO() wrappers
+ *
+ * and all four are #undef'ed again at the end of this file.
+ *
+ * Only the arguments at REGOFF(12..24, EBP) are read: n (ECX), mask (EBX),
+ * rgba (EDI) and dest (ESI).  ctx and anything passed after dest are ignored.
+ *
+ * Flow: if bit 2 of the rgba address is set, one leading pixel is handled on
+ * its own so the pair loop works on 8-byte aligned rgba; then pixels are
+ * handled two at a time; then a possible last odd pixel.  MAIN is expanded
+ * with ONE(x) active for the single-pixel cases and TWO(x) active for the
+ * pair loop.  A single pixel is skipped when its mask byte is zero; a pair is
+ * skipped only when both mask bytes are zero, otherwise both pixels of the
+ * pair are blended.
+ */
+ALIGNTEXT16
+GLOBL GLNAME( TAG(_mesa_mmx_blend) )
+HIDDEN( TAG(_mesa_mmx_blend) )
+GLNAME( TAG(_mesa_mmx_blend) ):
+    PUSH_L     ( EBP )
+    MOV_L      ( ESP, EBP )
+    PUSH_L     ( ESI )                          /* ESI, EDI and EBX are restored at GMB_return */
+    PUSH_L     ( EDI )
+    PUSH_L     ( EBX )
+    MOV_L      ( REGOFF(12, EBP), ECX )         /* n */
+    CMP_L      ( CONST(0), ECX)                 /* n == 0: leave before INIT, so EMMS is not needed */
+    JE         ( LLTAG(GMB_return) )
+    MOV_L      ( REGOFF(16, EBP), EBX )         /* mask */
+    MOV_L      ( REGOFF(20, EBP), EDI )         /* rgba */
+    MOV_L      ( REGOFF(24, EBP), ESI )         /* dest */
+    INIT
+    TEST_L     ( CONST(4), EDI )                /* align rgba on an 8-byte boundary */
+    JZ         ( LLTAG(GMB_align_end) )
+    CMP_B      ( CONST(0), REGIND(EBX) )        /* *mask == 0 */
+    JE         ( LLTAG(GMB_align_continue) )
+    /* run-in: blend the single leading pixel */
+#define ONE(x)  x
+#define TWO(x)
+    MAIN       ( EDI, ESI )
+#undef ONE
+#undef TWO
+LLTAG(GMB_align_continue):
+    DEC_L      ( ECX )                          /* n -= 1 */
+    INC_L      ( EBX )                          /* mask += 1 */
+    ADD_L      ( CONST(4), EDI )                /* rgba += 1 */
+    ADD_L      ( CONST(4), ESI )                /* dest += 1 */
+LLTAG(GMB_align_end):
+    CMP_L      ( CONST(2), ECX)                 /* fewer than two pixels left: skip the pair loop */
+    JB         ( LLTAG(GMB_loop_end) )
+ALIGNTEXT16
+LLTAG(GMB_loop_begin):
+    CMP_W      ( CONST(0), REGIND(EBX) )        /* *mask == 0 && *(mask + 1) == 0 */
+    JE         ( LLTAG(GMB_loop_continue) )
+    /* main loop: blend two pixels per iteration */
+#define ONE(x)
+#define TWO(x)  x
+    MAIN       ( EDI, ESI )
+#undef ONE
+#undef TWO
+LLTAG(GMB_loop_continue):
+    DEC_L      ( ECX )
+    DEC_L      ( ECX )                          /* n -= 2 */
+    ADD_L      ( CONST(2), EBX )                /* mask += 2 */
+    ADD_L      ( CONST(8), EDI )                /* rgba += 2 */
+    ADD_L      ( CONST(8), ESI )                /* dest += 2 */
+    CMP_L      ( CONST(2), ECX )                /* at least two pixels left: go round again */
+    JAE        ( LLTAG(GMB_loop_begin) )
+LLTAG(GMB_loop_end):
+    CMP_L      ( CONST(1), ECX )                /* n == 0: no trailing pixel */
+    JB         ( LLTAG(GMB_done) )
+    CMP_B      ( CONST(0), REGIND(EBX) )        /* *mask == 0 */
+    JE         ( LLTAG(GMB_done) )
+    /* run-out: blend the single trailing pixel */
+#define ONE(x)  x
+#define TWO(x)
+    MAIN       ( EDI, ESI )
+#undef ONE
+#undef TWO
+LLTAG(GMB_done):
+    EMMS
+LLTAG(GMB_return):
+    POP_L      ( EBX )
+    POP_L      ( EDI )
+    POP_L      ( ESI )
+    MOV_L      ( EBP, ESP )
+    POP_L      ( EBP )
+    RET
+#undef TAG
+#undef LLTAG
+#undef INIT
+#undef MAIN

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/norm_args.h
 ,0 → 1,57
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/*
+ * Normal transform function interface for assembly code.  Simply define
+ * FRAME_OFFSET to the number of bytes pushed onto the stack before
+ * using the ARG_* argument macros.
+ *
+ * Each ARG_* macro expands to an ESP-relative operand, so FRAME_OFFSET has
+ * to be kept in step with every push or pop made before the macro is used.
+ *
+ * Gareth Hughes
+ */
+#ifndef __NORM_ARGS_H__
+#define __NORM_ARGS_H__
+/* Offsets for normal_func arguments
+ *
+ * typedef void (*normal_func)( const GLmatrix *mat,
+ *                              GLfloat scale,
+ *                              const GLfloat lengths[],
+ *                              const GLvector4f *in,
+ *                              GLvector4f *dest );
+ *
+ * NOTE(review): the first offset is 4 rather than 0, presumably to skip the
+ * return address at 0(ESP) on function entry - confirm against the callers.
+ * NOTE(review): the typedef above lists lengths before in, but OFFSET_IN (12)
+ * is lower than OFFSET_LENGTHS (16) - one of the two is out of date; verify
+ * against the real normal_func declaration.
+ */
+#define OFFSET_MAT      4
+#define OFFSET_SCALE    8
+#define OFFSET_IN       12
+#define OFFSET_LENGTHS  16
+#define OFFSET_DEST     20
+#define ARG_MAT         REGOFF(FRAME_OFFSET+OFFSET_MAT, ESP)
+#define ARG_SCALE       REGOFF(FRAME_OFFSET+OFFSET_SCALE, ESP)
+#define ARG_IN          REGOFF(FRAME_OFFSET+OFFSET_IN, ESP)
+#define ARG_LENGTHS     REGOFF(FRAME_OFFSET+OFFSET_LENGTHS, ESP)
+#define ARG_DEST        REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP)
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/read_rgba_span_x86.S
 ,0 → 1,686
+/*
+ * (C) Copyright IBM Corporation 2004
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+/**
+ * \file read_rgba_span_x86.S
+ * Optimized routines to transfer pixel data from the framebuffer to a
+ * buffer in main memory.
+ *
+ * \author Ian Romanick <idr@us.ibm.com>
+ */
+        .file   "read_rgba_span_x86.S"
+#if !defined(__DJGPP__) && !defined(__MINGW32__) && !defined(__APPLE__) /* this one cries for assyntax.h */
+/* Kevin F. Quinn 2nd July 2006
+ * Replaced data segment constants with text-segment instructions.
+ *
+ * LOAD_MASK(mvins, m1, m2) builds two constants on the stack and loads them
+ * with the move instruction given as mvins:
+ *
+ *   m1 <- 0xff00ff00 repeated in every dword
+ *   m2 <- 0x00ff0000 repeated in every dword
+ *
+ * Sixteen bytes are pushed for each constant, so a 16-byte load can be used
+ * as mvins as well as the 8-byte movq.  All 32 bytes are released by the
+ * final addl, leaving %esp as it was on entry to the macro.
+ */
+#define LOAD_MASK(mvins,m1,m2) \
+        pushl   $0xff00ff00 ;\
+        pushl   $0xff00ff00 ;\
+        pushl   $0xff00ff00 ;\
+        pushl   $0xff00ff00 ;\
+        mvins   (%esp), m1      ;\
+        pushl   $0x00ff0000 ;\
+        pushl   $0x00ff0000 ;\
+        pushl   $0x00ff0000 ;\
+        pushl   $0x00ff0000 ;\
+        mvins   (%esp), m2      ;\
+        addl    $32, %esp
+/* I implemented these as macros because they appear in several places,
+ * and I've tweaked them a number of times.  I got tired of changing every
+ * place they appear. :)
+ *
+ * DO_ONE_PIXEL converts one 32-bit pixel: it is read from (%ebx), reordered
+ * in %eax with a byte swap followed by an 8-bit rotate, and written to
+ * (%ecx).  Both pointers are then advanced by 4 bytes.  %eax is clobbered.
+ */
+#define DO_ONE_PIXEL() \
+        movl    (%ebx), %eax ; \
+        addl    $4, %ebx ; \
+        bswap   %eax          /* ARGB -> BGRA */ ; \
+        rorl    $8, %eax      /* BGRA -> ABGR */ ; \
+        movl    %eax, (%ecx)  /* ABGR -> R, G, B, A */ ; \
+        addl    $4, %ecx
+/* Same conversion as DO_ONE_PIXEL, for the final pixel of a span: the pixel
+ * at (%ebx) is converted through %eax and stored at (%ecx), but neither
+ * pointer is advanced.  The body deliberately does not end in a line
+ * continuation, so whatever follows the macro in the file can never be
+ * spliced into it.
+ */
+#define DO_ONE_LAST_PIXEL() \
+        movl    (%ebx), %eax ; \
+        bswap   %eax          /* ARGB -> BGRA */ ; \
+        rorl    $8, %eax      /* BGRA -> ABGR */ ; \
+        movl    %eax, (%ecx)  /* ABGR -> R, G, B, A */
+/**
+ * MMX optimized version of the BGRA8888_REV to RGBA copy routine.
+ *
+ * Stack arguments, in order: source pointer, destination pointer, number
+ * of pixels to copy.  Every 32-bit pixel is copied with its bytes 0 and 2
+ * exchanged; bytes 1 and 3 keep their position.
+ *
+ * \warning
+ * This function assumes that the caller will issue the EMMS instruction
+ * at the correct places.
+ */
+.globl _generic_read_RGBA_span_BGRA8888_REV_MMX
+#ifndef USE_DRICORE
+.hidden _generic_read_RGBA_span_BGRA8888_REV_MMX
+#endif
+        .type   _generic_read_RGBA_span_BGRA8888_REV_MMX, @function
+_generic_read_RGBA_span_BGRA8888_REV_MMX:
+        pushl   %ebx
+#ifdef USE_INNER_EMMS
+        emms
+#endif
+        /* %mm1 = 0xff00ff00 per dword (bytes that stay in place),
+         * %mm2 = 0x00ff0000 per dword (selects byte 2).
+         */
+        LOAD_MASK(movq,%mm1,%mm2)
+        /* One register was pushed, so the first argument is at 8(%esp). */
+        movl    8(%esp), %ebx   /* source pointer */
+        movl    16(%esp), %edx  /* number of pixels to copy */
+        movl    12(%esp), %ecx  /* destination pointer */
+        testl   %edx, %edx
+        jle     .L20            /* Bail if there's nothing to do. */
+        /* %eax becomes 1 when bit 2 of the source address is set.  In that
+         * case one pixel is converted with integer code first so that, for
+         * a 4-byte aligned source, the movq loads below start on an 8-byte
+         * boundary.
+         */
+        movl    %ebx, %eax
+        negl    %eax
+        sarl    $2, %eax
+        andl    $1, %eax
+        je      .L17
+        subl    %eax, %edx
+        DO_ONE_PIXEL()
+.L17:
+        /* Would it be faster to unroll this loop once and process 4 pixels
+         * per pass, instead of just two?
+         */
+        /* %eax = number of 2-pixel iterations.  The loop is entered at its
+         * test (.L18), which consumes the flags left by the shrl.
+         */
+        movl    %edx, %eax
+        shrl    %eax
+        jmp     .L18
+.L19:
+        movq    (%ebx), %mm0
+        addl    $8, %ebx
+        /* These 9 instructions do what PSHUFB (if there were such an
+         * instruction) could do in 1. :(
+         */
+        /* %mm3 = byte 2 of each pixel moved down to byte 0,
+         * %mm4 = byte 0 of each pixel moved up to byte 2,
+         * %mm0 = bytes 1 and 3 of each pixel, untouched.
+         */
+        movq    %mm0, %mm3
+        movq    %mm0, %mm4
+        pand    %mm2, %mm3
+        psllq   $16, %mm4
+        psrlq   $16, %mm3
+        pand    %mm2, %mm4
+        pand    %mm1, %mm0
+        por     %mm4, %mm3
+        por     %mm3, %mm0
+        movq    %mm0, (%ecx)
+        addl    $8, %ecx
+        subl    $1, %eax
+.L18:
+        jne     .L19
+#ifdef USE_INNER_EMMS
+        emms
+#endif
+        /* At this point there are either 1 or 0 pixels remaining to be
+         * converted.  Convert the last pixel, if needed.
+         */
+        testl   $1, %edx
+        je      .L20
+        DO_ONE_LAST_PIXEL()
+.L20:
+        popl    %ebx
+        ret
+        .size   _generic_read_RGBA_span_BGRA8888_REV_MMX, .-_generic_read_RGBA_span_BGRA8888_REV_MMX
+/**
+ * SSE optimized version of the BGRA8888_REV to RGBA copy routine.  SSE
+ * instructions are only actually used to read data from the framebuffer.
+ * In practice, the speed-up is pretty small.
+ *
+ * Stack arguments, in order: source pointer, destination pointer, number
+ * of pixels to copy.
+ *
+ * \todo
+ * Do some more testing and determine if there's any reason to have this
+ * function in addition to the MMX version.
+ *
+ * \warning
+ * This function assumes that the caller will issue the EMMS instruction
+ * at the correct places.
+ */
+.globl _generic_read_RGBA_span_BGRA8888_REV_SSE
+#ifndef USE_DRICORE
+.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE
+#endif
+        .type   _generic_read_RGBA_span_BGRA8888_REV_SSE, @function
+_generic_read_RGBA_span_BGRA8888_REV_SSE:
+        pushl   %esi
+        pushl   %ebx
+        pushl   %ebp
+#ifdef USE_INNER_EMMS
+        emms
+#endif
+        LOAD_MASK(movq,%mm1,%mm2)
+        /* Three registers were pushed, so the first argument is at
+         * 16(%esp).
+         */
+        movl    16(%esp), %ebx  /* source pointer */
+        movl    24(%esp), %edx  /* number of pixels to copy */
+        movl    20(%esp), %ecx  /* destination pointer */
+        testl   %edx, %edx
+        jle     .L35            /* Bail if there's nothing to do. */
+        /* Reserve a 16-byte aligned scratch slot on the stack for the
+         * XMM -> MMX transfer in the main loop.  %ebp keeps the original
+         * stack pointer, which is restored after the loop.
+         */
+        movl    %esp, %ebp
+        subl    $16, %esp
+        andl    $0xfffffff0, %esp
+        /* %eax = pixels up to the next 16-byte boundary of the source,
+         * %esi = min(%eax, pixel count) using a signed compare.  %esi
+         * pixels are handled before the aligned main loop and are removed
+         * from the count in %edx.
+         */
+        movl    %ebx, %eax
+        movl    %edx, %esi
+        negl    %eax
+        andl    $15, %eax
+        sarl    $2, %eax
+        cmpl    %edx, %eax
+        cmovle  %eax, %esi
+        subl    %esi, %edx
+        /* Bit 0 of %esi: convert one pixel. */
+        testl   $1, %esi
+        je      .L32
+        DO_ONE_PIXEL()
+.L32:
+        /* Bit 1 of %esi: convert two pixels with MMX. */
+        testl   $2, %esi
+        je      .L31
+        movq    (%ebx), %mm0
+        addl    $8, %ebx
+        movq    %mm0, %mm3
+        movq    %mm0, %mm4
+        pand    %mm2, %mm3
+        psllq   $16, %mm4
+        psrlq   $16, %mm3
+        pand    %mm2, %mm4
+        pand    %mm1, %mm0
+        por     %mm4, %mm3
+        por     %mm3, %mm0
+        movq    %mm0, (%ecx)
+        addl    $8, %ecx
+.L31:
+        /* %eax = number of 4-pixel iterations.  The loop is entered at its
+         * test (.L33), which consumes the flags left by the shrl.
+         */
+        movl    %edx, %eax
+        shrl    $2, %eax
+        jmp     .L33
+.L34:
+        movaps  (%ebx), %xmm0
+        addl    $16, %ebx
+        /* This would be so much better if we could just move directly from
+         * an SSE register to an MMX register.  Unfortunately, that
+         * functionality wasn't introduced until SSE2 with the MOVDQ2Q
+         * instruction.
+         */
+        movaps  %xmm0, (%esp)
+        movq    (%esp), %mm0
+        movq    8(%esp), %mm5
+        /* Two interleaved copies of the 2-pixel byte swap: %mm0/%mm3/%mm4
+         * handle the low 8 bytes, %mm5/%mm6/%mm7 the high 8 bytes.
+         */
+        movq    %mm0, %mm3
+        movq    %mm0, %mm4
+        movq    %mm5, %mm6
+        movq    %mm5, %mm7
+        pand    %mm2, %mm3
+        pand    %mm2, %mm6
+        psllq   $16, %mm4
+        psllq   $16, %mm7
+        psrlq   $16, %mm3
+        psrlq   $16, %mm6
+        pand    %mm2, %mm4
+        pand    %mm2, %mm7
+        pand    %mm1, %mm0
+        pand    %mm1, %mm5
+        por     %mm4, %mm3
+        por     %mm7, %mm6
+        por     %mm3, %mm0
+        por     %mm6, %mm5
+        movq    %mm0, (%ecx)
+        movq    %mm5, 8(%ecx)
+        addl    $16, %ecx
+        subl    $1, %eax
+.L33:
+        jne     .L34
+#ifdef USE_INNER_EMMS
+        emms
+#endif
+        /* Release the aligned scratch slot. */
+        movl    %ebp, %esp
+        /* At this point there are either [0, 3] pixels remaining to be
+         * converted.
+         */
+        testl   $2, %edx
+        je      .L36
+        movq    (%ebx), %mm0
+        addl    $8, %ebx
+        movq    %mm0, %mm3
+        movq    %mm0, %mm4
+        pand    %mm2, %mm3
+        psllq   $16, %mm4
+        psrlq   $16, %mm3
+        pand    %mm2, %mm4
+        pand    %mm1, %mm0
+        por     %mm4, %mm3
+        por     %mm3, %mm0
+        movq    %mm0, (%ecx)
+        addl    $8, %ecx
+.L36:
+        testl   $1, %edx
+        je      .L35
+        DO_ONE_LAST_PIXEL()
+.L35:
+        popl    %ebp
+        popl    %ebx
+        popl    %esi
+        ret
+        .size   _generic_read_RGBA_span_BGRA8888_REV_SSE, .-_generic_read_RGBA_span_BGRA8888_REV_SSE
+/**
+ * SSE2 optimized version of the BGRA8888_REV to RGBA copy routine.
+ *
+ * Stack arguments, in order: source pointer, destination pointer, number
+ * of pixels to copy.
+ */
+        .text
+.globl _generic_read_RGBA_span_BGRA8888_REV_SSE2
+#ifndef USE_DRICORE
+.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE2
+#endif
+        .type   _generic_read_RGBA_span_BGRA8888_REV_SSE2, @function
+_generic_read_RGBA_span_BGRA8888_REV_SSE2:
+        pushl   %esi
+        pushl   %ebx
+        /* %xmm1 = 0xff00ff00 per dword, %xmm2 = 0x00ff0000 per dword.
+         * movdqu is used because the stack is not known to be 16-byte
+         * aligned here.
+         */
+        LOAD_MASK(movdqu,%xmm1,%xmm2)
+        /* Two registers were pushed, so the first argument is at
+         * 12(%esp).
+         */
+        movl    12(%esp), %ebx  /* source pointer */
+        movl    20(%esp), %edx  /* number of pixels to copy */
+        movl    16(%esp), %ecx  /* destination pointer */
+        movl    %ebx, %eax
+        movl    %edx, %esi
+        testl   %edx, %edx
+        jle     .L46            /* Bail if there's nothing to do. */
+        /* If the source pointer isn't a multiple of 16 we have to process
+         * a few pixels the "slow" way to get the address aligned for
+         * the SSE fetch instructions.
+         */
+        /* %eax = pixels up to the next 16-byte boundary of the source,
+         * %esi = min(%eax, pixel count) using an unsigned compare.  %esi
+         * pixels are handled here and removed from the count in %edx.
+         */
+        negl    %eax
+        andl    $15, %eax
+        sarl    $2, %eax
+        cmpl    %edx, %eax
+        cmovbe  %eax, %esi
+        subl    %esi, %edx
+        testl   $1, %esi
+        je      .L41
+        DO_ONE_PIXEL()
+.L41:
+        testl   $2, %esi
+        je      .L40
+        /* Two pixels: 8-byte load/store, same swap as the main loop. */
+        movq    (%ebx), %xmm0
+        addl    $8, %ebx
+        movdqa  %xmm0, %xmm3
+        movdqa  %xmm0, %xmm4
+        andps   %xmm1, %xmm0
+        andps   %xmm2, %xmm3
+        pslldq  $2, %xmm4
+        psrldq  $2, %xmm3
+        andps   %xmm2, %xmm4
+        orps    %xmm4, %xmm3
+        orps    %xmm3, %xmm0
+        movq    %xmm0, (%ecx)
+        addl    $8, %ecx
+.L40:
+        /* Would it be worth having a specialized version of this loop for
+         * the case where the destination is 16-byte aligned?  That version
+         * would be identical except that it could use movdqa instead of
+         * movdqu.
+         */
+        /* %eax = number of 4-pixel iterations.  The loop is entered at its
+         * test (.L42), which consumes the flags left by the shrl.
+         */
+        movl    %edx, %eax
+        shrl    $2, %eax
+        jmp     .L42
+.L43:
+        movdqa  (%ebx), %xmm0
+        addl    $16, %ebx
+        /* %xmm0 keeps bytes 1 and 3 of each pixel, %xmm3 gets byte 2
+         * shifted down by two bytes, %xmm4 gets byte 0 shifted up by two
+         * bytes; OR-ing them exchanges bytes 0 and 2 of every pixel.
+         */
+        movdqa  %xmm0, %xmm3
+        movdqa  %xmm0, %xmm4
+        andps   %xmm1, %xmm0
+        andps   %xmm2, %xmm3
+        pslldq  $2, %xmm4
+        psrldq  $2, %xmm3
+        andps   %xmm2, %xmm4
+        orps    %xmm4, %xmm3
+        orps    %xmm3, %xmm0
+        movdqu  %xmm0, (%ecx)
+        addl    $16, %ecx
+        subl    $1, %eax
+.L42:
+        jne     .L43
+        /* There may be up to 3 pixels remaining to be copied.  Take care
+         * of them now.  We do the 2 pixel case first because the data
+         * will be aligned.
+         */
+        testl   $2, %edx
+        je      .L47
+        movq    (%ebx), %xmm0
+        addl    $8, %ebx
+        movdqa  %xmm0, %xmm3
+        movdqa  %xmm0, %xmm4
+        andps   %xmm1, %xmm0
+        andps   %xmm2, %xmm3
+        pslldq  $2, %xmm4
+        psrldq  $2, %xmm3
+        andps   %xmm2, %xmm4
+        orps    %xmm4, %xmm3
+        orps    %xmm3, %xmm0
+        movq    %xmm0, (%ecx)
+        addl    $8, %ecx
+.L47:
+        testl   $1, %edx
+        je      .L46
+        DO_ONE_LAST_PIXEL()
+.L46:
+        popl    %ebx
+        popl    %esi
+        ret
+        .size   _generic_read_RGBA_span_BGRA8888_REV_SSE2, .-_generic_read_RGBA_span_BGRA8888_REV_SSE2
+/* 64-bit constants for the RGB565 routine, each given as a low (_L) and a
+ * high (_H) dword; the routine pushes _H first and _L second and then
+ * loads the pair with one movq.  Seen as four 16-bit words, lowest first,
+ * MASK_565 is 0xf800, 0x07e0, 0x001f, 0x0000.
+ */
+#define MASK_565_L      0x07e0f800
+#define MASK_565_H      0x0000001f
+/* Setting SCALE_ADJUST to 5 gives a perfect match with the
+ * classic C implementation in Mesa.  Setting SCALE_ADJUST
+ * to 0 is slightly faster but at a small cost to accuracy.
+ */
+#define SCALE_ADJUST    5
+#if SCALE_ADJUST == 5
+#define PRESCALE_L 0x00100001
+#define PRESCALE_H 0x00000200
+#define SCALE_L 0x40C620E8
+#define SCALE_H 0x0000839d
+#elif SCALE_ADJUST == 0
+#define PRESCALE_L 0x00200001
+#define PRESCALE_H 0x00000800
+#define SCALE_L 0x01040108
+#define SCALE_H 0x00000108
+#else
+#error SCALE_ADJUST must either be 5 or 0.
+#endif
+/* As four 16-bit words, lowest first, ALPHA is 0, 0, 0, 0x00ff: only the
+ * top word (the one MASK_565 clears) is set.
+ */
+#define ALPHA_L 0x00000000
+#define ALPHA_H 0x00ff0000
+/**
+ * MMX optimized version of the RGB565 to RGBA copy routine.
+ *
+ * Stack arguments, in order: source pointer, destination pointer, number
+ * of pixels to copy.  Each 16-bit source pixel produces 4 destination
+ * bytes; the fourth byte is always 0xff.
+ */
+        .text
+        .globl  _generic_read_RGBA_span_RGB565_MMX
+#ifndef USE_DRICORE
+        .hidden _generic_read_RGBA_span_RGB565_MMX
+#endif
+        .type   _generic_read_RGBA_span_RGB565_MMX, @function
+_generic_read_RGBA_span_RGB565_MMX:
+#ifdef USE_INNER_EMMS
+        emms
+#endif
+        /* Nothing has been pushed yet, so the first argument is at
+         * 4(%esp).
+         */
+        movl    4(%esp), %eax   /* source pointer */
+        movl    8(%esp), %edx   /* destination pointer */
+        movl    12(%esp), %ecx  /* number of pixels to copy */
+        /* Build the four 64-bit constants on the stack:
+         * %mm5 = component mask, %mm6 = prescale factors,
+         * %mm7 = scale factors, %mm3 = alpha.  The 32 bytes pushed are
+         * released by the addl below.
+         */
+        pushl   $MASK_565_H
+        pushl   $MASK_565_L
+        movq    (%esp), %mm5
+        pushl   $PRESCALE_H
+        pushl   $PRESCALE_L
+        movq    (%esp), %mm6
+        pushl   $SCALE_H
+        pushl   $SCALE_L
+        movq    (%esp), %mm7
+        pushl   $ALPHA_H
+        pushl   $ALPHA_L
+        movq    (%esp), %mm3
+        addl    $32,%esp
+        /* %ecx = number of 4-pixel iterations.  The loop is entered at its
+         * test (.L02), which consumes the flags left by the sarl.
+         */
+        sarl    $2, %ecx
+        jl      .L01            /* Bail early if the count is negative. */
+        jmp     .L02
+.L03:
+        /* Fetch 4 RGB565 pixels into %mm4.  Distribute the first and
+         * second pixels into the four words of %mm0 and %mm2.
+         */
+        movq    (%eax), %mm4
+        addl    $8, %eax
+        pshufw  $0x00, %mm4, %mm0
+        pshufw  $0x55, %mm4, %mm2
+        /* Mask the pixels so that each word of each register contains only
+         * one color component.
+         */
+        pand    %mm5, %mm0
+        pand    %mm5, %mm2
+        /* Adjust the component values so that they are as small as possible,
+         * but large enough so that we can multiply them by an unsigned 16-bit
+         * number and get a value as large as 0x00ff0000.
+         */
+        pmullw  %mm6, %mm0
+        pmullw  %mm6, %mm2
+#if SCALE_ADJUST > 0
+        psrlw   $SCALE_ADJUST, %mm0
+        psrlw   $SCALE_ADJUST, %mm2
+#endif
+        /* Scale the input component values to be on the range
+         * [0, 0x00ff0000].  This is the real magic of the whole routine.
+         */
+        pmulhuw %mm7, %mm0
+        pmulhuw %mm7, %mm2
+        /* Always set the alpha value to 0xff.
+         */
+        por %mm3, %mm0
+        por %mm3, %mm2
+        /* Pack the 16-bit values to 8-bit values and store the converted
+         * pixel data.
+         */
+        packuswb        %mm2, %mm0
+        movq    %mm0, (%edx)
+        addl    $8, %edx
+        /* Same sequence for the third and fourth pixels of %mm4. */
+        pshufw  $0xaa, %mm4, %mm0
+        pshufw  $0xff, %mm4, %mm2
+        pand    %mm5, %mm0
+        pand    %mm5, %mm2
+        pmullw  %mm6, %mm0
+        pmullw  %mm6, %mm2
+#if SCALE_ADJUST > 0
+        psrlw   $SCALE_ADJUST, %mm0
+        psrlw   $SCALE_ADJUST, %mm2
+#endif
+        pmulhuw %mm7, %mm0
+        pmulhuw %mm7, %mm2
+        por %mm3, %mm0
+        por %mm3, %mm2
+        packuswb        %mm2, %mm0
+        movq    %mm0, (%edx)
+        addl    $8, %edx
+        subl    $1, %ecx
+.L02:
+        jne     .L03
+        /* At this point there can be at most 3 pixels left to process.  If
+         * there is either 2 or 3 left, process 2.
+         */
+        /* Reload the original pixel count; its low two bits tell how many
+         * pixels remain.
+         */
+        movl    12(%esp), %ecx
+        testl   $0x02, %ecx
+        je      .L04
+        movd    (%eax), %mm4
+        addl    $4, %eax
+        pshufw  $0x00, %mm4, %mm0
+        pshufw  $0x55, %mm4, %mm2
+        pand    %mm5, %mm0
+        pand    %mm5, %mm2
+        pmullw  %mm6, %mm0
+        pmullw  %mm6, %mm2
+#if SCALE_ADJUST > 0
+        psrlw   $SCALE_ADJUST, %mm0
+        psrlw   $SCALE_ADJUST, %mm2
+#endif
+        pmulhuw %mm7, %mm0
+        pmulhuw %mm7, %mm2
+        por %mm3, %mm0
+        por %mm3, %mm2
+        packuswb        %mm2, %mm0
+        movq    %mm0, (%edx)
+        addl    $8, %edx
+.L04:
+        /* At this point there can be at most 1 pixel left to process.
+         * Process it if needed.
+         */
+        testl   $0x01, %ecx
+        je      .L01
+        /* %ecx is reused as scratch for the single 16-bit pixel. */
+        movzwl  (%eax), %ecx
+        movd    %ecx, %mm4
+        pshufw  $0x00, %mm4, %mm0
+        pand    %mm5, %mm0
+        pmullw  %mm6, %mm0
+#if SCALE_ADJUST > 0
+        psrlw   $SCALE_ADJUST, %mm0
+#endif
+        pmulhuw %mm7, %mm0
+        por %mm3, %mm0
+        packuswb        %mm0, %mm0
+        movd    %mm0, (%edx)
+.L01:
+#ifdef USE_INNER_EMMS
+        emms
+#endif
+        ret
+#endif /* !defined(__DJGPP__) && !defined(__MINGW32__) && !defined(__APPLE__) */
+#if defined (__ELF__) && defined (__linux__)
+        .section .note.GNU-stack,"",%progbits
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/read_rgba_span_x86.h
 ,0 → 1,56
+/*
+ * (C) Copyright IBM Corporation 2004
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+/**
+ * \file read_rgba_span_x86.h
+ *
+ * \author Ian Romanick <idr@us.ibm.com>
+ */
+#ifndef READ_RGBA_SPAN_X86_H
+#define READ_RGBA_SPAN_X86_H
+#if defined(USE_SSE_ASM) || defined(USE_MMX_ASM)
+#include "x86/common_x86_asm.h"
+#endif
+/* Every routine below takes the source pixel pointer, the destination
+ * pointer and the number of pixels to convert, in that order.
+ */
+#if defined(USE_SSE_ASM)
+/* BGRA8888_REV -> RGBA, SSE2 and SSE variants. */
+extern void _generic_read_RGBA_span_BGRA8888_REV_SSE2( const unsigned char *src,
+    unsigned char *dst, unsigned n );
+extern void _generic_read_RGBA_span_BGRA8888_REV_SSE( const unsigned char *src,
+    unsigned char *dst, unsigned n );
+#endif
+#if defined(USE_MMX_ASM)
+/* BGRA8888_REV -> RGBA and RGB565 -> RGBA, MMX variants. */
+extern void _generic_read_RGBA_span_BGRA8888_REV_MMX( const unsigned char *src,
+    unsigned char *dst, unsigned n );
+extern void _generic_read_RGBA_span_RGB565_MMX( const unsigned char *src,
+    unsigned char *dst, unsigned n );
+#endif
+#endif /* READ_RGBA_SPAN_X86_H */

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/rtasm/x86sse.c
 ,0 → 1,1203
+#ifdef USE_X86_ASM
+#if defined(__i386__) || defined(__386__)
+#include "main/imports.h"
+#include "x86sse.h"
+#define DISASSEM 0
+#define X86_TWOB 0x0f
+#if 0
+/* Disabled helper: reinterprets a function pointer as a byte pointer by
+ * casting through unsigned long.  It is compiled out together with the
+ * disabled pc-relative x86_call() variant, its only user in this file.
+ */
+static unsigned char *cptr( void (*label)() )
+{
+   return (unsigned char *)(unsigned long)label;
+}
+#endif
+/**
+ * Grow the code buffer of \p p.
+ *
+ * The first call allocates 1024 bytes.  Every later call doubles the size,
+ * copies the bytes emitted so far into the new buffer, rebases p->csr so it
+ * still points just past the last emitted byte, and frees the old buffer.
+ * The result of _mesa_exec_malloc() is used without a NULL check.
+ */
+static void do_realloc( struct x86_function *p )
+{
+   unsigned char *old_store;
+   unsigned emitted;
+   if (p->size == 0) {
+      /* Nothing allocated yet: start with a 1 KiB buffer. */
+      p->size = 1024;
+      p->store = _mesa_exec_malloc(p->size);
+      p->csr = p->store;
+      return;
+   }
+   old_store = p->store;
+   emitted = p->csr - old_store;
+   p->size *= 2;
+   p->store = _mesa_exec_malloc(p->size);
+   memcpy(p->store, old_store, emitted);
+   p->csr = p->store + emitted;
+   _mesa_exec_free(old_store);
+}
+/**
+ * Reserve \p bytes bytes at the current emit position.
+ *
+ * If the request would run past p->size, do_realloc() is called once
+ * before the space is handed out.
+ *
+ * \return pointer to the first reserved byte; p->csr is advanced past the
+ *         reserved range.
+ */
+static unsigned char *reserve( struct x86_function *p, int bytes )
+{
+   unsigned char *start;
+   if (p->csr + bytes - p->store > p->size)
+      do_realloc(p);
+   start = p->csr;
+   p->csr = start + bytes;
+   return start;
+}
+/** Append one signed byte \p b0 to the instruction stream. */
+static void emit_1b( struct x86_function *p, char b0 )
+{
+   unsigned char *slot = reserve(p, 1);
+   *(char *)slot = b0;
+}
+/**
+ * Append the int \p i0 to the instruction stream in host byte order.
+ *
+ * The emit position can be at any byte offset, so the value is copied with
+ * memcpy() instead of being stored through a cast int pointer, which would
+ * be a misaligned access.
+ */
+static void emit_1i( struct x86_function *p, int i0 )
+{
+   unsigned char *dst = reserve(p, sizeof(i0));
+   memcpy(dst, &i0, sizeof(i0));
+}
+/** Append one unsigned byte \p b0 to the instruction stream. */
+static void emit_1ub( struct x86_function *p, unsigned char b0 )
+{
+   unsigned char *out = reserve(p, 1);
+   out[0] = b0;
+}
+/** Append the two bytes \p b0, \p b1 (in that order) to the instruction stream. */
+static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 )
+{
+   unsigned char *out = reserve(p, 2);
+   out[0] = b0;
+   out[1] = b1;
+}
+/** Append the three bytes \p b0, \p b1, \p b2 (in that order) to the instruction stream. */
+static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 )
+{
+   unsigned char *out = reserve(p, 3);
+   out[0] = b0;
+   out[1] = b1;
+   out[2] = b2;
+}
+/* Build a modRM byte + possible displacement.  No treatment of SIB
+ * indexing.  BZZT - no way to encode an absolute address.
+ *
+ * \param reg     operand placed in the reg field; must be a plain register
+ *                (mod_REG).
+ * \param regmem  operand placed in the mod and r/m fields; may be a
+ *                register or a memory reference with optional displacement.
+ */
+static void emit_modrm( struct x86_function *p,
+                        struct x86_reg reg,
+                        struct x86_reg regmem )
+{
+   unsigned char val = 0;
+   assert(reg.mod == mod_REG);
+   val |= regmem.mod << 6;      /* mod field */
+   val |= reg.idx << 3;         /* reg field */
+   val |= regmem.idx;           /* r/m field */
+   emit_1ub(p, val);
+   /* Oh-oh we've stumbled into the SIB thing.
+    *
+    * With ESP in the r/m field the memory addressing forms require a SIB
+    * byte.  The register-direct form (mod_REG) has no SIB byte, so it
+    * must not get one, otherwise the stray 0x24 is decoded as the start of
+    * the next instruction.
+    */
+   if (regmem.file == file_REG32 &&
+       regmem.idx == reg_SP &&
+       regmem.mod != mod_REG) {
+      emit_1ub(p, 0x24);                /* simplistic! */
+   }
+   switch (regmem.mod) {
+   case mod_REG:
+   case mod_INDIRECT:
+      /* No displacement bytes follow. */
+      break;
+   case mod_DISP8:
+      emit_1b(p, regmem.disp);
+      break;
+   case mod_DISP32:
+      emit_1i(p, regmem.disp);
+      break;
+   default:
+      assert(0);
+      break;
+   }
+}
+/**
+ * Emit a ModRM byte whose reg field carries the opcode extension \p op
+ * rather than a real register operand.
+ */
+static void emit_modrm_noreg( struct x86_function *p,
+                              unsigned op,
+                              struct x86_reg regmem )
+{
+   /* Wrap the extension in a register value so emit_modrm() can place it. */
+   emit_modrm(p, x86_make_reg(file_REG32, op), regmem);
+}
+/**
+ * Emit an opcode byte plus ModRM for an instruction that has separate
+ * opcodes for "destination is a register" and "destination is memory".
+ *
+ * \param op_dst_is_reg  opcode used when \p dst is a register
+ * \param op_dst_is_mem  opcode used when \p dst is a memory reference;
+ *                       \p src must then be a register
+ */
+static void emit_op_modrm( struct x86_function *p,
+                           unsigned char op_dst_is_reg,
+                           unsigned char op_dst_is_mem,
+                           struct x86_reg dst,
+                           struct x86_reg src )
+{
+   if (dst.mod == mod_REG) {
+      emit_1ub(p, op_dst_is_reg);
+      emit_modrm(p, dst, src);
+   }
+   else if (dst.mod == mod_INDIRECT ||
+            dst.mod == mod_DISP32 ||
+            dst.mod == mod_DISP8) {
+      /* Memory destination: the operands swap roles in the ModRM byte. */
+      assert(src.mod == mod_REG);
+      emit_1ub(p, op_dst_is_mem);
+      emit_modrm(p, src, dst);
+   }
+   else {
+      assert(0);
+   }
+}
+/* Create and manipulate registers and regmem values:
+ */
+/**
+ * Build a plain register operand (mod_REG, zero displacement) for register
+ * \p idx of register file \p file.
+ */
+struct x86_reg x86_make_reg( enum x86_reg_file file,
+                             enum x86_reg_name idx )
+{
+   struct x86_reg result;
+   result.mod = mod_REG;
+   result.disp = 0;
+   result.file = file;
+   result.idx = idx;
+   return result;
+}
+/**
+ * Turn \p reg into a memory reference with displacement \p disp.
+ *
+ * If \p reg is a plain register the displacement is set to \p disp; if it
+ * is already a memory reference \p disp is added to its displacement.
+ * The smallest addressing form that can hold the result is selected.
+ *
+ * \param reg   32-bit general purpose register or memory reference
+ * \param disp  displacement in bytes
+ * \return the resulting memory reference
+ */
+struct x86_reg x86_make_disp( struct x86_reg reg,
+                              int disp )
+{
+   /* emit_modrm() puts reg.idx straight into the r/m field.  With mod == 00
+    * the r/m value 5 does not mean "[ebp]" but "disp32 with no base", so a
+    * zero displacement on that register has to use the disp8 form.
+    */
+   const int rm_disp32_no_base = 5;
+   assert(reg.file == file_REG32);
+   if (reg.mod == mod_REG)
+      reg.disp = disp;
+   else
+      reg.disp += disp;
+   if (reg.disp == 0 && reg.idx != rm_disp32_no_base)
+      reg.mod = mod_INDIRECT;
+   else if (reg.disp <= 127 && reg.disp >= -128)
+      reg.mod = mod_DISP8;
+   else
+      reg.mod = mod_DISP32;
+   return reg;
+}
+/** Memory reference through \p reg with no additional displacement. */
+struct x86_reg x86_deref( struct x86_reg reg )
+{
+   struct x86_reg mem = x86_make_disp(reg, 0);
+   return mem;
+}
+/** Plain register operand with the same file and index as \p reg. */
+struct x86_reg x86_get_base_reg( struct x86_reg reg )
+{
+   struct x86_reg base = x86_make_reg( reg.file, reg.idx );
+   return base;
+}
+/** Current emit position of \p p, usable as a jump target. */
+unsigned char *x86_get_label( struct x86_function *p )
+{
+   unsigned char *here = p->csr;
+   return here;
+}
+/***********************************************************************
+ * x86 instructions
+ */
+/**
+ * Emit a conditional jump on \p cc to the already-known address \p label.
+ *
+ * The 2-byte short form is used when the target is within an 8-bit
+ * displacement of the end of the instruction, the 6-byte near form
+ * otherwise.
+ */
+void x86_jcc( struct x86_function *p,
+              enum x86_cc cc,
+              unsigned char *label )
+{
+   unsigned char *here = x86_get_label(p);
+   int rel8 = label - (here + 2);
+   if (rel8 >= -128 && rel8 <= 127) {
+      emit_1ub(p, 0x70 + cc);
+      emit_1b(p, (char) rel8);
+      return;
+   }
+   {
+      /* Displacements are relative to the end of the instruction. */
+      int rel32 = label - (here + 6);
+      emit_2ub(p, 0x0f, 0x80 + cc);
+      emit_1i(p, rel32);
+   }
+}
+/**
+ * Emit a conditional jump on \p cc whose target is not known yet.
+ * The 32-bit displacement is emitted as 0 (forward jumps always use the
+ * 32-bit form) and is expected to be patched with x86_fixup_fwd_jump().
+ *
+ * \return the position just past the emitted instruction
+ */
+unsigned char *x86_jcc_forward( struct x86_function *p,
+                          enum x86_cc cc )
+{
+   const unsigned char opcode = 0x80 + cc;
+   emit_2ub(p, 0x0f, opcode);
+   emit_1i(p, 0);
+   return x86_get_label(p);
+}
+/**
+ * Emit an unconditional jump (opcode 0xe9) with a zero 32-bit displacement.
+ *
+ * \return the position just past the emitted instruction
+ */
+unsigned char *x86_jmp_forward( struct x86_function *p)
+{
+   const unsigned char opcode = 0xe9;
+   emit_1ub(p, opcode);
+   emit_1i(p, 0);
+   return x86_get_label(p);
+}
+/**
+ * Emit a call (opcode 0xe8) with a zero 32-bit displacement.
+ *
+ * \return the position just past the emitted instruction
+ */
+unsigned char *x86_call_forward( struct x86_function *p)
+{
+   const unsigned char opcode = 0xe8;
+   emit_1ub(p, opcode);
+   emit_1i(p, 0);
+   return x86_get_label(p);
+}
+/**
+ * Fixup offset from forward jump: make the instruction that ends at
+ * \p fixup jump to the current emit position.
+ *
+ * \param fixup  position just past the instruction whose last four bytes
+ *               are its 32-bit displacement
+ *
+ * The displacement is written with memcpy() because \p fixup - 4 can be at
+ * any byte offset; storing through a cast int pointer would be a
+ * misaligned access.
+ */
+void x86_fixup_fwd_jump( struct x86_function *p,
+                         unsigned char *fixup )
+{
+   int rel32 = x86_get_label(p) - fixup;
+   memcpy(fixup - 4, &rel32, sizeof(rel32));
+}
+/** Emit an unconditional near jump (opcode 0xe9) to the known address \p label. */
+void x86_jmp( struct x86_function *p, unsigned char *label)
+{
+   int rel32;
+   emit_1ub(p, 0xe9);
+   /* The displacement counts from the end of the 4 bytes about to be written. */
+   rel32 = label - x86_get_label(p) - 4;
+   emit_1i(p, rel32);
+}
+#if 0
+/* This doesn't work once we start reallocating & copying the
+ * generated code on buffer fills, because the call is relative to the
+ * current pc.
+ */
+void x86_call( struct x86_function *p, void (*label)())
+{
+   emit_1ub(p, 0xe8);
+   emit_1i(p, cptr(label) - x86_get_label(p) - 4);
+}
+#else
+/**
+ * Emit an indirect call through \p reg (opcode 0xff with opcode
+ * extension 2 in the ModRM reg field).
+ */
+void x86_call( struct x86_function *p, struct x86_reg reg)
+{
+   const unsigned char opcode = 0xff;
+   const unsigned opcode_ext = 2;
+   emit_1ub(p, opcode);
+   emit_modrm_noreg(p, opcode_ext, reg);
+}
+#endif
+/* michal:
+ * Temporary. As I need immediate operands, and dont want to mess with the codegen,
+ * I load the immediate into general purpose register and use it.
+ */
+/**
+ * Emit a load of the 32-bit immediate \p imm into register \p dst
+ * (opcode 0xb8 + register index).
+ */
+void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
+{
+   unsigned char opcode;
+   assert(dst.mod == mod_REG);
+   opcode = 0xb8 + dst.idx;
+   emit_1ub(p, opcode);
+   emit_1i(p, imm);
+}
+/**
+ * Emit a push of register \p reg (opcode 0x50 + register index) and record
+ * the 4 extra bytes in p->stack_offset.
+ */
+void x86_push( struct x86_function *p,
+               struct x86_reg reg )
+{
+   unsigned char opcode;
+   assert(reg.mod == mod_REG);
+   opcode = 0x50 + reg.idx;
+   emit_1ub(p, opcode);
+   p->stack_offset += 4;
+}
+/**
+ * Emit a pop into register \p reg (opcode 0x58 + register index) and
+ * remove 4 bytes from p->stack_offset.
+ */
+void x86_pop( struct x86_function *p,
+              struct x86_reg reg )
+{
+   unsigned char opcode;
+   assert(reg.mod == mod_REG);
+   opcode = 0x58 + reg.idx;
+   emit_1ub(p, opcode);
+   p->stack_offset -= 4;
+}
+/** Emit an increment of register \p reg (opcode 0x40 + register index). */
+void x86_inc( struct x86_function *p,
+              struct x86_reg reg )
+{
+   unsigned char opcode;
+   assert(reg.mod == mod_REG);
+   opcode = 0x40 + reg.idx;
+   emit_1ub(p, opcode);
+}
+/** Emit a decrement of register \p reg (opcode 0x48 + register index). */
+void x86_dec( struct x86_function *p,
+              struct x86_reg reg )
+{
+   unsigned char opcode;
+   assert(reg.mod == mod_REG);
+   opcode = 0x48 + reg.idx;
+   emit_1ub(p, opcode);
+}
+/** Emit a near return (opcode 0xc3). */
+void x86_ret( struct x86_function *p )
+{
+   const unsigned char opcode = 0xc3;
+   emit_1ub(p, opcode);
+}
+/** Emit SAHF (opcode 0x9e). */
+void x86_sahf( struct x86_function *p )
+{
+   const unsigned char opcode = 0x9e;
+   emit_1ub(p, opcode);
+}
+/** Emit a 32-bit move from \p src to \p dst (0x8b to a register, 0x89 to memory). */
+void x86_mov( struct x86_function *p,
+              struct x86_reg dst,
+              struct x86_reg src )
+{
+   const unsigned char to_reg = 0x8b;
+   const unsigned char to_mem = 0x89;
+   emit_op_modrm( p, to_reg, to_mem, dst, src );
+}
+/** Emit a 32-bit XOR of \p src into \p dst (0x33 to a register, 0x31 to memory). */
+void x86_xor( struct x86_function *p,
+              struct x86_reg dst,
+              struct x86_reg src )
+{
+   const unsigned char to_reg = 0x33;
+   const unsigned char to_mem = 0x31;
+   emit_op_modrm( p, to_reg, to_mem, dst, src );
+}
+/** Emit a 32-bit compare of \p dst with \p src (0x3b register form, 0x39 memory form). */
+void x86_cmp( struct x86_function *p,
+              struct x86_reg dst,
+              struct x86_reg src )
+{
+   const unsigned char to_reg = 0x3b;
+   const unsigned char to_mem = 0x39;
+   emit_op_modrm( p, to_reg, to_mem, dst, src );
+}
+/** Emit LEA (opcode 0x8d): \p dst goes in the reg field, \p src in r/m. */
+void x86_lea( struct x86_function *p,
+              struct x86_reg dst,
+              struct x86_reg src )
+{
+   const unsigned char opcode = 0x8d;
+   emit_1ub(p, opcode);
+   emit_modrm( p, dst, src );
+}
+/** Emit a 32-bit TEST (opcode 0x85): \p dst goes in the reg field, \p src in r/m. */
+void x86_test( struct x86_function *p,
+               struct x86_reg dst,
+               struct x86_reg src )
+{
+   const unsigned char opcode = 0x85;
+   emit_1ub(p, opcode);
+   emit_modrm( p, dst, src );
+}
+/** Emit a 32-bit ADD of \p src into \p dst (0x03 to a register, 0x01 to memory). */
+void x86_add( struct x86_function *p,
+               struct x86_reg dst,
+               struct x86_reg src )
+{
+   const unsigned char to_reg = 0x03;
+   const unsigned char to_mem = 0x01;
+   emit_op_modrm(p, to_reg, to_mem, dst, src );
+}
+/**
+ * Emit opcode 0xf7 with register \p src in the r/m field.  reg_SP is passed
+ * as the "destination" register only so that its index lands in the ModRM
+ * reg field, where it acts as the opcode extension.
+ */
+void x86_mul( struct x86_function *p,
+               struct x86_reg src )
+{
+   struct x86_reg opcode_ext;
+   assert (src.file == file_REG32 && src.mod == mod_REG);
+   opcode_ext = x86_make_reg (file_REG32, reg_SP);
+   emit_op_modrm(p, 0xf7, 0, opcode_ext, src );
+}
+/** Emit a 32-bit SUB of \p src from \p dst (0x2b to a register, 0x29 to memory). */
+void x86_sub( struct x86_function *p,
+               struct x86_reg dst,
+               struct x86_reg src )
+{
+   const unsigned char to_reg = 0x2b;
+   const unsigned char to_mem = 0x29;
+   emit_op_modrm(p, to_reg, to_mem, dst, src );
+}
+/** Emit a 32-bit OR of \p src into \p dst (0x0b to a register, 0x09 to memory). */
+void x86_or( struct x86_function *p,
+             struct x86_reg dst,
+             struct x86_reg src )
+{
+   const unsigned char to_reg = 0x0b;
+   const unsigned char to_mem = 0x09;
+   emit_op_modrm( p, to_reg, to_mem, dst, src );
+}
+/** Emit a 32-bit AND of \p src into \p dst (0x23 to a register, 0x21 to memory). */
+void x86_and( struct x86_function *p,
+              struct x86_reg dst,
+              struct x86_reg src )
+{
+   const unsigned char to_reg = 0x23;
+   const unsigned char to_mem = 0x21;
+   emit_op_modrm( p, to_reg, to_mem, dst, src );
+}
+/***********************************************************************
+ * SSE instructions
+ */
+/** Emit MOVSS (F3 0F 10 to a register, F3 0F 11 to memory). */
+void sse_movss( struct x86_function *p,
+                struct x86_reg dst,
+                struct x86_reg src )
+{
+   const unsigned char load_op = 0x10;
+   const unsigned char store_op = 0x11;
+   emit_2ub(p, 0xF3, X86_TWOB);
+   emit_op_modrm( p, load_op, store_op, dst, src );
+}
+/** Emit MOVAPS (0F 28 to a register, 0F 29 to memory). */
+void sse_movaps( struct x86_function *p,
+                 struct x86_reg dst,
+                 struct x86_reg src )
+{
+   const unsigned char load_op = 0x28;
+   const unsigned char store_op = 0x29;
+   emit_1ub(p, X86_TWOB);
+   emit_op_modrm( p, load_op, store_op, dst, src );
+}
+/** Emit MOVUPS (0F 10 to a register, 0F 11 to memory). */
+void sse_movups( struct x86_function *p,
+                 struct x86_reg dst,
+                 struct x86_reg src )
+{
+   const unsigned char load_op = 0x10;
+   const unsigned char store_op = 0x11;
+   emit_1ub(p, X86_TWOB);
+   emit_op_modrm( p, load_op, store_op, dst, src );
+}
+/**
+ * Emit MOVHPS (0F 16 to a register, 0F 17 to memory).  At least one operand
+ * must be a memory reference; the register-register encoding is a
+ * different instruction (cf movlhps).
+ */
+void sse_movhps( struct x86_function *p,
+                 struct x86_reg dst,
+                 struct x86_reg src )
+{
+   const unsigned char load_op = 0x16;
+   const unsigned char store_op = 0x17;
+   assert(dst.mod != mod_REG || src.mod != mod_REG);
+   emit_1ub(p, X86_TWOB);
+   emit_op_modrm( p, load_op, store_op, dst, src );
+}
+/**
+ * Emit MOVLPS (0F 12 to a register, 0F 13 to memory).  At least one operand
+ * must be a memory reference; the register-register encoding is a
+ * different instruction (cf movhlps).
+ */
+void sse_movlps( struct x86_function *p,
+                 struct x86_reg dst,
+                 struct x86_reg src )
+{
+   const unsigned char load_op = 0x12;
+   const unsigned char store_op = 0x13;
+   assert(dst.mod != mod_REG || src.mod != mod_REG);
+   emit_1ub(p, X86_TWOB);
+   emit_op_modrm( p, load_op, store_op, dst, src );
+}
+/** Emit MAXPS (0F 5F) with \p dst in the reg field and \p src in r/m. */
+void sse_maxps( struct x86_function *p,
+                struct x86_reg dst,
+                struct x86_reg src )
+{
+   const unsigned char opcode = 0x5F;
+   emit_2ub(p, X86_TWOB, opcode);
+   emit_modrm( p, dst, src );
+}
+/** Emit MAXSS (F3 0F 5F) with \p dst in the reg field and \p src in r/m. */
+void sse_maxss( struct x86_function *p,
+                struct x86_reg dst,
+                struct x86_reg src )
+{
+   const unsigned char opcode = 0x5F;
+   emit_3ub(p, 0xF3, X86_TWOB, opcode);
+   emit_modrm( p, dst, src );
+}
+/** Emit DIVSS (F3 0F 5E) with \p dst in the reg field and \p src in r/m. */
+void sse_divss( struct x86_function *p,
+                struct x86_reg dst,
+                struct x86_reg src )
+{
+   const unsigned char opcode = 0x5E;
+   emit_3ub(p, 0xF3, X86_TWOB, opcode);
+   emit_modrm( p, dst, src );
+}
+/** Emit MINPS (0F 5D) with \p dst in the reg field and \p src in r/m. */
+void sse_minps( struct x86_function *p,
+                struct x86_reg dst,
+                struct x86_reg src )
+{
+   const unsigned char opcode = 0x5D;
+   emit_2ub(p, X86_TWOB, opcode);
+   emit_modrm( p, dst, src );
+}
+/** Emit SUBPS (0F 5C) with \p dst in the reg field and \p src in r/m. */
+void sse_subps( struct x86_function *p,
+                struct x86_reg dst,
+                struct x86_reg src )
+{
+   const unsigned char opcode = 0x5C;
+   emit_2ub(p, X86_TWOB, opcode);
+   emit_modrm( p, dst, src );
+}
+/** Emit MULPS (0F 59) with \p dst in the reg field and \p src in r/m. */
+void sse_mulps( struct x86_function *p,
+                struct x86_reg dst,
+                struct x86_reg src )
+{
+   const unsigned char opcode = 0x59;
+   emit_2ub(p, X86_TWOB, opcode);
+   emit_modrm( p, dst, src );
+}
+/** Emit MULSS (F3 0F 59) with \p dst in the reg field and \p src in r/m. */
+void sse_mulss( struct x86_function *p,
+                struct x86_reg dst,
+                struct x86_reg src )
+{
+   const unsigned char opcode = 0x59;
+   emit_3ub(p, 0xF3, X86_TWOB, opcode);
+   emit_modrm( p, dst, src );
+}
+/** Emit ADDPS (0F 58) with \p dst in the reg field and \p src in r/m. */
+void sse_addps( struct x86_function *p,
+                struct x86_reg dst,
+                struct x86_reg src )
+{
+   const unsigned char opcode = 0x58;
+   emit_2ub(p, X86_TWOB, opcode);
+   emit_modrm( p, dst, src );
+}
+/** Emit ADDSS (F3 0F 58) with \p dst in the reg field and \p src in r/m. */
+void sse_addss( struct x86_function *p,
+                struct x86_reg dst,
+                struct x86_reg src )
+{
+   const unsigned char opcode = 0x58;
+   emit_3ub(p, 0xF3, X86_TWOB, opcode);
+   emit_modrm( p, dst, src );
+}
+/** Emit ANDNPS (0F 55) with \p dst in the reg field and \p src in r/m. */
+void sse_andnps( struct x86_function *p,
+                 struct x86_reg dst,
+                 struct x86_reg src )
+{
+   const unsigned char opcode = 0x55;
+   emit_2ub(p, X86_TWOB, opcode);
+   emit_modrm( p, dst, src );
+}
+/** Emit ANDPS (0F 54) with \p dst in the reg field and \p src in r/m. */
+void sse_andps( struct x86_function *p,
+                struct x86_reg dst,
+                struct x86_reg src )
+{
+   const unsigned char opcode = 0x54;
+   emit_2ub(p, X86_TWOB, opcode);
+   emit_modrm( p, dst, src );
+}
+/** Emit RSQRTPS (0F 52) with \p dst in the reg field and \p src in r/m. */
+void sse_rsqrtps( struct x86_function *p,
+                  struct x86_reg dst,
+                  struct x86_reg src )
+{
+   const unsigned char opcode = 0x52;
+   emit_2ub(p, X86_TWOB, opcode);
+   emit_modrm( p, dst, src );
+}
+/** Emit RSQRTSS (F3 0F 52) with \p dst in the reg field and \p src in r/m. */
+void sse_rsqrtss( struct x86_function *p,
+                  struct x86_reg dst,
+                  struct x86_reg src )
+{
+   const unsigned char opcode = 0x52;
+   emit_3ub(p, 0xF3, X86_TWOB, opcode);
+   emit_modrm( p, dst, src );
+}
+/** Emit MOVHLPS (0F 12); both operands must be registers. */
+void sse_movhlps( struct x86_function *p,
+                  struct x86_reg dst,
+                  struct x86_reg src )
+{
+   const unsigned char opcode = 0x12;
+   assert(dst.mod == mod_REG && src.mod == mod_REG);
+   emit_2ub(p, X86_TWOB, opcode);
+   emit_modrm( p, dst, src );
+}
+/** Emit MOVLHPS (0F 16); both operands must be registers. */
+void sse_movlhps( struct x86_function *p,
+                  struct x86_reg dst,
+                  struct x86_reg src )
+{
+   const unsigned char opcode = 0x16;
+   assert(dst.mod == mod_REG && src.mod == mod_REG);
+   emit_2ub(p, X86_TWOB, opcode);
+   emit_modrm( p, dst, src );
+}
+/** Emit ORPS (0F 56) with \p dst in the reg field and \p src in r/m. */
+void sse_orps( struct x86_function *p,
+               struct x86_reg dst,
+               struct x86_reg src )
+{
+   const unsigned char opcode = 0x56;
+   emit_2ub(p, X86_TWOB, opcode);
+   emit_modrm( p, dst, src );
+}
+/** Emit XORPS (0F 57) with \p dst in the reg field and \p src in r/m. */
+void sse_xorps( struct x86_function *p,
+                struct x86_reg dst,
+                struct x86_reg src )
+{
+   const unsigned char opcode = 0x57;
+   emit_2ub(p, X86_TWOB, opcode);
+   emit_modrm( p, dst, src );
+}
+/**
+ * Emit CVTPS2PI (0F 2D).  \p dst must be an MMX register; \p src must be an
+ * XMM register or a memory reference.  Sets p->need_emms because an MMX
+ * register is written.
+ */
+void sse_cvtps2pi( struct x86_function *p,
+                   struct x86_reg dst,
+                   struct x86_reg src )
+{
+   const unsigned char opcode = 0x2d;
+   assert(dst.file == file_MMX &&
+          (src.file == file_XMM || src.mod != mod_REG));
+   p->need_emms = 1;
+   emit_2ub(p, X86_TWOB, opcode);
+   emit_modrm( p, dst, src );
+}
+/**
+ * Emit SHUFPS (0F C6) followed by the immediate selector \p shuf.
+ *
+ * Shufps can also be used to implement a reduced swizzle when dest ==
+ * arg0.
+ */
+void sse_shufps( struct x86_function *p,
+                 struct x86_reg dest,
+                 struct x86_reg arg0,
+                 unsigned char shuf)
+{
+   const unsigned char opcode = 0xC6;
+   emit_2ub(p, X86_TWOB, opcode);
+   emit_modrm(p, dest, arg0);
+   emit_1ub(p, shuf);
+}
+/** Emit CMPPS (0F C2) followed by the immediate comparison predicate \p cc. */
+void sse_cmpps( struct x86_function *p,
+                struct x86_reg dest,
+                struct x86_reg arg0,
+                unsigned char cc)
+{
+   const unsigned char opcode = 0xC2;
+   emit_2ub(p, X86_TWOB, opcode);
+   emit_modrm(p, dest, arg0);
+   emit_1ub(p, cc);
+}
+/** Emit PMOVMSKB in its 0x66-prefixed form (66 0F D7). */
+void sse_pmovmskb( struct x86_function *p,
+                   struct x86_reg dest,
+                   struct x86_reg src)
+{
+    const unsigned char opcode = 0xD7;
+    emit_3ub(p, 0x66, X86_TWOB, opcode);
+    emit_modrm(p, dest, src);
+}
+/***********************************************************************
+ * SSE2 instructions
+ */
+/**
+ * Perform a reduced swizzle: emit PSHUFD (66 0F 70) followed by the
+ * immediate selector \p shuf.
+ */
+void sse2_pshufd( struct x86_function *p,
+                  struct x86_reg dest,
+                  struct x86_reg arg0,
+                  unsigned char shuf)
+{
+   const unsigned char opcode = 0x70;
+   emit_3ub(p, 0x66, X86_TWOB, opcode);
+   emit_modrm(p, dest, arg0);
+   emit_1ub(p, shuf);
+}
+/* Emit CVTTPS2DQ dst, src: 0xF3, the X86_TWOB escape, opcode 0x5B, ModRM. */
+void sse2_cvttps2dq(struct x86_function *p,
+                    struct x86_reg dst,
+                    struct x86_reg src)
+{
+   const unsigned char prefix = 0xF3;
+   const unsigned char opcode = 0x5B;
+
+   emit_3ub(p, prefix, X86_TWOB, opcode);
+   emit_modrm(p, dst, src);
+}
+/* Emit CVTPS2DQ dst, src: 0x66, the X86_TWOB escape, opcode 0x5B, ModRM. */
+void sse2_cvtps2dq(struct x86_function *p,
+                   struct x86_reg dst,
+                   struct x86_reg src)
+{
+   const unsigned char prefix = 0x66;
+   const unsigned char opcode = 0x5B;
+
+   emit_3ub(p, prefix, X86_TWOB, opcode);
+   emit_modrm(p, dst, src);
+}
+/* Emit PACKSSDW dst, src: 0x66, the X86_TWOB escape, opcode 0x6B, ModRM. */
+void sse2_packssdw(struct x86_function *p,
+                   struct x86_reg dst,
+                   struct x86_reg src)
+{
+   const unsigned char prefix = 0x66;
+   const unsigned char opcode = 0x6B;
+
+   emit_3ub(p, prefix, X86_TWOB, opcode);
+   emit_modrm(p, dst, src);
+}
+/* Emit PACKSSWB dst, src: 0x66, the X86_TWOB escape, opcode 0x63, ModRM. */
+void sse2_packsswb(struct x86_function *p,
+                   struct x86_reg dst,
+                   struct x86_reg src)
+{
+   const unsigned char prefix = 0x66;
+   const unsigned char opcode = 0x63;
+
+   emit_3ub(p, prefix, X86_TWOB, opcode);
+   emit_modrm(p, dst, src);
+}
+/* Emit PACKUSWB dst, src: 0x66, the X86_TWOB escape, opcode 0x67, ModRM. */
+void sse2_packuswb(struct x86_function *p,
+                   struct x86_reg dst,
+                   struct x86_reg src)
+{
+   const unsigned char prefix = 0x66;
+   const unsigned char opcode = 0x67;
+
+   emit_3ub(p, prefix, X86_TWOB, opcode);
+   emit_modrm(p, dst, src);
+}
+/* Emit RCPPS dst, src: the X86_TWOB escape, opcode 0x53, then ModRM
+ * (no operand-size or repeat prefix is emitted).
+ */
+void sse2_rcpps(struct x86_function *p,
+                struct x86_reg dst,
+                struct x86_reg src)
+{
+   const unsigned char opcode = 0x53;
+
+   emit_2ub(p, X86_TWOB, opcode);
+   emit_modrm(p, dst, src);
+}
+/* Emit RCPSS dst, src: 0xF3, the X86_TWOB escape, opcode 0x53, ModRM. */
+void sse2_rcpss(struct x86_function *p,
+                struct x86_reg dst,
+                struct x86_reg src)
+{
+   const unsigned char prefix = 0xF3;
+   const unsigned char opcode = 0x53;
+
+   emit_3ub(p, prefix, X86_TWOB, opcode);
+   emit_modrm(p, dst, src);
+}
+/* Emit the 0x66-prefixed MOVD.  After the prefix and the X86_TWOB escape,
+ * emit_op_modrm() is handed both candidate opcodes (0x6e and 0x7e) along
+ * with the operands and emits the opcode/ModRM pair.
+ */
+void sse2_movd(struct x86_function *p,
+               struct x86_reg dst,
+               struct x86_reg src)
+{
+   const unsigned char prefix = 0x66;
+   const unsigned char op_first = 0x6e;
+   const unsigned char op_second = 0x7e;
+
+   emit_2ub(p, prefix, X86_TWOB);
+   emit_op_modrm(p, op_first, op_second, dst, src);
+}
+/***********************************************************************
+ * x87 instructions
+ */
+/* Emit FIST dst: opcode byte DB followed by a ModRM built by
+ * emit_modrm_noreg() with opcode-extension digit 2.
+ */
+void x87_fist(struct x86_function *p, struct x86_reg dst)
+{
+   const unsigned char opcode = 0xdb;
+   const unsigned char digit = 2;
+
+   emit_1ub(p, opcode);
+   emit_modrm_noreg(p, digit, dst);
+}
+/* Emit FISTP dst: opcode byte DB followed by a ModRM built by
+ * emit_modrm_noreg() with opcode-extension digit 3.
+ */
+void x87_fistp(struct x86_function *p, struct x86_reg dst)
+{
+   const unsigned char opcode = 0xdb;
+   const unsigned char digit = 3;
+
+   emit_1ub(p, opcode);
+   emit_modrm_noreg(p, digit, dst);
+}
+/* Emit FILD arg: opcode byte DF followed by a ModRM built by
+ * emit_modrm_noreg() with opcode-extension digit 0.
+ *
+ * NOTE(review): x87_fist()/x87_fistp() use the DB opcode byte while this
+ * uses DF; in the Intel encoding tables DF /0 is the 16-bit integer load
+ * and DB /0 the 32-bit one - confirm the operand width callers expect.
+ */
+void x87_fild(struct x86_function *p, struct x86_reg arg)
+{
+   const unsigned char opcode = 0xdf;
+   const unsigned char digit = 0;
+
+   emit_1ub(p, opcode);
+   emit_modrm_noreg(p, digit, arg);
+}
+/* Emit FLDZ: the fixed byte pair D9 EE, no operands. */
+void x87_fldz(struct x86_function *p)
+{
+   const unsigned char first = 0xd9;
+   const unsigned char second = 0xee;
+
+   emit_2ub(p, first, second);
+}
+/* Emit FLDCW arg: opcode byte D9 followed by a ModRM built by
+ * emit_modrm_noreg() with opcode-extension digit 5.
+ *
+ * arg must be based on a 32-bit general register and must be a memory
+ * reference (anything but mod_REG).
+ */
+void x87_fldcw(struct x86_function *p, struct x86_reg arg)
+{
+   const unsigned char opcode = 0xd9;
+   const unsigned char digit = 5;
+
+   assert(arg.file == file_REG32);
+   assert(arg.mod != mod_REG);
+
+   emit_1ub(p, opcode);
+   emit_modrm_noreg(p, digit, arg);
+}
+/* Emit FLD1: the fixed byte pair D9 E8, no operands. */
+void x87_fld1(struct x86_function *p)
+{
+   const unsigned char first = 0xd9;
+   const unsigned char second = 0xe8;
+
+   emit_2ub(p, first, second);
+}
+/* Emit FLDL2E: the fixed byte pair D9 EA, no operands. */
+void x87_fldl2e(struct x86_function *p)
+{
+   const unsigned char first = 0xd9;
+   const unsigned char second = 0xea;
+
+   emit_2ub(p, first, second);
+}
+/* Emit FLDLN2: the fixed byte pair D9 ED, no operands. */
+void x87_fldln2(struct x86_function *p)
+{
+   const unsigned char first = 0xd9;
+   const unsigned char second = 0xed;
+
+   emit_2ub(p, first, second);
+}
+/* Emit FWAIT: the single byte 9B. */
+void x87_fwait(struct x86_function *p)
+{
+   const unsigned char opcode = 0x9b;
+
+   emit_1ub(p, opcode);
+}
+/* Emit FNCLEX: the fixed byte pair DB E2, no operands. */
+void x87_fnclex(struct x86_function *p)
+{
+   const unsigned char first = 0xdb;
+   const unsigned char second = 0xe2;
+
+   emit_2ub(p, first, second);
+}
+/* Emit FCLEX, i.e. the FWAIT byte (9B) immediately followed by the
+ * FNCLEX byte pair (DB E2).
+ */
+void x87_fclex(struct x86_function *p)
+{
+   emit_1ub(p, 0x9b);               /* same byte x87_fwait() emits */
+   emit_2ub(p, 0xdb, 0xe2);         /* same bytes x87_fnclex() emits */
+}
+/* Shared emitter behind the two-operand x87 arithmetic helpers.
+ *
+ * dst must be an x87 stack register.  Three operand shapes are accepted:
+ *   - dst is st(0), arg is st(i):  emits dst0ub0, dst0ub1 + i
+ *   - dst is st(i), arg is st(0):  emits arg0ub0, arg0ub1 + i
+ *   - dst is st(0), arg is a REG32-based operand: emits 0xd8 and then
+ *     emit_modrm_noreg(p, argmem_noreg, arg)
+ * Every other combination trips assert(0).
+ */
+static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
+                          unsigned char dst0ub0,
+                          unsigned char dst0ub1,
+                          unsigned char arg0ub0,
+                          unsigned char arg0ub1,
+                          unsigned char argmem_noreg)
+{
+   assert(dst.file == file_x87);
+
+   if (arg.file == file_x87) {
+      if (dst.idx == 0)
+         emit_2ub(p, dst0ub0, dst0ub1 + arg.idx);
+      else if (arg.idx == 0)
+         /* arg is st(0) on this path, so the stack slot folded into the
+          * second byte has to be the destination's index; adding arg.idx
+          * would always add 0 and lose the destination register.
+          */
+         emit_2ub(p, arg0ub0, arg0ub1 + dst.idx);
+      else
+         assert(0);
+   }
+   else if (dst.idx == 0) {
+      assert(arg.file == file_REG32);
+      emit_1ub(p, 0xd8);
+      emit_modrm_noreg(p, argmem_noreg, arg);
+   }
+   else
+      assert(0);
+}
+/* Emit FMUL dst, arg via x87_arith_op().
+ * Register forms: D8 C8+i when dst is st(0), DC C8+i when arg is st(0).
+ * Memory form: D8 with opcode-extension digit 1.
+ */
+void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+{
+   x87_arith_op(p, dst, arg,
+                0xd8, 0xc8,
+                0xdc, 0xc8,
+                1);
+}
+/* Emit FSUB dst, arg via x87_arith_op().
+ * Register forms: D8 E0+i when dst is st(0), DC E8+i when arg is st(0).
+ * Memory form: D8 with opcode-extension digit 4.
+ */
+void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+{
+   x87_arith_op(p, dst, arg,
+                0xd8, 0xe0,
+                0xdc, 0xe8,
+                4);
+}
+/* Emit FSUBR dst, arg via x87_arith_op().
+ * Register forms: D8 E8+i when dst is st(0), DC E0+i when arg is st(0).
+ * Memory form: D8 with opcode-extension digit 5.
+ */
+void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+{
+   x87_arith_op(p, dst, arg,
+                0xd8, 0xe8,
+                0xdc, 0xe0,
+                5);
+}
+/* Emit FADD dst, arg via x87_arith_op().
+ * Register forms: D8 C0+i when dst is st(0), DC C0+i when arg is st(0).
+ * Memory form: D8 with opcode-extension digit 0.
+ */
+void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+{
+   x87_arith_op(p, dst, arg,
+                0xd8, 0xc0,
+                0xdc, 0xc0,
+                0);
+}
+/* Emit FDIV dst, arg via x87_arith_op().
+ * Register forms: D8 F0+i when dst is st(0), DC F8+i when arg is st(0).
+ * Memory form: D8 with opcode-extension digit 6.
+ */
+void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+{
+   x87_arith_op(p, dst, arg,
+                0xd8, 0xf0,
+                0xdc, 0xf8,
+                6);
+}
+/* Emit FDIVR dst, arg via x87_arith_op().
+ * Register forms: D8 F8+i when dst is st(0), DC F0+i when arg is st(0).
+ * Memory form: D8 with opcode-extension digit 7.
+ */
+void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
+{
+   x87_arith_op(p, dst, arg,
+                0xd8, 0xf8,
+                0xdc, 0xf0,
+                7);
+}
+/* Emit FMULP with dst as the stack operand: bytes DE, C8 + dst.idx.
+ * dst must be an x87 stack register with index >= 1.
+ */
+void x87_fmulp(struct x86_function *p, struct x86_reg dst)
+{
+   const unsigned char base = 0xc8;
+
+   assert(dst.file == file_x87);
+   assert(dst.idx >= 1);
+
+   emit_2ub(p, 0xde, base + dst.idx);
+}
+/* Emit FSUBP with dst as the stack operand: bytes DE, E8 + dst.idx.
+ * dst must be an x87 stack register with index >= 1.
+ */
+void x87_fsubp(struct x86_function *p, struct x86_reg dst)
+{
+   const unsigned char base = 0xe8;
+
+   assert(dst.file == file_x87);
+   assert(dst.idx >= 1);
+
+   emit_2ub(p, 0xde, base + dst.idx);
+}
+/* Emit FSUBRP with dst as the stack operand: bytes DE, E0 + dst.idx.
+ * dst must be an x87 stack register with index >= 1.
+ */
+void x87_fsubrp(struct x86_function *p, struct x86_reg dst)
+{
+   const unsigned char base = 0xe0;
+
+   assert(dst.file == file_x87);
+   assert(dst.idx >= 1);
+
+   emit_2ub(p, 0xde, base + dst.idx);
+}
+/* Emit FADDP with dst as the stack operand: bytes DE, C0 + dst.idx.
+ * dst must be an x87 stack register with index >= 1.
+ */
+void x87_faddp(struct x86_function *p, struct x86_reg dst)
+{
+   const unsigned char base = 0xc0;
+
+   assert(dst.file == file_x87);
+   assert(dst.idx >= 1);
+
+   emit_2ub(p, 0xde, base + dst.idx);
+}
+/* Emit FDIVP with dst as the stack operand: bytes DE, F8 + dst.idx.
+ * dst must be an x87 stack register with index >= 1.
+ */
+void x87_fdivp(struct x86_function *p, struct x86_reg dst)
+{
+   const unsigned char base = 0xf8;
+
+   assert(dst.file == file_x87);
+   assert(dst.idx >= 1);
+
+   emit_2ub(p, 0xde, base + dst.idx);
+}
+/* Emit FDIVRP with dst as the stack operand: bytes DE, F0 + dst.idx.
+ * dst must be an x87 stack register with index >= 1.
+ */
+void x87_fdivrp(struct x86_function *p, struct x86_reg dst)
+{
+   const unsigned char base = 0xf0;
+
+   assert(dst.file == file_x87);
+   assert(dst.idx >= 1);
+
+   emit_2ub(p, 0xde, base + dst.idx);
+}
+/* Emit FUCOM against x87 stack register arg: bytes DD, E0 + arg.idx. */
+void x87_fucom(struct x86_function *p, struct x86_reg arg)
+{
+   const unsigned char base = 0xe0;
+
+   assert(arg.file == file_x87);
+   emit_2ub(p, 0xdd, base + arg.idx);
+}
+/* Emit FUCOMP against x87 stack register arg: bytes DD, E8 + arg.idx. */
+void x87_fucomp(struct x86_function *p, struct x86_reg arg)
+{
+   const unsigned char base = 0xe8;
+
+   assert(arg.file == file_x87);
+   emit_2ub(p, 0xdd, base + arg.idx);
+}
+/* Emit FUCOMPP: the fixed byte pair DA E9, no operands. */
+void x87_fucompp(struct x86_function *p)
+{
+   const unsigned char first = 0xda;
+   const unsigned char second = 0xe9;
+
+   emit_2ub(p, first, second);
+}
+/* Emit FXCH with x87 stack register arg: bytes D9, C8 + arg.idx. */
+void x87_fxch(struct x86_function *p, struct x86_reg arg)
+{
+   const unsigned char base = 0xc8;
+
+   assert(arg.file == file_x87);
+   emit_2ub(p, 0xd9, base + arg.idx);
+}
+/* Emit FABS: the fixed byte pair D9 E1, no operands. */
+void x87_fabs(struct x86_function *p)
+{
+   const unsigned char first = 0xd9;
+   const unsigned char second = 0xe1;
+
+   emit_2ub(p, first, second);
+}
+/* Emit FCHS: the fixed byte pair D9 E0, no operands. */
+void x87_fchs(struct x86_function *p)
+{
+   const unsigned char first = 0xd9;
+   const unsigned char second = 0xe0;
+
+   emit_2ub(p, first, second);
+}
+/* Emit FCOS: the fixed byte pair D9 FF, no operands. */
+void x87_fcos(struct x86_function *p)
+{
+   const unsigned char first = 0xd9;
+   const unsigned char second = 0xff;
+
+   emit_2ub(p, first, second);
+}
+/* Emit the fixed byte pair D9 FC (the FRNDINT encoding), no operands. */
+void x87_fprndint(struct x86_function *p)
+{
+   const unsigned char first = 0xd9;
+   const unsigned char second = 0xfc;
+
+   emit_2ub(p, first, second);
+}
+/* Emit FSCALE: the fixed byte pair D9 FD, no operands. */
+void x87_fscale(struct x86_function *p)
+{
+   const unsigned char first = 0xd9;
+   const unsigned char second = 0xfd;
+
+   emit_2ub(p, first, second);
+}
+/* Emit FSIN: the fixed byte pair D9 FE, no operands. */
+void x87_fsin(struct x86_function *p)
+{
+   const unsigned char first = 0xd9;
+   const unsigned char second = 0xfe;
+
+   emit_2ub(p, first, second);
+}
+/* Emit FSINCOS: the fixed byte pair D9 FB, no operands. */
+void x87_fsincos(struct x86_function *p)
+{
+   const unsigned char first = 0xd9;
+   const unsigned char second = 0xfb;
+
+   emit_2ub(p, first, second);
+}
+/* Emit FSQRT: the fixed byte pair D9 FA, no operands. */
+void x87_fsqrt(struct x86_function *p)
+{
+   const unsigned char first = 0xd9;
+   const unsigned char second = 0xfa;
+
+   emit_2ub(p, first, second);
+}
+/* Emit FXTRACT: the fixed byte pair D9 F4, no operands. */
+void x87_fxtract(struct x86_function *p)
+{
+   const unsigned char first = 0xd9;
+   const unsigned char second = 0xf4;
+
+   emit_2ub(p, first, second);
+}
+/* Emit F2XM1 (bytes D9 F0):
+ *
+ *    st0 = (2^st0) - 1
+ *
+ * Restrictions: -1.0 <= st0 <= 1.0
+ */
+void x87_f2xm1(struct x86_function *p)
+{
+   const unsigned char first = 0xd9;
+   const unsigned char second = 0xf0;
+
+   emit_2ub(p, first, second);
+}
+/* Emit FYL2X (bytes D9 F1):
+ *
+ *    st1 = st1 * log2(st0);
+ *    pop_stack;
+ */
+void x87_fyl2x(struct x86_function *p)
+{
+   const unsigned char first = 0xd9;
+   const unsigned char second = 0xf1;
+
+   emit_2ub(p, first, second);
+}
+/* Emit FYL2XP1 (bytes D9 F9):
+ *
+ *    st1 = st1 * log2(st0 + 1.0);
+ *    pop_stack;
+ *
+ * A fast operation, with restrictions: -.29 < st0 < .29
+ */
+void x87_fyl2xp1(struct x86_function *p)
+{
+   const unsigned char first = 0xd9;
+   const unsigned char second = 0xf9;
+
+   emit_2ub(p, first, second);
+}
+/* Emit FLD arg.
+ *   - x87 stack register: bytes D9, C0 + arg.idx
+ *   - anything else:      byte D9, then emit_modrm_noreg() with digit 0
+ */
+void x87_fld(struct x86_function *p, struct x86_reg arg)
+{
+   if (arg.file != file_x87) {
+      emit_1ub(p, 0xd9);
+      emit_modrm_noreg(p, 0, arg);
+      return;
+   }
+
+   emit_2ub(p, 0xd9, 0xc0 + arg.idx);
+}
+/* Emit FST dst.
+ *   - x87 stack register: bytes DD, D0 + dst.idx
+ *   - anything else:      byte D9, then emit_modrm_noreg() with digit 2
+ */
+void x87_fst(struct x86_function *p, struct x86_reg dst)
+{
+   if (dst.file != file_x87) {
+      emit_1ub(p, 0xd9);
+      emit_modrm_noreg(p, 2, dst);
+      return;
+   }
+
+   emit_2ub(p, 0xdd, 0xd0 + dst.idx);
+}
+/* Emit FSTP dst.
+ *   - x87 stack register: bytes DD, D8 + dst.idx
+ *   - anything else:      byte D9, then emit_modrm_noreg() with digit 3
+ */
+void x87_fstp(struct x86_function *p, struct x86_reg dst)
+{
+   if (dst.file != file_x87) {
+      emit_1ub(p, 0xd9);
+      emit_modrm_noreg(p, 3, dst);
+      return;
+   }
+
+   emit_2ub(p, 0xdd, 0xd8 + dst.idx);
+}
+/* Emit FCOM dst.
+ *   - x87 stack register: bytes D8, D0 + dst.idx
+ *   - anything else:      byte D8, then emit_modrm_noreg() with digit 2
+ */
+void x87_fcom(struct x86_function *p, struct x86_reg dst)
+{
+   if (dst.file != file_x87) {
+      emit_1ub(p, 0xd8);
+      emit_modrm_noreg(p, 2, dst);
+      return;
+   }
+
+   emit_2ub(p, 0xd8, 0xd0 + dst.idx);
+}
+/* Emit FCOMP dst.
+ *   - x87 stack register: bytes D8, D8 + dst.idx
+ *   - anything else:      byte D8, then emit_modrm_noreg() with digit 3
+ */
+void x87_fcomp(struct x86_function *p, struct x86_reg dst)
+{
+   if (dst.file != file_x87) {
+      emit_1ub(p, 0xd8);
+      emit_modrm_noreg(p, 3, dst);
+      return;
+   }
+
+   emit_2ub(p, 0xd8, 0xd8 + dst.idx);
+}
+/* Emit FNSTSW dst.  dst must come from the 32-bit general register file.
+ *   - the AX register itself: fixed bytes DF E0
+ *   - any other operand:      byte DD, then emit_modrm_noreg() with digit 7
+ */
+void x87_fnstsw(struct x86_function *p, struct x86_reg dst)
+{
+   assert(dst.file == file_REG32);
+
+   if (dst.idx != reg_AX || dst.mod != mod_REG) {
+      emit_1ub(p, 0xdd);
+      emit_modrm_noreg(p, 7, dst);
+      return;
+   }
+
+   emit_2ub(p, 0xdf, 0xe0);
+}
+/***********************************************************************
+ * MMX instructions
+ */
+/* Emit EMMS (bytes 0F 77) and clear the function's need_emms flag.
+ * Asserts that the flag was set, i.e. that an MMX emitter ran earlier.
+ */
+void mmx_emms(struct x86_function *p)
+{
+   const unsigned char first = 0x0f;
+   const unsigned char second = 0x77;
+
+   assert(p->need_emms);
+
+   emit_2ub(p, first, second);
+   p->need_emms = 0;
+}
+/* Emit the MMX form of PACKSSDW: the X86_TWOB escape, opcode 0x6b, ModRM.
+ *
+ * dst must be an MMX register; src must be an MMX register or a memory
+ * operand.  Sets need_emms so that mmx_emms() is expected later.
+ */
+void mmx_packssdw(struct x86_function *p,
+                  struct x86_reg dst,
+                  struct x86_reg src)
+{
+   const unsigned char opcode = 0x6b;
+
+   assert(dst.file == file_MMX &&
+          (src.file == file_MMX || src.mod != mod_REG));
+
+   p->need_emms = 1;
+
+   emit_2ub(p, X86_TWOB, opcode);
+   emit_modrm(p, dst, src);
+}
+/* Emit the MMX form of PACKUSWB: the X86_TWOB escape, opcode 0x67, ModRM.
+ *
+ * dst must be an MMX register; src must be an MMX register or a memory
+ * operand.  Sets need_emms so that mmx_emms() is expected later.
+ */
+void mmx_packuswb(struct x86_function *p,
+                  struct x86_reg dst,
+                  struct x86_reg src)
+{
+   const unsigned char opcode = 0x67;
+
+   assert(dst.file == file_MMX &&
+          (src.file == file_MMX || src.mod != mod_REG));
+
+   p->need_emms = 1;
+
+   emit_2ub(p, X86_TWOB, opcode);
+   emit_modrm(p, dst, src);
+}
+/* Emit the MMX MOVD.  After the X86_TWOB escape, emit_op_modrm() is
+ * handed both candidate opcodes (0x6e and 0x7e) along with the operands.
+ * Sets need_emms so that mmx_emms() is expected later.
+ */
+void mmx_movd(struct x86_function *p,
+              struct x86_reg dst,
+              struct x86_reg src)
+{
+   const unsigned char op_first = 0x6e;
+   const unsigned char op_second = 0x7e;
+
+   p->need_emms = 1;
+
+   emit_1ub(p, X86_TWOB);
+   emit_op_modrm(p, op_first, op_second, dst, src);
+}
+/* Emit the MMX MOVQ.  After the X86_TWOB escape, emit_op_modrm() is
+ * handed both candidate opcodes (0x6f and 0x7f) along with the operands.
+ * Sets need_emms so that mmx_emms() is expected later.
+ */
+void mmx_movq(struct x86_function *p,
+              struct x86_reg dst,
+              struct x86_reg src)
+{
+   const unsigned char op_first = 0x6f;
+   const unsigned char op_second = 0x7f;
+
+   p->need_emms = 1;
+
+   emit_1ub(p, X86_TWOB);
+   emit_op_modrm(p, op_first, op_second, dst, src);
+}
+/***********************************************************************
+ * Helper functions
+ */
+/* Retrieve a reference to one of the function arguments, taking into
+ * account any push/pop activity recorded in p->stack_offset.
+ *
+ * The result is an ESP-relative operand whose displacement is
+ * p->stack_offset + arg * 4.
+ */
+struct x86_reg x86_fn_arg(struct x86_function *p,
+                          unsigned arg)
+{
+   struct x86_reg stack_ptr = x86_make_reg(file_REG32, reg_SP);
+   unsigned displacement = p->stack_offset + arg * 4;     /* ??? */
+
+   return x86_make_disp(stack_ptr, displacement);
+}
+/* Reset p to the "no code buffer" state: zero size, NULL store and NULL
+ * csr.  The other fields of *p are left untouched.
+ */
+void x86_init_func(struct x86_function *p)
+{
+   p->store = NULL;
+   p->csr = NULL;
+   p->size = 0;
+}
+/* Allocate a code_size-byte buffer with _mesa_exec_malloc() and point both
+ * store and csr at its start.
+ *
+ * Returns non-zero on success and 0 when the allocation failed; in the
+ * failure case p->size is still set to code_size while store/csr are NULL.
+ */
+int x86_init_func_size(struct x86_function *p, unsigned code_size)
+{
+   unsigned char *buffer = _mesa_exec_malloc(code_size);
+
+   p->size = code_size;
+   p->store = buffer;
+   p->csr = buffer;
+
+   return buffer != NULL;
+}
+/* Hand the code buffer back to _mesa_exec_free() and return p to the
+ * empty state (NULL store/csr, zero size).
+ */
+void x86_release_func(struct x86_function *p)
+{
+   _mesa_exec_free(p->store);
+
+   p->size = 0;
+   p->csr = NULL;
+   p->store = NULL;
+}
+/* Return the start of the code buffer as a callable function pointer.
+ * When DISASSEM is non-zero and a buffer exists, the store and csr
+ * pointers are printed first.
+ */
+void (*x86_get_func(struct x86_function *p))(void)
+{
+   unsigned long entry;
+
+   if (DISASSEM) {
+      if (p->store)
+         printf("disassemble %p %p\n", p->store, p->csr);
+   }
+
+   /* Go through an integer so the data pointer can be converted to a
+    * function pointer.
+    */
+   entry = (unsigned long) p->store;
+   return (void (*)(void)) entry;
+}
+#else
+void x86sse_dummy(void)
+{
+   /* Intentionally empty: placeholder symbol for the branch of the
+    * enclosing #if in which the code generator is compiled out.
+    */
+}
+#endif
+#else  /* USE_X86_ASM */
+int x86sse_c_dummy_var; /* silence warning */
+#endif /* USE_X86_ASM */

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/rtasm/x86sse.h
 ,0 → 1,256
+#ifndef _X86SSE_H_
+#define _X86SSE_H_
+#if defined(__i386__) || defined(__386__)
+/* It is up to the caller to ensure that instructions issued are
+ * suitable for the host cpu.  There are no checks made in this module
+ * for mmx/sse/sse2 support on the cpu.
+ */
+struct x86_reg {                /* one instruction operand: a register or a register-based memory reference */
+   unsigned file:3;             /* register file, compared against enum x86_reg_file values */
+   unsigned idx:3;              /* index within the file, e.g. reg_AX or an x87 stack slot number */
+   unsigned mod:2;              /* mod_REG if this is just a register */
+   int      disp:24;            /* only +/- 23bits of offset - should be enough... */
+};
+struct x86_function {           /* state of one function being generated */
+   unsigned size;               /* byte size requested from x86_init_func_size(); 0 when no buffer */
+   unsigned char *store;        /* buffer from _mesa_exec_malloc(); returned by x86_get_func() */
+   unsigned char *csr;          /* starts equal to store; presumably the current emit position - TODO confirm */
+   unsigned stack_offset;       /* added to argument displacements by x86_fn_arg() */
+   int need_emms;               /* set by emitters that write MMX registers, cleared by mmx_emms() */
+   const char *fn;              /* NOTE(review): purpose not visible here - presumably a name for debugging */
+};
+enum x86_reg_file {             /* values stored in x86_reg.file */
+   file_REG32,                  /* 32-bit general registers, see enum x86_reg_name */
+   file_MMX,
+   file_XMM,
+   file_x87                     /* x87 stack registers; idx is the st(i) slot */
+};
+/* Values for mod field of modr/m byte; stored in x86_reg.mod.  The
+ * emitters treat mod_REG as "plain register" and every other value as a
+ * memory reference.
+ */
+enum x86_reg_mod {
+   mod_INDIRECT,
+   mod_DISP8,
+   mod_DISP32,
+   mod_REG
+};
+enum x86_reg_name {             /* general register indices for x86_reg.idx, numbered 0..7 in this order */
+   reg_AX,
+   reg_CX,
+   reg_DX,
+   reg_BX,
+   reg_SP,                      /* base register used by x86_fn_arg() */
+   reg_BP,
+   reg_SI,
+   reg_DI
+};
+enum x86_cc {                   /* condition codes taken by x86_jcc() and x86_jcc_forward() */
+   cc_O,                        /* overflow */
+   cc_NO,                       /* not overflow */
+   cc_NAE,                      /* not above or equal / carry */
+   cc_AE,                       /* above or equal / not carry */
+   cc_E,                        /* equal / zero */
+   cc_NE                        /* not equal / not zero */
+};
+enum sse_cc {                   /* presumably the cc immediate for sse_cmpps() - TODO confirm against callers */
+   cc_Equal,
+   cc_LessThan,
+   cc_LessThanEqual,
+   cc_Unordered,
+   cc_NotEqual,
+   cc_NotLessThan,
+   cc_NotLessThanEqual,
+   cc_Ordered
+};
+#define cc_Z  cc_E
+#define cc_NZ cc_NE
+/* Begin/end/retrieve function creation:
+ */
+void x86_init_func( struct x86_function *p );
+int x86_init_func_size( struct x86_function *p, unsigned code_size );
+void x86_release_func( struct x86_function *p );
+void (*x86_get_func( struct x86_function *p ))( void );
+/* Create and manipulate registers and regmem values:
+ */
+struct x86_reg x86_make_reg( enum x86_reg_file file,
+                             enum x86_reg_name idx );
+struct x86_reg x86_make_disp( struct x86_reg reg,
+                              int disp );
+struct x86_reg x86_deref( struct x86_reg reg );
+struct x86_reg x86_get_base_reg( struct x86_reg reg );
+/* Labels, jumps and fixup:
+ */
+unsigned char *x86_get_label( struct x86_function *p );
+void x86_jcc( struct x86_function *p,
+              enum x86_cc cc,
+              unsigned char *label );
+unsigned char *x86_jcc_forward( struct x86_function *p,
+                          enum x86_cc cc );
+unsigned char *x86_jmp_forward( struct x86_function *p);
+unsigned char *x86_call_forward( struct x86_function *p);
+void x86_fixup_fwd_jump( struct x86_function *p,
+                         unsigned char *fixup );
+void x86_jmp( struct x86_function *p, unsigned char *label );
+/* void x86_call( struct x86_function *p, void (*label)() ); */
+void x86_call( struct x86_function *p, struct x86_reg reg);
+/* michal:
+ * Temporary. As I need immediate operands and don't want to mess with the codegen,
+ * I load the immediate into a general purpose register and use it.
+ */
+void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm );
+/* Macros for sse_shufps() and sse2_pshufd():
+ *
+ * SHUF packs four 2-bit selectors into one byte, _x in bits 0-1 up to
+ * _w in bits 6-7.  SHUF_NOOP is the identity selection (0,1,2,3) and
+ * GET_SHUF extracts selector number idx from a packed value.
+ */
+#define SHUF(_x,_y,_z,_w)       (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6))
+#define SHUF_NOOP               SHUF(0,1,2,3)
+#define GET_SHUF(swz, idx)      (((swz) >> ((idx)*2)) & 0x3)
+void mmx_emms( struct x86_function *p );
+void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
+                  unsigned char shuf );
+void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src,
+                unsigned char cc );
+void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
+                 unsigned char shuf );
+void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
+void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_dec( struct x86_function *p, struct x86_reg reg );
+void x86_inc( struct x86_function *p, struct x86_reg reg );
+void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_mul( struct x86_function *p, struct x86_reg src );
+void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_pop( struct x86_function *p, struct x86_reg reg );
+void x86_push( struct x86_function *p, struct x86_reg reg );
+void x86_ret( struct x86_function *p );
+void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_sahf( struct x86_function *p );
+void x87_f2xm1( struct x86_function *p );
+void x87_fabs( struct x86_function *p );
+void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
+void x87_faddp( struct x86_function *p, struct x86_reg dst );
+void x87_fchs( struct x86_function *p );
+void x87_fclex( struct x86_function *p );
+void x87_fcom( struct x86_function *p, struct x86_reg dst );
+void x87_fcomp( struct x86_function *p, struct x86_reg dst );
+void x87_fcos( struct x86_function *p );
+void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
+void x87_fdivp( struct x86_function *p, struct x86_reg dst );
+void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
+void x87_fdivrp( struct x86_function *p, struct x86_reg dst );
+void x87_fild( struct x86_function *p, struct x86_reg arg );
+void x87_fist( struct x86_function *p, struct x86_reg dst );
+void x87_fistp( struct x86_function *p, struct x86_reg dst );
+void x87_fld( struct x86_function *p, struct x86_reg arg );
+void x87_fld1( struct x86_function *p );
+void x87_fldcw( struct x86_function *p, struct x86_reg arg );
+void x87_fldl2e( struct x86_function *p );
+void x87_fldln2( struct x86_function *p );
+void x87_fldz( struct x86_function *p );
+void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
+void x87_fmulp( struct x86_function *p, struct x86_reg dst );
+void x87_fnclex( struct x86_function *p );
+void x87_fprndint( struct x86_function *p );
+void x87_fscale( struct x86_function *p );
+void x87_fsin( struct x86_function *p );
+void x87_fsincos( struct x86_function *p );
+void x87_fsqrt( struct x86_function *p );
+void x87_fst( struct x86_function *p, struct x86_reg dst );
+void x87_fstp( struct x86_function *p, struct x86_reg dst );
+void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
+void x87_fsubp( struct x86_function *p, struct x86_reg dst );
+void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
+void x87_fsubrp( struct x86_function *p, struct x86_reg dst );
+void x87_fxch( struct x86_function *p, struct x86_reg dst );
+void x87_fxtract( struct x86_function *p );
+void x87_fyl2x( struct x86_function *p );
+void x87_fyl2xp1( struct x86_function *p );
+void x87_fwait( struct x86_function *p );
+void x87_fnstsw( struct x86_function *p, struct x86_reg dst );
+void x87_fucompp( struct x86_function *p );
+void x87_fucomp( struct x86_function *p, struct x86_reg arg );
+void x87_fucom( struct x86_function *p, struct x86_reg arg );
+/* Retrieve a reference to one of the function arguments, taking into
+ * account any push/pop activity.  Note - doesn't track explicit
+ * manipulation of ESP by other instructions.
+ */
+struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg );
+#endif
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/sse.c
 ,0 → 1,123
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/*
+ * PentiumIII-SIMD (SSE) optimizations contributed by
+ * Andre Werthmann <wertmann@cs.uni-potsdam.de>
+ */
+#include "main/glheader.h"
+#include "main/context.h"
+#include "math/m_xform.h"
+#include "tnl/t_context.h"
+#include "sse.h"
+#include "x86_xform.h"
+#ifdef DEBUG_MATH
+#include "math/m_debug.h"
+#endif
+#ifdef USE_SSE_ASM
+DECLARE_XFORM_GROUP( sse, 2 )
+DECLARE_XFORM_GROUP( sse, 3 )
+#if 1
+/* Some functions are not written in SSE-assembly, because the fpu ones are faster */
+extern void _ASMAPI _mesa_sse_transform_normals_no_rot( NORM_ARGS );
+extern void _ASMAPI _mesa_sse_transform_rescale_normals( NORM_ARGS );
+extern void _ASMAPI _mesa_sse_transform_rescale_normals_no_rot( NORM_ARGS );
+extern void _ASMAPI _mesa_sse_transform_points4_general( XFORM_ARGS );
+extern void _ASMAPI _mesa_sse_transform_points4_3d( XFORM_ARGS );
+/* XXX this function segfaults, see below */
+extern void _ASMAPI _mesa_sse_transform_points4_identity( XFORM_ARGS );
+/* XXX this one works, see below */
+extern void _ASMAPI _mesa_x86_transform_points4_identity( XFORM_ARGS );
+#else
+DECLARE_NORM_GROUP( sse )
+#endif
+extern void _ASMAPI
+_mesa_v16_sse_general_xform( GLfloat *first_vert,
+                             const GLfloat *m,
+                             const GLfloat *src,
+                             GLuint src_stride,
+                             GLuint count );
+extern void _ASMAPI
+_mesa_sse_project_vertices( GLfloat *first,
+                            GLfloat *last,
+                            const GLfloat *m,
+                            GLuint stride );
+extern void _ASMAPI
+_mesa_sse_project_clipped_vertices( GLfloat *first,
+                                    GLfloat *last,
+                                    const GLfloat *m,
+                                    GLuint stride,
+                                    const GLubyte *clipmask );
+#endif
+/* Install the SSE routines into _mesa_transform_tab and _mesa_normal_tab.
+ * Compiles to an empty function when USE_SSE_ASM is not defined.
+ */
+void _mesa_init_sse_transform_asm(void)
+{
+#ifdef USE_SSE_ASM
+   ASSIGN_XFORM_GROUP( sse, 2 );
+   ASSIGN_XFORM_GROUP( sse, 3 );
+#if 1
+   /* TODO: Finish these off.  Only the table slots listed below are
+    * overridden individually.
+    */
+   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT] =
+      _mesa_sse_transform_normals_no_rot;
+   _mesa_normal_tab[NORM_TRANSFORM | NORM_RESCALE] =
+      _mesa_sse_transform_rescale_normals;
+   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE] =
+      _mesa_sse_transform_rescale_normals_no_rot;
+
+   _mesa_transform_tab[4][MATRIX_GENERAL] =
+      _mesa_sse_transform_points4_general;
+   _mesa_transform_tab[4][MATRIX_3D] =
+      _mesa_sse_transform_points4_3d;
+   /* XXX NOTE: _mesa_sse_transform_points4_identity segfaults with the
+    * conformance tests, so the x86 version is installed in its place.
+    */
+   _mesa_transform_tab[4][MATRIX_IDENTITY] =
+      _mesa_x86_transform_points4_identity;
+#else
+   ASSIGN_XFORM_GROUP( sse, 4 );
+   ASSIGN_NORM_GROUP( sse );
+#endif
+#ifdef DEBUG_MATH
+   _math_test_all_transform_functions( "SSE" );
+   _math_test_all_normal_transform_functions( "SSE" );
+#endif
+#endif
+}

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/sse.h
 ,0 → 1,36
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/*
+ * PentiumIII-SIMD (SSE) optimizations contributed by
+ * Andre Werthmann <wertmann@cs.uni-potsdam.de>
+ */
+#ifndef __SSE_H__
+#define __SSE_H__
+void _mesa_init_sse_transform_asm( void );
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/sse_normal.S
 ,0 → 1,261
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/** TODO:
+  * - insert PREFETCH instructions to avoid cache-misses !
+  * - some more optimizations are possible...
+  * - for 40-50% more performance in the SSE-functions, the
+  *   data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned !
+  */
+#ifdef USE_SSE_ASM
+#include "assyntax.h"
+#include "matypes.h"
+#include "norm_args.h"
+   SEG_TEXT
+#define M(i)    REGOFF(i * 4, EDX)
+#define S(i)    REGOFF(i * 4, ESI)
+#define D(i)    REGOFF(i * 4, EDI)
+#define STRIDE  REGOFF(12, ESI)
+ALIGNTEXT16 /* _mesa_sse_transform_rescale_normals_no_rot:
+ * For every source normal (ux, uy, uz) writes
+ *    ( ux*m0*scale, uy*m5*scale, uz*m10*scale )
+ * to the destination, where m is matrix->inv and scale is ARG_SCALE.
+ * Source normals are STRIDE bytes apart, destination normals 16 bytes.
+ * A zero source count returns immediately without touching the dest.
+ */
+GLOBL GLNAME(_mesa_sse_transform_rescale_normals_no_rot)
+HIDDEN(_mesa_sse_transform_rescale_normals_no_rot)
+GLNAME(_mesa_sse_transform_rescale_normals_no_rot):
+#define FRAME_OFFSET 8 /* two 4-byte registers are pushed below; presumably consumed by the ARG_* macros - TODO confirm in norm_args.h */
+        PUSH_L  ( ESI )                                 /* save ESI, restored at finish */
+        PUSH_L  ( EDI )                                 /* save EDI, restored at finish */
+        MOV_L   ( ARG_IN, ESI )                         /* ptr to source GLvector3f */
+        MOV_L   ( ARG_DEST, EDI )                       /* ptr to dest GLvector3f */
+        MOV_L   ( ARG_MAT, EDX )                        /* ptr to matrix */
+        MOV_L   ( REGOFF(MATRIX_INV, EDX), EDX)         /* matrix->inv */
+        MOV_L   ( REGOFF(V4F_COUNT, ESI), ECX )         /* source count */
+        TEST_L  ( ECX, ECX )                            /* is the count zero? */
+        JZ( LLBL(K_G3TRNNRR_finish) )                   /* count was zero; go to finish */
+        MOV_L   ( STRIDE, EAX )                         /* source stride in bytes, read before ESI is repointed */
+        MOV_L   ( ECX, REGOFF(V4F_COUNT, EDI) )         /* set dest-count */
+        IMUL_L( CONST(16), ECX )                        /* count *= 16 (bytes per dest element) */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )            /* ptr to first source vertex */
+        MOV_L( REGOFF(V4F_START, EDI), EDI )            /* ptr to first dest vertex */
+        ADD_L( EDI, ECX )                               /* ECX = dest ptr + count*16 = end-of-dest pointer */
+ALIGNTEXT32
+        MOVSS   ( M(0), XMM1 )                          /* m0 */
+        MOVSS   ( M(5), XMM2 )                          /* m5 */
+        UNPCKLPS( XMM2, XMM1 )                          /* m5 | m0 */
+        MOVSS   ( ARG_SCALE, XMM0 )                     /* scale */
+        SHUFPS  ( CONST(0x0), XMM0, XMM0 )              /* scale | scale (lane 0 replicated) */
+        MULPS   ( XMM0, XMM1 )                          /* m5*scale | m0*scale */
+        MULSS   ( M(10), XMM0 )                         /* m10*scale in the low lane */
+ALIGNTEXT32
+LLBL(K_G3TRNNRR_top):
+        MOVLPS  ( S(0), XMM2 )                          /* uy | ux */
+        MULPS   ( XMM1, XMM2 )                          /* uy*m5*scale | ux*m0*scale */
+        MOVLPS  ( XMM2, D(0) )                          /* ->D(1) | D(0) */
+        MOVSS   ( S(2), XMM2 )                          /* uz */
+        MULSS   ( XMM0, XMM2 )                          /* uz*m10*scale */
+        MOVSS   ( XMM2, D(2) )                          /* ->D(2) */
+LLBL(K_G3TRNNRR_skip):                                  /* label is not referenced within this routine */
+        ADD_L   ( CONST(16), EDI )                      /* next dest element */
+        ADD_L   ( EAX, ESI )                            /* next source element (advance by stride) */
+        CMP_L   ( ECX, EDI )                            /* reached the end-of-dest pointer? */
+        JNE     ( LLBL(K_G3TRNNRR_top) )                /* no: transform the next normal */
+LLBL(K_G3TRNNRR_finish):
+        POP_L   ( EDI )                                 /* restore in reverse push order */
+        POP_L   ( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT16
+GLOBL GLNAME(_mesa_sse_transform_rescale_normals)       /* SSE: transform normals by elements m0-m2, m4-m6, m8-m10 of matrix->inv, with a scale factor folded in */
+HIDDEN(_mesa_sse_transform_rescale_normals)
+GLNAME(_mesa_sse_transform_rescale_normals):            /* per normal u: D(j) = scale*(ux*m[4j] + uy*m[4j+1] + uz*m[4j+2]), j = 0..2 */
+#define FRAME_OFFSET 8                                  /* two 4-byte pushes below; presumably consumed by the ARG_* macros (defined outside this view) */
+        PUSH_L  ( ESI )                                 /* save ESI; restored at finish */
+        PUSH_L  ( EDI )                                 /* save EDI; restored at finish */
+        MOV_L   ( ARG_IN, ESI )                         /* ptr to source normal vector (fields read via V4F_* offsets) */
+        MOV_L   ( ARG_DEST, EDI )                       /* ptr to dest normal vector (fields written via V4F_* offsets) */
+        MOV_L   ( ARG_MAT, EDX )                        /* ptr to matrix */
+        MOV_L   ( REGOFF(MATRIX_INV, EDX), EDX)         /* matrix->inv; M(i) addresses its elements from here on */
+        MOV_L   ( REGOFF(V4F_COUNT, ESI), ECX )         /* source count */
+        TEST_L  ( ECX, ECX )                            /* ZF set when count == 0 */
+        JZ( LLBL(K_G3TRNR_finish) )                     /* count was zero; go to finish (dest is left untouched) */
+        MOV_L   ( STRIDE, EAX )                         /* source stride in bytes, added to ESI per normal (STRIDE macro defined outside this view) */
+        MOV_L   ( ECX, REGOFF(V4F_COUNT, EDI) )         /* set dest-count */
+        IMUL_L( CONST(16), ECX )                        /* count *= 16 (dest normals are written 16 bytes apart) */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )            /* ptr to first source normal */
+        MOV_L( REGOFF(V4F_START, EDI), EDI )            /* ptr to first dest normal */
+        ADD_L( EDI, ECX )                               /* ECX = dest start + count*16: end-of-output sentinel for the loop */
+ALIGNTEXT32
+        MOVSS   ( M(0), XMM0 )                          /* m0 */
+        MOVSS   ( M(4), XMM1 )                          /* m4 */
+        UNPCKLPS( XMM1, XMM0 )                          /* m4 | m0 */
+        MOVSS   ( ARG_SCALE, XMM4 )                     /* scale */
+        SHUFPS  ( CONST(0x0), XMM4, XMM4 )              /* scale broadcast to all lanes (only the low two matter here) */
+        MULPS   ( XMM4, XMM0 )                          /* m4*scale | m0*scale */
+        MOVSS   ( M(1), XMM1 )                          /* m1 */
+        MOVSS   ( M(5), XMM2 )                          /* m5 */
+        UNPCKLPS( XMM2, XMM1 )                          /* m5 | m1 */
+        MULPS   ( XMM4, XMM1 )                          /* m5*scale | m1*scale */
+        MOVSS   ( M(2), XMM2 )                          /* m2 */
+        MOVSS   ( M(6), XMM3 )                          /* m6 */
+        UNPCKLPS( XMM3, XMM2 )                          /* m6 | m2 */
+        MULPS   ( XMM4, XMM2 )                          /* m6*scale | m2*scale */
+        MOVSS   ( M(8), XMM6 )                          /* m8 */
+        MULSS   ( ARG_SCALE, XMM6 )                     /* m8*scale */
+        MOVSS   ( M(9), XMM7 )                          /* m9 */
+        MULSS   ( ARG_SCALE, XMM7 )                     /* m9*scale */
+ALIGNTEXT32
+LLBL(K_G3TRNR_top):                                     /* loop body: one normal per iteration */
+        MOVSS   ( S(0), XMM3 )                          /* ux */
+        SHUFPS  ( CONST(0x0), XMM3, XMM3 )              /* ux | ux */
+        MULPS   ( XMM0, XMM3 )                          /* ux*m4*scale | ux*m0*scale */
+        MOVSS   ( S(1), XMM4 )                          /* uy (XMM4 no longer holds scale from here on) */
+        SHUFPS  ( CONST(0x0), XMM4, XMM4 )              /* uy | uy */
+        MULPS   ( XMM1, XMM4 )                          /* uy*m5*scale | uy*m1*scale */
+        MOVSS   ( S(2), XMM5 )                          /* uz */
+        SHUFPS  ( CONST(0x0), XMM5, XMM5 )              /* uz | uz */
+        MULPS   ( XMM2, XMM5 )                          /* uz*m6*scale | uz*m2*scale */
+        ADDPS   ( XMM4, XMM3 )                          /* add the uy terms */
+        ADDPS   ( XMM5, XMM3 )                          /* add the uz terms */
+        MOVLPS  ( XMM3, D(0) )                          /* ->D(1) | ->D(0) */
+        MOVSS   ( M(10), XMM3 )                         /* m10: reloaded every iteration, XMM0-2/XMM6-7 hold the other terms and XMM3-5 are scratch */
+        MULSS   ( ARG_SCALE, XMM3 )                     /* m10*scale */
+        MULSS   ( S(2), XMM3 )                          /* m10*scale*uz */
+        MOVSS   ( S(1), XMM4 )                          /* uy */
+        MULSS   ( XMM7, XMM4 )                          /* uy*m9*scale */
+        MOVSS   ( S(0), XMM5 )                          /* ux */
+        MULSS   ( XMM6, XMM5 )                          /* ux*m8*scale */
+        ADDSS   ( XMM4, XMM3 )                          /* uz*m10*scale + uy*m9*scale */
+        ADDSS   ( XMM5, XMM3 )                          /* ... + ux*m8*scale */
+        MOVSS   ( XMM3, D(2) )                          /* ->D(2) */
+LLBL(K_G3TRNR_skip):                                    /* no jump in this routine targets this label */
+        ADD_L   ( CONST(16), EDI )                      /* next dest normal (fixed 16-byte step) */
+        ADD_L   ( EAX, ESI )                            /* next source normal (source stride) */
+        CMP_L   ( ECX, EDI )                            /* reached the end-of-output sentinel? */
+        JNE     ( LLBL(K_G3TRNR_top) )                  /* no -> process next normal */
+LLBL(K_G3TRNR_finish):
+        POP_L   ( EDI )                                 /* restore in reverse push order */
+        POP_L   ( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT16
+GLOBL GLNAME(_mesa_sse_transform_normals_no_rot)        /* SSE: scale each normal component-wise by m0, m5, m10 of matrix->inv */
+HIDDEN(_mesa_sse_transform_normals_no_rot)
+GLNAME(_mesa_sse_transform_normals_no_rot):             /* per normal u: D(0) = ux*m0, D(1) = uy*m5, D(2) = uz*m10 */
+#define FRAME_OFFSET 8                                  /* two 4-byte pushes below; presumably consumed by the ARG_* macros (defined outside this view) */
+        PUSH_L  ( ESI )                                 /* save ESI; restored at finish */
+        PUSH_L  ( EDI )                                 /* save EDI; restored at finish */
+        MOV_L   ( ARG_IN, ESI )                         /* ptr to source normal vector (fields read via V4F_* offsets) */
+        MOV_L   ( ARG_DEST, EDI )                       /* ptr to dest normal vector (fields written via V4F_* offsets) */
+        MOV_L   ( ARG_MAT, EDX )                        /* ptr to matrix */
+        MOV_L   ( REGOFF(MATRIX_INV, EDX), EDX)         /* matrix->inv; M(i) addresses its elements from here on */
+        MOV_L   ( REGOFF(V4F_COUNT, ESI), ECX )         /* source count */
+        TEST_L  ( ECX, ECX )                            /* ZF set when count == 0 */
+        JZ( LLBL(K_G3TNNRR_finish) )                    /* count was zero; go to finish (dest is left untouched) */
+        MOV_L   ( STRIDE, EAX )                         /* source stride in bytes, added to ESI per normal (STRIDE macro defined outside this view) */
+        MOV_L   ( ECX, REGOFF(V4F_COUNT, EDI) )         /* set dest-count */
+        IMUL_L( CONST(16), ECX )                        /* count *= 16 (dest normals are written 16 bytes apart) */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )            /* ptr to first source normal */
+        MOV_L( REGOFF(V4F_START, EDI), EDI )            /* ptr to first dest normal */
+        ADD_L( EDI, ECX )                               /* ECX = dest start + count*16: end-of-output sentinel for the loop */
+ALIGNTEXT32
+        MOVSS( M(0), XMM0 )                             /* m0 */
+        MOVSS( M(5), XMM1 )                             /* m5 */
+        UNPCKLPS( XMM1, XMM0 )                          /* m5 | m0 */
+        MOVSS( M(10), XMM1 )                            /* m10 (XMM1 reused after the unpack) */
+ALIGNTEXT32
+LLBL(K_G3TNNRR_top):                                    /* loop body: one normal per iteration */
+        MOVLPS( S(0), XMM2 )                            /* uy | ux */
+        MULPS( XMM0, XMM2 )                             /* uy*m5 | ux*m0 */
+        MOVLPS( XMM2, D(0) )                            /* ->D(1) | ->D(0) */
+        MOVSS( S(2), XMM2 )                             /* uz */
+        MULSS( XMM1, XMM2 )                             /* uz*m10 */
+        MOVSS( XMM2, D(2) )                             /* ->D(2) */
+LLBL(K_G3TNNRR_skip):                                   /* no jump in this routine targets this label */
+        ADD_L   ( CONST(16), EDI )                      /* next dest normal (fixed 16-byte step) */
+        ADD_L   ( EAX, ESI )                            /* next source normal (source stride) */
+        CMP_L   ( ECX, EDI )                            /* reached the end-of-output sentinel? */
+        JNE     ( LLBL(K_G3TNNRR_top) )                 /* no -> process next normal */
+LLBL(K_G3TNNRR_finish):
+        POP_L   ( EDI )                                 /* restore in reverse push order */
+        POP_L   ( ESI )
+        RET
+#undef FRAME_OFFSET
+#endif
+#if defined (__ELF__) && defined (__linux__)
+        .section .note.GNU-stack,"",%progbits
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/sse_xform1.S
 ,0 → 1,446
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/** TODO:
+  * - insert PREFETCH instructions to avoid cache misses!
+  * - some more optimizations are possible...
+  * - for 40-50% more performance in the SSE functions, the
+  *   data (trans-matrix, src_vert, dst_vert) needs to be 16-byte aligned!
+  */
+#ifdef USE_SSE_ASM
+#include "assyntax.h"
+#include "matypes.h"
+#include "xform_args.h"
+   SEG_TEXT
+#define S(i)    REGOFF(i * 4, ESI)      /* i-th 32-bit element at the source pointer; i is not parenthesized, so pass plain constants only */
+#define D(i)    REGOFF(i * 4, EDI)      /* i-th 32-bit element at the dest pointer */
+#define M(i)    REGOFF(i * 4, EDX)      /* i-th 32-bit element of the matrix addressed by EDX */
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points1_general)  /* SSE: 1-component points through a general matrix: D(0..3) = ox*m[0..3] + m[12..15] */
+HIDDEN( _mesa_sse_transform_points1_general )
+GLNAME( _mesa_sse_transform_points1_general ):
+#define FRAME_OFFSET 8  /* two 4-byte pushes below; presumably consumed by ARG_MATRIX (defined in an included header) */
+    PUSH_L    ( ESI )  /* save ESI; restored at finish */
+    PUSH_L    ( EDI )  /* save EDI; restored at finish */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI )  /* ptr to source GLvector4f (+8 skips the two pushes) */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI )    /* ptr to dest GLvector4f (+8 skips the two pushes) */
+    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */
+    CMP_L( CONST(0), ECX )                      /* count == 0 ? */
+    JE( LLBL(K_GTP1GR_finish) )                 /* yes -> nothing to do (dest header is left untouched) */
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* source stride in bytes (added to ESI per point) */
+    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
+    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )    /* set dest size */
+    SHL_L( CONST(4), ECX )                      /* count *= 16 (dest points are written 16 bytes apart) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */
+    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
+    ADD_L( EDI, ECX )                           /* ECX = dest start + count*16: end-of-output sentinel */
+ALIGNTEXT32
+    MOVAPS( M(0), XMM0 )                        /* m3  | m2  | m1  | m0  (aligned load: matrix must be 16-byte aligned) */
+    MOVAPS( M(12), XMM1 )                       /* m15 | m14 | m13 | m12 */
+ALIGNTEXT32
+LLBL(K_GTP1GR_top):  /* loop body: one point per iteration */
+    MOVSS( S(0), XMM2 )                         /* ox */
+    SHUFPS( CONST(0x0), XMM2, XMM2 )            /* ox | ox | ox | ox */
+    MULPS( XMM0, XMM2 )                         /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
+    ADDPS( XMM1, XMM2 )                         /* +m15 | +m14 | +m13 | +m12 */
+    MOVUPS( XMM2, D(0) )  /* store all four lanes ->D(3)..D(0); unaligned store */
+LLBL(K_GTP1GR_skip):  /* no jump in this routine targets this label */
+    ADD_L     ( CONST(16), EDI )  /* next dest point (fixed 16-byte step) */
+    ADD_L     ( EAX, ESI )  /* next source point (source stride) */
+    CMP_L     ( ECX, EDI )  /* reached the end-of-output sentinel? */
+    JNE       ( LLBL(K_GTP1GR_top) )  /* no -> process next point */
+LLBL(K_GTP1GR_finish):
+    POP_L     ( EDI )  /* restore in reverse push order */
+    POP_L     ( ESI )
+    RET
+#undef FRAME_OFFSET
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points1_identity)  /* identity transform of 1-component points: D(0) = S(0), copied through EDX (no SSE registers used) */
+HIDDEN(_mesa_sse_transform_points1_identity)
+GLNAME( _mesa_sse_transform_points1_identity ):
+#define FRAME_OFFSET 8  /* two 4-byte pushes below; not referenced by name inside this routine */
+    PUSH_L    ( ESI )  /* save ESI; restored at finish */
+    PUSH_L    ( EDI )  /* save EDI; restored at finish */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI )  /* ptr to source GLvector4f (+8 skips the two pushes) */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI )    /* ptr to dest GLvector4f (+8 skips the two pushes) */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */
+    TEST_L( ECX, ECX)  /* ZF set when count == 0 */
+    JZ( LLBL(K_GTP1IR_finish) )                 /* count was zero; go to finish */
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* source stride in bytes (added to ESI per point) */
+    OR_L( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
+    MOV_L( CONST(1), REGOFF(V4F_SIZE, EDI) )    /* set dest size */
+    SHL_L( CONST(4), ECX )                      /* count *= 16 (dest points are written 16 bytes apart) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */
+    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
+    ADD_L( EDI, ECX )                           /* ECX = dest start + count*16: end-of-output sentinel */
+    CMP_L( ESI, EDI )  /* source data and dest data start at the same address? */
+    JE( LLBL(K_GTP1IR_finish) )  /* yes -> skip the copy; dest flags/count/size were already set above */
+ALIGNTEXT32
+LLBL(K_GTP1IR_top):  /* loop body: one point per iteration */
+    MOV_L( S(0), EDX )  /* x as a raw 32-bit word */
+    MOV_L( EDX, D(0) )  /* ->D(0) */
+LLBL(K_GTP1IR_skip):  /* no jump in this routine targets this label */
+    ADD_L     ( CONST(16), EDI )  /* next dest point (fixed 16-byte step) */
+    ADD_L     ( EAX, ESI )  /* next source point (source stride) */
+    CMP_L     ( ECX, EDI )  /* reached the end-of-output sentinel? */
+    JNE       ( LLBL(K_GTP1IR_top) )  /* no -> process next point */
+LLBL(K_GTP1IR_finish):
+    POP_L     ( EDI )  /* restore in reverse push order */
+    POP_L     ( ESI )
+    RET
+#undef FRAME_OFFSET
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points1_3d_no_rot)  /* SSE: 1-component points: D(0) = ox*m0 + m12, D(1) = m13, D(2) = m14 */
+HIDDEN(_mesa_sse_transform_points1_3d_no_rot)
+GLNAME(_mesa_sse_transform_points1_3d_no_rot):
+#define FRAME_OFFSET 8  /* two 4-byte pushes below; presumably consumed by ARG_MATRIX (defined in an included header) */
+    PUSH_L( ESI )  /* save ESI; restored at finish */
+    PUSH_L( EDI )  /* save EDI; restored at finish */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI )  /* ptr to source GLvector4f (+8 skips the two pushes) */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI )    /* ptr to dest GLvector4f (+8 skips the two pushes) */
+    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */
+    TEST_L( ECX, ECX)  /* ZF set when count == 0 */
+    JZ( LLBL(K_GTP13DNRR_finish) )              /* count was zero; go to finish */
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* source stride in bytes (added to ESI per point) */
+    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
+    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )    /* set dest size */
+    SHL_L( CONST(4), ECX )                      /* count *= 16 (dest points are written 16 bytes apart) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */
+    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
+    ADD_L( EDI, ECX )                           /* ECX = dest start + count*16: end-of-output sentinel */
+ALIGNTEXT32
+    MOVSS( M(0), XMM0 )                         /* m0 */
+    MOVSS( M(12), XMM1 )                        /* m12 */
+    MOVSS( M(13), XMM2 )                        /* m13 */
+    MOVSS( M(14), XMM3 )                        /* m14 */
+ALIGNTEXT32
+LLBL(K_GTP13DNRR_top):  /* loop body: one point per iteration */
+    MOVSS( S(0), XMM4 )                         /* ox */
+    MULSS( XMM0, XMM4 )                         /* ox*m0 */
+    ADDSS( XMM1, XMM4 )                         /* ox*m0+m12 */
+    MOVSS( XMM4, D(0) )  /* ->D(0) */
+    MOVSS( XMM2, D(1) )  /* m13->D(1): same value for every point */
+    MOVSS( XMM3, D(2) )  /* m14->D(2): same value for every point */
+LLBL(K_GTP13DNRR_skip):  /* no jump in this routine targets this label */
+    ADD_L    ( CONST(16), EDI )  /* next dest point (fixed 16-byte step) */
+    ADD_L    ( EAX, ESI )  /* next source point (source stride) */
+    CMP_L    ( ECX, EDI )  /* reached the end-of-output sentinel? */
+    JNE      ( LLBL(K_GTP13DNRR_top) )  /* no -> process next point */
+LLBL(K_GTP13DNRR_finish):
+    POP_L    ( EDI )  /* restore in reverse push order */
+    POP_L    ( ESI )
+    RET
+#undef FRAME_OFFSET
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points1_perspective)  /* SSE: 1-component points: D(0) = ox*m0, D(1) = 0, D(2) = m14, D(3) = 0 */
+HIDDEN(_mesa_sse_transform_points1_perspective)
+GLNAME(_mesa_sse_transform_points1_perspective):
+#define FRAME_OFFSET 8  /* two 4-byte pushes below; presumably consumed by ARG_MATRIX (defined in an included header) */
+    PUSH_L   ( ESI )  /* save ESI; restored at finish */
+    PUSH_L   ( EDI )  /* save EDI; restored at finish */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI )  /* ptr to source GLvector4f (+8 skips the two pushes) */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI )    /* ptr to dest GLvector4f (+8 skips the two pushes) */
+    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */
+    TEST_L( ECX, ECX)  /* ZF set when count == 0 */
+    JZ( LLBL(K_GTP13PR_finish) )                /* count was zero; go to finish */
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* source stride in bytes (added to ESI per point) */
+    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
+    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )    /* set dest size */
+    SHL_L( CONST(4), ECX )                      /* count *= 16 (dest points are written 16 bytes apart) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */
+    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
+    ADD_L( EDI, ECX )                           /* ECX = dest start + count*16: end-of-output sentinel */
+ALIGNTEXT32
+    XORPS( XMM0, XMM0 )                         /* 0 | 0 | 0 | 0 */
+    MOVSS( M(0), XMM1 )                         /* m0 */
+    MOVSS( M(14), XMM2 )                        /* m14 */
+ALIGNTEXT32
+LLBL(K_GTP13PR_top):  /* loop body: one point per iteration */
+    MOVSS( S(0), XMM3 )                         /* ox */
+    MULSS( XMM1, XMM3 )                         /* ox*m0 */
+    MOVSS( XMM3, D(0) )                         /* ox*m0->D(0) */
+    MOVSS( XMM2, D(2) )                         /* m14->D(2) */
+    MOVSS( XMM0, D(1) )  /* 0->D(1) */
+    MOVSS( XMM0, D(3) )  /* 0->D(3) */
+LLBL(K_GTP13PR_skip):  /* no jump in this routine targets this label */
+    ADD_L( CONST(16), EDI )  /* next dest point (fixed 16-byte step) */
+    ADD_L( EAX, ESI )  /* next source point (source stride) */
+    CMP_L( ECX, EDI )  /* reached the end-of-output sentinel? */
+    JNE( LLBL(K_GTP13PR_top) )  /* no -> process next point */
+LLBL(K_GTP13PR_finish):
+    POP_L    ( EDI )  /* restore in reverse push order */
+    POP_L    ( ESI )
+    RET
+#undef FRAME_OFFSET
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points1_2d)  /* SSE: 1-component points: D(0) = ox*m0 + m12, D(1) = ox*m1 + m13 */
+HIDDEN(_mesa_sse_transform_points1_2d)
+GLNAME(_mesa_sse_transform_points1_2d):
+#define FRAME_OFFSET 8  /* two 4-byte pushes below; presumably consumed by ARG_MATRIX (defined in an included header) */
+    PUSH_L( ESI )  /* save ESI; restored at finish */
+    PUSH_L( EDI )  /* save EDI; restored at finish */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI )  /* ptr to source GLvector4f (+8 skips the two pushes) */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI )    /* ptr to dest GLvector4f (+8 skips the two pushes) */
+    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */
+    TEST_L( ECX, ECX)  /* ZF set when count == 0 */
+    JZ( LLBL(K_GTP13P2DR_finish) )              /* count was zero; go to finish */
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* source stride in bytes (added to ESI per point) */
+    OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
+    MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )    /* set dest size */
+    SHL_L( CONST(4), ECX )                      /* count *= 16 (dest points are written 16 bytes apart) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */
+    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
+    ADD_L( EDI, ECX )                           /* ECX = dest start + count*16: end-of-output sentinel */
+ALIGNTEXT32
+    MOVLPS( M(0), XMM0 )                        /* m1  | m0  (low half only; upper lanes are never stored) */
+    MOVLPS( M(12), XMM1 )                       /* m13 | m12 (low half only; upper lanes are never stored) */
+ALIGNTEXT32
+LLBL(K_GTP13P2DR_top):  /* loop body: one point per iteration */
+    MOVSS( S(0), XMM2 )                         /* ox */
+    SHUFPS( CONST(0x0), XMM2, XMM2 )            /* ox | ox | ox | ox */
+    MULPS( XMM0, XMM2 )                         /* - | - | ox*m1 | ox*m0 */
+    ADDPS( XMM1, XMM2 )                         /* - | - | ox*m1+m13 | ox*m0+m12 */
+    MOVLPS( XMM2, D(0) )  /* ->D(1) | ->D(0) */
+LLBL(K_GTP13P2DR_skip):  /* no jump in this routine targets this label */
+    ADD_L    ( CONST(16), EDI )  /* next dest point (fixed 16-byte step) */
+    ADD_L    ( EAX, ESI )  /* next source point (source stride) */
+    CMP_L    ( ECX, EDI )  /* reached the end-of-output sentinel? */
+    JNE      ( LLBL(K_GTP13P2DR_top) )  /* no -> process next point */
+LLBL(K_GTP13P2DR_finish):
+    POP_L    ( EDI )  /* restore in reverse push order */
+    POP_L    ( ESI )
+    RET
+#undef FRAME_OFFSET
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points1_2d_no_rot)  /* SSE: 1-component points: D(0) = ox*m0 + m12, D(1) = m13 */
+HIDDEN(_mesa_sse_transform_points1_2d_no_rot)
+GLNAME(_mesa_sse_transform_points1_2d_no_rot):
+#define FRAME_OFFSET 8  /* two 4-byte pushes below; presumably consumed by ARG_MATRIX (defined in an included header) */
+        PUSH_L( ESI )  /* save ESI; restored at finish */
+        PUSH_L( EDI )  /* save EDI; restored at finish */
+        MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI )      /* ptr to source GLvector4f (+8 skips the two pushes) */
+        MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI )        /* ptr to dest GLvector4f (+8 skips the two pushes) */
+        MOV_L( ARG_MATRIX, EDX )                /* ptr to matrix */
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )    /* source count */
+        TEST_L( ECX, ECX)  /* ZF set when count == 0 */
+        JZ( LLBL(K_GTP13P2DNRR_finish) )        /* count was zero; go to finish */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* source stride in bytes (added to ESI per point) */
+        OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )       /* set dest flags */
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )    /* set dest count */
+        MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )        /* set dest size */
+        SHL_L( CONST(4), ECX )                  /* count *= 16 (dest points are written 16 bytes apart) */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )    /* ptr to first source vertex */
+        MOV_L( REGOFF(V4F_START, EDI), EDI )    /* ptr to first dest vertex */
+        ADD_L( EDI, ECX )                       /* ECX = dest start + count*16: end-of-output sentinel */
+ALIGNTEXT32
+        MOVSS( M(0), XMM0 )                     /* m0 */
+        MOVSS( M(12), XMM1 )                    /* m12 */
+        MOVSS( M(13), XMM2 )                    /* m13 */
+ALIGNTEXT32
+LLBL(K_GTP13P2DNRR_top):  /* loop body: one point per iteration */
+        MOVSS( S(0), XMM3 )                     /* ox */
+        MULSS( XMM0, XMM3 )                     /* ox*m0 */
+        ADDSS( XMM1, XMM3 )                     /* ox*m0+m12 */
+        MOVSS( XMM3, D(0) )  /* ->D(0) */
+        MOVSS( XMM2, D(1) )  /* m13->D(1): same value for every point */
+LLBL(K_GTP13P2DNRR_skip):  /* no jump in this routine targets this label */
+        ADD_L( CONST(16), EDI )  /* next dest point (fixed 16-byte step) */
+        ADD_L( EAX, ESI )  /* next source point (source stride) */
+        CMP_L( ECX, EDI )  /* reached the end-of-output sentinel? */
+        JNE( LLBL(K_GTP13P2DNRR_top) )  /* no -> process next point */
+LLBL(K_GTP13P2DNRR_finish):
+        POP_L( EDI )  /* restore in reverse push order */
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points1_3d)  /* SSE: 1-component points: D(0..2) = ox*m[0..2] + m[12..14]; D(3) is not written */
+HIDDEN(_mesa_sse_transform_points1_3d)
+GLNAME(_mesa_sse_transform_points1_3d):
+#define FRAME_OFFSET 8  /* two 4-byte pushes below; presumably consumed by ARG_MATRIX (defined in an included header) */
+        PUSH_L( ESI )  /* save ESI; restored at finish */
+        PUSH_L( EDI )  /* save EDI; restored at finish */
+        MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI )      /* ptr to source GLvector4f (+8 skips the two pushes) */
+        MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI )        /* ptr to dest GLvector4f (+8 skips the two pushes) */
+        MOV_L( ARG_MATRIX, EDX )                /* ptr to matrix */
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )    /* source count */
+        TEST_L( ECX, ECX)  /* ZF set when count == 0 */
+        JZ( LLBL(K_GTP13P3DR_finish) )  /* count was zero; go to finish */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* source stride in bytes (added to ESI per point) */
+        OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )       /* set dest flags */
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )    /* set dest count */
+        MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )        /* set dest size */
+        SHL_L( CONST(4), ECX )                  /* count *= 16 (dest points are written 16 bytes apart) */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )    /* ptr to first source vertex */
+        MOV_L( REGOFF(V4F_START, EDI), EDI )    /* ptr to first dest vertex */
+        ADD_L( EDI, ECX )                       /* ECX = dest start + count*16: end-of-output sentinel */
+ALIGNTEXT32
+        MOVAPS( M(0), XMM0 )                    /* m3  | m2  | m1  |  m0 (aligned load: matrix must be 16-byte aligned) */
+        MOVAPS( M(12), XMM1 )                   /* m15 | m14 | m13 | m12 */
+ALIGNTEXT32
+LLBL(K_GTP13P3DR_top):  /* loop body: one point per iteration */
+        MOVSS( S(0), XMM2 )                     /* ox */
+        SHUFPS( CONST(0x0), XMM2, XMM2 )        /* ox | ox | ox | ox */
+        MULPS( XMM0, XMM2 )                     /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
+        ADDPS( XMM1, XMM2 )                     /* +m15  | +m14  | +m13  | +m12  */
+        MOVLPS( XMM2, D(0) )                    /*   -   |   -   | ->D(1)| ->D(0)*/
+        UNPCKHPS( XMM2, XMM2 )                  /* ox*m3+m15 | ox*m3+m15 | ox*m2+m14 | ox*m2+m14 */
+        MOVSS( XMM2, D(2) )  /* low lane (ox*m2+m14) ->D(2) */
+LLBL(K_GTP13P3DR_skip):  /* no jump in this routine targets this label */
+        ADD_L( CONST(16), EDI )  /* next dest point (fixed 16-byte step) */
+        ADD_L( EAX, ESI )  /* next source point (source stride) */
+        CMP_L( ECX, EDI )  /* reached the end-of-output sentinel? */
+        JNE( LLBL(K_GTP13P3DR_top) )  /* no -> process next point */
+LLBL(K_GTP13P3DR_finish):
+        POP_L( EDI )  /* restore in reverse push order */
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+#endif
+#if defined (__ELF__) && defined (__linux__)
+        .section .note.GNU-stack,"",%progbits
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/sse_xform2.S
 ,0 → 1,466
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/** TODO:
+  * - insert PREFETCH instructions to avoid cache misses!
+  * - some more optimizations are possible...
+  * - for 40-50% more performance in the SSE functions, the
+  *   data (trans-matrix, src_vert, dst_vert) needs to be 16-byte aligned!
+  */
+#ifdef USE_SSE_ASM
+#include "assyntax.h"
+#include "matypes.h"
+#include "xform_args.h"
+   SEG_TEXT
+#define S(i)    REGOFF(i * 4, ESI)      /* i-th 32-bit element at the source pointer; i is not parenthesized, so pass plain constants only */
+#define D(i)    REGOFF(i * 4, EDI)      /* i-th 32-bit element at the dest pointer */
+#define M(i)    REGOFF(i * 4, EDX)      /* i-th 32-bit element of the matrix addressed by EDX */
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points2_general)  /* SSE: 2-component points through a general matrix: D(0..3) = ox*m[0..3] + oy*m[4..7] + m[12..15] */
+HIDDEN (_mesa_sse_transform_points2_general)
+GLNAME( _mesa_sse_transform_points2_general ):
+#define FRAME_OFFSET 8  /* two 4-byte pushes below; presumably consumed by ARG_MATRIX (defined in an included header) */
+    PUSH_L    ( ESI )  /* save ESI; restored at finish */
+    PUSH_L    ( EDI )  /* save EDI; restored at finish */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI )  /* ptr to source GLvector4f (+8 skips the two pushes) */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI )    /* ptr to dest GLvector4f (+8 skips the two pushes) */
+    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */
+    TEST_L( ECX, ECX )  /* ZF set when count == 0 */
+    JZ( LLBL(K_GTP2GR_finish) )                 /* count was zero; go to finish */
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* source stride in bytes (added to ESI per point) */
+    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
+    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )    /* set dest size */
+    SHL_L( CONST(4), ECX )                      /* count *= 16 (dest points are written 16 bytes apart) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */
+    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
+    ADD_L( EDI, ECX )                           /* ECX = dest start + count*16: end-of-output sentinel */
+ALIGNTEXT32
+    MOVAPS( M(0), XMM0 )                        /* m3  | m2  | m1  | m0 (aligned load: matrix must be 16-byte aligned) */
+    MOVAPS( M(4), XMM1 )                        /* m7  | m6  | m5  | m4 */
+    MOVAPS( M(12), XMM2 )                       /* m15 | m14 | m13 | m12 */
+ALIGNTEXT32
+LLBL(K_GTP2GR_top):  /* loop body: one point per iteration */
+    MOVSS( S(0), XMM3 )                         /* ox */
+    SHUFPS( CONST(0x0), XMM3, XMM3 )            /* ox | ox | ox | ox */
+    MULPS( XMM0, XMM3 )                         /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
+    MOVSS( S(1), XMM4 )                         /* oy */
+    SHUFPS( CONST(0x0), XMM4, XMM4 )            /* oy | oy | oy | oy */
+    MULPS( XMM1, XMM4 )                         /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */
+    ADDPS( XMM4, XMM3 )  /* ox terms + oy terms */
+    ADDPS( XMM2, XMM3 )  /* +m15 | +m14 | +m13 | +m12 */
+    MOVAPS( XMM3, D(0) )  /* store all four lanes ->D(3)..D(0); aligned store: dest data must be 16-byte aligned */
+LLBL(K_GTP2GR_skip):  /* no jump in this routine targets this label */
+    ADD_L     ( CONST(16), EDI )  /* next dest point (fixed 16-byte step) */
+    ADD_L     ( EAX, ESI )  /* next source point (source stride) */
+    CMP_L     ( ECX, EDI )  /* reached the end-of-output sentinel? */
+    JNE       ( LLBL(K_GTP2GR_top) )  /* no -> process next point */
+LLBL(K_GTP2GR_finish):
+    POP_L     ( EDI )  /* restore in reverse push order */
+    POP_L     ( ESI )
+    RET
+#undef FRAME_OFFSET
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points2_identity)  /* identity transform of 2-component points: D(0) = S(0), D(1) = S(1), copied through EDX (no SSE registers used) */
+HIDDEN(_mesa_sse_transform_points2_identity)
+GLNAME( _mesa_sse_transform_points2_identity ):
+#define FRAME_OFFSET 8  /* two 4-byte pushes below; not referenced by name inside this routine */
+    PUSH_L    ( ESI )  /* save ESI; restored at finish */
+    PUSH_L    ( EDI )  /* save EDI; restored at finish */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f (+8 skips the two pushes) */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI )    /* ptr to dest GLvector4f (+8 skips the two pushes) */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */
+    TEST_L( ECX, ECX)  /* ZF set when count == 0 */
+    JZ( LLBL(K_GTP2IR_finish) )                 /* count was zero; go to finish */
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* source stride in bytes (added to ESI per point) */
+    OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
+    MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )    /* set dest size */
+    SHL_L( CONST(4), ECX )                      /* count *= 16 (dest points are written 16 bytes apart) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */
+    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
+    ADD_L( EDI, ECX )                           /* ECX = dest start + count*16: end-of-output sentinel */
+    CMP_L( ESI, EDI )  /* source data and dest data start at the same address? */
+    JE( LLBL(K_GTP2IR_finish) )  /* yes -> skip the copy; dest flags/count/size were already set above */
+ALIGNTEXT32
+LLBL(K_GTP2IR_top):  /* loop body: one point per iteration */
+    MOV_L     ( S(0), EDX )  /* x as a raw 32-bit word */
+    MOV_L     ( EDX, D(0) )  /* ->D(0) */
+    MOV_L     ( S(1), EDX )  /* y as a raw 32-bit word */
+    MOV_L     ( EDX, D(1) )  /* ->D(1) */
+LLBL(K_GTP2IR_skip):  /* no jump in this routine targets this label */
+    ADD_L     ( CONST(16), EDI )  /* next dest point (fixed 16-byte step) */
+    ADD_L     ( EAX, ESI )  /* next source point (source stride) */
+    CMP_L     ( ECX, EDI )  /* reached the end-of-output sentinel? */
+    JNE       ( LLBL(K_GTP2IR_top) )  /* no -> process next point */
+LLBL(K_GTP2IR_finish):
+    POP_L     ( EDI )  /* restore in reverse push order */
+    POP_L     ( ESI )
+    RET
+#undef FRAME_OFFSET
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points2_3d_no_rot)  /* SSE: 2-component points: D(0) = ox*m0 + m12, D(1) = oy*m5 + m13, D(2) = m14 */
+HIDDEN(_mesa_sse_transform_points2_3d_no_rot)
+GLNAME(_mesa_sse_transform_points2_3d_no_rot):
+#define FRAME_OFFSET 8  /* two 4-byte pushes below; presumably consumed by ARG_MATRIX (defined in an included header) */
+    PUSH_L( ESI )  /* save ESI; restored at finish */
+    PUSH_L( EDI )  /* save EDI; restored at finish */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI )  /* ptr to source GLvector4f (+8 skips the two pushes) */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI )    /* ptr to dest GLvector4f (+8 skips the two pushes) */
+    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */
+    TEST_L( ECX, ECX)  /* ZF set when count == 0 */
+    JZ( LLBL(K_GTP23DNRR_finish) )              /* count was zero; go to finish */
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* source stride in bytes (added to ESI per point) */
+    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
+    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )    /* set dest size */
+    SHL_L( CONST(4), ECX )                      /* count *= 16 (dest points are written 16 bytes apart) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */
+    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
+    ADD_L( EDI, ECX )                           /* ECX = dest start + count*16: end-of-output sentinel */
+    XORPS( XMM0, XMM0 )                         /* clean the working register (upper lanes stay zero; MOVLPS below only replaces the low half) */
+ALIGNTEXT32
+    MOVSS    ( M(0), XMM1 )                     /* - | - |  -  | m0  */
+    MOVSS    ( M(5), XMM2 )                     /* - | - |  -  | m5  */
+    UNPCKLPS ( XMM2, XMM1 )                     /* - | - | m5  | m0  */
+    MOVLPS   ( M(12), XMM2 )                    /* - | - | m13 | m12 */
+    MOVSS    ( M(14), XMM3 )                    /* - | - |  -  | m14 */
+ALIGNTEXT32
+LLBL(K_GTP23DNRR_top):  /* loop body: one point per iteration */
+    MOVLPS   ( S(0), XMM0 )                     /* - | - |  oy   | ox */
+    MULPS    ( XMM1, XMM0 )                     /* - | - | oy*m5 | ox*m0 */
+    ADDPS    ( XMM2, XMM0 )                     /* - | - | +m13  | +m12 */
+    MOVLPS   ( XMM0, D(0) )                     /* -> D(1) | -> D(0) */
+    MOVSS    ( XMM3, D(2) )                     /* m14 -> D(2): same value for every point */
+LLBL(K_GTP23DNRR_skip):  /* no jump in this routine targets this label */
+    ADD_L    ( CONST(16), EDI )  /* next dest point (fixed 16-byte step) */
+    ADD_L    ( EAX, ESI )  /* next source point (source stride) */
+    CMP_L    ( ECX, EDI )  /* reached the end-of-output sentinel? */
+    JNE      ( LLBL(K_GTP23DNRR_top) )  /* no -> process next point */
+LLBL(K_GTP23DNRR_finish):
+    POP_L    ( EDI )  /* restore in reverse push order */
+    POP_L    ( ESI )
+    RET
+#undef FRAME_OFFSET
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points2_perspective)  /* SSE: 2-component points: D(0) = ox*m0, D(1) = oy*m5, D(2) = m14, D(3) = 0 */
+HIDDEN(_mesa_sse_transform_points2_perspective)
+GLNAME(_mesa_sse_transform_points2_perspective):
+#define FRAME_OFFSET 8  /* two 4-byte pushes below; presumably consumed by ARG_MATRIX (defined in an included header) */
+    PUSH_L   ( ESI )  /* save ESI; restored at finish */
+    PUSH_L   ( EDI )  /* save EDI; restored at finish */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI )  /* ptr to source GLvector4f (+8 skips the two pushes) */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI )    /* ptr to dest GLvector4f (+8 skips the two pushes) */
+    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */
+    TEST_L( ECX, ECX)  /* ZF set when count == 0 */
+    JZ( LLBL(K_GTP23PR_finish) )                /* count was zero; go to finish */
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* source stride in bytes (added to ESI per point) */
+    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
+    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )    /* set dest size */
+    SHL_L( CONST(4), ECX )                      /* count *= 16 (dest points are written 16 bytes apart) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */
+    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
+    ADD_L( EDI, ECX )                           /* ECX = dest start + count*16: end-of-output sentinel */
+ALIGNTEXT32
+    MOVSS    ( M(0), XMM1 )                     /* -  | -  |  -  | m0  */
+    MOVSS    ( M(5), XMM2 )                     /* -  | -  |  -  | m5  */
+    UNPCKLPS ( XMM2, XMM1 )                     /* -  | -  | m5  | m0  */
+    MOVSS    ( M(14), XMM3 )                    /* m14 */
+    XORPS    ( XMM0, XMM0 )                     /* 0 | 0 | 0 | 0 */
+ALIGNTEXT32
+LLBL(K_GTP23PR_top):  /* loop body: one point per iteration */
+    MOVLPS( S(0), XMM4 )                        /* oy | ox */
+    MULPS( XMM1, XMM4 )                         /* oy*m5 | ox*m0 */
+    MOVLPS( XMM4, D(0) )                        /* ->D(1) | ->D(0) */
+    MOVSS( XMM3, D(2) )                         /* m14->D(2) */
+    MOVSS( XMM0, D(3) )                         /* 0->D(3) */
+LLBL(K_GTP23PR_skip):  /* no jump in this routine targets this label */
+    ADD_L( CONST(16), EDI )  /* next dest point (fixed 16-byte step) */
+    ADD_L( EAX, ESI )  /* next source point (source stride) */
+    CMP_L( ECX, EDI )  /* reached the end-of-output sentinel? */
+    JNE( LLBL(K_GTP23PR_top) )  /* no -> process next point */
+LLBL(K_GTP23PR_finish):
+    POP_L    ( EDI )  /* restore in reverse push order */
+    POP_L    ( ESI )
+    RET
+#undef FRAME_OFFSET
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points2_2d)
+HIDDEN(_mesa_sse_transform_points2_2d)
+GLNAME(_mesa_sse_transform_points2_2d):   /* Transform 2-component points by a
+ * 2D matrix.  Per vertex: D(0) = ox*m0 + oy*m4 + m12 and
+ * D(1) = ox*m1 + oy*m5 + m13.  Only two floats are stored per dest vertex;
+ * the dest vector is marked VEC_SIZE_2 / size 2. */
+#define FRAME_OFFSET 8
+    PUSH_L( ESI )                               /* preserve ESI; restored before RET */
+    PUSH_L( EDI )                               /* preserve EDI; restored before RET */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI )  /* ptr to source GLvector4f */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI )    /* ptr to dest GLvector4f */
+    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */
+    TEST_L( ECX, ECX)                           /* ZF set when count == 0 */
+    JZ( LLBL(K_GTP23P2DR_finish) )              /* count was zero; go to finish */
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* stride */
+    OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
+    MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )    /* set dest size */
+    SHL_L( CONST(4), ECX )                      /* count *= 16 (bytes per dest vertex) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */
+    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
+    ADD_L( EDI, ECX )                           /* ECX = dest start + count*16 = loop end address */
+ALIGNTEXT32
+    MOVLPS( M(0), XMM0 )                        /* m1  | m0 */
+    MOVLPS( M(4), XMM1 )                        /* m5  | m4 */
+    MOVLPS( M(12), XMM2 )                       /* m13 | m12 */
+ALIGNTEXT32
+LLBL(K_GTP23P2DR_top):
+    MOVSS( S(0), XMM3 )                         /* ox */
+    SHUFPS( CONST(0x0), XMM3, XMM3 )            /* ox | ox */
+    MULPS( XMM0, XMM3 )                         /* ox*m1 | ox*m0 */
+    MOVSS( S(1), XMM4 )                         /* oy */
+    SHUFPS( CONST(0x0), XMM4, XMM4 )            /* oy | oy */
+    MULPS( XMM1, XMM4 )                         /* oy*m5 | oy*m4 */
+    ADDPS( XMM4, XMM3 )                         /* ox*m1+oy*m5 | ox*m0+oy*m4 */
+    ADDPS( XMM2, XMM3 )                         /* +m13 | +m12 */
+    MOVLPS( XMM3, D(0) )                        /* ->D(1) | ->D(0) */
+LLBL(K_GTP23P2DR_skip):
+    ADD_L    ( CONST(16), EDI )                 /* next dest vertex (fixed 16-byte step) */
+    ADD_L    ( EAX, ESI )                       /* next source vertex (step = stride) */
+    CMP_L    ( ECX, EDI )                       /* reached the loop end address? */
+    JNE      ( LLBL(K_GTP23P2DR_top) )          /* no: transform the next vertex */
+LLBL(K_GTP23P2DR_finish):
+    POP_L    ( EDI )
+    POP_L    ( ESI )
+    RET
+#undef FRAME_OFFSET
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points2_2d_no_rot)
+HIDDEN(_mesa_sse_transform_points2_2d_no_rot)
+GLNAME(_mesa_sse_transform_points2_2d_no_rot):   /* Transform 2-component
+ * points using only the scale (m0, m5) and translation (m12, m13) entries:
+ * D(0) = ox*m0 + m12, D(1) = oy*m5 + m13.  Only two floats are stored per
+ * dest vertex; the dest vector is marked VEC_SIZE_2 / size 2. */
+#define FRAME_OFFSET 8
+        PUSH_L( ESI )                           /* preserve ESI; restored before RET */
+        PUSH_L( EDI )                           /* preserve EDI; restored before RET */
+        MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI )      /* ptr to source GLvector4f */
+        MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI )        /* ptr to dest GLvector4f */
+        MOV_L( ARG_MATRIX, EDX )                /* ptr to matrix */
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )    /* source count */
+        TEST_L( ECX, ECX)                       /* ZF set when count == 0 */
+        JZ( LLBL(K_GTP23P2DNRR_finish) )        /* count was zero; go to finish */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* stride */
+        OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )       /* set dest flags */
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )    /* set dest count */
+        MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )        /* set dest size */
+        SHL_L( CONST(4), ECX )                  /* count *= 16 (bytes per dest vertex) */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )    /* ptr to first source vertex */
+        MOV_L( REGOFF(V4F_START, EDI), EDI )    /* ptr to first dest vertex */
+        ADD_L( EDI, ECX )                       /* ECX = dest start + count*16 = loop end address */
+ALIGNTEXT32
+        MOVSS    ( M(0), XMM1 )                 /* m0 */
+        MOVSS    ( M(5), XMM2 )                 /* m5 */
+        UNPCKLPS ( XMM2, XMM1 )                 /* m5 | m0 */
+        MOVLPS   ( M(12), XMM2 )                /* m13 | m12 */
+ALIGNTEXT32
+LLBL(K_GTP23P2DNRR_top):
+        MOVLPS( S(0), XMM0 )                    /* oy | ox */
+        MULPS( XMM1, XMM0 )                     /* oy*m5 | ox*m0 */
+        ADDPS( XMM2, XMM0 )                     /* +m13 | +m12 */
+        MOVLPS( XMM0, D(0) )                    /* ->D(1) | ->D(0) */
+LLBL(K_GTP23P2DNRR_skip):
+        ADD_L( CONST(16), EDI )                 /* next dest vertex (fixed 16-byte step) */
+        ADD_L( EAX, ESI )                       /* next source vertex (step = stride) */
+        CMP_L( ECX, EDI )                       /* reached the loop end address? */
+        JNE( LLBL(K_GTP23P2DNRR_top) )          /* no: transform the next vertex */
+LLBL(K_GTP23P2DNRR_finish):
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points2_3d)
+HIDDEN(_mesa_sse_transform_points2_3d)
+GLNAME(_mesa_sse_transform_points2_3d):   /* Transform 2-component points by a
+ * 3D matrix.  Per vertex: D(0) = ox*m0 + oy*m4 + m12,
+ * D(1) = ox*m1 + oy*m5 + m13, D(2) = ox*m2 + oy*m6 + m14.  The dest vector
+ * is marked VEC_SIZE_3 / size 3.  The matrix rows are loaded with MOVAPS,
+ * so the matrix pointer must be 16-byte aligned. */
+#define FRAME_OFFSET 8
+        PUSH_L( ESI )                           /* preserve ESI; restored before RET */
+        PUSH_L( EDI )                           /* preserve EDI; restored before RET */
+        MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI )      /* ptr to source GLvector4f */
+        MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI )        /* ptr to dest GLvector4f */
+        MOV_L( ARG_MATRIX, EDX )                /* ptr to matrix */
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )    /* source count */
+        TEST_L( ECX, ECX)                       /* ZF set when count == 0 */
+        JZ( LLBL(K_GTP23P3DR_finish) )  /* count was zero; go to finish */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* stride */
+        OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )       /* set dest flags */
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )    /* set dest count */
+        MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )        /* set dest size */
+        SHL_L( CONST(4), ECX )                  /* count *= 16 (bytes per dest vertex) */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )    /* ptr to first source vertex */
+        MOV_L( REGOFF(V4F_START, EDI), EDI )    /* ptr to first dest vertex */
+        ADD_L( EDI, ECX )                       /* ECX = dest start + count*16 = loop end address */
+ALIGNTEXT32
+        MOVAPS( M(0), XMM0 )                    /* m2  | m1  | m0 */
+        MOVAPS( M(4), XMM1 )                    /* m6  | m5  | m4 */
+        MOVAPS( M(12), XMM2 )                   /* m14 | m13 | m12 */
+ALIGNTEXT32
+LLBL(K_GTP23P3DR_top):
+        MOVSS( S(0), XMM3 )                     /* ox */
+        SHUFPS( CONST(0x0), XMM3, XMM3 )        /* ox | ox | ox */
+        MULPS( XMM0, XMM3 )                     /* ox*m2 | ox*m1 | ox*m0 */
+        MOVSS( S(1), XMM4 )                     /* oy */
+        SHUFPS( CONST(0x0), XMM4, XMM4 )        /* oy | oy | oy */
+        MULPS( XMM1, XMM4 )                     /* oy*m6 | oy*m5 | oy*m4 */
+        ADDPS( XMM4, XMM3 )                     /* sum of the ox and oy products */
+        ADDPS( XMM2, XMM3 )                     /* +m14 | +m13 | +m12 */
+        MOVLPS( XMM3, D(0) )                    /* ->D(1) | ->D(0) */
+        UNPCKHPS( XMM3, XMM3 )                  /* move the third lane (z result) down to lane 0 */
+        MOVSS( XMM3, D(2) )                     /* ->D(2) */
+LLBL(K_GTP23P3DR_skip):
+        ADD_L( CONST(16), EDI )                 /* next dest vertex (fixed 16-byte step) */
+        ADD_L( EAX, ESI )                       /* next source vertex (step = stride) */
+        CMP_L( ECX, EDI )                       /* reached the loop end address? */
+        JNE( LLBL(K_GTP23P3DR_top) )            /* no: transform the next vertex */
+LLBL(K_GTP23P3DR_finish):
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+#endif
+#if defined (__ELF__) && defined (__linux__)
+        .section .note.GNU-stack,"",%progbits
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/sse_xform3.S
 ,0 → 1,512
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/** TODO:
+  * - insert PREFETCH instructions to avoid cache-misses !
+  * - some more optimizations are possible...
+  * - for 40-50% more performance in the SSE-functions, the
+  *   data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned !
+  */
+#ifdef USE_SSE_ASM
+#include "assyntax.h"
+#include "matypes.h"
+#include "xform_args.h"
+   SEG_TEXT
+#define S(i)    REGOFF(i * 4, ESI)     /* i-th 4-byte element at ESI (source vertex in the routines below) */
+#define D(i)    REGOFF(i * 4, EDI)     /* i-th 4-byte element at EDI (dest vertex in the routines below) */
+#define M(i)    REGOFF(i * 4, EDX)     /* i-th 4-byte element at EDX (matrix pointer in the routines below) */
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points3_general)
+HIDDEN(_mesa_sse_transform_points3_general)
+GLNAME( _mesa_sse_transform_points3_general ):   /* Transform 3-component
+ * points by a general 4x4 matrix.  Per vertex the four outputs are
+ * ox*(m0..m3) + oy*(m4..m7) + oz*(m8..m11) + (m12..m15).  The dest vector is
+ * marked VEC_SIZE_4 / size 4.  MOVAPS is used for the matrix loads and the
+ * result store, so the matrix and every dest vertex must be 16-byte
+ * aligned. */
+#define FRAME_OFFSET 8
+    PUSH_L    ( ESI )                           /* preserve ESI; restored before RET */
+    PUSH_L    ( EDI )                           /* preserve EDI; restored before RET */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI )  /* ptr to source GLvector4f */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI )    /* ptr to dest GLvector4f */
+    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */
+    CMP_L     ( CONST(0), ECX )                 /* count == 0 ? */
+    JE        ( LLBL(K_GTPGR_finish) )          /* yes -> nothing to do. */
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* stride */
+    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
+    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )    /* set dest size */
+    SHL_L( CONST(4), ECX )                      /* count *= 16 (bytes per dest vertex) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */
+    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
+    ADD_L( EDI, ECX )                           /* ECX = dest start + count*16 = loop end address */
+ALIGNTEXT32
+    MOVAPS    ( REGOFF(0, EDX), XMM0 )  /* m3  | m2  | m1  | m0  */
+    MOVAPS    ( REGOFF(16, EDX), XMM1 ) /* m7  | m6  | m5  | m4  */
+    MOVAPS    ( REGOFF(32, EDX), XMM2 ) /* m11 | m10 | m9  | m8  */
+    MOVAPS    ( REGOFF(48, EDX), XMM3 ) /* m15 | m14 | m13 | m12 */
+ALIGNTEXT32
+LLBL(K_GTPGR_top):
+    MOVSS     ( REGOFF(0, ESI), XMM4 )          /*    |    |    | ox */
+    SHUFPS    ( CONST(0x0), XMM4, XMM4 )        /* ox | ox | ox | ox */
+    MOVSS     ( REGOFF(4, ESI), XMM5 )          /*    |    |    | oy */
+    SHUFPS    ( CONST(0x0), XMM5, XMM5 )        /* oy | oy | oy | oy */
+    MOVSS     ( REGOFF(8, ESI), XMM6 )          /*    |    |    | oz */
+    SHUFPS    ( CONST(0x0), XMM6, XMM6 )        /* oz | oz | oz | oz */
+    MULPS     ( XMM0, XMM4 )            /* m3*ox  | m2*ox  | m1*ox | m0*ox */
+    MULPS     ( XMM1, XMM5 )            /* m7*oy  | m6*oy  | m5*oy | m4*oy */
+    MULPS     ( XMM2, XMM6 )            /* m11*oz | m10*oz | m9*oz | m8*oz */
+    ADDPS     ( XMM5, XMM4 )            /* ox terms + oy terms */
+    ADDPS     ( XMM6, XMM4 )            /* + oz terms */
+    ADDPS     ( XMM3, XMM4 )            /* + m15 | m14 | m13 | m12 */
+    MOVAPS    ( XMM4, REGOFF(0, EDI) )  /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
+LLBL(K_GTPGR_skip):
+    ADD_L     ( CONST(16), EDI )                /* next dest vertex (fixed 16-byte step) */
+    ADD_L     ( EAX, ESI )                      /* next source vertex (step = stride) */
+    CMP_L     ( ECX, EDI )                      /* reached the loop end address? */
+    JNE       ( LLBL(K_GTPGR_top) )             /* no: transform the next vertex */
+LLBL(K_GTPGR_finish):
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+#undef FRAME_OFFSET
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points3_identity)
+HIDDEN(_mesa_sse_transform_points3_identity)
+GLNAME( _mesa_sse_transform_points3_identity ):   /* Identity transform for
+ * 3-component points: copies three floats per vertex from source to dest.
+ * The matrix argument is never read.  The dest vector is marked
+ * VEC_SIZE_3 / size 3; the copy loop is skipped when the source and dest
+ * start pointers are equal. */
+#define FRAME_OFFSET 8
+    PUSH_L    ( ESI )                           /* preserve ESI; restored before RET */
+    PUSH_L    ( EDI )                           /* preserve EDI; restored before RET */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI )  /* ptr to source GLvector4f */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI )    /* ptr to dest GLvector4f */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */
+    TEST_L( ECX, ECX)                           /* ZF set when count == 0 */
+    JZ( LLBL(K_GTPIR_finish) )                  /* count was zero; go to finish */
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* stride */
+    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
+    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )    /* set dest size */
+    SHL_L( CONST(4), ECX )                      /* count *= 16 (bytes per dest vertex) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */
+    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
+    ADD_L( EDI, ECX )                           /* ECX = dest start + count*16 = loop end address */
+    CMP_L( ESI, EDI )                           /* source data == dest data ? */
+    JE( LLBL(K_GTPIR_finish) )                  /* yes: skip the copy (flags/count/size already set) */
+ALIGNTEXT32
+LLBL(K_GTPIR_top):
+    MOVLPS    ( S(0), XMM0 )                    /* oy | ox */
+    MOVLPS    ( XMM0, D(0) )                    /* ->D(1) | ->D(0) */
+    MOVSS     ( S(2), XMM0 )                    /* oz */
+    MOVSS     ( XMM0, D(2) )                    /* ->D(2) */
+LLBL(K_GTPIR_skip):
+    ADD_L     ( CONST(16), EDI )                /* next dest vertex (fixed 16-byte step) */
+    ADD_L     ( EAX, ESI )                      /* next source vertex (step = stride) */
+    CMP_L     ( ECX, EDI )                      /* reached the loop end address? */
+    JNE       ( LLBL(K_GTPIR_top) )             /* no: copy the next vertex */
+LLBL(K_GTPIR_finish):
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+#undef FRAME_OFFSET
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points3_3d_no_rot)
+HIDDEN(_mesa_sse_transform_points3_3d_no_rot)
+GLNAME(_mesa_sse_transform_points3_3d_no_rot):   /* Transform 3-component
+ * points using only the scale (m0, m5, m10) and translation (m12, m13, m14)
+ * entries: D(0) = s0*m0 + m12, D(1) = s1*m5 + m13, D(2) = s2*m10 + m14.
+ * The dest vector is marked VEC_SIZE_3 / size 3. */
+#define FRAME_OFFSET 8
+    PUSH_L( ESI )                               /* preserve ESI; restored before RET */
+    PUSH_L( EDI )                               /* preserve EDI; restored before RET */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI )  /* ptr to source GLvector4f */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI )    /* ptr to dest GLvector4f */
+    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */
+    TEST_L( ECX, ECX)                           /* ZF set when count == 0 */
+    JZ( LLBL(K_GTP3DNRR_finish) )               /* count was zero; go to finish */
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* stride */
+    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
+    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )    /* set dest size */
+    SHL_L( CONST(4), ECX )                      /* count *= 16 (bytes per dest vertex) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */
+    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
+    ADD_L( EDI, ECX )                           /* ECX = dest start + count*16 = loop end address */
+    XORPS( XMM0, XMM0 )                         /* clean the working register */
+ALIGNTEXT32
+    MOVSS    ( M(0), XMM1 )                     /* - | - |  -  | m0  */
+    MOVSS    ( M(5), XMM2 )                     /* - | - |  -  | m5  */
+    UNPCKLPS ( XMM2, XMM1 )                     /* - | - | m5  | m0  */
+    MOVLPS   ( M(12), XMM2 )                    /* - | - | m13 | m12 */
+    MOVSS    ( M(10), XMM3 )                    /* - | - |  -  | m10 */
+    MOVSS    ( M(14), XMM4 )                    /* - | - |  -  | m14 */
+ALIGNTEXT32
+LLBL(K_GTP3DNRR_top):
+    MOVLPS   ( S(0), XMM0 )                     /* - | - |  s1   | s0 */
+    MULPS    ( XMM1, XMM0 )                     /* - | - | s1*m5 | s0*m0 */
+    ADDPS    ( XMM2, XMM0 )                     /* - | - | +m13  | +m12 */
+    MOVLPS   ( XMM0, D(0) )                     /* -> D(1) | -> D(0) */
+    MOVSS    ( S(2), XMM0 )                     /* sz */
+    MULSS    ( XMM3, XMM0 )                     /* sz*m10 */
+    ADDSS    ( XMM4, XMM0 )                     /* +m14 */
+    MOVSS    ( XMM0, D(2) )                     /* -> D(2) */
+LLBL(K_GTP3DNRR_skip):
+    ADD_L    ( CONST(16), EDI )                 /* next dest vertex (fixed 16-byte step) */
+    ADD_L    ( EAX, ESI )                       /* next source vertex (step = stride) */
+    CMP_L    ( ECX, EDI )                       /* reached the loop end address? */
+    JNE      ( LLBL(K_GTP3DNRR_top) )           /* no: transform the next vertex */
+LLBL(K_GTP3DNRR_finish):
+    POP_L    ( EDI )
+    POP_L    ( ESI )
+    RET
+#undef FRAME_OFFSET
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points3_perspective)
+HIDDEN(_mesa_sse_transform_points3_perspective)
+GLNAME(_mesa_sse_transform_points3_perspective):   /* Transform 3-component
+ * points by a perspective matrix.  Per vertex: D(0) = ox*m0 + oz*m8,
+ * D(1) = oy*m5 + oz*m9, D(2) = oz*m10 + m14, D(3) = -oz.  The dest vector
+ * is marked VEC_SIZE_4 / size 4. */
+#define FRAME_OFFSET 8
+    PUSH_L   ( ESI )                            /* preserve ESI; restored before RET */
+    PUSH_L   ( EDI )                            /* preserve EDI; restored before RET */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI )  /* ptr to source GLvector4f */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI )    /* ptr to dest GLvector4f */
+    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */
+    TEST_L( ECX, ECX)                           /* ZF set when count == 0 */
+    JZ( LLBL(K_GTP3PR_finish) )                 /* count was zero; go to finish */
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* stride */
+    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
+    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )    /* set dest size */
+    SHL_L( CONST(4), ECX )                      /* count *= 16 (bytes per dest vertex) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */
+    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
+    ADD_L( EDI, ECX )                           /* ECX = dest start + count*16 = loop end address */
+ALIGNTEXT32
+    MOVSS    ( M(0), XMM1 )                     /* -  | -  |  -  | m0  */
+    MOVSS    ( M(5), XMM2 )                     /* -  | -  |  -  | m5  */
+    UNPCKLPS ( XMM2, XMM1 )                     /* -  | -  | m5  | m0  */
+    MOVLPS   ( M(8), XMM2 )                     /* -  | -  | m9  | m8  */
+    MOVSS    ( M(10), XMM3 )                    /* m10 */
+    MOVSS    ( M(14), XMM4 )                    /* m14 */
+    XORPS    ( XMM6, XMM6 )                     /* 0 */
+ALIGNTEXT32
+LLBL(K_GTP3PR_top):
+    MOVLPS   ( S(0), XMM0 )                     /* oy | ox */
+    MULPS    ( XMM1, XMM0 )                     /* oy*m5 | ox*m0 */
+    MOVSS    ( S(2), XMM5 )                     /* oz */
+    SHUFPS   ( CONST(0x0), XMM5, XMM5 )         /* oz | oz */
+    MULPS    ( XMM2, XMM5 )                     /* oz*m9 | oz*m8 */
+    ADDPS    ( XMM5, XMM0 )                     /* +oy*m5 | +ox*m0 */
+    MOVLPS   ( XMM0, D(0) )                     /* ->D(1) | ->D(0) */
+    MOVSS    ( S(2), XMM0 )                     /* oz */
+    MULSS    ( XMM3, XMM0 )                     /* oz*m10 */
+    ADDSS    ( XMM4, XMM0 )                     /* +m14 */
+    MOVSS    ( XMM0, D(2) )                     /* ->D(2) */
+    MOVSS    ( S(2), XMM0 )                     /* oz */
+    MOVSS    ( XMM6, XMM5 )                     /* 0 */
+    SUBPS    ( XMM0, XMM5 )                     /* -oz */
+    MOVSS    ( XMM5, D(3) )                     /* ->D(3) */
+LLBL(K_GTP3PR_skip):
+    ADD_L( CONST(16), EDI )                     /* next dest vertex (fixed 16-byte step) */
+    ADD_L( EAX, ESI )                           /* next source vertex (step = stride) */
+    CMP_L( ECX, EDI )                           /* reached the loop end address? */
+    JNE( LLBL(K_GTP3PR_top) )                   /* no: transform the next vertex */
+LLBL(K_GTP3PR_finish):
+    POP_L    ( EDI )
+    POP_L    ( ESI )
+    RET
+#undef FRAME_OFFSET
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points3_2d)
+HIDDEN(_mesa_sse_transform_points3_2d)
+GLNAME(_mesa_sse_transform_points3_2d):   /* Transform 3-component points by a
+ * 2D matrix.  Per vertex: D(0) = ox*m0 + oy*m4 + m12,
+ * D(1) = ox*m1 + oy*m5 + m13, and D(2) is copied unchanged from S(2).
+ * The dest vector is marked VEC_SIZE_3 / size 3. */
+#define FRAME_OFFSET 8
+    PUSH_L( ESI )                               /* preserve ESI; restored before RET */
+    PUSH_L( EDI )                               /* preserve EDI; restored before RET */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI )  /* ptr to source GLvector4f */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI )    /* ptr to dest GLvector4f */
+    MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */
+    TEST_L( ECX, ECX)                           /* ZF set when count == 0 */
+    JZ( LLBL(K_GTP3P2DR_finish) )               /* count was zero; go to finish */
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* stride */
+    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
+    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )    /* set dest size */
+    SHL_L( CONST(4), ECX )                      /* count *= 16 (bytes per dest vertex) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */
+    MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
+    ADD_L( EDI, ECX )                           /* ECX = dest start + count*16 = loop end address */
+ALIGNTEXT32
+    MOVLPS( M(0), XMM0 )                        /* m1  | m0 */
+    MOVLPS( M(4), XMM1 )                        /* m5  | m4 */
+    MOVLPS( M(12), XMM2 )                       /* m13 | m12 */
+ALIGNTEXT32
+LLBL(K_GTP3P2DR_top):
+    MOVSS    ( S(0), XMM3 )                     /* ox */
+    SHUFPS   ( CONST(0x0), XMM3, XMM3 )         /* ox | ox */
+    MULPS    ( XMM0, XMM3 )                     /* ox*m1 | ox*m0 */
+    MOVSS    ( S(1), XMM4 )                     /* oy */
+    SHUFPS   ( CONST(0x0), XMM4, XMM4 )         /* oy | oy */
+    MULPS    ( XMM1, XMM4 )                     /* oy*m5 | oy*m4 */
+    ADDPS    ( XMM4, XMM3 )                     /* ox*m1+oy*m5 | ox*m0+oy*m4 */
+    ADDPS    ( XMM2, XMM3 )                     /* +m13 | +m12 */
+    MOVLPS   ( XMM3, D(0) )                     /* ->D(1) | ->D(0) */
+    MOVSS    ( S(2), XMM3 )                     /* oz */
+    MOVSS    ( XMM3, D(2) )                     /* ->D(2), passed through unchanged */
+LLBL(K_GTP3P2DR_skip):
+    ADD_L    ( CONST(16), EDI )                 /* next dest vertex (fixed 16-byte step) */
+    ADD_L    ( EAX, ESI )                       /* next source vertex (step = stride) */
+    CMP_L    ( ECX, EDI )                       /* reached the loop end address? */
+    JNE      ( LLBL(K_GTP3P2DR_top) )           /* no: transform the next vertex */
+LLBL(K_GTP3P2DR_finish):
+    POP_L    ( EDI )
+    POP_L    ( ESI )
+    RET
+#undef FRAME_OFFSET
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points3_2d_no_rot)
+HIDDEN(_mesa_sse_transform_points3_2d_no_rot)
+GLNAME(_mesa_sse_transform_points3_2d_no_rot):   /* Transform 3-component
+ * points using only m0, m5 (scale) and m12, m13 (translation):
+ * D(0) = ox*m0 + m12, D(1) = oy*m5 + m13, and D(2) is copied unchanged from
+ * S(2).  The dest vector is marked VEC_SIZE_3 / size 3. */
+#define FRAME_OFFSET 8
+        PUSH_L( ESI )                           /* preserve ESI; restored before RET */
+        PUSH_L( EDI )                           /* preserve EDI; restored before RET */
+        MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI )      /* ptr to source GLvector4f */
+        MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI )        /* ptr to dest GLvector4f */
+        MOV_L( ARG_MATRIX, EDX )                /* ptr to matrix */
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )    /* source count */
+        TEST_L( ECX, ECX)                       /* ZF set when count == 0 */
+        JZ( LLBL(K_GTP3P2DNRR_finish) )         /* count was zero; go to finish */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* stride */
+        OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )       /* set dest flags */
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )    /* set dest count */
+        MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )        /* set dest size */
+        SHL_L( CONST(4), ECX )                  /* count *= 16 (bytes per dest vertex) */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )    /* ptr to first source vertex */
+        MOV_L( REGOFF(V4F_START, EDI), EDI )    /* ptr to first dest vertex */
+        ADD_L( EDI, ECX )                       /* ECX = dest start + count*16 = loop end address */
+ALIGNTEXT32
+        MOVSS    ( M(0), XMM1 )                 /* m0 */
+        MOVSS    ( M(5), XMM2 )                 /* m5 */
+        UNPCKLPS ( XMM2, XMM1 )                 /* m5 | m0 */
+        MOVLPS   ( M(12), XMM2 )                /* m13 | m12 */
+ALIGNTEXT32
+LLBL(K_GTP3P2DNRR_top):
+        MOVLPS( S(0), XMM0 )                    /* oy | ox */
+        MULPS( XMM1, XMM0 )                     /* oy*m5 | ox*m0 */
+        ADDPS( XMM2, XMM0 )                     /* +m13 | +m12 */
+        MOVLPS( XMM0, D(0) )                    /* ->D(1) | ->D(0) */
+        MOVSS( S(2), XMM0 )                     /* oz */
+        MOVSS( XMM0, D(2) )                     /* ->D(2), passed through unchanged */
+LLBL(K_GTP3P2DNRR_skip):
+        ADD_L( CONST(16), EDI )                 /* next dest vertex (fixed 16-byte step) */
+        ADD_L( EAX, ESI )                       /* next source vertex (step = stride) */
+        CMP_L( ECX, EDI )                       /* reached the loop end address? */
+        JNE( LLBL(K_GTP3P2DNRR_top) )           /* no: transform the next vertex */
+LLBL(K_GTP3P2DNRR_finish):
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points3_3d)
+HIDDEN(_mesa_sse_transform_points3_3d)
+GLNAME(_mesa_sse_transform_points3_3d):   /* Transform 3-component points by a
+ * 3D matrix.  Per vertex: D(0) = ox*m0 + oy*m4 + oz*m8 + m12,
+ * D(1) = ox*m1 + oy*m5 + oz*m9 + m13, D(2) = ox*m2 + oy*m6 + oz*m10 + m14.
+ * The dest vector is marked VEC_SIZE_3 / size 3.  The matrix rows are
+ * loaded with MOVAPS, so the matrix pointer must be 16-byte aligned. */
+#define FRAME_OFFSET 8
+        PUSH_L( ESI )                           /* preserve ESI; restored before RET */
+        PUSH_L( EDI )                           /* preserve EDI; restored before RET */
+        MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI )      /* ptr to source GLvector4f */
+        MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI )        /* ptr to dest GLvector4f */
+        MOV_L( ARG_MATRIX, EDX )                /* ptr to matrix */
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )    /* source count */
+        TEST_L( ECX, ECX)                       /* ZF set when count == 0 */
+        JZ( LLBL(K_GTP3P3DR_finish) )   /* count was zero; go to finish */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* stride */
+        OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )       /* set dest flags */
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )    /* set dest count */
+        MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )        /* set dest size */
+        SHL_L( CONST(4), ECX )                  /* count *= 16 (bytes per dest vertex) */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )    /* ptr to first source vertex */
+        MOV_L( REGOFF(V4F_START, EDI), EDI )    /* ptr to first dest vertex */
+        ADD_L( EDI, ECX )                       /* ECX = dest start + count*16 = loop end address */
+ALIGNTEXT32
+        MOVAPS( M(0), XMM0 )                    /* m2  | m1  | m0 */
+        MOVAPS( M(4), XMM1 )                    /* m6  | m5  | m4 */
+        MOVAPS( M(8), XMM2 )                    /* m10 | m9  | m8 */
+        MOVAPS( M(12), XMM3 )                   /* m14 | m13 | m12 */
+ALIGNTEXT32
+LLBL(K_GTP3P3DR_top):
+        MOVSS( S(0), XMM4 )                     /* ox */
+        SHUFPS( CONST(0x0), XMM4, XMM4 )        /* ox | ox | ox */
+        MULPS( XMM0, XMM4 )                     /* ox*m2 | ox*m1 | ox*m0 */
+        MOVSS( S(1), XMM5 )                     /* oy */
+        SHUFPS( CONST(0x0), XMM5, XMM5 )        /* oy | oy | oy */
+        MULPS( XMM1, XMM5 )                     /* oy*m6 | oy*m5 | oy*m4 */
+        MOVSS( S(2), XMM6 )                     /* oz */
+        SHUFPS( CONST(0x0), XMM6, XMM6 )        /* oz | oz | oz */
+        MULPS( XMM2, XMM6 )                     /* oz*m10 | oz*m9 | oz*m8 */
+        ADDPS( XMM5, XMM4 )                     /* + | + | + */
+        ADDPS( XMM6, XMM4 )                     /* + | + | + */
+        ADDPS( XMM3, XMM4 )                     /* + | + | + */
+        MOVLPS( XMM4, D(0) )                    /* => D(1) | => D(0) */
+        UNPCKHPS( XMM4, XMM4 )                  /* move the third lane (z result) down to lane 0 */
+        MOVSS( XMM4, D(2) )                     /* => D(2) */
+LLBL(K_GTP3P3DR_skip):
+        ADD_L( CONST(16), EDI )                 /* next dest vertex (fixed 16-byte step) */
+        ADD_L( EAX, ESI )                       /* next source vertex (step = stride) */
+        CMP_L( ECX, EDI )                       /* reached the loop end address? */
+        JNE( LLBL(K_GTP3P3DR_top) )             /* no: transform the next vertex */
+LLBL(K_GTP3P3DR_finish):
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+#endif
+#if defined (__ELF__) && defined (__linux__)
+        .section .note.GNU-stack,"",%progbits
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/sse_xform4.S
 ,0 → 1,235
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifdef USE_SSE_ASM
+#include "assyntax.h"
+#include "matypes.h"
+#include "xform_args.h"
+        SEG_TEXT
+#define FRAME_OFFSET    8               /* defined once for the file; NOTE(review): presumably used by the ARG_* macros in xform_args.h -- confirm */
+#define SRC(i)          REGOFF(i * 4, ESI)     /* i-th 4-byte element at ESI (source vertex) */
+#define DST(i)          REGOFF(i * 4, EDI)     /* i-th 4-byte element at EDI (dest vertex) */
+#define MAT(i)          REGOFF(i * 4, EDX)     /* i-th 4-byte element at EDX (matrix) */
+#define SELECT(r0, r1, r2, r3)  CONST( r0 * 64 + r1 * 16 + r2 * 4 + r3 )  /* packs four 2-bit selectors into one immediate, r0 in the top bits */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_sse_transform_points4_general )
+HIDDEN(_mesa_sse_transform_points4_general)
+GLNAME( _mesa_sse_transform_points4_general ):   /* Transform 4-component
+ * points by a general 4x4 matrix: per vertex the four outputs are
+ * ox*(m0..m3) + oy*(m4..m7) + oz*(m8..m11) + ow*(m12..m15).  The dest vector
+ * is marked VEC_SIZE_4 / size 4.  MOVAPS is used for the matrix loads and
+ * the result store, so the matrix and every dest vertex must be 16-byte
+ * aligned.  ECX counts vertices down to zero. */
+        PUSH_L( ESI )                           /* preserve ESI; restored before RET */
+        PUSH_L( EDI )                           /* preserve EDI; restored before RET */
+        MOV_L( ARG_SOURCE, ESI )                /* ptr to source GLvector4f */
+        MOV_L( ARG_DEST, EDI )                  /* ptr to dest GLvector4f */
+        MOV_L( ARG_MATRIX, EDX )                /* ptr to matrix */
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )    /* source count */
+        TEST_L( ECX, ECX )                      /* verify non-zero count */
+        JE( LLBL( sse_general_done ) )          /* count was zero; nothing to do */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* stride */
+        OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )    /* set dest count */
+        MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )/* set dest size */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )    /* ptr to first source vertex */
+        MOV_L( REGOFF(V4F_START, EDI), EDI )    /* ptr to first dest vertex */
+        PREFETCHT0( REGIND(ESI) )               /* prefetch hint for the first source vertex */
+        MOVAPS( MAT(0), XMM4 )                  /* m3  | m2  | m1  | m0  */
+        MOVAPS( MAT(4), XMM5 )                  /* m7  | m6  | m5  | m4  */
+        MOVAPS( MAT(8), XMM6 )                  /* m11 | m10 | m9  | m8  */
+        MOVAPS( MAT(12), XMM7 )                 /* m15 | m14 | m13 | m12 */
+ALIGNTEXT16
+LLBL( sse_general_loop ):
+        MOVSS( SRC(0), XMM0 )                   /* ox */
+        SHUFPS( CONST(0x0), XMM0, XMM0 )        /* ox | ox | ox | ox */
+        MULPS( XMM4, XMM0 )                     /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
+        MOVSS( SRC(1), XMM1 )                   /* oy */
+        SHUFPS( CONST(0x0), XMM1, XMM1 )        /* oy | oy | oy | oy */
+        MULPS( XMM5, XMM1 )                     /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */
+        MOVSS( SRC(2), XMM2 )                   /* oz */
+        SHUFPS( CONST(0x0), XMM2, XMM2 )        /* oz | oz | oz | oz */
+        MULPS( XMM6, XMM2 )                     /* oz*m11 | oz*m10 | oz*m9 | oz*m8 */
+        MOVSS( SRC(3), XMM3 )                   /* ow */
+        SHUFPS( CONST(0x0), XMM3, XMM3 )        /* ow | ow | ow | ow */
+        MULPS( XMM7, XMM3 )                     /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */
+        ADDPS( XMM1, XMM0 )                     /* ox*m3+oy*m7 | ... */
+        ADDPS( XMM2, XMM0 )                     /* ox*m3+oy*m7+oz*m11 | ... */
+        ADDPS( XMM3, XMM0 )                     /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
+        MOVAPS( XMM0, DST(0) )                  /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
+        ADD_L( CONST(16), EDI )                 /* next dest vertex (fixed 16-byte step) */
+        ADD_L( EAX, ESI )                       /* next source vertex (step = stride) */
+        DEC_L( ECX )                            /* one vertex fewer to go */
+        JNZ( LLBL( sse_general_loop ) )         /* loop until the count reaches zero */
+LLBL( sse_general_done ):
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+ALIGNTEXT4
+GLOBL GLNAME( _mesa_sse_transform_points4_3d )
+HIDDEN(_mesa_sse_transform_points4_3d)
+GLNAME( _mesa_sse_transform_points4_3d ):   /* Transform 4-component points by
+ * a 3D matrix.  Per vertex the full product
+ * ox*(m0..m3) + oy*(m4..m7) + oz*(m8..m11) + ow*(m12..m15) is stored, then
+ * D(3) is overwritten with the untouched source w.  Because all four
+ * components (including w) are written, the dest vector is marked
+ * VEC_SIZE_4 / size 4, matching the other 4-component routines in this
+ * file; reporting size 3 would make consumers ignore the w stored here.
+ * MOVAPS is used for the matrix loads and the result store, so the matrix
+ * and every dest vertex must be 16-byte aligned. */
+        PUSH_L( ESI )                           /* preserve ESI; restored before RET */
+        PUSH_L( EDI )                           /* preserve EDI; restored before RET */
+        MOV_L( ARG_SOURCE, ESI )                /* ptr to source GLvector4f */
+        MOV_L( ARG_DEST, EDI )                  /* ptr to dest GLvector4f */
+        MOV_L( ARG_MATRIX, EDX )                /* ptr to matrix */
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )    /* source count */
+        TEST_L( ECX, ECX)                       /* ZF set when count == 0 */
+        JZ( LLBL(K_GTP43P3DR_finish) )          /* count was zero; go to finish */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* stride */
+        OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags: x, y, z and w are valid */
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )    /* set dest count */
+        MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )/* set dest size: four components are written */
+        SHL_L( CONST(4), ECX )                  /* count *= 16 (bytes per dest vertex) */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )    /* ptr to first source vertex */
+        MOV_L( REGOFF(V4F_START, EDI), EDI )    /* ptr to first dest vertex */
+        ADD_L( EDI, ECX )                       /* ECX = dest start + count*16 = loop end address */
+        MOVAPS( MAT(0), XMM0 )                  /* m3  | m2  | m1  |  m0 */
+        MOVAPS( MAT(4), XMM1 )                  /* m7  | m6  | m5  |  m4 */
+        MOVAPS( MAT(8), XMM2 )                  /* m11 | m10 | m9  |  m8 */
+        MOVAPS( MAT(12), XMM3 )                 /* m15 | m14 | m13 | m12 */
+ALIGNTEXT32
+LLBL( K_GTP43P3DR_top ):
+        MOVSS( SRC(0), XMM4 )                   /* ox */
+        SHUFPS( CONST(0x0), XMM4, XMM4 )        /* ox | ox | ox | ox */
+        MULPS( XMM0, XMM4 )                     /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
+        MOVSS( SRC(1), XMM5 )                   /* oy */
+        SHUFPS( CONST(0x0), XMM5, XMM5 )        /* oy | oy | oy | oy */
+        MULPS( XMM1, XMM5 )                     /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */
+        MOVSS( SRC(2), XMM6 )                   /* oz */
+        SHUFPS( CONST(0x0), XMM6, XMM6 )        /* oz | oz | oz | oz */
+        MULPS( XMM2, XMM6 )                     /* oz*m11 | oz*m10 | oz*m9 | oz*m8 */
+        MOVSS( SRC(3), XMM7 )                   /* ow */
+        SHUFPS( CONST(0x0), XMM7, XMM7 )        /* ow | ow | ow | ow */
+        MULPS( XMM3, XMM7 )                     /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */
+        ADDPS( XMM5, XMM4 )                     /* ox*m3+oy*m7 | ... */
+        ADDPS( XMM6, XMM4 )                     /* ox*m3+oy*m7+oz*m11 | ... */
+        ADDPS( XMM7, XMM4 )                     /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
+        MOVAPS( XMM4, DST(0) )                  /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
+        MOVSS( SRC(3), XMM4 )                   /* ow */
+        MOVSS( XMM4, DST(3) )                   /* ->D(3): w passes through unchanged */
+LLBL( K_GTP43P3DR_skip ):
+        ADD_L( CONST(16), EDI )                 /* next dest vertex (fixed 16-byte step) */
+        ADD_L( EAX, ESI )                       /* next source vertex (step = stride) */
+        CMP_L( ECX, EDI )                       /* reached the loop end address? */
+        JNE( LLBL(K_GTP43P3DR_top) )            /* no: transform the next vertex */
+LLBL( K_GTP43P3DR_finish ):
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+ALIGNTEXT16     /*
+ * _mesa_sse_transform_points4_identity
+ *
+ * Copies every source vertex to the destination unchanged, 16 bytes (one
+ * MOVAPS load/store pair) per vertex.  The source pointer advances by the
+ * source stride, the destination pointer by a fixed 16 bytes.  Before the
+ * copy the destination vector's flags, count and size (4) fields are set.
+ * Nothing at all is written when the source count is zero.
+ *
+ * NOTE(review): MOVAPS requires 16-byte aligned memory operands, so the
+ * source/dest data pointers and the source stride are assumed to keep that
+ * alignment -- confirm against the callers.
+ */
+GLOBL GLNAME( _mesa_sse_transform_points4_identity )
+HIDDEN(_mesa_sse_transform_points4_identity)
+GLNAME( _mesa_sse_transform_points4_identity ):
+        PUSH_L( ESI )                           /* ESI/EDI are used as data pointers; restored before RET */
+        PUSH_L( EDI )
+        MOV_L( ARG_SOURCE, ESI )                /* source vector argument */
+        MOV_L( ARG_DEST, EDI )                  /* dest vector argument */
+        MOV_L( ARG_MATRIX, EDX )                /* matrix argument; EDX is not read again in this routine */
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )    /* source count, used as the loop counter */
+        TEST_L( ECX, ECX )                      /* verify non-zero count */
+        JE( LLBL( sse_identity_done ) )         /* count == 0: leave dest untouched */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* stride */
+        OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )    /* set dest count */
+        MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )/* set dest size */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )    /* ptr to first source vertex */
+        MOV_L( REGOFF(V4F_START, EDI), EDI )    /* ptr to first dest vertex */
+ALIGNTEXT16
+LLBL( sse_identity_loop ):
+        PREFETCHNTA( REGOFF(32, ESI) )          /* prefetch hint for data 32 bytes past the current source vertex */
+        MOVAPS( REGIND(ESI), XMM0 )             /* load the whole 4-float source vertex */
+        ADD_L( EAX, ESI )                       /* next source vertex (source stride) */
+        MOVAPS( XMM0, REGIND(EDI) )             /* store it unchanged */
+        ADD_L( CONST(16), EDI )                 /* next dest vertex (fixed 16 bytes) */
+        DEC_L( ECX )                            /* one vertex done */
+        JNZ( LLBL( sse_identity_loop ) )        /* loop until the counter reaches zero */
+LLBL( sse_identity_done ):
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#endif
+#if defined (__ELF__) && defined (__linux__)
+        .section .note.GNU-stack,"",%progbits
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/x86_cliptest.S
 ,0 → 1,407
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/*
+ * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
+ * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
+ * in there will break the build on some platforms.
+ */
+#include "assyntax.h"
+#include "matypes.h"
+#include "clip_args.h"
+#define SRC0            REGOFF(0, ESI)          /* S(0): component 0 of the current source vertex (ESI-relative) */
+#define SRC1            REGOFF(4, ESI)          /* S(1) */
+#define SRC2            REGOFF(8, ESI)          /* S(2) */
+#define SRC3            REGOFF(12, ESI)         /* S(3) */
+#define DST0            REGOFF(0, EDI)          /* component 0 of the current dest vertex (EDI-relative) */
+#define DST1            REGOFF(4, EDI)
+#define DST2            REGOFF(8, EDI)
+#define DST3            REGOFF(12, EDI)
+#define MAT0            REGOFF(0, EDX)          /* EDX-relative; MAT0-MAT3 are not referenced by the two cliptest routines that follow */
+#define MAT1            REGOFF(4, EDX)
+#define MAT2            REGOFF(8, EDX)
+#define MAT3            REGOFF(12, EDX)
+/*
+ * Table for clip test.
+ *
+ * The cliptest routines in this file build a 7-bit index for each source
+ * vertex S and read the byte at clip_table[index]; that byte is stored as
+ * the vertex's clip mask and folded into the or/and masks.  Index bits:
+ *
+ *      bit6 = SRC3 < 0
+ *      bit5 = SRC2 < 0
+ *      bit4 = abs(S(2)) > abs(S(3))
+ *      bit3 = SRC1 < 0
+ *      bit2 = abs(S(1)) > abs(S(3))
+ *      bit1 = SRC0 < 0
+ *      bit0 = abs(S(0)) > abs(S(3))
+ *
+ * "< 0" here means "sign bit of the float is set"; the abs() comparisons
+ * are unsigned integer compares of the raw bit patterns after the sign bit
+ * has been shifted out.  Seven index bits give the 128 entries below
+ * (16 rows of 8 bytes).
+ *
+ * NOTE(review): the meaning of the individual bits of each table byte is
+ * not defined in this file -- presumably Mesa's CLIP_*_BIT flags; confirm.
+ */
+        SEG_DATA
+clip_table:
+        D_BYTE 0x00, 0x01, 0x00, 0x02, 0x04, 0x05, 0x04, 0x06
+        D_BYTE 0x00, 0x01, 0x00, 0x02, 0x08, 0x09, 0x08, 0x0a
+        D_BYTE 0x20, 0x21, 0x20, 0x22, 0x24, 0x25, 0x24, 0x26
+        D_BYTE 0x20, 0x21, 0x20, 0x22, 0x28, 0x29, 0x28, 0x2a
+        D_BYTE 0x00, 0x01, 0x00, 0x02, 0x04, 0x05, 0x04, 0x06
+        D_BYTE 0x00, 0x01, 0x00, 0x02, 0x08, 0x09, 0x08, 0x0a
+        D_BYTE 0x10, 0x11, 0x10, 0x12, 0x14, 0x15, 0x14, 0x16
+        D_BYTE 0x10, 0x11, 0x10, 0x12, 0x18, 0x19, 0x18, 0x1a
+        D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x37, 0x35, 0x37, 0x36
+        D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x3b, 0x39, 0x3b, 0x3a
+        D_BYTE 0x2f, 0x2d, 0x2f, 0x2e, 0x27, 0x25, 0x27, 0x26
+        D_BYTE 0x2f, 0x2d, 0x2f, 0x2e, 0x2b, 0x29, 0x2b, 0x2a
+        D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x37, 0x35, 0x37, 0x36
+        D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x3b, 0x39, 0x3b, 0x3a
+        D_BYTE 0x1f, 0x1d, 0x1f, 0x1e, 0x17, 0x15, 0x17, 0x16
+        D_BYTE 0x1f, 0x1d, 0x1f, 0x1e, 0x1b, 0x19, 0x1b, 0x1a
+        SEG_TEXT
+/*
+ * _mesa_x86_cliptest_points4
+ *
+ * For each source vertex S: build the clip_table index from the raw bit
+ * patterns of S(0)..S(3), store the looked-up mask byte to clipmask[] and
+ * accumulate it into ormask/andmask.  If the mask is zero the vertex is
+ * projected, i.e. proj = ( S(0)/S(3), S(1)/S(3), S(2)/S(3), 1/S(3) ),
+ * computed by multiplying with 1/S(3).  Otherwise proj = ( 0, 0, 0, 1.0 ).
+ *
+ * The dest vector's flags, size (4) and count fields are set up front.
+ * The initial ormask/andmask are read through ARG_OR/ARG_AND and the final
+ * values are written back through them, unless the count is zero.
+ * The ARG_DEST value is returned in EAX.
+ *
+ * The ARG_SOURCE and ARG_CLIP stack slots are overwritten and reused as
+ * scratch storage (source stride and end-of-clipmask pointer).
+ *
+ * Register usage inside the loop:
+ *   AL:  ormask
+ *   AH:  andmask
+ *   EBX: temp0
+ *   ECX: temp1
+ *   EDX: clipmask[]
+ *   ESI: clip[]
+ *   EDI: proj[]
+ *   EBP: temp2
+ */
+#if defined(__ELF__) && defined(__PIC__) && defined(GNU_ASSEMBLER) && !defined(ELFPIC)
+#define ELFPIC          /* position-independent build: clip_table is reached through the GOT */
+#endif
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_cliptest_points4 )
+HIDDEN(_mesa_x86_cliptest_points4)
+GLNAME( _mesa_x86_cliptest_points4 ):
+#ifdef ELFPIC
+#define FRAME_OFFSET 20 /* five 4-byte pushes below; presumably consumed by the ARG_* macros (clip_args.h) -- confirm */
+#else
+#define FRAME_OFFSET 16 /* four 4-byte pushes below */
+#endif
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        PUSH_L( EBP )
+        PUSH_L( EBX )
+#ifdef ELFPIC
+        /* store pointer to clip_table on stack */
+        CALL( LLBL(ctp4_get_eip) )      /* EBX = address of the next instruction */
+        ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX )      /* add the GOT displacement */
+        MOV_L( REGOFF(clip_table@GOT, EBX), EBX )       /* load clip_table's address from its GOT entry */
+        PUSH_L( EBX )                   /* fifth push: kept at (ESP) for the whole loop */
+        JMP( LLBL(ctp4_clip_table_ready) )      /* skip over the helper */
+LLBL(ctp4_get_eip):
+        /* store eip in ebx */
+        MOV_L( REGIND(ESP), EBX )       /* return address pushed by the CALL above */
+        RET
+LLBL(ctp4_clip_table_ready):
+#endif
+        MOV_L( ARG_SOURCE, ESI )
+        MOV_L( ARG_DEST, EDI )
+        MOV_L( ARG_CLIP, EDX )
+        MOV_L( ARG_OR, EBX )            /* pointer to the or-mask byte */
+        MOV_L( ARG_AND, EBP )           /* pointer to the and-mask byte */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+        MOV_L( REGOFF(V4F_START, ESI), ESI )    /* ESI = first source vertex */
+        OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+        MOV_L( EAX, ARG_SOURCE )        /* put stride in ARG_SOURCE */
+        MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
+        MOV_L( REGOFF(V4F_START, EDI), EDI )    /* EDI = first dest vertex */
+        ADD_L( EDX, ECX )               /* ECX = clipmask + count (one mask byte per vertex) */
+        MOV_L( ECX, ARG_CLIP )          /* put clipmask + count in ARG_CLIP */
+        CMP_L( ECX, EDX )               /* equal only when count == 0 */
+        MOV_B( REGIND(EBX), AL )        /* AL = initial ormask (MOV leaves the CMP flags intact) */
+        MOV_B( REGIND(EBP), AH )        /* AH = initial andmask */
+        JZ( LLBL(ctp4_finish) )         /* count == 0: nothing to do, masks are not written back */
+ALIGNTEXT16
+LLBL(ctp4_top):
+        FLD1                            /* F3 */
+        FDIV_S( SRC3 )          /* F3 = 1/S(3); GH: don't care about div-by-zero */
+        MOV_L( SRC3, EBP )      /* from here on the floats are handled as raw 32-bit patterns */
+        MOV_L( SRC2, EBX )
+        XOR_L( ECX, ECX )       /* ECX collects the table index, one bit per ADC */
+        ADD_L( EBP, EBP )       /* ebp = abs(S(3))*2 ; carry = sign of S(3) */
+        ADC_L( ECX, ECX )       /* shift carry into ECX */
+        ADD_L( EBX, EBX )       /* ebx = abs(S(2))*2 ; carry = sign of S(2) */
+        ADC_L( ECX, ECX )
+        CMP_L( EBX, EBP )       /* carry = abs(S(2))*2 > abs(S(3))*2 */
+        ADC_L( ECX, ECX )
+        MOV_L( SRC1, EBX )
+        ADD_L( EBX, EBX )       /* ebx = abs(S(1))*2 ; carry = sign of S(1) */
+        ADC_L( ECX, ECX )
+        CMP_L( EBX, EBP )       /* carry = abs(S(1))*2 > abs(S(3))*2 */
+        ADC_L( ECX, ECX )
+        MOV_L( SRC0, EBX )
+        ADD_L( EBX, EBX )       /* ebx = abs(S(0))*2 ; carry = sign of S(0) */
+        ADC_L( ECX, ECX )
+        CMP_L( EBX, EBP )       /* carry = abs(S(0))*2 > abs(S(3))*2 */
+        ADC_L( ECX, ECX )       /* ECX now holds the complete 7-bit index */
+#ifdef ELFPIC
+        MOV_L( REGIND(ESP), EBP )       /* clip_table */
+        MOV_B( REGBI(EBP, ECX), CL )    /* CL = clip_table[index] */
+#else
+        MOV_B( REGOFF(clip_table,ECX), CL )     /* CL = clip_table[index] */
+#endif
+        OR_B( CL, AL )                  /* ormask |= mask */
+        AND_B( CL, AH )                 /* andmask &= mask */
+        TEST_B( CL, CL )                /* zero mask selects the projection path */
+        MOV_B( CL, REGIND(EDX) )        /* clipmask[i] = mask (MOV leaves the TEST flags intact) */
+        JZ( LLBL(ctp4_proj) )
+LLBL(ctp4_noproj):
+        FSTP( ST(0) )                   /* pop the unused 1/S(3) */
+        MOV_L( CONST(0), DST0 )
+        MOV_L( CONST(0), DST1 )
+        MOV_L( CONST(0), DST2 )
+        MOV_L( CONST(0x3f800000), DST3 )        /* 0x3f800000 is the bit pattern of 1.0f */
+        JMP( LLBL(ctp4_next) )
+LLBL(ctp4_proj):
+        FLD_S( SRC0 )                   /* F0 F3 */
+        FMUL2( ST(1), ST0 )             /* F0 = S(0) * 1/S(3) */
+        FLD_S( SRC1 )                   /* F1 F0 F3 */
+        FMUL2( ST(2), ST0 )             /* F1 = S(1) * 1/S(3) */
+        FLD_S( SRC2 )                   /* F2 F1 F0 F3 */
+        FMUL2( ST(3), ST0 )             /* F2 = S(2) * 1/S(3) */
+        FXCH( ST(2) )                   /* F0 F1 F2 F3 */
+        FSTP_S( DST0 )          /* F1 F2 F3 */
+        FSTP_S( DST1 )          /* F2 F3 */
+        FSTP_S( DST2 )          /* F3 */
+        FSTP_S( DST3 )          /* stores 1/S(3); stack is empty again */
+LLBL(ctp4_next):
+        INC_L( EDX )                    /* next clipmask byte */
+        ADD_L( CONST(16), EDI )         /* next dest vertex (fixed 16 bytes) */
+        ADD_L( ARG_SOURCE, ESI )        /* next source vertex: stride was stashed in ARG_SOURCE */
+        CMP_L( EDX, ARG_CLIP )          /* end pointer was stashed in ARG_CLIP */
+        JNZ( LLBL(ctp4_top) )
+        MOV_L( ARG_OR, ECX )            /* reload the mask pointers (EBX/EBP were reused as temps) */
+        MOV_L( ARG_AND, EDX )
+        MOV_B( AL, REGIND(ECX) )        /* write back the accumulated ormask */
+        MOV_B( AH, REGIND(EDX) )        /* write back the accumulated andmask */
+LLBL(ctp4_finish):
+        MOV_L( ARG_DEST, EAX )          /* return value: the ARG_DEST argument */
+#ifdef ELFPIC
+        POP_L( ESI )                    /* discard ptr to clip_table */
+#endif
+        POP_L( EBX )
+        POP_L( EBP )
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+ALIGNTEXT16     /*
+ * _mesa_x86_cliptest_points4_np
+ *
+ * Clip test without projection: for each source vertex S the clip_table
+ * index is built from the raw bit patterns of S(0)..S(3), the looked-up
+ * mask byte is stored to clipmask[] and accumulated into ormask/andmask.
+ * No vertex data is written and the dest vector is never dereferenced.
+ * The initial ormask/andmask are read through ARG_OR/ARG_AND and the final
+ * values are written back through them, unless the count is zero.
+ * The ARG_SOURCE value is returned in EAX.
+ *
+ * The ARG_DEST stack slot is overwritten and reused to hold the source
+ * stride; EDI holds the end-of-clipmask pointer.
+ */
+GLOBL GLNAME( _mesa_x86_cliptest_points4_np )
+HIDDEN(_mesa_x86_cliptest_points4_np)
+GLNAME( _mesa_x86_cliptest_points4_np ):
+#ifdef ELFPIC
+#define FRAME_OFFSET 20 /* five 4-byte pushes below; presumably consumed by the ARG_* macros (clip_args.h) -- confirm */
+#else
+#define FRAME_OFFSET 16 /* four 4-byte pushes below */
+#endif
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        PUSH_L( EBP )
+        PUSH_L( EBX )
+#ifdef ELFPIC
+        /* store pointer to clip_table on stack */
+        CALL( LLBL(ctp4_np_get_eip) )   /* EBX = address of the next instruction */
+        ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX )      /* add the GOT displacement */
+        MOV_L( REGOFF(clip_table@GOT, EBX), EBX )       /* load clip_table's address from its GOT entry */
+        PUSH_L( EBX )                   /* fifth push: kept at (ESP) for the whole loop */
+        JMP( LLBL(ctp4_np_clip_table_ready) )   /* skip over the helper */
+LLBL(ctp4_np_get_eip):
+        /* store eip in ebx */
+        MOV_L( REGIND(ESP), EBX )       /* return address pushed by the CALL above */
+        RET
+LLBL(ctp4_np_clip_table_ready):
+#endif
+        MOV_L( ARG_SOURCE, ESI )
+        /* slot: _mesa_x86_cliptest_points4 loads ARG_DEST into EDI at this point */
+        MOV_L( ARG_CLIP, EDX )
+        MOV_L( ARG_OR, EBX )            /* pointer to the or-mask byte */
+        MOV_L( ARG_AND, EBP )           /* pointer to the and-mask byte */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+        MOV_L( REGOFF(V4F_START, ESI), ESI )    /* ESI = first source vertex */
+        MOV_L( EAX, ARG_DEST )          /* put stride in ARG_DEST */
+        ADD_L( EDX, ECX )               /* ECX = clipmask + count (one mask byte per vertex) */
+        MOV_L( ECX, EDI )               /* put clipmask + count in EDI */
+        CMP_L( ECX, EDX )               /* equal only when count == 0 */
+        MOV_B( REGIND(EBX), AL )        /* AL = initial ormask (MOV leaves the CMP flags intact) */
+        MOV_B( REGIND(EBP), AH )        /* AH = initial andmask */
+        JZ( LLBL(ctp4_np_finish) )      /* count == 0: nothing to do, masks are not written back */
+ALIGNTEXT16
+LLBL(ctp4_np_top):
+        MOV_L( SRC3, EBP )      /* the floats are handled as raw 32-bit patterns */
+        MOV_L( SRC2, EBX )
+        XOR_L( ECX, ECX )       /* ECX collects the table index, one bit per ADC */
+        ADD_L( EBP, EBP )       /* ebp = abs(S(3))*2 ; carry = sign of S(3) */
+        ADC_L( ECX, ECX )       /* shift carry into ECX */
+        ADD_L( EBX, EBX )       /* ebx = abs(S(2))*2 ; carry = sign of S(2) */
+        ADC_L( ECX, ECX )
+        CMP_L( EBX, EBP )       /* carry = abs(S(2))*2 > abs(S(3))*2 */
+        ADC_L( ECX, ECX )
+        MOV_L( SRC1, EBX )
+        ADD_L( EBX, EBX )       /* ebx = abs(S(1))*2 ; carry = sign of S(1) */
+        ADC_L( ECX, ECX )
+        CMP_L( EBX, EBP )       /* carry = abs(S(1))*2 > abs(S(3))*2 */
+        ADC_L( ECX, ECX )
+        MOV_L( SRC0, EBX )
+        ADD_L( EBX, EBX )       /* ebx = abs(S(0))*2 ; carry = sign of S(0) */
+        ADC_L( ECX, ECX )
+        CMP_L( EBX, EBP )       /* carry = abs(S(0))*2 > abs(S(3))*2 */
+        ADC_L( ECX, ECX )       /* ECX now holds the complete 7-bit index */
+#ifdef ELFPIC
+        MOV_L( REGIND(ESP), EBP )       /* clip_table */
+        MOV_B( REGBI(EBP, ECX), CL )    /* CL = clip_table[index] */
+#else
+        MOV_B( REGOFF(clip_table,ECX), CL )     /* CL = clip_table[index] */
+#endif
+        OR_B( CL, AL )                  /* ormask |= mask */
+        AND_B( CL, AH )                 /* andmask &= mask */
+        TEST_B( CL, CL )                /* flags from this test are overwritten (INC/ADD/CMP) before any branch reads them */
+        MOV_B( CL, REGIND(EDX) )        /* clipmask[i] = mask */
+        INC_L( EDX )                    /* next clipmask byte */
+        /* slot: _mesa_x86_cliptest_points4 advances its dest pointer at this point */
+        ADD_L( ARG_DEST, ESI )          /* next source vertex: stride was stashed in ARG_DEST */
+        CMP_L( EDX, EDI )               /* reached clipmask + count? */
+        JNZ( LLBL(ctp4_np_top) )
+        MOV_L( ARG_OR, ECX )            /* reload the mask pointers (EBX/EBP were reused as temps) */
+        MOV_L( ARG_AND, EDX )
+        MOV_B( AL, REGIND(ECX) )        /* write back the accumulated ormask */
+        MOV_B( AH, REGIND(EDX) )        /* write back the accumulated andmask */
+LLBL(ctp4_np_finish):
+        MOV_L( ARG_SOURCE, EAX )        /* return value: the ARG_SOURCE argument */
+#ifdef ELFPIC
+        POP_L( ESI )                    /* discard ptr to clip_table */
+#endif
+        POP_L( EBX )
+        POP_L( EBP )
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#if defined (__ELF__) && defined (__linux__)
+        .section .note.GNU-stack,"",%progbits
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/x86_xform.c
 ,0 → 1,122
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/*
+ * Intel x86 assembly code by Josh Vanderhoof
+ */
+#include "main/glheader.h"
+#include "main/context.h"
+#include "math/m_xform.h"
+#include "x86_xform.h"
+#include "common_x86_asm.h"
+#ifdef USE_X86_ASM
+#ifdef USE_3DNOW_ASM
+#include "3dnow.h"
+#endif
+#ifdef USE_SSE_ASM
+#include "sse.h"
+#endif
+#endif
+#ifdef DEBUG_MATH
+#include "math/m_debug.h"
+#endif
+#ifdef USE_X86_ASM                      /* prototypes of the x86 assembly routines used by this file */
+DECLARE_XFORM_GROUP( x86, 2 )           /* _mesa_x86_transform_points2_* */
+DECLARE_XFORM_GROUP( x86, 3 )           /* _mesa_x86_transform_points3_* */
+DECLARE_XFORM_GROUP( x86, 4 )           /* _mesa_x86_transform_points4_* */
+extern GLvector4f * _ASMAPI             /* clip test with projection; installed in _mesa_clip_tab[4] */
+_mesa_x86_cliptest_points4( GLvector4f *clip_vec,
+                            GLvector4f *proj_vec,
+                            GLubyte clipMask[],
+                            GLubyte *orMask,
+                            GLubyte *andMask,
+                            GLboolean viewport_z_clip );
+extern GLvector4f * _ASMAPI             /* clip test without projection; installed in _mesa_clip_np_tab[4] */
+_mesa_x86_cliptest_points4_np( GLvector4f *clip_vec,
+                               GLvector4f *proj_vec,
+                               GLubyte clipMask[],
+                               GLubyte *orMask,
+                               GLubyte *andMask,
+                               GLboolean viewport_z_clip );
+extern void _ASMAPI                     /* declared only; nothing in this file references it */
+_mesa_v16_x86_cliptest_points4( GLfloat *first_vert,
+                                GLfloat *last_vert,
+                                GLubyte *or_mask,
+                                GLubyte *and_mask,
+                                GLubyte *clip_mask,
+                                GLboolean viewport_z_clip );
+extern void _ASMAPI                     /* declared only; nothing in this file references it */
+_mesa_v16_x86_general_xform( GLfloat *dest,
+                             const GLfloat *m,
+                             const GLfloat *src,
+                             GLuint src_stride,
+                             GLuint count );
+#endif
+#ifdef USE_X86_ASM
+/*
+ * Install the plain x86 assembly routines into the transform and clip
+ * dispatch tables.  With DEBUG_MATH defined, the math self-tests are run
+ * on the freshly installed routines afterwards.
+ */
+static void _mesa_init_x86_transform_asm( void )
+{
+   /* Clip testing of 4-component vertices, without and with projection. */
+   _mesa_clip_np_tab[4] = _mesa_x86_cliptest_points4_np;
+   _mesa_clip_tab[4] = _mesa_x86_cliptest_points4;
+
+   /* Point transforms; each group fills its own _mesa_transform_tab row. */
+   ASSIGN_XFORM_GROUP( x86, 4 );
+   ASSIGN_XFORM_GROUP( x86, 3 );
+   ASSIGN_XFORM_GROUP( x86, 2 );
+
+#ifdef DEBUG_MATH
+   /* The self-tests must run after every table entry has been installed. */
+   _math_test_all_transform_functions( "x86" );
+   _math_test_all_cliptest_functions( "x86" );
+#endif
+}
+#endif
+/*
+ * Entry point for installing the x86 assembly transform/clip routines.
+ *
+ * Calls _mesa_get_x86_features() and then, when USE_X86_ASM is defined:
+ *   - installs the plain x86 routines if _mesa_x86_cpu_features is non-zero;
+ *   - installs the SSE routines if the build has them (USE_SSE_ASM) and
+ *     cpu_has_xmm is set.
+ * Without USE_X86_ASM only the feature query is performed.
+ */
+void _mesa_init_all_x86_transform_asm( void )
+{
+   _mesa_get_x86_features();
+#ifdef USE_X86_ASM
+   if ( _mesa_x86_cpu_features ) {
+      _mesa_init_x86_transform_asm();
+   }
+#ifdef USE_SSE_ASM
+   /* sse.h, and with it the prototype of _mesa_init_sse_transform_asm(),
+    * is only included when USE_SSE_ASM is defined, so the call has to be
+    * guarded the same way to avoid an implicit function declaration. */
+   if ( cpu_has_xmm ) {
+      _mesa_init_sse_transform_asm();
+   }
+#endif
+#endif
+}

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/x86_xform.h
 ,0 → 1,106
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Gareth Hughes
+ */
+#ifndef X86_XFORM_H
+#define X86_XFORM_H
+/* =============================================================
+ * Transformation function declarations:
+ *
+ * XFORM_ARGS is the parameter list shared by every transform routine:
+ * destination vector, 16-element matrix, source vector.
+ *
+ * DECLARE_XFORM_GROUP( pfx, sz ) declares the seven extern routines
+ *    _mesa_<pfx>_transform_points<sz>_general
+ *    _mesa_<pfx>_transform_points<sz>_identity
+ *    _mesa_<pfx>_transform_points<sz>_3d_no_rot
+ *    _mesa_<pfx>_transform_points<sz>_perspective
+ *    _mesa_<pfx>_transform_points<sz>_2d
+ *    _mesa_<pfx>_transform_points<sz>_2d_no_rot
+ *    _mesa_<pfx>_transform_points<sz>_3d
+ * It is meant for file scope and already ends in ';'.
+ *
+ * ASSIGN_XFORM_GROUP( pfx, sz ) stores those seven routines into
+ * _mesa_transform_tab[sz][MATRIX_*].  It expands to several statements
+ * and already ends in ';', so it must not be used as the unbraced body
+ * of an if/else or loop.
+ */
+#define XFORM_ARGS      GLvector4f *to_vec,                             \
+                        const GLfloat m[16],                            \
+                        const GLvector4f *from_vec
+#define DECLARE_XFORM_GROUP( pfx, sz ) \
+extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_general( XFORM_ARGS );         \
+extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_identity( XFORM_ARGS );        \
+extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d_no_rot( XFORM_ARGS );       \
+extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_perspective( XFORM_ARGS );     \
+extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d( XFORM_ARGS );              \
+extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d_no_rot( XFORM_ARGS );       \
+extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d( XFORM_ARGS );
+#define ASSIGN_XFORM_GROUP( pfx, sz )                                   \
+   _mesa_transform_tab[sz][MATRIX_GENERAL] =                            \
+      _mesa_##pfx##_transform_points##sz##_general;                     \
+   _mesa_transform_tab[sz][MATRIX_IDENTITY] =                           \
+      _mesa_##pfx##_transform_points##sz##_identity;                    \
+   _mesa_transform_tab[sz][MATRIX_3D_NO_ROT] =                          \
+      _mesa_##pfx##_transform_points##sz##_3d_no_rot;                   \
+   _mesa_transform_tab[sz][MATRIX_PERSPECTIVE] =                        \
+      _mesa_##pfx##_transform_points##sz##_perspective;                 \
+   _mesa_transform_tab[sz][MATRIX_2D] =                                 \
+      _mesa_##pfx##_transform_points##sz##_2d;                          \
+   _mesa_transform_tab[sz][MATRIX_2D_NO_ROT] =                          \
+      _mesa_##pfx##_transform_points##sz##_2d_no_rot;                   \
+   _mesa_transform_tab[sz][MATRIX_3D] =                                 \
+      _mesa_##pfx##_transform_points##sz##_3d;
+/* =============================================================
+ * Normal transformation function declarations:
+ *
+ * NORM_ARGS is the parameter list shared by every normal routine:
+ * matrix, scale, input vector, lengths array, destination vector.
+ *
+ * DECLARE_NORM_GROUP( pfx ) declares the eight extern routines
+ *    _mesa_<pfx>_rescale_normals
+ *    _mesa_<pfx>_normalize_normals
+ *    _mesa_<pfx>_transform_normals
+ *    _mesa_<pfx>_transform_normals_no_rot
+ *    _mesa_<pfx>_transform_rescale_normals
+ *    _mesa_<pfx>_transform_rescale_normals_no_rot
+ *    _mesa_<pfx>_transform_normalize_normals
+ *    _mesa_<pfx>_transform_normalize_normals_no_rot
+ * It is meant for file scope and already ends in ';'.
+ *
+ * ASSIGN_NORM_GROUP( pfx ) stores those eight routines into
+ * _mesa_normal_tab[], indexed by a NORM_* value or the bitwise OR of two
+ * NORM_* values.  It expands to several statements and already ends in
+ * ';', so it must not be used as the unbraced body of an if/else or loop.
+ */
+#define NORM_ARGS       const GLmatrix *mat,                            \
+                        GLfloat scale,                                  \
+                        const GLvector4f *in,                           \
+                        const GLfloat *lengths,                         \
+                        GLvector4f *dest
+#define DECLARE_NORM_GROUP( pfx ) \
+extern void _ASMAPI _mesa_##pfx##_rescale_normals( NORM_ARGS );                         \
+extern void _ASMAPI _mesa_##pfx##_normalize_normals( NORM_ARGS );                       \
+extern void _ASMAPI _mesa_##pfx##_transform_normals( NORM_ARGS );                       \
+extern void _ASMAPI _mesa_##pfx##_transform_normals_no_rot( NORM_ARGS );                \
+extern void _ASMAPI _mesa_##pfx##_transform_rescale_normals( NORM_ARGS );               \
+extern void _ASMAPI _mesa_##pfx##_transform_rescale_normals_no_rot( NORM_ARGS );        \
+extern void _ASMAPI _mesa_##pfx##_transform_normalize_normals( NORM_ARGS );             \
+extern void _ASMAPI _mesa_##pfx##_transform_normalize_normals_no_rot( NORM_ARGS );
+#define ASSIGN_NORM_GROUP( pfx )                                        \
+   _mesa_normal_tab[NORM_RESCALE] =                                     \
+      _mesa_##pfx##_rescale_normals;                                    \
+   _mesa_normal_tab[NORM_NORMALIZE] =                                   \
+      _mesa_##pfx##_normalize_normals;                                  \
+   _mesa_normal_tab[NORM_TRANSFORM] =                                   \
+      _mesa_##pfx##_transform_normals;                                  \
+   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT] =                            \
+      _mesa_##pfx##_transform_normals_no_rot;                           \
+   _mesa_normal_tab[NORM_TRANSFORM | NORM_RESCALE] =                    \
+      _mesa_##pfx##_transform_rescale_normals;                          \
+   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE] =             \
+      _mesa_##pfx##_transform_rescale_normals_no_rot;                   \
+   _mesa_normal_tab[NORM_TRANSFORM | NORM_NORMALIZE] =                  \
+      _mesa_##pfx##_transform_normalize_normals;                        \
+   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE] =           \
+      _mesa_##pfx##_transform_normalize_normals_no_rot;
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/x86_xform2.S
 ,0 → 1,574
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/*
+ * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
+ * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
+ * in there will break the build on some platforms.
+ */
+#include "assyntax.h"
+#include "matypes.h"
+#include "xform_args.h"
+        SEG_TEXT
+#define FP_ONE          1065353216      /* 0x3f800000, the IEEE-754 single-precision bit pattern of 1.0 */
+#define FP_ZERO         0               /* bit pattern of +0.0 */
+#define SRC0            REGOFF(0, ESI)  /* S(0): component 0 of the current source vertex (ESI-relative) */
+#define SRC1            REGOFF(4, ESI)  /* S(1) */
+#define SRC2            REGOFF(8, ESI)  /* S(2) */
+#define SRC3            REGOFF(12, ESI) /* S(3) */
+#define DST0            REGOFF(0, EDI)  /* D(0): component 0 of the current dest vertex (EDI-relative) */
+#define DST1            REGOFF(4, EDI)  /* D(1) */
+#define DST2            REGOFF(8, EDI)  /* D(2) */
+#define DST3            REGOFF(12, EDI) /* D(3) */
+#define MAT0            REGOFF(0, EDX)  /* MATn: 4-byte matrix element n at byte offset 4*n from EDX */
+#define MAT1            REGOFF(4, EDX)
+#define MAT2            REGOFF(8, EDX)
+#define MAT3            REGOFF(12, EDX)
+#define MAT4            REGOFF(16, EDX)
+#define MAT5            REGOFF(20, EDX)
+#define MAT6            REGOFF(24, EDX)
+#define MAT7            REGOFF(28, EDX)
+#define MAT8            REGOFF(32, EDX)
+#define MAT9            REGOFF(36, EDX)
+#define MAT10           REGOFF(40, EDX)
+#define MAT11           REGOFF(44, EDX)
+#define MAT12           REGOFF(48, EDX)
+#define MAT13           REGOFF(52, EDX)
+#define MAT14           REGOFF(56, EDX)
+#define MAT15           REGOFF(60, EDX)
+ALIGNTEXT16     /*
+ * _mesa_x86_transform_points2_general
+ *
+ * For each source vertex, reading only S(0) and S(1):
+ *    D(0) = S(0)*MAT0 + S(1)*MAT4 + MAT12
+ *    D(1) = S(0)*MAT1 + S(1)*MAT5 + MAT13
+ *    D(2) = S(0)*MAT2 + S(1)*MAT6 + MAT14
+ *    D(3) = S(0)*MAT3 + S(1)*MAT7 + MAT15
+ * The dest vector is flagged VEC_SIZE_4, given size 4 and the source
+ * count.  Nothing is written when the source count is zero.
+ *
+ * In the x87 stack comments the leftmost name is ST(0).
+ */
+GLOBL GLNAME( _mesa_x86_transform_points2_general )
+HIDDEN(_mesa_x86_transform_points2_general)
+GLNAME( _mesa_x86_transform_points2_general ):
+#define FRAME_OFFSET 8  /* two 4-byte pushes below; presumably consumed by the ARG_* macros (xform_args.h) -- confirm */
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        MOV_L( ARG_SOURCE, ESI )
+        MOV_L( ARG_DEST, EDI )
+        MOV_L( ARG_MATRIX, EDX )
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+        TEST_L( ECX, ECX )
+        JZ( LLBL(x86_p2_gr_done) )      /* count == 0: leave dest untouched */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* source stride */
+        OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
+        MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
+        SHL_L( CONST(4), ECX )          /* count * 16 = bytes of dest data */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )    /* first source vertex */
+        MOV_L( REGOFF(V4F_START, EDI), EDI )    /* first dest vertex */
+        ADD_L( EDI, ECX )               /* ECX = end of dest data, the loop terminator */
+ALIGNTEXT16
+LLBL(x86_p2_gr_loop):
+        FLD_S( SRC0 )                   /* F4 */
+        FMUL_S( MAT0 )
+        FLD_S( SRC0 )                   /* F5 F4 */
+        FMUL_S( MAT1 )
+        FLD_S( SRC0 )                   /* F6 F5 F4 */
+        FMUL_S( MAT2 )
+        FLD_S( SRC0 )                   /* F7 F6 F5 F4 */
+        FMUL_S( MAT3 )
+        FLD_S( SRC1 )                   /* F0 F7 F6 F5 F4 */
+        FMUL_S( MAT4 )
+        FLD_S( SRC1 )                   /* F1 F0 F7 F6 F5 F4 */
+        FMUL_S( MAT5 )
+        FLD_S( SRC1 )                   /* F2 F1 F0 F7 F6 F5 F4 */
+        FMUL_S( MAT6 )
+        FLD_S( SRC1 )                   /* F3 F2 F1 F0 F7 F6 F5 F4 */
+        FMUL_S( MAT7 )
+        FXCH( ST(3) )                   /* F0 F2 F1 F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(7) )             /* F2 F1 F3 F7 F6 F5 F4 */
+        FXCH( ST(1) )                   /* F1 F2 F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(5) )             /* F2 F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(3) )             /* F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(1) )             /* F7 F6 F5 F4 */
+        FXCH( ST(3) )                   /* F4 F6 F5 F7 */
+        FADD_S( MAT12 )
+        FXCH( ST(2) )                   /* F5 F6 F4 F7 */
+        FADD_S( MAT13 )
+        FXCH( ST(1) )                   /* F6 F5 F4 F7 */
+        FADD_S( MAT14 )
+        FXCH( ST(3) )                   /* F7 F5 F4 F6 */
+        FADD_S( MAT15 )
+        FXCH( ST(2) )                   /* F4 F5 F7 F6 */
+        FSTP_S( DST0 )                  /* F5 F7 F6 */
+        FSTP_S( DST1 )                  /* F7 F6 */
+        FXCH( ST(1) )                   /* F6 F7 */
+        FSTP_S( DST2 )                  /* F7 */
+        FSTP_S( DST3 )                  /* */
+LLBL(x86_p2_gr_skip):
+        ADD_L( CONST(16), EDI )         /* next dest vertex (fixed 16 bytes) */
+        ADD_L( EAX, ESI )               /* next source vertex (source stride) */
+        CMP_L( ECX, EDI )               /* reached the end of the dest data? */
+        JNE( LLBL(x86_p2_gr_loop) )
+LLBL(x86_p2_gr_done):
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT16     /*
+ * _mesa_x86_transform_points2_perspective
+ *
+ * For each source vertex, reading only S(0) and S(1):
+ *    D(0) = S(0)*MAT0
+ *    D(1) = S(1)*MAT5
+ *    D(2) = MAT14        (copied as a raw 32-bit value, read once into EBX)
+ *    D(3) = 0.0
+ * The dest vector is flagged VEC_SIZE_4, given size 4 and the source
+ * count.  Nothing is written when the source count is zero.
+ *
+ * In the x87 stack comments the leftmost name is ST(0).
+ */
+GLOBL GLNAME( _mesa_x86_transform_points2_perspective )
+HIDDEN(_mesa_x86_transform_points2_perspective)
+GLNAME( _mesa_x86_transform_points2_perspective ):
+#define FRAME_OFFSET 12 /* three 4-byte pushes below; presumably consumed by the ARG_* macros (xform_args.h) -- confirm */
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        PUSH_L( EBX )
+        MOV_L( ARG_SOURCE, ESI )
+        MOV_L( ARG_DEST, EDI )
+        MOV_L( ARG_MATRIX, EDX )
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+        TEST_L( ECX, ECX )
+        JZ( LLBL(x86_p2_pr_done) )      /* count == 0: leave dest untouched */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* source stride */
+        OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
+        MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
+        SHL_L( CONST(4), ECX )          /* count * 16 = bytes of dest data */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )    /* first source vertex */
+        MOV_L( REGOFF(V4F_START, EDI), EDI )    /* first dest vertex */
+        ADD_L( EDI, ECX )               /* ECX = end of dest data, the loop terminator */
+        MOV_L( MAT14, EBX )             /* loop-invariant D(2) value, kept in EBX */
+ALIGNTEXT16
+LLBL(x86_p2_pr_loop):
+        FLD_S( SRC0 )                   /* F4 */
+        FMUL_S( MAT0 )
+        FLD_S( SRC1 )                   /* F1 F4 */
+        FMUL_S( MAT5 )
+        FXCH( ST(1) )                   /* F4 F1 */
+        FSTP_S( DST0   )                /* F1 */
+        FSTP_S( DST1   )                /* */
+        MOV_L( EBX, DST2 )              /* D(2) = MAT14 */
+        MOV_L( CONST(FP_ZERO), DST3 )   /* D(3) = 0.0 */
+LLBL(x86_p2_pr_skip):
+        ADD_L( CONST(16), EDI )         /* next dest vertex (fixed 16 bytes) */
+        ADD_L( EAX, ESI )               /* next source vertex (source stride) */
+        CMP_L( ECX, EDI )               /* reached the end of the dest data? */
+        JNE( LLBL(x86_p2_pr_loop) )
+LLBL(x86_p2_pr_done):
+        POP_L( EBX )
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT16     /*
+ * _mesa_x86_transform_points2_3d
+ *
+ * For each source vertex, reading only S(0) and S(1):
+ *    D(0) = S(0)*MAT0 + S(1)*MAT4 + MAT12
+ *    D(1) = S(0)*MAT1 + S(1)*MAT5 + MAT13
+ *    D(2) = S(0)*MAT2 + S(1)*MAT6 + MAT14
+ * D(3) is not written.  The dest vector is flagged VEC_SIZE_3, given
+ * size 3 and the source count.  Nothing is written when the source count
+ * is zero.
+ *
+ * In the x87 stack comments the leftmost name is ST(0).
+ */
+GLOBL GLNAME( _mesa_x86_transform_points2_3d )
+HIDDEN(_mesa_x86_transform_points2_3d)
+GLNAME( _mesa_x86_transform_points2_3d ):
+#define FRAME_OFFSET 8  /* two 4-byte pushes below; presumably consumed by the ARG_* macros (xform_args.h) -- confirm */
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        MOV_L( ARG_SOURCE, ESI )
+        MOV_L( ARG_DEST, EDI )
+        MOV_L( ARG_MATRIX, EDX )
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+        TEST_L( ECX, ECX )
+        JZ( LLBL(x86_p2_3dr_done) )     /* count == 0: leave dest untouched */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* source stride */
+        OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
+        MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
+        SHL_L( CONST(4), ECX )          /* count * 16 = bytes of dest data */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )    /* first source vertex */
+        MOV_L( REGOFF(V4F_START, EDI), EDI )    /* first dest vertex */
+        ADD_L( EDI, ECX )               /* ECX = end of dest data, the loop terminator */
+ALIGNTEXT16
+LLBL(x86_p2_3dr_loop):
+        FLD_S( SRC0 )                   /* F4 */
+        FMUL_S( MAT0 )
+        FLD_S( SRC0 )                   /* F5 F4 */
+        FMUL_S( MAT1 )
+        FLD_S( SRC0 )                   /* F6 F5 F4 */
+        FMUL_S( MAT2 )
+        FLD_S( SRC1 )                   /* F0 F6 F5 F4 */
+        FMUL_S( MAT4 )
+        FLD_S( SRC1 )                   /* F1 F0 F6 F5 F4 */
+        FMUL_S( MAT5 )
+        FLD_S( SRC1 )                   /* F2 F1 F0 F6 F5 F4 */
+        FMUL_S( MAT6 )
+        FXCH( ST(2) )                   /* F0 F1 F2 F6 F5 F4 */
+        FADDP( ST0, ST(5) )             /* F1 F2 F6 F5 F4 */
+        FADDP( ST0, ST(3) )             /* F2 F6 F5 F4 */
+        FADDP( ST0, ST(1) )             /* F6 F5 F4 */
+        FXCH( ST(2) )                   /* F4 F5 F6 */
+        FADD_S( MAT12 )
+        FXCH( ST(1) )                   /* F5 F4 F6 */
+        FADD_S( MAT13 )
+        FXCH( ST(2) )                   /* F6 F4 F5 */
+        FADD_S( MAT14 )
+        FXCH( ST(1) )                   /* F4 F6 F5 */
+        FSTP_S( DST0 )                  /* F6 F5 */
+        FXCH( ST(1) )                   /* F5 F6 */
+        FSTP_S( DST1 )                  /* F6 */
+        FSTP_S( DST2 )                  /* */
+LLBL(x86_p2_3dr_skip):
+        ADD_L( CONST(16), EDI )         /* next dest vertex (fixed 16 bytes) */
+        ADD_L( EAX, ESI )               /* next source vertex (source stride) */
+        CMP_L( ECX, EDI )               /* reached the end of the dest data? */
+        JNE( LLBL(x86_p2_3dr_loop) )
+LLBL(x86_p2_3dr_done):
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT16     /*
+ * _mesa_x86_transform_points2_3d_no_rot
+ *
+ * For each source vertex, reading only S(0) and S(1):
+ *    D(0) = S(0)*MAT0 + MAT12
+ *    D(1) = S(1)*MAT5 + MAT13
+ *    D(2) = MAT14        (copied as a raw 32-bit value, read once into EBX)
+ * D(3) is not written.  The dest vector is flagged VEC_SIZE_3, given
+ * size 3 and the source count.  Nothing is written when the source count
+ * is zero.
+ *
+ * In the x87 stack comments the leftmost name is ST(0).
+ */
+GLOBL GLNAME( _mesa_x86_transform_points2_3d_no_rot )
+HIDDEN(_mesa_x86_transform_points2_3d_no_rot)
+GLNAME( _mesa_x86_transform_points2_3d_no_rot ):
+#define FRAME_OFFSET 12 /* three 4-byte pushes below; presumably consumed by the ARG_* macros (xform_args.h) -- confirm */
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        PUSH_L( EBX )
+        MOV_L( ARG_SOURCE, ESI )
+        MOV_L( ARG_DEST, EDI )
+        MOV_L( ARG_MATRIX, EDX )
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+        TEST_L( ECX, ECX )
+        JZ( LLBL(x86_p2_3dnrr_done) )   /* count == 0: leave dest untouched */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* source stride */
+        OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
+        MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
+        SHL_L( CONST(4), ECX )          /* count * 16 = bytes of dest data */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )    /* first source vertex */
+        MOV_L( REGOFF(V4F_START, EDI), EDI )    /* first dest vertex */
+        ADD_L( EDI, ECX )               /* ECX = end of dest data, the loop terminator */
+        MOV_L( MAT14, EBX )             /* loop-invariant D(2) value, kept in EBX */
+ALIGNTEXT16
+LLBL(x86_p2_3dnrr_loop):
+        FLD_S( SRC0 )                   /* F4 */
+        FMUL_S( MAT0 )
+        FLD_S( SRC1 )                   /* F1 F4 */
+        FMUL_S( MAT5 )
+        FXCH( ST(1) )                   /* F4 F1 */
+        FADD_S( MAT12 )                 /* F4 = S(0)*MAT0 + MAT12 */
+        FLD_S( MAT13 )          /* F5 F4 F1 */
+        FXCH( ST(2) )                   /* F1 F4 F5 */
+        FADDP( ST0, ST(2) )             /* F4 F5 */
+        FSTP_S( DST0 )          /* F5 */
+        FSTP_S( DST1 )          /* */
+        MOV_L( EBX, DST2 )              /* D(2) = MAT14 */
+LLBL(x86_p2_3dnrr_skip):
+        ADD_L( CONST(16), EDI )         /* next dest vertex (fixed 16 bytes) */
+        ADD_L( EAX, ESI )               /* next source vertex (source stride) */
+        CMP_L( ECX, EDI )               /* reached the end of the dest data? */
+        JNE( LLBL(x86_p2_3dnrr_loop) )
+LLBL(x86_p2_3dnrr_done):
+        POP_L( EBX )
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT16 /*
+ * _mesa_x86_transform_points2_2d
+ *
+ * x87 loop over 2-component source elements.  With m[n] denoting the
+ * operand MATn, each element is transformed as
+ *     dst[0] = src[0] * m[0] + src[1] * m[4] + m[12]
+ *     dst[1] = src[0] * m[1] + src[1] * m[5] + m[13]
+ * Only the first two words of each 16-byte destination element are
+ * written.  The destination gets the source count, size 2 and the
+ * VEC_SIZE_2 flag.  A zero source count returns before the destination
+ * is touched.
+ * The trailing F-lists show the x87 stack, ST(0) first, after each
+ * instruction.
+ */
+GLOBL GLNAME( _mesa_x86_transform_points2_2d )
+HIDDEN(_mesa_x86_transform_points2_2d)
+GLNAME( _mesa_x86_transform_points2_2d ):
+#define FRAME_OFFSET 8 /* two 4-byte registers are pushed below */
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        MOV_L( ARG_SOURCE, ESI )
+        MOV_L( ARG_DEST, EDI )
+        MOV_L( ARG_MATRIX, EDX )
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+        TEST_L( ECX, ECX )
+        JZ( LLBL(x86_p2_2dr_done) )     /* nothing to do for count == 0 */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* EAX = source stride */
+        OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
+        MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
+        SHL_L( CONST(4), ECX )          /* count * 16 bytes per dest element */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )
+        MOV_L( REGOFF(V4F_START, EDI), EDI )
+        ADD_L( EDI, ECX )               /* ECX = end of the dest range */
+ALIGNTEXT16
+LLBL(x86_p2_2dr_loop):
+        FLD_S( SRC0 )                   /* F4 */
+        FMUL_S( MAT0 )
+        FLD_S( SRC0 )                   /* F5 F4 */
+        FMUL_S( MAT1 )
+        FLD_S( SRC1 )                   /* F0 F5 F4 */
+        FMUL_S( MAT4 )
+        FLD_S( SRC1 )                   /* F1 F0 F5 F4 */
+        FMUL_S( MAT5 )
+        FXCH( ST(1) )                   /* F0 F1 F5 F4 */
+        FADDP( ST0, ST(3) )             /* F1 F5 F4 */
+        FADDP( ST0, ST(1) )             /* F5 F4 */
+        FXCH( ST(1) )                   /* F4 F5 */
+        FADD_S( MAT12 )
+        FXCH( ST(1) )                   /* F5 F4 */
+        FADD_S( MAT13 )
+        FXCH( ST(1) )                   /* F4 F5 */
+        FSTP_S( DST0 )          /* F5 */
+        FSTP_S( DST1 )          /* */
+LLBL(x86_p2_2dr_skip):
+        ADD_L( CONST(16), EDI )         /* next dest element */
+        ADD_L( EAX, ESI )               /* next source element */
+        CMP_L( ECX, EDI )
+        JNE( LLBL(x86_p2_2dr_loop) )
+LLBL(x86_p2_2dr_done):
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT16 /*
+ * _mesa_x86_transform_points2_2d_no_rot
+ *
+ * x87 loop over 2-component source elements.  With m[n] denoting the
+ * operand MATn, each element is transformed as
+ *     dst[0] = src[0] * m[0] + m[12]
+ *     dst[1] = src[1] * m[5] + m[13]
+ * Only the first two words of each 16-byte destination element are
+ * written.  The destination gets the source count, size 2 and the
+ * VEC_SIZE_2 flag.  A zero source count returns before the destination
+ * is touched.
+ * The entry point is aligned with ALIGNTEXT16, like the other transform
+ * entry points in this file.
+ * The trailing F-lists show the x87 stack, ST(0) first, after each
+ * instruction.
+ */
+GLOBL GLNAME( _mesa_x86_transform_points2_2d_no_rot )
+HIDDEN(_mesa_x86_transform_points2_2d_no_rot)
+GLNAME( _mesa_x86_transform_points2_2d_no_rot ):
+#define FRAME_OFFSET 8 /* two 4-byte registers are pushed below */
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        MOV_L( ARG_SOURCE, ESI )
+        MOV_L( ARG_DEST, EDI )
+        MOV_L( ARG_MATRIX, EDX )
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+        TEST_L( ECX, ECX )
+        JZ( LLBL(x86_p2_2dnrr_done) )   /* nothing to do for count == 0 */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* EAX = source stride */
+        OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
+        MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
+        SHL_L( CONST(4), ECX )          /* count * 16 bytes per dest element */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )
+        MOV_L( REGOFF(V4F_START, EDI), EDI )
+        ADD_L( EDI, ECX )               /* ECX = end of the dest range */
+ALIGNTEXT16
+LLBL(x86_p2_2dnrr_loop):
+        FLD_S( SRC0 )                   /* F4 */
+        FMUL_S( MAT0 )
+        FLD_S( SRC1 )                   /* F1 F4 */
+        FMUL_S( MAT5 )
+        FXCH( ST(1) )                   /* F4 F1 */
+        FADD_S( MAT12 )
+        FLD_S( MAT13 )          /* F5 F4 F1 */
+        FXCH( ST(2) )                   /* F1 F4 F5 */
+        FADDP( ST0, ST(2) )             /* F4 F5 */
+        FSTP_S( DST0   )                /* F5 */
+        FSTP_S( DST1   )                /* */
+LLBL(x86_p2_2dnrr_skip):
+        ADD_L( CONST(16), EDI )         /* next dest element */
+        ADD_L( EAX, ESI )               /* next source element */
+        CMP_L( ECX, EDI )
+        JNE( LLBL(x86_p2_2dnrr_loop) )
+LLBL(x86_p2_2dnrr_done):
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT16 /*
+ * _mesa_x86_transform_points2_identity
+ *
+ * Copies the first two 32-bit words of every source element to the
+ * matching 16-byte destination element using integer moves:
+ *     dst[0] = src[0]
+ *     dst[1] = src[1]
+ * The destination gets the source count, size 2 and the VEC_SIZE_2 flag.
+ * A zero source count returns before the destination is touched.  When
+ * the source and destination data pointers are equal, the count, size and
+ * flag are still updated but the copy loop is skipped.
+ * The matrix argument is not read: no matrix element takes part in the
+ * copy, and EDX serves as a scratch register inside the loop.
+ */
+GLOBL GLNAME( _mesa_x86_transform_points2_identity )
+HIDDEN(_mesa_x86_transform_points2_identity)
+GLNAME( _mesa_x86_transform_points2_identity ):
+#define FRAME_OFFSET 12 /* three 4-byte registers are pushed below */
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        PUSH_L( EBX )
+        MOV_L( ARG_SOURCE, ESI )
+        MOV_L( ARG_DEST, EDI )
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+        TEST_L( ECX, ECX )
+        JZ( LLBL(x86_p2_ir_done) )      /* nothing to do for count == 0 */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* EAX = source stride */
+        OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
+        MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
+        SHL_L( CONST(4), ECX )          /* count * 16 bytes per dest element */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )
+        MOV_L( REGOFF(V4F_START, EDI), EDI )
+        ADD_L( EDI, ECX )               /* ECX = end of the dest range */
+        CMP_L( ESI, EDI )
+        JE( LLBL(x86_p2_ir_done) )      /* same data pointer: no copy needed */
+ALIGNTEXT16
+LLBL(x86_p2_ir_loop):
+        MOV_L( SRC0, EBX )
+        MOV_L( SRC1, EDX )
+        MOV_L( EBX, DST0 )
+        MOV_L( EDX, DST1 )
+LLBL(x86_p2_ir_skip):
+        ADD_L( CONST(16), EDI )         /* next dest element */
+        ADD_L( EAX, ESI )               /* next source element */
+        CMP_L( ECX, EDI )
+        JNE( LLBL(x86_p2_ir_loop) )
+LLBL(x86_p2_ir_done):
+        POP_L( EBX )
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+#if defined (__ELF__) && defined (__linux__)
+        .section .note.GNU-stack,"",%progbits
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/x86_xform3.S
 ,0 → 1,644
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/*
+ * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
+ * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
+ * in there will break the build on some platforms.
+ */
+#include "assyntax.h"
+#include "matypes.h"
+#include "xform_args.h"
+        SEG_TEXT
+#define FP_ONE          1065353216 /* 0x3F800000, the IEEE-754 single bit pattern of 1.0 */
+#define FP_ZERO         0
+#define SRC0            REGOFF(0, ESI) /* SRCn: offset 4*n relative to ESI */
+#define SRC1            REGOFF(4, ESI)
+#define SRC2            REGOFF(8, ESI)
+#define SRC3            REGOFF(12, ESI)
+#define DST0            REGOFF(0, EDI) /* DSTn: offset 4*n relative to EDI */
+#define DST1            REGOFF(4, EDI)
+#define DST2            REGOFF(8, EDI)
+#define DST3            REGOFF(12, EDI)
+#define MAT0            REGOFF(0, EDX) /* MATn: offset 4*n relative to EDX */
+#define MAT1            REGOFF(4, EDX)
+#define MAT2            REGOFF(8, EDX)
+#define MAT3            REGOFF(12, EDX)
+#define MAT4            REGOFF(16, EDX)
+#define MAT5            REGOFF(20, EDX)
+#define MAT6            REGOFF(24, EDX)
+#define MAT7            REGOFF(28, EDX)
+#define MAT8            REGOFF(32, EDX)
+#define MAT9            REGOFF(36, EDX)
+#define MAT10           REGOFF(40, EDX)
+#define MAT11           REGOFF(44, EDX)
+#define MAT12           REGOFF(48, EDX)
+#define MAT13           REGOFF(52, EDX)
+#define MAT14           REGOFF(56, EDX)
+#define MAT15           REGOFF(60, EDX)
+ALIGNTEXT16 /*
+ * _mesa_x86_transform_points3_general
+ *
+ * x87 loop over 3-component source elements.  With m[n] denoting the
+ * operand MATn, each element is transformed as, for i = 0..3,
+ *     dst[i] = src[0] * m[i] + src[1] * m[4+i] + src[2] * m[8+i] + m[12+i]
+ * The destination gets the source count, size 4 and the VEC_SIZE_4 flag.
+ * Source elements are V4F_STRIDE bytes apart, destination elements 16.
+ * A zero source count returns before the destination is touched.
+ * The trailing F-lists show the x87 stack, ST(0) first, after each
+ * instruction; all eight x87 registers are in use at the peak.
+ */
+GLOBL GLNAME( _mesa_x86_transform_points3_general )
+HIDDEN(_mesa_x86_transform_points3_general)
+GLNAME( _mesa_x86_transform_points3_general ):
+#define FRAME_OFFSET 8 /* two 4-byte registers are pushed below */
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        MOV_L( ARG_SOURCE, ESI )
+        MOV_L( ARG_DEST, EDI )
+        MOV_L( ARG_MATRIX, EDX )
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+        TEST_L( ECX, ECX )
+        JZ( LLBL(x86_p3_gr_done) )      /* nothing to do for count == 0 */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* EAX = source stride */
+        OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
+        MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
+        SHL_L( CONST(4), ECX )          /* count * 16 bytes per dest element */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )
+        MOV_L( REGOFF(V4F_START, EDI), EDI )
+        ADD_L( EDI, ECX )               /* ECX = end of the dest range */
+ALIGNTEXT16
+LLBL(x86_p3_gr_loop):
+        FLD_S( SRC0 )                   /* F4 */
+        FMUL_S( MAT0 )
+        FLD_S( SRC0 )                   /* F5 F4 */
+        FMUL_S( MAT1 )
+        FLD_S( SRC0 )                   /* F6 F5 F4 */
+        FMUL_S( MAT2 )
+        FLD_S( SRC0 )                   /* F7 F6 F5 F4 */
+        FMUL_S( MAT3 )
+        FLD_S( SRC1 )                   /* F0 F7 F6 F5 F4 */
+        FMUL_S( MAT4 )
+        FLD_S( SRC1 )                   /* F1 F0 F7 F6 F5 F4 */
+        FMUL_S( MAT5 )
+        FLD_S( SRC1 )                   /* F2 F1 F0 F7 F6 F5 F4 */
+        FMUL_S( MAT6 )
+        FLD_S( SRC1 )                   /* F3 F2 F1 F0 F7 F6 F5 F4 */
+        FMUL_S( MAT7 )
+        FXCH( ST(3) )                   /* F0 F2 F1 F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(7) )             /* F2 F1 F3 F7 F6 F5 F4 */
+        FXCH( ST(1) )                   /* F1 F2 F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(5) )             /* F2 F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(3) )             /* F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(1) )             /* F7 F6 F5 F4 */
+        FLD_S( SRC2 )                   /* F0 F7 F6 F5 F4 */
+        FMUL_S( MAT8 )
+        FLD_S( SRC2 )                   /* F1 F0 F7 F6 F5 F4 */
+        FMUL_S( MAT9 )
+        FLD_S( SRC2 )                   /* F2 F1 F0 F7 F6 F5 F4 */
+        FMUL_S( MAT10 )
+        FLD_S( SRC2 )                   /* F3 F2 F1 F0 F7 F6 F5 F4 */
+        FMUL_S( MAT11 )
+        FXCH( ST(3) )                   /* F0 F2 F1 F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(7) )             /* F2 F1 F3 F7 F6 F5 F4 */
+        FXCH( ST(1) )                   /* F1 F2 F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(5) )             /* F2 F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(3) )             /* F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(1) )             /* F7 F6 F5 F4 */
+        FXCH( ST(3) )                   /* F4 F6 F5 F7 */
+        FADD_S( MAT12 )
+        FXCH( ST(2) )                   /* F5 F6 F4 F7 */
+        FADD_S( MAT13 )
+        FXCH( ST(1) )                   /* F6 F5 F4 F7 */
+        FADD_S( MAT14 )
+        FXCH( ST(3) )                   /* F7 F5 F4 F6 */
+        FADD_S( MAT15 )
+        FXCH( ST(2) )                   /* F4 F5 F7 F6 */
+        FSTP_S( DST0 )          /* F5 F7 F6 */
+        FSTP_S( DST1 )          /* F7 F6 */
+        FXCH( ST(1) )                   /* F6 F7 */
+        FSTP_S( DST2 )          /* F7 */
+        FSTP_S( DST3 )          /* */
+LLBL(x86_p3_gr_skip):
+        ADD_L( CONST(16), EDI )         /* next dest element */
+        ADD_L( EAX, ESI )               /* next source element */
+        CMP_L( ECX, EDI )
+        JNE( LLBL(x86_p3_gr_loop) )
+LLBL(x86_p3_gr_done):
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT16 /*
+ * _mesa_x86_transform_points3_perspective
+ *
+ * x87 loop over 3-component source elements.  With m[n] denoting the
+ * operand MATn, each element is transformed as
+ *     dst[0] = src[0] * m[0] + src[2] * m[8]
+ *     dst[1] = src[1] * m[5] + src[2] * m[9]
+ *     dst[2] = src[2] * m[10] + m[14]
+ *     dst[3] = src[2] with bit 31 (the float sign bit) inverted
+ * The destination gets the source count, size 4 and the VEC_SIZE_4 flag.
+ * A zero source count returns before the destination is touched.
+ * The trailing F-lists show the x87 stack, ST(0) first, after each
+ * instruction.
+ */
+GLOBL GLNAME( _mesa_x86_transform_points3_perspective )
+HIDDEN(_mesa_x86_transform_points3_perspective)
+GLNAME( _mesa_x86_transform_points3_perspective ):
+#define FRAME_OFFSET 12 /* three 4-byte registers are pushed below */
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        PUSH_L( EBX )
+        MOV_L( ARG_SOURCE, ESI )
+        MOV_L( ARG_DEST, EDI )
+        MOV_L( ARG_MATRIX, EDX )
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+        TEST_L( ECX, ECX )
+        JZ( LLBL(x86_p3_pr_done) )      /* nothing to do for count == 0 */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* EAX = source stride */
+        OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
+        MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
+        SHL_L( CONST(4), ECX )          /* count * 16 bytes per dest element */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )
+        MOV_L( REGOFF(V4F_START, EDI), EDI )
+        ADD_L( EDI, ECX )               /* ECX = end of the dest range */
+ALIGNTEXT16
+LLBL(x86_p3_pr_loop):
+        FLD_S( SRC0 )                   /* F4 */
+        FMUL_S( MAT0 )
+        FLD_S( SRC1 )                   /* F5 F4 */
+        FMUL_S( MAT5 )
+        FLD_S( SRC2 )                   /* F0 F5 F4 */
+        FMUL_S( MAT8 )
+        FLD_S( SRC2 )                   /* F1 F0 F5 F4 */
+        FMUL_S( MAT9 )
+        FLD_S( SRC2 )                   /* F2 F1 F0 F5 F4 */
+        FMUL_S( MAT10 )
+        FXCH( ST(2) )                   /* F0 F1 F2 F5 F4 */
+        FADDP( ST0, ST(4) )             /* F1 F2 F5 F4 */
+        FADDP( ST0, ST(2) )             /* F2 F5 F4 */
+        FLD_S( MAT14 )          /* F6 F2 F5 F4 */
+        FXCH( ST(1) )                   /* F2 F6 F5 F4 */
+        FADDP( ST0, ST(1) )             /* F6 F5 F4 */
+        MOV_L( SRC2, EBX )              /* raw bits of src[2] for dst[3] */
+        XOR_L( CONST(-2147483648), EBX )/* change sign */
+        FXCH( ST(2) )                   /* F4 F5 F6 */
+        FSTP_S( DST0 )          /* F5 F6 */
+        FSTP_S( DST1 )          /* F6 */
+        FSTP_S( DST2 )          /* */
+        MOV_L( EBX, DST3 )
+LLBL(x86_p3_pr_skip):
+        ADD_L( CONST(16), EDI )         /* next dest element */
+        ADD_L( EAX, ESI )               /* next source element */
+        CMP_L( ECX, EDI )
+        JNE( LLBL(x86_p3_pr_loop) )
+LLBL(x86_p3_pr_done):
+        POP_L( EBX )
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT16 /*
+ * _mesa_x86_transform_points3_3d
+ *
+ * x87 loop over 3-component source elements.  With m[n] denoting the
+ * operand MATn, each element is transformed as, for i = 0..2,
+ *     dst[i] = src[0] * m[i] + src[1] * m[4+i] + src[2] * m[8+i] + m[12+i]
+ * Only the first three words of each 16-byte destination element are
+ * written.  The destination gets the source count, size 3 and the
+ * VEC_SIZE_3 flag.  A zero source count returns before the destination
+ * is touched.
+ * The trailing F-lists show the x87 stack, ST(0) first, after each
+ * instruction.
+ */
+GLOBL GLNAME( _mesa_x86_transform_points3_3d )
+HIDDEN(_mesa_x86_transform_points3_3d)
+GLNAME( _mesa_x86_transform_points3_3d ):
+#define FRAME_OFFSET 8 /* two 4-byte registers are pushed below */
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        MOV_L( ARG_SOURCE, ESI )
+        MOV_L( ARG_DEST, EDI )
+        MOV_L( ARG_MATRIX, EDX )
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+        TEST_L( ECX, ECX )
+        JZ( LLBL(x86_p3_3dr_done) )     /* nothing to do for count == 0 */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* EAX = source stride */
+        OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
+        MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
+        SHL_L( CONST(4), ECX )          /* count * 16 bytes per dest element */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )
+        MOV_L( REGOFF(V4F_START, EDI), EDI )
+        ADD_L( EDI, ECX )               /* ECX = end of the dest range */
+ALIGNTEXT16
+LLBL(x86_p3_3dr_loop):
+        FLD_S( SRC0 )                   /* F4 */
+        FMUL_S( MAT0 )
+        FLD_S( SRC0 )                   /* F5 F4 */
+        FMUL_S( MAT1 )
+        FLD_S( SRC0 )                   /* F6 F5 F4 */
+        FMUL_S( MAT2 )
+        FLD_S( SRC1 )                   /* F0 F6 F5 F4 */
+        FMUL_S( MAT4 )
+        FLD_S( SRC1 )                   /* F1 F0 F6 F5 F4 */
+        FMUL_S( MAT5 )
+        FLD_S( SRC1 )                   /* F2 F1 F0 F6 F5 F4 */
+        FMUL_S( MAT6 )
+        FXCH( ST(2) )                   /* F0 F1 F2 F6 F5 F4 */
+        FADDP( ST0, ST(5) )             /* F1 F2 F6 F5 F4 */
+        FADDP( ST0, ST(3) )             /* F2 F6 F5 F4 */
+        FADDP( ST0, ST(1) )             /* F6 F5 F4 */
+        FLD_S( SRC2 )                   /* F0 F6 F5 F4 */
+        FMUL_S( MAT8 )
+        FLD_S( SRC2 )                   /* F1 F0 F6 F5 F4 */
+        FMUL_S( MAT9 )
+        FLD_S( SRC2 )                   /* F2 F1 F0 F6 F5 F4 */
+        FMUL_S( MAT10 )
+        FXCH( ST(2) )                   /* F0 F1 F2 F6 F5 F4 */
+        FADDP( ST0, ST(5) )             /* F1 F2 F6 F5 F4 */
+        FADDP( ST0, ST(3) )             /* F2 F6 F5 F4 */
+        FADDP( ST0, ST(1) )             /* F6 F5 F4 */
+        FXCH( ST(2) )                   /* F4 F5 F6 */
+        FADD_S( MAT12 )
+        FXCH( ST(1) )                   /* F5 F4 F6 */
+        FADD_S( MAT13 )
+        FXCH( ST(2) )                   /* F6 F4 F5 */
+        FADD_S( MAT14 )
+        FXCH( ST(1) )                   /* F4 F6 F5 */
+        FSTP_S( DST0   )                /* F6 F5 */
+        FXCH( ST(1) )                   /* F5 F6 */
+        FSTP_S( DST1   )                /* F6 */
+        FSTP_S( DST2   )                /* */
+LLBL(x86_p3_3dr_skip):
+        ADD_L( CONST(16), EDI )         /* next dest element */
+        ADD_L( EAX, ESI )               /* next source element */
+        CMP_L( ECX, EDI )
+        JNE( LLBL(x86_p3_3dr_loop) )
+LLBL(x86_p3_3dr_done):
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT16 /*
+ * _mesa_x86_transform_points3_3d_no_rot
+ *
+ * x87 loop over 3-component source elements.  With m[n] denoting the
+ * operand MATn, each element is transformed as
+ *     dst[0] = src[0] * m[0]  + m[12]
+ *     dst[1] = src[1] * m[5]  + m[13]
+ *     dst[2] = src[2] * m[10] + m[14]
+ * Only the first three words of each 16-byte destination element are
+ * written.  The destination gets the source count, size 3 and the
+ * VEC_SIZE_3 flag.  A zero source count returns before the destination
+ * is touched.
+ * The trailing F-lists show the x87 stack, ST(0) first, after each
+ * instruction.
+ */
+GLOBL GLNAME( _mesa_x86_transform_points3_3d_no_rot )
+HIDDEN(_mesa_x86_transform_points3_3d_no_rot)
+GLNAME( _mesa_x86_transform_points3_3d_no_rot ):
+#define FRAME_OFFSET 8 /* two 4-byte registers are pushed below */
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        MOV_L( ARG_SOURCE, ESI )
+        MOV_L( ARG_DEST, EDI )
+        MOV_L( ARG_MATRIX, EDX )
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+        TEST_L( ECX, ECX )
+        JZ( LLBL(x86_p3_3dnrr_done) )   /* nothing to do for count == 0 */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* EAX = source stride */
+        OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
+        MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
+        SHL_L( CONST(4), ECX )          /* count * 16 bytes per dest element */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )
+        MOV_L( REGOFF(V4F_START, EDI), EDI )
+        ADD_L( EDI, ECX )               /* ECX = end of the dest range */
+ALIGNTEXT16
+LLBL(x86_p3_3dnrr_loop):
+        FLD_S( SRC0 )                   /* F4 */
+        FMUL_S( MAT0 )
+        FLD_S( SRC1 )                   /* F1 F4 */
+        FMUL_S( MAT5 )
+        FLD_S( SRC2 )                   /* F2 F1 F4 */
+        FMUL_S( MAT10 )
+        FXCH( ST(2) )                   /* F4 F1 F2 */
+        FADD_S( MAT12 )
+        FLD_S( MAT13 )          /* F5 F4 F1 F2 */
+        FXCH( ST(2) )                   /* F1 F4 F5 F2 */
+        FADDP( ST0, ST(2) )             /* F4 F5 F2 */
+        FLD_S( MAT14 )          /* F6 F4 F5 F2 */
+        FXCH( ST(3) )                   /* F2 F4 F5 F6 */
+        FADDP( ST0, ST(3) )             /* F4 F5 F6 */
+        FSTP_S( DST0   )                /* F5 F6 */
+        FSTP_S( DST1   )                /* F6 */
+        FSTP_S( DST2   )                /* */
+LLBL(x86_p3_3dnrr_skip):
+        ADD_L( CONST(16), EDI )         /* next dest element */
+        ADD_L( EAX, ESI )               /* next source element */
+        CMP_L( ECX, EDI )
+        JNE( LLBL(x86_p3_3dnrr_loop) )
+LLBL(x86_p3_3dnrr_done):
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT16 /*
+ * _mesa_x86_transform_points3_2d
+ *
+ * x87 loop over 3-component source elements.  With m[n] denoting the
+ * operand MATn, each element is transformed as
+ *     dst[0] = src[0] * m[0] + src[1] * m[4] + m[12]
+ *     dst[1] = src[0] * m[1] + src[1] * m[5] + m[13]
+ *     dst[2] = src[2]   (copied as a raw 32-bit word through EBX)
+ * The destination gets the source count, size 3 and the VEC_SIZE_3 flag.
+ * A zero source count returns before the destination is touched.
+ * The trailing F-lists show the x87 stack, ST(0) first, after each
+ * instruction.
+ */
+GLOBL GLNAME( _mesa_x86_transform_points3_2d )
+HIDDEN(_mesa_x86_transform_points3_2d)
+GLNAME( _mesa_x86_transform_points3_2d ):
+#define FRAME_OFFSET 12 /* three 4-byte registers are pushed below */
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        PUSH_L( EBX )
+        MOV_L( ARG_SOURCE, ESI )
+        MOV_L( ARG_DEST, EDI )
+        MOV_L( ARG_MATRIX, EDX )
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+        TEST_L( ECX, ECX )
+        JZ( LLBL(x86_p3_2dr_done) )     /* nothing to do for count == 0 */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* EAX = source stride */
+        OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
+        MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
+        SHL_L( CONST(4), ECX )          /* count * 16 bytes per dest element */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )
+        MOV_L( REGOFF(V4F_START, EDI), EDI )
+        ADD_L( EDI, ECX )               /* ECX = end of the dest range */
+ALIGNTEXT16
+LLBL(x86_p3_2dr_loop):
+        FLD_S( SRC0 )                   /* F4 */
+        FMUL_S( MAT0 )
+        FLD_S( SRC0 )                   /* F5 F4 */
+        FMUL_S( MAT1 )
+        FLD_S( SRC1 )                   /* F0 F5 F4 */
+        FMUL_S( MAT4 )
+        FLD_S( SRC1 )                   /* F1 F0 F5 F4 */
+        FMUL_S( MAT5 )
+        FXCH( ST(1) )                   /* F0 F1 F5 F4 */
+        FADDP( ST0, ST(3) )             /* F1 F5 F4 */
+        FADDP( ST0, ST(1) )             /* F5 F4 */
+        FXCH( ST(1) )                   /* F4 F5 */
+        FADD_S( MAT12 )
+        FXCH( ST(1) )                   /* F5 F4 */
+        FADD_S( MAT13 )
+        MOV_L( SRC2, EBX )              /* src[2] passes through unchanged */
+        FXCH( ST(1) )                   /* F4 F5 */
+        FSTP_S( DST0   )                /* F5 */
+        FSTP_S( DST1   )                /* */
+        MOV_L( EBX, DST2 )
+LLBL(x86_p3_2dr_skip):
+        ADD_L( CONST(16), EDI )         /* next dest element */
+        ADD_L( EAX, ESI )               /* next source element */
+        CMP_L( ECX, EDI )
+        JNE( LLBL(x86_p3_2dr_loop) )
+LLBL(x86_p3_2dr_done):
+        POP_L( EBX )
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT16 /*
+ * _mesa_x86_transform_points3_2d_no_rot
+ *
+ * x87 loop over 3-component source elements.  With m[n] denoting the
+ * operand MATn, each element is transformed as
+ *     dst[0] = src[0] * m[0] + m[12]
+ *     dst[1] = src[1] * m[5] + m[13]
+ *     dst[2] = src[2]   (copied as a raw 32-bit word through EBX)
+ * The destination gets the source count, size 3 and the VEC_SIZE_3 flag.
+ * A zero source count returns before the destination is touched.
+ * The trailing F-lists show the x87 stack, ST(0) first, after each
+ * instruction.
+ */
+GLOBL GLNAME( _mesa_x86_transform_points3_2d_no_rot )
+HIDDEN(_mesa_x86_transform_points3_2d_no_rot)
+GLNAME( _mesa_x86_transform_points3_2d_no_rot ):
+#define FRAME_OFFSET 12 /* three 4-byte registers are pushed below */
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        PUSH_L( EBX )
+        MOV_L( ARG_SOURCE, ESI )
+        MOV_L( ARG_DEST, EDI )
+        MOV_L( ARG_MATRIX, EDX )
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+        TEST_L( ECX, ECX )
+        JZ( LLBL(x86_p3_2dnrr_done) )   /* nothing to do for count == 0 */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* EAX = source stride */
+        OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
+        MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
+        SHL_L( CONST(4), ECX )          /* count * 16 bytes per dest element */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )
+        MOV_L( REGOFF(V4F_START, EDI), EDI )
+        ADD_L( EDI, ECX )               /* ECX = end of the dest range */
+ALIGNTEXT16
+LLBL(x86_p3_2dnrr_loop):
+        FLD_S( SRC0 )                   /* F4 */
+        FMUL_S( MAT0 )
+        FLD_S( SRC1 )                   /* F1 F4 */
+        FMUL_S( MAT5 )
+        FXCH( ST(1) )                   /* F4 F1 */
+        FADD_S( MAT12 )
+        FLD_S( MAT13 )          /* F5 F4 F1 */
+        FXCH( ST(2) )                   /* F1 F4 F5 */
+        FADDP( ST0, ST(2) )             /* F4 F5 */
+        MOV_L( SRC2, EBX )              /* src[2] passes through unchanged */
+        FSTP_S( DST0 )          /* F5 */
+        FSTP_S( DST1 )          /* */
+        MOV_L( EBX, DST2 )
+LLBL(x86_p3_2dnrr_skip):
+        ADD_L( CONST(16), EDI )         /* next dest element */
+        ADD_L( EAX, ESI )               /* next source element */
+        CMP_L( ECX, EDI )
+        JNE( LLBL(x86_p3_2dnrr_loop) )
+LLBL(x86_p3_2dnrr_done):
+        POP_L( EBX )
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT16 /*
+ * _mesa_x86_transform_points3_identity
+ *
+ * Copies the first three 32-bit words of every source element to the
+ * matching 16-byte destination element using integer moves:
+ *     dst[0] = src[0]
+ *     dst[1] = src[1]
+ *     dst[2] = src[2]
+ * The destination gets the source count, size 3 and the VEC_SIZE_3 flag.
+ * A zero source count returns before the destination is touched.  When
+ * the source and destination data pointers are equal, the count, size and
+ * flag are still updated but the copy loop is skipped.
+ * The matrix argument is not read: no matrix element takes part in the
+ * copy, and EDX serves as a scratch register inside the loop.
+ */
+GLOBL GLNAME( _mesa_x86_transform_points3_identity )
+HIDDEN(_mesa_x86_transform_points3_identity)
+GLNAME(_mesa_x86_transform_points3_identity ):
+#define FRAME_OFFSET 16 /* four 4-byte registers are pushed below */
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        PUSH_L( EBX )
+        PUSH_L( EBP )
+        MOV_L( ARG_SOURCE, ESI )
+        MOV_L( ARG_DEST, EDI )
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+        TEST_L( ECX, ECX )
+        JZ( LLBL(x86_p3_ir_done) )      /* nothing to do for count == 0 */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* EAX = source stride */
+        OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
+        MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
+        SHL_L( CONST(4), ECX )          /* count * 16 bytes per dest element */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )
+        MOV_L( REGOFF(V4F_START, EDI), EDI )
+        ADD_L( EDI, ECX )               /* ECX = end of the dest range */
+        CMP_L( ESI, EDI )
+        JE( LLBL(x86_p3_ir_done) )      /* same data pointer: no copy needed */
+ALIGNTEXT16
+LLBL(x86_p3_ir_loop):
+        MOV_L( SRC0, EBX )
+        MOV_L( SRC1, EBP )
+        MOV_L( SRC2, EDX )
+        MOV_L( EBX, DST0 )
+        MOV_L( EBP, DST1 )
+        MOV_L( EDX, DST2 )
+LLBL(x86_p3_ir_skip):
+        ADD_L( CONST(16), EDI )         /* next dest element */
+        ADD_L( EAX, ESI )               /* next source element */
+        CMP_L( ECX, EDI )
+        JNE( LLBL(x86_p3_ir_loop) )
+LLBL(x86_p3_ir_done):
+        POP_L( EBP )
+        POP_L( EBX )
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+#if defined (__ELF__) && defined (__linux__)
+        .section .note.GNU-stack,"",%progbits
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/x86_xform4.S
 ,0 → 1,677
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/*
+ * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
+ * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
+ * in there will break the build on some platforms.
+ */
+#include "assyntax.h"
+#include "matypes.h"
+#include "xform_args.h"
+        SEG_TEXT
+#define FP_ONE          1065353216 /* 0x3F800000, the IEEE-754 single bit pattern of 1.0 */
+#define FP_ZERO         0
+#define SRC0            REGOFF(0, ESI) /* SRCn: offset 4*n relative to ESI */
+#define SRC1            REGOFF(4, ESI)
+#define SRC2            REGOFF(8, ESI)
+#define SRC3            REGOFF(12, ESI)
+#define DST0            REGOFF(0, EDI) /* DSTn: offset 4*n relative to EDI */
+#define DST1            REGOFF(4, EDI)
+#define DST2            REGOFF(8, EDI)
+#define DST3            REGOFF(12, EDI)
+#define MAT0            REGOFF(0, EDX) /* MATn: offset 4*n relative to EDX */
+#define MAT1            REGOFF(4, EDX)
+#define MAT2            REGOFF(8, EDX)
+#define MAT3            REGOFF(12, EDX)
+#define MAT4            REGOFF(16, EDX)
+#define MAT5            REGOFF(20, EDX)
+#define MAT6            REGOFF(24, EDX)
+#define MAT7            REGOFF(28, EDX)
+#define MAT8            REGOFF(32, EDX)
+#define MAT9            REGOFF(36, EDX)
+#define MAT10           REGOFF(40, EDX)
+#define MAT11           REGOFF(44, EDX)
+#define MAT12           REGOFF(48, EDX)
+#define MAT13           REGOFF(52, EDX)
+#define MAT14           REGOFF(56, EDX)
+#define MAT15           REGOFF(60, EDX)
+ALIGNTEXT16 /*
+ * _mesa_x86_transform_points4_general
+ *
+ * x87 loop over 4-component source elements.  With m[n] denoting the
+ * operand MATn, each element is transformed as, for i = 0..3,
+ *     dst[i] = src[0] * m[i]   + src[1] * m[4+i]
+ *            + src[2] * m[8+i] + src[3] * m[12+i]
+ * The destination gets the source count, size 4 and the VEC_SIZE_4 flag.
+ * Source elements are V4F_STRIDE bytes apart, destination elements 16.
+ * A zero source count returns before the destination is touched.
+ * The trailing F-lists show the x87 stack, ST(0) first, after each
+ * instruction; all eight x87 registers are in use at the peak.
+ */
+GLOBL GLNAME( _mesa_x86_transform_points4_general )
+HIDDEN(_mesa_x86_transform_points4_general)
+GLNAME( _mesa_x86_transform_points4_general ):
+#define FRAME_OFFSET 8 /* two 4-byte registers are pushed below */
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        MOV_L( ARG_SOURCE, ESI )
+        MOV_L( ARG_DEST, EDI )
+        MOV_L( ARG_MATRIX, EDX )
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+        TEST_L( ECX, ECX )
+        JZ( LLBL(x86_p4_gr_done) )      /* nothing to do for count == 0 */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* EAX = source stride */
+        OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
+        MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
+        SHL_L( CONST(4), ECX )          /* count * 16 bytes per dest element */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )
+        MOV_L( REGOFF(V4F_START, EDI), EDI )
+        ADD_L( EDI, ECX )               /* ECX = end of the dest range */
+ALIGNTEXT16
+LLBL(x86_p4_gr_loop):
+        FLD_S( SRC0 )                   /* F4 */
+        FMUL_S( MAT0 )
+        FLD_S( SRC0 )                   /* F5 F4 */
+        FMUL_S( MAT1 )
+        FLD_S( SRC0 )                   /* F6 F5 F4 */
+        FMUL_S( MAT2 )
+        FLD_S( SRC0 )                   /* F7 F6 F5 F4 */
+        FMUL_S( MAT3 )
+        FLD_S( SRC1 )                   /* F0 F7 F6 F5 F4 */
+        FMUL_S( MAT4 )
+        FLD_S( SRC1 )                   /* F1 F0 F7 F6 F5 F4 */
+        FMUL_S( MAT5 )
+        FLD_S( SRC1 )                   /* F2 F1 F0 F7 F6 F5 F4 */
+        FMUL_S( MAT6 )
+        FLD_S( SRC1 )                   /* F3 F2 F1 F0 F7 F6 F5 F4 */
+        FMUL_S( MAT7 )
+        FXCH( ST(3) )                   /* F0 F2 F1 F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(7) )             /* F2 F1 F3 F7 F6 F5 F4 */
+        FXCH( ST(1) )                   /* F1 F2 F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(5) )             /* F2 F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(3) )             /* F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(1) )             /* F7 F6 F5 F4 */
+        FLD_S( SRC2 )                   /* F0 F7 F6 F5 F4 */
+        FMUL_S( MAT8 )
+        FLD_S( SRC2 )                   /* F1 F0 F7 F6 F5 F4 */
+        FMUL_S( MAT9 )
+        FLD_S( SRC2 )                   /* F2 F1 F0 F7 F6 F5 F4 */
+        FMUL_S( MAT10 )
+        FLD_S( SRC2 )                   /* F3 F2 F1 F0 F7 F6 F5 F4 */
+        FMUL_S( MAT11 )
+        FXCH( ST(3) )                   /* F0 F2 F1 F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(7) )             /* F2 F1 F3 F7 F6 F5 F4 */
+        FXCH( ST(1) )                   /* F1 F2 F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(5) )             /* F2 F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(3) )             /* F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(1) )             /* F7 F6 F5 F4 */
+        FLD_S( SRC3 )                   /* F0 F7 F6 F5 F4 */
+        FMUL_S( MAT12 )
+        FLD_S( SRC3 )                   /* F1 F0 F7 F6 F5 F4 */
+        FMUL_S( MAT13 )
+        FLD_S( SRC3 )                   /* F2 F1 F0 F7 F6 F5 F4 */
+        FMUL_S( MAT14 )
+        FLD_S( SRC3 )                   /* F3 F2 F1 F0 F7 F6 F5 F4 */
+        FMUL_S( MAT15 )
+        FXCH( ST(3) )                   /* F0 F2 F1 F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(7) )             /* F2 F1 F3 F7 F6 F5 F4 */
+        FXCH( ST(1) )                   /* F1 F2 F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(5) )             /* F2 F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(3) )             /* F3 F7 F6 F5 F4 */
+        FADDP( ST0, ST(1) )             /* F7 F6 F5 F4 */
+        FXCH( ST(3) )                   /* F4 F6 F5 F7 */
+        FSTP_S( DST0 )          /* F6 F5 F7 */
+        FXCH( ST(1) )                   /* F5 F6 F7 */
+        FSTP_S( DST1 )          /* F6 F7 */
+        FSTP_S( DST2 )          /* F7 */
+        FSTP_S( DST3 )          /* */
+LLBL(x86_p4_gr_skip):
+        ADD_L( CONST(16), EDI )         /* next dest element */
+        ADD_L( EAX, ESI )               /* next source element */
+        CMP_L( ECX, EDI )
+        JNE( LLBL(x86_p4_gr_loop) )
+LLBL(x86_p4_gr_done):
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT16 /*
+ * _mesa_x86_transform_points4_perspective
+ *
+ * x87 loop over 4-component source elements.  With m[n] denoting the
+ * operand MATn, each element is transformed as
+ *     dst[0] = src[0] * m[0] + src[2] * m[8]
+ *     dst[1] = src[1] * m[5] + src[2] * m[9]
+ *     dst[2] = src[2] * m[10] + src[3] * m[14]
+ *     dst[3] = src[2] with bit 31 (the float sign bit) inverted
+ * The destination gets the source count, size 4 and the VEC_SIZE_4 flag.
+ * A zero source count returns before the destination is touched.
+ * The trailing F-lists show the x87 stack, ST(0) first, after each
+ * instruction.
+ */
+GLOBL GLNAME( _mesa_x86_transform_points4_perspective )
+HIDDEN(_mesa_x86_transform_points4_perspective)
+GLNAME( _mesa_x86_transform_points4_perspective ):
+#define FRAME_OFFSET 12 /* three 4-byte registers are pushed below */
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        PUSH_L( EBX )
+        MOV_L( ARG_SOURCE, ESI )
+        MOV_L( ARG_DEST, EDI )
+        MOV_L( ARG_MATRIX, EDX )
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+        TEST_L( ECX, ECX )
+        JZ( LLBL(x86_p4_pr_done) )      /* nothing to do for count == 0 */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )   /* EAX = source stride */
+        OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
+        MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
+        SHL_L( CONST(4), ECX )          /* count * 16 bytes per dest element */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )
+        MOV_L( REGOFF(V4F_START, EDI), EDI )
+        ADD_L( EDI, ECX )               /* ECX = end of the dest range */
+ALIGNTEXT16
+LLBL(x86_p4_pr_loop):
+        FLD_S( SRC0 )                   /* F4 */
+        FMUL_S( MAT0 )
+        FLD_S( SRC1 )                   /* F5 F4 */
+        FMUL_S( MAT5 )
+        FLD_S( SRC2 )                   /* F0 F5 F4 */
+        FMUL_S( MAT8 )
+        FLD_S( SRC2 )                   /* F1 F0 F5 F4 */
+        FMUL_S( MAT9 )
+        FLD_S( SRC2 )                   /* F6 F1 F0 F5 F4 */
+        FMUL_S( MAT10 )
+        FXCH( ST(2) )                   /* F0 F1 F6 F5 F4 */
+        FADDP( ST0, ST(4) )             /* F1 F6 F5 F4 */
+        FADDP( ST0, ST(2) )             /* F6 F5 F4 */
+        FLD_S( SRC3 )                   /* F2 F6 F5 F4 */
+        FMUL_S( MAT14 )
+        FADDP( ST0, ST(1) )             /* F6 F5 F4 */
+        MOV_L( SRC2, EBX )              /* raw bits of src[2] for dst[3] */
+        XOR_L( CONST(-2147483648), EBX )/* change sign */
+        FXCH( ST(2) )                   /* F4 F5 F6 */
+        FSTP_S( DST0 )          /* F5 F6 */
+        FSTP_S( DST1 )          /* F6 */
+        FSTP_S( DST2 )          /* */
+        MOV_L( EBX, DST3 )
+LLBL(x86_p4_pr_skip):
+        ADD_L( CONST(16), EDI )         /* next dest element */
+        ADD_L( EAX, ESI )               /* next source element */
+        CMP_L( ECX, EDI )
+        JNE( LLBL(x86_p4_pr_loop) )
+LLBL(x86_p4_pr_done):
+        POP_L( EBX )
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT16                     /*
+ * _mesa_x86_transform_points4_3d
+ *
+ * x87 transform of 4-component points that reads matrix elements 0-2, 4-6,
+ * 8-10 and 12-14 and passes the fourth component through unchanged.
+ * Arguments are fetched with ARG_DEST, ARG_MATRIX and ARG_SOURCE
+ * (xform_args.h: to_vec, m[16], from_vec).
+ *
+ * For every point:
+ *   DST0 = SRC0*MAT0 + SRC1*MAT4 + SRC2*MAT8  + SRC3*MAT12
+ *   DST1 = SRC0*MAT1 + SRC1*MAT5 + SRC2*MAT9  + SRC3*MAT13
+ *   DST2 = SRC0*MAT2 + SRC1*MAT6 + SRC2*MAT10 + SRC3*MAT14
+ *   DST3 = SRC3 (copied as a 32-bit word through EBX)
+ *
+ * SRCn/DSTn/MATn are defined earlier in this file; presumably they address
+ * element n off ESI/EDI/EDX -- confirm against those definitions.
+ *
+ * If the source count is zero the routine returns without writing the
+ * destination.  Otherwise it ORs VEC_SIZE_4 into the destination flags,
+ * copies the count and stores 4 as the destination size before looping.
+ */
+GLOBL GLNAME( _mesa_x86_transform_points4_3d )
+HIDDEN(_mesa_x86_transform_points4_3d)
+GLNAME( _mesa_x86_transform_points4_3d ):
+#define FRAME_OFFSET 12         /* bytes pushed below: ESI, EDI, EBX */
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        PUSH_L( EBX )
+        MOV_L( ARG_SOURCE, ESI )
+        MOV_L( ARG_DEST, EDI )
+        MOV_L( ARG_MATRIX, EDX )
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )            /* ECX = source count */
+        TEST_L( ECX, ECX )
+        JZ( LLBL(x86_p4_3dr_done) )                     /* count == 0: nothing to do */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )           /* EAX = source stride, added to ESI per point */
+        OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )            /* dest count = source count */
+        MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
+        SHL_L( CONST(4), ECX )                          /* ECX = count * 16 bytes of output */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )            /* ESI now walks the source data */
+        MOV_L( REGOFF(V4F_START, EDI), EDI )            /* EDI now walks the dest data */
+        ADD_L( EDI, ECX )                               /* ECX = dest end address (loop sentinel) */
+ALIGNTEXT16
+LLBL(x86_p4_3dr_loop):
+        FLD_S( SRC0 )                   /* F4 */
+        FMUL_S( MAT0 )                  /* F4 = SRC0 * MAT0 */
+        FLD_S( SRC0 )                   /* F5 F4 */
+        FMUL_S( MAT1 )                  /* F5 = SRC0 * MAT1 */
+        FLD_S( SRC0 )                   /* F6 F5 F4 */
+        FMUL_S( MAT2 )                  /* F6 = SRC0 * MAT2 */
+        FLD_S( SRC1 )                   /* F0 F6 F5 F4 */
+        FMUL_S( MAT4 )                  /* F0 = SRC1 * MAT4 */
+        FLD_S( SRC1 )                   /* F1 F0 F6 F5 F4 */
+        FMUL_S( MAT5 )                  /* F1 = SRC1 * MAT5 */
+        FLD_S( SRC1 )                   /* F2 F1 F0 F6 F5 F4 */
+        FMUL_S( MAT6 )                  /* F2 = SRC1 * MAT6 */
+        FXCH( ST(2) )                   /* F0 F1 F2 F6 F5 F4 */
+        FADDP( ST0, ST(5) )             /* F1 F2 F6 F5 F4; F4 += F0 */
+        FADDP( ST0, ST(3) )             /* F2 F6 F5 F4; F5 += F1 */
+        FADDP( ST0, ST(1) )             /* F6 F5 F4; F6 += F2 */
+        FLD_S( SRC2 )                   /* F0 F6 F5 F4 */
+        FMUL_S( MAT8 )                  /* F0 = SRC2 * MAT8 */
+        FLD_S( SRC2 )                   /* F1 F0 F6 F5 F4 */
+        FMUL_S( MAT9 )                  /* F1 = SRC2 * MAT9 */
+        FLD_S( SRC2 )                   /* F2 F1 F0 F6 F5 F4 */
+        FMUL_S( MAT10 )                 /* F2 = SRC2 * MAT10 */
+        FXCH( ST(2) )                   /* F0 F1 F2 F6 F5 F4 */
+        FADDP( ST0, ST(5) )             /* F1 F2 F6 F5 F4; F4 += F0 */
+        FADDP( ST0, ST(3) )             /* F2 F6 F5 F4; F5 += F1 */
+        FADDP( ST0, ST(1) )             /* F6 F5 F4; F6 += F2 */
+        FLD_S( SRC3 )                   /* F0 F6 F5 F4 */
+        FMUL_S( MAT12 )                 /* F0 = SRC3 * MAT12 */
+        FLD_S( SRC3 )                   /* F1 F0 F6 F5 F4 */
+        FMUL_S( MAT13 )                 /* F1 = SRC3 * MAT13 */
+        FLD_S( SRC3 )                   /* F2 F1 F0 F6 F5 F4 */
+        FMUL_S( MAT14 )                 /* F2 = SRC3 * MAT14 */
+        FXCH( ST(2) )                   /* F0 F1 F2 F6 F5 F4 */
+        FADDP( ST0, ST(5) )             /* F1 F2 F6 F5 F4; F4 += F0 */
+        FADDP( ST0, ST(3) )             /* F2 F6 F5 F4; F5 += F1 */
+        FADDP( ST0, ST(1) )             /* F6 F5 F4; F6 += F2 */
+        MOV_L( SRC3, EBX )              /* raw 32-bit word of SRC3, read before any DST store */
+        FXCH( ST(2) )                   /* F4 F5 F6 */
+        FSTP_S( DST0 )          /* F5 F6 */
+        FSTP_S( DST1 )          /* F6 */
+        FSTP_S( DST2 )          /* */
+        MOV_L( EBX, DST3 )              /* DST3 = SRC3, bit-for-bit */
+LLBL(x86_p4_3dr_skip):                  /* no jump in this routine targets this label */
+        ADD_L( CONST(16), EDI )         /* next output point (16 bytes each) */
+        ADD_L( EAX, ESI )               /* next input point (source stride) */
+        CMP_L( ECX, EDI )
+        JNE( LLBL(x86_p4_3dr_loop) )    /* loop until EDI reaches the end address */
+LLBL(x86_p4_3dr_done):
+        POP_L( EBX )                    /* restore in reverse push order */
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT16                     /*
+ * _mesa_x86_transform_points4_3d_no_rot
+ *
+ * x87 transform of 4-component points that reads only matrix elements
+ * 0, 5, 10, 12, 13 and 14 and passes the fourth component through
+ * unchanged.  Arguments are fetched with ARG_DEST, ARG_MATRIX and
+ * ARG_SOURCE (xform_args.h: to_vec, m[16], from_vec).
+ *
+ * For every point:
+ *   DST0 = SRC0*MAT0  + SRC3*MAT12
+ *   DST1 = SRC1*MAT5  + SRC3*MAT13
+ *   DST2 = SRC2*MAT10 + SRC3*MAT14
+ *   DST3 = SRC3 (copied as a 32-bit word through EBX)
+ *
+ * SRCn/DSTn/MATn are defined earlier in this file; presumably they address
+ * element n off ESI/EDI/EDX -- confirm against those definitions.
+ *
+ * If the source count is zero the routine returns without writing the
+ * destination.  Otherwise it ORs VEC_SIZE_4 into the destination flags,
+ * copies the count and stores 4 as the destination size before looping.
+ */
+GLOBL GLNAME(_mesa_x86_transform_points4_3d_no_rot)
+HIDDEN(_mesa_x86_transform_points4_3d_no_rot)
+GLNAME(_mesa_x86_transform_points4_3d_no_rot):
+#define FRAME_OFFSET 12         /* bytes pushed below: ESI, EDI, EBX */
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        PUSH_L( EBX )
+        MOV_L( ARG_SOURCE, ESI )
+        MOV_L( ARG_DEST, EDI )
+        MOV_L( ARG_MATRIX, EDX )
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )            /* ECX = source count */
+        TEST_L( ECX, ECX )
+        JZ( LLBL(x86_p4_3dnrr_done) )                   /* count == 0: nothing to do */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )           /* EAX = source stride, added to ESI per point */
+        OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )            /* dest count = source count */
+        MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
+        SHL_L( CONST(4), ECX )                          /* ECX = count * 16 bytes of output */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )            /* ESI now walks the source data */
+        MOV_L( REGOFF(V4F_START, EDI), EDI )            /* EDI now walks the dest data */
+        ADD_L( EDI, ECX )                               /* ECX = dest end address (loop sentinel) */
+ALIGNTEXT16
+LLBL(x86_p4_3dnrr_loop):
+        FLD_S( SRC0 )                   /* F4 */
+        FMUL_S( MAT0 )                  /* F4 = SRC0 * MAT0 */
+        FLD_S( SRC1 )                   /* F5 F4 */
+        FMUL_S( MAT5 )                  /* F5 = SRC1 * MAT5 */
+        FLD_S( SRC2 )                   /* F6 F5 F4 */
+        FMUL_S( MAT10 )                 /* F6 = SRC2 * MAT10 */
+        FLD_S( SRC3 )                   /* F0 F6 F5 F4 */
+        FMUL_S( MAT12 )                 /* F0 = SRC3 * MAT12 */
+        FLD_S( SRC3 )                   /* F1 F0 F6 F5 F4 */
+        FMUL_S( MAT13 )                 /* F1 = SRC3 * MAT13 */
+        FLD_S( SRC3 )                   /* F2 F1 F0 F6 F5 F4 */
+        FMUL_S( MAT14 )                 /* F2 = SRC3 * MAT14 */
+        FXCH( ST(2) )                   /* F0 F1 F2 F6 F5 F4 */
+        FADDP( ST0, ST(5) )             /* F1 F2 F6 F5 F4; F4 += F0 */
+        FADDP( ST0, ST(3) )             /* F2 F6 F5 F4; F5 += F1 */
+        FADDP( ST0, ST(1) )             /* F6 F5 F4; F6 += F2 */
+        MOV_L( SRC3, EBX )              /* raw 32-bit word of SRC3, read before any DST store */
+        FXCH( ST(2) )                   /* F4 F5 F6 */
+        FSTP_S( DST0   )                /* F5 F6 */
+        FSTP_S( DST1   )                /* F6 */
+        FSTP_S( DST2   )                /* */
+        MOV_L( EBX, DST3 )              /* DST3 = SRC3, bit-for-bit */
+LLBL(x86_p4_3dnrr_skip):                /* no jump in this routine targets this label */
+        ADD_L( CONST(16), EDI )         /* next output point (16 bytes each) */
+        ADD_L( EAX, ESI )               /* next input point (source stride) */
+        CMP_L( ECX, EDI )
+        JNE( LLBL(x86_p4_3dnrr_loop) )  /* loop until EDI reaches the end address */
+LLBL(x86_p4_3dnrr_done):
+        POP_L( EBX )                    /* restore in reverse push order */
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT16                     /*
+ * _mesa_x86_transform_points4_2d
+ *
+ * x87 transform of 4-component points that reads only matrix elements
+ * 0, 1, 4, 5, 12 and 13 and passes the third and fourth components through
+ * unchanged.  Arguments are fetched with ARG_DEST, ARG_MATRIX and
+ * ARG_SOURCE (xform_args.h: to_vec, m[16], from_vec).
+ *
+ * For every point:
+ *   DST0 = SRC0*MAT0 + SRC1*MAT4 + SRC3*MAT12
+ *   DST1 = SRC0*MAT1 + SRC1*MAT5 + SRC3*MAT13
+ *   DST2 = SRC2 (copied as a 32-bit word through EBX)
+ *   DST3 = SRC3 (copied as a 32-bit word through EBP)
+ *
+ * SRCn/DSTn/MATn are defined earlier in this file; presumably they address
+ * element n off ESI/EDI/EDX -- confirm against those definitions.
+ *
+ * If the source count is zero the routine returns without writing the
+ * destination.  Otherwise it ORs VEC_SIZE_4 into the destination flags,
+ * copies the count and stores 4 as the destination size before looping.
+ */
+GLOBL GLNAME( _mesa_x86_transform_points4_2d )
+HIDDEN(_mesa_x86_transform_points4_2d)
+GLNAME( _mesa_x86_transform_points4_2d ):
+#define FRAME_OFFSET 16         /* bytes pushed below: ESI, EDI, EBX, EBP */
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        PUSH_L( EBX )
+        PUSH_L( EBP )
+        MOV_L( ARG_SOURCE, ESI )
+        MOV_L( ARG_DEST, EDI )
+        MOV_L( ARG_MATRIX, EDX )
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )            /* ECX = source count */
+        TEST_L( ECX, ECX )
+        JZ( LLBL(x86_p4_2dr_done) )                     /* count == 0: nothing to do */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )           /* EAX = source stride, added to ESI per point */
+        OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )            /* dest count = source count */
+        MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
+        SHL_L( CONST(4), ECX )                          /* ECX = count * 16 bytes of output */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )            /* ESI now walks the source data */
+        MOV_L( REGOFF(V4F_START, EDI), EDI )            /* EDI now walks the dest data */
+        ADD_L( EDI, ECX )                               /* ECX = dest end address (loop sentinel) */
+ALIGNTEXT16
+LLBL(x86_p4_2dr_loop):
+        FLD_S( SRC0 )                   /* F4 */
+        FMUL_S( MAT0 )                  /* F4 = SRC0 * MAT0 */
+        FLD_S( SRC0 )                   /* F5 F4 */
+        FMUL_S( MAT1 )                  /* F5 = SRC0 * MAT1 */
+        FLD_S( SRC1 )                   /* F0 F5 F4 */
+        FMUL_S( MAT4 )                  /* F0 = SRC1 * MAT4 */
+        FLD_S( SRC1 )                   /* F1 F0 F5 F4 */
+        FMUL_S( MAT5 )                  /* F1 = SRC1 * MAT5 */
+        FXCH( ST(1) )                   /* F0 F1 F5 F4 */
+        FADDP( ST0, ST(3) )             /* F1 F5 F4; F4 += F0 */
+        FADDP( ST0, ST(1) )             /* F5 F4; F5 += F1 */
+        FLD_S( SRC3 )                   /* F0 F5 F4 */
+        FMUL_S( MAT12 )                 /* F0 = SRC3 * MAT12 */
+        FLD_S( SRC3 )                   /* F1 F0 F5 F4 */
+        FMUL_S( MAT13 )                 /* F1 = SRC3 * MAT13 */
+        FXCH( ST(1) )                   /* F0 F1 F5 F4 */
+        FADDP( ST0, ST(3) )             /* F1 F5 F4; F4 += F0 */
+        FADDP( ST0, ST(1) )             /* F5 F4; F5 += F1 */
+        MOV_L( SRC2, EBX )              /* raw words of SRC2/SRC3, read before any DST store */
+        MOV_L( SRC3, EBP )
+        FXCH( ST(1) )                   /* F4 F5 */
+        FSTP_S( DST0 )          /* F5 */
+        FSTP_S( DST1 )          /* */
+        MOV_L( EBX, DST2 )              /* DST2 = SRC2, bit-for-bit */
+        MOV_L( EBP, DST3 )              /* DST3 = SRC3, bit-for-bit */
+LLBL(x86_p4_2dr_skip):                  /* no jump in this routine targets this label */
+        ADD_L( CONST(16), EDI )         /* next output point (16 bytes each) */
+        ADD_L( EAX, ESI )               /* next input point (source stride) */
+        CMP_L( ECX, EDI )
+        JNE( LLBL(x86_p4_2dr_loop) )    /* loop until EDI reaches the end address */
+LLBL(x86_p4_2dr_done):
+        POP_L( EBP )                    /* restore in reverse push order */
+        POP_L( EBX )
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT16                     /*
+ * _mesa_x86_transform_points4_2d_no_rot
+ *
+ * x87 transform of 4-component points that reads only matrix elements
+ * 0, 5, 12 and 13 and passes the third and fourth components through
+ * unchanged.  Arguments are fetched with ARG_DEST, ARG_MATRIX and
+ * ARG_SOURCE (xform_args.h: to_vec, m[16], from_vec).
+ *
+ * For every point:
+ *   DST0 = SRC0*MAT0 + SRC3*MAT12
+ *   DST1 = SRC1*MAT5 + SRC3*MAT13
+ *   DST2 = SRC2 (copied as a 32-bit word through EBX)
+ *   DST3 = SRC3 (copied as a 32-bit word through EBP)
+ *
+ * SRCn/DSTn/MATn are defined earlier in this file; presumably they address
+ * element n off ESI/EDI/EDX -- confirm against those definitions.
+ *
+ * If the source count is zero the routine returns without writing the
+ * destination.  Otherwise it ORs VEC_SIZE_4 into the destination flags,
+ * copies the count and stores 4 as the destination size before looping.
+ */
+GLOBL GLNAME( _mesa_x86_transform_points4_2d_no_rot )
+HIDDEN(_mesa_x86_transform_points4_2d_no_rot)
+GLNAME( _mesa_x86_transform_points4_2d_no_rot ):
+#define FRAME_OFFSET 16         /* bytes pushed below: ESI, EDI, EBX, EBP */
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        PUSH_L( EBX )
+        PUSH_L( EBP )
+        MOV_L( ARG_SOURCE, ESI )
+        MOV_L( ARG_DEST, EDI )
+        MOV_L( ARG_MATRIX, EDX )
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )            /* ECX = source count */
+        TEST_L( ECX, ECX )
+        JZ( LLBL(x86_p4_2dnrr_done) )                   /* count == 0: nothing to do */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )           /* EAX = source stride, added to ESI per point */
+        OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )            /* dest count = source count */
+        MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
+        SHL_L( CONST(4), ECX )                          /* ECX = count * 16 bytes of output */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )            /* ESI now walks the source data */
+        MOV_L( REGOFF(V4F_START, EDI), EDI )            /* EDI now walks the dest data */
+        ADD_L( EDI, ECX )                               /* ECX = dest end address (loop sentinel) */
+ALIGNTEXT16
+LLBL(x86_p4_2dnrr_loop):
+        FLD_S( SRC0 )                   /* F4 */
+        FMUL_S( MAT0 )                  /* F4 = SRC0 * MAT0 */
+        FLD_S( SRC1 )                   /* F5 F4 */
+        FMUL_S( MAT5 )                  /* F5 = SRC1 * MAT5 */
+        FLD_S( SRC3 )                   /* F0 F5 F4 */
+        FMUL_S( MAT12 )                 /* F0 = SRC3 * MAT12 */
+        FLD_S( SRC3 )                   /* F1 F0 F5 F4 */
+        FMUL_S( MAT13 )                 /* F1 = SRC3 * MAT13 */
+        FXCH( ST(1) )                   /* F0 F1 F5 F4 */
+        FADDP( ST0, ST(3) )             /* F1 F5 F4; F4 += F0 */
+        FADDP( ST0, ST(1) )             /* F5 F4; F5 += F1 */
+        MOV_L( SRC2, EBX )              /* raw words of SRC2/SRC3, read before any DST store */
+        MOV_L( SRC3, EBP )
+        FXCH( ST(1) )                   /* F4 F5 */
+        FSTP_S( DST0   )                /* F5 */
+        FSTP_S( DST1   )                /* */
+        MOV_L( EBX, DST2 )              /* DST2 = SRC2, bit-for-bit */
+        MOV_L( EBP, DST3 )              /* DST3 = SRC3, bit-for-bit */
+LLBL(x86_p4_2dnrr_skip):                /* no jump in this routine targets this label */
+        ADD_L( CONST(16), EDI )         /* next output point (16 bytes each) */
+        ADD_L( EAX, ESI )               /* next input point (source stride) */
+        CMP_L( ECX, EDI )
+        JNE( LLBL(x86_p4_2dnrr_loop) )  /* loop until EDI reaches the end address */
+LLBL(x86_p4_2dnrr_done):
+        POP_L( EBP )                    /* restore in reverse push order */
+        POP_L( EBX )
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#undef FRAME_OFFSET
+ALIGNTEXT16                     /*
+ * _mesa_x86_transform_points4_identity
+ *
+ * Copies 4-component points from source to destination without using the
+ * FPU or any matrix element: DSTn = SRCn for n = 0..3, moved as 32-bit
+ * words.  Arguments are fetched with ARG_DEST, ARG_MATRIX and ARG_SOURCE
+ * (xform_args.h: to_vec, m[16], from_vec).
+ *
+ * SRCn/DSTn are defined earlier in this file; presumably they address
+ * element n off ESI/EDI -- confirm against those definitions.
+ *
+ * If the source count is zero the routine returns without writing the
+ * destination.  Otherwise it ORs VEC_SIZE_4 into the destination flags,
+ * copies the count and stores 4 as the destination size; the copy loop is
+ * then skipped when the source and destination data pointers are equal.
+ */
+GLOBL GLNAME( _mesa_x86_transform_points4_identity )
+HIDDEN(_mesa_x86_transform_points4_identity)
+GLNAME( _mesa_x86_transform_points4_identity ):
+#define FRAME_OFFSET 12         /* bytes pushed below: ESI, EDI, EBX */
+        PUSH_L( ESI )
+        PUSH_L( EDI )
+        PUSH_L( EBX )
+        MOV_L( ARG_SOURCE, ESI )
+        MOV_L( ARG_DEST, EDI )
+        MOV_L( ARG_MATRIX, EDX )                        /* value unused here: EDX is overwritten as scratch in the loop */
+        MOV_L( REGOFF(V4F_COUNT, ESI), ECX )            /* ECX = source count */
+        TEST_L( ECX, ECX )
+        JZ( LLBL(x86_p4_ir_done) )                      /* count == 0: nothing to do */
+        MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )           /* EAX = source stride, added to ESI per point */
+        OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+        MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )            /* dest count = source count */
+        MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
+        SHL_L( CONST(4), ECX )                          /* ECX = count * 16 bytes of output */
+        MOV_L( REGOFF(V4F_START, ESI), ESI )            /* ESI now walks the source data */
+        MOV_L( REGOFF(V4F_START, EDI), EDI )            /* EDI now walks the dest data */
+        ADD_L( EDI, ECX )                               /* ECX = dest end address (loop sentinel) */
+        CMP_L( ESI, EDI )
+        JE( LLBL(x86_p4_ir_done) )                      /* same data pointer: skip the copy */
+ALIGNTEXT16
+LLBL(x86_p4_ir_loop):
+        MOV_L( SRC0, EBX )              /* copy components 0 and 1 through EBX/EDX */
+        MOV_L( SRC1, EDX )
+        MOV_L( EBX, DST0 )
+        MOV_L( EDX, DST1 )
+        MOV_L( SRC2, EBX )              /* copy components 2 and 3 through EBX/EDX */
+        MOV_L( SRC3, EDX )
+        MOV_L( EBX, DST2 )
+        MOV_L( EDX, DST3 )
+LLBL(x86_p4_ir_skip):                   /* no jump in this routine targets this label */
+        ADD_L( CONST(16), EDI )         /* next output point (16 bytes each) */
+        ADD_L( EAX, ESI )               /* next input point (source stride) */
+        CMP_L( ECX, EDI )
+        JNE( LLBL(x86_p4_ir_loop) )     /* loop until EDI reaches the end address */
+LLBL(x86_p4_ir_done):
+        POP_L( EBX )                    /* restore in reverse push order */
+        POP_L( EDI )
+        POP_L( ESI )
+        RET
+#if defined (__ELF__) && defined (__linux__)
+        .section .note.GNU-stack,"",%progbits
+#endif

 /contrib/sdk/sources/Mesa/mesa-9.2.5/src/mesa/x86/xform_args.h
 ,0 → 1,51
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/*
+ * Transform function interface for assembly code.  Simply define
+ * FRAME_OFFSET to the number of bytes pushed onto the stack before
+ * using the ARG_* argument macros.
+ *
+ * Gareth Hughes
+ */
+#ifndef __XFORM_ARGS_H__
+#define __XFORM_ARGS_H__
+/* Offsets for transform_func arguments
+ *
+ * typedef void (*transform_func)( GLvector4f *to_vec,
+ *                                 const GLfloat m[16],
+ *                                 const GLvector4f *from_vec );
+ *
+ * The OFFSET_* values are byte offsets from the stack pointer as it was on
+ * entry to the function; FRAME_OFFSET compensates for whatever the function
+ * has pushed since.  Assumes the 32-bit stack-argument layout in which the
+ * return address sits at the entry ESP and the first argument 4 bytes
+ * above it -- confirm for any non-default calling convention.
+ *
+ * The ARG_* macros expand FRAME_OFFSET at the point of use, so this header
+ * does not define it: each routine must #define FRAME_OFFSET itself, and an
+ * ARG_* reference is only correct while exactly that many bytes are pushed.
+ */
+#define OFFSET_DEST     4       /* to_vec   (1st argument) */
+#define OFFSET_MATRIX   8       /* m        (2nd argument) */
+#define OFFSET_SOURCE   12      /* from_vec (3rd argument) */
+#define ARG_DEST        REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP)
+#define ARG_MATRIX      REGOFF(FRAME_OFFSET+OFFSET_MATRIX, ESP)
+#define ARG_SOURCE      REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP)
+#endif /* __XFORM_ARGS_H__ */

Subversion Repositories Kolibri OS

Compare Revisions

Regard whitespace Rev 5562 → Rev 5563