Subversion Repositories Kolibri OS

Rev

Rev 4358 | Blame | Last modification | View Log | RSS feed

  1.  
  2. /*
  3.  * Mesa 3-D graphics library
  4.  *
  5.  * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
  6.  *
  7.  * Permission is hereby granted, free of charge, to any person obtaining a
  8.  * copy of this software and associated documentation files (the "Software"),
  9.  * to deal in the Software without restriction, including without limitation
  10.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11.  * and/or sell copies of the Software, and to permit persons to whom the
  12.  * Software is furnished to do so, subject to the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice shall be included
  15.  * in all copies or substantial portions of the Software.
  16.  *
  17.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  21.  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  22.  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  23.  * OTHER DEALINGS IN THE SOFTWARE.
  24.  */
  25.  
  26. /** TODO:
  27.   * - insert PREFETCH instructions to avoid cache misses
  28.   * - some more optimizations are possible...
  29.   * - for 40-50% more performance in the SSE functions, the
  30.   *   data (transform matrix, src_vert, dst_vert) needs to be 16-byte aligned
  31.   */
  32.  
  33. #ifdef USE_SSE_ASM
  34. #include "assyntax.h"
  35. #include "matypes.h"
  36. #include "xform_args.h"
  37.  
  38.    SEG_TEXT
  39. /* S(i), D(i), M(i): operand for the i-th 4-byte element at ESI, EDI and EDX. NOTE: i is not parenthesized, pass plain constants. */
  40. #define S(i)    REGOFF(i * 4, ESI)      /* used by the routines below for the current source vertex */
  41. #define D(i)    REGOFF(i * 4, EDI)      /* used by the routines below for the current dest vertex */
  42. #define M(i)    REGOFF(i * 4, EDX)      /* used by the routines below for the matrix */
  43.  
  44.  
  45. ALIGNTEXT4
  46. GLOBL GLNAME(_mesa_sse_transform_points1_general)
  47. HIDDEN( _mesa_sse_transform_points1_general )
  48. GLNAME( _mesa_sse_transform_points1_general ):
  49. /* Per source vertex: dest = ox * (m0..m3) + (m12..m15); four floats are written, dest step is 16 bytes. */
  50. #define FRAME_OFFSET 8                          /* accounts for the two registers pushed below */
  51.     PUSH_L    ( ESI )
  52.     PUSH_L    ( EDI )
  53.  
  54.     MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )  /* ptr to source GLvector4f */
  55.     MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )    /* ptr to dest GLvector4f */
  56.  
  57.     MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
  58.     MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */
  59.  
  60.     TEST_L( ECX, ECX )                          /* count == 0 ? (same test the sibling routines use) */
  61.     JZ( LLBL(K_GTP1GR_finish) )                 /* yes -> nothing to do. */
  62. /* NOTE(review): on count == 0 the dest flags/count/size are left as they were -- confirm callers accept that. */
  63.     MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* source stride, in bytes (added to ESI per vertex) */
  64.     OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */
  65.  
  66.     MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
  67.     MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )    /* set dest size */
  68.  
  69.     SHL_L( CONST(4), ECX )                      /* count *= 16 (bytes per dest vertex) */
  70.     MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */
  71.  
  72.     MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
  73.     ADD_L( EDI, ECX )                           /* ECX = end of dest data = loop bound */
  74.  
  75. /* NOTE(review): MOVAPS needs M(0) and M(12) on a 16-byte boundary -- confirm the matrix storage guarantees it. */
  76. ALIGNTEXT32
  77.     MOVAPS( M(0), XMM0 )                        /* m3  | m2  | m1  | m0  */
  78.     MOVAPS( M(12), XMM1 )                       /* m15 | m14 | m13 | m12 */
  79.  
  80. ALIGNTEXT32
  81. LLBL(K_GTP1GR_top):
  82.     MOVSS( S(0), XMM2 )                         /* ox */
  83.     SHUFPS( CONST(0x0), XMM2, XMM2 )            /* ox | ox | ox | ox */
  84.     MULPS( XMM0, XMM2 )                         /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
  85.     ADDPS( XMM1, XMM2 )                         /* +m15 | +m14 | +m13 | +m12 */
  86.     MOVUPS( XMM2, D(0) )                        /* unaligned store of all four results */
  87.  
  88. LLBL(K_GTP1GR_skip):
  89.     ADD_L     ( CONST(16), EDI )                /* next dest vertex */
  90.     ADD_L     ( EAX, ESI )                      /* next source vertex */
  91.     CMP_L     ( ECX, EDI )
  92.     JNE       ( LLBL(K_GTP1GR_top) )            /* loop until dest reaches its end */
  93.  
  94. LLBL(K_GTP1GR_finish):
  95.     POP_L     ( EDI )
  96.     POP_L     ( ESI )
  97.     RET
  98. #undef FRAME_OFFSET
  99.  
  100.  
  101.  
  102. ALIGNTEXT4
  103. GLOBL GLNAME(_mesa_sse_transform_points1_identity)
  104. HIDDEN(_mesa_sse_transform_points1_identity)
  105. GLNAME( _mesa_sse_transform_points1_identity ):
  106. /* Identity case: copy the first dword (x) of every source vertex into the dest vertex array. */
  107. #define FRAME_OFFSET 8
  108.     PUSH_L( ESI )                               /* ESI and EDI are restored before RET */
  109.     PUSH_L( EDI )
  110.  
  111.     MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )     /* EDI = dest GLvector4f */
  112.     MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )   /* ESI = source GLvector4f */
  113.  
  114.     MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* ECX = number of source vertices */
  115.  
  116.     TEST_L( ECX, ECX )
  117.     JZ( LLBL(K_XF1_IDENT_done) )                /* empty input: dest vector is not touched */
  118.  
  119.     MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* EAX = source stride, in bytes */
  120.     OR_L( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, EDI) )   /* dest flags |= VEC_SIZE_1 */
  121.  
  122.     MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* dest count = source count */
  123.     MOV_L( CONST(1), REGOFF(V4F_SIZE, EDI) )    /* dest size = 1 */
  124.  
  125.     MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ESI = first source vertex */
  126.     MOV_L( REGOFF(V4F_START, EDI), EDI )        /* EDI = first dest vertex */
  127.  
  128.     SHL_L( CONST(4), ECX )                      /* ECX = count * 16 ... */
  129.     ADD_L( EDI, ECX )                           /* ... + dest start = end of dest data */
  130.  
  131.     CMP_L( EDI, ESI )                           /* do source and dest start at the same address? */
  132.     JE( LLBL(K_XF1_IDENT_done) )                /* yes: the copy would be a no-op, skip it */
  133.  
  134.  
  135. ALIGNTEXT32
  136. LLBL(K_XF1_IDENT_loop):
  137.     MOV_L( S(0), EDX )                          /* EDX = source x, moved as an integer dword */
  138.     MOV_L( EDX, D(0) )                          /* dest x = source x */
  139.  
  140. /* advance: source by its stride, dest by 16 bytes, until dest reaches ECX */
  141.     ADD_L( EAX, ESI )
  142.     ADD_L( CONST(16), EDI )
  143.     CMP_L( ECX, EDI )
  144.     JNE( LLBL(K_XF1_IDENT_loop) )
  145.  
  146. LLBL(K_XF1_IDENT_done):
  147.     POP_L( EDI )
  148.     POP_L( ESI )
  149.     RET
  150. #undef FRAME_OFFSET
  151.  
  152.  
  153.  
  154. ALIGNTEXT4
  155. GLOBL GLNAME(_mesa_sse_transform_points1_3d_no_rot)
  156. HIDDEN(_mesa_sse_transform_points1_3d_no_rot)
  157. GLNAME(_mesa_sse_transform_points1_3d_no_rot):
  158. /* Per source vertex: dest.x = ox*m0 + m12, dest.y = m13, dest.z = m14 (three floats written). */
  159. #define FRAME_OFFSET 8
  160.     PUSH_L    ( ESI )                           /* ESI and EDI are restored before RET */
  161.     PUSH_L    ( EDI )
  162.  
  163.     MOV_L     ( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )    /* EDI = dest GLvector4f */
  164.     MOV_L     ( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )  /* ESI = source GLvector4f */
  165.  
  166.     MOV_L     ( ARG_MATRIX, EDX )               /* EDX = matrix, read through M() */
  167.     MOV_L     ( REGOFF(V4F_COUNT, ESI), ECX )   /* ECX = number of source vertices */
  168.  
  169.     TEST_L    ( ECX, ECX )
  170.     JZ        ( LLBL(K_XF1_3DNR_done) )         /* empty input: dest vector is not touched */
  171.  
  172.     MOV_L     ( REGOFF(V4F_STRIDE, ESI), EAX )  /* EAX = source stride, in bytes */
  173.     OR_L      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )     /* dest flags |= VEC_SIZE_3 */
  174.  
  175.     MOV_L     ( ECX, REGOFF(V4F_COUNT, EDI) )   /* dest count = source count */
  176.     MOV_L     ( CONST(3), REGOFF(V4F_SIZE, EDI) )       /* dest size = 3 */
  177.  
  178.     MOV_L     ( REGOFF(V4F_START, ESI), ESI )   /* ESI = first source vertex */
  179.     MOV_L     ( REGOFF(V4F_START, EDI), EDI )   /* EDI = first dest vertex */
  180.  
  181.     SHL_L     ( CONST(4), ECX )                 /* ECX = count * 16 ... */
  182.     ADD_L     ( EDI, ECX )                      /* ... + dest start = end of dest data */
  183.  
  184. /* matrix entries used by the loop are loaded once, up front */
  185. ALIGNTEXT32
  186.     MOVSS     ( M(0), XMM0 )                    /* XMM0 = m0 */
  187.     MOVSS     ( M(12), XMM1 )                   /* XMM1 = m12 */
  188.     MOVSS     ( M(13), XMM2 )                   /* XMM2 = m13 */
  189.     MOVSS     ( M(14), XMM3 )                   /* XMM3 = m14 */
  190.  
  191. ALIGNTEXT32
  192. LLBL(K_XF1_3DNR_loop):
  193.     MOVSS     ( S(0), XMM4 )                    /* XMM4 = ox */
  194.     MULSS     ( XMM0, XMM4 )                    /* XMM4 = ox*m0 */
  195.     ADDSS     ( XMM1, XMM4 )                    /* XMM4 = ox*m0 + m12 */
  196.     MOVSS     ( XMM4, D(0) )                    /* dest x */
  197.  
  198.     MOVSS     ( XMM2, D(1) )                    /* dest y = m13 */
  199.     MOVSS     ( XMM3, D(2) )                    /* dest z = m14 */
  200.  
  201. /* advance: source by its stride, dest by 16 bytes, until dest reaches ECX */
  202.     ADD_L     ( EAX, ESI )
  203.     ADD_L     ( CONST(16), EDI )
  204.     CMP_L     ( ECX, EDI )
  205.     JNE       ( LLBL(K_XF1_3DNR_loop) )
  206.  
  207. LLBL(K_XF1_3DNR_done):
  208.     POP_L     ( EDI )
  209.     POP_L     ( ESI )
  210.     RET
  211. #undef FRAME_OFFSET
  212.  
  213.  
  214.  
  215. ALIGNTEXT4
  216. GLOBL GLNAME(_mesa_sse_transform_points1_perspective)
  217. HIDDEN(_mesa_sse_transform_points1_perspective)
  218. GLNAME(_mesa_sse_transform_points1_perspective):
  219. /* Per source vertex: dest.x = ox*m0, dest.y = 0, dest.z = m14, dest.w = 0 (four floats written). */
  220. #define FRAME_OFFSET 8
  221.     PUSH_L( ESI )                               /* ESI and EDI are restored before RET */
  222.     PUSH_L( EDI )
  223.  
  224.     MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )     /* EDI = dest GLvector4f */
  225.     MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )   /* ESI = source GLvector4f */
  226.  
  227.     MOV_L( ARG_MATRIX, EDX )                    /* EDX = matrix, read through M() */
  228.     MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* ECX = number of source vertices */
  229.  
  230.     TEST_L( ECX, ECX )
  231.     JZ( LLBL(K_XF1_PERSP_done) )                /* empty input: dest vector is not touched */
  232.  
  233.     MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* EAX = source stride, in bytes */
  234.     OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )   /* dest flags |= VEC_SIZE_4 */
  235.  
  236.     MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* dest count = source count */
  237.     MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )    /* dest size = 4 */
  238.  
  239.     MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ESI = first source vertex */
  240.     MOV_L( REGOFF(V4F_START, EDI), EDI )        /* EDI = first dest vertex */
  241.  
  242.     SHL_L( CONST(4), ECX )                      /* ECX = count * 16 ... */
  243.     ADD_L( EDI, ECX )                           /* ... + dest start = end of dest data */
  244.  
  245. /* loop constants: a zero register plus the two matrix entries that are used */
  246. ALIGNTEXT32
  247.     XORPS( XMM0, XMM0 )                         /* XMM0 = 0 | 0 | 0 | 0 */
  248.     MOVSS( M(0), XMM1 )                         /* XMM1 = m0 */
  249.     MOVSS( M(14), XMM2 )                        /* XMM2 = m14 */
  250.  
  251. ALIGNTEXT32
  252. LLBL(K_XF1_PERSP_loop):
  253.     MOVSS( S(0), XMM3 )                         /* XMM3 = ox */
  254.     MULSS( XMM1, XMM3 )                         /* XMM3 = ox*m0 */
  255.     MOVSS( XMM3, D(0) )                         /* dest x = ox*m0 */
  256.     MOVSS( XMM0, D(1) )                         /* dest y = 0 */
  257.  
  258.     MOVSS( XMM2, D(2) )                         /* dest z = m14 */
  259.     MOVSS( XMM0, D(3) )                         /* dest w = 0 */
  260.  
  261. /* advance: source by its stride, dest by 16 bytes, until dest reaches ECX */
  262.     ADD_L( EAX, ESI )
  263.     ADD_L( CONST(16), EDI )
  264.     CMP_L( ECX, EDI )
  265.     JNE( LLBL(K_XF1_PERSP_loop) )
  266.  
  267. LLBL(K_XF1_PERSP_done):
  268.     POP_L( EDI )
  269.     POP_L( ESI )
  270.     RET
  271. #undef FRAME_OFFSET
  272.  
  273.  
  274. ALIGNTEXT4
  275. GLOBL GLNAME(_mesa_sse_transform_points1_2d)
  276. HIDDEN(_mesa_sse_transform_points1_2d)
  277. GLNAME(_mesa_sse_transform_points1_2d):
  278. /* Per source vertex: dest.x = ox*m0 + m12, dest.y = ox*m1 + m13 (two floats written, dest step is 16 bytes). */
  279. #define FRAME_OFFSET 8                          /* accounts for the two registers pushed below */
  280.     PUSH_L( ESI )
  281.     PUSH_L( EDI )
  282.  
  283.     MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )  /* ptr to source GLvector4f */
  284.     MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )    /* ptr to dest GLvector4f */
  285.  
  286.     MOV_L( ARG_MATRIX, EDX )                    /* ptr to matrix */
  287.     MOV_L( REGOFF(V4F_COUNT, ESI), ECX )        /* source count */
  288.  
  289.     TEST_L( ECX, ECX)
  290.     JZ( LLBL(K_GTP13P2DR_finish) )              /* count was zero; go to finish */
  291. /* NOTE(review): on count == 0 the dest flags/count/size are left as they were -- confirm callers accept that. */
  292.     MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )       /* source stride, in bytes (added to ESI per vertex) */
  293.     OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )   /* set dest flags */
  294.  
  295.     MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )        /* set dest count */
  296.     MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )    /* set dest size */
  297.  
  298.     SHL_L( CONST(4), ECX )                      /* count *= 16 (bytes per dest vertex) */
  299.     MOV_L( REGOFF(V4F_START, ESI), ESI )        /* ptr to first source vertex */
  300.  
  301.     MOV_L( REGOFF(V4F_START, EDI), EDI )        /* ptr to first dest vertex */
  302.     ADD_L( EDI, ECX )                           /* ECX = end of dest data = loop bound */
  303. ALIGNTEXT32
  304.     XORPS( XMM0, XMM0 )                         /* MOVLPS keeps the upper half: clear it so MULPS never sees stale lanes */
  305.     MOVLPS( M(0), XMM0 )                        /* 0 | 0 | m1  | m0  */
  306.     XORPS( XMM1, XMM1 )                         /* same for the translation row */
  307.     MOVLPS( M(12), XMM1 )                       /* 0 | 0 | m13 | m12 */
  308. ALIGNTEXT32
  309. LLBL(K_GTP13P2DR_top):
  310.     MOVSS( S(0), XMM2 )                         /* ox */
  311.     SHUFPS( CONST(0x0), XMM2, XMM2 )            /* ox | ox | ox | ox */
  312.     MULPS( XMM0, XMM2 )                         /* ox*0 | ox*0 | ox*m1 | ox*m0 */
  313.     ADDPS( XMM1, XMM2 )                         /* - | - | ox*m1+m13 | ox*m0+m12 */
  314.     MOVLPS( XMM2, D(0) )                        /* only the low two lanes are stored: D(0), D(1) */
  315.  
  316. LLBL(K_GTP13P2DR_skip):
  317.     ADD_L    ( CONST(16), EDI )                 /* next dest vertex */
  318.     ADD_L    ( EAX, ESI )                       /* next source vertex */
  319.     CMP_L    ( ECX, EDI )
  320.     JNE      ( LLBL(K_GTP13P2DR_top) )          /* loop until dest reaches its end */
  321.  
  322. LLBL(K_GTP13P2DR_finish):
  323.     POP_L    ( EDI )
  324.     POP_L    ( ESI )
  325.     RET
  326. #undef FRAME_OFFSET
  327.  
  328.  
  329. ALIGNTEXT4
  330. GLOBL GLNAME(_mesa_sse_transform_points1_2d_no_rot)
  331. HIDDEN(_mesa_sse_transform_points1_2d_no_rot)
  332. GLNAME(_mesa_sse_transform_points1_2d_no_rot):
  333. /* Per source vertex: dest.x = ox*m0 + m12, dest.y = m13 (two floats written). */
  334. #define FRAME_OFFSET 8
  335.         PUSH_L  ( ESI )                         /* ESI and EDI are restored before RET */
  336.         PUSH_L  ( EDI )
  337.  
  338.         MOV_L   ( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )    /* EDI = dest GLvector4f */
  339.         MOV_L   ( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )  /* ESI = source GLvector4f */
  340.  
  341.         MOV_L   ( ARG_MATRIX, EDX )             /* EDX = matrix, read through M() */
  342.         MOV_L   ( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = number of source vertices */
  343.  
  344.         TEST_L  ( ECX, ECX )
  345.         JZ      ( LLBL(K_XF1_2DNR_done) )       /* empty input: dest vector is not touched */
  346.  
  347.         MOV_L   ( REGOFF(V4F_STRIDE, ESI), EAX )        /* EAX = source stride, in bytes */
  348.         OR_L    ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )   /* dest flags |= VEC_SIZE_2 */
  349.  
  350.         MOV_L   ( ECX, REGOFF(V4F_COUNT, EDI) ) /* dest count = source count */
  351.         MOV_L   ( CONST(2), REGOFF(V4F_SIZE, EDI) )     /* dest size = 2 */
  352.  
  353.         MOV_L   ( REGOFF(V4F_START, ESI), ESI ) /* ESI = first source vertex */
  354.         MOV_L   ( REGOFF(V4F_START, EDI), EDI ) /* EDI = first dest vertex */
  355.  
  356.         SHL_L   ( CONST(4), ECX )               /* ECX = count * 16 ... */
  357.         ADD_L   ( EDI, ECX )                    /* ... + dest start = end of dest data */
  358.  
  359. ALIGNTEXT32
  360.         MOVSS   ( M(0), XMM0 )                  /* XMM0 = m0 */
  361.         MOVSS   ( M(12), XMM1 )                 /* XMM1 = m12 */
  362.         MOVSS   ( M(13), XMM2 )                 /* XMM2 = m13 */
  363.  
  364. ALIGNTEXT32
  365. LLBL(K_XF1_2DNR_loop):
  366.         MOVSS   ( S(0), XMM3 )                  /* XMM3 = ox */
  367.         MULSS   ( XMM0, XMM3 )                  /* XMM3 = ox*m0 */
  368.         ADDSS   ( XMM1, XMM3 )                  /* XMM3 = ox*m0 + m12 */
  369.         MOVSS   ( XMM3, D(0) )                  /* dest x */
  370.         MOVSS   ( XMM2, D(1) )                  /* dest y = m13 */
  371.  
  372. /* advance: source by its stride, dest by 16 bytes, until dest reaches ECX */
  373.         ADD_L   ( EAX, ESI )
  374.         ADD_L   ( CONST(16), EDI )
  375.         CMP_L   ( ECX, EDI )
  376.         JNE     ( LLBL(K_XF1_2DNR_loop) )
  377.  
  378. LLBL(K_XF1_2DNR_done):
  379.         POP_L   ( EDI )
  380.         POP_L   ( ESI )
  381.         RET
  382. #undef FRAME_OFFSET
  383.  
  384.  
  385.  
  386. ALIGNTEXT4
  387. GLOBL GLNAME(_mesa_sse_transform_points1_3d)
  388. HIDDEN(_mesa_sse_transform_points1_3d)
  389. GLNAME(_mesa_sse_transform_points1_3d):
  390. /* Per source vertex: dest.x = ox*m0 + m12, dest.y = ox*m1 + m13, dest.z = ox*m2 + m14 (three floats written). */
  391. #define FRAME_OFFSET 8
  392.         PUSH_L  ( ESI )                         /* ESI and EDI are restored before RET */
  393.         PUSH_L  ( EDI )
  394.  
  395.         MOV_L   ( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )    /* EDI = dest GLvector4f */
  396.         MOV_L   ( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )  /* ESI = source GLvector4f */
  397.  
  398.         MOV_L   ( ARG_MATRIX, EDX )             /* EDX = matrix, read through M() */
  399.         MOV_L   ( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = number of source vertices */
  400.  
  401.         TEST_L  ( ECX, ECX )
  402.         JZ      ( LLBL(K_XF1_3D_done) )         /* empty input: dest vector is not touched */
  403.  
  404.         MOV_L   ( REGOFF(V4F_STRIDE, ESI), EAX )        /* EAX = source stride, in bytes */
  405.         OR_L    ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )   /* dest flags |= VEC_SIZE_3 */
  406.  
  407.         MOV_L   ( ECX, REGOFF(V4F_COUNT, EDI) ) /* dest count = source count */
  408.         MOV_L   ( CONST(3), REGOFF(V4F_SIZE, EDI) )     /* dest size = 3 */
  409.  
  410.         MOV_L   ( REGOFF(V4F_START, ESI), ESI ) /* ESI = first source vertex */
  411.         MOV_L   ( REGOFF(V4F_START, EDI), EDI ) /* EDI = first dest vertex */
  412.  
  413.         SHL_L   ( CONST(4), ECX )               /* ECX = count * 16 ... */
  414.         ADD_L   ( EDI, ECX )                    /* ... + dest start = end of dest data */
  415.  
  416. /* aligned 16-byte loads of the two matrix rows used by the loop */
  417. ALIGNTEXT32
  418.         MOVAPS  ( M(0), XMM0 )                  /* XMM0 = m3  | m2  | m1  | m0  */
  419.         MOVAPS  ( M(12), XMM1 )                 /* XMM1 = m15 | m14 | m13 | m12 */
  420.  
  421. ALIGNTEXT32
  422. LLBL(K_XF1_3D_loop):
  423.         MOVSS   ( S(0), XMM2 )                  /* low lane = ox */
  424.         SHUFPS  ( CONST(0x0), XMM2, XMM2 )      /* broadcast: ox | ox | ox | ox */
  425.         MULPS   ( XMM0, XMM2 )                  /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
  426.         ADDPS   ( XMM1, XMM2 )                  /* add m15 | m14 | m13 | m12 lane by lane */
  427.         MOVLPS  ( XMM2, D(0) )                  /* low two lanes -> D(0), D(1) */
  428.         UNPCKHPS( XMM2, XMM2 )                  /* low lane now holds ox*m2+m14 */
  429.         MOVSS   ( XMM2, D(2) )                  /* -> D(2) */
  430.  
  431. /* advance: source by its stride, dest by 16 bytes, until dest reaches ECX */
  432.         ADD_L   ( EAX, ESI )
  433.         ADD_L   ( CONST(16), EDI )
  434.         CMP_L   ( ECX, EDI )
  435.         JNE     ( LLBL(K_XF1_3D_loop) )
  436.  
  437. LLBL(K_XF1_3D_done):
  438.         POP_L   ( EDI )
  439.         POP_L   ( ESI )
  440.         RET
  441. #undef FRAME_OFFSET
  442. #endif
  443.  
  444. #if defined (__ELF__) && defined (__linux__)
  445.         .section .note.GNU-stack,"",%progbits   /* empty note section: marks this object as not needing an executable stack */
  446. #endif
  447.