/* |
Copyright (C) 1996-1997 Id Software, Inc. |
|
This program is free software; you can redistribute it and/or |
modify it under the terms of the GNU General Public License |
as published by the Free Software Foundation; either version 2 |
of the License, or (at your option) any later version. |
|
This program is distributed in the hope that it will be useful, |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
|
See the GNU General Public License for more details. |
|
You should have received a copy of the GNU General Public License |
along with this program; if not, write to the Free Software |
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
|
*/ |
// |
// d_draw16.s |
// x86 assembly-language horizontal 8-bpp span-drawing code, with 16-pixel |
// subdivision. |
// |
|
#include "asm_i386.h" |
#include "quakeasm.h" |
#include "asm_draw.h" |
#include "d_ifacea.h" |
|
#if id386 |
|
//---------------------------------------------------------------------- |
// 8-bpp horizontal span drawing code for polygons, with no transparency and |
// 16-pixel subdivision. |
// |
// Assumes there is at least one span in pspans, and that every span |
// contains at least one pixel |
//---------------------------------------------------------------------- |
|
.data |
|
.text |
|
// out-of-line, rarely-needed clamping code |
|
LClampHigh0: |
movl C(bbextents),%esi |
jmp LClampReentry0 |
LClampHighOrLow0: |
jg LClampHigh0 |
xorl %esi,%esi |
jmp LClampReentry0 |
|
LClampHigh1: |
movl C(bbextentt),%edx |
jmp LClampReentry1 |
LClampHighOrLow1: |
jg LClampHigh1 |
xorl %edx,%edx |
jmp LClampReentry1 |
|
LClampLow2: |
movl $4096,%ebp |
jmp LClampReentry2 |
LClampHigh2: |
movl C(bbextents),%ebp |
jmp LClampReentry2 |
|
LClampLow3: |
movl $4096,%ecx |
jmp LClampReentry3 |
LClampHigh3: |
movl C(bbextentt),%ecx |
jmp LClampReentry3 |
|
LClampLow4: |
movl $4096,%eax |
jmp LClampReentry4 |
LClampHigh4: |
movl C(bbextents),%eax |
jmp LClampReentry4 |
|
LClampLow5: |
movl $4096,%ebx |
jmp LClampReentry5 |
LClampHigh5: |
movl C(bbextentt),%ebx |
jmp LClampReentry5 |
|
|
#define pspans 4+16 |
|
.align 4 |
.globl C(D_DrawSpans16) |
C(D_DrawSpans16): |
pushl %ebp // preserve caller's stack frame |
pushl %edi |
pushl %esi // preserve register variables |
pushl %ebx |
|
// |
// set up scaled-by-16 steps, for 16-long segments; also set up cacheblock |
// and span list pointers |
// |
// TODO: any overlap from rearranging? |
flds C(d_sdivzstepu) |
fmuls fp_16 |
movl C(cacheblock),%edx |
flds C(d_tdivzstepu) |
fmuls fp_16 |
movl pspans(%esp),%ebx // point to the first span descriptor |
flds C(d_zistepu) |
fmuls fp_16 |
movl %edx,pbase // pbase = cacheblock |
fstps zi16stepu |
fstps tdivz16stepu |
fstps sdivz16stepu |
|
LSpanLoop: |
// |
// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the |
// initial s and t values |
// |
// FIXME: pipeline FILD? |
fildl espan_t_v(%ebx) |
fildl espan_t_u(%ebx) |
|
fld %st(1) // dv | du | dv |
fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv |
fld %st(1) // du | dv*d_sdivzstepv | du | dv |
fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv |
fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv |
fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu | |
// dv*d_sdivzstepv | du | dv |
fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu | |
// dv*d_sdivzstepv | du | dv |
faddp %st(0),%st(2) // du*d_tdivzstepu | |
// du*d_sdivzstepu + dv*d_sdivzstepv | du | dv |
fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv | |
// du*d_tdivzstepu | du | dv |
fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv | |
// du*d_tdivzstepu | du | dv |
fmuls C(d_tdivzstepv) // dv*d_tdivzstepv | |
// du*d_sdivzstepu + dv*d_sdivzstepv | |
// du*d_tdivzstepu | du | dv |
fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv | |
// dv*d_tdivzstepv | du*d_tdivzstepu | du | dv |
fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv + |
// du*d_sdivzstepu; stays in %st(2) at end |
fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du | |
// s/z |
fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv | |
// du*d_tdivzstepu | du | s/z |
fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv | |
// du*d_tdivzstepu | du | s/z |
faddp %st(0),%st(2) // dv*d_zistepv | |
// dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z |
fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu | |
// dv*d_zistepv | s/z |
fmuls C(d_zistepu) // du*d_zistepu | |
// dv*d_tdivzstepv + du*d_tdivzstepu | |
// dv*d_zistepv | s/z |
fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu | |
// du*d_zistepu | dv*d_zistepv | s/z |
fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv + |
// du*d_tdivzstepu; stays in %st(1) at end |
fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z |
faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z |
|
flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z |
fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z |
fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv + |
// du*d_zistepu; stays in %st(0) at end |
// 1/z | fp_64k | t/z | s/z |
// |
// calculate and clamp s & t |
// |
fdivr %st(0),%st(1) // 1/z | z*64k | t/z | s/z |
|
// |
// point %edi to the first pixel in the span |
// |
movl C(d_viewbuffer),%ecx |
movl espan_t_v(%ebx),%eax |
movl %ebx,pspantemp // preserve spans pointer |
|
movl C(tadjust),%edx |
movl C(sadjust),%esi |
movl C(d_scantable)(,%eax,4),%edi // v * screenwidth |
addl %ecx,%edi |
movl espan_t_u(%ebx),%ecx |
addl %ecx,%edi // pdest = &pdestspan[scans->u]; |
movl espan_t_count(%ebx),%ecx |
|
// |
// now start the FDIV for the end of the span |
// |
cmpl $16,%ecx |
ja LSetupNotLast1 |
|
decl %ecx |
jz LCleanup1 // if only one pixel, no need to start an FDIV |
movl %ecx,spancountminus1 |
|
// finish up the s and t calcs |
fxch %st(1) // z*64k | 1/z | t/z | s/z |
|
fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z |
fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z |
fxch %st(1) // z*64k | s | 1/z | t/z | s/z |
fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z |
fxch %st(1) // s | t | 1/z | t/z | s/z |
fistpl s // 1/z | t | t/z | s/z |
fistpl t // 1/z | t/z | s/z |
|
fildl spancountminus1 |
|
flds C(d_tdivzstepu) // C(d_tdivzstepu) | spancountminus1 |
flds C(d_zistepu) // C(d_zistepu) | C(d_tdivzstepu) | spancountminus1 |
fmul %st(2),%st(0) // C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1 |
fxch %st(1) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1 |
fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1 |
fxch %st(2) // scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 |
fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 | |
// C(d_tdivzstepu)*scm1 |
fxch %st(1) // C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 | |
// C(d_tdivzstepu)*scm1 |
faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1 |
fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1 |
faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1 |
faddp %st(0),%st(3) |
|
flds fp_64k |
fdiv %st(1),%st(0) // this is what we've gone to all this trouble to |
// overlap |
jmp LFDIVInFlight1 |
|
LCleanup1: |
// finish up the s and t calcs |
fxch %st(1) // z*64k | 1/z | t/z | s/z |
|
fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z |
fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z |
fxch %st(1) // z*64k | s | 1/z | t/z | s/z |
fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z |
fxch %st(1) // s | t | 1/z | t/z | s/z |
fistpl s // 1/z | t | t/z | s/z |
fistpl t // 1/z | t/z | s/z |
jmp LFDIVInFlight1 |
|
.align 4 |
LSetupNotLast1: |
// finish up the s and t calcs |
fxch %st(1) // z*64k | 1/z | t/z | s/z |
|
fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z |
fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z |
fxch %st(1) // z*64k | s | 1/z | t/z | s/z |
fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z |
fxch %st(1) // s | t | 1/z | t/z | s/z |
fistpl s // 1/z | t | t/z | s/z |
fistpl t // 1/z | t/z | s/z |
|
fadds zi16stepu |
fxch %st(2) |
fadds sdivz16stepu |
fxch %st(2) |
flds tdivz16stepu |
faddp %st(0),%st(2) |
flds fp_64k |
fdiv %st(1),%st(0) // z = 1/1/z |
// this is what we've gone to all this trouble to |
// overlap |
LFDIVInFlight1: |
|
addl s,%esi |
addl t,%edx |
movl C(bbextents),%ebx |
movl C(bbextentt),%ebp |
cmpl %ebx,%esi |
ja LClampHighOrLow0 |
LClampReentry0: |
movl %esi,s |
movl pbase,%ebx |
shll $16,%esi |
cmpl %ebp,%edx |
movl %esi,sfracf |
ja LClampHighOrLow1 |
LClampReentry1: |
movl %edx,t |
movl s,%esi // sfrac = scans->sfrac; |
shll $16,%edx |
movl t,%eax // tfrac = scans->tfrac; |
sarl $16,%esi |
movl %edx,tfracf |
|
// |
// calculate the texture starting address |
// |
sarl $16,%eax |
movl C(cachewidth),%edx |
imull %edx,%eax // (tfrac >> 16) * cachewidth |
addl %ebx,%esi |
addl %eax,%esi // psource = pbase + (sfrac >> 16) + |
// ((tfrac >> 16) * cachewidth); |
// |
// determine whether last span or not |
// |
cmpl $16,%ecx |
jna LLastSegment |
|
// |
// not the last segment; do full 16-wide segment |
// |
LNotLastSegment: |
|
// |
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to |
// get there |
// |
|
// pick up after the FDIV that was left in flight previously |
|
fld %st(0) // duplicate it |
fmul %st(4),%st(0) // s = s/z * z |
fxch %st(1) |
fmul %st(3),%st(0) // t = t/z * z |
fxch %st(1) |
fistpl snext |
fistpl tnext |
movl snext,%eax |
movl tnext,%edx |
|
movb (%esi),%bl // get first source texel |
subl $16,%ecx // count off this segments' pixels |
movl C(sadjust),%ebp |
movl %ecx,counttemp // remember count of remaining pixels |
|
movl C(tadjust),%ecx |
movb %bl,(%edi) // store first dest pixel |
|
addl %eax,%ebp |
addl %edx,%ecx |
|
movl C(bbextents),%eax |
movl C(bbextentt),%edx |
|
cmpl $4096,%ebp |
jl LClampLow2 |
cmpl %eax,%ebp |
ja LClampHigh2 |
LClampReentry2: |
|
cmpl $4096,%ecx |
jl LClampLow3 |
cmpl %edx,%ecx |
ja LClampHigh3 |
LClampReentry3: |
|
movl %ebp,snext |
movl %ecx,tnext |
|
subl s,%ebp |
subl t,%ecx |
|
// |
// set up advancetable |
// |
movl %ecx,%eax |
movl %ebp,%edx |
sarl $20,%eax // tstep >>= 16; |
jz LZero |
sarl $20,%edx // sstep >>= 16; |
movl C(cachewidth),%ebx |
imull %ebx,%eax |
jmp LSetUp1 |
|
LZero: |
sarl $20,%edx // sstep >>= 16; |
movl C(cachewidth),%ebx |
|
LSetUp1: |
|
addl %edx,%eax // add in sstep |
// (tstep >> 16) * cachewidth + (sstep >> 16); |
movl tfracf,%edx |
movl %eax,advancetable+4 // advance base in t |
addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth + |
// (sstep >> 16); |
shll $12,%ebp // left-justify sstep fractional part |
movl sfracf,%ebx |
shll $12,%ecx // left-justify tstep fractional part |
movl %eax,advancetable // advance extra in t |
|
movl %ecx,tstep |
addl %ecx,%edx // advance tfrac fractional part by tstep frac |
|
sbbl %ecx,%ecx // turn tstep carry into -1 (0 if none) |
addl %ebp,%ebx // advance sfrac fractional part by sstep frac |
adcl advancetable+4(,%ecx,4),%esi // point to next source texel |
|
addl tstep,%edx |
sbbl %ecx,%ecx |
movb (%esi),%al |
addl %ebp,%ebx |
movb %al,1(%edi) |
adcl advancetable+4(,%ecx,4),%esi |
|
addl tstep,%edx |
sbbl %ecx,%ecx |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
|
addl tstep,%edx |
sbbl %ecx,%ecx |
movb %al,2(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
|
addl tstep,%edx |
sbbl %ecx,%ecx |
movb %al,3(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
|
addl tstep,%edx |
sbbl %ecx,%ecx |
movb %al,4(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
|
addl tstep,%edx |
sbbl %ecx,%ecx |
movb %al,5(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
|
addl tstep,%edx |
sbbl %ecx,%ecx |
movb %al,6(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
|
addl tstep,%edx |
sbbl %ecx,%ecx |
movb %al,7(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
|
|
// |
// start FDIV for end of next segment in flight, so it can overlap |
// |
movl counttemp,%ecx |
cmpl $16,%ecx // more than one segment after this? |
ja LSetupNotLast2 // yes |
|
decl %ecx |
jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV |
movl %ecx,spancountminus1 |
fildl spancountminus1 |
|
flds C(d_zistepu) // C(d_zistepu) | spancountminus1 |
fmul %st(1),%st(0) // C(d_zistepu)*scm1 | scm1 |
flds C(d_tdivzstepu) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1 |
fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1 |
fxch %st(1) // C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1 |
faddp %st(0),%st(3) // C(d_tdivzstepu)*scm1 | scm1 |
fxch %st(1) // scm1 | C(d_tdivzstepu)*scm1 |
fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1 |
fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1 |
faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1 |
flds fp_64k // 64k | C(d_sdivzstepu)*scm1 |
fxch %st(1) // C(d_sdivzstepu)*scm1 | 64k |
faddp %st(0),%st(4) // 64k |
|
fdiv %st(1),%st(0) // this is what we've gone to all this trouble to |
// overlap |
jmp LFDIVInFlight2 |
|
.align 4 |
LSetupNotLast2: |
fadds zi16stepu |
fxch %st(2) |
fadds sdivz16stepu |
fxch %st(2) |
flds tdivz16stepu |
faddp %st(0),%st(2) |
flds fp_64k |
fdiv %st(1),%st(0) // z = 1/1/z |
// this is what we've gone to all this trouble to |
// overlap |
LFDIVInFlight2: |
movl %ecx,counttemp |
|
addl tstep,%edx |
sbbl %ecx,%ecx |
movb %al,8(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
|
addl tstep,%edx |
sbbl %ecx,%ecx |
movb %al,9(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
|
addl tstep,%edx |
sbbl %ecx,%ecx |
movb %al,10(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
|
addl tstep,%edx |
sbbl %ecx,%ecx |
movb %al,11(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
|
addl tstep,%edx |
sbbl %ecx,%ecx |
movb %al,12(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
|
addl tstep,%edx |
sbbl %ecx,%ecx |
movb %al,13(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
|
addl tstep,%edx |
sbbl %ecx,%ecx |
movb %al,14(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
|
addl $16,%edi |
movl %edx,tfracf |
movl snext,%edx |
movl %ebx,sfracf |
movl tnext,%ebx |
movl %edx,s |
movl %ebx,t |
|
movl counttemp,%ecx // retrieve count |
|
// |
// determine whether last span or not |
// |
cmpl $16,%ecx // are there multiple segments remaining? |
movb %al,-1(%edi) |
ja LNotLastSegment // yes |
|
// |
// last segment of scan |
// |
LLastSegment: |
|
// |
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to |
// get there. The number of pixels left is variable, and we want to land on the |
// last pixel, not step one past it, so we can't run into arithmetic problems |
// |
testl %ecx,%ecx |
jz LNoSteps // just draw the last pixel and we're done |
|
// pick up after the FDIV that was left in flight previously |
|
|
fld %st(0) // duplicate it |
fmul %st(4),%st(0) // s = s/z * z |
fxch %st(1) |
fmul %st(3),%st(0) // t = t/z * z |
fxch %st(1) |
fistpl snext |
fistpl tnext |
|
movb (%esi),%al // load first texel in segment |
movl C(tadjust),%ebx |
movb %al,(%edi) // store first pixel in segment |
movl C(sadjust),%eax |
|
addl snext,%eax |
addl tnext,%ebx |
|
movl C(bbextents),%ebp |
movl C(bbextentt),%edx |
|
cmpl $4096,%eax |
jl LClampLow4 |
cmpl %ebp,%eax |
ja LClampHigh4 |
LClampReentry4: |
movl %eax,snext |
|
cmpl $4096,%ebx |
jl LClampLow5 |
cmpl %edx,%ebx |
ja LClampHigh5 |
LClampReentry5: |
|
cmpl $1,%ecx // don't bother |
je LOnlyOneStep // if two pixels in segment, there's only one step, |
// of the segment length |
subl s,%eax |
subl t,%ebx |
|
addl %eax,%eax // convert to 15.17 format so multiply by 1.31 |
addl %ebx,%ebx // reciprocal yields 16.48 |
|
imull reciprocal_table_16-8(,%ecx,4) // sstep = (snext - s) / |
// (spancount-1) |
movl %edx,%ebp |
|
movl %ebx,%eax |
imull reciprocal_table_16-8(,%ecx,4) // tstep = (tnext - t) / |
// (spancount-1) |
LSetEntryvec: |
// |
// set up advancetable |
// |
movl entryvec_table_16(,%ecx,4),%ebx |
movl %edx,%eax |
movl %ebx,jumptemp // entry point into code for RET later |
movl %ebp,%ecx |
sarl $16,%edx // tstep >>= 16; |
movl C(cachewidth),%ebx |
sarl $16,%ecx // sstep >>= 16; |
imull %ebx,%edx |
|
addl %ecx,%edx // add in sstep |
// (tstep >> 16) * cachewidth + (sstep >> 16); |
movl tfracf,%ecx |
movl %edx,advancetable+4 // advance base in t |
addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth + |
// (sstep >> 16); |
shll $16,%ebp // left-justify sstep fractional part |
movl sfracf,%ebx |
shll $16,%eax // left-justify tstep fractional part |
movl %edx,advancetable // advance extra in t |
|
movl %eax,tstep |
movl %ecx,%edx |
addl %eax,%edx |
sbbl %ecx,%ecx |
addl %ebp,%ebx |
adcl advancetable+4(,%ecx,4),%esi |
|
jmp *jumptemp // jump to the number-of-pixels handler |
|
//---------------------------------------- |
|
LNoSteps: |
movb (%esi),%al // load first texel in segment |
subl $15,%edi // adjust for hardwired offset |
jmp LEndSpan |
|
|
LOnlyOneStep: |
subl s,%eax |
subl t,%ebx |
movl %eax,%ebp |
movl %ebx,%edx |
jmp LSetEntryvec |
|
//---------------------------------------- |
|
.globl Entry2_16, Entry3_16, Entry4_16, Entry5_16 |
.globl Entry6_16, Entry7_16, Entry8_16, Entry9_16 |
.globl Entry10_16, Entry11_16, Entry12_16, Entry13_16 |
.globl Entry14_16, Entry15_16, Entry16_16 |
|
Entry2_16: |
subl $14,%edi // adjust for hardwired offsets |
movb (%esi),%al |
jmp LEntry2_16 |
|
//---------------------------------------- |
|
Entry3_16: |
subl $13,%edi // adjust for hardwired offsets |
addl %eax,%edx |
movb (%esi),%al |
sbbl %ecx,%ecx |
addl %ebp,%ebx |
adcl advancetable+4(,%ecx,4),%esi |
jmp LEntry3_16 |
|
//---------------------------------------- |
|
Entry4_16: |
subl $12,%edi // adjust for hardwired offsets |
addl %eax,%edx |
movb (%esi),%al |
sbbl %ecx,%ecx |
addl %ebp,%ebx |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
jmp LEntry4_16 |
|
//---------------------------------------- |
|
Entry5_16: |
subl $11,%edi // adjust for hardwired offsets |
addl %eax,%edx |
movb (%esi),%al |
sbbl %ecx,%ecx |
addl %ebp,%ebx |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
jmp LEntry5_16 |
|
//---------------------------------------- |
|
Entry6_16: |
subl $10,%edi // adjust for hardwired offsets |
addl %eax,%edx |
movb (%esi),%al |
sbbl %ecx,%ecx |
addl %ebp,%ebx |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
jmp LEntry6_16 |
|
//---------------------------------------- |
|
Entry7_16: |
subl $9,%edi // adjust for hardwired offsets |
addl %eax,%edx |
movb (%esi),%al |
sbbl %ecx,%ecx |
addl %ebp,%ebx |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
jmp LEntry7_16 |
|
//---------------------------------------- |
|
Entry8_16: |
subl $8,%edi // adjust for hardwired offsets |
addl %eax,%edx |
movb (%esi),%al |
sbbl %ecx,%ecx |
addl %ebp,%ebx |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
jmp LEntry8_16 |
|
//---------------------------------------- |
|
Entry9_16: |
subl $7,%edi // adjust for hardwired offsets |
addl %eax,%edx |
movb (%esi),%al |
sbbl %ecx,%ecx |
addl %ebp,%ebx |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
jmp LEntry9_16 |
|
//---------------------------------------- |
|
Entry10_16: |
subl $6,%edi // adjust for hardwired offsets |
addl %eax,%edx |
movb (%esi),%al |
sbbl %ecx,%ecx |
addl %ebp,%ebx |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
jmp LEntry10_16 |
|
//---------------------------------------- |
|
Entry11_16: |
subl $5,%edi // adjust for hardwired offsets |
addl %eax,%edx |
movb (%esi),%al |
sbbl %ecx,%ecx |
addl %ebp,%ebx |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
jmp LEntry11_16 |
|
//---------------------------------------- |
|
Entry12_16: |
subl $4,%edi // adjust for hardwired offsets |
addl %eax,%edx |
movb (%esi),%al |
sbbl %ecx,%ecx |
addl %ebp,%ebx |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
jmp LEntry12_16 |
|
//---------------------------------------- |
|
Entry13_16: |
subl $3,%edi // adjust for hardwired offsets |
addl %eax,%edx |
movb (%esi),%al |
sbbl %ecx,%ecx |
addl %ebp,%ebx |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
jmp LEntry13_16 |
|
//---------------------------------------- |
|
Entry14_16: |
subl $2,%edi // adjust for hardwired offsets |
addl %eax,%edx |
movb (%esi),%al |
sbbl %ecx,%ecx |
addl %ebp,%ebx |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
jmp LEntry14_16 |
|
//---------------------------------------- |
|
Entry15_16: |
decl %edi // adjust for hardwired offsets |
addl %eax,%edx |
movb (%esi),%al |
sbbl %ecx,%ecx |
addl %ebp,%ebx |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
jmp LEntry15_16 |
|
//---------------------------------------- |
|
Entry16_16: |
addl %eax,%edx |
movb (%esi),%al |
sbbl %ecx,%ecx |
addl %ebp,%ebx |
adcl advancetable+4(,%ecx,4),%esi |
|
addl tstep,%edx |
sbbl %ecx,%ecx |
movb %al,1(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
LEntry15_16: |
sbbl %ecx,%ecx |
movb %al,2(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
LEntry14_16: |
sbbl %ecx,%ecx |
movb %al,3(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
LEntry13_16: |
sbbl %ecx,%ecx |
movb %al,4(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
LEntry12_16: |
sbbl %ecx,%ecx |
movb %al,5(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
LEntry11_16: |
sbbl %ecx,%ecx |
movb %al,6(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
LEntry10_16: |
sbbl %ecx,%ecx |
movb %al,7(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
LEntry9_16: |
sbbl %ecx,%ecx |
movb %al,8(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
LEntry8_16: |
sbbl %ecx,%ecx |
movb %al,9(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
LEntry7_16: |
sbbl %ecx,%ecx |
movb %al,10(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
LEntry6_16: |
sbbl %ecx,%ecx |
movb %al,11(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
LEntry5_16: |
sbbl %ecx,%ecx |
movb %al,12(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
addl tstep,%edx |
LEntry4_16: |
sbbl %ecx,%ecx |
movb %al,13(%edi) |
addl %ebp,%ebx |
movb (%esi),%al |
adcl advancetable+4(,%ecx,4),%esi |
LEntry3_16: |
movb %al,14(%edi) |
movb (%esi),%al |
LEntry2_16: |
|
LEndSpan: |
|
// |
// clear s/z, t/z, 1/z from FP stack |
// |
fstp %st(0) |
fstp %st(0) |
fstp %st(0) |
|
movl pspantemp,%ebx // restore spans pointer |
movl espan_t_pnext(%ebx),%ebx // point to next span |
testl %ebx,%ebx // any more spans? |
movb %al,15(%edi) |
jnz LSpanLoop // more spans |
|
popl %ebx // restore register variables |
popl %esi |
popl %edi |
popl %ebp // restore the caller's stack frame |
ret |
|
#endif // id386 |