Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Blackfin Pixel Operations
  3.  * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
  4.  *
  5.  * This file is part of FFmpeg.
  6.  *
  7.  * FFmpeg is free software; you can redistribute it and/or
  8.  * modify it under the terms of the GNU Lesser General Public
  9.  * License as published by the Free Software Foundation; either
  10.  * version 2.1 of the License, or (at your option) any later version.
  11.  *
  12.  * FFmpeg is distributed in the hope that it will be useful,
  13.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15.  * Lesser General Public License for more details.
  16.  *
  17.  * You should have received a copy of the GNU Lesser General Public
  18.  * License along with FFmpeg; if not, write to the Free Software
  19.  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20.  */
  21. #include "config_bfin.h"
  22.  
  23. DEFUN(put_pixels_clamped,mL1,
  24.         (int16_t *block, uint8_t *dest, int line_size)):
          // Clamp the 16-bit values read from block to 0..255 and store them
          // as bytes to dest: 8 loop passes (P0 = 8), two 4-byte stores per
          // pass, i.e. 8 bytes per row.  On entry R0 = block, R1 = dest,
          // R2 = line_size.
          //
          // The loop is software pipelined: the loads and the first MAX for
          // the next group are issued alongside the current MIN/BYTEPACK, so
          // the final pass loads two words beyond the 32 words it consumes;
          // those values are never stored.
  25.     [--SP] = (R7:4);
          // R4 = lower clamp bound (0), R5 = upper bound (0x00ff in both halves).
  26.     R4 = 0;
  27.     R5.l = 0x00ff;
  28.     R5.h = 0x00ff;
  29.     I0 = R0;         // block
  30.     I1 = R1;         // dest
          // M1 = line_size - 4: applied on the second store of each row so
          // that I1 ends up at the start of the next row.
  31.     R2 += -4;        // line_size
  32.     M1 = R2;
  33.     P0 = 8;
          // Prologue: fetch the first four coefficients and start clamping.
  34.     R0 = [I0++];
  35.     R1 = [I0++];
  36.     R2 = MAX(R0, R4) (V);
  37.     LSETUP (ppc$0,ppc$1) LC0=P0;
          // First half of the row (bytes 0..3), stored with [I1++].
  38. ppc$0: R2 = MIN(R2, R5) (V);
  39.        R3 = MAX(R1, R4) (V);
  40.        R3 = MIN(R3, R5) (V)      || R0 = [I0++];
  41.        R6 = BYTEPACK (R2,R3)     || R1 = [I0++];
  42.        R2 = MAX(R0, R4) (V)      || [I1++] = R6;
          // Second half of the row (bytes 4..7), stored with [I1++M1].
  43.        R2 = MIN(R2, R5) (V);
  44.        R3 = MAX(R1, R4) (V);
  45.        R3 = MIN(R3, R5) (V)      || R0 = [I0++];
  46.        R6 = BYTEPACK (R2,R3)     || R1 = [I0++];
  47. ppc$1: R2 = Max(R0, R4) (V)      || [I1++M1] = R6;
  48.  
  49.     (R7:4) = [SP++];
  50.     RTS;
  51. DEFUN_END(put_pixels_clamped)
  52.  
  53. DEFUN(add_pixels_clamped,mL1,
  54.         (int16_t *block, uint8_t *dest, int line_size)):
          // Add the 16-bit values read from block to the bytes already at
          // dest and write the result back to dest.  On entry R0 = block,
          // R1 = dest, R2 = line_size.
          //
          // Pointer roles: I3 walks block, I1 reads dest, I2 writes dest.
          // The loop runs 8 times (P0 = 8, LC1) and stores two 32-bit words
          // per pass through I2; the second store post-modifies by
          // M0 = line_size - 4, so each pass covers one 8-byte row.
          //
          // NOTE(review): I0 is cleared and the pixel words in R2/R3 are
          // shifted by 8 bits before BYTEOP3P; presumably this sets up the
          // byte lanes/alignment that BYTEOP3P (dual 16-bit add with clip to
          // 8 bits) expects -- confirm against the Blackfin instruction
          // reference.  The (LO) and (HI) results are merged with the
          // saturating add R6 = R6 + R7 (S).
  55.     [-- SP] = (R7:4);
  56.     R4 = 0;
  57.     I0 = 0;
  58.     R2 += -4;        // line_size
  59.     M0 = R2;
  60.     I1 = R1;         // dest
  61.     I3 = R0;         // block
  62.     I2 = R1;         // dest
  63.     P0 = 8;
  64.     M3 = 2;
          // Prologue: load the first coefficients and pixels and compute the
          // first (LO) partial result before entering the loop.
  65.     R0 = [I3++]  || R2 = [I1];
  66.     R2 = R2 << 8                      || R0.H = W[I3--]  || R3 = [I1++];
  67.     R3 = R3 >> 8                      || R1.L = W[I3]    || I3 += 4;
  68.     R6 = BYTEOP3P(R1:0, R3:2) (LO)    || R1.H = W[I3++]  || R2 = [I1];
  69.  
  70.     LSETUP(apc$2,apc$3) LC1 = P0;
          // First word of the row; written with [I2++].
  71. apc$2: R7 = BYTEOP3P(R1:0, R3:2) (HI, R) || R0 = [I3++]     || R3 = [I1++M0];
  72.        R2 = R2 << 8                      || R0.H = W[I3--];
  73.        R3 = R3 >> 8                      || R1.L = W[I3]    || I3 += 4;
  74.        R6 = R6 + R7 (S)                  || R1.H = W[I3];
  75.        R6 = BYTEOP3P(R1:0, R3:2) (LO)    || I3+=M3          || [I2++]=R6;
          // Second word of the row; written with [I2++M0] to reach the next row.
  76.        R7 = BYTEOP3P(R1:0, R3:2) (HI, R) || R0 = [I3++]     || R2 = [I1];
  77.        R2 = R2 << 8                      || R0.H = W[I3--]  || R3 = [I1++];
  78.        R3 = R3 >> 8                      || R1.L = W[I3]    || I3 += 4;
  79.        R6 = R6 + R7 (S)                  || R1.H = W[I3++];
  80. apc$3: R6 = BYTEOP3P(R1:0, R3:2) (LO)    || [I2++M0] = R6   || R2 = [I1];
  81.  
  82.     (R7:4) = [SP++];
  83.     RTS;
  84. DEFUN_END(add_pixels_clamped)
  85.  
  86. DEFUN(diff_pixels,mL1,
  87.        (int16_t *block, uint8_t *s1, uint8_t *s2, int stride)):
          // Store the byte-wise differences between an 8x8 block at s1 and an
          // 8x8 block at s2 as 16-bit values into block.  On entry
          // R0 = block, R1 = s1, R2 = s2; stride is the fourth argument and
          // is read from the stack at [fp+20] after "link 0".
          //
          // The loop runs 8 times (one row per pass).  BYTEOP16M (quad 8-bit
          // subtract with 16-bit results) handles 4 bytes at a time; each
          // pass stores R4, R5, R6, R7 (8 x int16) through I3.  The first
          // word of the following row is preloaded at the end of each pass.
  88.         link 0;
  89.         [--sp] = (r7:4);
  90.         p0=8;
  91.         i3=r0;        // block
  92.         i0=r1;        // s1
  93.         i1=r2;        // s2
  94.         r2=[fp+20];   // stride
          // M0 = stride - 8: moves I0/I1 from the end of the 8 consumed bytes
          // to the start of the next row.
  95.         r2+=-8;
  96.         m0=r2;
  97.  
  98.  
  99.         LSETUP(.LS0,.LE0) LC0=P0;
          // Executed once before the loop body: prime R0/R2 with the first words.
  100.         DISALGNEXCPT                       || R0 = [I0++]   || R2  =[I1++];
  101.  
  102. .LS0:   DISALGNEXCPT                       || R1 = [I0++]   || R3 = [I1++];
  103.         (R5,R4) = BYTEOP16M (R1:0,R3:2)    || R0 = [I0++M0] || R2 = [I1++M0];
  104.         (R7,R6) = BYTEOP16M (R1:0,R3:2) (R)|| R0 = [I0++]   || [I3++] = R4;
  105.         DISALGNEXCPT                       || R2 = [I1++]   || [I3++] = R5;
  106.         [i3++]=r6;
  107. .LE0:  [i3++]=r7;
  108.  
  109.         (r7:4) = [sp++];
  110.         unlink;
  111.         rts;
  112. DEFUN_END(diff_pixels)
  113.  
  114. /*
  115.     for (i = 0; i < 16; i++) {
  116.         for (j = 0; j < 16; j++) {
  117.           sum += pix[j];
  118.         }
  119.         pix += line_size;
  120.     }
  121. */
  122. DEFUN(pix_sum,mL1,
  123.         (uint8_t *p, int stride)):
          // Sum the bytes of a 16x16 block.  On entry R0 = p, R1 = stride.
          //
          // Two rows are processed per loop pass: I0 starts at p and I1 at
          // p + stride, and both advance by 2*stride per pass
          // (16 bytes of ++ plus M0 = 2*stride - 16), for 8 passes.
          // BYTEOP16P adds the bytes of the two rows pairwise into four 16-bit
          // lanes (R5, R4); these are accumulated lane-wise into the two
          // halves of R6 with the vector add "+|+".
          //
          // Return value: R0.L = R6.L + R6.H, R0.H = 0.
  124.         link 0;
  125.         [--sp] = (r7:4);
  126.         p0=8;
  127.         i0=r0;        // p (first row of each pair)
  128.         i1=r0;
  129.         m1=r1;
  130.         r1=r1+r1;
  131.         r1+=-16;       // stride
  132.         m0=r1;
          // I1 = p + stride (second row of each pair).
  133.         i1+=m1;
  134.  
  135.         r6=0;
  136.  
  137.         LSETUP(LS$PS,LE$PS) LC0=P0;
          // Executed once before the loop body: prime R0/R2 with the first words.
  138.         DISALGNEXCPT                       || R0 = [I0++]   || R2  =[I1++];
  139.  
  140. LS$PS:  DISALGNEXCPT                       || R1 = [I0++]   || R3 = [I1++];
  141.         (R5,R4) = BYTEOP16P (R3:2,R1:0)    || R0 = [I0++]   || R2 = [I1++];
  142.         r6=r6+|+r5;
  143.         r6=r6+|+r4;
  144.         (R5,R4) = BYTEOP16P (R3:2,R1:0) (R)|| R1 = [I0++]   || R3 = [I1++];
  145.         r6=r6+|+r5;
  146.         r6=r6+|+r4;
  147.         (R5,R4) = BYTEOP16P (R3:2,R1:0)    || R0 = [I0++m0] || R2 = [I1++m0];
  148.         r6=r6+|+r5;
  149.         r6=r6+|+r4;
  150.         (R5,R4) = BYTEOP16P (R3:2,R1:0) (R)|| R0 = [I0++]   || R2 = [I1++];
  151.         r6=r6+|+r5;
  152. LE$PS:  r6=r6+|+r4;
          // Fold the two 16-bit partial sums into the 16-bit result.
  153.         r0.l=r6.l+r6.h;
  154.         r0.h=0;
  155.  
  156.         (r7:4) = [sp++];
  157.         unlink;
  158.         rts;
  159. DEFUN_END(pix_sum)
  160.  
  161.  
  162. DEFUN(get_pixels,mL1,
  163.         (int16_t *av_restrict block, const uint8_t *pixels, int line_size)):
          // Expand an 8x8 block of bytes at pixels into 64 16-bit values at
          // block.  On entry R0 = block, R1 = pixels, R2 = line_size.
          //
          // The loop runs 8 times (one row per pass).  BYTEUNPACK turns four
          // bytes of the R1:0 pair into four 16-bit values; each pass stores
          // R6, R7, R4, R5 (8 x int16) through I3.  M0 = line_size - 8 moves
          // I0 to the next source row, whose first two words are preloaded
          // inside the pass.
  164.         [--sp] = (r7:4);
  165.         i3=r0;        // dest
  166.         i0=r1;        // src0
  167.         p0=8;
  168.         r2+=-8;
  169.         m0=r2;
  170.         LSETUP(gp8$0,gp8$1) LC0=P0;
  171.  
          // Executed once before the loop body: prime R0/R1 with the first row.
  172.         DISALGNEXCPT                   || R0 = [I0++];
  173.         DISALGNEXCPT                   || R1 = [I0++];
  174.  
  175. gp8$0:  (R7,R6) = byteunpack R1:0      || R0 = [I0++M0];
  176.         (R5,R4) = byteunpack R1:0 (R)  || R0 = [I0++]    || [I3++]=R6;
  177.         DISALGNEXCPT                   || R1 = [I0++]    || [I3++]=R7;
  178.         [I3++]=R4;
          // NOTE(review): the next line has no terminating ';', unlike every
          // other instruction in this file -- confirm the assembler accepts it.
  179. gp8$1:  [I3++]=R5
  180.  
  181.  
  182.         (r7:4) = [sp++];
  183.         RTS;
  184. DEFUN_END(get_pixels)
  185.  
  186.  
  187. /* sad = sad16x16 (ubyte *mb, ubyte *refwin, srcwidth, refwinwidth, h) */
  188. /* 91 cycles */
  189. DEFUN(z_sad16x16,mL1,
  190.         (uint8_t *blk1, uint8_t *blk2, int dsz, int line_size, int h)):
          // Sum of absolute differences between two 16-byte-wide blocks over
          // h rows.  On entry R0 = blk1, R1 = blk2, R2 = dsz; the fourth and
          // fifth arguments are read from the stack at [sp+20] and [sp+24]
          // after "link 0".
          //
          // I0 walks blk1 with a row step of dsz (M0 = dsz - 16); I1 walks
          // blk2 with a row step of the fourth argument (M1 = arg4 - 16).
          // Each loop pass issues four SAA instructions (4 bytes each) that
          // accumulate into the halves of A0/A1; the four partial sums are
          // added into R0 at the end.
  191.         link 0;
  192.         I0 = R0;
  193.         I1 = R1;
  194.  
  195.         A1 = A0 = 0;
  196.         R0 = [sp+20]; // rwidth
  197.         P2 = [sp+24]; // height
  198.         R3 = 16;
  199.         R0 = R0 - R3;
  200.         R3 = R2 - R3;
  201.         M1 = R0;
  202.         M0 = R3;
  203.  
          // Prime R0/R2 with the first words of the first row.
  204.         DISALGNEXCPT         || R0 = [I0++]    || R2 = [I1++];
  205.         LSETUP (s$16, e$16) LC0=P2;
  206. s$16:   DISALGNEXCPT         || R1 = [I0++]    || R3 = [I1++];
  207.         SAA (R1:0,R3:2)      || R0 = [I0++]    || R2 = [I1++];
  208.         SAA (R1:0,R3:2) (R)  || R1 = [I0++]    || R3 = [I1++];
  209.         SAA (R1:0,R3:2)      || R0 = [I0++M0]  || R2 = [I1++M1];
  210. e$16:   SAA (R1:0,R3:2) (R)  || R0 = [I0++]    || R2 = [I1++];
  211.  
          // Combine the four 16-bit accumulator halves into the result.
  212.         R3=A1.L+A1.H,  R2=A0.L+A0.H ;
  213.         R0 = R2 + R3 ;
  214.         unlink;
  215.         RTS;
  216. DEFUN_END(z_sad16x16)
  217.  
  218. /* sad = sad8x8 (ubyte *mb, ubyte *refwin, int srcwidth, int refwinwidth, int h) */
  219. /* 36 cycles */
  220. DEFUN(z_sad8x8,mL1,
  221.         (uint8_t *blk1, uint8_t *blk2, int dsz, int line_size, int h)):
          // Sum of absolute differences between two 8-byte-wide blocks over
          // h rows.  On entry R0 = blk1, R1 = blk2, R2 = dsz.  No stack frame
          // is set up here, so the fourth and fifth arguments are read at
          // [sp+12] and [sp+16].
          //
          // I0 walks blk1 with a row step of dsz (M0 = dsz - 8); I1 walks
          // blk2 with a row step of the fourth argument (M1 = arg4 - 8).
          // Each loop pass issues two SAA instructions (4 bytes each) and
          // preloads the first two words of the following row.
  222.         I0 = R0;
  223.         I1 = R1;
  224.  
  225.         A1 = A0 = 0;
  226.         r0 = [sp+12]; // rwidth
  227.         P2 = [sp+16]; //height
  228.         R3 = 8;
  229.         R0 = R0 - R3;
  230.         R3 = R2 - R3;
  231.         M0 = R3;
  232.         M1 = R0;
  233.  
  234.         LSETUP (s$8, e$8) LC0=P2;
          // Executed once before the loop body: prime R0..R3 with the first row.
  235.         DISALGNEXCPT         || R0 = [I0++]   || R2 = [I1++];
  236.         DISALGNEXCPT         || R1 = [I0++]   || R3 = [I1++];
  237. s$8:    SAA (R1:0,R3:2)      || R0 = [I0++M0] || R2 = [I1++M1];
  238.         SAA (R1:0,R3:2) (R)  || R0 = [I0++]   || R2 = [I1++];
  239. e$8:    DISALGNEXCPT         || R1 = [I0++]   || R3 = [I1++];
  240.  
          // Combine the four 16-bit accumulator halves into the result.
  241.         R3=A1.L+A1.H,  R2=A0.L+A0.H ;
  242.         R0 = R2 + R3 ;
  243.         RTS;
  244. DEFUN_END(z_sad8x8)
  245.  
  246. DEFUN(pix_norm1,mL1,
  247.         (uint8_t * pix, int line_size)):
          // Sum of the squares of the bytes of a 16x16 block at pix.
          // On entry R0 = pix, R1 = line_size.
          //
          // The loop runs 16 times (LC1 = P5 = 16), one 16-byte row per pass.
          // BYTEUNPACK expands 4 bytes into four 16-bit values (R5, R4); the
          // dual MACs square them and accumulate into A0 and A1.
          // Return value: R0 = A0 + A1, added with saturation (S).
  248.         [--SP]=(R7:4,P5:3);
  249.  
  250.         // Fetch the input arguments.
  251.         P1 = R0;  // pix
  252.         P0 = R1;  // line_size
  253.         P5 = 16;  // loop ctr.
  254.         P0 -= P5;
  255.         M0 = P0;  // M0 = line_size-16;
  256.         // Now for the real work.
  257.         A1 = A0 = 0;
  258.         lsetup(_pix_norm1_blkfn_loopStart, _pix_norm1_blkfn_loopEnd) LC1 = P5;
  259.         I0 = P1;
          // Executed once before the loop body: prime R0 with the first word.
  260.         DISALGNEXCPT || r0 = [i0++];
  261.  
  262. _pix_norm1_blkfn_loopStart:
  263.         // each pass unpacks the 16 bytes of one row and accumulates their squares
  264.         DISALGNEXCPT || r1 = [i0++];
  265.  
  266.         (r5, r4) = byteunpack r1:0 || r0 = [i0++];
  267.         a1 += r5.h * r5.h, a0 += r5.l * r5.l (is);
  268.         a1 += r4.h * r4.h, a0 += r4.l * r4.l (is);
  269.         (r5, r4) = byteunpack r1:0(r) || r1 = [i0++];
  270.         a1 += r5.h * r5.h, a0 += r5.l * r5.l (is);
  271.         a1 += r4.h * r4.h, a0 += r4.l * r4.l (is);
          // The load below post-modifies by M0 so I0 lands on the next row.
  272.         (r5, r4) = byteunpack r1:0 || r0 = [i0++M0];
  273.         a1 += r5.h * r5.h, a0 += r5.l * r5.l (is);
  274.         a1 += r4.h * r4.h, a0 += r4.l * r4.l (is);
          // The load below preloads the first word of the next row.
  275.         (r5, r4) = byteunpack r1:0(r) || r0 = [i0++];
  276.         a1 += r5.h * r5.h, a0 += r5.l * r5.l (is);
  277. _pix_norm1_blkfn_loopEnd:
  278.         a1 += r4.h * r4.h, a0 += r4.l * r4.l (is);
  279.  
  280.  
  281. // Clean up at the end:
  282.         R2 = A0, R3 = A1;
  283.         R0 = R2 + R3 (S);
  284.  
  285.         (R7:4,P5:3)=[SP++];
  286.  
  287.         RTS;
  288. DEFUN_END(pix_norm1)
  289.  
  290. DEFUN(sse4,mL1,
  291.         (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)):
          // Sum of squared differences between two 4-byte-wide blocks over
          // h rows.  On entry R1 = pix1, R2 = pix2 (R0 = v is not read);
          // line_size and h are read from the stack at [fp+20] and [fp+24]
          // after "link 0".
          //
          // Each loop pass handles one row: BYTEOP16M (quad 8-bit subtract
          // with 16-bit results) produces four differences in R7/R6, which the
          // dual MACs square and accumulate into A0/A1.  Return value:
          // R0 = A0 + A1.
          //
          // Per pass I0/I1 advance by 4 (next-row preload) plus
          // M0 = line_size - 4, i.e. exactly one row.  As in sse8/sse16, the
          // last pass preloads one word past the final row; it is never used.
  292.         link 0;
  293.         [--sp] = (r7:6);
  294.         p0=[fp+24];   // h
  295.         i0=r1;        // pix1
  296.         i1=r2;        // pix2
  297.         r2=[fp+20];   // line_size
  298.         r2+=-4;
  299.         m0=r2;
  300.  
  301.         a0=a1=0;
  302.         LSETUP(.S40,.E40) LC0=P0;
          // Executed once before the loop body: prime R0/R2 with the first words.
  303.         DISALGNEXCPT                       || R0 = [I0++]   || R2  =[I1++];
  304.  
  305. .S40:   DISALGNEXCPT                       || R1 = [I0++M0] || R3 = [I1++M0];
          // Preload the first word of the next row in parallel with the
          // subtract.  Without this reload R0/R2 kept the words of row 0 for
          // every pass and the pointers advanced by only line_size - 4 per row.
  306.         (R7,R6) = BYTEOP16M (R1:0,R3:2)    || R0 = [I0++]   || R2 = [I1++];
  307.         a0 += r7.l * r7.l, a1 += r7.h * r7.h (is);
  308. .E40:   a0 += r6.l * r6.l, a1 += r6.h * r6.h (is);
  309.         a0 += a1;
  310.         r0 = a0;
  311.  
  312.         (r7:6) = [sp++];
  313.         unlink;
  314.         rts;
  315. DEFUN_END(sse4)
  316.  
  317. DEFUN(sse8,mL1,
  318.         (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)):
          // Sum of squared differences between two 8-byte-wide blocks over
          // h rows.  On entry R1 = pix1, R2 = pix2 (R0 = v is not read);
          // line_size and h are read from the stack at [fp+20] and [fp+24]
          // after "link 0".
          //
          // Each loop pass handles one row with two BYTEOP16M (quad 8-bit
          // subtract with 16-bit results); the differences in R7/R6 are
          // squared and accumulated into A0/A1 by the dual MACs.
          // M0 = line_size - 8 moves I0/I1 to the next row, whose first word
          // is preloaded at the end of the pass.  Return value: R0 = A0 + A1.
  319.         link 0;
  320.         [--sp] = (r7:6);
  321.         p0=[fp+24];   // h
  322.         i0=r1;        // pix1
  323.         i1=r2;        // pix2
  324.         r2=[fp+20];   // line_size
  325.         r2+=-8;
  326.         m0=r2;
  327.  
  328.         a0=a1=0;
  329.         LSETUP(.S80,.E80) LC0=P0;
          // Executed once before the loop body: prime R0/R2 with the first words.
  330.         DISALGNEXCPT                       || R0 = [I0++]   || R2  =[I1++];
  331.  
  332. .S80:   DISALGNEXCPT                       || R1 = [I0++]   || R3 = [I1++];
  333.         (R7,R6) = BYTEOP16M (R1:0,R3:2)    || R0 = [I0++M0] || R2 = [I1++M0];
  334.         a0 += r7.l * r7.l, a1 += r7.h * r7.h (is);
  335.         a0 += r6.l * r6.l, a1 += r6.h * r6.h (is);
  336.         (R7,R6) = BYTEOP16M (R1:0,R3:2) (R)|| R0 = [I0++]   || R2 = [I1++];
  337.         a0 += r7.l * r7.l, a1 += r7.h * r7.h (is);
  338. .E80:   a0 += r6.l * r6.l, a1 += r6.h * r6.h (is);
  339.         a0 += a1;
  340.         r0 = a0;
  341.  
  342.         (r7:6) = [sp++];
  343.         unlink;
  344.         rts;
  345. DEFUN_END(sse8)
  346.  
  347. DEFUN(sse16,mL1,
  348.         (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)):
          // Sum of squared differences between two 16-byte-wide blocks over
          // h rows.  On entry R1 = pix1, R2 = pix2 (R0 = v is not read);
          // line_size and h are read from the stack at [fp+20] and [fp+24]
          // after "link 0".
          //
          // Each loop pass handles one row with four BYTEOP16M (quad 8-bit
          // subtract with 16-bit results); the differences in R7/R6 are
          // squared and accumulated into A0/A1 by the dual MACs.
          // M0 = line_size - 16 moves I0/I1 to the next row, whose first word
          // is preloaded at the end of the pass.  Return value: R0 = A0 + A1.
  349.         link 0;
  350.         [--sp] = (r7:6);
  351.         p0=[fp+24];   // h
  352.         i0=r1;        // pix1
  353.         i1=r2;        // pix2
  354.         r2=[fp+20];   // line_size
  355.         r2+=-16;
  356.         m0=r2;
  357.  
  358.         a0=a1=0;
          // Prime R0/R2 with the first words of the first row.
  359.         DISALGNEXCPT                       || R0 = [I0++]   || R2  =[I1++];
  360.         LSETUP(.S160,.E160) LC0=P0;
  361.  
  362. .S160:  DISALGNEXCPT                       || R1 = [I0++]   || R3 = [I1++];
  363.         (R7,R6) = BYTEOP16M (R1:0,R3:2)    || R0 = [I0++]   || R2 = [I1++];
  364.         a0 += r7.l * r7.l, a1 += r7.h * r7.h (is);
  365.         a0 += r6.l * r6.l, a1 += r6.h * r6.h (is);
  366.         (R7,R6) = BYTEOP16M (R1:0,R3:2) (R)|| R1 = [I0++]   || R3 = [I1++];
  367.         a0 += r7.l * r7.l, a1 += r7.h * r7.h (is);
  368.         a0 += r6.l * r6.l, a1 += r6.h * r6.h (is);
  369.         (R7,R6) = BYTEOP16M (R1:0,R3:2)    || R0 = [I0++M0] || R2 = [I1++M0];
  370.         a0 += r7.l * r7.l, a1 += r7.h * r7.h (is);
  371.         a0 += r6.l * r6.l, a1 += r6.h * r6.h (is);
  372.         (R7,R6) = BYTEOP16M (R1:0,R3:2) (R)|| R0 = [I0++]   || R2 = [I1++];
  373.         a0 += r7.l * r7.l, a1 += r7.h * r7.h (is);
  374. .E160:  a0 += r6.l * r6.l, a1 += r6.h * r6.h (is);
  375.         a0 += a1;
  376.         r0 = a0;
  377.  
  378.         (r7:6) = [sp++];
  379.         unlink;
  380.         rts;
  381. DEFUN_END(sse16)
  382.