Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Format Conversion Utils for MIPS
  3.  *
  4.  * Copyright (c) 2012
  5.  *      MIPS Technologies, Inc., California.
  6.  *
  7.  * Redistribution and use in source and binary forms, with or without
  8.  * modification, are permitted provided that the following conditions
  9.  * are met:
  10.  * 1. Redistributions of source code must retain the above copyright
  11.  *    notice, this list of conditions and the following disclaimer.
  12.  * 2. Redistributions in binary form must reproduce the above copyright
  13.  *    notice, this list of conditions and the following disclaimer in the
  14.  *    documentation and/or other materials provided with the distribution.
  15.  * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
  16.  *    contributors may be used to endorse or promote products derived from
  17.  *    this software without specific prior written permission.
  18.  *
  19.  * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
  20.  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21.  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22.  * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
  23.  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  24.  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  25.  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  26.  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  27.  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  28.  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  29.  * SUCH DAMAGE.
  30.  *
  31.  * Author:  Zoran Lukic (zoranl@mips.com)
  32.  * Author:  Nedeljko Babic (nbabic@mips.com)
  33.  *
  34.  * This file is part of FFmpeg.
  35.  *
  36.  * FFmpeg is free software; you can redistribute it and/or
  37.  * modify it under the terms of the GNU Lesser General Public
  38.  * License as published by the Free Software Foundation; either
  39.  * version 2.1 of the License, or (at your option) any later version.
  40.  *
  41.  * FFmpeg is distributed in the hope that it will be useful,
  42.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  43.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  44.  * Lesser General Public License for more details.
  45.  *
  46.  * You should have received a copy of the GNU Lesser General Public
  47.  * License along with FFmpeg; if not, write to the Free Software
  48.  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  49.  */
  50. #include "config.h"
  51. #include "libavcodec/avcodec.h"
  52. #include "libavcodec/fmtconvert.h"
  53. #include "libavutil/mips/asmdefs.h"
  54.  
  55. #if HAVE_INLINE_ASM
  /*
   * Convert integers from src to single-precision floats, multiply each by
   * mul and store the results to dst (dst[i] = src[i] * mul), using MIPS
   * FPU instructions in inline assembler.
   *
   * dst: destination array of floats
   * src: source array of integers
   * mul: scale factor applied to every converted element
   * len: number of elements to convert
   *
   * The loop handles 8 elements per iteration, runs its body before testing
   * the end condition, and exits only when src is exactly equal to
   * src + len. A len that is zero, negative or not a multiple of 8 therefore
   * makes it read and write past the requested range.
   * NOTE(review): callers are presumably required to pass a positive
   * multiple of 8 - confirm against the FmtConvertContext contract.
   */
  56. static void int32_to_float_fmul_scalar_mips(float *dst, const int *src,
  57.         float mul, int len)
  58. {
  59.     /*
  60.      * Scratch operands for the inline assembler: the temp* variables are
           * bound to floating-point registers ("f" constraint) and the rpom*
           * variables to general-purpose registers ("r" constraint). Their C
           * values are never read after the asm statement.
  61.      */
  62.     float temp1, temp3, temp5, temp7, temp9, temp11, temp13, temp15;
  63.  
  64.     int rpom1, rpom2, rpom11, rpom21, rpom12, rpom22, rpom13, rpom23;
          /* one-past-the-end source pointer; the loop stops when src reaches it */
  65.     const int *src_end = src + len;
  66.     /*
  67.      * The loop is unrolled 8 times in assembler for better performance.
           * Each iteration consumes 32 bytes of src and produces 32 bytes of dst.
  68.      */
  69.     __asm__ volatile (
              /* "%=" expands to a number unique to this asm statement, keeping the label unique */
  70.         "i32tf_lp%=:                                    \n\t"
              /* load src[i..i+3] and move the raw integer words into FPU registers */
  71.         "lw       %[rpom11],     0(%[src])              \n\t"
  72.         "lw       %[rpom21],     4(%[src])              \n\t"
  73.         "lw       %[rpom1],      8(%[src])              \n\t"
  74.         "lw       %[rpom2],      12(%[src])             \n\t"
  75.         "mtc1     %[rpom11],     %[temp1]               \n\t"
  76.         "mtc1     %[rpom21],     %[temp3]               \n\t"
  77.         "mtc1     %[rpom1],      %[temp5]               \n\t"
  78.         "mtc1     %[rpom2],      %[temp7]               \n\t"
  79.  
              /* same for src[i+4..i+7] */
  80.         "lw       %[rpom13],     16(%[src])             \n\t"
  81.         "lw       %[rpom23],     20(%[src])             \n\t"
  82.         "lw       %[rpom12],     24(%[src])             \n\t"
  83.         "lw       %[rpom22],     28(%[src])             \n\t"
  84.         "mtc1     %[rpom13],     %[temp9]               \n\t"
  85.         "mtc1     %[rpom23],     %[temp11]              \n\t"
  86.         "mtc1     %[rpom12],     %[temp13]              \n\t"
  87.         "mtc1     %[rpom22],     %[temp15]              \n\t"
  88.  
              /*
               * advance src by 8 elements (32 bytes)
               * NOTE(review): PTR_ADDIU presumably comes from libavutil/mips/asmdefs.h
               * and selects a pointer-width add-immediate - confirm
               */
  89.         PTR_ADDIU "%[src],       32                     \n\t"
              /* convert the integer words held in the FPU registers to single-precision floats */
  90.         "cvt.s.w  %[temp1],      %[temp1]               \n\t"
  91.         "cvt.s.w  %[temp3],      %[temp3]               \n\t"
  92.         "cvt.s.w  %[temp5],      %[temp5]               \n\t"
  93.         "cvt.s.w  %[temp7],      %[temp7]               \n\t"
  94.  
  95.         "cvt.s.w  %[temp9],      %[temp9]               \n\t"
  96.         "cvt.s.w  %[temp11],     %[temp11]              \n\t"
  97.         "cvt.s.w  %[temp13],     %[temp13]              \n\t"
  98.         "cvt.s.w  %[temp15],     %[temp15]              \n\t"
  99.  
               /* scale all eight converted values by mul */
  100.         "mul.s   %[temp1],       %[temp1],    %[mul]    \n\t"
  101.         "mul.s   %[temp3],       %[temp3],    %[mul]    \n\t"
  102.         "mul.s   %[temp5],       %[temp5],    %[mul]    \n\t"
  103.         "mul.s   %[temp7],       %[temp7],    %[mul]    \n\t"
  104.  
  105.         "mul.s   %[temp9],       %[temp9],    %[mul]    \n\t"
  106.         "mul.s   %[temp11],      %[temp11],   %[mul]    \n\t"
  107.         "mul.s   %[temp13],      %[temp13],   %[mul]    \n\t"
  108.         "mul.s   %[temp15],      %[temp15],   %[mul]    \n\t"
  109.  
               /* store the eight results to dst */
  110.         "swc1    %[temp1],       0(%[dst])              \n\t" /*dst[i] = src[i] * mul;    */
  111.         "swc1    %[temp3],       4(%[dst])              \n\t" /*dst[i+1] = src[i+1] * mul;*/
  112.         "swc1    %[temp5],       8(%[dst])              \n\t" /*dst[i+2] = src[i+2] * mul;*/
  113.         "swc1    %[temp7],       12(%[dst])             \n\t" /*dst[i+3] = src[i+3] * mul;*/
  114.  
  115.         "swc1    %[temp9],       16(%[dst])             \n\t" /*dst[i+4] = src[i+4] * mul;*/
  116.         "swc1    %[temp11],      20(%[dst])             \n\t" /*dst[i+5] = src[i+5] * mul;*/
  117.         "swc1    %[temp13],      24(%[dst])             \n\t" /*dst[i+6] = src[i+6] * mul;*/
  118.         "swc1    %[temp15],      28(%[dst])             \n\t" /*dst[i+7] = src[i+7] * mul;*/
               /* advance dst by 8 elements (32 bytes) */
  119.         PTR_ADDIU "%[dst],       32                     \n\t"
               /* loop until src is exactly equal to src_end (equality test, not less-than) */
  120.         "bne     %[src],        %[src_end], i32tf_lp%=  \n\t"
               /*
                * outputs: scratch registers are early-clobber ("&") so they are never
                * allocated on top of an input; dst and src are read-write ("+r")
                * because the loop advances both pointers
                */
  121.         : [temp1]"=&f"(temp1),   [temp11]"=&f"(temp11),
  122.           [temp13]"=&f"(temp13), [temp15]"=&f"(temp15),
  123.           [temp3]"=&f"(temp3),   [temp5]"=&f"(temp5),
  124.           [temp7]"=&f"(temp7),   [temp9]"=&f"(temp9),
  125.           [rpom1]"=&r"(rpom1),   [rpom2]"=&r"(rpom2),
  126.           [rpom11]"=&r"(rpom11), [rpom21]"=&r"(rpom21),
  127.           [rpom12]"=&r"(rpom12), [rpom22]"=&r"(rpom22),
  128.           [rpom13]"=&r"(rpom13), [rpom23]"=&r"(rpom23),
  129.           [dst]"+r"(dst),       [src]"+r"(src)
               /* inputs: the scale factor in an FPU register, the end pointer in a GPR */
  130.         : [mul]"f"(mul),        [src_end]"r"(src_end)
               /* the asm reads src[] and writes dst[] through pointers, so declare a memory clobber */
  131.         : "memory"
  132.     );
  133. }
  134. #endif /* HAVE_INLINE_ASM */
  135.  
  /*
   * Install the MIPS-specific format conversion routine into c.
   *
   * When HAVE_INLINE_ASM is enabled, c->int32_to_float_fmul_scalar is set to
   * int32_to_float_fmul_scalar_mips; no other member of c is modified here.
   * When inline assembler is unavailable the function does nothing and c
   * keeps whatever function pointers it already holds.
   */
  136. av_cold void ff_fmt_convert_init_mips(FmtConvertContext *c)
  137. {
  138. #if HAVE_INLINE_ASM
           /* the optimized routine is only compiled when inline asm is supported */
  139.     c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_mips;
  140. #endif
  141. }
  142.