Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright (c) 2013 RISC OS Open Ltd
  3.  * Author: Ben Avison <bavison@riscosopen.org>
  4.  *
  5.  * This file is part of FFmpeg.
  6.  *
  7.  * FFmpeg is free software; you can redistribute it and/or
  8.  * modify it under the terms of the GNU Lesser General Public
  9.  * License as published by the Free Software Foundation; either
  10.  * version 2.1 of the License, or (at your option) any later version.
  11.  *
  12.  * FFmpeg is distributed in the hope that it will be useful,
  13.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15.  * Lesser General Public License for more details.
  16.  *
  17.  * You should have received a copy of the GNU Lesser General Public
  18.  * License along with FFmpeg; if not, write to the Free Software
  19.  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20.  */
  21.  
  22. #include "libavutil/arm/asm.S"
  23.  
  24. RESULT  .req    a1 @ return value; shares a1 with BUF
  25. BUF     .req    a1 @ first argument: start of the buffer
  26. SIZE    .req    a2 @ second argument: byte count, decremented as data is consumed
  27. PATTERN .req    a3 @ holds the 0x80008000 mask used by the word tests
  28. PTR     .req    a4 @ read cursor, advanced by the post-indexed loads
  29. DAT0    .req    v1 @ DAT0-DAT3: data loaded from the buffer
  30. DAT1    .req    v2
  31. DAT2    .req    v3
  32. DAT3    .req    v4
  33. TMP0    .req    v5 @ TMP0-TMP3: test result for the matching DATn
  34. TMP1    .req    v6
  35. TMP2    .req    ip
  36. TMP3    .req    lr @ lr is pushed on entry, so it is usable as scratch
  37.  
  38. #define PRELOAD_DISTANCE 4 /* pld look-ahead, in units of 32 bytes (one cacheline) */
  39.  
  40. .macro innerloop4
              @ Load one word into DAT0 and test it. With PATTERN = 0x80008000
              @ the macro ends with Z clear (ne) when either 16-bit half of
              @ DAT0 is 0x0000 or 0x0001, and with Z set (eq) otherwise.
              @ PTR advances by 4 and SIZE drops by 4; TMP0 holds the test
              @ result (zero or non-zero, matching the Z flag).
  41.         ldr     DAT0, [PTR], #4
  42.         subs    SIZE, SIZE, #4 @ C flag survives rest of macro
  43.         sub     TMP0, DAT0, PATTERN, lsr #14 @ PATTERN lsr #14 = 0x00020002: subtract 2 per halfword
  44.         bic     TMP0, TMP0, DAT0 @ keep bits that are set now but were clear in DAT0
  45.         ands    TMP0, TMP0, PATTERN @ only the top bit of each halfword is of interest
  46. .endm
  47.  
  48. .macro innerloop16  decrement, do_preload
              @ Load four words into DAT0-DAT3 and apply the same halfword
              @ test as innerloop4 to each of them. Ends with Z clear (ne) as
              @ soon as one word matches, or Z set (eq) if none of them do.
              @   \decrement  - if non-empty, amount subtracted from SIZE
              @                 (sets the flags, C is then left untouched)
              @   \do_preload - if non-empty, issue a pld PRELOAD_DISTANCE*32
              @                 bytes beyond the already advanced PTR
              @ The ANDs after the first are conditional on eq, so TMPn is
              @ only a finished result when every earlier TMP is zero.
  49.         ldmia   PTR!, {DAT0,DAT1,DAT2,DAT3}
  50.  .ifnc "\do_preload",""
  51.         pld     [PTR, #PRELOAD_DISTANCE*32]
  52.  .endif
  53.  .ifnc "\decrement",""
  54.         subs    SIZE, SIZE, #\decrement @ C flag survives rest of macro
  55.  .endif
  56.         sub     TMP0, DAT0, PATTERN, lsr #14 @ subtract 2 from each halfword (0x00020002)
  57.         sub     TMP1, DAT1, PATTERN, lsr #14
  58.         bic     TMP0, TMP0, DAT0 @ keep bits that were clear in the original data
  59.         bic     TMP1, TMP1, DAT1
  60.         sub     TMP2, DAT2, PATTERN, lsr #14
  61.         sub     TMP3, DAT3, PATTERN, lsr #14
  62.         ands    TMP0, TMP0, PATTERN @ first word: Z clear means a match
  63.         bic     TMP2, TMP2, DAT2
  64.         it      eq
  65.         andseq  TMP1, TMP1, PATTERN @ second word, tested only if the first did not match
  66.         bic     TMP3, TMP3, DAT3
  67.         itt     eq
  68.         andseq  TMP2, TMP2, PATTERN @ third and fourth words, skipped after any match
  69.         andseq  TMP3, TMP3, PATTERN
  70. .endm
  71.  
  72. /* int ff_h264_find_start_code_candidate_armv6(const uint8_t *buf, int size) */
/*
 * Scan buf for the first position that could be the beginning of a start
 * code.
 *
 * In:   a1 = buf, a2 = size (bytes)
 * Out:  a1 = offset from buf of a zero byte that is a start code candidate,
 *            or the number of bytes scanned (PTR - buf) when none is found;
 *            0 when size is 0
 * Regs: v1-v6 are saved and restored; a2-a4, ip and the flags are clobbered.
 *
 * Buffers shorter than (PRELOAD_DISTANCE+3)*32 - 1 bytes are scanned one
 * byte at a time and stop at the first zero byte. Longer buffers are
 * scanned a word or four words at a time with big-endian loads (setend be),
 * looking for aligned halfwords whose first byte is 0x00 and whose second
 * byte is 0x00 or 0x01; the byte in front of such a halfword is then
 * inspected so that the result can be moved back onto a leading zero.
 * Every exit path leaves the CPU in little-endian data mode.
 */
  73. function ff_h264_find_start_code_candidate_armv6, export=1
  74.         push    {v1-v6,lr}
  75.         mov     PTR, BUF
  76.         @ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go
  77.         @ before using code that does preloads
  78.         cmp     SIZE, #(PRELOAD_DISTANCE+3)*32 - 1
  79.         blo     60f
  80.  
  81.         @ Get to word-alignment, 1 byte at a time
  82.         tst     PTR, #3
  83.         beq     2f
  84. 1:      ldrb    DAT0, [PTR], #1
  85.         sub     SIZE, SIZE, #1
  86.         teq     DAT0, #0
  87.         beq     90f
  88.         tst     PTR, #3
  89.         bne     1b
  90. 2:      @ Get to 4-word alignment, 1 word at a time
  91.         ldr     PATTERN, =0x80008000
  92.         setend  be @ word loads now place the first byte in memory in the top bits
  93.         tst     PTR, #12
  94.         beq     4f
  95. 3:      innerloop4
  96.         bne     91f
  97.         tst     PTR, #12
  98.         bne     3b
  99. 4:      @ Get to cacheline (8-word) alignment
  100.         tst     PTR, #16
  101.         beq     5f
  102.         innerloop16  16
  103.         bne     93f
  104. 5:      @ Check complete cachelines, with preloading
  105.         @ We need to stop when there are still (PRELOAD_DISTANCE+1)
  106.         @ complete cachelines to go
               @ SIZE is biased here so that the subs inside the loop borrows
               @ (C clear) at the point where the loop has to stop
  107.         sub     SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32
  108. 6:      innerloop16  , do_preload
  109.         bne     93f
  110.         innerloop16  32
  111.         bne     93f
  112.         bcs     6b
  113.         @ Preload trailing part-cacheline, if any
  114.         tst     SIZE, #31
  115.         beq     7f
  116.         pld     [PTR, #(PRELOAD_DISTANCE+1)*32]
  117.         @ Check remaining data without doing any more preloads. First
  118.         @ do in chunks of 4 words:
               @ (the adds removes the bias again, less one 16-byte chunk, so
               @ that the subs #16 in the loop borrows once under 16 bytes remain)
  119. 7:      adds    SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16
  120.         bmi     9f
  121. 8:      innerloop16  16
  122.         bne     93f
  123.         bcs     8b
  124.         @ Then in words:
  125. 9:      adds    SIZE, SIZE, #16 - 4
  126.         bmi     11f
  127. 10:     innerloop4
  128.         bne     91f
  129.         bcs     10b
  130. 11:     setend  le
  131.         @ Check second byte of final halfword
               @ (the halfword test only flags a zero second byte when the
               @ first byte of that halfword is zero as well)
  132.         ldrb    DAT0, [PTR, #-1]
  133.         teq     DAT0, #0
  134.         beq     90f
  135.         @ Check any remaining bytes
  136.         tst     SIZE, #3
  137.         beq     13f
  138. 12:     ldrb    DAT0, [PTR], #1
  139.         sub     SIZE, SIZE, #1
  140.         teq     DAT0, #0
  141.         beq     90f
  142.         tst     SIZE, #3
  143.         bne     12b
  144.         @ No candidate found
  145. 13:     sub     RESULT, PTR, BUF
  146.         b       99f
  147.  
  148. 60:     @ Small buffer - simply check by looping over bytes
  149.         subs    SIZE, SIZE, #1
               @ size == 0: RESULT shares a1 with BUF, so returning directly
               @ would hand back the buffer pointer. Use the common exit,
               @ which computes PTR - BUF = 0.
  150.         bcc     13b
  151. 61:     ldrb    DAT0, [PTR], #1
  152.         subs    SIZE, SIZE, #1
  153.         teq     DAT0, #0 @ teq with #0 leaves C from the subs intact
  154.         beq     90f
  155.         bcs     61b
  156.         @ No candidate found
  157.         sub     RESULT, PTR, BUF
  158.         b       99f
  159.  
  160. 90:     @ Found a candidate at the preceding byte
  161.         sub     RESULT, PTR, BUF
  162.         sub     RESULT, RESULT, #1
  163.         b       99f
  164.  
  165. 91:     @ Found a candidate somewhere in the preceding 4 bytes
  166.         sub     RESULT, PTR, BUF
  167.         sub     RESULT, RESULT, #4 @ offset of the word held in DAT0
  168.         sub     TMP0, DAT0, #0x20000
  169.         bics    TMP0, TMP0, DAT0 @ N set (mi) if the first halfword is 0x0000 or 0x0001
  170.         itt     pl
  171.         ldrbpl  DAT0, [PTR, #-3] @ match is in the second halfword: fetch the byte before it
  172.         addpl   RESULT, RESULT, #2
  173.         bpl     92f
  174.         teq     RESULT, #0
  175.         beq     98f @ don't look back a byte if found at first byte in buffer
  176.         ldrb    DAT0, [PTR, #-5] @ byte in front of the word
  177. 92:     teq     DAT0, #0
  178.         it      eq
  179.         subeq   RESULT, RESULT, #1 @ preceding byte is zero: the candidate starts there
  180.         b       98f
  181.  
  182. 93:     @ Found a candidate somewhere in the preceding 16 bytes
  183.         sub     RESULT, PTR, BUF
  184.         sub     RESULT, RESULT, #16 @ offset of the first of the four words
  185.         teq     TMP0, #0
  186.         beq     95f @ not in first 4 bytes
  187.         sub     TMP0, DAT0, #0x20000
  188.         bics    TMP0, TMP0, DAT0 @ N set (mi) if the match is in the first halfword
  189.         itt     pl
  190.         ldrbpl  DAT0, [PTR, #-15]
  191.         addpl   RESULT, RESULT, #2
  192.         bpl     94f
  193.         teq     RESULT, #0
  194.         beq     98f @ don't look back a byte if found at first byte in buffer
  195.         ldrb    DAT0, [PTR, #-17]
  196. 94:     teq     DAT0, #0
  197.         it      eq
  198.         subeq   RESULT, RESULT, #1
  199.         b       98f
  200. 95:     add     RESULT, RESULT, #4
  201.         teq     TMP1, #0
  202.         beq     96f @ not in next 4 bytes
  203.         sub     TMP1, DAT1, #0x20000
  204.         bics    TMP1, TMP1, DAT1
  205.         itee    mi
  206.         ldrbmi  DAT0, [PTR, #-13] @ first halfword: byte in front of this word
  207.         ldrbpl  DAT0, [PTR, #-11] @ second halfword: byte in front of it
  208.         addpl   RESULT, RESULT, #2
  209.         teq     DAT0, #0
  210.         it      eq
  211.         subeq   RESULT, RESULT, #1
  212.         b       98f
  213. 96:     add     RESULT, RESULT, #4
  214.         teq     TMP2, #0
  215.         beq     97f @ not in next 4 bytes
  216.         sub     TMP2, DAT2, #0x20000
  217.         bics    TMP2, TMP2, DAT2
  218.         itee    mi
  219.         ldrbmi  DAT0, [PTR, #-9]
  220.         ldrbpl  DAT0, [PTR, #-7]
  221.         addpl   RESULT, RESULT, #2
  222.         teq     DAT0, #0
  223.         it      eq
  224.         subeq   RESULT, RESULT, #1
  225.         b       98f
  226. 97:     add     RESULT, RESULT, #4 @ only the last word is left, no need to test TMP3
  227.         sub     TMP3, DAT3, #0x20000
  228.         bics    TMP3, TMP3, DAT3
  229.         itee    mi
  230.         ldrbmi  DAT0, [PTR, #-5]
  231.         ldrbpl  DAT0, [PTR, #-3]
  232.         addpl   RESULT, RESULT, #2
  233.         teq     DAT0, #0
  234.         it      eq
  235.         subeq   RESULT, RESULT, #1
  236.         @ drop through to 98f
  237. 98:     setend  le @ exits taken from the word loops are still big-endian
  238. 99:     pop     {v1-v6,pc}
  239. .endfunc
  240.  
  241.         .unreq  RESULT @ release the register aliases created with .req above
  242.         .unreq  BUF
  243.         .unreq  SIZE
  244.         .unreq  PATTERN
  245.         .unreq  PTR
  246.         .unreq  DAT0
  247.         .unreq  DAT1
  248.         .unreq  DAT2
  249.         .unreq  DAT3
  250.         .unreq  TMP0
  251.         .unreq  TMP1
  252.         .unreq  TMP2
  253.         .unreq  TMP3
  254.