Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /*
  2.         dct64_i486.c: DCT64, a plain C variant for i486
  3.  
  4.         copyright 1998-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
  5.         see COPYING and AUTHORS files in distribution or http://mpg123.de
  6.         initially written by Fabrice Bellard
  7. */
  8.  
  9. /* Discrete Cosine Tansform (DCT) for subband synthesis.
  10.  *
  11.  * This code is optimized for 80486. It should be compiled with gcc
  12.  * 2.7.2 or higher.
  13.  *
  14.  * Note: This code does not give the necessary accuracy. Moreover, no
  15.  * overflow test are done.
  16.  *
  17.  * (c) 1998 Fabrice Bellard.  
  18.  */
  19.  
  20. #include "mpg123.h"
  21.  
  22. #define COS_0_0 16403
  23. #define COS_0_1 16563
  24. #define COS_0_2 16890
  25. #define COS_0_3 17401
  26. #define COS_0_4 18124
  27. #define COS_0_5 19101
  28. #define COS_0_6 20398
  29. #define COS_0_7 22112
  30. #define COS_0_8 24396
  31. #define COS_0_9 27503
  32. #define COS_0_10 31869
  33. #define COS_0_11 38320
  34. #define COS_0_12 48633
  35. #define COS_0_13 67429
  36. #define COS_0_14 111660
  37. #define COS_0_15 333906
  38. #define COS_1_0 16463
  39. #define COS_1_1 17121
  40. #define COS_1_2 18577
  41. #define COS_1_3 21195
  42. #define COS_1_4 25826
  43. #define COS_1_5 34756
  44. #define COS_1_6 56441
  45. #define COS_1_7 167154
  46. #define COS_2_0 16704
  47. #define COS_2_1 19704
  48. #define COS_2_2 29490
  49. #define COS_2_3 83981
  50. #define COS_3_0 17733
  51. #define COS_3_1 42813
  52. #define COS_4_0 23170        
  53.  
  54. #define SETOUT(out,n,expr) out[FIR_BUFFER_SIZE*(n)]=(expr)
  55. #define MULL(a,b) (((long)(a)*(long)(b)) >> 15)
  56. #define MUL(a,b) \
  57. (\
  58.        ((!(b & 0x3F)) ? (((a)*(b >> 6)) >> 9) :\
  59.        ((!(b & 0x1F)) ? (((a)*(b >> 5)) >> 10) :\
  60.        ((!(b & 0x0F)) ? (((a)*(b >> 4)) >> 11) :\
  61.        ((!(b & 0x07)) ? (((a)*(b >> 3)) >> 12) :\
  62.        ((!(b & 0x03)) ? (((a)*(b >> 2)) >> 13) :\
  63.        ((!(b & 0x01)) ? (((a)*(b >> 1)) >> 14) :\
  64.                         (((a)*(b   )) >> 15))))))))
  65.  
  66.  
  67. void dct64_1_486(int *out0,int *out1,int *b1,int *b2)
  68. {
  69.   b1[0x00] = b2[0x00] + b2[0x1F];
  70.   b1[0x1F] = MUL((b2[0x00] - b2[0x1F]),COS_0_0);
  71.  
  72.   b1[0x01] = b2[0x01] + b2[0x1E];
  73.   b1[0x1E] = MUL((b2[0x01] - b2[0x1E]),COS_0_1);
  74.  
  75.   b1[0x02] = b2[0x02] + b2[0x1D];
  76.   b1[0x1D] = MUL((b2[0x02] - b2[0x1D]),COS_0_2);
  77.  
  78.   b1[0x03] = b2[0x03] + b2[0x1C];
  79.   b1[0x1C] = MUL((b2[0x03] - b2[0x1C]),COS_0_3);
  80.  
  81.   b1[0x04] = b2[0x04] + b2[0x1B];
  82.   b1[0x1B] = MUL((b2[0x04] - b2[0x1B]),COS_0_4);
  83.  
  84.   b1[0x05] = b2[0x05] + b2[0x1A];
  85.   b1[0x1A] = MUL((b2[0x05] - b2[0x1A]),COS_0_5);
  86.  
  87.   b1[0x06] = b2[0x06] + b2[0x19];
  88.   b1[0x19] = MUL((b2[0x06] - b2[0x19]),COS_0_6);
  89.  
  90.   b1[0x07] = b2[0x07] + b2[0x18];
  91.   b1[0x18] = MUL((b2[0x07] - b2[0x18]),COS_0_7);
  92.  
  93.   b1[0x08] = b2[0x08] + b2[0x17];
  94.   b1[0x17] = MUL((b2[0x08] - b2[0x17]),COS_0_8);
  95.  
  96.   b1[0x09] = b2[0x09] + b2[0x16];
  97.   b1[0x16] = MUL((b2[0x09] - b2[0x16]),COS_0_9);
  98.  
  99.   b1[0x0A] = b2[0x0A] + b2[0x15];
  100.   b1[0x15] = MUL((b2[0x0A] - b2[0x15]),COS_0_10);
  101.  
  102.   b1[0x0B] = b2[0x0B] + b2[0x14];
  103.   b1[0x14] = MUL((b2[0x0B] - b2[0x14]),COS_0_11);
  104.  
  105.   b1[0x0C] = b2[0x0C] + b2[0x13];
  106.   b1[0x13] = MUL((b2[0x0C] - b2[0x13]),COS_0_12);
  107.  
  108.   b1[0x0D] = b2[0x0D] + b2[0x12];
  109.   b1[0x12] = MULL((b2[0x0D] - b2[0x12]),COS_0_13);
  110.  
  111.   b1[0x0E] = b2[0x0E] + b2[0x11];
  112.   b1[0x11] = MULL((b2[0x0E] - b2[0x11]),COS_0_14);
  113.  
  114.   b1[0x0F] = b2[0x0F] + b2[0x10];
  115.   b1[0x10] = MULL((b2[0x0F] - b2[0x10]),COS_0_15);
  116.  
  117.  
  118.   b2[0x00] = b1[0x00] + b1[0x0F];
  119.   b2[0x0F] = MUL((b1[0x00] - b1[0x0F]),COS_1_0);
  120.   b2[0x01] = b1[0x01] + b1[0x0E];
  121.   b2[0x0E] = MUL((b1[0x01] - b1[0x0E]),COS_1_1);
  122.   b2[0x02] = b1[0x02] + b1[0x0D];
  123.   b2[0x0D] = MUL((b1[0x02] - b1[0x0D]),COS_1_2);
  124.   b2[0x03] = b1[0x03] + b1[0x0C];
  125.   b2[0x0C] = MUL((b1[0x03] - b1[0x0C]),COS_1_3);
  126.   b2[0x04] = b1[0x04] + b1[0x0B];
  127.   b2[0x0B] = MUL((b1[0x04] - b1[0x0B]),COS_1_4);
  128.   b2[0x05] = b1[0x05] + b1[0x0A];
  129.   b2[0x0A] = MUL((b1[0x05] - b1[0x0A]),COS_1_5);
  130.   b2[0x06] = b1[0x06] + b1[0x09];
  131.   b2[0x09] = MUL((b1[0x06] - b1[0x09]),COS_1_6);
  132.   b2[0x07] = b1[0x07] + b1[0x08];
  133.   b2[0x08] = MULL((b1[0x07] - b1[0x08]),COS_1_7);
  134.  
  135.   b2[0x10] = b1[0x10] + b1[0x1F];
  136.   b2[0x1F] = MUL((b1[0x1F] - b1[0x10]),COS_1_0);
  137.   b2[0x11] = b1[0x11] + b1[0x1E];
  138.   b2[0x1E] = MUL((b1[0x1E] - b1[0x11]),COS_1_1);
  139.   b2[0x12] = b1[0x12] + b1[0x1D];
  140.   b2[0x1D] = MUL((b1[0x1D] - b1[0x12]),COS_1_2);
  141.   b2[0x13] = b1[0x13] + b1[0x1C];
  142.   b2[0x1C] = MUL((b1[0x1C] - b1[0x13]),COS_1_3);
  143.   b2[0x14] = b1[0x14] + b1[0x1B];
  144.   b2[0x1B] = MUL((b1[0x1B] - b1[0x14]),COS_1_4);
  145.   b2[0x15] = b1[0x15] + b1[0x1A];
  146.   b2[0x1A] = MUL((b1[0x1A] - b1[0x15]),COS_1_5);
  147.   b2[0x16] = b1[0x16] + b1[0x19];
  148.   b2[0x19] = MUL((b1[0x19] - b1[0x16]),COS_1_6);
  149.   b2[0x17] = b1[0x17] + b1[0x18];
  150.   b2[0x18] = MULL((b1[0x18] - b1[0x17]),COS_1_7);
  151.  
  152.  
  153.   b1[0x00] = b2[0x00] + b2[0x07];
  154.   b1[0x07] = MUL((b2[0x00] - b2[0x07]),COS_2_0);
  155.   b1[0x01] = b2[0x01] + b2[0x06];
  156.   b1[0x06] = MUL((b2[0x01] - b2[0x06]),COS_2_1);
  157.   b1[0x02] = b2[0x02] + b2[0x05];
  158.   b1[0x05] = MUL((b2[0x02] - b2[0x05]),COS_2_2);
  159.   b1[0x03] = b2[0x03] + b2[0x04];
  160.   b1[0x04] = MULL((b2[0x03] - b2[0x04]),COS_2_3);
  161.  
  162.   b1[0x08] = b2[0x08] + b2[0x0F];
  163.   b1[0x0F] = MUL((b2[0x0F] - b2[0x08]),COS_2_0);
  164.   b1[0x09] = b2[0x09] + b2[0x0E];
  165.   b1[0x0E] = MUL((b2[0x0E] - b2[0x09]),COS_2_1);
  166.   b1[0x0A] = b2[0x0A] + b2[0x0D];
  167.   b1[0x0D] = MUL((b2[0x0D] - b2[0x0A]),COS_2_2);
  168.   b1[0x0B] = b2[0x0B] + b2[0x0C];
  169.   b1[0x0C] = MULL((b2[0x0C] - b2[0x0B]),COS_2_3);
  170.  
  171.   b1[0x10] = b2[0x10] + b2[0x17];
  172.   b1[0x17] = MUL((b2[0x10] - b2[0x17]),COS_2_0);
  173.   b1[0x11] = b2[0x11] + b2[0x16];
  174.   b1[0x16] = MUL((b2[0x11] - b2[0x16]),COS_2_1);
  175.   b1[0x12] = b2[0x12] + b2[0x15];
  176.   b1[0x15] = MUL((b2[0x12] - b2[0x15]),COS_2_2);
  177.   b1[0x13] = b2[0x13] + b2[0x14];
  178.   b1[0x14] = MULL((b2[0x13] - b2[0x14]),COS_2_3);
  179.  
  180.   b1[0x18] = b2[0x18] + b2[0x1F];
  181.   b1[0x1F] = MUL((b2[0x1F] - b2[0x18]),COS_2_0);
  182.   b1[0x19] = b2[0x19] + b2[0x1E];
  183.   b1[0x1E] = MUL((b2[0x1E] - b2[0x19]),COS_2_1);
  184.   b1[0x1A] = b2[0x1A] + b2[0x1D];
  185.   b1[0x1D] = MUL((b2[0x1D] - b2[0x1A]),COS_2_2);
  186.   b1[0x1B] = b2[0x1B] + b2[0x1C];
  187.   b1[0x1C] = MULL((b2[0x1C] - b2[0x1B]),COS_2_3);
  188.  
  189.  
  190.   b2[0x00] = b1[0x00] + b1[0x03];
  191.   b2[0x03] = MUL((b1[0x00] - b1[0x03]),COS_3_0);
  192.   b2[0x01] = b1[0x01] + b1[0x02];
  193.   b2[0x02] = MUL((b1[0x01] - b1[0x02]),COS_3_1);
  194.  
  195.   b2[0x04] = b1[0x04] + b1[0x07];
  196.   b2[0x07] = MUL((b1[0x07] - b1[0x04]),COS_3_0);
  197.   b2[0x05] = b1[0x05] + b1[0x06];
  198.   b2[0x06] = MUL((b1[0x06] - b1[0x05]),COS_3_1);
  199.  
  200.   b2[0x08] = b1[0x08] + b1[0x0B];
  201.   b2[0x0B] = MUL((b1[0x08] - b1[0x0B]),COS_3_0);
  202.   b2[0x09] = b1[0x09] + b1[0x0A];
  203.   b2[0x0A] = MUL((b1[0x09] - b1[0x0A]),COS_3_1);
  204.  
  205.   b2[0x0C] = b1[0x0C] + b1[0x0F];
  206.   b2[0x0F] = MUL((b1[0x0F] - b1[0x0C]),COS_3_0);
  207.   b2[0x0D] = b1[0x0D] + b1[0x0E];
  208.   b2[0x0E] = MUL((b1[0x0E] - b1[0x0D]),COS_3_1);
  209.  
  210.   b2[0x10] = b1[0x10] + b1[0x13];
  211.   b2[0x13] = MUL((b1[0x10] - b1[0x13]),COS_3_0);
  212.   b2[0x11] = b1[0x11] + b1[0x12];
  213.   b2[0x12] = MUL((b1[0x11] - b1[0x12]),COS_3_1);
  214.  
  215.   b2[0x14] = b1[0x14] + b1[0x17];
  216.   b2[0x17] = MUL((b1[0x17] - b1[0x14]),COS_3_0);
  217.   b2[0x15] = b1[0x15] + b1[0x16];
  218.   b2[0x16] = MUL((b1[0x16] - b1[0x15]),COS_3_1);
  219.  
  220.   b2[0x18] = b1[0x18] + b1[0x1B];
  221.   b2[0x1B] = MUL((b1[0x18] - b1[0x1B]),COS_3_0);
  222.   b2[0x19] = b1[0x19] + b1[0x1A];
  223.   b2[0x1A] = MUL((b1[0x19] - b1[0x1A]),COS_3_1);
  224.  
  225.   b2[0x1C] = b1[0x1C] + b1[0x1F];
  226.   b2[0x1F] = MUL((b1[0x1F] - b1[0x1C]),COS_3_0);
  227.   b2[0x1D] = b1[0x1D] + b1[0x1E];
  228.   b2[0x1E] = MUL((b1[0x1E] - b1[0x1D]),COS_3_1);
  229.  
  230.   {
  231.     int i;
  232.     for(i=0;i<32;i+=4) {
  233.       b1[i+0x00] = b2[i+0x00] + b2[i+0x01];
  234.       b1[i+0x01] = MUL((b2[i+0x00] - b2[i+0x01]),COS_4_0);
  235.       b1[i+0x02] = b2[i+0x02] + b2[i+0x03];
  236.       b1[i+0x03] = MUL((b2[i+0x03] - b2[i+0x02]),COS_4_0);
  237.     }
  238.   }
  239.  
  240.   b1[0x02] += b1[0x03];
  241.   b1[0x06] += b1[0x07];
  242.   b1[0x04] += b1[0x06];
  243.   b1[0x06] += b1[0x05];
  244.   b1[0x05] += b1[0x07];
  245.  
  246.   b1[0x0A] += b1[0x0B];
  247.   b1[0x0E] += b1[0x0F];
  248.   b1[0x0C] += b1[0x0E];
  249.   b1[0x0E] += b1[0x0D];
  250.   b1[0x0D] += b1[0x0F];
  251.  
  252.   b1[0x12] += b1[0x13];
  253.   b1[0x16] += b1[0x17];
  254.   b1[0x14] += b1[0x16];
  255.   b1[0x16] += b1[0x15];
  256.   b1[0x15] += b1[0x17];
  257.  
  258.   b1[0x1A] += b1[0x1B];
  259.   b1[0x1E] += b1[0x1F];
  260.   b1[0x1C] += b1[0x1E];
  261.   b1[0x1E] += b1[0x1D];
  262.   b1[0x1D] += b1[0x1F];
  263.  
  264.  SETOUT(out0,16,b1[0x00]);
  265.  SETOUT(out0,12,b1[0x04]);
  266.  SETOUT(out0, 8,b1[0x02]);
  267.  SETOUT(out0, 4,b1[0x06]);
  268.  SETOUT(out0, 0,b1[0x01]);
  269.  SETOUT(out1, 0,b1[0x01]);
  270.  SETOUT(out1, 4,b1[0x05]);
  271.  SETOUT(out1, 8,b1[0x03]);
  272.  SETOUT(out1,12,b1[0x07]);
  273.  
  274.  b1[0x08] += b1[0x0C];
  275.  SETOUT(out0,14,b1[0x08]);
  276.  b1[0x0C] += b1[0x0a];
  277.  SETOUT(out0,10,b1[0x0C]);
  278.  b1[0x0A] += b1[0x0E];
  279.  SETOUT(out0, 6,b1[0x0A]);
  280.  b1[0x0E] += b1[0x09];
  281.  SETOUT(out0, 2,b1[0x0E]);
  282.  b1[0x09] += b1[0x0D];
  283.  SETOUT(out1, 2,b1[0x09]);
  284.  b1[0x0D] += b1[0x0B];
  285.  SETOUT(out1, 6,b1[0x0D]);
  286.  b1[0x0B] += b1[0x0F];
  287.  SETOUT(out1,10,b1[0x0B]);
  288.  SETOUT(out1,14,b1[0x0F]);
  289.  
  290.  b1[0x18] += b1[0x1C];
  291.  SETOUT(out0,15,b1[0x10] + b1[0x18]);
  292.  SETOUT(out0,13,b1[0x18] + b1[0x14]);
  293.  b1[0x1C] += b1[0x1a];
  294.  SETOUT(out0,11,b1[0x14] + b1[0x1C]);
  295.  SETOUT(out0, 9,b1[0x1C] + b1[0x12]);
  296.  b1[0x1A] += b1[0x1E];
  297.  SETOUT(out0, 7,b1[0x12] + b1[0x1A]);
  298.  SETOUT(out0, 5,b1[0x1A] + b1[0x16]);
  299.  b1[0x1E] += b1[0x19];
  300.  SETOUT(out0, 3,b1[0x16] + b1[0x1E]);
  301.  SETOUT(out0, 1,b1[0x1E] + b1[0x11]);
  302.  b1[0x19] += b1[0x1D];
  303.  SETOUT(out1, 1,b1[0x11] + b1[0x19]);
  304.  SETOUT(out1, 3,b1[0x19] + b1[0x15]);
  305.  b1[0x1D] += b1[0x1B];
  306.  SETOUT(out1, 5,b1[0x15] + b1[0x1D]);
  307.  SETOUT(out1, 7,b1[0x1D] + b1[0x13]);
  308.  b1[0x1B] += b1[0x1F];
  309.  SETOUT(out1, 9,b1[0x13] + b1[0x1B]);
  310.  SETOUT(out1,11,b1[0x1B] + b1[0x17]);
  311.  SETOUT(out1,13,b1[0x17] + b1[0x1F]);
  312.  SETOUT(out1,15,b1[0x1F]);
  313. }
  314.  
  315.  
  316. /*
  317.  * the call via dct64 is a trick to force GCC to use
  318.  * (new) registers for the b1,b2 pointer to the bufs[xx] field
  319.  */
  320. void dct64_486(int *a,int *b,real *samples)
  321. {
  322.   int bufs[64];
  323.   int i;
  324.  
  325. #ifdef REAL_IS_FIXED  
  326. #define TOINT(a) ((a) * 32768 / (int)REAL_FACTOR)
  327.  
  328.   for(i=0;i<32;i++) {
  329.     bufs[i]=TOINT(samples[i]);
  330.   }
  331. #else      
  332.   int *p = bufs;
  333.   register double const scale = ((65536.0 * 32) + 1) * 65536.0;
  334.  
  335.   for(i=0;i<32;i++) {
  336.     *((double *) (p++)) = scale + *samples++; /* beware on bufs overrun: 8B store from x87 */
  337.   }
  338. #endif
  339.  
  340.   dct64_1_486(a,b,bufs+32,bufs);
  341. }
  342.  
  343.