Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. // Ogg Vorbis audio decoder - v1.19 - public domain
  2. // http://nothings.org/stb_vorbis/
  3. //
  4. // Original version written by Sean Barrett in 2007.
  5. //
  6. // Originally sponsored by RAD Game Tools. Seeking implementation
  7. // sponsored by Phillip Bennefall, Marc Andersen, Aaron Baker,
  8. // Elias Software, Aras Pranckevicius, and Sean Barrett.
  9. //
  10. // LICENSE
  11. //
  12. //   See end of file for license information.
  13. //
  14. // Limitations:
  15. //
  16. //   - floor 0 not supported (used in old ogg vorbis files pre-2004)
  17. //   - lossless sample-truncation at beginning ignored
  18. //   - cannot concatenate multiple vorbis streams
  19. //   - sample positions are 32-bit, limiting seekable 192Khz
  20. //       files to around 6 hours (Ogg supports 64-bit)
  21. //
  22. // Feature contributors:
  23. //    Dougall Johnson (sample-exact seeking)
  24. //
  25. // Bugfix/warning contributors:
  26. //    Terje Mathisen     Niklas Frykholm     Andy Hill
  27. //    Casey Muratori     John Bolton         Gargaj
  28. //    Laurent Gomila     Marc LeBlanc        Ronny Chevalier
  29. //    Bernhard Wodo      Evan Balster        github:alxprd
  30. //    Tom Beaumont       Ingo Leitgeb        Nicolas Guillemot
  31. //    Phillip Bennefall  Rohit               Thiago Goulart
  32. //    github:manxorist   saga musix          github:infatum
  33. //    Timur Gagiev       Maxwell Koo         Peter Waller
  34. //    github:audinowho   Dougall Johnson
  35. //
  36. // Partial history:
  37. //    1.19    - 2020-02-05 - warnings
  38. //    1.18    - 2020-02-02 - fix seek bugs; parse header comments; misc warnings etc.
  39. //    1.17    - 2019-07-08 - fix CVE-2019-13217..CVE-2019-13223 (by ForAllSecure)
  40. //    1.16    - 2019-03-04 - fix warnings
  41. //    1.15    - 2019-02-07 - explicit failure if Ogg Skeleton data is found
  42. //    1.14    - 2018-02-11 - delete bogus dealloca usage
  43. //    1.13    - 2018-01-29 - fix truncation of last frame (hopefully)
  44. //    1.12    - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
  45. //    1.11    - 2017-07-23 - fix MinGW compilation
  46. //    1.10    - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
  47. //    1.09    - 2016-04-04 - back out 'truncation of last frame' fix from previous version
  48. //    1.08    - 2016-04-02 - warnings; setup memory leaks; truncation of last frame
  49. //    1.07    - 2015-01-16 - fixes for crashes on invalid files; warning fixes; const
  50. //    1.06    - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
  51. //                           some crash fixes when out of memory or with corrupt files
  52. //                           fix some inappropriately signed shifts
  53. //    1.05    - 2015-04-19 - don't define __forceinline if it's redundant
  54. //    1.04    - 2014-08-27 - fix missing const-correct case in API
  55. //    1.03    - 2014-08-07 - warning fixes
  56. //    1.02    - 2014-07-09 - declare qsort comparison as explicitly _cdecl in Windows
  57. //    1.01    - 2014-06-18 - fix stb_vorbis_get_samples_float (interleaved was correct)
  58. //    1.0     - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
  59. //                           (API change) report sample rate for decode-full-file funcs
  60. //
  61. // See end of file for full version history.
  62.  
  63.  
  64. //////////////////////////////////////////////////////////////////////////////
  65. //
  66. //  HEADER BEGINS HERE
  67. //
  68.  
  69. #ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
  70. #define STB_VORBIS_INCLUDE_STB_VORBIS_H
  71.  
  72. #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
  73. #define STB_VORBIS_NO_STDIO 1
  74. #endif
  75.  
  76. #ifndef STB_VORBIS_NO_STDIO
  77. #include <stdio.h>
  78. #endif
  79.  
  80. #ifdef __cplusplus
  81. extern "C" {
  82. #endif
  83.  
  84. ///////////   THREAD SAFETY
  85.  
  86. // Individual stb_vorbis* handles are not thread-safe; you cannot decode from
  87. // them from multiple threads at the same time. However, you can have multiple
  88. // stb_vorbis* handles and decode from them independently in multiple thrads.
  89.  
  90.  
  91. ///////////   MEMORY ALLOCATION
  92.  
  93. // normally stb_vorbis uses malloc() to allocate memory at startup,
  94. // and alloca() to allocate temporary memory during a frame on the
  95. // stack. (Memory consumption will depend on the amount of setup
  96. // data in the file and how you set the compile flags for speed
  97. // vs. size. In my test files the maximal-size usage is ~150KB.)
  98. //
  99. // You can modify the wrapper functions in the source (setup_malloc,
  100. // setup_temp_malloc, temp_malloc) to change this behavior, or you
  101. // can use a simpler allocation model: you pass in a buffer from
  102. // which stb_vorbis will allocate _all_ its memory (including the
  103. // temp memory). "open" may fail with a VORBIS_outofmem if you
  104. // do not pass in enough data; there is no way to determine how
  105. // much you do need except to succeed (at which point you can
  106. // query get_info to find the exact amount required. yes I know
  107. // this is lame).
  108. //
  109. // If you pass in a non-NULL buffer of the type below, allocation
  110. // will occur from it as described above. Otherwise just pass NULL
  111. // to use malloc()/alloca()
  112.  
  113. typedef struct
  114. {
  115.    char *alloc_buffer;
  116.    int   alloc_buffer_length_in_bytes;
  117. } stb_vorbis_alloc;
  118.  
  119.  
  120. ///////////   FUNCTIONS USEABLE WITH ALL INPUT MODES
  121.  
  122. typedef struct stb_vorbis stb_vorbis;
  123.  
  124. typedef struct
  125. {
  126.    unsigned int sample_rate;
  127.    int channels;
  128.  
  129.    unsigned int setup_memory_required;
  130.    unsigned int setup_temp_memory_required;
  131.    unsigned int temp_memory_required;
  132.  
  133.    int max_frame_size;
  134. } stb_vorbis_info;
  135.  
  136. typedef struct
  137. {
  138.    char *vendor;
  139.  
  140.    int comment_list_length;
  141.    char **comment_list;
  142. } stb_vorbis_comment;
  143.  
  144. // get general information about the file
  145. extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
  146.  
  147. // get ogg comments
  148. extern stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f);
  149.  
  150. // get the last error detected (clears it, too)
  151. extern int stb_vorbis_get_error(stb_vorbis *f);
  152.  
  153. // close an ogg vorbis file and free all memory in use
  154. extern void stb_vorbis_close(stb_vorbis *f);
  155.  
  156. // this function returns the offset (in samples) from the beginning of the
  157. // file that will be returned by the next decode, if it is known, or -1
  158. // otherwise. after a flush_pushdata() call, this may take a while before
  159. // it becomes valid again.
  160. // NOT WORKING YET after a seek with PULLDATA API
  161. extern int stb_vorbis_get_sample_offset(stb_vorbis *f);
  162.  
  163. // returns the current seek point within the file, or offset from the beginning
  164. // of the memory buffer. In pushdata mode it returns 0.
  165. extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
  166.  
  167. ///////////   PUSHDATA API
  168.  
  169. #ifndef STB_VORBIS_NO_PUSHDATA_API
  170.  
  171. // this API allows you to get blocks of data from any source and hand
  172. // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
  173. // you how much it used, and you have to give it the rest next time;
  174. // and stb_vorbis may not have enough data to work with and you will
  175. // need to give it the same data again PLUS more. Note that the Vorbis
  176. // specification does not bound the size of an individual frame.
  177.  
  178. extern stb_vorbis *stb_vorbis_open_pushdata(
  179.          const unsigned char * datablock, int datablock_length_in_bytes,
  180.          int *datablock_memory_consumed_in_bytes,
  181.          int *error,
  182.          const stb_vorbis_alloc *alloc_buffer);
  183. // create a vorbis decoder by passing in the initial data block containing
  184. //    the ogg&vorbis headers (you don't need to do parse them, just provide
  185. //    the first N bytes of the file--you're told if it's not enough, see below)
  186. // on success, returns an stb_vorbis *, does not set error, returns the amount of
  187. //    data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
  188. // on failure, returns NULL on error and sets *error, does not change *datablock_memory_consumed
  189. // if returns NULL and *error is VORBIS_need_more_data, then the input block was
  190. //       incomplete and you need to pass in a larger block from the start of the file
  191.  
  192. extern int stb_vorbis_decode_frame_pushdata(
  193.          stb_vorbis *f,
  194.          const unsigned char *datablock, int datablock_length_in_bytes,
  195.          int *channels,             // place to write number of float * buffers
  196.          float ***output,           // place to write float ** array of float * buffers
  197.          int *samples               // place to write number of output samples
  198.      );
  199. // decode a frame of audio sample data if possible from the passed-in data block
  200. //
  201. // return value: number of bytes we used from datablock
  202. //
  203. // possible cases:
  204. //     0 bytes used, 0 samples output (need more data)
  205. //     N bytes used, 0 samples output (resynching the stream, keep going)
  206. //     N bytes used, M samples output (one frame of data)
  207. // note that after opening a file, you will ALWAYS get one N-bytes,0-sample
  208. // frame, because Vorbis always "discards" the first frame.
  209. //
  210. // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
  211. // instead only datablock_length_in_bytes-3 or less. This is because it wants
  212. // to avoid missing parts of a page header if they cross a datablock boundary,
  213. // without writing state-machiney code to record a partial detection.
  214. //
  215. // The number of channels returned are stored in *channels (which can be
  216. // NULL--it is always the same as the number of channels reported by
  217. // get_info). *output will contain an array of float* buffers, one per
  218. // channel. In other words, (*output)[0][0] contains the first sample from
  219. // the first channel, and (*output)[1][0] contains the first sample from
  220. // the second channel.
  221.  
  222. extern void stb_vorbis_flush_pushdata(stb_vorbis *f);
  223. // inform stb_vorbis that your next datablock will not be contiguous with
  224. // previous ones (e.g. you've seeked in the data); future attempts to decode
  225. // frames will cause stb_vorbis to resynchronize (as noted above), and
  226. // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
  227. // will begin decoding the _next_ frame.
  228. //
  229. // if you want to seek using pushdata, you need to seek in your file, then
  230. // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
  231. // decoding is returning you data, call stb_vorbis_get_sample_offset, and
  232. // if you don't like the result, seek your file again and repeat.
  233. #endif
  234.  
  235.  
  236. //////////   PULLING INPUT API
  237.  
  238. #ifndef STB_VORBIS_NO_PULLDATA_API
  239. // This API assumes stb_vorbis is allowed to pull data from a source--
  240. // either a block of memory containing the _entire_ vorbis stream, or a
  241. // FILE * that you or it create, or possibly some other reading mechanism
  242. // if you go modify the source to replace the FILE * case with some kind
  243. // of callback to your code. (But if you don't support seeking, you may
  244. // just want to go ahead and use pushdata.)
  245.  
  246. #if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
  247. extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
  248. #endif
  249. #if !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
  250. extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
  251. #endif
  252. // decode an entire file and output the data interleaved into a malloc()ed
  253. // buffer stored in *output. The return value is the number of samples
  254. // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
  255. // When you're done with it, just free() the pointer returned in *output.
  256.  
  257. extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
  258.                                   int *error, const stb_vorbis_alloc *alloc_buffer);
  259. // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
  260. // this must be the entire stream!). on failure, returns NULL and sets *error
  261.  
  262. #ifndef STB_VORBIS_NO_STDIO
  263. extern stb_vorbis * stb_vorbis_open_filename(const char *filename,
  264.                                   int *error, const stb_vorbis_alloc *alloc_buffer);
  265. // create an ogg vorbis decoder from a filename via fopen(). on failure,
  266. // returns NULL and sets *error (possibly to VORBIS_file_open_failure).
  267.  
  268. extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
  269.                                   int *error, const stb_vorbis_alloc *alloc_buffer);
  270. // create an ogg vorbis decoder from an open FILE *, looking for a stream at
  271. // the _current_ seek point (ftell). on failure, returns NULL and sets *error.
  272. // note that stb_vorbis must "own" this stream; if you seek it in between
  273. // calls to stb_vorbis, it will become confused. Moreover, if you attempt to
  274. // perform stb_vorbis_seek_*() operations on this file, it will assume it
  275. // owns the _entire_ rest of the file after the start point. Use the next
  276. // function, stb_vorbis_open_file_section(), to limit it.
  277.  
  278. extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
  279.                 int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
  280. // create an ogg vorbis decoder from an open FILE *, looking for a stream at
  281. // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
  282. // on failure, returns NULL and sets *error. note that stb_vorbis must "own"
  283. // this stream; if you seek it in between calls to stb_vorbis, it will become
  284. // confused.
  285. #endif
  286.  
  287. extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
  288. extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
  289. // these functions seek in the Vorbis file to (approximately) 'sample_number'.
  290. // after calling seek_frame(), the next call to get_frame_*() will include
  291. // the specified sample. after calling stb_vorbis_seek(), the next call to
  292. // stb_vorbis_get_samples_* will start with the specified sample. If you
  293. // do not need to seek to EXACTLY the target sample when using get_samples_*,
  294. // you can also use seek_frame().
  295.  
  296. extern int stb_vorbis_seek_start(stb_vorbis *f);
  297. // this function is equivalent to stb_vorbis_seek(f,0)
  298.  
  299. extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
  300. extern float        stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
  301. // these functions return the total length of the vorbis stream
  302.  
  303. extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
  304. // decode the next frame and return the number of samples. the number of
  305. // channels returned are stored in *channels (which can be NULL--it is always
  306. // the same as the number of channels reported by get_info). *output will
  307. // contain an array of float* buffers, one per channel. These outputs will
  308. // be overwritten on the next call to stb_vorbis_get_frame_*.
  309. //
  310. // You generally should not intermix calls to stb_vorbis_get_frame_*()
  311. // and stb_vorbis_get_samples_*(), since the latter calls the former.
  312.  
  313. #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
  314. extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
  315. extern int stb_vorbis_get_frame_short            (stb_vorbis *f, int num_c, short **buffer, int num_samples);
  316. #endif
  317. // decode the next frame and return the number of *samples* per channel.
  318. // Note that for interleaved data, you pass in the number of shorts (the
  319. // size of your array), but the return value is the number of samples per
  320. // channel, not the total number of samples.
  321. //
  322. // The data is coerced to the number of channels you request according to the
  323. // channel coercion rules (see below). You must pass in the size of your
  324. // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
  325. // The maximum buffer size needed can be gotten from get_info(); however,
  326. // the Vorbis I specification implies an absolute maximum of 4096 samples
  327. // per channel.
  328.  
  329. // Channel coercion rules:
  330. //    Let M be the number of channels requested, and N the number of channels present,
  331. //    and Cn be the nth channel; let stereo L be the sum of all L and center channels,
  332. //    and stereo R be the sum of all R and center channels (channel assignment from the
  333. //    vorbis spec).
  334. //        M    N       output
  335. //        1    k      sum(Ck) for all k
  336. //        2    *      stereo L, stereo R
  337. //        k    l      k > l, the first l channels, then 0s
  338. //        k    l      k <= l, the first k channels
  339. //    Note that this is not _good_ surround etc. mixing at all! It's just so
  340. //    you get something useful.
  341.  
  342. extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
  343. extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
  344. // gets num_samples samples, not necessarily on a frame boundary--this requires
  345. // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
  346. // Returns the number of samples stored per channel; it may be less than requested
  347. // at the end of the file. If there are no more samples in the file, returns 0.
  348.  
  349. #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
  350. extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
  351. extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
  352. #endif
  353. // gets num_samples samples, not necessarily on a frame boundary--this requires
  354. // buffering so you have to supply the buffers. Applies the coercion rules above
  355. // to produce 'channels' channels. Returns the number of samples stored per channel;
  356. // it may be less than requested at the end of the file. If there are no more
  357. // samples in the file, returns 0.
  358.  
  359. #endif
  360.  
  361. ////////   ERROR CODES
  362.  
  363. enum STBVorbisError
  364. {
  365.    VORBIS__no_error,
  366.  
  367.    VORBIS_need_more_data=1,             // not a real error
  368.  
  369.    VORBIS_invalid_api_mixing,           // can't mix API modes
  370.    VORBIS_outofmem,                     // not enough memory
  371.    VORBIS_feature_not_supported,        // uses floor 0
  372.    VORBIS_too_many_channels,            // STB_VORBIS_MAX_CHANNELS is too small
  373.    VORBIS_file_open_failure,            // fopen() failed
  374.    VORBIS_seek_without_length,          // can't seek in unknown-length file
  375.  
  376.    VORBIS_unexpected_eof=10,            // file is truncated?
  377.    VORBIS_seek_invalid,                 // seek past EOF
  378.  
  379.    // decoding errors (corrupt/invalid stream) -- you probably
  380.    // don't care about the exact details of these
  381.  
  382.    // vorbis errors:
  383.    VORBIS_invalid_setup=20,
  384.    VORBIS_invalid_stream,
  385.  
  386.    // ogg errors:
  387.    VORBIS_missing_capture_pattern=30,
  388.    VORBIS_invalid_stream_structure_version,
  389.    VORBIS_continued_packet_flag_invalid,
  390.    VORBIS_incorrect_stream_serial_number,
  391.    VORBIS_invalid_first_page,
  392.    VORBIS_bad_packet_type,
  393.    VORBIS_cant_find_last_page,
  394.    VORBIS_seek_failed,
  395.    VORBIS_ogg_skeleton_not_supported
  396. };
  397.  
  398.  
  399. #ifdef __cplusplus
  400. }
  401. #endif
  402.  
  403. #endif // STB_VORBIS_INCLUDE_STB_VORBIS_H
  404. //
  405. //  HEADER ENDS HERE
  406. //
  407. //////////////////////////////////////////////////////////////////////////////
  408.  
  409. #ifndef STB_VORBIS_HEADER_ONLY
  410.  
  411. // global configuration settings (e.g. set these in the project/makefile),
  412. // or just set them in this file at the top (although ideally the first few
  413. // should be visible when the header file is compiled too, although it's not
  414. // crucial)
  415.  
  416. // STB_VORBIS_NO_PUSHDATA_API
  417. //     does not compile the code for the various stb_vorbis_*_pushdata()
  418. //     functions
  419. // #define STB_VORBIS_NO_PUSHDATA_API
  420.  
  421. // STB_VORBIS_NO_PULLDATA_API
  422. //     does not compile the code for the non-pushdata APIs
  423. // #define STB_VORBIS_NO_PULLDATA_API
  424.  
  425. // STB_VORBIS_NO_STDIO
  426. //     does not compile the code for the APIs that use FILE *s internally
  427. //     or externally (implied by STB_VORBIS_NO_PULLDATA_API)
  428. // #define STB_VORBIS_NO_STDIO
  429.  
  430. // STB_VORBIS_NO_INTEGER_CONVERSION
  431. //     does not compile the code for converting audio sample data from
  432. //     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
  433. // #define STB_VORBIS_NO_INTEGER_CONVERSION
  434.  
  435. // STB_VORBIS_NO_FAST_SCALED_FLOAT
  436. //      does not use a fast float-to-int trick to accelerate float-to-int on
  437. //      most platforms which requires endianness be defined correctly.
  438. //#define STB_VORBIS_NO_FAST_SCALED_FLOAT
  439.  
  440.  
  441. // STB_VORBIS_MAX_CHANNELS [number]
  442. //     globally define this to the maximum number of channels you need.
  443. //     The spec does not put a restriction on channels except that
  444. //     the count is stored in a byte, so 255 is the hard limit.
  445. //     Reducing this saves about 16 bytes per value, so using 16 saves
  446. //     (255-16)*16 or around 4KB. Plus anything other memory usage
  447. //     I forgot to account for. Can probably go as low as 8 (7.1 audio),
  448. //     6 (5.1 audio), or 2 (stereo only).
  449. #ifndef STB_VORBIS_MAX_CHANNELS
  450. #define STB_VORBIS_MAX_CHANNELS    16  // enough for anyone?
  451. #endif
  452.  
  453. // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
  454. //     after a flush_pushdata(), stb_vorbis begins scanning for the
  455. //     next valid page, without backtracking. when it finds something
  456. //     that looks like a page, it streams through it and verifies its
  457. //     CRC32. Should that validation fail, it keeps scanning. But it's
  458. //     possible that _while_ streaming through to check the CRC32 of
  459. //     one candidate page, it sees another candidate page. This #define
  460. //     determines how many "overlapping" candidate pages it can search
  461. //     at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
  462. //     garbage pages could be as big as 64KB, but probably average ~16KB.
  463. //     So don't hose ourselves by scanning an apparent 64KB page and
  464. //     missing a ton of real ones in the interim; so minimum of 2
  465. #ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
  466. #define STB_VORBIS_PUSHDATA_CRC_COUNT  4
  467. #endif
  468.  
  469. // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
  470. //     sets the log size of the huffman-acceleration table.  Maximum
  471. //     supported value is 24. with larger numbers, more decodings are O(1),
  472. //     but the table size is larger so worse cache missing, so you'll have
  473. //     to probe (and try multiple ogg vorbis files) to find the sweet spot.
  474. #ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
  475. #define STB_VORBIS_FAST_HUFFMAN_LENGTH   10
  476. #endif
  477.  
  478. // STB_VORBIS_FAST_BINARY_LENGTH [number]
  479. //     sets the log size of the binary-search acceleration table. this
  480. //     is used in similar fashion to the fast-huffman size to set initial
  481. //     parameters for the binary search
  482.  
  483. // STB_VORBIS_FAST_HUFFMAN_INT
  484. //     The fast huffman tables are much more efficient if they can be
  485. //     stored as 16-bit results instead of 32-bit results. This restricts
  486. //     the codebooks to having only 65535 possible outcomes, though.
  487. //     (At least, accelerated by the huffman table.)
  488. #ifndef STB_VORBIS_FAST_HUFFMAN_INT
  489. #define STB_VORBIS_FAST_HUFFMAN_SHORT
  490. #endif
  491.  
  492. // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
  493. //     If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
  494. //     back on binary searching for the correct one. This requires storing
  495. //     extra tables with the huffman codes in sorted order. Defining this
  496. //     symbol trades off space for speed by forcing a linear search in the
  497. //     non-fast case, except for "sparse" codebooks.
  498. // #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
  499.  
  500. // STB_VORBIS_DIVIDES_IN_RESIDUE
  501. //     stb_vorbis precomputes the result of the scalar residue decoding
  502. //     that would otherwise require a divide per chunk. you can trade off
  503. //     space for time by defining this symbol.
  504. // #define STB_VORBIS_DIVIDES_IN_RESIDUE
  505.  
  506. // STB_VORBIS_DIVIDES_IN_CODEBOOK
  507. //     vorbis VQ codebooks can be encoded two ways: with every case explicitly
  508. //     stored, or with all elements being chosen from a small range of values,
  509. //     and all values possible in all elements. By default, stb_vorbis expands
  510. //     this latter kind out to look like the former kind for ease of decoding,
  511. //     because otherwise an integer divide-per-vector-element is required to
  512. //     unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
  513. //     trade off storage for speed.
  514. //#define STB_VORBIS_DIVIDES_IN_CODEBOOK
  515.  
  516. #ifdef STB_VORBIS_CODEBOOK_SHORTS
  517. #error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats"
  518. #endif
  519.  
  520. // STB_VORBIS_DIVIDE_TABLE
  521. //     this replaces small integer divides in the floor decode loop with
  522. //     table lookups. made less than 1% difference, so disabled by default.
  523.  
  524. // STB_VORBIS_NO_INLINE_DECODE
  525. //     disables the inlining of the scalar codebook fast-huffman decode.
  526. //     might save a little codespace; useful for debugging
  527. // #define STB_VORBIS_NO_INLINE_DECODE
  528.  
  529. // STB_VORBIS_NO_DEFER_FLOOR
  530. //     Normally we only decode the floor without synthesizing the actual
  531. //     full curve. We can instead synthesize the curve immediately. This
  532. //     requires more memory and is very likely slower, so I don't think
  533. //     you'd ever want to do it except for debugging.
  534. // #define STB_VORBIS_NO_DEFER_FLOOR
  535.  
  536.  
  537.  
  538.  
  539. //////////////////////////////////////////////////////////////////////////////
  540.  
  541. #ifdef STB_VORBIS_NO_PULLDATA_API
  542.    #define STB_VORBIS_NO_INTEGER_CONVERSION
  543.    #define STB_VORBIS_NO_STDIO
  544. #endif
  545.  
  546. #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
  547.    #define STB_VORBIS_NO_STDIO 1
  548. #endif
  549.  
  550. #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
  551. #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
  552.  
  553.    // only need endianness for fast-float-to-int, which we don't
  554.    // use for pushdata
  555.  
  556.    #ifndef STB_VORBIS_BIG_ENDIAN
  557.      #define STB_VORBIS_ENDIAN  0
  558.    #else
  559.      #define STB_VORBIS_ENDIAN  1
  560.    #endif
  561.  
  562. #endif
  563. #endif
  564.  
  565.  
  566. #ifndef STB_VORBIS_NO_STDIO
  567. #include <stdio.h>
  568. #endif
  569.  
  570. #ifndef STB_VORBIS_NO_CRT
  571.    #include <stdlib.h>
  572.    #include <string.h>
  573.    #include <assert.h>
  574.    #include <math.h>
  575.  
  576.    // find definition of alloca if it's not in stdlib.h:
  577.    #if defined(_MSC_VER) || defined(__MINGW32__)
  578.       #include <malloc.h>
  579.    #endif
  580.    #if defined(__linux__) || defined(__linux) || defined(__EMSCRIPTEN__)
  581.       #include <alloca.h>
  582.    #endif
  583. #else // STB_VORBIS_NO_CRT
  584.    #define NULL 0
  585.    #define malloc(s)   0
  586.    #define free(s)     ((void) 0)
  587.    #define realloc(s,d)  0
  588. #endif // STB_VORBIS_NO_CRT
  589.  
  590. #include <limits.h>
  591.  
  592. #ifdef __MINGW32__
  593.    // eff you mingw:
  594.    //     "fixed":
  595.    //         http://sourceforge.net/p/mingw-w64/mailman/message/32882927/
  596.    //     "no that broke the build, reverted, who cares about C":
  597.    //         http://sourceforge.net/p/mingw-w64/mailman/message/32890381/
  598.    #ifdef __forceinline
  599.    #undef __forceinline
  600.    #endif
  601.    #define __forceinline
  602.    #define alloca __builtin_alloca
  603. #elif !defined(_MSC_VER)
  604.    #if __GNUC__
  605.       #define __forceinline inline
  606.    #else
  607.       #define __forceinline
  608.    #endif
  609. #endif
  610.  
  611. #if STB_VORBIS_MAX_CHANNELS > 256
  612. #error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
  613. #endif
  614.  
  615. #if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
  616. #error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
  617. #endif
  618.  
  619.  
  620. #if 0
  621. #include <crtdbg.h>
  622. #define CHECK(f)   _CrtIsValidHeapPointer(f->channel_buffers[1])
  623. #else
  624. #define CHECK(f)   ((void) 0)
  625. #endif
  626.  
  627. #define MAX_BLOCKSIZE_LOG  13   // from specification
  628. #define MAX_BLOCKSIZE      (1 << MAX_BLOCKSIZE_LOG)
  629.  
  630.  
  631. typedef unsigned char  uint8;
  632. typedef   signed char   int8;
  633. typedef unsigned short uint16;
  634. typedef   signed short  int16;
  635. typedef unsigned int   uint32;
  636. typedef   signed int    int32;
  637.  
  638. #ifndef TRUE
  639. #define TRUE 1
  640. #define FALSE 0
  641. #endif
  642.  
  643. typedef float codetype;
  644.  
  645. // @NOTE
  646. //
  647. // Some arrays below are tagged "//varies", which means it's actually
  648. // a variable-sized piece of data, but rather than malloc I assume it's
  649. // small enough it's better to just allocate it all together with the
  650. // main thing
  651. //
  652. // Most of the variables are specified with the smallest size I could pack
  653. // them into. It might give better performance to make them all full-sized
  654. // integers. It should be safe to freely rearrange the structures or change
  655. // the sizes larger--nothing relies on silently truncating etc., nor the
  656. // order of variables.
  657.  
  658. #define FAST_HUFFMAN_TABLE_SIZE   (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
  659. #define FAST_HUFFMAN_TABLE_MASK   (FAST_HUFFMAN_TABLE_SIZE - 1)
  660.  
  661. typedef struct
  662. {
  663.    int dimensions, entries;
  664.    uint8 *codeword_lengths;
  665.    float  minimum_value;
  666.    float  delta_value;
  667.    uint8  value_bits;
  668.    uint8  lookup_type;
  669.    uint8  sequence_p;
  670.    uint8  sparse;
  671.    uint32 lookup_values;
  672.    codetype *multiplicands;
  673.    uint32 *codewords;
  674.    #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
  675.     int16  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
  676.    #else
  677.     int32  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
  678.    #endif
  679.    uint32 *sorted_codewords;
  680.    int    *sorted_values;
  681.    int     sorted_entries;
  682. } Codebook;
  683.  
  684. typedef struct
  685. {
  686.    uint8 order;
  687.    uint16 rate;
  688.    uint16 bark_map_size;
  689.    uint8 amplitude_bits;
  690.    uint8 amplitude_offset;
  691.    uint8 number_of_books;
  692.    uint8 book_list[16]; // varies
  693. } Floor0;
  694.  
  695. typedef struct
  696. {
  697.    uint8 partitions;
  698.    uint8 partition_class_list[32]; // varies
  699.    uint8 class_dimensions[16]; // varies
  700.    uint8 class_subclasses[16]; // varies
  701.    uint8 class_masterbooks[16]; // varies
  702.    int16 subclass_books[16][8]; // varies
  703.    uint16 Xlist[31*8+2]; // varies
  704.    uint8 sorted_order[31*8+2];
  705.    uint8 neighbors[31*8+2][2];
  706.    uint8 floor1_multiplier;
  707.    uint8 rangebits;
  708.    int values;
  709. } Floor1;
  710.  
  711. typedef union
  712. {
  713.    Floor0 floor0;
  714.    Floor1 floor1;
  715. } Floor;
  716.  
  717. typedef struct
  718. {
  719.    uint32 begin, end;
  720.    uint32 part_size;
  721.    uint8 classifications;
  722.    uint8 classbook;
  723.    uint8 **classdata;
  724.    int16 (*residue_books)[8];
  725. } Residue;
  726.  
  727. typedef struct
  728. {
  729.    uint8 magnitude;
  730.    uint8 angle;
  731.    uint8 mux;
  732. } MappingChannel;
  733.  
  734. typedef struct
  735. {
  736.    uint16 coupling_steps;
  737.    MappingChannel *chan;
  738.    uint8  submaps;
  739.    uint8  submap_floor[15]; // varies
  740.    uint8  submap_residue[15]; // varies
  741. } Mapping;
  742.  
  743. typedef struct
  744. {
  745.    uint8 blockflag;
  746.    uint8 mapping;
  747.    uint16 windowtype;
  748.    uint16 transformtype;
  749. } Mode;
  750.  
  751. typedef struct
  752. {
  753.    uint32  goal_crc;    // expected crc if match
  754.    int     bytes_left;  // bytes left in packet
  755.    uint32  crc_so_far;  // running crc
  756.    int     bytes_done;  // bytes processed in _current_ chunk
  757.    uint32  sample_loc;  // granule pos encoded in page
  758. } CRCscan;
  759.  
  760. typedef struct
  761. {
  762.    uint32 page_start, page_end;
  763.    uint32 last_decoded_sample;
  764. } ProbedPage;
  765.  
  766. struct stb_vorbis
  767. {
  768.   // user-accessible info
  769.    unsigned int sample_rate;
  770.    int channels;
  771.  
  772.    unsigned int setup_memory_required;
  773.    unsigned int temp_memory_required;
  774.    unsigned int setup_temp_memory_required;
  775.  
  776.    char *vendor;
  777.    int comment_list_length;
  778.    char **comment_list;
  779.  
  780.   // input config
  781. #ifndef STB_VORBIS_NO_STDIO
  782.    FILE *f;
  783.    uint32 f_start;
  784.    int close_on_free;
  785. #endif
  786.  
  787.    uint8 *stream;
  788.    uint8 *stream_start;
  789.    uint8 *stream_end;
  790.  
  791.    uint32 stream_len;
  792.  
  793.    uint8  push_mode;
  794.  
  795.    // the page to seek to when seeking to start, may be zero
  796.    uint32 first_audio_page_offset;
  797.  
  798.    // p_first is the page on which the first audio packet ends
  799.    // (but not necessarily the page on which it starts)
  800.    ProbedPage p_first, p_last;
  801.  
  802.   // memory management
  803.    stb_vorbis_alloc alloc;
  804.    int setup_offset;
  805.    int temp_offset;
  806.  
  807.   // run-time results
  808.    int eof;
  809.    enum STBVorbisError error;
  810.  
  811.   // user-useful data
  812.  
  813.   // header info
  814.    int blocksize[2];
  815.    int blocksize_0, blocksize_1;
  816.    int codebook_count;
  817.    Codebook *codebooks;
  818.    int floor_count;
  819.    uint16 floor_types[64]; // varies
  820.    Floor *floor_config;
  821.    int residue_count;
  822.    uint16 residue_types[64]; // varies
  823.    Residue *residue_config;
  824.    int mapping_count;
  825.    Mapping *mapping;
  826.    int mode_count;
  827.    Mode mode_config[64];  // varies
  828.  
  829.    uint32 total_samples;
  830.  
  831.   // decode buffer
  832.    float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
  833.    float *outputs        [STB_VORBIS_MAX_CHANNELS];
  834.  
  835.    float *previous_window[STB_VORBIS_MAX_CHANNELS];
  836.    int previous_length;
  837.  
  838.    #ifndef STB_VORBIS_NO_DEFER_FLOOR
  839.    int16 *finalY[STB_VORBIS_MAX_CHANNELS];
  840.    #else
  841.    float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
  842.    #endif
  843.  
  844.    uint32 current_loc; // sample location of next frame to decode
  845.    int    current_loc_valid;
  846.  
  847.   // per-blocksize precomputed data
  848.  
  849.    // twiddle factors
  850.    float *A[2],*B[2],*C[2];
  851.    float *window[2];
  852.    uint16 *bit_reverse[2];
  853.  
  854.   // current page/packet/segment streaming info
  855.    uint32 serial; // stream serial number for verification
  856.    int last_page;
  857.    int segment_count;
  858.    uint8 segments[255];
  859.    uint8 page_flag;
  860.    uint8 bytes_in_seg;
  861.    uint8 first_decode;
  862.    int next_seg;
  863.    int last_seg;  // flag that we're on the last segment
  864.    int last_seg_which; // what was the segment number of the last seg?
  865.    uint32 acc;
  866.    int valid_bits;
  867.    int packet_bytes;
  868.    int end_seg_with_known_loc;
  869.    uint32 known_loc_for_packet;
  870.    int discard_samples_deferred;
  871.    uint32 samples_output;
  872.  
  873.   // push mode scanning
  874.    int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
  875. #ifndef STB_VORBIS_NO_PUSHDATA_API
  876.    CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT];
  877. #endif
  878.  
  879.   // sample-access
  880.    int channel_buffer_start;
  881.    int channel_buffer_end;
  882. };
  883.  
  884. #if defined(STB_VORBIS_NO_PUSHDATA_API)
  885.    #define IS_PUSH_MODE(f)   FALSE
  886. #elif defined(STB_VORBIS_NO_PULLDATA_API)
  887.    #define IS_PUSH_MODE(f)   TRUE
  888. #else
  889.    #define IS_PUSH_MODE(f)   ((f)->push_mode)
  890. #endif
  891.  
  892. typedef struct stb_vorbis vorb;
  893.  
  894. static int error(vorb *f, enum STBVorbisError e)
  895. {
  896.    f->error = e;
  897.    if (!f->eof && e != VORBIS_need_more_data) {
  898.       f->error=e; // breakpoint for debugging
  899.    }
  900.    return 0;
  901. }
  902.  
  903.  
  904. // these functions are used for allocating temporary memory
  905. // while decoding. if you can afford the stack space, use
  906. // alloca(); otherwise, provide a temp buffer and it will
  907. // allocate out of those.
  908.  
  909. #define array_size_required(count,size)  (count*(sizeof(void *)+(size)))
  910.  
  911. #define temp_alloc(f,size)              (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
  912. #define temp_free(f,p)                  (void)0
  913. #define temp_alloc_save(f)              ((f)->temp_offset)
  914. #define temp_alloc_restore(f,p)         ((f)->temp_offset = (p))
  915.  
  916. #define temp_block_array(f,count,size)  make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
  917.  
  918. // given a sufficiently large block of memory, make an array of pointers to subblocks of it
  919. static void *make_block_array(void *mem, int count, int size)
  920. {
  921.    int i;
  922.    void ** p = (void **) mem;
  923.    char *q = (char *) (p + count);
  924.    for (i=0; i < count; ++i) {
  925.       p[i] = q;
  926.       q += size;
  927.    }
  928.    return p;
  929. }
  930.  
  931. static void *setup_malloc(vorb *f, int sz)
  932. {
  933.    sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs.
  934.    f->setup_memory_required += sz;
  935.    if (f->alloc.alloc_buffer) {
  936.       void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
  937.       if (f->setup_offset + sz > f->temp_offset) return NULL;
  938.       f->setup_offset += sz;
  939.       return p;
  940.    }
  941.    return sz ? malloc(sz) : NULL;
  942. }
  943.  
  944. static void setup_free(vorb *f, void *p)
  945. {
  946.    if (f->alloc.alloc_buffer) return; // do nothing; setup mem is a stack
  947.    free(p);
  948. }
  949.  
  950. static void *setup_temp_malloc(vorb *f, int sz)
  951. {
  952.    sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs.
  953.    if (f->alloc.alloc_buffer) {
  954.       if (f->temp_offset - sz < f->setup_offset) return NULL;
  955.       f->temp_offset -= sz;
  956.       return (char *) f->alloc.alloc_buffer + f->temp_offset;
  957.    }
  958.    return malloc(sz);
  959. }
  960.  
  961. static void setup_temp_free(vorb *f, void *p, int sz)
  962. {
  963.    if (f->alloc.alloc_buffer) {
  964.       f->temp_offset += (sz+3)&~3;
  965.       return;
  966.    }
  967.    free(p);
  968. }
  969.  
  970. #define CRC32_POLY    0x04c11db7   // from spec
  971.  
  972. static uint32 crc_table[256];
  973. static void crc32_init(void)
  974. {
  975.    int i,j;
  976.    uint32 s;
  977.    for(i=0; i < 256; i++) {
  978.       for (s=(uint32) i << 24, j=0; j < 8; ++j)
  979.          s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
  980.       crc_table[i] = s;
  981.    }
  982. }
  983.  
  984. static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
  985. {
  986.    return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
  987. }
  988.  
  989.  
  990. // used in setup, and for huffman that doesn't go fast path
  991. static unsigned int bit_reverse(unsigned int n)
  992. {
  993.   n = ((n & 0xAAAAAAAA) >>  1) | ((n & 0x55555555) << 1);
  994.   n = ((n & 0xCCCCCCCC) >>  2) | ((n & 0x33333333) << 2);
  995.   n = ((n & 0xF0F0F0F0) >>  4) | ((n & 0x0F0F0F0F) << 4);
  996.   n = ((n & 0xFF00FF00) >>  8) | ((n & 0x00FF00FF) << 8);
  997.   return (n >> 16) | (n << 16);
  998. }
  999.  
  1000. static float square(float x)
  1001. {
  1002.    return x*x;
  1003. }
  1004.  
  1005. // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
  1006. // as required by the specification. fast(?) implementation from stb.h
  1007. // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
  1008. static int ilog(int32 n)
  1009. {
  1010.    static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
  1011.  
  1012.    if (n < 0) return 0; // signed n returns 0
  1013.  
  1014.    // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
  1015.    if (n < (1 << 14))
  1016.         if (n < (1 <<  4))            return  0 + log2_4[n      ];
  1017.         else if (n < (1 <<  9))       return  5 + log2_4[n >>  5];
  1018.              else                     return 10 + log2_4[n >> 10];
  1019.    else if (n < (1 << 24))
  1020.              if (n < (1 << 19))       return 15 + log2_4[n >> 15];
  1021.              else                     return 20 + log2_4[n >> 20];
  1022.         else if (n < (1 << 29))       return 25 + log2_4[n >> 25];
  1023.              else                     return 30 + log2_4[n >> 30];
  1024. }
  1025.  
  1026. #ifndef M_PI
  1027.   #define M_PI  3.14159265358979323846264f  // from CRC
  1028. #endif
  1029.  
  1030. // code length assigned to a value with no huffman encoding
  1031. #define NO_CODE   255
  1032.  
  1033. /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
  1034. //
  1035. // these functions are only called at setup, and only a few times
  1036. // per file
  1037.  
  1038. static float float32_unpack(uint32 x)
  1039. {
  1040.    // from the specification
  1041.    uint32 mantissa = x & 0x1fffff;
  1042.    uint32 sign = x & 0x80000000;
  1043.    uint32 exp = (x & 0x7fe00000) >> 21;
  1044.    double res = sign ? -(double)mantissa : (double)mantissa;
  1045.    return (float) ldexp((float)res, exp-788);
  1046. }
  1047.  
  1048.  
  1049. // zlib & jpeg huffman tables assume that the output symbols
  1050. // can either be arbitrarily arranged, or have monotonically
  1051. // increasing frequencies--they rely on the lengths being sorted;
  1052. // this makes for a very simple generation algorithm.
  1053. // vorbis allows a huffman table with non-sorted lengths. This
  1054. // requires a more sophisticated construction, since symbols in
  1055. // order do not map to huffman codes "in order".
  1056. static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
  1057. {
  1058.    if (!c->sparse) {
  1059.       c->codewords      [symbol] = huff_code;
  1060.    } else {
  1061.       c->codewords       [count] = huff_code;
  1062.       c->codeword_lengths[count] = len;
  1063.       values             [count] = symbol;
  1064.    }
  1065. }
  1066.  
  1067. static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
  1068. {
  1069.    int i,k,m=0;
  1070.    uint32 available[32];
  1071.  
  1072.    memset(available, 0, sizeof(available));
  1073.    // find the first entry
  1074.    for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
  1075.    if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
  1076.    // add to the list
  1077.    add_entry(c, 0, k, m++, len[k], values);
  1078.    // add all available leaves
  1079.    for (i=1; i <= len[k]; ++i)
  1080.       available[i] = 1U << (32-i);
  1081.    // note that the above code treats the first case specially,
  1082.    // but it's really the same as the following code, so they
  1083.    // could probably be combined (except the initial code is 0,
  1084.    // and I use 0 in available[] to mean 'empty')
  1085.    for (i=k+1; i < n; ++i) {
  1086.       uint32 res;
  1087.       int z = len[i], y;
  1088.       if (z == NO_CODE) continue;
  1089.       // find lowest available leaf (should always be earliest,
  1090.       // which is what the specification calls for)
  1091.       // note that this property, and the fact we can never have
  1092.       // more than one free leaf at a given level, isn't totally
  1093.       // trivial to prove, but it seems true and the assert never
  1094.       // fires, so!
  1095.       while (z > 0 && !available[z]) --z;
  1096.       if (z == 0) { return FALSE; }
  1097.       res = available[z];
  1098.       assert(z >= 0 && z < 32);
  1099.       available[z] = 0;
  1100.       add_entry(c, bit_reverse(res), i, m++, len[i], values);
  1101.       // propagate availability up the tree
  1102.       if (z != len[i]) {
  1103.          assert(len[i] >= 0 && len[i] < 32);
  1104.          for (y=len[i]; y > z; --y) {
  1105.             assert(available[y] == 0);
  1106.             available[y] = res + (1 << (32-y));
  1107.          }
  1108.       }
  1109.    }
  1110.    return TRUE;
  1111. }
  1112.  
  1113. // accelerated huffman table allows fast O(1) match of all symbols
  1114. // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
  1115. static void compute_accelerated_huffman(Codebook *c)
  1116. {
  1117.    int i, len;
  1118.    for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
  1119.       c->fast_huffman[i] = -1;
  1120.  
  1121.    len = c->sparse ? c->sorted_entries : c->entries;
  1122.    #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
  1123.    if (len > 32767) len = 32767; // largest possible value we can encode!
  1124.    #endif
  1125.    for (i=0; i < len; ++i) {
  1126.       if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
  1127.          uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
  1128.          // set table entries for all bit combinations in the higher bits
  1129.          while (z < FAST_HUFFMAN_TABLE_SIZE) {
  1130.              c->fast_huffman[z] = i;
  1131.              z += 1 << c->codeword_lengths[i];
  1132.          }
  1133.       }
  1134.    }
  1135. }
  1136.  
  1137. #ifdef _MSC_VER
  1138. #define STBV_CDECL __cdecl
  1139. #else
  1140. #define STBV_CDECL
  1141. #endif
  1142.  
  1143. static int STBV_CDECL uint32_compare(const void *p, const void *q)
  1144. {
  1145.    uint32 x = * (uint32 *) p;
  1146.    uint32 y = * (uint32 *) q;
  1147.    return x < y ? -1 : x > y;
  1148. }
  1149.  
  1150. static int include_in_sort(Codebook *c, uint8 len)
  1151. {
  1152.    if (c->sparse) { assert(len != NO_CODE); return TRUE; }
  1153.    if (len == NO_CODE) return FALSE;
  1154.    if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
  1155.    return FALSE;
  1156. }
  1157.  
  1158. // if the fast table above doesn't work, we want to binary
  1159. // search them... need to reverse the bits
  1160. static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
  1161. {
  1162.    int i, len;
  1163.    // build a list of all the entries
  1164.    // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
  1165.    // this is kind of a frivolous optimization--I don't see any performance improvement,
  1166.    // but it's like 4 extra lines of code, so.
  1167.    if (!c->sparse) {
  1168.       int k = 0;
  1169.       for (i=0; i < c->entries; ++i)
  1170.          if (include_in_sort(c, lengths[i]))
  1171.             c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
  1172.       assert(k == c->sorted_entries);
  1173.    } else {
  1174.       for (i=0; i < c->sorted_entries; ++i)
  1175.          c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
  1176.    }
  1177.  
  1178.    qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
  1179.    c->sorted_codewords[c->sorted_entries] = 0xffffffff;
  1180.  
  1181.    len = c->sparse ? c->sorted_entries : c->entries;
  1182.    // now we need to indicate how they correspond; we could either
  1183.    //   #1: sort a different data structure that says who they correspond to
  1184.    //   #2: for each sorted entry, search the original list to find who corresponds
  1185.    //   #3: for each original entry, find the sorted entry
  1186.    // #1 requires extra storage, #2 is slow, #3 can use binary search!
  1187.    for (i=0; i < len; ++i) {
  1188.       int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
  1189.       if (include_in_sort(c,huff_len)) {
  1190.          uint32 code = bit_reverse(c->codewords[i]);
  1191.          int x=0, n=c->sorted_entries;
  1192.          while (n > 1) {
  1193.             // invariant: sc[x] <= code < sc[x+n]
  1194.             int m = x + (n >> 1);
  1195.             if (c->sorted_codewords[m] <= code) {
  1196.                x = m;
  1197.                n -= (n>>1);
  1198.             } else {
  1199.                n >>= 1;
  1200.             }
  1201.          }
  1202.          assert(c->sorted_codewords[x] == code);
  1203.          if (c->sparse) {
  1204.             c->sorted_values[x] = values[i];
  1205.             c->codeword_lengths[x] = huff_len;
  1206.          } else {
  1207.             c->sorted_values[x] = i;
  1208.          }
  1209.       }
  1210.    }
  1211. }
  1212.  
  1213. // only run while parsing the header (3 times)
  1214. static int vorbis_validate(uint8 *data)
  1215. {
  1216.    static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
  1217.    return memcmp(data, vorbis, 6) == 0;
  1218. }
  1219.  
  1220. // called from setup only, once per code book
  1221. // (formula implied by specification)
  1222. static int lookup1_values(int entries, int dim)
  1223. {
  1224.    int r = (int) floor(exp((float) log((float) entries) / dim));
  1225.    if ((int) floor(powerd((float) r+1, dim)) <= entries)   // (int) cast for MinGW warning;
  1226.       ++r;                                              // floor() to avoid _ftol() when non-CRT
  1227.    if (powerd((float) r+1, dim) <= entries)
  1228.       return -1;
  1229.    if ((int) floor(powerd((float) r, dim)) > entries)
  1230.       return -1;
  1231.    return r;
  1232. }
  1233.  
  1234. // called twice per file
  1235. static void compute_twiddle_factors(int n, float *A, float *B, float *C)
  1236. {
  1237.    int n4 = n >> 2, n8 = n >> 3;
  1238.    int k,k2;
  1239.  
  1240.    for (k=k2=0; k < n4; ++k,k2+=2) {
  1241.       A[k2  ] = (float)  cos(4*k*M_PI/n);
  1242.       A[k2+1] = (float) -sin(4*k*M_PI/n);
  1243.       B[k2  ] = (float)  cos((k2+1)*M_PI/n/2) * 0.5f;
  1244.       B[k2+1] = (float)  sin((k2+1)*M_PI/n/2) * 0.5f;
  1245.    }
  1246.    for (k=k2=0; k < n8; ++k,k2+=2) {
  1247.       C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
  1248.       C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
  1249.    }
  1250. }
  1251.  
  1252. static void compute_window(int n, float *window)
  1253. {
  1254.    int n2 = n >> 1, i;
  1255.    for (i=0; i < n2; ++i)
  1256.       window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
  1257. }
  1258.  
  1259. static void compute_bitreverse(int n, uint16 *rev)
  1260. {
  1261.    int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
  1262.    int i, n8 = n >> 3;
  1263.    for (i=0; i < n8; ++i)
  1264.       rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
  1265. }
  1266.  
  1267. static int init_blocksize(vorb *f, int b, int n)
  1268. {
  1269.    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
  1270.    f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
  1271.    f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
  1272.    f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
  1273.    if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
  1274.    compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
  1275.    f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
  1276.    if (!f->window[b]) return error(f, VORBIS_outofmem);
  1277.    compute_window(n, f->window[b]);
  1278.    f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
  1279.    if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
  1280.    compute_bitreverse(n, f->bit_reverse[b]);
  1281.    return TRUE;
  1282. }
  1283.  
  1284. static void neighbors(uint16 *x, int n, int *plow, int *phigh)
  1285. {
  1286.    int low = -1;
  1287.    int high = 65536;
  1288.    int i;
  1289.    for (i=0; i < n; ++i) {
  1290.       if (x[i] > low  && x[i] < x[n]) { *plow  = i; low = x[i]; }
  1291.       if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
  1292.    }
  1293. }
  1294.  
  1295. // this has been repurposed so y is now the original index instead of y
  1296. typedef struct
  1297. {
  1298.    uint16 x,id;
  1299. } stbv__floor_ordering;
  1300.  
  1301. static int STBV_CDECL point_compare(const void *p, const void *q)
  1302. {
  1303.    stbv__floor_ordering *a = (stbv__floor_ordering *) p;
  1304.    stbv__floor_ordering *b = (stbv__floor_ordering *) q;
  1305.    return a->x < b->x ? -1 : a->x > b->x;
  1306. }
  1307.  
  1308. //
  1309. /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
  1310.  
  1311.  
  1312. #if defined(STB_VORBIS_NO_STDIO)
  1313.    #define USE_MEMORY(z)    TRUE
  1314. #else
  1315.    #define USE_MEMORY(z)    ((z)->stream)
  1316. #endif
  1317.  
  1318. static uint8 get8(vorb *z)
  1319. {
  1320.    if (USE_MEMORY(z)) {
  1321.       if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
  1322.       return *z->stream++;
  1323.    }
  1324.  
  1325.    #ifndef STB_VORBIS_NO_STDIO
  1326.    {
  1327.    int c = fgetc(z->f);
  1328.    if (c == EOF) { z->eof = TRUE; return 0; }
  1329.    return c;
  1330.    }
  1331.    #endif
  1332. }
  1333.  
  1334. static uint32 get32(vorb *f)
  1335. {
  1336.    uint32 x;
  1337.    x = get8(f);
  1338.    x += get8(f) << 8;
  1339.    x += get8(f) << 16;
  1340.    x += (uint32) get8(f) << 24;
  1341.    return x;
  1342. }
  1343.  
  1344. static int getn(vorb *z, uint8 *data, int n)
  1345. {
  1346.    if (USE_MEMORY(z)) {
  1347.       if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
  1348.       memcpy(data, z->stream, n);
  1349.       z->stream += n;
  1350.       return 1;
  1351.    }
  1352.  
  1353.    #ifndef STB_VORBIS_NO_STDIO
  1354.    if (fread(data, n, 1, z->f) == 1)
  1355.       return 1;
  1356.    else {
  1357.       z->eof = 1;
  1358.       return 0;
  1359.    }
  1360.    #endif
  1361. }
  1362.  
  1363. static void skip(vorb *z, int n)
  1364. {
  1365.    if (USE_MEMORY(z)) {
  1366.       z->stream += n;
  1367.       if (z->stream >= z->stream_end) z->eof = 1;
  1368.       return;
  1369.    }
  1370.    #ifndef STB_VORBIS_NO_STDIO
  1371.    {
  1372.       long x = ftell(z->f);
  1373.       fseek(z->f, x+n, SEEK_SET);
  1374.    }
  1375.    #endif
  1376. }
  1377.  
  1378. static int set_file_offset(stb_vorbis *f, unsigned int loc)
  1379. {
  1380.    #ifndef STB_VORBIS_NO_PUSHDATA_API
  1381.    if (f->push_mode) return 0;
  1382.    #endif
  1383.    f->eof = 0;
  1384.    if (USE_MEMORY(f)) {
  1385.       if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
  1386.          f->stream = f->stream_end;
  1387.          f->eof = 1;
  1388.          return 0;
  1389.       } else {
  1390.          f->stream = f->stream_start + loc;
  1391.          return 1;
  1392.       }
  1393.    }
  1394.    #ifndef STB_VORBIS_NO_STDIO
  1395.    if (loc + f->f_start < loc || loc >= 0x80000000) {
  1396.       loc = 0x7fffffff;
  1397.       f->eof = 1;
  1398.    } else {
  1399.       loc += f->f_start;
  1400.    }
  1401.    if (!fseek(f->f, loc, SEEK_SET))
  1402.       return 1;
  1403.    f->eof = 1;
  1404.    fseek(f->f, f->f_start, SEEK_END);
  1405.    return 0;
  1406.    #endif
  1407. }
  1408.  
  1409.  
  1410. static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
  1411.  
  1412. static int capture_pattern(vorb *f)
  1413. {
  1414.    if (0x4f != get8(f)) return FALSE;
  1415.    if (0x67 != get8(f)) return FALSE;
  1416.    if (0x67 != get8(f)) return FALSE;
  1417.    if (0x53 != get8(f)) return FALSE;
  1418.    return TRUE;
  1419. }
  1420.  
  1421. #define PAGEFLAG_continued_packet   1
  1422. #define PAGEFLAG_first_page         2
  1423. #define PAGEFLAG_last_page          4
  1424.  
  1425. static int start_page_no_capturepattern(vorb *f)
  1426. {
  1427.    uint32 loc0,loc1,n;
  1428.    if (f->first_decode && !IS_PUSH_MODE(f)) {
  1429.       f->p_first.page_start = stb_vorbis_get_file_offset(f) - 4;
  1430.    }
  1431.    // stream structure version
  1432.    if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
  1433.    // header flag
  1434.    f->page_flag = get8(f);
  1435.    // absolute granule position
  1436.    loc0 = get32(f);
  1437.    loc1 = get32(f);
  1438.    // @TODO: validate loc0,loc1 as valid positions?
  1439.    // stream serial number -- vorbis doesn't interleave, so discard
  1440.    get32(f);
  1441.    //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
  1442.    // page sequence number
  1443.    n = get32(f);
  1444.    f->last_page = n;
  1445.    // CRC32
  1446.    get32(f);
  1447.    // page_segments
  1448.    f->segment_count = get8(f);
  1449.    if (!getn(f, f->segments, f->segment_count))
  1450.       return error(f, VORBIS_unexpected_eof);
  1451.    // assume we _don't_ know any the sample position of any segments
  1452.    f->end_seg_with_known_loc = -2;
  1453.    if (loc0 != ~0U || loc1 != ~0U) {
  1454.       int i;
  1455.       // determine which packet is the last one that will complete
  1456.       for (i=f->segment_count-1; i >= 0; --i)
  1457.          if (f->segments[i] < 255)
  1458.             break;
  1459.       // 'i' is now the index of the _last_ segment of a packet that ends
  1460.       if (i >= 0) {
  1461.          f->end_seg_with_known_loc = i;
  1462.          f->known_loc_for_packet   = loc0;
  1463.       }
  1464.    }
  1465.    if (f->first_decode) {
  1466.       int i,len;
  1467.       len = 0;
  1468.       for (i=0; i < f->segment_count; ++i)
  1469.          len += f->segments[i];
  1470.       len += 27 + f->segment_count;
  1471.       f->p_first.page_end = f->p_first.page_start + len;
  1472.       f->p_first.last_decoded_sample = loc0;
  1473.    }
  1474.    f->next_seg = 0;
  1475.    return TRUE;
  1476. }
  1477.  
  1478. static int start_page(vorb *f)
  1479. {
  1480.    if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
  1481.    return start_page_no_capturepattern(f);
  1482. }
  1483.  
  1484. static int start_packet(vorb *f)
  1485. {
  1486.    while (f->next_seg == -1) {
  1487.       if (!start_page(f)) return FALSE;
  1488.       if (f->page_flag & PAGEFLAG_continued_packet)
  1489.          return error(f, VORBIS_continued_packet_flag_invalid);
  1490.    }
  1491.    f->last_seg = FALSE;
  1492.    f->valid_bits = 0;
  1493.    f->packet_bytes = 0;
  1494.    f->bytes_in_seg = 0;
  1495.    // f->next_seg is now valid
  1496.    return TRUE;
  1497. }
  1498.  
  1499. static int maybe_start_packet(vorb *f)
  1500. {
  1501.    if (f->next_seg == -1) {
  1502.       int x = get8(f);
  1503.       if (f->eof) return FALSE; // EOF at page boundary is not an error!
  1504.       if (0x4f != x      ) return error(f, VORBIS_missing_capture_pattern);
  1505.       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
  1506.       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
  1507.       if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
  1508.       if (!start_page_no_capturepattern(f)) return FALSE;
  1509.       if (f->page_flag & PAGEFLAG_continued_packet) {
  1510.          // set up enough state that we can read this packet if we want,
  1511.          // e.g. during recovery
  1512.          f->last_seg = FALSE;
  1513.          f->bytes_in_seg = 0;
  1514.          return error(f, VORBIS_continued_packet_flag_invalid);
  1515.       }
  1516.    }
  1517.    return start_packet(f);
  1518. }
  1519.  
  1520. static int next_segment(vorb *f)
  1521. {
  1522.    int len;
  1523.    if (f->last_seg) return 0;
  1524.    if (f->next_seg == -1) {
  1525.       f->last_seg_which = f->segment_count-1; // in case start_page fails
  1526.       if (!start_page(f)) { f->last_seg = 1; return 0; }
  1527.       if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
  1528.    }
  1529.    len = f->segments[f->next_seg++];
  1530.    if (len < 255) {
  1531.       f->last_seg = TRUE;
  1532.       f->last_seg_which = f->next_seg-1;
  1533.    }
  1534.    if (f->next_seg >= f->segment_count)
  1535.       f->next_seg = -1;
  1536.    assert(f->bytes_in_seg == 0);
  1537.    f->bytes_in_seg = len;
  1538.    return len;
  1539. }
  1540.  
  1541. #define EOP    (-1)
  1542. #define INVALID_BITS  (-1)
  1543.  
  1544. static int get8_packet_raw(vorb *f)
  1545. {
  1546.    if (!f->bytes_in_seg) {  // CLANG!
  1547.       if (f->last_seg) return EOP;
  1548.       else if (!next_segment(f)) return EOP;
  1549.    }
  1550.    assert(f->bytes_in_seg > 0);
  1551.    --f->bytes_in_seg;
  1552.    ++f->packet_bytes;
  1553.    return get8(f);
  1554. }
  1555.  
  1556. static int get8_packet(vorb *f)
  1557. {
  1558.    int x = get8_packet_raw(f);
  1559.    f->valid_bits = 0;
  1560.    return x;
  1561. }
  1562.  
  1563. static int get32_packet(vorb *f)
  1564. {
  1565.    uint32 x;
  1566.    x = get8_packet(f);
  1567.    x += get8_packet(f) << 8;
  1568.    x += get8_packet(f) << 16;
  1569.    x += (uint32) get8_packet(f) << 24;
  1570.    return x;
  1571. }
  1572.  
  1573. static void flush_packet(vorb *f)
  1574. {
  1575.    while (get8_packet_raw(f) != EOP);
  1576. }
  1577.  
  1578. // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
  1579. // as the huffman decoder?
  1580. static uint32 get_bits(vorb *f, int n)
  1581. {
  1582.    uint32 z;
  1583.  
  1584.    if (f->valid_bits < 0) return 0;
  1585.    if (f->valid_bits < n) {
  1586.       if (n > 24) {
  1587.          // the accumulator technique below would not work correctly in this case
  1588.          z = get_bits(f, 24);
  1589.          z += get_bits(f, n-24) << 24;
  1590.          return z;
  1591.       }
  1592.       if (f->valid_bits == 0) f->acc = 0;
  1593.       while (f->valid_bits < n) {
  1594.          int z = get8_packet_raw(f);
  1595.          if (z == EOP) {
  1596.             f->valid_bits = INVALID_BITS;
  1597.             return 0;
  1598.          }
  1599.          f->acc += z << f->valid_bits;
  1600.          f->valid_bits += 8;
  1601.       }
  1602.    }
  1603.    if (f->valid_bits < 0) return 0;
  1604.    z = f->acc & ((1 << n)-1);
  1605.    f->acc >>= n;
  1606.    f->valid_bits -= n;
  1607.    return z;
  1608. }
  1609.  
  1610. // @OPTIMIZE: primary accumulator for huffman
  1611. // expand the buffer to as many bits as possible without reading off end of packet
  1612. // it might be nice to allow f->valid_bits and f->acc to be stored in registers,
  1613. // e.g. cache them locally and decode locally
  1614. static __forceinline void prep_huffman(vorb *f)
  1615. {
  1616.    if (f->valid_bits <= 24) {
  1617.       if (f->valid_bits == 0) f->acc = 0;
  1618.       do {
  1619.          int z;
  1620.          if (f->last_seg && !f->bytes_in_seg) return;
  1621.          z = get8_packet_raw(f);
  1622.          if (z == EOP) return;
  1623.          f->acc += (unsigned) z << f->valid_bits;
  1624.          f->valid_bits += 8;
  1625.       } while (f->valid_bits <= 24);
  1626.    }
  1627. }
  1628.  
  1629. enum
  1630. {
  1631.    VORBIS_packet_id = 1,
  1632.    VORBIS_packet_comment = 3,
  1633.    VORBIS_packet_setup = 5
  1634. };
  1635.  
  1636. static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
  1637. {
  1638.    int i;
  1639.    prep_huffman(f);
  1640.  
  1641.    if (c->codewords == NULL && c->sorted_codewords == NULL)
  1642.       return -1;
  1643.  
  1644.    // cases to use binary search: sorted_codewords && !c->codewords
  1645.    //                             sorted_codewords && c->entries > 8
  1646.    if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
  1647.       // binary search
  1648.       uint32 code = bit_reverse(f->acc);
  1649.       int x=0, n=c->sorted_entries, len;
  1650.  
  1651.       while (n > 1) {
  1652.          // invariant: sc[x] <= code < sc[x+n]
  1653.          int m = x + (n >> 1);
  1654.          if (c->sorted_codewords[m] <= code) {
  1655.             x = m;
  1656.             n -= (n>>1);
  1657.          } else {
  1658.             n >>= 1;
  1659.          }
  1660.       }
  1661.       // x is now the sorted index
  1662.       if (!c->sparse) x = c->sorted_values[x];
  1663.       // x is now sorted index if sparse, or symbol otherwise
  1664.       len = c->codeword_lengths[x];
  1665.       if (f->valid_bits >= len) {
  1666.          f->acc >>= len;
  1667.          f->valid_bits -= len;
  1668.          return x;
  1669.       }
  1670.  
  1671.       f->valid_bits = 0;
  1672.       return -1;
  1673.    }
  1674.  
  1675.    // if small, linear search
  1676.    assert(!c->sparse);
  1677.    for (i=0; i < c->entries; ++i) {
  1678.       if (c->codeword_lengths[i] == NO_CODE) continue;
  1679.       if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
  1680.          if (f->valid_bits >= c->codeword_lengths[i]) {
  1681.             f->acc >>= c->codeword_lengths[i];
  1682.             f->valid_bits -= c->codeword_lengths[i];
  1683.             return i;
  1684.          }
  1685.          f->valid_bits = 0;
  1686.          return -1;
  1687.       }
  1688.    }
  1689.  
  1690.    error(f, VORBIS_invalid_stream);
  1691.    f->valid_bits = 0;
  1692.    return -1;
  1693. }
  1694.  
  1695. #ifndef STB_VORBIS_NO_INLINE_DECODE
  1696.  
  1697. #define DECODE_RAW(var, f,c)                                  \
  1698.    if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)        \
  1699.       prep_huffman(f);                                        \
  1700.    var = f->acc & FAST_HUFFMAN_TABLE_MASK;                    \
  1701.    var = c->fast_huffman[var];                                \
  1702.    if (var >= 0) {                                            \
  1703.       int n = c->codeword_lengths[var];                       \
  1704.       f->acc >>= n;                                           \
  1705.       f->valid_bits -= n;                                     \
  1706.       if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
  1707.    } else {                                                   \
  1708.       var = codebook_decode_scalar_raw(f,c);                  \
  1709.    }
  1710.  
  1711. #else
  1712.  
  1713. static int codebook_decode_scalar(vorb *f, Codebook *c)
  1714. {
  1715.    int i;
  1716.    if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
  1717.       prep_huffman(f);
  1718.    // fast huffman table lookup
  1719.    i = f->acc & FAST_HUFFMAN_TABLE_MASK;
  1720.    i = c->fast_huffman[i];
  1721.    if (i >= 0) {
  1722.       f->acc >>= c->codeword_lengths[i];
  1723.       f->valid_bits -= c->codeword_lengths[i];
  1724.       if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
  1725.       return i;
  1726.    }
  1727.    return codebook_decode_scalar_raw(f,c);
  1728. }
  1729.  
  1730. #define DECODE_RAW(var,f,c)    var = codebook_decode_scalar(f,c);
  1731.  
  1732. #endif
  1733.  
  1734. #define DECODE(var,f,c)                                       \
  1735.    DECODE_RAW(var,f,c)                                        \
  1736.    if (c->sparse) var = c->sorted_values[var];
  1737.  
  1738. #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
  1739.   #define DECODE_VQ(var,f,c)   DECODE_RAW(var,f,c)
  1740. #else
  1741.   #define DECODE_VQ(var,f,c)   DECODE(var,f,c)
  1742. #endif
  1743.  
  1744.  
  1745.  
  1746.  
  1747.  
  1748.  
  1749. // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
  1750. // where we avoid one addition
  1751. #define CODEBOOK_ELEMENT(c,off)          (c->multiplicands[off])
  1752. #define CODEBOOK_ELEMENT_FAST(c,off)     (c->multiplicands[off])
  1753. #define CODEBOOK_ELEMENT_BASE(c)         (0)
  1754.  
  1755. static int codebook_decode_start(vorb *f, Codebook *c)
  1756. {
  1757.    int z = -1;
  1758.  
  1759.    // type 0 is only legal in a scalar context
  1760.    if (c->lookup_type == 0)
  1761.       error(f, VORBIS_invalid_stream);
  1762.    else {
  1763.       DECODE_VQ(z,f,c);
  1764.       if (c->sparse) assert(z < c->sorted_entries);
  1765.       if (z < 0) {  // check for EOP
  1766.          if (!f->bytes_in_seg)
  1767.             if (f->last_seg)
  1768.                return z;
  1769.          error(f, VORBIS_invalid_stream);
  1770.       }
  1771.    }
  1772.    return z;
  1773. }
  1774.  
  1775. static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
  1776. {
  1777.    int i,z = codebook_decode_start(f,c);
  1778.    if (z < 0) return FALSE;
  1779.    if (len > c->dimensions) len = c->dimensions;
  1780.  
  1781. #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
  1782.    if (c->lookup_type == 1) {
  1783.       float last = CODEBOOK_ELEMENT_BASE(c);
  1784.       int div = 1;
  1785.       for (i=0; i < len; ++i) {
  1786.          int off = (z / div) % c->lookup_values;
  1787.          float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
  1788.          output[i] += val;
  1789.          if (c->sequence_p) last = val + c->minimum_value;
  1790.          div *= c->lookup_values;
  1791.       }
  1792.       return TRUE;
  1793.    }
  1794. #endif
  1795.  
  1796.    z *= c->dimensions;
  1797.    if (c->sequence_p) {
  1798.       float last = CODEBOOK_ELEMENT_BASE(c);
  1799.       for (i=0; i < len; ++i) {
  1800.          float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
  1801.          output[i] += val;
  1802.          last = val + c->minimum_value;
  1803.       }
  1804.    } else {
  1805.       float last = CODEBOOK_ELEMENT_BASE(c);
  1806.       for (i=0; i < len; ++i) {
  1807.          output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
  1808.       }
  1809.    }
  1810.  
  1811.    return TRUE;
  1812. }
  1813.  
  1814. static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
  1815. {
  1816.    int i,z = codebook_decode_start(f,c);
  1817.    float last = CODEBOOK_ELEMENT_BASE(c);
  1818.    if (z < 0) return FALSE;
  1819.    if (len > c->dimensions) len = c->dimensions;
  1820.  
  1821. #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
  1822.    if (c->lookup_type == 1) {
  1823.       int div = 1;
  1824.       for (i=0; i < len; ++i) {
  1825.          int off = (z / div) % c->lookup_values;
  1826.          float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
  1827.          output[i*step] += val;
  1828.          if (c->sequence_p) last = val;
  1829.          div *= c->lookup_values;
  1830.       }
  1831.       return TRUE;
  1832.    }
  1833. #endif
  1834.  
  1835.    z *= c->dimensions;
  1836.    for (i=0; i < len; ++i) {
  1837.       float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
  1838.       output[i*step] += val;
  1839.       if (c->sequence_p) last = val;
  1840.    }
  1841.  
  1842.    return TRUE;
  1843. }
  1844.  
  1845. static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
  1846. {
  1847.    int c_inter = *c_inter_p;
  1848.    int p_inter = *p_inter_p;
  1849.    int i,z, effective = c->dimensions;
  1850.  
  1851.    // type 0 is only legal in a scalar context
  1852.    if (c->lookup_type == 0)   return error(f, VORBIS_invalid_stream);
  1853.  
  1854.    while (total_decode > 0) {
  1855.       float last = CODEBOOK_ELEMENT_BASE(c);
  1856.       DECODE_VQ(z,f,c);
  1857.       #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
  1858.       assert(!c->sparse || z < c->sorted_entries);
  1859.       #endif
  1860.       if (z < 0) {
  1861.          if (!f->bytes_in_seg)
  1862.             if (f->last_seg) return FALSE;
  1863.          return error(f, VORBIS_invalid_stream);
  1864.       }
  1865.  
  1866.       // if this will take us off the end of the buffers, stop short!
  1867.       // we check by computing the length of the virtual interleaved
  1868.       // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
  1869.       // and the length we'll be using (effective)
  1870.       if (c_inter + p_inter*ch + effective > len * ch) {
  1871.          effective = len*ch - (p_inter*ch - c_inter);
  1872.       }
  1873.  
  1874.    #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
  1875.       if (c->lookup_type == 1) {
  1876.          int div = 1;
  1877.          for (i=0; i < effective; ++i) {
  1878.             int off = (z / div) % c->lookup_values;
  1879.             float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
  1880.             if (outputs[c_inter])
  1881.                outputs[c_inter][p_inter] += val;
  1882.             if (++c_inter == ch) { c_inter = 0; ++p_inter; }
  1883.             if (c->sequence_p) last = val;
  1884.             div *= c->lookup_values;
  1885.          }
  1886.       } else
  1887.    #endif
  1888.       {
  1889.          z *= c->dimensions;
  1890.          if (c->sequence_p) {
  1891.             for (i=0; i < effective; ++i) {
  1892.                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
  1893.                if (outputs[c_inter])
  1894.                   outputs[c_inter][p_inter] += val;
  1895.                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
  1896.                last = val;
  1897.             }
  1898.          } else {
  1899.             for (i=0; i < effective; ++i) {
  1900.                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
  1901.                if (outputs[c_inter])
  1902.                   outputs[c_inter][p_inter] += val;
  1903.                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
  1904.             }
  1905.          }
  1906.       }
  1907.  
  1908.       total_decode -= effective;
  1909.    }
  1910.    *c_inter_p = c_inter;
  1911.    *p_inter_p = p_inter;
  1912.    return TRUE;
  1913. }
  1914.  
  1915. static int predict_point(int x, int x0, int x1, int y0, int y1)
  1916. {
  1917.    int dy = y1 - y0;
  1918.    int adx = x1 - x0;
  1919.    // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
  1920.    int err = abs(dy) * (x - x0);
  1921.    int off = err / adx;
  1922.    return dy < 0 ? y0 - off : y0 + off;
  1923. }
  1924.  
  1925. // the following table is block-copied from the specification
  1926. static float inverse_db_table[256] =
  1927. {
  1928.   1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
  1929.   1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
  1930.   1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
  1931.   2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
  1932.   2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
  1933.   3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
  1934.   4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
  1935.   6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
  1936.   7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
  1937.   1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
  1938.   1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
  1939.   1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
  1940.   2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
  1941.   2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
  1942.   3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
  1943.   4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
  1944.   5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
  1945.   7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
  1946.   9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
  1947.   1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
  1948.   1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
  1949.   2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
  1950.   2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
  1951.   3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
  1952.   4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
  1953.   5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
  1954.   7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
  1955.   9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
  1956.   0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
  1957.   0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
  1958.   0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
  1959.   0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
  1960.   0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
  1961.   0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
  1962.   0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
  1963.   0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
  1964.   0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f,
  1965.   0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f,
  1966.   0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f,
  1967.   0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f,
  1968.   0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f,
  1969.   0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f,
  1970.   0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f,
  1971.   0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f,
  1972.   0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f,
  1973.   0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f,
  1974.   0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f,
  1975.   0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f,
  1976.   0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f,
  1977.   0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f,
  1978.   0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f,
  1979.   0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f,
  1980.   0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f,
  1981.   0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f,
  1982.   0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f,
  1983.   0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f,
  1984.   0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f,
  1985.   0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f,
  1986.   0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f,
  1987.   0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f,
  1988.   0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f,
  1989.   0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f,
  1990.   0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f,
  1991.   0.82788260f,    0.88168307f,    0.9389798f,     1.0f
  1992. };
  1993.  
  1994.  
  1995. // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
  1996. // note that you must produce bit-identical output to decode correctly;
  1997. // this specific sequence of operations is specified in the spec (it's
  1998. // drawing integer-quantized frequency-space lines that the encoder
  1999. // expects to be exactly the same)
  2000. //     ... also, isn't the whole point of Bresenham's algorithm to NOT
  2001. // have to divide in the setup? sigh.
  2002. #ifndef STB_VORBIS_NO_DEFER_FLOOR
  2003. #define LINE_OP(a,b)   a *= b
  2004. #else
  2005. #define LINE_OP(a,b)   a = b
  2006. #endif
  2007.  
  2008. #ifdef STB_VORBIS_DIVIDE_TABLE
  2009. #define DIVTAB_NUMER   32
  2010. #define DIVTAB_DENOM   64
  2011. int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
  2012. #endif
  2013.  
  2014. static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
  2015. {
  2016.    int dy = y1 - y0;
  2017.    int adx = x1 - x0;
  2018.    int ady = abs(dy);
  2019.    int base;
  2020.    int x=x0,y=y0;
  2021.    int err = 0;
  2022.    int sy;
  2023.  
  2024. #ifdef STB_VORBIS_DIVIDE_TABLE
  2025.    if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
  2026.       if (dy < 0) {
  2027.          base = -integer_divide_table[ady][adx];
  2028.          sy = base-1;
  2029.       } else {
  2030.          base =  integer_divide_table[ady][adx];
  2031.          sy = base+1;
  2032.       }
  2033.    } else {
  2034.       base = dy / adx;
  2035.       if (dy < 0)
  2036.          sy = base - 1;
  2037.       else
  2038.          sy = base+1;
  2039.    }
  2040. #else
  2041.    base = dy / adx;
  2042.    if (dy < 0)
  2043.       sy = base - 1;
  2044.    else
  2045.       sy = base+1;
  2046. #endif
  2047.    ady -= abs(base) * adx;
  2048.    if (x1 > n) x1 = n;
  2049.    if (x < x1) {
  2050.       LINE_OP(output[x], inverse_db_table[y&255]);
  2051.       for (++x; x < x1; ++x) {
  2052.          err += ady;
  2053.          if (err >= adx) {
  2054.             err -= adx;
  2055.             y += sy;
  2056.          } else
  2057.             y += base;
  2058.          LINE_OP(output[x], inverse_db_table[y&255]);
  2059.       }
  2060.    }
  2061. }
  2062.  
  2063. static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
  2064. {
  2065.    int k;
  2066.    if (rtype == 0) {
  2067.       int step = n / book->dimensions;
  2068.       for (k=0; k < step; ++k)
  2069.          if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
  2070.             return FALSE;
  2071.    } else {
  2072.       for (k=0; k < n; ) {
  2073.          if (!codebook_decode(f, book, target+offset, n-k))
  2074.             return FALSE;
  2075.          k += book->dimensions;
  2076.          offset += book->dimensions;
  2077.       }
  2078.    }
  2079.    return TRUE;
  2080. }
  2081.  
  2082. // n is 1/2 of the blocksize --
  2083. // specification: "Correct per-vector decode length is [n]/2"
  2084. static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
  2085. {
  2086.    int i,j,pass;
  2087.    Residue *r = f->residue_config + rn;
  2088.    int rtype = f->residue_types[rn];
  2089.    int c = r->classbook;
  2090.    int classwords = f->codebooks[c].dimensions;
  2091.    unsigned int actual_size = rtype == 2 ? n*2 : n;
  2092.    unsigned int limit_r_begin = (r->begin < actual_size ? r->begin : actual_size);
  2093.    unsigned int limit_r_end   = (r->end   < actual_size ? r->end   : actual_size);
  2094.    int n_read = limit_r_end - limit_r_begin;
  2095.    int part_read = n_read / r->part_size;
  2096.    int temp_alloc_point = temp_alloc_save(f);
  2097.    #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
  2098.    uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
  2099.    #else
  2100.    int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
  2101.    #endif
  2102.  
  2103.    CHECK(f);
  2104.  
  2105.    for (i=0; i < ch; ++i)
  2106.       if (!do_not_decode[i])
  2107.          memset(residue_buffers[i], 0, sizeof(float) * n);
  2108.  
  2109.    if (rtype == 2 && ch != 1) {
  2110.       for (j=0; j < ch; ++j)
  2111.          if (!do_not_decode[j])
  2112.             break;
  2113.       if (j == ch)
  2114.          goto done;
  2115.  
  2116.       for (pass=0; pass < 8; ++pass) {
  2117.          int pcount = 0, class_set = 0;
  2118.          if (ch == 2) {
  2119.             while (pcount < part_read) {
  2120.                int z = r->begin + pcount*r->part_size;
  2121.                int c_inter = (z & 1), p_inter = z>>1;
  2122.                if (pass == 0) {
  2123.                   Codebook *c = f->codebooks+r->classbook;
  2124.                   int q;
  2125.                   DECODE(q,f,c);
  2126.                   if (q == EOP) goto done;
  2127.                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
  2128.                   part_classdata[0][class_set] = r->classdata[q];
  2129.                   #else
  2130.                   for (i=classwords-1; i >= 0; --i) {
  2131.                      classifications[0][i+pcount] = q % r->classifications;
  2132.                      q /= r->classifications;
  2133.                   }
  2134.                   #endif
  2135.                }
  2136.                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
  2137.                   int z = r->begin + pcount*r->part_size;
  2138.                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
  2139.                   int c = part_classdata[0][class_set][i];
  2140.                   #else
  2141.                   int c = classifications[0][pcount];
  2142.                   #endif
  2143.                   int b = r->residue_books[c][pass];
  2144.                   if (b >= 0) {
  2145.                      Codebook *book = f->codebooks + b;
  2146.                      #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
  2147.                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
  2148.                         goto done;
  2149.                      #else
  2150.                      // saves 1%
  2151.                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
  2152.                         goto done;
  2153.                      #endif
  2154.                   } else {
  2155.                      z += r->part_size;
  2156.                      c_inter = z & 1;
  2157.                      p_inter = z >> 1;
  2158.                   }
  2159.                }
  2160.                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
  2161.                ++class_set;
  2162.                #endif
  2163.             }
  2164.          } else if (ch > 2) {
  2165.             while (pcount < part_read) {
  2166.                int z = r->begin + pcount*r->part_size;
  2167.                int c_inter = z % ch, p_inter = z/ch;
  2168.                if (pass == 0) {
  2169.                   Codebook *c = f->codebooks+r->classbook;
  2170.                   int q;
  2171.                   DECODE(q,f,c);
  2172.                   if (q == EOP) goto done;
  2173.                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
  2174.                   part_classdata[0][class_set] = r->classdata[q];
  2175.                   #else
  2176.                   for (i=classwords-1; i >= 0; --i) {
  2177.                      classifications[0][i+pcount] = q % r->classifications;
  2178.                      q /= r->classifications;
  2179.                   }
  2180.                   #endif
  2181.                }
  2182.                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
  2183.                   int z = r->begin + pcount*r->part_size;
  2184.                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
  2185.                   int c = part_classdata[0][class_set][i];
  2186.                   #else
  2187.                   int c = classifications[0][pcount];
  2188.                   #endif
  2189.                   int b = r->residue_books[c][pass];
  2190.                   if (b >= 0) {
  2191.                      Codebook *book = f->codebooks + b;
  2192.                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
  2193.                         goto done;
  2194.                   } else {
  2195.                      z += r->part_size;
  2196.                      c_inter = z % ch;
  2197.                      p_inter = z / ch;
  2198.                   }
  2199.                }
  2200.                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
  2201.                ++class_set;
  2202.                #endif
  2203.             }
  2204.          }
  2205.       }
  2206.       goto done;
  2207.    }
  2208.    CHECK(f);
  2209.  
  2210.    for (pass=0; pass < 8; ++pass) {
  2211.       int pcount = 0, class_set=0;
  2212.       while (pcount < part_read) {
  2213.          if (pass == 0) {
  2214.             for (j=0; j < ch; ++j) {
  2215.                if (!do_not_decode[j]) {
  2216.                   Codebook *c = f->codebooks+r->classbook;
  2217.                   int temp;
  2218.                   DECODE(temp,f,c);
  2219.                   if (temp == EOP) goto done;
  2220.                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
  2221.                   part_classdata[j][class_set] = r->classdata[temp];
  2222.                   #else
  2223.                   for (i=classwords-1; i >= 0; --i) {
  2224.                      classifications[j][i+pcount] = temp % r->classifications;
  2225.                      temp /= r->classifications;
  2226.                   }
  2227.                   #endif
  2228.                }
  2229.             }
  2230.          }
  2231.          for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
  2232.             for (j=0; j < ch; ++j) {
  2233.                if (!do_not_decode[j]) {
  2234.                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
  2235.                   int c = part_classdata[j][class_set][i];
  2236.                   #else
  2237.                   int c = classifications[j][pcount];
  2238.                   #endif
  2239.                   int b = r->residue_books[c][pass];
  2240.                   if (b >= 0) {
  2241.                      float *target = residue_buffers[j];
  2242.                      int offset = r->begin + pcount * r->part_size;
  2243.                      int n = r->part_size;
  2244.                      Codebook *book = f->codebooks + b;
  2245.                      if (!residue_decode(f, book, target, offset, n, rtype))
  2246.                         goto done;
  2247.                   }
  2248.                }
  2249.             }
  2250.          }
  2251.          #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
  2252.          ++class_set;
  2253.          #endif
  2254.       }
  2255.    }
  2256.   done:
  2257.    CHECK(f);
  2258.    #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
  2259.    temp_free(f,part_classdata);
  2260.    #else
  2261.    temp_free(f,classifications);
  2262.    #endif
  2263.    temp_alloc_restore(f,temp_alloc_point);
  2264. }
  2265.  
  2266.  
  2267. #if 0
  2268. // slow way for debugging
  2269. void inverse_mdct_slow(float *buffer, int n)
  2270. {
  2271.    int i,j;
  2272.    int n2 = n >> 1;
  2273.    float *x = (float *) malloc(sizeof(*x) * n2);
  2274.    memcpy(x, buffer, sizeof(*x) * n2);
  2275.    for (i=0; i < n; ++i) {
  2276.       float acc = 0;
  2277.       for (j=0; j < n2; ++j)
  2278.          // formula from paper:
  2279.          //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
  2280.          // formula from wikipedia
  2281.          //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
  2282.          // these are equivalent, except the formula from the paper inverts the multiplier!
  2283.          // however, what actually works is NO MULTIPLIER!?!
  2284.          //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
  2285.          acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
  2286.       buffer[i] = acc;
  2287.    }
  2288.    free(x);
  2289. }
  2290. #elif 0
  2291. // same as above, but just barely able to run in real time on modern machines
  2292. void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
  2293. {
  2294.    float mcos[16384];
  2295.    int i,j;
  2296.    int n2 = n >> 1, nmask = (n << 2) -1;
  2297.    float *x = (float *) malloc(sizeof(*x) * n2);
  2298.    memcpy(x, buffer, sizeof(*x) * n2);
  2299.    for (i=0; i < 4*n; ++i)
  2300.       mcos[i] = (float) cos(M_PI / 2 * i / n);
  2301.  
  2302.    for (i=0; i < n; ++i) {
  2303.       float acc = 0;
  2304.       for (j=0; j < n2; ++j)
  2305.          acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
  2306.       buffer[i] = acc;
  2307.    }
  2308.    free(x);
  2309. }
  2310. #elif 0
  2311. // transform to use a slow dct-iv; this is STILL basically trivial,
  2312. // but only requires half as many ops
  2313. void dct_iv_slow(float *buffer, int n)
  2314. {
  2315.    float mcos[16384];
  2316.    float x[2048];
  2317.    int i,j;
  2318.    int n2 = n >> 1, nmask = (n << 3) - 1;
  2319.    memcpy(x, buffer, sizeof(*x) * n);
  2320.    for (i=0; i < 8*n; ++i)
  2321.       mcos[i] = (float) cos(M_PI / 4 * i / n);
  2322.    for (i=0; i < n; ++i) {
  2323.       float acc = 0;
  2324.       for (j=0; j < n; ++j)
  2325.          acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
  2326.       buffer[i] = acc;
  2327.    }
  2328. }
  2329.  
  2330. void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
  2331. {
  2332.    int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
  2333.    float temp[4096];
  2334.  
  2335.    memcpy(temp, buffer, n2 * sizeof(float));
  2336.    dct_iv_slow(temp, n2);  // returns -c'-d, a-b'
  2337.  
  2338.    for (i=0; i < n4  ; ++i) buffer[i] = temp[i+n4];            // a-b'
  2339.    for (   ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1];   // b-a', c+d'
  2340.    for (   ; i < n   ; ++i) buffer[i] = -temp[i - n3_4];       // c'+d
  2341. }
  2342. #endif
  2343.  
  2344. #ifndef LIBVORBIS_MDCT
  2345. #define LIBVORBIS_MDCT 0
  2346. #endif
  2347.  
  2348. #if LIBVORBIS_MDCT
  2349. // directly call the vorbis MDCT using an interface documented
  2350. // by Jeff Roberts... useful for performance comparison
  2351. typedef struct
  2352. {
  2353.   int n;
  2354.   int log2n;
  2355.  
  2356.   float *trig;
  2357.   int   *bitrev;
  2358.  
  2359.   float scale;
  2360. } mdct_lookup;
  2361.  
  2362. extern void mdct_init(mdct_lookup *lookup, int n);
  2363. extern void mdct_clear(mdct_lookup *l);
  2364. extern void mdct_backward(mdct_lookup *init, float *in, float *out);
  2365.  
  2366. mdct_lookup M1,M2;
  2367.  
  2368. void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
  2369. {
  2370.    mdct_lookup *M;
  2371.    if (M1.n == n) M = &M1;
  2372.    else if (M2.n == n) M = &M2;
  2373.    else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
  2374.    else {
  2375.       if (M2.n) __asm int 3;
  2376.       mdct_init(&M2, n);
  2377.       M = &M2;
  2378.    }
  2379.  
  2380.    mdct_backward(M, buffer, buffer);
  2381. }
  2382. #endif
  2383.  
  2384.  
  2385. // the following were split out into separate functions while optimizing;
  2386. // they could be pushed back up but eh. __forceinline showed no change;
  2387. // they're probably already being inlined.
  2388. static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
  2389. {
  2390.    float *ee0 = e + i_off;
  2391.    float *ee2 = ee0 + k_off;
  2392.    int i;
  2393.  
  2394.    assert((n & 3) == 0);
  2395.    for (i=(n>>2); i > 0; --i) {
  2396.       float k00_20, k01_21;
  2397.       k00_20  = ee0[ 0] - ee2[ 0];
  2398.       k01_21  = ee0[-1] - ee2[-1];
  2399.       ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0];
  2400.       ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1];
  2401.       ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];
  2402.       ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
  2403.       A += 8;
  2404.  
  2405.       k00_20  = ee0[-2] - ee2[-2];
  2406.       k01_21  = ee0[-3] - ee2[-3];
  2407.       ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2];
  2408.       ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3];
  2409.       ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
  2410.       ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
  2411.       A += 8;
  2412.  
  2413.       k00_20  = ee0[-4] - ee2[-4];
  2414.       k01_21  = ee0[-5] - ee2[-5];
  2415.       ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4];
  2416.       ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5];
  2417.       ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
  2418.       ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
  2419.       A += 8;
  2420.  
  2421.       k00_20  = ee0[-6] - ee2[-6];
  2422.       k01_21  = ee0[-7] - ee2[-7];
  2423.       ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6];
  2424.       ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7];
  2425.       ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
  2426.       ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
  2427.       A += 8;
  2428.       ee0 -= 8;
  2429.       ee2 -= 8;
  2430.    }
  2431. }
  2432.  
  2433. static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
  2434. {
  2435.    int i;
  2436.    float k00_20, k01_21;
  2437.  
  2438.    float *e0 = e + d0;
  2439.    float *e2 = e0 + k_off;
  2440.  
  2441.    for (i=lim >> 2; i > 0; --i) {
  2442.       k00_20 = e0[-0] - e2[-0];
  2443.       k01_21 = e0[-1] - e2[-1];
  2444.       e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0];
  2445.       e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1];
  2446.       e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
  2447.       e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
  2448.  
  2449.       A += k1;
  2450.  
  2451.       k00_20 = e0[-2] - e2[-2];
  2452.       k01_21 = e0[-3] - e2[-3];
  2453.       e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2];
  2454.       e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3];
  2455.       e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
  2456.       e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
  2457.  
  2458.       A += k1;
  2459.  
  2460.       k00_20 = e0[-4] - e2[-4];
  2461.       k01_21 = e0[-5] - e2[-5];
  2462.       e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4];
  2463.       e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5];
  2464.       e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
  2465.       e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
  2466.  
  2467.       A += k1;
  2468.  
  2469.       k00_20 = e0[-6] - e2[-6];
  2470.       k01_21 = e0[-7] - e2[-7];
  2471.       e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6];
  2472.       e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7];
  2473.       e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
  2474.       e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
  2475.  
  2476.       e0 -= 8;
  2477.       e2 -= 8;
  2478.  
  2479.       A += k1;
  2480.    }
  2481. }
  2482.  
  2483. static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
  2484. {
  2485.    int i;
  2486.    float A0 = A[0];
  2487.    float A1 = A[0+1];
  2488.    float A2 = A[0+a_off];
  2489.    float A3 = A[0+a_off+1];
  2490.    float A4 = A[0+a_off*2+0];
  2491.    float A5 = A[0+a_off*2+1];
  2492.    float A6 = A[0+a_off*3+0];
  2493.    float A7 = A[0+a_off*3+1];
  2494.  
  2495.    float k00,k11;
  2496.  
  2497.    float *ee0 = e  +i_off;
  2498.    float *ee2 = ee0+k_off;
  2499.  
  2500.    for (i=n; i > 0; --i) {
  2501.       k00     = ee0[ 0] - ee2[ 0];
  2502.       k11     = ee0[-1] - ee2[-1];
  2503.       ee0[ 0] =  ee0[ 0] + ee2[ 0];
  2504.       ee0[-1] =  ee0[-1] + ee2[-1];
  2505.       ee2[ 0] = (k00) * A0 - (k11) * A1;
  2506.       ee2[-1] = (k11) * A0 + (k00) * A1;
  2507.  
  2508.       k00     = ee0[-2] - ee2[-2];
  2509.       k11     = ee0[-3] - ee2[-3];
  2510.       ee0[-2] =  ee0[-2] + ee2[-2];
  2511.       ee0[-3] =  ee0[-3] + ee2[-3];
  2512.       ee2[-2] = (k00) * A2 - (k11) * A3;
  2513.       ee2[-3] = (k11) * A2 + (k00) * A3;
  2514.  
  2515.       k00     = ee0[-4] - ee2[-4];
  2516.       k11     = ee0[-5] - ee2[-5];
  2517.       ee0[-4] =  ee0[-4] + ee2[-4];
  2518.       ee0[-5] =  ee0[-5] + ee2[-5];
  2519.       ee2[-4] = (k00) * A4 - (k11) * A5;
  2520.       ee2[-5] = (k11) * A4 + (k00) * A5;
  2521.  
  2522.       k00     = ee0[-6] - ee2[-6];
  2523.       k11     = ee0[-7] - ee2[-7];
  2524.       ee0[-6] =  ee0[-6] + ee2[-6];
  2525.       ee0[-7] =  ee0[-7] + ee2[-7];
  2526.       ee2[-6] = (k00) * A6 - (k11) * A7;
  2527.       ee2[-7] = (k11) * A6 + (k00) * A7;
  2528.  
  2529.       ee0 -= k0;
  2530.       ee2 -= k0;
  2531.    }
  2532. }
  2533.  
  2534. static __forceinline void iter_54(float *z)
  2535. {
  2536.    float k00,k11,k22,k33;
  2537.    float y0,y1,y2,y3;
  2538.  
  2539.    k00  = z[ 0] - z[-4];
  2540.    y0   = z[ 0] + z[-4];
  2541.    y2   = z[-2] + z[-6];
  2542.    k22  = z[-2] - z[-6];
  2543.  
  2544.    z[-0] = y0 + y2;      // z0 + z4 + z2 + z6
  2545.    z[-2] = y0 - y2;      // z0 + z4 - z2 - z6
  2546.  
  2547.    // done with y0,y2
  2548.  
  2549.    k33  = z[-3] - z[-7];
  2550.  
  2551.    z[-4] = k00 + k33;    // z0 - z4 + z3 - z7
  2552.    z[-6] = k00 - k33;    // z0 - z4 - z3 + z7
  2553.  
  2554.    // done with k33
  2555.  
  2556.    k11  = z[-1] - z[-5];
  2557.    y1   = z[-1] + z[-5];
  2558.    y3   = z[-3] + z[-7];
  2559.  
  2560.    z[-1] = y1 + y3;      // z1 + z5 + z3 + z7
  2561.    z[-3] = y1 - y3;      // z1 + z5 - z3 - z7
  2562.    z[-5] = k11 - k22;    // z1 - z5 + z2 - z6
  2563.    z[-7] = k11 + k22;    // z1 - z5 - z2 + z6
  2564. }
  2565.  
  2566. static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
  2567. {
  2568.    int a_off = base_n >> 3;
  2569.    float A2 = A[0+a_off];
  2570.    float *z = e + i_off;
  2571.    float *base = z - 16 * n;
  2572.  
  2573.    while (z > base) {
  2574.       float k00,k11;
  2575.  
  2576.       k00   = z[-0] - z[-8];
  2577.       k11   = z[-1] - z[-9];
  2578.       z[-0] = z[-0] + z[-8];
  2579.       z[-1] = z[-1] + z[-9];
  2580.       z[-8] =  k00;
  2581.       z[-9] =  k11 ;
  2582.  
  2583.       k00    = z[ -2] - z[-10];
  2584.       k11    = z[ -3] - z[-11];
  2585.       z[ -2] = z[ -2] + z[-10];
  2586.       z[ -3] = z[ -3] + z[-11];
  2587.       z[-10] = (k00+k11) * A2;
  2588.       z[-11] = (k11-k00) * A2;
  2589.  
  2590.       k00    = z[-12] - z[ -4];  // reverse to avoid a unary negation
  2591.       k11    = z[ -5] - z[-13];
  2592.       z[ -4] = z[ -4] + z[-12];
  2593.       z[ -5] = z[ -5] + z[-13];
  2594.       z[-12] = k11;
  2595.       z[-13] = k00;
  2596.  
  2597.       k00    = z[-14] - z[ -6];  // reverse to avoid a unary negation
  2598.       k11    = z[ -7] - z[-15];
  2599.       z[ -6] = z[ -6] + z[-14];
  2600.       z[ -7] = z[ -7] + z[-15];
  2601.       z[-14] = (k00+k11) * A2;
  2602.       z[-15] = (k00-k11) * A2;
  2603.  
  2604.       iter_54(z);
  2605.       iter_54(z-8);
  2606.       z -= 16;
  2607.    }
  2608. }
  2609.  
  2610. static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
  2611. {
  2612.    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
  2613.    int ld;
  2614.    // @OPTIMIZE: reduce register pressure by using fewer variables?
  2615.    int save_point = temp_alloc_save(f);
  2616.    float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
  2617.    float *u=NULL,*v=NULL;
  2618.    // twiddle factors
  2619.    float *A = f->A[blocktype];
  2620.  
  2621.    // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
  2622.    // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
  2623.  
  2624.    // kernel from paper
  2625.  
  2626.  
  2627.    // merged:
  2628.    //   copy and reflect spectral data
  2629.    //   step 0
  2630.  
  2631.    // note that it turns out that the items added together during
  2632.    // this step are, in fact, being added to themselves (as reflected
  2633.    // by step 0). inexplicable inefficiency! this became obvious
  2634.    // once I combined the passes.
  2635.  
  2636.    // so there's a missing 'times 2' here (for adding X to itself).
  2637.    // this propagates through linearly to the end, where the numbers
  2638.    // are 1/2 too small, and need to be compensated for.
  2639.  
  2640.    {
  2641.       float *d,*e, *AA, *e_stop;
  2642.       d = &buf2[n2-2];
  2643.       AA = A;
  2644.       e = &buffer[0];
  2645.       e_stop = &buffer[n2];
  2646.       while (e != e_stop) {
  2647.          d[1] = (e[0] * AA[0] - e[2]*AA[1]);
  2648.          d[0] = (e[0] * AA[1] + e[2]*AA[0]);
  2649.          d -= 2;
  2650.          AA += 2;
  2651.          e += 4;
  2652.       }
  2653.  
  2654.       e = &buffer[n2-3];
  2655.       while (d >= buf2) {
  2656.          d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
  2657.          d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
  2658.          d -= 2;
  2659.          AA += 2;
  2660.          e -= 4;
  2661.       }
  2662.    }
  2663.  
  2664.    // now we use symbolic names for these, so that we can
  2665.    // possibly swap their meaning as we change which operations
  2666.    // are in place
  2667.  
  2668.    u = buffer;
  2669.    v = buf2;
  2670.  
  2671.    // step 2    (paper output is w, now u)
  2672.    // this could be in place, but the data ends up in the wrong
  2673.    // place... _somebody_'s got to swap it, so this is nominated
  2674.    {
  2675.       float *AA = &A[n2-8];
  2676.       float *d0,*d1, *e0, *e1;
  2677.  
  2678.       e0 = &v[n4];
  2679.       e1 = &v[0];
  2680.  
  2681.       d0 = &u[n4];
  2682.       d1 = &u[0];
  2683.  
  2684.       while (AA >= A) {
  2685.          float v40_20, v41_21;
  2686.  
  2687.          v41_21 = e0[1] - e1[1];
  2688.          v40_20 = e0[0] - e1[0];
  2689.          d0[1]  = e0[1] + e1[1];
  2690.          d0[0]  = e0[0] + e1[0];
  2691.          d1[1]  = v41_21*AA[4] - v40_20*AA[5];
  2692.          d1[0]  = v40_20*AA[4