Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5517 | hidnplayr | 1 | #define NULL 0 |
2 | typedef unsigned int size_t; |
||
3 | |||
4 | |||
5 | // sort |
||
6 | #define THRESH 4 /* threshold for insertion */ |
||
7 | #define MTHRESH 6 /* threshold for median */ |
||
8 | |||
9 | static int (*qcmp)(const void *, const void *); /* the comparison routine */ |
||
10 | static int qsz; /* size of each record */ |
||
11 | static int thresh; /* THRESHold in chars */ |
||
12 | static int mthresh; /* MTHRESHold in chars */ |
||
13 | |||
14 | /* |
||
15 | * qst: |
||
16 | * Do a quicksort |
||
17 | * First, find the median element, and put that one in the first place as the |
||
18 | * discriminator. (This "median" is just the median of the first, last and |
||
19 | * middle elements). (Using this median instead of the first element is a big |
||
20 | * win). Then, the usual partitioning/swapping, followed by moving the |
||
21 | * discriminator into the right place. Then, figure out the sizes of the two |
||
22 | * partions, do the smaller one recursively and the larger one via a repeat of |
||
23 | * this code. Stopping when there are less than THRESH elements in a partition |
||
24 | * and cleaning up with an insertion sort (in our caller) is a huge win. |
||
25 | * All data swaps are done in-line, which is space-losing but time-saving. |
||
26 | * (And there are only three places where this is done). |
||
27 | */ |
||
28 | |||
29 | static void qst(char *base, char *max) |
||
30 | { |
||
31 | char c, *i, *j, *jj; |
||
32 | int ii; |
||
33 | char *mid, *tmp; |
||
34 | int lo, hi; |
||
35 | |||
36 | /* |
||
37 | * At the top here, lo is the number of characters of elements in the |
||
38 | * current partition. (Which should be max - base). |
||
39 | * Find the median of the first, last, and middle element and make |
||
40 | * that the middle element. Set j to largest of first and middle. |
||
41 | * If max is larger than that guy, then it's that guy, else compare |
||
42 | * max with loser of first and take larger. Things are set up to |
||
43 | * prefer the middle, then the first in case of ties. |
||
44 | */ |
||
45 | lo = max - base; /* number of elements as chars */ |
||
46 | do { |
||
47 | mid = i = base + qsz * ((lo / qsz) >> 1); |
||
48 | if (lo >= mthresh) |
||
49 | { |
||
50 | j = (qcmp((jj = base), i) > 0 ? jj : i); |
||
51 | if (qcmp(j, (tmp = max - qsz)) > 0) |
||
52 | { |
||
53 | /* switch to first loser */ |
||
54 | j = (j == jj ? i : jj); |
||
55 | if (qcmp(j, tmp) < 0) |
||
56 | j = tmp; |
||
57 | } |
||
58 | if (j != i) |
||
59 | { |
||
60 | ii = qsz; |
||
61 | do { |
||
62 | c = *i; |
||
63 | *i++ = *j; |
||
64 | *j++ = c; |
||
65 | } while (--ii); |
||
66 | } |
||
67 | } |
||
68 | /* |
||
69 | * Semi-standard quicksort partitioning/swapping |
||
70 | */ |
||
71 | for (i = base, j = max - qsz; ; ) |
||
72 | { |
||
73 | while (i < mid && qcmp(i, mid) <= 0) |
||
74 | i += qsz; |
||
75 | while (j > mid) |
||
76 | { |
||
77 | if (qcmp(mid, j) <= 0) |
||
78 | { |
||
79 | j -= qsz; |
||
80 | continue; |
||
81 | } |
||
82 | tmp = i + qsz; /* value of i after swap */ |
||
83 | if (i == mid) |
||
84 | { |
||
85 | /* j <-> mid, new mid is j */ |
||
86 | mid = jj = j; |
||
87 | } |
||
88 | else |
||
89 | { |
||
90 | /* i <-> j */ |
||
91 | jj = j; |
||
92 | j -= qsz; |
||
93 | } |
||
94 | goto swap; |
||
95 | } |
||
96 | if (i == mid) |
||
97 | { |
||
98 | break; |
||
99 | } |
||
100 | else |
||
101 | { |
||
102 | /* i <-> mid, new mid is i */ |
||
103 | jj = mid; |
||
104 | tmp = mid = i; /* value of i after swap */ |
||
105 | j -= qsz; |
||
106 | } |
||
107 | swap: |
||
108 | ii = qsz; |
||
109 | do { |
||
110 | c = *i; |
||
111 | *i++ = *jj; |
||
112 | *jj++ = c; |
||
113 | } while (--ii); |
||
114 | i = tmp; |
||
115 | } |
||
116 | /* |
||
117 | * Look at sizes of the two partitions, do the smaller |
||
118 | * one first by recursion, then do the larger one by |
||
119 | * making sure lo is its size, base and max are update |
||
120 | * correctly, and branching back. But only repeat |
||
121 | * (recursively or by branching) if the partition is |
||
122 | * of at least size THRESH. |
||
123 | */ |
||
124 | i = (j = mid) + qsz; |
||
125 | if ((lo = j - base) <= (hi = max - i)) |
||
126 | { |
||
127 | if (lo >= thresh) |
||
128 | qst(base, j); |
||
129 | base = i; |
||
130 | lo = hi; |
||
131 | } |
||
132 | else |
||
133 | { |
||
134 | if (hi >= thresh) |
||
135 | qst(i, max); |
||
136 | max = j; |
||
137 | } |
||
138 | } while (lo >= thresh); |
||
139 | } |
||
140 | |||
141 | /* |
||
142 | * qsort: |
||
143 | * First, set up some global parameters for qst to share. Then, quicksort |
||
144 | * with qst(), and then a cleanup insertion sort ourselves. Sound simple? |
||
145 | * It's not... |
||
146 | */ |
||
147 | |||
148 | void qsort_g(void *base0, size_t n, size_t size, int (*compar)(const void *, const void *)) |
||
149 | { |
||
150 | char *base = (char *)base0; |
||
151 | char c, *i, *j, *lo, *hi; |
||
152 | char *min, *max; |
||
153 | |||
154 | if (n <= 1) |
||
155 | return; |
||
156 | qsz = size; |
||
157 | qcmp = compar; |
||
158 | thresh = qsz * THRESH; |
||
159 | mthresh = qsz * MTHRESH; |
||
160 | max = base + n * qsz; |
||
161 | if (n >= THRESH) |
||
162 | { |
||
163 | qst(base, max); |
||
164 | hi = base + thresh; |
||
165 | } |
||
166 | else |
||
167 | { |
||
168 | hi = max; |
||
169 | } |
||
170 | /* |
||
171 | * First put smallest element, which must be in the first THRESH, in |
||
172 | * the first position as a sentinel. This is done just by searching |
||
173 | * the first THRESH elements (or the first n if n < THRESH), finding |
||
174 | * the min, and swapping it into the first position. |
||
175 | */ |
||
176 | for (j = lo = base; (lo += qsz) < hi; ) |
||
177 | if (qcmp(j, lo) > 0) |
||
178 | j = lo; |
||
179 | if (j != base) |
||
180 | { |
||
181 | /* swap j into place */ |
||
182 | for (i = base, hi = base + qsz; i < hi; ) |
||
183 | { |
||
184 | c = *j; |
||
185 | *j++ = *i; |
||
186 | *i++ = c; |
||
187 | } |
||
188 | } |
||
189 | /* |
||
190 | * With our sentinel in place, we now run the following hyper-fast |
||
191 | * insertion sort. For each remaining element, min, from [1] to [n-1], |
||
192 | * set hi to the index of the element AFTER which this one goes. |
||
193 | * Then, do the standard insertion sort shift on a character at a time |
||
194 | * basis for each element in the frob. |
||
195 | */ |
||
196 | for (min = base; (hi = min += qsz) < max; ) |
||
197 | { |
||
198 | while (qcmp(hi -= qsz, min) > 0) |
||
199 | /* void */; |
||
200 | if ((hi += qsz) != min) { |
||
201 | for (lo = min + qsz; --lo >= min; ) |
||
202 | { |
||
203 | c = *lo; |
||
204 | for (i = j = lo; (j -= qsz) >= hi; i = j) |
||
205 | *i = *j; |
||
206 | *i = c; |
||
207 | } |
||
208 | } |
||
209 | } |
||
210 | } |
||
211 | |||
212 | #define STBTT_sort(data,num_items,item_size,compare_func) qsort_g(data,num_items,item_size,compare_func) |
||
213 | |||
214 | asm ("_floor: \n\t" |
||
215 | "pushl %ebp\n\t" |
||
216 | "movl %esp,%ebp\n\t" |
||
217 | "subl $8,%esp\n\t" |
||
218 | "fstcw -4(%ebp)\n\t" |
||
219 | "fwait\n\t" |
||
220 | "movw -4(%ebp),%ax\n\t" |
||
221 | "andw $0xf3ff,%ax\n\t" |
||
222 | "orw $0x0400,%ax\n\t" |
||
223 | "movw %ax,-2(%ebp)\n\t" |
||
224 | "fldcw -2(%ebp)\n\t" |
||
225 | "fldl 8(%ebp)\n\t" |
||
226 | "frndint\n\t" |
||
227 | "fldcw -4(%ebp)\n\t" |
||
228 | "movl %ebp,%esp\n\t" |
||
229 | "popl %ebp\n\t" |
||
230 | "ret"); |
||
231 | |||
232 | |||
233 | int i_floor (float x) { |
||
234 | int z; |
||
235 | z=x; |
||
236 | if (z+1>x) {return z;} else {return (z+1);} |
||
237 | } |
||
238 | |||
239 | int i_ceil (float x) { |
||
240 | int z; |
||
241 | z=x; |
||
242 | if (z>x) {return z;} else {return (z+1);} |
||
243 | } |
||
244 | |||
245 | |||
246 | double sqrt (double x) |
||
247 | { |
||
248 | if (x < 0.0F ) |
||
249 | { |
||
250 | return -1; |
||
251 | } |
||
252 | else |
||
253 | { |
||
254 | double res; |
||
255 | asm ("fsqrt" : "=t" (res) : "0" (x)); |
||
256 | return res; |
||
257 | } |
||
258 | } |
||
259 | |||
260 | #define STBTT_ifloor(x) ((int) i_floor(x)) |
||
261 | #define STBTT_iceil(x) ((int) i_ceil(x)) |
||
262 | |||
263 | static inline void *zmalloc(size_t size) { |
||
264 | void *val; |
||
265 | __asm__ __volatile__( "int $0x40":"=a"(val):"a"(68),"b"(12),"c"(size)); |
||
266 | return val; |
||
267 | } |
||
268 | |||
269 | static inline void zfree(void *p) |
||
270 | { |
||
271 | size_t foo; |
||
272 | asm volatile ("int $0x40":"=a"(foo):"a"(68), "b"(13), "c"(p)); |
||
273 | return; |
||
274 | } |
||
275 | |||
276 | #define STBTT_malloc(x,u) zmalloc(x) |
||
277 | #define STBTT_free(x,u) zfree(x) |
||
278 | |||
279 | #define assert_g(ignore)((void) 0) |
||
280 | |||
281 | #define STBTT_assert(x) assert_g(x) |
||
282 | |||
283 | int strlen_g(const char* string) |
||
284 | { |
||
285 | int i; |
||
286 | i=0; |
||
287 | while (*string++) i++; |
||
288 | return i; |
||
289 | } |
||
290 | |||
291 | #define STBTT_strlen(x) strlen_g(x) |
||
292 | |||
293 | void* zmemset(void *mem, int c, unsigned size) |
||
294 | { |
||
295 | unsigned i; |
||
296 | |||
297 | for ( i = 0; i < size; i++ ) |
||
298 | *((char *)mem+i) = (char) c; |
||
299 | |||
300 | return 0; |
||
301 | } |
||
302 | |||
303 | void* zmemcpy(void *dst, const void *src, unsigned size) |
||
304 | { |
||
305 | |||
306 | unsigned i; |
||
307 | |||
308 | for ( i = 0; i < size; i++) |
||
309 | *(char *)(dst+i) = *(char *)(src+i); |
||
310 | |||
311 | return 0; |
||
312 | } |
||
313 | |||
314 | #define STBTT_memcpy zmemcpy |
||
315 | #define STBTT_memset zmemset>>>>>>>=>=>->->->=>=>>> |