Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
3960 | Serge | 1 | /* mpg123 note: This is BSD-licensed code that is no problem for mpg123 usage under LGPL. |
2 | It's Free, understood? ;-) */ |
||
3 | |||
4 | /* Another note: This code is basically written by Thorsten Glaser, |
||
5 | Thomas Orgis did just some rearrangements and comments. */ |
||
6 | |||
7 | /*- |
||
8 | * Copyright (c) 2008 |
||
9 | * Thorsten Glaser |
||
10 | * |
||
11 | * Provided that these terms and disclaimer and all copyright notices |
||
12 | * are retained or reproduced in an accompanying document, permission |
||
13 | * is granted to deal in this work without restriction, including un- |
||
14 | * limited rights to use, publicly perform, distribute, sell, modify, |
||
15 | * merge, give away, or sublicence. |
||
16 | * |
||
17 | * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to |
||
18 | * the utmost extent permitted by applicable law, neither express nor |
||
19 | * implied; without malicious intent or gross negligence. In no event |
||
20 | * may a licensor, author or contributor be held liable for indirect, |
||
21 | * direct, other damage, loss, or other issues arising in any way out |
||
22 | * of dealing in the work, even if advised of the possibility of such |
||
23 | * damage or existence of a defect, except proven that it results out |
||
24 | * of said person's immediate fault when using the work as intended. |
||
25 | *- |
||
26 | * Convert from ICY encoding (windows-1252 codepage) to UTF-8 |
||
27 | */ |
||
28 | |||
29 | /* Includes string and stdlib headers... */ |
||
30 | #include "compat.h" |
||
31 | |||
32 | /* ThOr: too lazy for this type check; also we use char/short all around anyway. |
||
33 | Of cource, it would be the proper way to use _these_ kind of types all around. */ |
||
34 | #define uint8_t unsigned char |
||
35 | #define uint16_t unsigned short |
||
36 | |||
37 | static const uint8_t cp1252_utf8[] = { |
||
38 | /* 0x00 @ 0 */ 0x00, |
||
39 | /* 0x01 @ 1 */ 0x01, |
||
40 | /* 0x02 @ 2 */ 0x02, |
||
41 | /* 0x03 @ 3 */ 0x03, |
||
42 | /* 0x04 @ 4 */ 0x04, |
||
43 | /* 0x05 @ 5 */ 0x05, |
||
44 | /* 0x06 @ 6 */ 0x06, |
||
45 | /* 0x07 @ 7 */ 0x07, |
||
46 | /* 0x08 @ 8 */ 0x08, |
||
47 | /* 0x09 @ 9 */ 0x09, |
||
48 | /* 0x0A @ 10 */ 0x0A, |
||
49 | /* 0x0B @ 11 */ 0x0B, |
||
50 | /* 0x0C @ 12 */ 0x0C, |
||
51 | /* 0x0D @ 13 */ 0x0D, |
||
52 | /* 0x0E @ 14 */ 0x0E, |
||
53 | /* 0x0F @ 15 */ 0x0F, |
||
54 | /* 0x10 @ 16 */ 0x10, |
||
55 | /* 0x11 @ 17 */ 0x11, |
||
56 | /* 0x12 @ 18 */ 0x12, |
||
57 | /* 0x13 @ 19 */ 0x13, |
||
58 | /* 0x14 @ 20 */ 0x14, |
||
59 | /* 0x15 @ 21 */ 0x15, |
||
60 | /* 0x16 @ 22 */ 0x16, |
||
61 | /* 0x17 @ 23 */ 0x17, |
||
62 | /* 0x18 @ 24 */ 0x18, |
||
63 | /* 0x19 @ 25 */ 0x19, |
||
64 | /* 0x1A @ 26 */ 0x1A, |
||
65 | /* 0x1B @ 27 */ 0x1B, |
||
66 | /* 0x1C @ 28 */ 0x1C, |
||
67 | /* 0x1D @ 29 */ 0x1D, |
||
68 | /* 0x1E @ 30 */ 0x1E, |
||
69 | /* 0x1F @ 31 */ 0x1F, |
||
70 | /* 0x20 @ 32 */ 0x20, |
||
71 | /* 0x21 @ 33 */ 0x21, |
||
72 | /* 0x22 @ 34 */ 0x22, |
||
73 | /* 0x23 @ 35 */ 0x23, |
||
74 | /* 0x24 @ 36 */ 0x24, |
||
75 | /* 0x25 @ 37 */ 0x25, |
||
76 | /* 0x26 @ 38 */ 0x26, |
||
77 | /* 0x27 @ 39 */ 0x27, |
||
78 | /* 0x28 @ 40 */ 0x28, |
||
79 | /* 0x29 @ 41 */ 0x29, |
||
80 | /* 0x2A @ 42 */ 0x2A, |
||
81 | /* 0x2B @ 43 */ 0x2B, |
||
82 | /* 0x2C @ 44 */ 0x2C, |
||
83 | /* 0x2D @ 45 */ 0x2D, |
||
84 | /* 0x2E @ 46 */ 0x2E, |
||
85 | /* 0x2F @ 47 */ 0x2F, |
||
86 | /* 0x30 @ 48 */ 0x30, |
||
87 | /* 0x31 @ 49 */ 0x31, |
||
88 | /* 0x32 @ 50 */ 0x32, |
||
89 | /* 0x33 @ 51 */ 0x33, |
||
90 | /* 0x34 @ 52 */ 0x34, |
||
91 | /* 0x35 @ 53 */ 0x35, |
||
92 | /* 0x36 @ 54 */ 0x36, |
||
93 | /* 0x37 @ 55 */ 0x37, |
||
94 | /* 0x38 @ 56 */ 0x38, |
||
95 | /* 0x39 @ 57 */ 0x39, |
||
96 | /* 0x3A @ 58 */ 0x3A, |
||
97 | /* 0x3B @ 59 */ 0x3B, |
||
98 | /* 0x3C @ 60 */ 0x3C, |
||
99 | /* 0x3D @ 61 */ 0x3D, |
||
100 | /* 0x3E @ 62 */ 0x3E, |
||
101 | /* 0x3F @ 63 */ 0x3F, |
||
102 | /* 0x40 @ 64 */ 0x40, |
||
103 | /* 0x41 @ 65 */ 0x41, |
||
104 | /* 0x42 @ 66 */ 0x42, |
||
105 | /* 0x43 @ 67 */ 0x43, |
||
106 | /* 0x44 @ 68 */ 0x44, |
||
107 | /* 0x45 @ 69 */ 0x45, |
||
108 | /* 0x46 @ 70 */ 0x46, |
||
109 | /* 0x47 @ 71 */ 0x47, |
||
110 | /* 0x48 @ 72 */ 0x48, |
||
111 | /* 0x49 @ 73 */ 0x49, |
||
112 | /* 0x4A @ 74 */ 0x4A, |
||
113 | /* 0x4B @ 75 */ 0x4B, |
||
114 | /* 0x4C @ 76 */ 0x4C, |
||
115 | /* 0x4D @ 77 */ 0x4D, |
||
116 | /* 0x4E @ 78 */ 0x4E, |
||
117 | /* 0x4F @ 79 */ 0x4F, |
||
118 | /* 0x50 @ 80 */ 0x50, |
||
119 | /* 0x51 @ 81 */ 0x51, |
||
120 | /* 0x52 @ 82 */ 0x52, |
||
121 | /* 0x53 @ 83 */ 0x53, |
||
122 | /* 0x54 @ 84 */ 0x54, |
||
123 | /* 0x55 @ 85 */ 0x55, |
||
124 | /* 0x56 @ 86 */ 0x56, |
||
125 | /* 0x57 @ 87 */ 0x57, |
||
126 | /* 0x58 @ 88 */ 0x58, |
||
127 | /* 0x59 @ 89 */ 0x59, |
||
128 | /* 0x5A @ 90 */ 0x5A, |
||
129 | /* 0x5B @ 91 */ 0x5B, |
||
130 | /* 0x5C @ 92 */ 0x5C, |
||
131 | /* 0x5D @ 93 */ 0x5D, |
||
132 | /* 0x5E @ 94 */ 0x5E, |
||
133 | /* 0x5F @ 95 */ 0x5F, |
||
134 | /* 0x60 @ 96 */ 0x60, |
||
135 | /* 0x61 @ 97 */ 0x61, |
||
136 | /* 0x62 @ 98 */ 0x62, |
||
137 | /* 0x63 @ 99 */ 0x63, |
||
138 | /* 0x64 @ 100 */ 0x64, |
||
139 | /* 0x65 @ 101 */ 0x65, |
||
140 | /* 0x66 @ 102 */ 0x66, |
||
141 | /* 0x67 @ 103 */ 0x67, |
||
142 | /* 0x68 @ 104 */ 0x68, |
||
143 | /* 0x69 @ 105 */ 0x69, |
||
144 | /* 0x6A @ 106 */ 0x6A, |
||
145 | /* 0x6B @ 107 */ 0x6B, |
||
146 | /* 0x6C @ 108 */ 0x6C, |
||
147 | /* 0x6D @ 109 */ 0x6D, |
||
148 | /* 0x6E @ 110 */ 0x6E, |
||
149 | /* 0x6F @ 111 */ 0x6F, |
||
150 | /* 0x70 @ 112 */ 0x70, |
||
151 | /* 0x71 @ 113 */ 0x71, |
||
152 | /* 0x72 @ 114 */ 0x72, |
||
153 | /* 0x73 @ 115 */ 0x73, |
||
154 | /* 0x74 @ 116 */ 0x74, |
||
155 | /* 0x75 @ 117 */ 0x75, |
||
156 | /* 0x76 @ 118 */ 0x76, |
||
157 | /* 0x77 @ 119 */ 0x77, |
||
158 | /* 0x78 @ 120 */ 0x78, |
||
159 | /* 0x79 @ 121 */ 0x79, |
||
160 | /* 0x7A @ 122 */ 0x7A, |
||
161 | /* 0x7B @ 123 */ 0x7B, |
||
162 | /* 0x7C @ 124 */ 0x7C, |
||
163 | /* 0x7D @ 125 */ 0x7D, |
||
164 | /* 0x7E @ 126 */ 0x7E, |
||
165 | /* 0x7F @ 127 */ 0x7F, |
||
166 | /* 0x80 @ 128 */ 0xE2, 0x82, 0xAC, |
||
167 | /* 0x81 @ 131 */ 0xEF, 0xBF, 0xBD, |
||
168 | /* 0x82 @ 134 */ 0xE2, 0x80, 0x9A, |
||
169 | /* 0x83 @ 137 */ 0xC6, 0x92, |
||
170 | /* 0x84 @ 139 */ 0xE2, 0x80, 0x9E, |
||
171 | /* 0x85 @ 142 */ 0xE2, 0x80, 0xA6, |
||
172 | /* 0x86 @ 145 */ 0xE2, 0x80, 0xA0, |
||
173 | /* 0x87 @ 148 */ 0xE2, 0x80, 0xA1, |
||
174 | /* 0x88 @ 151 */ 0xCB, 0x86, |
||
175 | /* 0x89 @ 153 */ 0xE2, 0x80, 0xB0, |
||
176 | /* 0x8A @ 156 */ 0xC5, 0xA0, |
||
177 | /* 0x8B @ 158 */ 0xE2, 0x80, 0xB9, |
||
178 | /* 0x8C @ 161 */ 0xC5, 0x92, |
||
179 | /* 0x8D @ 163 */ 0xEF, 0xBF, 0xBD, |
||
180 | /* 0x8E @ 166 */ 0xC5, 0xBD, |
||
181 | /* 0x8F @ 168 */ 0xEF, 0xBF, 0xBD, |
||
182 | /* 0x90 @ 171 */ 0xEF, 0xBF, 0xBD, |
||
183 | /* 0x91 @ 174 */ 0xE2, 0x80, 0x98, |
||
184 | /* 0x92 @ 177 */ 0xE2, 0x80, 0x99, |
||
185 | /* 0x93 @ 180 */ 0xE2, 0x80, 0x9C, |
||
186 | /* 0x94 @ 183 */ 0xE2, 0x80, 0x9D, |
||
187 | /* 0x95 @ 186 */ 0xE2, 0x80, 0xA2, |
||
188 | /* 0x96 @ 189 */ 0xE2, 0x80, 0x93, |
||
189 | /* 0x97 @ 192 */ 0xE2, 0x80, 0x94, |
||
190 | /* 0x98 @ 195 */ 0xCB, 0x9C, |
||
191 | /* 0x99 @ 197 */ 0xE2, 0x84, 0xA2, |
||
192 | /* 0x9A @ 200 */ 0xC5, 0xA1, |
||
193 | /* 0x9B @ 202 */ 0xE2, 0x80, 0xBA, |
||
194 | /* 0x9C @ 205 */ 0xC5, 0x93, |
||
195 | /* 0x9D @ 207 */ 0xEF, 0xBF, 0xBD, |
||
196 | /* 0x9E @ 210 */ 0xC5, 0xBE, |
||
197 | /* 0x9F @ 212 */ 0xC5, 0xB8, |
||
198 | /* 0xA0 @ 214 */ 0xC2, 0xA0, |
||
199 | /* 0xA1 @ 216 */ 0xC2, 0xA1, |
||
200 | /* 0xA2 @ 218 */ 0xC2, 0xA2, |
||
201 | /* 0xA3 @ 220 */ 0xC2, 0xA3, |
||
202 | /* 0xA4 @ 222 */ 0xC2, 0xA4, |
||
203 | /* 0xA5 @ 224 */ 0xC2, 0xA5, |
||
204 | /* 0xA6 @ 226 */ 0xC2, 0xA6, |
||
205 | /* 0xA7 @ 228 */ 0xC2, 0xA7, |
||
206 | /* 0xA8 @ 230 */ 0xC2, 0xA8, |
||
207 | /* 0xA9 @ 232 */ 0xC2, 0xA9, |
||
208 | /* 0xAA @ 234 */ 0xC2, 0xAA, |
||
209 | /* 0xAB @ 236 */ 0xC2, 0xAB, |
||
210 | /* 0xAC @ 238 */ 0xC2, 0xAC, |
||
211 | /* 0xAD @ 240 */ 0xC2, 0xAD, |
||
212 | /* 0xAE @ 242 */ 0xC2, 0xAE, |
||
213 | /* 0xAF @ 244 */ 0xC2, 0xAF, |
||
214 | /* 0xB0 @ 246 */ 0xC2, 0xB0, |
||
215 | /* 0xB1 @ 248 */ 0xC2, 0xB1, |
||
216 | /* 0xB2 @ 250 */ 0xC2, 0xB2, |
||
217 | /* 0xB3 @ 252 */ 0xC2, 0xB3, |
||
218 | /* 0xB4 @ 254 */ 0xC2, 0xB4, |
||
219 | /* 0xB5 @ 256 */ 0xC2, 0xB5, |
||
220 | /* 0xB6 @ 258 */ 0xC2, 0xB6, |
||
221 | /* 0xB7 @ 260 */ 0xC2, 0xB7, |
||
222 | /* 0xB8 @ 262 */ 0xC2, 0xB8, |
||
223 | /* 0xB9 @ 264 */ 0xC2, 0xB9, |
||
224 | /* 0xBA @ 266 */ 0xC2, 0xBA, |
||
225 | /* 0xBB @ 268 */ 0xC2, 0xBB, |
||
226 | /* 0xBC @ 270 */ 0xC2, 0xBC, |
||
227 | /* 0xBD @ 272 */ 0xC2, 0xBD, |
||
228 | /* 0xBE @ 274 */ 0xC2, 0xBE, |
||
229 | /* 0xBF @ 276 */ 0xC2, 0xBF, |
||
230 | /* 0xC0 @ 278 */ 0xC3, 0x80, |
||
231 | /* 0xC1 @ 280 */ 0xC3, 0x81, |
||
232 | /* 0xC2 @ 282 */ 0xC3, 0x82, |
||
233 | /* 0xC3 @ 284 */ 0xC3, 0x83, |
||
234 | /* 0xC4 @ 286 */ 0xC3, 0x84, |
||
235 | /* 0xC5 @ 288 */ 0xC3, 0x85, |
||
236 | /* 0xC6 @ 290 */ 0xC3, 0x86, |
||
237 | /* 0xC7 @ 292 */ 0xC3, 0x87, |
||
238 | /* 0xC8 @ 294 */ 0xC3, 0x88, |
||
239 | /* 0xC9 @ 296 */ 0xC3, 0x89, |
||
240 | /* 0xCA @ 298 */ 0xC3, 0x8A, |
||
241 | /* 0xCB @ 300 */ 0xC3, 0x8B, |
||
242 | /* 0xCC @ 302 */ 0xC3, 0x8C, |
||
243 | /* 0xCD @ 304 */ 0xC3, 0x8D, |
||
244 | /* 0xCE @ 306 */ 0xC3, 0x8E, |
||
245 | /* 0xCF @ 308 */ 0xC3, 0x8F, |
||
246 | /* 0xD0 @ 310 */ 0xC3, 0x90, |
||
247 | /* 0xD1 @ 312 */ 0xC3, 0x91, |
||
248 | /* 0xD2 @ 314 */ 0xC3, 0x92, |
||
249 | /* 0xD3 @ 316 */ 0xC3, 0x93, |
||
250 | /* 0xD4 @ 318 */ 0xC3, 0x94, |
||
251 | /* 0xD5 @ 320 */ 0xC3, 0x95, |
||
252 | /* 0xD6 @ 322 */ 0xC3, 0x96, |
||
253 | /* 0xD7 @ 324 */ 0xC3, 0x97, |
||
254 | /* 0xD8 @ 326 */ 0xC3, 0x98, |
||
255 | /* 0xD9 @ 328 */ 0xC3, 0x99, |
||
256 | /* 0xDA @ 330 */ 0xC3, 0x9A, |
||
257 | /* 0xDB @ 332 */ 0xC3, 0x9B, |
||
258 | /* 0xDC @ 334 */ 0xC3, 0x9C, |
||
259 | /* 0xDD @ 336 */ 0xC3, 0x9D, |
||
260 | /* 0xDE @ 338 */ 0xC3, 0x9E, |
||
261 | /* 0xDF @ 340 */ 0xC3, 0x9F, |
||
262 | /* 0xE0 @ 342 */ 0xC3, 0xA0, |
||
263 | /* 0xE1 @ 344 */ 0xC3, 0xA1, |
||
264 | /* 0xE2 @ 346 */ 0xC3, 0xA2, |
||
265 | /* 0xE3 @ 348 */ 0xC3, 0xA3, |
||
266 | /* 0xE4 @ 350 */ 0xC3, 0xA4, |
||
267 | /* 0xE5 @ 352 */ 0xC3, 0xA5, |
||
268 | /* 0xE6 @ 354 */ 0xC3, 0xA6, |
||
269 | /* 0xE7 @ 356 */ 0xC3, 0xA7, |
||
270 | /* 0xE8 @ 358 */ 0xC3, 0xA8, |
||
271 | /* 0xE9 @ 360 */ 0xC3, 0xA9, |
||
272 | /* 0xEA @ 362 */ 0xC3, 0xAA, |
||
273 | /* 0xEB @ 364 */ 0xC3, 0xAB, |
||
274 | /* 0xEC @ 366 */ 0xC3, 0xAC, |
||
275 | /* 0xED @ 368 */ 0xC3, 0xAD, |
||
276 | /* 0xEE @ 370 */ 0xC3, 0xAE, |
||
277 | /* 0xEF @ 372 */ 0xC3, 0xAF, |
||
278 | /* 0xF0 @ 374 */ 0xC3, 0xB0, |
||
279 | /* 0xF1 @ 376 */ 0xC3, 0xB1, |
||
280 | /* 0xF2 @ 378 */ 0xC3, 0xB2, |
||
281 | /* 0xF3 @ 380 */ 0xC3, 0xB3, |
||
282 | /* 0xF4 @ 382 */ 0xC3, 0xB4, |
||
283 | /* 0xF5 @ 384 */ 0xC3, 0xB5, |
||
284 | /* 0xF6 @ 386 */ 0xC3, 0xB6, |
||
285 | /* 0xF7 @ 388 */ 0xC3, 0xB7, |
||
286 | /* 0xF8 @ 390 */ 0xC3, 0xB8, |
||
287 | /* 0xF9 @ 392 */ 0xC3, 0xB9, |
||
288 | /* 0xFA @ 394 */ 0xC3, 0xBA, |
||
289 | /* 0xFB @ 396 */ 0xC3, 0xBB, |
||
290 | /* 0xFC @ 398 */ 0xC3, 0xBC, |
||
291 | /* 0xFD @ 400 */ 0xC3, 0xBD, |
||
292 | /* 0xFE @ 402 */ 0xC3, 0xBE, |
||
293 | /* 0xFF @ 404 */ 0xC3, 0xBF, |
||
294 | }; |
||
295 | |||
296 | static const uint16_t tblofs[257] = { |
||
297 | /* 0x00 */ 0, 1, 2, 3, 4, 5, 6, 7, |
||
298 | /* 0x08 */ 8, 9, 10, 11, 12, 13, 14, 15, |
||
299 | /* 0x10 */ 16, 17, 18, 19, 20, 21, 22, 23, |
||
300 | /* 0x18 */ 24, 25, 26, 27, 28, 29, 30, 31, |
||
301 | /* 0x20 */ 32, 33, 34, 35, 36, 37, 38, 39, |
||
302 | /* 0x28 */ 40, 41, 42, 43, 44, 45, 46, 47, |
||
303 | /* 0x30 */ 48, 49, 50, 51, 52, 53, 54, 55, |
||
304 | /* 0x38 */ 56, 57, 58, 59, 60, 61, 62, 63, |
||
305 | /* 0x40 */ 64, 65, 66, 67, 68, 69, 70, 71, |
||
306 | /* 0x48 */ 72, 73, 74, 75, 76, 77, 78, 79, |
||
307 | /* 0x50 */ 80, 81, 82, 83, 84, 85, 86, 87, |
||
308 | /* 0x58 */ 88, 89, 90, 91, 92, 93, 94, 95, |
||
309 | /* 0x60 */ 96, 97, 98, 99, 100, 101, 102, 103, |
||
310 | /* 0x68 */ 104, 105, 106, 107, 108, 109, 110, 111, |
||
311 | /* 0x70 */ 112, 113, 114, 115, 116, 117, 118, 119, |
||
312 | /* 0x78 */ 120, 121, 122, 123, 124, 125, 126, 127, |
||
313 | /* 0x80 */ 128, 131, 134, 137, 139, 142, 145, 148, |
||
314 | /* 0x88 */ 151, 153, 156, 158, 161, 163, 166, 168, |
||
315 | /* 0x90 */ 171, 174, 177, 180, 183, 186, 189, 192, |
||
316 | /* 0x98 */ 195, 197, 200, 202, 205, 207, 210, 212, |
||
317 | /* 0xA0 */ 214, 216, 218, 220, 222, 224, 226, 228, |
||
318 | /* 0xA8 */ 230, 232, 234, 236, 238, 240, 242, 244, |
||
319 | /* 0xB0 */ 246, 248, 250, 252, 254, 256, 258, 260, |
||
320 | /* 0xB8 */ 262, 264, 266, 268, 270, 272, 274, 276, |
||
321 | /* 0xC0 */ 278, 280, 282, 284, 286, 288, 290, 292, |
||
322 | /* 0xC8 */ 294, 296, 298, 300, 302, 304, 306, 308, |
||
323 | /* 0xD0 */ 310, 312, 314, 316, 318, 320, 322, 324, |
||
324 | /* 0xD8 */ 326, 328, 330, 332, 334, 336, 338, 340, |
||
325 | /* 0xE0 */ 342, 344, 346, 348, 350, 352, 354, 356, |
||
326 | /* 0xE8 */ 358, 360, 362, 364, 366, 368, 370, 372, |
||
327 | /* 0xF0 */ 374, 376, 378, 380, 382, 384, 386, 388, |
||
328 | /* 0xF8 */ 390, 392, 394, 396, 398, 400, 402, 404, |
||
329 | /* sizeof (cp1252_utf8) */ 406 |
||
330 | }; |
||
331 | |||
332 | /* Check if a string qualifies as UTF-8. */ |
||
333 | static int |
||
334 | is_utf8(const char* src) |
||
335 | { |
||
336 | uint8_t ch; |
||
337 | size_t i; |
||
338 | const uint8_t* s = (const uint8_t*) src; |
||
339 | |||
340 | /* We make a loop over every character, until we find a null one. |
||
341 | Remember: The string is supposed to end with a NUL, so ahead checks are safe. */ |
||
342 | while ((ch = *s++)) { |
||
343 | /* Ye olde 7bit ASCII chars 'rr fine for anything */ |
||
344 | if(ch < 0x80) continue; |
||
345 | |||
346 | /* Now, we watch out for non-UTF conform sequences. */ |
||
347 | else if ((ch < 0xC2) || (ch > 0xFD)) |
||
348 | return 0; |
||
349 | /* check for some misformed sequences */ |
||
350 | if (((ch == 0xC2) && (s[0] < 0xA0)) || |
||
351 | ((ch == 0xEF) && (s[0] == 0xBF) && (s[1] > 0xBD))) |
||
352 | /* XXX add more for outside the BMP */ |
||
353 | return 0; |
||
354 | |||
355 | /* Check the continuation bytes. */ |
||
356 | if (ch < 0xE0) i = 1; |
||
357 | else if (ch < 0xF0) i = 2; |
||
358 | else if (ch < 0xF8) i = 3; |
||
359 | else if (ch < 0xFC) i = 4; |
||
360 | else |
||
361 | i = 5; |
||
362 | |||
363 | while (i--) |
||
364 | if ((*s++ & 0xC0) != 0x80) |
||
365 | return 0; |
||
366 | } |
||
367 | |||
368 | /* If no check failed, the string indeed looks like valid UTF-8. */ |
||
369 | return 1; |
||
370 | } |
||
371 | |||
372 | /* The main conversion routine. |
||
373 | ICY in CP-1252 (or UTF-8 alreay) to UTF-8 encoded string. |
||
374 | If force is applied, it will always encode to UTF-8, without checking. */ |
||
375 | char * |
||
376 | icy2utf8(const char *src, int force) |
||
377 | { |
||
378 | const uint8_t *s = (const uint8_t *)src; |
||
379 | size_t srclen, dstlen, i, k; |
||
380 | uint8_t ch, *d; |
||
381 | char *dst; |
||
382 | |||
383 | /* Some funny streams from Apple/iTunes give ICY info in UTF-8 already. |
||
384 | So, be prepared and don't try to re-encode such. Unless forced. */ |
||
385 | if(!force && is_utf8(src)) return (strdup(src)); |
||
386 | |||
387 | srclen = strlen(src) + 1; |
||
388 | /* allocate conservatively */ |
||
389 | if ((d = malloc(srclen * 3)) == NULL) |
||
390 | return (NULL); |
||
391 | |||
392 | i = 0; |
||
393 | dstlen = 0; |
||
394 | while (i < srclen) { |
||
395 | ch = s[i++]; |
||
396 | k = tblofs[ch]; |
||
397 | while (k < tblofs[ch + 1]) |
||
398 | d[dstlen++] = cp1252_utf8[k++]; |
||
399 | } |
||
400 | |||
401 | /* dstlen includes trailing NUL since srclen also does */ |
||
402 | if ((dst = realloc(d, dstlen)) == NULL) { |
||
403 | free(d); |
||
404 | return (NULL); |
||
405 | } |
||
406 | return (dst); |
||
407 | } |
||
408 | |||
/* This stuff is for testing only. */
#ifdef TEST
#include <stdio.h>

/* Sample input as raw bytes 0x95, "Gr", 0xFC, 0xDF, "e kosten 0,55 ",
   0x80, 0x85 -- i.e. text that is not valid UTF-8 and must be converted. */
static const char intext[] = "\225 Gr\374\337e kosten 0,55 \200\205";

/*
 * Convert the sample once, then feed the result back in without force to
 * show that already-converted text is passed through unchanged.
 * Prints both strings; returns 0 on success, 1 on allocation failure.
 */
int
main(void)
{
	char *t, *t2;

	if ((t = icy2utf8(intext, 0)) == NULL) {
		fprintf(stderr, "out of memory\n");
		return (1);
	}

	/* make sure it won't be converted twice */
	if ((t2 = icy2utf8(t, 0)) == NULL) {
		fprintf(stderr, "out of memory\n");
		free(t);
		return (1);
	}

	printf("Result is:\t\343\200\214%s\343\200\215\n"
	    "\t\t\343\200\214%s\343\200\215\n", t, t2);

	free(t);
	free(t2);
	return (0);
}
#endif