Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
3960 Serge 1
/* mpg123 note: This is BSD-licensed code that is no problem for mpg123 usage under LGPL.
   It's Free, understood? ;-) */

/* Another note: This code is basically written by Thorsten Glaser,
   Thomas Orgis did just some rearrangements and comments. */

/*-
 * Copyright (c) 2008
 *	Thorsten Glaser
 *
 * Provided that these terms and disclaimer and all copyright notices
 * are retained or reproduced in an accompanying document, permission
 * is granted to deal in this work without restriction, including un-
 * limited rights to use, publicly perform, distribute, sell, modify,
 * merge, give away, or sublicence.
 *
 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
 * the utmost extent permitted by applicable law, neither express nor
 * implied; without malicious intent or gross negligence. In no event
 * may a licensor, author or contributor be held liable for indirect,
 * direct, other damage, loss, or other issues arising in any way out
 * of dealing in the work, even if advised of the possibility of such
 * damage or existence of a defect, except proven that it results out
 * of said person's immediate fault when using the work as intended.
 *-
 * Convert from ICY encoding (windows-1252 codepage) to UTF-8
 */
28
 
29
/* Includes string and stdlib headers... */
30
#include "compat.h"
31
 
32
/* ThOr: too lazy for this type check; also we use char/short all around anyway.
33
   Of cource, it would be the proper way to use _these_ kind of types all around. */
34
#define uint8_t  unsigned char
35
#define uint16_t unsigned short
36
 
37
/*
 * UTF-8 encodings of all 256 windows-1252 byte values, stored back to back
 * in input-byte order.  Entries are 1, 2 or 3 bytes long, so the table is
 * not indexed directly; the start offset of the entry for input byte c is
 * given by the companion offset table, and the entry ends where the next
 * one begins.  The "@ n" annotations give the offset of the first byte of
 * the entry (or of the first entry in a row).
 *
 * Total size: 128 * 1 + 86 (0x80..0x9F) + 96 * 2 = 406 bytes.
 */
static const uint8_t cp1252_utf8[] = {
	/* 0x00..0x7F: ASCII, encoded as itself (one byte each). */
	/* 0x00 @   0 */	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	/* 0x08 @   8 */	0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
	/* 0x10 @  16 */	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
	/* 0x18 @  24 */	0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
	/* 0x20 @  32 */	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
	/* 0x28 @  40 */	0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
	/* 0x30 @  48 */	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
	/* 0x38 @  56 */	0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
	/* 0x40 @  64 */	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
	/* 0x48 @  72 */	0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
	/* 0x50 @  80 */	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
	/* 0x58 @  88 */	0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
	/* 0x60 @  96 */	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	/* 0x68 @ 104 */	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
	/* 0x70 @ 112 */	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	/* 0x78 @ 120 */	0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,

	/* 0x80..0x9F: the windows-1252 specific range.  Byte values that
	   have no assignment are mapped to U+FFFD REPLACEMENT CHARACTER. */
	/* 0x80 @ 128 */	0xE2, 0x82, 0xAC,	/* U+20AC euro sign */
	/* 0x81 @ 131 */	0xEF, 0xBF, 0xBD,	/* U+FFFD (unassigned) */
	/* 0x82 @ 134 */	0xE2, 0x80, 0x9A,	/* U+201A single low-9 quote */
	/* 0x83 @ 137 */	0xC6, 0x92,		/* U+0192 f with hook */
	/* 0x84 @ 139 */	0xE2, 0x80, 0x9E,	/* U+201E double low-9 quote */
	/* 0x85 @ 142 */	0xE2, 0x80, 0xA6,	/* U+2026 horizontal ellipsis */
	/* 0x86 @ 145 */	0xE2, 0x80, 0xA0,	/* U+2020 dagger */
	/* 0x87 @ 148 */	0xE2, 0x80, 0xA1,	/* U+2021 double dagger */
	/* 0x88 @ 151 */	0xCB, 0x86,		/* U+02C6 modifier circumflex */
	/* 0x89 @ 153 */	0xE2, 0x80, 0xB0,	/* U+2030 per mille sign */
	/* 0x8A @ 156 */	0xC5, 0xA0,		/* U+0160 S with caron */
	/* 0x8B @ 158 */	0xE2, 0x80, 0xB9,	/* U+2039 single left angle quote */
	/* 0x8C @ 161 */	0xC5, 0x92,		/* U+0152 ligature OE */
	/* 0x8D @ 163 */	0xEF, 0xBF, 0xBD,	/* U+FFFD (unassigned) */
	/* 0x8E @ 166 */	0xC5, 0xBD,		/* U+017D Z with caron */
	/* 0x8F @ 168 */	0xEF, 0xBF, 0xBD,	/* U+FFFD (unassigned) */
	/* 0x90 @ 171 */	0xEF, 0xBF, 0xBD,	/* U+FFFD (unassigned) */
	/* 0x91 @ 174 */	0xE2, 0x80, 0x98,	/* U+2018 left single quote */
	/* 0x92 @ 177 */	0xE2, 0x80, 0x99,	/* U+2019 right single quote */
	/* 0x93 @ 180 */	0xE2, 0x80, 0x9C,	/* U+201C left double quote */
	/* 0x94 @ 183 */	0xE2, 0x80, 0x9D,	/* U+201D right double quote */
	/* 0x95 @ 186 */	0xE2, 0x80, 0xA2,	/* U+2022 bullet */
	/* 0x96 @ 189 */	0xE2, 0x80, 0x93,	/* U+2013 en dash */
	/* 0x97 @ 192 */	0xE2, 0x80, 0x94,	/* U+2014 em dash */
	/* 0x98 @ 195 */	0xCB, 0x9C,		/* U+02DC small tilde */
	/* 0x99 @ 197 */	0xE2, 0x84, 0xA2,	/* U+2122 trade mark sign */
	/* 0x9A @ 200 */	0xC5, 0xA1,		/* U+0161 s with caron */
	/* 0x9B @ 202 */	0xE2, 0x80, 0xBA,	/* U+203A single right angle quote */
	/* 0x9C @ 205 */	0xC5, 0x93,		/* U+0153 ligature oe */
	/* 0x9D @ 207 */	0xEF, 0xBF, 0xBD,	/* U+FFFD (unassigned) */
	/* 0x9E @ 210 */	0xC5, 0xBE,		/* U+017E z with caron */
	/* 0x9F @ 212 */	0xC5, 0xB8,		/* U+0178 Y with diaeresis */

	/* 0xA0..0xBF: U+00A0..U+00BF, two bytes each (0xC2, same byte). */
	/* 0xA0 @ 214 */	0xC2, 0xA0,  0xC2, 0xA1,  0xC2, 0xA2,  0xC2, 0xA3,
	/* 0xA4 @ 222 */	0xC2, 0xA4,  0xC2, 0xA5,  0xC2, 0xA6,  0xC2, 0xA7,
	/* 0xA8 @ 230 */	0xC2, 0xA8,  0xC2, 0xA9,  0xC2, 0xAA,  0xC2, 0xAB,
	/* 0xAC @ 238 */	0xC2, 0xAC,  0xC2, 0xAD,  0xC2, 0xAE,  0xC2, 0xAF,
	/* 0xB0 @ 246 */	0xC2, 0xB0,  0xC2, 0xB1,  0xC2, 0xB2,  0xC2, 0xB3,
	/* 0xB4 @ 254 */	0xC2, 0xB4,  0xC2, 0xB5,  0xC2, 0xB6,  0xC2, 0xB7,
	/* 0xB8 @ 262 */	0xC2, 0xB8,  0xC2, 0xB9,  0xC2, 0xBA,  0xC2, 0xBB,
	/* 0xBC @ 270 */	0xC2, 0xBC,  0xC2, 0xBD,  0xC2, 0xBE,  0xC2, 0xBF,

	/* 0xC0..0xFF: U+00C0..U+00FF, two bytes each (0xC3, byte - 0x40). */
	/* 0xC0 @ 278 */	0xC3, 0x80,  0xC3, 0x81,  0xC3, 0x82,  0xC3, 0x83,
	/* 0xC4 @ 286 */	0xC3, 0x84,  0xC3, 0x85,  0xC3, 0x86,  0xC3, 0x87,
	/* 0xC8 @ 294 */	0xC3, 0x88,  0xC3, 0x89,  0xC3, 0x8A,  0xC3, 0x8B,
	/* 0xCC @ 302 */	0xC3, 0x8C,  0xC3, 0x8D,  0xC3, 0x8E,  0xC3, 0x8F,
	/* 0xD0 @ 310 */	0xC3, 0x90,  0xC3, 0x91,  0xC3, 0x92,  0xC3, 0x93,
	/* 0xD4 @ 318 */	0xC3, 0x94,  0xC3, 0x95,  0xC3, 0x96,  0xC3, 0x97,
	/* 0xD8 @ 326 */	0xC3, 0x98,  0xC3, 0x99,  0xC3, 0x9A,  0xC3, 0x9B,
	/* 0xDC @ 334 */	0xC3, 0x9C,  0xC3, 0x9D,  0xC3, 0x9E,  0xC3, 0x9F,
	/* 0xE0 @ 342 */	0xC3, 0xA0,  0xC3, 0xA1,  0xC3, 0xA2,  0xC3, 0xA3,
	/* 0xE4 @ 350 */	0xC3, 0xA4,  0xC3, 0xA5,  0xC3, 0xA6,  0xC3, 0xA7,
	/* 0xE8 @ 358 */	0xC3, 0xA8,  0xC3, 0xA9,  0xC3, 0xAA,  0xC3, 0xAB,
	/* 0xEC @ 366 */	0xC3, 0xAC,  0xC3, 0xAD,  0xC3, 0xAE,  0xC3, 0xAF,
	/* 0xF0 @ 374 */	0xC3, 0xB0,  0xC3, 0xB1,  0xC3, 0xB2,  0xC3, 0xB3,
	/* 0xF4 @ 382 */	0xC3, 0xB4,  0xC3, 0xB5,  0xC3, 0xB6,  0xC3, 0xB7,
	/* 0xF8 @ 390 */	0xC3, 0xB8,  0xC3, 0xB9,  0xC3, 0xBA,  0xC3, 0xBB,
	/* 0xFC @ 398 */	0xC3, 0xBC,  0xC3, 0xBD,  0xC3, 0xBE,  0xC3, 0xBF,
};
295
 
296
/*
 * Start offset, within the cp1252 -> UTF-8 byte table, of the encoding of
 * each input byte value.  The table has one extra trailing element holding
 * the total size of the byte table, so the encoding of byte c always spans
 * offsets tblofs[c] .. tblofs[c + 1] - 1 and needs no special case for 0xFF.
 */
static const uint16_t tblofs[257] = {
	/* 0x00 */   0,   1,   2,   3,   4,   5,   6,   7,
	/* 0x08 */   8,   9,  10,  11,  12,  13,  14,  15,
	/* 0x10 */  16,  17,  18,  19,  20,  21,  22,  23,
	/* 0x18 */  24,  25,  26,  27,  28,  29,  30,  31,
	/* 0x20 */  32,  33,  34,  35,  36,  37,  38,  39,
	/* 0x28 */  40,  41,  42,  43,  44,  45,  46,  47,
	/* 0x30 */  48,  49,  50,  51,  52,  53,  54,  55,
	/* 0x38 */  56,  57,  58,  59,  60,  61,  62,  63,
	/* 0x40 */  64,  65,  66,  67,  68,  69,  70,  71,
	/* 0x48 */  72,  73,  74,  75,  76,  77,  78,  79,
	/* 0x50 */  80,  81,  82,  83,  84,  85,  86,  87,
	/* 0x58 */  88,  89,  90,  91,  92,  93,  94,  95,
	/* 0x60 */  96,  97,  98,  99, 100, 101, 102, 103,
	/* 0x68 */ 104, 105, 106, 107, 108, 109, 110, 111,
	/* 0x70 */ 112, 113, 114, 115, 116, 117, 118, 119,
	/* 0x78 */ 120, 121, 122, 123, 124, 125, 126, 127,
	/* 0x80 */ 128, 131, 134, 137, 139, 142, 145, 148,
	/* 0x88 */ 151, 153, 156, 158, 161, 163, 166, 168,
	/* 0x90 */ 171, 174, 177, 180, 183, 186, 189, 192,
	/* 0x98 */ 195, 197, 200, 202, 205, 207, 210, 212,
	/* 0xA0 */ 214, 216, 218, 220, 222, 224, 226, 228,
	/* 0xA8 */ 230, 232, 234, 236, 238, 240, 242, 244,
	/* 0xB0 */ 246, 248, 250, 252, 254, 256, 258, 260,
	/* 0xB8 */ 262, 264, 266, 268, 270, 272, 274, 276,
	/* 0xC0 */ 278, 280, 282, 284, 286, 288, 290, 292,
	/* 0xC8 */ 294, 296, 298, 300, 302, 304, 306, 308,
	/* 0xD0 */ 310, 312, 314, 316, 318, 320, 322, 324,
	/* 0xD8 */ 326, 328, 330, 332, 334, 336, 338, 340,
	/* 0xE0 */ 342, 344, 346, 348, 350, 352, 354, 356,
	/* 0xE8 */ 358, 360, 362, 364, 366, 368, 370, 372,
	/* 0xF0 */ 374, 376, 378, 380, 382, 384, 386, 388,
	/* 0xF8 */ 390, 392, 394, 396, 398, 400, 402, 404,
	/* sizeof (cp1252_utf8) */ 406
};
331
 
332
/* Check if a string qualifies as UTF-8. */
333
static int
334
is_utf8(const char* src)
335
{
336
	uint8_t ch;
337
	size_t i;
338
	const uint8_t* s = (const uint8_t*) src;
339
 
340
	/* We make a loop over every character, until we find a null one.
341
	   Remember: The string is supposed to end with a NUL, so ahead checks are safe. */
342
	while ((ch = *s++))	{
343
		/* Ye olde 7bit ASCII chars 'rr fine for anything */
344
		if(ch < 0x80) continue;
345
 
346
		/* Now, we watch out for non-UTF conform sequences. */
347
		else if ((ch < 0xC2) || (ch > 0xFD))
348
			return 0;
349
		/* check for some misformed sequences */
350
		if (((ch == 0xC2) && (s[0] < 0xA0)) ||
351
		    ((ch == 0xEF) && (s[0] == 0xBF) && (s[1] > 0xBD)))
352
			/* XXX add more for outside the BMP */
353
			return 0;
354
 
355
		/* Check the continuation bytes. */
356
		if      (ch < 0xE0) i = 1;
357
		else if (ch < 0xF0) i = 2;
358
		else if (ch < 0xF8)	i = 3;
359
		else if (ch < 0xFC)	i = 4;
360
		else
361
			i = 5;
362
 
363
		while (i--)
364
			if ((*s++ & 0xC0) != 0x80)
365
				return 0;
366
	}
367
 
368
	/* If no check failed, the string indeed looks like valid UTF-8. */
369
	return 1;
370
}
371
 
372
/* The main conversion routine.
373
   ICY in CP-1252 (or UTF-8 alreay) to UTF-8 encoded string.
374
   If force is applied, it will always encode to UTF-8, without checking. */
375
char *
376
icy2utf8(const char *src, int force)
377
{
378
	const uint8_t *s = (const uint8_t *)src;
379
	size_t srclen, dstlen, i, k;
380
	uint8_t ch, *d;
381
	char *dst;
382
 
383
	/* Some funny streams from Apple/iTunes give ICY info in UTF-8 already.
384
	   So, be prepared and don't try to re-encode such. Unless forced. */
385
	if(!force && is_utf8(src)) return (strdup(src));
386
 
387
	srclen = strlen(src) + 1;
388
	/* allocate conservatively */
389
	if ((d = malloc(srclen * 3)) == NULL)
390
		return (NULL);
391
 
392
	i = 0;
393
	dstlen = 0;
394
	while (i < srclen) {
395
		ch = s[i++];
396
		k = tblofs[ch];
397
		while (k < tblofs[ch + 1])
398
			d[dstlen++] = cp1252_utf8[k++];
399
	}
400
 
401
	/* dstlen includes trailing NUL since srclen also does */
402
	if ((dst = realloc(d, dstlen)) == NULL) {
403
		free(d);
404
		return (NULL);
405
	}
406
	return (dst);
407
}
408
 
409
/* This stuff is for testing only.
   Build with -DTEST to get a small self-check program: it converts a
   CP-1252 sample, feeds the result through the converter a second time
   and prints both strings, which are expected to be the same. */
#ifdef TEST
static const char intext[] = "\225 Gr\374\337e kosten 0,55 \200\205";

#include <stdio.h>

int
main(void)
{
	char *t, *t2;

	if ((t = icy2utf8(intext, 0)) == NULL) {
		fprintf(stderr, "out of memory\n");
		return (1);
	}

	/* make sure it won't be converted twice: without force, input that
	   already is UTF-8 must come back as a plain copy */
	if ((t2 = icy2utf8(t, 0)) == NULL) {
		fprintf(stderr, "out of memory\n");
		free(t);
		return (1);
	}

	printf("Result is:\t\343\200\214%s\343\200\215\n"
		"\t\t\343\200\214%s\343\200\215\n", t, t2);

	free(t);
	free(t2);
	return (0);
}
#endif