Rev 1905 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
1905 | serge | 1 | /* |
2 | id3: ID3v2.3 and ID3v2.4 parsing (a relevant subset) |
||
3 | |||
4 | copyright 2006-2008 by the mpg123 project - free software under the terms of the LGPL 2.1 |
||
5 | see COPYING and AUTHORS files in distribution or http://mpg123.org |
||
6 | initially written by Thomas Orgis |
||
7 | */ |
||
8 | |||
9 | #include "mpg123lib_intern.h" |
||
10 | #include "id3.h" |
||
11 | #include "debug.h" |
||
12 | |||
13 | #ifndef NO_ID3V2 /* Only the main parsing routine will always be there. */ |
||
14 | |||
15 | /* We know the usual text frames plus some specifics. */ |
||
/* Count of frame IDs that get dedicated (non generic-text) handling. */
#define KNOWN_FRAMES 4

/* The four-character IDs of those frames.
   The position of an ID in this table is the value of the matching
   non-negative member of enum frame_types (the parser assigns the index directly). */
static const char frame_type[KNOWN_FRAMES][5] =
{
	"COMM",
	"TXXX",
	"RVA2",
	"USLT"
};

/* Frame classification: negative values are not table indices. */
enum frame_types
{
	unknown = -2,
	text    = -1,
	comment =  0,
	extra   =  1,
	rva2    =  2,
	uslt    =  3
};
||
19 | |||
20 | /* UTF support definitions */ |
||
21 | |||
22 | typedef void (*text_converter)(mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet); |
||
23 | |||
24 | static void convert_latin1 (mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet); |
||
25 | static void convert_utf16bom(mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet); |
||
26 | static void convert_utf8 (mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet); |
||
27 | |||
28 | static const text_converter text_converters[4] = |
||
29 | { |
||
30 | convert_latin1, |
||
31 | /* We always check for (multiple) BOM in 16bit unicode. Without BOM, UTF16 BE is the default. |
||
32 | Errors in encoding are detected anyway. */ |
||
33 | convert_utf16bom, |
||
34 | convert_utf16bom, |
||
35 | convert_utf8 |
||
36 | }; |
||
37 | |||
/* Size in bytes of one code unit for each text encoding byte value;
   it is also the width of the null that terminates a string in that encoding. */
static const unsigned int encoding_widths[4] =
{
	1, /* 0: Latin 1 */
	2, /* 1: UTF-16 with BOM */
	2, /* 2: UTF-16 BE */
	1  /* 3: UTF-8 */
};
1905 | serge | 39 | |
40 | /* the code starts here... */ |
||
41 | |||
42 | static void null_id3_links(mpg123_handle *fr) |
||
43 | { |
||
44 | fr->id3v2.title = NULL; |
||
45 | fr->id3v2.artist = NULL; |
||
46 | fr->id3v2.album = NULL; |
||
47 | fr->id3v2.year = NULL; |
||
48 | fr->id3v2.genre = NULL; |
||
49 | fr->id3v2.comment = NULL; |
||
50 | } |
||
51 | |||
52 | void init_id3(mpg123_handle *fr) |
||
53 | { |
||
54 | fr->id3v2.version = 0; /* nothing there */ |
||
55 | null_id3_links(fr); |
||
56 | fr->id3v2.comments = 0; |
||
57 | fr->id3v2.comment_list = NULL; |
||
58 | fr->id3v2.texts = 0; |
||
59 | fr->id3v2.text = NULL; |
||
60 | fr->id3v2.extras = 0; |
||
61 | fr->id3v2.extra = NULL; |
||
62 | } |
||
63 | |||
64 | /* Managing of the text, comment and extra lists. */ |
||
65 | |||
66 | /* Initialize one element. */ |
||
67 | static void init_mpg123_text(mpg123_text *txt) |
||
68 | { |
||
69 | mpg123_init_string(&txt->text); |
||
70 | mpg123_init_string(&txt->description); |
||
71 | txt->id[0] = 0; |
||
72 | txt->id[1] = 0; |
||
73 | txt->id[2] = 0; |
||
74 | txt->id[3] = 0; |
||
75 | txt->lang[0] = 0; |
||
76 | txt->lang[1] = 0; |
||
77 | txt->lang[2] = 0; |
||
78 | } |
||
79 | |||
80 | /* Free memory of one element. */ |
||
81 | static void free_mpg123_text(mpg123_text *txt) |
||
82 | { |
||
83 | mpg123_free_string(&txt->text); |
||
84 | mpg123_free_string(&txt->description); |
||
85 | } |
||
86 | |||
87 | /* Free memory of whole list. */ |
||
88 | #define free_comment(mh) free_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments)) |
||
89 | #define free_text(mh) free_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts)) |
||
90 | #define free_extra(mh) free_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras)) |
||
91 | static void free_id3_text(mpg123_text **list, size_t *size) |
||
92 | { |
||
93 | size_t i; |
||
94 | for(i=0; i<*size; ++i) free_mpg123_text(&((*list)[i])); |
||
95 | |||
96 | free(*list); |
||
97 | *list = NULL; |
||
98 | *size = 0; |
||
99 | } |
||
100 | |||
101 | /* Add items to the list. */ |
||
102 | #define add_comment(mh) add_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments)) |
||
103 | #define add_text(mh) add_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts)) |
||
104 | #define add_extra(mh) add_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras)) |
||
105 | static mpg123_text *add_id3_text(mpg123_text **list, size_t *size) |
||
106 | { |
||
107 | mpg123_text *x = safe_realloc(*list, sizeof(mpg123_text)*(*size+1)); |
||
108 | if(x == NULL) return NULL; /* bad */ |
||
109 | |||
110 | *list = x; |
||
111 | *size += 1; |
||
112 | init_mpg123_text(&((*list)[*size-1])); |
||
113 | |||
114 | return &((*list)[*size-1]); /* Return pointer to the added text. */ |
||
115 | } |
||
116 | |||
117 | /* Remove the last item. */ |
||
118 | #define pop_comment(mh) pop_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments)) |
||
119 | #define pop_text(mh) pop_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts)) |
||
120 | #define pop_extra(mh) pop_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras)) |
||
121 | static void pop_id3_text(mpg123_text **list, size_t *size) |
||
122 | { |
||
123 | mpg123_text *x; |
||
124 | if(*size < 1) return; |
||
125 | |||
126 | free_mpg123_text(&((*list)[*size-1])); |
||
127 | if(*size > 1) |
||
128 | { |
||
129 | x = safe_realloc(*list, sizeof(mpg123_text)*(*size-1)); |
||
130 | if(x != NULL){ *list = x; *size -= 1; } |
||
131 | } |
||
132 | else |
||
133 | { |
||
134 | free(*list); |
||
135 | *list = NULL; |
||
136 | *size = 0; |
||
137 | } |
||
138 | } |
||
139 | |||
/* OK, back to the higher level functions. */
||
141 | |||
142 | void exit_id3(mpg123_handle *fr) |
||
143 | { |
||
144 | free_comment(fr); |
||
145 | free_extra(fr); |
||
146 | free_text(fr); |
||
147 | } |
||
148 | |||
149 | void reset_id3(mpg123_handle *fr) |
||
150 | { |
||
151 | exit_id3(fr); |
||
152 | init_id3(fr); |
||
153 | } |
||
154 | |||
155 | /* Set the id3v2.artist id3v2.title ... links to elements of the array. */ |
||
156 | void id3_link(mpg123_handle *fr) |
||
157 | { |
||
158 | size_t i; |
||
159 | mpg123_id3v2 *v2 = &fr->id3v2; |
||
160 | debug("linking ID3v2"); |
||
161 | null_id3_links(fr); |
||
162 | for(i=0; i |
||
163 | { |
||
164 | mpg123_text *entry = &v2->text[i]; |
||
165 | if (!strncmp("TIT2", entry->id, 4)) v2->title = &entry->text; |
||
166 | else if(!strncmp("TALB", entry->id, 4)) v2->album = &entry->text; |
||
167 | else if(!strncmp("TPE1", entry->id, 4)) v2->artist = &entry->text; |
||
168 | else if(!strncmp("TYER", entry->id, 4)) v2->year = &entry->text; |
||
169 | else if(!strncmp("TCON", entry->id, 4)) v2->genre = &entry->text; |
||
170 | } |
||
171 | for(i=0; i |
||
172 | { |
||
173 | mpg123_text *entry = &v2->comment_list[i]; |
||
174 | if(entry->description.fill == 0 || entry->description.p[0] == 0) |
||
175 | v2->comment = &entry->text; |
||
176 | } |
||
177 | /* When no generic comment found, use the last non-generic one. */ |
||
178 | if(v2->comment == NULL && v2->comments > 0) |
||
179 | v2->comment = &v2->comment_list[v2->comments-1].text; |
||
180 | } |
||
181 | |||
182 | /* |
||
183 | Store ID3 text data in an mpg123_string; either verbatim copy or everything translated to UTF-8 encoding. |
||
184 | Preserve the zero string separator (I don't need strlen for the total size). |
||
185 | |||
186 | ID3v2 standard says that there should be one text frame of specific type per tag, and subsequent tags overwrite old values. |
||
187 | So, I always replace the text that may be stored already (perhaps with a list of zero-separated strings, though). |
||
188 | */ |
||
3960 | Serge | 189 | static void store_id3_text(mpg123_string *sb, char *source, size_t source_size, const int noquiet, const int notranslate) |
1905 | serge | 190 | { |
191 | if(!source_size) |
||
192 | { |
||
193 | debug("Empty id3 data!"); |
||
194 | return; |
||
195 | } |
||
196 | |||
197 | /* We shall just copy the data. Client wants to decode itself. */ |
||
198 | if(notranslate) |
||
199 | { |
||
200 | /* Future: Add a path for ID3 errors. */ |
||
201 | if(!mpg123_resize_string(sb, source_size)) |
||
202 | { |
||
203 | if(noquiet) error("Cannot resize target string, out of memory?"); |
||
204 | return; |
||
205 | } |
||
206 | memcpy(sb->p, source, source_size); |
||
207 | sb->fill = source_size; |
||
208 | debug1("stored undecoded ID3 text of size %"SIZE_P, (size_p)source_size); |
||
209 | return; |
||
210 | } |
||
211 | |||
212 | id3_to_utf8(sb, ((unsigned char *)source)[0], (unsigned char*)source+1, source_size-1, noquiet); |
||
213 | |||
214 | if(sb->fill) debug1("UTF-8 string (the first one): %s", sb->p); |
||
215 | else if(noquiet) error("unable to convert string to UTF-8 (out of memory, junk input?)!"); |
||
216 | } |
||
217 | |||
218 | /* On error, sb->size is 0. */ |
||
219 | void id3_to_utf8(mpg123_string *sb, unsigned char encoding, const unsigned char *source, size_t source_size, int noquiet) |
||
220 | { |
||
221 | unsigned int bwidth; |
||
222 | debug1("encoding: %u", encoding); |
||
223 | /* A note: ID3v2.3 uses UCS-2 non-variable 16bit encoding, v2.4 uses UTF16. |
||
224 | UTF-16 uses a reserved/private range in UCS-2 to add the magic, so we just always treat it as UTF. */ |
||
225 | if(encoding > mpg123_id3_enc_max) |
||
226 | { |
||
227 | if(noquiet) error1("Unknown text encoding %u, I take no chances, sorry!", encoding); |
||
228 | |||
229 | mpg123_free_string(sb); |
||
230 | return; |
||
231 | } |
||
232 | bwidth = encoding_widths[encoding]; |
||
233 | /* Hack! I've seen a stray zero byte before BOM. Is that supposed to happen? */ |
||
234 | if(encoding != mpg123_id3_utf16be) /* UTF16be _can_ beging with a null byte! */ |
||
235 | while(source_size > bwidth && source[0] == 0) |
||
236 | { |
||
237 | --source_size; |
||
238 | ++source; |
||
239 | debug("skipped leading zero"); |
||
240 | } |
||
241 | if(source_size % bwidth) |
||
242 | { |
||
243 | /* When we need two bytes for a character, it's strange to have an uneven bytestream length. */ |
||
244 | if(noquiet) warning2("Weird tag size %d for encoding %u - I will probably trim too early or something but I think the MP3 is broken.", (int)source_size, encoding); |
||
245 | source_size -= source_size % bwidth; |
||
246 | } |
||
247 | text_converters[encoding](sb, source, source_size, noquiet); |
||
248 | } |
||
249 | |||
3960 | Serge | 250 | static char *next_text(char* prev, int encoding, size_t limit) |
1905 | serge | 251 | { |
252 | char *text = prev; |
||
253 | size_t width = encoding_widths[encoding]; |
||
254 | |||
255 | /* So I go lengths to find zero or double zero... |
||
256 | Remember bug 2834636: Only check for aligned NULLs! */ |
||
257 | while(text-prev < (ssize_t)limit) |
||
258 | { |
||
259 | if(text[0] == 0) |
||
260 | { |
||
261 | if(width <= limit-(text-prev)) |
||
262 | { |
||
263 | size_t i = 1; |
||
264 | for(; i |
||
265 | |||
266 | if(i == width) /* found a null wide enough! */ |
||
267 | { |
||
268 | text += width; |
||
269 | break; |
||
270 | } |
||
271 | } |
||
272 | else return NULL; /* No full character left? This text is broken */ |
||
273 | } |
||
274 | |||
275 | text += width; |
||
276 | } |
||
3960 | Serge | 277 | if((size_t)(text-prev) >= limit) text = NULL; |
1905 | serge | 278 | |
279 | return text; |
||
280 | } |
||
281 | |||
/* Give a printable name for an ID3v2 text encoding byte value.
   Values outside of 0..3 yield "unknown!". */
static const char *enc_name(int enc)
{
	static const char *const names[] =
	{
		"Latin 1",
		"UTF-16 BOM",
		"UTF-16 BE",
		"UTF-8"
	};

	if(enc < 0 || enc > 3) return "unknown!";

	return names[enc];
}
||
293 | |||
294 | static void process_text(mpg123_handle *fr, char *realdata, size_t realsize, char *id) |
||
295 | { |
||
296 | /* Text encoding $xx */ |
||
297 | /* The text (encoded) ... */ |
||
298 | mpg123_text *t = add_text(fr); |
||
299 | if(VERBOSE4) fprintf(stderr, "Note: Storing text from %s encoding\n", enc_name(realdata[0])); |
||
300 | if(t == NULL) |
||
301 | { |
||
302 | if(NOQUIET) error("Unable to attach new text!"); |
||
303 | return; |
||
304 | } |
||
305 | memcpy(t->id, id, 4); |
||
306 | store_id3_text(&t->text, realdata, realsize, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT); |
||
307 | if(VERBOSE4) fprintf(stderr, "Note: ID3v2 %c%c%c%c text frame: %s\n", id[0], id[1], id[2], id[3], t->text.p); |
||
308 | } |
||
309 | |||
/* Store a new comment that perhaps is a RVA / RVA_ALBUM/AUDIOPHILE / RVA_MIX/RADIO one.
   Special gimmick: It also stores USLT to the texts. Structure is the same as for comments. */
||
312 | static void process_comment(mpg123_handle *fr, enum frame_types tt, char *realdata, size_t realsize, int rva_level, char *id) |
||
313 | { |
||
314 | /* Text encoding $xx */ |
||
315 | /* Language $xx xx xx */ |
||
316 | /* Short description (encoded!) |
||
317 | /* Then the comment text (encoded) ... */ |
||
318 | char encoding = realdata[0]; |
||
319 | char *lang = realdata+1; /* I'll only use the 3 bytes! */ |
||
320 | char *descr = realdata+4; |
||
321 | char *text = NULL; |
||
322 | mpg123_text *xcom = NULL; |
||
323 | mpg123_text localcom; /* UTF-8 variant for local processing. */ |
||
324 | |||
325 | if((int)realsize < descr-realdata) |
||
326 | { |
||
327 | if(NOQUIET) error1("Invalid frame size of %lu (too small for anything).", (unsigned long)realsize); |
||
328 | return; |
||
329 | } |
||
330 | xcom = (tt == uslt ? add_text(fr) : add_comment(fr)); |
||
331 | if(VERBOSE4) fprintf(stderr, "Note: Storing comment from %s encoding\n", enc_name(realdata[0])); |
||
332 | if(xcom == NULL) |
||
333 | { |
||
334 | if(NOQUIET) error("Unable to attach new comment!"); |
||
335 | return; |
||
336 | } |
||
337 | memcpy(xcom->lang, lang, 3); |
||
338 | memcpy(xcom->id, id, 4); |
||
339 | /* Now I can abuse a byte from lang for the encoding. */ |
||
340 | descr[-1] = encoding; |
||
341 | /* Be careful with finding the end of description, I have to honor encoding here. */ |
||
342 | text = next_text(descr, encoding, realsize-(descr-realdata)); |
||
343 | if(text == NULL) |
||
344 | { |
||
345 | if(NOQUIET) error("No comment text / valid description?"); |
||
346 | pop_comment(fr); |
||
347 | return; |
||
348 | } |
||
349 | |||
350 | init_mpg123_text(&localcom); |
||
351 | /* Store the text, without translation to UTF-8, but for comments always a local copy in UTF-8. |
||
352 | Reminder: No bailing out from here on without freeing the local comment data! */ |
||
353 | store_id3_text(&xcom->description, descr-1, text-descr+1, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT); |
||
354 | if(tt == comment) |
||
3960 | Serge | 355 | store_id3_text(&localcom.description, descr-1, text-descr+1, NOQUIET, 0); |
1905 | serge | 356 | |
357 | text[-1] = encoding; /* Byte abusal for encoding... */ |
||
358 | store_id3_text(&xcom->text, text-1, realsize+1-(text-realdata), NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT); |
||
359 | /* Remember: I will probably decode the above (again) for rva comment checking. So no messing around, please. */ |
||
360 | |||
361 | if(VERBOSE4) /* Do _not_ print the verbatim text: The encoding might be funny! */ |
||
362 | { |
||
363 | fprintf(stderr, "Note: ID3 comm/uslt desc of length %"SIZE_P".\n", (size_p)xcom->description.fill); |
||
364 | fprintf(stderr, "Note: ID3 comm/uslt text of length %"SIZE_P".\n", (size_p)xcom->text.fill); |
||
365 | } |
||
366 | /* Look out for RVA info only when we really deal with a straight comment. */ |
||
367 | if(tt == comment && localcom.description.fill > 0) |
||
368 | { |
||
369 | int rva_mode = -1; /* mix / album */ |
||
370 | if( !strcasecmp(localcom.description.p, "rva") |
||
371 | || !strcasecmp(localcom.description.p, "rva_mix") |
||
372 | || !strcasecmp(localcom.description.p, "rva_track") |
||
373 | || !strcasecmp(localcom.description.p, "rva_radio") ) |
||
374 | rva_mode = 0; |
||
375 | else if( !strcasecmp(localcom.description.p, "rva_album") |
||
376 | || !strcasecmp(localcom.description.p, "rva_audiophile") |
||
377 | || !strcasecmp(localcom.description.p, "rva_user") ) |
||
378 | rva_mode = 1; |
||
379 | if((rva_mode > -1) && (fr->rva.level[rva_mode] <= rva_level)) |
||
380 | { |
||
381 | /* Only translate the contents in here where we really need them. */ |
||
3960 | Serge | 382 | store_id3_text(&localcom.text, text-1, realsize+1-(text-realdata), NOQUIET, 0); |
1905 | serge | 383 | if(localcom.text.fill > 0) |
384 | { |
||
385 | fr->rva.gain[rva_mode] = (float) atof(localcom.text.p); |
||
386 | if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]); |
||
387 | fr->rva.peak[rva_mode] = 0; |
||
388 | fr->rva.level[rva_mode] = rva_level; |
||
389 | } |
||
390 | } |
||
391 | } |
||
392 | /* Make sure to free the local memory... */ |
||
393 | free_mpg123_text(&localcom); |
||
394 | } |
||
395 | |||
3960 | Serge | 396 | static void process_extra(mpg123_handle *fr, char* realdata, size_t realsize, int rva_level, char *id) |
1905 | serge | 397 | { |
398 | /* Text encoding $xx */ |
||
399 | /* Description ... $00 (00) */ |
||
400 | /* Text ... */ |
||
401 | char encoding = realdata[0]; |
||
402 | char *descr = realdata+1; /* remember, the encoding is descr[-1] */ |
||
403 | char *text; |
||
404 | mpg123_text *xex; |
||
405 | mpg123_text localex; |
||
406 | |||
407 | if((int)realsize < descr-realdata) |
||
408 | { |
||
409 | if(NOQUIET) error1("Invalid frame size of %lu (too small for anything).", (unsigned long)realsize); |
||
410 | return; |
||
411 | } |
||
412 | text = next_text(descr, encoding, realsize-(descr-realdata)); |
||
413 | if(VERBOSE4) fprintf(stderr, "Note: Storing extra from %s encoding\n", enc_name(realdata[0])); |
||
414 | if(text == NULL) |
||
415 | { |
||
416 | if(NOQUIET) error("No extra frame text / valid description?"); |
||
417 | return; |
||
418 | } |
||
419 | xex = add_extra(fr); |
||
420 | if(xex == NULL) |
||
421 | { |
||
422 | if(NOQUIET) error("Unable to attach new extra text!"); |
||
423 | return; |
||
424 | } |
||
425 | memcpy(xex->id, id, 4); |
||
426 | init_mpg123_text(&localex); /* For our local copy. */ |
||
3960 | Serge | 427 | |
428 | /* The outside storage gets reencoded to UTF-8 only if not requested otherwise. |
||
429 | Remember that we really need the -1 here to hand in the encoding byte!*/ |
||
1905 | serge | 430 | store_id3_text(&xex->description, descr-1, text-descr+1, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT); |
3960 | Serge | 431 | /* Our local copy is always stored in UTF-8! */ |
432 | store_id3_text(&localex.description, descr-1, text-descr+1, NOQUIET, 0); |
||
433 | /* At first, only store the outside copy of the payload. We may not need the local copy. */ |
||
1905 | serge | 434 | text[-1] = encoding; |
435 | store_id3_text(&xex->text, text-1, realsize-(text-realdata)+1, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT); |
||
3960 | Serge | 436 | |
1905 | serge | 437 | /* Now check if we would like to interpret this extra info for RVA. */ |
438 | if(localex.description.fill > 0) |
||
439 | { |
||
440 | int is_peak = 0; |
||
441 | int rva_mode = -1; /* mix / album */ |
||
442 | |||
443 | if(!strncasecmp(localex.description.p, "replaygain_track_",17)) |
||
444 | { |
||
445 | if(VERBOSE3) fprintf(stderr, "Note: RVA ReplayGain track gain/peak\n"); |
||
446 | |||
447 | rva_mode = 0; |
||
448 | if(!strcasecmp(localex.description.p, "replaygain_track_peak")) is_peak = 1; |
||
449 | else if(strcasecmp(localex.description.p, "replaygain_track_gain")) rva_mode = -1; |
||
450 | } |
||
451 | else |
||
452 | if(!strncasecmp(localex.description.p, "replaygain_album_",17)) |
||
453 | { |
||
454 | if(VERBOSE3) fprintf(stderr, "Note: RVA ReplayGain album gain/peak\n"); |
||
455 | |||
456 | rva_mode = 1; |
||
457 | if(!strcasecmp(localex.description.p, "replaygain_album_peak")) is_peak = 1; |
||
458 | else if(strcasecmp(localex.description.p, "replaygain_album_gain")) rva_mode = -1; |
||
459 | } |
||
460 | if((rva_mode > -1) && (fr->rva.level[rva_mode] <= rva_level)) |
||
461 | { |
||
462 | /* Now we need the translated copy of the data. */ |
||
3960 | Serge | 463 | store_id3_text(&localex.text, text-1, realsize-(text-realdata)+1, NOQUIET, 0); |
1905 | serge | 464 | if(localex.text.fill > 0) |
465 | { |
||
466 | if(is_peak) |
||
467 | { |
||
468 | fr->rva.peak[rva_mode] = (float) atof(localex.text.p); |
||
469 | if(VERBOSE3) fprintf(stderr, "Note: RVA peak %f\n", fr->rva.peak[rva_mode]); |
||
470 | } |
||
471 | else |
||
472 | { |
||
473 | fr->rva.gain[rva_mode] = (float) atof(localex.text.p); |
||
474 | if(VERBOSE3) fprintf(stderr, "Note: RVA gain %fdB\n", fr->rva.gain[rva_mode]); |
||
475 | } |
||
476 | fr->rva.level[rva_mode] = rva_level; |
||
477 | } |
||
478 | } |
||
479 | } |
||
480 | |||
481 | free_mpg123_text(&localex); |
||
482 | } |
||
483 | |||
484 | /* Make a ID3v2.3+ 4-byte ID from a ID3v2.2 3-byte ID |
||
485 | Note that not all frames survived to 2.4; the mapping goes to 2.3 . |
||
486 | A notable miss is the old RVA frame, which is very unspecific anyway. |
||
487 | This function returns -1 when a not known 3 char ID was encountered, 0 otherwise. */ |
||
3960 | Serge | 488 | static int promote_framename(mpg123_handle *fr, char *id) /* fr because of VERBOSE macros */ |
1905 | serge | 489 | { |
490 | size_t i; |
||
491 | char *old[] = |
||
492 | { |
||
493 | "COM", "TAL", "TBP", "TCM", "TCO", "TCR", "TDA", "TDY", "TEN", "TFT", |
||
494 | "TIM", "TKE", "TLA", "TLE", "TMT", "TOA", "TOF", "TOL", "TOR", "TOT", |
||
495 | "TP1", "TP2", "TP3", "TP4", "TPA", "TPB", "TRC", "TDA", "TRK", "TSI", |
||
496 | "TSS", "TT1", "TT2", "TT3", "TXT", "TXX", "TYE" |
||
497 | }; |
||
498 | char *new[] = |
||
499 | { |
||
500 | "COMM", "TALB", "TBPM", "TCOM", "TCON", "TCOP", "TDAT", "TDLY", "TENC", "TFLT", |
||
501 | "TIME", "TKEY", "TLAN", "TLEN", "TMED", "TOPE", "TOFN", "TOLY", "TORY", "TOAL", |
||
502 | "TPE1", "TPE2", "TPE3", "TPE4", "TPOS", "TPUB", "TSRC", "TRDA", "TRCK", "TSIZ", |
||
503 | "TSSE", "TIT1", "TIT2", "TIT3", "TEXT", "TXXX", "TYER" |
||
504 | }; |
||
505 | for(i=0; i |
||
506 | { |
||
507 | if(!strncmp(id, old[i], 3)) |
||
508 | { |
||
509 | memcpy(id, new[i], 4); |
||
510 | if(VERBOSE3) fprintf(stderr, "Translated ID3v2.2 frame %s to %s\n", old[i], new[i]); |
||
511 | return 0; |
||
512 | } |
||
513 | } |
||
514 | if(VERBOSE3) fprintf(stderr, "Ignoring untranslated ID3v2.2 frame %c%c%c\n", id[0], id[1], id[2]); |
||
515 | return -1; |
||
516 | } |
||
517 | |||
518 | #endif /* NO_ID3V2 */ |
||
519 | |||
520 | /* |
||
521 | trying to parse ID3v2.3 and ID3v2.4 tags... |
||
522 | |||
523 | returns: 0: bad or just unparseable tag |
||
524 | 1: good, (possibly) new tag info |
||
525 | <0: reader error (may need more data feed, try again) |
||
526 | */ |
||
527 | int parse_new_id3(mpg123_handle *fr, unsigned long first4bytes) |
||
528 | { |
||
529 | #define UNSYNC_FLAG 128 |
||
530 | #define EXTHEAD_FLAG 64 |
||
531 | #define EXP_FLAG 32 |
||
532 | #define FOOTER_FLAG 16 |
||
533 | #define UNKNOWN_FLAGS 15 /* 00001111*/ |
||
534 | unsigned char buf[6]; |
||
535 | unsigned long length=0; |
||
536 | unsigned char flags = 0; |
||
537 | int ret = 1; |
||
538 | int ret2; |
||
539 | unsigned char major = first4bytes & 0xff; |
||
540 | debug1("ID3v2: major tag version: %i", major); |
||
541 | if(major == 0xff) return 0; /* Invalid... */ |
||
542 | if((ret2 = fr->rd->read_frame_body(fr, buf, 6)) < 0) /* read more header information */ |
||
543 | return ret2; |
||
544 | |||
545 | if(buf[0] == 0xff) return 0; /* Revision, will never be 0xff. */ |
||
546 | |||
547 | /* second new byte are some nice flags, if these are invalid skip the whole thing */ |
||
548 | flags = buf[1]; |
||
549 | debug1("ID3v2: flags 0x%08x", flags); |
||
550 | /* use 4 bytes from buf to construct 28bit uint value and return 1; return 0 if bytes are not synchsafe */ |
||
551 | #define synchsafe_to_long(buf,res) \ |
||
552 | ( \ |
||
553 | (((buf)[0]|(buf)[1]|(buf)[2]|(buf)[3]) & 0x80) ? 0 : \ |
||
554 | (res = (((unsigned long) (buf)[0]) << 21) \ |
||
555 | | (((unsigned long) (buf)[1]) << 14) \ |
||
556 | | (((unsigned long) (buf)[2]) << 7) \ |
||
557 | | ((unsigned long) (buf)[3]) \ |
||
558 | ,1) \ |
||
559 | ) |
||
560 | /* id3v2.3 does not store synchsafe frame sizes, but synchsafe tag size - doh! */ |
||
561 | #define bytes_to_long(buf,res) \ |
||
562 | ( \ |
||
563 | major == 3 ? \ |
||
564 | (res = (((unsigned long) (buf)[0]) << 24) \ |
||
565 | | (((unsigned long) (buf)[1]) << 16) \ |
||
566 | | (((unsigned long) (buf)[2]) << 8) \ |
||
567 | | ((unsigned long) (buf)[3]) \ |
||
568 | ,1) : synchsafe_to_long(buf,res) \ |
||
569 | ) |
||
570 | /* for id3v2.2 only */ |
||
571 | #define threebytes_to_long(buf,res) \ |
||
572 | ( \ |
||
573 | res = (((unsigned long) (buf)[0]) << 16) \ |
||
574 | | (((unsigned long) (buf)[1]) << 8) \ |
||
575 | | ((unsigned long) (buf)[2]) \ |
||
576 | ,1 \ |
||
577 | ) |
||
578 | |||
579 | /* length-10 or length-20 (footer present); 4 synchsafe integers == 28 bit number */ |
||
580 | /* we have already read 10 bytes, so left are length or length+10 bytes belonging to tag */ |
||
581 | if(!synchsafe_to_long(buf+2,length)) |
||
582 | { |
||
583 | if(NOQUIET) error4("Bad tag length (not synchsafe): 0x%02x%02x%02x%02x; You got a bad ID3 tag here.", buf[2],buf[3],buf[4],buf[5]); |
||
584 | return 0; |
||
585 | } |
||
586 | debug1("ID3v2: tag data length %lu", length); |
||
587 | #ifndef NO_ID3V2 |
||
588 | if(VERBOSE2) fprintf(stderr,"Note: ID3v2.%i rev %i tag of %lu bytes\n", major, buf[0], length); |
||
589 | /* skip if unknown version/scary flags, parse otherwise */ |
||
3960 | Serge | 590 | if(fr->p.flags & MPG123_SKIP_ID3V2 || ((flags & UNKNOWN_FLAGS) || (major > 4) || (major < 2))) |
1905 | serge | 591 | { |
3960 | Serge | 592 | if(NOQUIET) |
593 | { |
||
594 | if(fr->p.flags & MPG123_SKIP_ID3V2) |
||
595 | { |
||
596 | if(VERBOSE3) fprintf(stderr, "Note: Skipping ID3v2 tag per user request.\n"); |
||
597 | } |
||
598 | else /* Must be because of scary Tag properties. */ |
||
599 | warning2("ID3v2: Won't parse the ID3v2 tag with major version %u and flags 0x%xu - some extra code may be needed", major, flags); |
||
600 | } |
||
1905 | serge | 601 | #endif |
602 | if((ret2 = fr->rd->skip_bytes(fr,length)) < 0) /* will not store data in backbuff! */ |
||
603 | ret = ret2; |
||
604 | #ifndef NO_ID3V2 |
||
605 | } |
||
606 | else |
||
607 | { |
||
3960 | Serge | 608 | unsigned char* tagdata = NULL; |
1905 | serge | 609 | fr->id3v2.version = major; |
610 | /* try to interpret that beast */ |
||
611 | if((tagdata = (unsigned char*) malloc(length+1)) != NULL) |
||
612 | { |
||
613 | debug("ID3v2: analysing frames..."); |
||
614 | if((ret2 = fr->rd->read_frame_body(fr,tagdata,length)) > 0) |
||
615 | { |
||
616 | unsigned long tagpos = 0; |
||
617 | debug1("ID3v2: have read at all %lu bytes for the tag now", (unsigned long)length+6); |
||
618 | /* going to apply strlen for strings inside frames, make sure that it doesn't overflow! */ |
||
619 | tagdata[length] = 0; |
||
620 | if(flags & EXTHEAD_FLAG) |
||
621 | { |
||
622 | debug("ID3v2: skipping extended header"); |
||
623 | if(!bytes_to_long(tagdata, tagpos)) |
||
624 | { |
||
625 | ret = 0; |
||
626 | if(NOQUIET) error4("Bad (non-synchsafe) tag offset: 0x%02x%02x%02x%02x", tagdata[0], tagdata[1], tagdata[2], tagdata[3]); |
||
627 | } |
||
628 | } |
||
629 | if(ret > 0) |
||
630 | { |
||
631 | char id[5]; |
||
632 | unsigned long framesize; |
||
633 | unsigned long fflags; /* need 16 bits, actually */ |
||
634 | id[4] = 0; |
||
635 | /* pos now advanced after ext head, now a frame has to follow */ |
||
636 | while(tagpos < length-10) /* I want to read at least a full header */ |
||
637 | { |
||
638 | int i = 0; |
||
639 | unsigned long pos = tagpos; |
||
640 | int head_part = fr->id3v2.version == 2 ? 3 : 4; /* bytes of frame title and of framesize value */ |
||
641 | /* level 1,2,3 - 0 is info from lame/info tag! */ |
||
642 | /* rva tags with ascending significance, then general frames */ |
||
643 | enum frame_types tt = unknown; |
||
644 | /* we may have entered the padding zone or any other strangeness: check if we have valid frame id characters */ |
||
645 | for(i=0; i< head_part; ++i) |
||
646 | if( !( ((tagdata[tagpos+i] > 47) && (tagdata[tagpos+i] < 58)) |
||
647 | || ((tagdata[tagpos+i] > 64) && (tagdata[tagpos+i] < 91)) ) ) |
||
648 | { |
||
649 | debug5("ID3v2: real tag data apparently ended after %lu bytes with 0x%02x%02x%02x%02x", tagpos, tagdata[tagpos], tagdata[tagpos+1], tagdata[tagpos+2], tagdata[tagpos+3]); |
||
650 | /* This is no hard error... let's just hope that we got something meaningful already (ret==1 in that case). */ |
||
651 | goto tagparse_cleanup; /* Need to escape two loops here. */ |
||
652 | } |
||
653 | if(ret > 0) |
||
654 | { |
||
655 | /* 4 or 3 bytes id */ |
||
656 | strncpy(id, (char*) tagdata+pos, head_part); |
||
3960 | Serge | 657 | id[head_part] = 0; /* terminate for 3 or 4 bytes */ |
1905 | serge | 658 | pos += head_part; |
659 | tagpos += head_part; |
||
660 | /* size as 32 bits or 28 bits */ |
||
661 | if(fr->id3v2.version == 2) threebytes_to_long(tagdata+pos, framesize); |
||
662 | else |
||
663 | if(!bytes_to_long(tagdata+pos, framesize)) |
||
664 | { |
||
665 | /* Just assume that up to now there was some good data. */ |
||
666 | if(NOQUIET) error1("ID3v2: non-syncsafe size of %s frame, skipping the remainder of tag", id); |
||
667 | break; |
||
668 | } |
||
669 | if(VERBOSE3) fprintf(stderr, "Note: ID3v2 %s frame of size %lu\n", id, framesize); |
||
670 | tagpos += head_part + framesize; /* the important advancement in whole tag */ |
||
671 | if(tagpos > length) |
||
672 | { |
||
673 | if(NOQUIET) error("Whoa! ID3v2 frame claims to be larger than the whole rest of the tag."); |
||
674 | break; |
||
675 | } |
||
676 | pos += head_part; |
||
677 | if(fr->id3v2.version > 2) |
||
678 | { |
||
679 | fflags = (((unsigned long) tagdata[pos]) << 8) | ((unsigned long) tagdata[pos+1]); |
||
680 | pos += 2; |
||
681 | tagpos += 2; |
||
682 | } |
||
683 | else fflags = 0; |
||
684 | /* for sanity, after full parsing tagpos should be == pos */ |
||
685 | /* debug4("ID3v2: found %s frame, size %lu (as bytes: 0x%08lx), flags 0x%016lx", id, framesize, framesize, fflags); */ |
||
686 | /* %0abc0000 %0h00kmnp */ |
||
687 | #define BAD_FFLAGS (unsigned long) 36784 |
||
688 | #define PRES_TAG_FFLAG 16384 |
||
689 | #define PRES_FILE_FFLAG 8192 |
||
690 | #define READ_ONLY_FFLAG 4096 |
||
691 | #define GROUP_FFLAG 64 |
||
692 | #define COMPR_FFLAG 8 |
||
693 | #define ENCR_FFLAG 4 |
||
694 | #define UNSYNC_FFLAG 2 |
||
695 | #define DATLEN_FFLAG 1 |
||
696 | if(head_part < 4 && promote_framename(fr, id) != 0) continue; |
||
697 | |||
698 | /* shall not or want not handle these */ |
||
699 | if(fflags & (BAD_FFLAGS | COMPR_FFLAG | ENCR_FFLAG)) |
||
700 | { |
||
701 | if(NOQUIET) warning("ID3v2: skipping invalid/unsupported frame"); |
||
702 | continue; |
||
703 | } |
||
704 | |||
705 | for(i = 0; i < KNOWN_FRAMES; ++i) |
||
706 | if(!strncmp(frame_type[i], id, 4)){ tt = i; break; } |
||
707 | |||
708 | if(id[0] == 'T' && tt != extra) tt = text; |
||
709 | |||
710 | if(tt != unknown) |
||
711 | { |
||
712 | int rva_mode = -1; /* mix / album */ |
||
713 | unsigned long realsize = framesize; |
||
714 | unsigned char* realdata = tagdata+pos; |
||
715 | if((flags & UNSYNC_FLAG) || (fflags & UNSYNC_FFLAG)) |
||
716 | { |
||
717 | unsigned long ipos = 0; |
||
718 | unsigned long opos = 0; |
||
719 | debug("Id3v2: going to de-unsync the frame data"); |
||
720 | /* de-unsync: FF00 -> FF; real FF00 is simply represented as FF0000 ... */ |
||
721 | /* damn, that means I have to delete bytes from withing the data block... thus need temporal storage */ |
||
722 | /* standard mandates that de-unsync should always be safe if flag is set */ |
||
723 | realdata = (unsigned char*) malloc(framesize); /* will need <= bytes */ |
||
724 | if(realdata == NULL) |
||
725 | { |
||
726 | if(NOQUIET) error("ID3v2: unable to allocate working buffer for de-unsync"); |
||
727 | continue; |
||
728 | } |
||
729 | /* now going byte per byte through the data... */ |
||
730 | realdata[0] = tagdata[pos]; |
||
731 | opos = 1; |
||
732 | for(ipos = pos+1; ipos < pos+framesize; ++ipos) |
||
733 | { |
||
734 | if(!((tagdata[ipos] == 0) && (tagdata[ipos-1] == 0xff))) |
||
735 | { |
||
736 | realdata[opos++] = tagdata[ipos]; |
||
737 | } |
||
738 | } |
||
739 | realsize = opos; |
||
740 | debug2("ID3v2: de-unsync made %lu out of %lu bytes", realsize, framesize); |
||
741 | } |
||
742 | pos = 0; /* now at the beginning again... */ |
||
743 | switch(tt) |
||
744 | { |
||
745 | case comment: |
||
746 | case uslt: |
||
747 | process_comment(fr, tt, (char*)realdata, realsize, comment+1, id); |
||
748 | break; |
||
749 | case extra: /* perhaps foobar2000's work */ |
||
750 | process_extra(fr, (char*)realdata, realsize, extra+1, id); |
||
751 | break; |
||
752 | case rva2: /* "the" RVA tag */ |
||
753 | { |
||
754 | /* starts with null-terminated identification */ |
||
755 | if(VERBOSE3) fprintf(stderr, "Note: RVA2 identification \"%s\"\n", realdata); |
||
756 | /* default: some individual value, mix mode */ |
||
757 | rva_mode = 0; |
||
758 | if( !strncasecmp((char*)realdata, "album", 5) |
||
759 | || !strncasecmp((char*)realdata, "audiophile", 10) |
||
760 | || !strncasecmp((char*)realdata, "user", 4)) |
||
761 | rva_mode = 1; |
||
762 | if(fr->rva.level[rva_mode] <= rva2+1) |
||
763 | { |
||
764 | pos += strlen((char*) realdata) + 1; |
||
765 | if(realdata[pos] == 1) |
||
766 | { |
||
767 | ++pos; |
||
768 | /* only handle master channel */ |
||
769 | debug("ID3v2: it is for the master channel"); |
||
770 | /* two bytes adjustment, one byte for bits representing peak - n bytes, eh bits, for peak */ |
||
771 | /* 16 bit signed integer = dB * 512 ... the double cast is needed to preserve the sign of negative values! */ |
||
772 | fr->rva.gain[rva_mode] = (float) ( (((short)((signed char)realdata[pos])) << 8) | realdata[pos+1] ) / 512; |
||
773 | pos += 2; |
||
774 | if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]); |
||
775 | /* heh, the peak value is represented by a number of bits - but in what manner? Skipping that part */ |
||
776 | fr->rva.peak[rva_mode] = 0; |
||
777 | fr->rva.level[rva_mode] = rva2+1; |
||
778 | } |
||
779 | } |
||
780 | } |
||
781 | break; |
||
782 | /* non-rva metainfo, simply store... */ |
||
783 | case text: |
||
784 | process_text(fr, (char*)realdata, realsize, id); |
||
785 | break; |
||
786 | default: if(NOQUIET) error1("ID3v2: unknown frame type %i", tt); |
||
787 | } |
||
788 | if((flags & UNSYNC_FLAG) || (fflags & UNSYNC_FFLAG)) free(realdata); |
||
789 | } |
||
790 | #undef BAD_FFLAGS |
||
791 | #undef PRES_TAG_FFLAG |
||
792 | #undef PRES_FILE_FFLAG |
||
793 | #undef READ_ONLY_FFLAG |
||
794 | #undef GROUP_FFLAG |
||
795 | #undef COMPR_FFLAG |
||
796 | #undef ENCR_FFLAG |
||
797 | #undef UNSYNC_FFLAG |
||
798 | #undef DATLEN_FFLAG |
||
799 | } |
||
800 | else break; |
||
801 | #undef KNOWN_FRAMES |
||
802 | } |
||
803 | } |
||
804 | } |
||
805 | else |
||
806 | { |
||
3960 | Serge | 807 | /* There are tags with zero length. Strictly not an error, then. */ |
808 | if(length > 0 && NOQUIET && ret2 != MPG123_NEED_MORE) error("ID3v2: Duh, not able to read ID3v2 tag data."); |
||
1905 | serge | 809 | ret = ret2; |
810 | } |
||
811 | tagparse_cleanup: |
||
812 | free(tagdata); |
||
813 | } |
||
814 | else |
||
815 | { |
||
816 | if(NOQUIET) error1("ID3v2: Arrg! Unable to allocate %lu bytes for interpreting ID3v2 data - trying to skip instead.", length); |
||
817 | if((ret2 = fr->rd->skip_bytes(fr,length)) < 0) ret = ret2; /* will not store data in backbuff! */ |
||
818 | else ret = 0; |
||
819 | } |
||
820 | } |
||
821 | #endif /* NO_ID3V2 */ |
||
822 | /* skip footer if present */ |
||
823 | if((ret > 0) && (flags & FOOTER_FLAG) && ((ret2 = fr->rd->skip_bytes(fr,length)) < 0)) ret = ret2; |
||
824 | |||
825 | return ret; |
||
826 | #undef UNSYNC_FLAG |
||
827 | #undef EXTHEAD_FLAG |
||
828 | #undef EXP_FLAG |
||
829 | #undef FOOTER_FLAG |
||
830 | #undef UNKOWN_FLAGS |
||
831 | } |
||
832 | |||
833 | #ifndef NO_ID3V2 /* Disabling all the rest... */ |
||
834 | |||
835 | static void convert_latin1(mpg123_string *sb, const unsigned char* s, size_t l, const int noquiet) |
||
836 | { |
||
837 | size_t length = l; |
||
838 | size_t i; |
||
839 | unsigned char *p; |
||
840 | /* determine real length, a latin1 character can at most take 2 in UTF8 */ |
||
841 | for(i=0; i |
||
842 | if(s[i] >= 0x80) ++length; |
||
843 | |||
844 | debug1("UTF-8 length: %lu", (unsigned long)length); |
||
845 | /* one extra zero byte for paranoia */ |
||
846 | if(!mpg123_resize_string(sb, length+1)){ mpg123_free_string(sb); return ; } |
||
847 | |||
848 | p = (unsigned char*) sb->p; /* Signedness doesn't matter but it shows I thought about the non-issue */ |
||
849 | for(i=0; i |
||
850 | if(s[i] < 0x80){ *p = s[i]; ++p; } |
||
851 | else /* two-byte encoding */ |
||
852 | { |
||
853 | *p = 0xc0 | (s[i]>>6); |
||
854 | *(p+1) = 0x80 | (s[i] & 0x3f); |
||
855 | p+=2; |
||
856 | } |
||
857 | |||
858 | sb->p[length] = 0; |
||
859 | sb->fill = length+1; |
||
860 | } |
||
861 | |||
/*
	Check if we have byte order mark(s) at the start of the data, return:
	-1: little endian (FF FE)
	 0: no BOM
	 1: big endian (FE FF)

	This modifies source and len to indicate the data _after_ the BOM(s).
	Note on nasty data: The last encountered BOM determines the endianness.
	Data with multiple BOMs has been seen in the wild, namely from "the"
	id3v2 program.

	The scan is a plain loop: the input is untrusted tag data and may
	consist of an arbitrarily long run of BOMs, so stack usage must not
	depend on how many of them there are.
*/
static int check_bom(const unsigned char** source, size_t *len)
{
	int last_bom = 0;

	/* Each round consumes one BOM; stop at the first non-BOM pair or when less than a pair is left. */
	while(*len >= 2)
	{
		const unsigned char *s = *source;
		int this_bom = 0;

		if(s[0] == 0xff && s[1] == 0xfe) this_bom = -1;
		else if(s[0] == 0xfe && s[1] == 0xff) this_bom = 1;

		if(this_bom == 0) break;

		/* Skip the detected BOM. The last one wins! */
		last_bom = this_bom;
		*source += 2;
		*len -= 2;
	}

	return last_bom;
}
||
898 | |||
899 | #define FULLPOINT(f,s) ( (((f)&0x3ff)<<10) + ((s)&0x3ff) + 0x10000 ) |
||
900 | /* Remember: There's a limit at 0x1ffff. */ |
||
901 | #define UTF8LEN(x) ( (x)<0x80 ? 1 : ((x)<0x800 ? 2 : ((x)<0x10000 ? 3 : 4))) |
||
902 | static void convert_utf16bom(mpg123_string *sb, const unsigned char* s, size_t l, const int noquiet) |
||
903 | { |
||
904 | size_t i; |
||
905 | size_t n; /* number bytes that make up full pairs */ |
||
906 | unsigned char *p; |
||
907 | size_t length = 0; /* the resulting UTF-8 length */ |
||
908 | /* Determine real length... extreme case can be more than utf-16 length. */ |
||
909 | size_t high = 0; |
||
910 | size_t low = 1; |
||
911 | int bom_endian; |
||
912 | |||
913 | debug1("convert_utf16 with length %lu", (unsigned long)l); |
||
914 | |||
915 | bom_endian = check_bom(&s, &l); |
||
3960 | Serge | 916 | debug1("UTF16 endianness check: %i", bom_endian); |
1905 | serge | 917 | |
918 | if(bom_endian == -1) /* little-endian */ |
||
919 | { |
||
920 | high = 1; /* The second byte is the high byte. */ |
||
921 | low = 0; /* The first byte is the low byte. */ |
||
922 | } |
||
923 | |||
924 | n = (l/2)*2; /* number bytes that make up full pairs */ |
||
925 | |||
926 | /* first: get length, check for errors -- stop at first one */ |
||
927 | for(i=0; i < n; i+=2) |
||
928 | { |
||
929 | unsigned long point = ((unsigned long) s[i+high]<<8) + s[i+low]; |
||
930 | if((point & 0xd800) == 0xd800) /* lead surrogate */ |
||
931 | { |
||
932 | unsigned short second = (i+3 < l) ? (s[i+2+high]<<8) + s[i+2+low] : 0; |
||
933 | if((second & 0xdc00) == 0xdc00) /* good... */ |
||
934 | { |
||
935 | point = FULLPOINT(point,second); |
||
936 | length += UTF8LEN(point); /* possibly 4 bytes */ |
||
937 | i+=2; /* We overstepped one word. */ |
||
938 | } |
||
939 | else /* if no valid pair, break here */ |
||
940 | { |
||
941 | if(noquiet) error2("Invalid UTF16 surrogate pair at %li (0x%04lx).", (unsigned long)i, point); |
||
942 | n = i; /* Forget the half pair, END! */ |
||
943 | break; |
||
944 | } |
||
945 | } |
||
946 | else length += UTF8LEN(point); /* 1,2 or 3 bytes */ |
||
947 | } |
||
948 | |||
949 | if(!mpg123_resize_string(sb, length+1)){ mpg123_free_string(sb); return ; } |
||
950 | |||
951 | /* Now really convert, skip checks as these have been done just before. */ |
||
952 | p = (unsigned char*) sb->p; /* Signedness doesn't matter but it shows I thought about the non-issue */ |
||
953 | for(i=0; i < n; i+=2) |
||
954 | { |
||
955 | unsigned long codepoint = ((unsigned long) s[i+high]<<8) + s[i+low]; |
||
956 | if((codepoint & 0xd800) == 0xd800) /* lead surrogate */ |
||
957 | { |
||
958 | unsigned short second = (s[i+2+high]<<8) + s[i+2+low]; |
||
959 | codepoint = FULLPOINT(codepoint,second); |
||
960 | i+=2; /* We overstepped one word. */ |
||
961 | } |
||
962 | if(codepoint < 0x80) *p++ = (unsigned char) codepoint; |
||
963 | else if(codepoint < 0x800) |
||
964 | { |
||
965 | *p++ = (unsigned char) (0xc0 | (codepoint>>6)); |
||
966 | *p++ = (unsigned char) (0x80 | (codepoint & 0x3f)); |
||
967 | } |
||
968 | else if(codepoint < 0x10000) |
||
969 | { |
||
970 | *p++ = (unsigned char) (0xe0 | (codepoint>>12)); |
||
971 | *p++ = 0x80 | ((codepoint>>6) & 0x3f); |
||
972 | *p++ = 0x80 | (codepoint & 0x3f); |
||
973 | } |
||
974 | else if (codepoint < 0x200000) |
||
975 | { |
||
976 | *p++ = (unsigned char) (0xf0 | codepoint>>18); |
||
977 | *p++ = (unsigned char) (0x80 | ((codepoint>>12) & 0x3f)); |
||
978 | *p++ = (unsigned char) (0x80 | ((codepoint>>6) & 0x3f)); |
||
979 | *p++ = (unsigned char) (0x80 | (codepoint & 0x3f)); |
||
980 | } /* ignore bigger ones (that are not possible here anyway) */ |
||
981 | } |
||
982 | sb->p[sb->size-1] = 0; /* paranoia... */ |
||
983 | sb->fill = sb->size; |
||
984 | } |
||
985 | #undef UTF8LEN |
||
986 | #undef FULLPOINT |
||
987 | |||
988 | static void convert_utf8(mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet) |
||
989 | { |
||
990 | if(mpg123_resize_string(sb, len+1)) |
||
991 | { |
||
992 | memcpy(sb->p, source, len); |
||
993 | sb->p[len] = 0; |
||
994 | sb->fill = len+1; |
||
995 | } |
||
996 | else mpg123_free_string(sb); |
||
997 | } |
||
998 | |||
999 | #endif |