Subversion Repositories Kolibri OS

Rev

Rev 1905 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
1905 serge 1
/*
2
	id3: ID3v2.3 and ID3v2.4 parsing (a relevant subset)
3
 
4
	copyright 2006-2008 by the mpg123 project - free software under the terms of the LGPL 2.1
5
	see COPYING and AUTHORS files in distribution or http://mpg123.org
6
	initially written by Thomas Orgis
7
*/
8
 
9
#include "mpg123lib_intern.h"
10
#include "id3.h"
11
#include "debug.h"
12
 
13
#ifndef NO_ID3V2 /* Only the main parsing routine will always be there. */
14
 
15
/* We know the usual text frames plus some specifics. */
16
#define KNOWN_FRAMES 4
/* Four-character IDs of the frames that get dedicated handling. The position
   of an ID in this table is the matching non-negative enum frame_types value. */
static const char frame_type[KNOWN_FRAMES][5] =
{
	"COMM",
	"TXXX",
	"RVA2",
	"USLT"
};
/* Frame classification; the two negative values have no frame_type entry. */
enum frame_types
{
	unknown = -2,
	text    = -1,
	comment =  0,
	extra   =  1,
	rva2    =  2,
	uslt    =  3
};
19
 
20
/* UTF support definitions */
21
 
22
typedef void (*text_converter)(mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet);
23
 
24
static void convert_latin1  (mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet);
25
static void convert_utf16bom(mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet);
26
static void convert_utf8    (mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet);
27
 
28
static const text_converter text_converters[4] =
29
{
30
	convert_latin1,
31
	/* We always check for (multiple) BOM in 16bit unicode. Without BOM, UTF16 BE is the default.
32
	   Errors in encoding are detected anyway. */
33
	convert_utf16bom,
34
	convert_utf16bom,
35
	convert_utf8
36
};
37
 
3960 Serge 38
/* Width in bytes of one character unit (and of the terminating zero) for each
   text encoding byte; indexed like text_converters. */
static const unsigned int encoding_widths[4] = { 1, 2, 2, 1 };
1905 serge 39
 
40
/* the code starts here... */
41
 
42
/* Reset the convenience links (title, artist, ...) to NULL.
   The lists they may have pointed into are left alone. */
static void null_id3_links(mpg123_handle *fr)
{
	mpg123_id3v2 *tag = &fr->id3v2;

	tag->title   = NULL;
	tag->artist  = NULL;
	tag->album   = NULL;
	tag->year    = NULL;
	tag->genre   = NULL;
	tag->comment = NULL;
}
51
 
52
/* Put the ID3v2 part of the handle into the "no tag present" state:
   version 0, no links, all three lists empty.
   Nothing is freed here; the list pointers are simply overwritten. */
void init_id3(mpg123_handle *fr)
{
	mpg123_id3v2 *tag = &fr->id3v2;

	tag->version = 0; /* nothing there */
	null_id3_links(fr);

	tag->comment_list = NULL;
	tag->comments     = 0;

	tag->text  = NULL;
	tag->texts = 0;

	tag->extra  = NULL;
	tag->extras = 0;
}
63
 
64
/* Managing of the text, comment and extra lists. */
65
 
66
/* Initialize one element. */
67
static void init_mpg123_text(mpg123_text *txt)
68
{
69
	mpg123_init_string(&txt->text);
70
	mpg123_init_string(&txt->description);
71
	txt->id[0] = 0;
72
	txt->id[1] = 0;
73
	txt->id[2] = 0;
74
	txt->id[3] = 0;
75
	txt->lang[0] = 0;
76
	txt->lang[1] = 0;
77
	txt->lang[2] = 0;
78
}
79
 
80
/* Free memory of one element. */
81
static void free_mpg123_text(mpg123_text *txt)
82
{
83
	mpg123_free_string(&txt->text);
84
	mpg123_free_string(&txt->description);
85
}
86
 
87
/* Free memory of whole list. */
88
#define free_comment(mh) free_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments))
89
#define free_text(mh)    free_id3_text(&((mh)->id3v2.text),         &((mh)->id3v2.texts))
90
#define free_extra(mh)   free_id3_text(&((mh)->id3v2.extra),        &((mh)->id3v2.extras))
91
static void free_id3_text(mpg123_text **list, size_t *size)
92
{
93
	size_t i;
94
	for(i=0; i<*size; ++i) free_mpg123_text(&((*list)[i]));
95
 
96
	free(*list);
97
	*list = NULL;
98
	*size = 0;
99
}
100
 
101
/* Add items to the list. */
102
#define add_comment(mh) add_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments))
103
#define add_text(mh)    add_id3_text(&((mh)->id3v2.text),         &((mh)->id3v2.texts))
104
#define add_extra(mh)   add_id3_text(&((mh)->id3v2.extra),        &((mh)->id3v2.extras))
105
static mpg123_text *add_id3_text(mpg123_text **list, size_t *size)
106
{
107
	mpg123_text *x = safe_realloc(*list, sizeof(mpg123_text)*(*size+1));
108
	if(x == NULL) return NULL; /* bad */
109
 
110
	*list  = x;
111
	*size += 1;
112
	init_mpg123_text(&((*list)[*size-1]));
113
 
114
	return &((*list)[*size-1]); /* Return pointer to the added text. */
115
}
116
 
117
/* Remove the last item. */
118
#define pop_comment(mh) pop_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments))
119
#define pop_text(mh)    pop_id3_text(&((mh)->id3v2.text),         &((mh)->id3v2.texts))
120
#define pop_extra(mh)   pop_id3_text(&((mh)->id3v2.extra),        &((mh)->id3v2.extras))
121
static void pop_id3_text(mpg123_text **list, size_t *size)
122
{
123
	mpg123_text *x;
124
	if(*size < 1) return;
125
 
126
	free_mpg123_text(&((*list)[*size-1]));
127
	if(*size > 1)
128
	{
129
		x = safe_realloc(*list, sizeof(mpg123_text)*(*size-1));
130
		if(x != NULL){ *list  = x; *size -= 1; }
131
	}
132
	else
133
	{
134
		free(*list);
135
		*list = NULL;
136
		*size = 0;
137
	}
138
}
139
 
140
/* OK, back to the higher level functions. */
141
 
142
/* Free all ID3v2 lists held by the handle (comments, extras, texts, in that order).
   The convenience links and the version field are not touched here. */
void exit_id3(mpg123_handle *fr)
{
	mpg123_id3v2 *tag = &fr->id3v2;

	free_id3_text(&tag->comment_list, &tag->comments);
	free_id3_text(&tag->extra,        &tag->extras);
	free_id3_text(&tag->text,         &tag->texts);
}
148
 
149
/* Throw away any stored ID3v2 data and return to the initial, empty state. */
void reset_id3(mpg123_handle *fr)
{
	/* Same work as exit_id3(): release the three lists ... */
	free_comment(fr);
	free_extra(fr);
	free_text(fr);
	/* ... then start over with a clean slate. */
	init_id3(fr);
}
154
 
155
/* Set the id3v2.artist id3v2.title ... links to elements of the array. */
156
void id3_link(mpg123_handle *fr)
157
{
158
	size_t i;
159
	mpg123_id3v2 *v2 = &fr->id3v2;
160
	debug("linking ID3v2");
161
	null_id3_links(fr);
162
	for(i=0; itexts; ++i)
163
	{
164
		mpg123_text *entry = &v2->text[i];
165
		if     (!strncmp("TIT2", entry->id, 4)) v2->title  = &entry->text;
166
		else if(!strncmp("TALB", entry->id, 4)) v2->album  = &entry->text;
167
		else if(!strncmp("TPE1", entry->id, 4)) v2->artist = &entry->text;
168
		else if(!strncmp("TYER", entry->id, 4)) v2->year   = &entry->text;
169
		else if(!strncmp("TCON", entry->id, 4)) v2->genre  = &entry->text;
170
	}
171
	for(i=0; icomments; ++i)
172
	{
173
		mpg123_text *entry = &v2->comment_list[i];
174
		if(entry->description.fill == 0 || entry->description.p[0] == 0)
175
		v2->comment = &entry->text;
176
	}
177
	/* When no generic comment found, use the last non-generic one. */
178
	if(v2->comment == NULL && v2->comments > 0)
179
	v2->comment = &v2->comment_list[v2->comments-1].text;
180
}
181
 
182
/*
183
	Store ID3 text data in an mpg123_string; either verbatim copy or everything translated to UTF-8 encoding.
184
	Preserve the zero string separator (I don't need strlen for the total size).
185
 
186
	ID3v2 standard says that there should be one text frame of specific type per tag, and subsequent tags overwrite old values.
187
	So, I always replace the text that may be stored already (perhaps with a list of zero-separated strings, though).
188
*/
3960 Serge 189
static void store_id3_text(mpg123_string *sb, char *source, size_t source_size, const int noquiet, const int notranslate)
1905 serge 190
{
191
	if(!source_size)
192
	{
193
		debug("Empty id3 data!");
194
		return;
195
	}
196
 
197
	/* We shall just copy the data. Client wants to decode itself. */
198
	if(notranslate)
199
	{
200
		/* Future: Add a path for ID3 errors. */
201
		if(!mpg123_resize_string(sb, source_size))
202
		{
203
			if(noquiet) error("Cannot resize target string, out of memory?");
204
			return;
205
		}
206
		memcpy(sb->p, source, source_size);
207
		sb->fill = source_size;
208
		debug1("stored undecoded ID3 text of size %"SIZE_P, (size_p)source_size);
209
		return;
210
	}
211
 
212
	id3_to_utf8(sb, ((unsigned char *)source)[0], (unsigned char*)source+1, source_size-1, noquiet);
213
 
214
	if(sb->fill) debug1("UTF-8 string (the first one): %s", sb->p);
215
	else if(noquiet) error("unable to convert string to UTF-8 (out of memory, junk input?)!");
216
}
217
 
218
/* On error, sb->size is 0. */
219
void id3_to_utf8(mpg123_string *sb, unsigned char encoding, const unsigned char *source, size_t source_size, int noquiet)
220
{
221
	unsigned int bwidth;
222
	debug1("encoding: %u", encoding);
223
	/* A note: ID3v2.3 uses UCS-2 non-variable 16bit encoding, v2.4 uses UTF16.
224
	   UTF-16 uses a reserved/private range in UCS-2 to add the magic, so we just always treat it as UTF. */
225
	if(encoding > mpg123_id3_enc_max)
226
	{
227
		if(noquiet) error1("Unknown text encoding %u, I take no chances, sorry!", encoding);
228
 
229
		mpg123_free_string(sb);
230
		return;
231
	}
232
	bwidth = encoding_widths[encoding];
233
	/* Hack! I've seen a stray zero byte before BOM. Is that supposed to happen? */
234
	if(encoding != mpg123_id3_utf16be) /* UTF16be _can_ beging with a null byte! */
235
	while(source_size > bwidth && source[0] == 0)
236
	{
237
		--source_size;
238
		++source;
239
		debug("skipped leading zero");
240
	}
241
	if(source_size % bwidth)
242
	{
243
		/* When we need two bytes for a character, it's strange to have an uneven bytestream length. */
244
		if(noquiet) warning2("Weird tag size %d for encoding %u - I will probably trim too early or something but I think the MP3 is broken.", (int)source_size, encoding);
245
		source_size -= source_size % bwidth;
246
	}
247
	text_converters[encoding](sb, source, source_size, noquiet);
248
}
249
 
3960 Serge 250
static char *next_text(char* prev, int encoding, size_t limit)
1905 serge 251
{
252
	char *text = prev;
253
	size_t width = encoding_widths[encoding];
254
 
255
	/* So I go lengths to find zero or double zero...
256
	   Remember bug 2834636: Only check for aligned NULLs! */
257
	while(text-prev < (ssize_t)limit)
258
	{
259
		if(text[0] == 0)
260
		{
261
			if(width <= limit-(text-prev))
262
			{
263
				size_t i = 1;
264
				for(; i
265
 
266
				if(i == width) /* found a null wide enough! */
267
				{
268
					text += width;
269
					break;
270
				}
271
			}
272
			else return NULL; /* No full character left? This text is broken */
273
		}
274
 
275
		text += width;
276
	}
3960 Serge 277
	if((size_t)(text-prev) >= limit) text = NULL;
1905 serge 278
 
279
	return text;
280
}
281
 
282
/* Human-readable name for an ID3v2 text encoding byte, for diagnostics only.
   Anything outside 0..3 yields "unknown!". */
static const char *enc_name(int enc)
{
	static const char *const names[4] =
	{
		"Latin 1",
		"UTF-16 BOM",
		"UTF-16 BE",
		"UTF-8"
	};

	if(enc < 0 || enc > 3) return "unknown!";

	return names[enc];
}
293
 
294
/* Store one text frame as a new entry of the text list.
   realdata: frame payload, i.e. the encoding byte followed by the encoded text
   realsize: number of payload bytes
   id:       four-character frame ID, copied into the entry
   Whether the text is converted or copied verbatim depends on MPG123_PLAIN_ID3TEXT. */
static void process_text(mpg123_handle *fr, char *realdata, size_t realsize, char *id)
{
	/* Text encoding          $xx */
	/* The text (encoded) ... */
	mpg123_text *t = add_text(fr);
	if(VERBOSE4) fprintf(stderr, "Note: Storing text from %s encoding\n", enc_name(realdata[0]));
	if(t == NULL)
	{
		if(NOQUIET) error("Unable to attach new text!");
		return;
	}
	memcpy(t->id, id, 4);
	store_id3_text(&t->text, realdata, realsize, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT);
	if(VERBOSE4)
	{
		/* Only print the text itself when it was actually converted. A verbatim
		   copy has no terminating zero and may be in any encoding, and a failed
		   or empty store leaves nothing printable at all. */
		if(!(fr->p.flags & MPG123_PLAIN_ID3TEXT) && t->text.fill > 0)
		fprintf(stderr, "Note: ID3v2 %c%c%c%c text frame: %s\n", id[0], id[1], id[2], id[3], t->text.p);
		else
		fprintf(stderr, "Note: ID3v2 %c%c%c%c text frame of length %"SIZE_P".\n", id[0], id[1], id[2], id[3], (size_p)t->text.fill);
	}
}
309
 
310
/* Store a new comment that perhaps is a RVA / RVA_ALBUM/AUDIOPHILE / RVA_MIX/RADIO one
311
   Special gimmik: It also stores USLT to the texts. Stucture is the same as for comments. */
312
static void process_comment(mpg123_handle *fr, enum frame_types tt, char *realdata, size_t realsize, int rva_level, char *id)
313
{
314
	/* Text encoding          $xx */
315
	/* Language               $xx xx xx */
316
	/* Short description (encoded!)       $00 (00) */
317
	/* Then the comment text (encoded) ... */
318
	char  encoding = realdata[0];
319
	char *lang    = realdata+1; /* I'll only use the 3 bytes! */
320
	char *descr   = realdata+4;
321
	char *text = NULL;
322
	mpg123_text *xcom = NULL;
323
	mpg123_text localcom; /* UTF-8 variant for local processing. */
324
 
325
	if((int)realsize < descr-realdata)
326
	{
327
		if(NOQUIET) error1("Invalid frame size of %lu (too small for anything).", (unsigned long)realsize);
328
		return;
329
	}
330
	xcom = (tt == uslt ? add_text(fr) : add_comment(fr));
331
	if(VERBOSE4) fprintf(stderr, "Note: Storing comment from %s encoding\n", enc_name(realdata[0]));
332
	if(xcom == NULL)
333
	{
334
		if(NOQUIET) error("Unable to attach new comment!");
335
		return;
336
	}
337
	memcpy(xcom->lang, lang, 3);
338
	memcpy(xcom->id, id, 4);
339
	/* Now I can abuse a byte from lang for the encoding. */
340
	descr[-1] = encoding;
341
	/* Be careful with finding the end of description, I have to honor encoding here. */
342
	text = next_text(descr, encoding, realsize-(descr-realdata));
343
	if(text == NULL)
344
	{
345
		if(NOQUIET) error("No comment text / valid description?");
346
		pop_comment(fr);
347
		return;
348
	}
349
 
350
	init_mpg123_text(&localcom);
351
	/* Store the text, without translation to UTF-8, but for comments always a local copy in UTF-8.
352
	   Reminder: No bailing out from here on without freeing the local comment data! */
353
	store_id3_text(&xcom->description, descr-1, text-descr+1, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT);
354
	if(tt == comment)
3960 Serge 355
	store_id3_text(&localcom.description, descr-1, text-descr+1, NOQUIET, 0);
1905 serge 356
 
357
	text[-1] = encoding; /* Byte abusal for encoding... */
358
	store_id3_text(&xcom->text, text-1, realsize+1-(text-realdata), NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT);
359
	/* Remember: I will probably decode the above (again) for rva comment checking. So no messing around, please. */
360
 
361
	if(VERBOSE4) /* Do _not_ print the verbatim text: The encoding might be funny! */
362
	{
363
		fprintf(stderr, "Note: ID3 comm/uslt desc of length %"SIZE_P".\n", (size_p)xcom->description.fill);
364
		fprintf(stderr, "Note: ID3 comm/uslt text of length %"SIZE_P".\n", (size_p)xcom->text.fill);
365
	}
366
	/* Look out for RVA info only when we really deal with a straight comment. */
367
	if(tt == comment && localcom.description.fill > 0)
368
	{
369
		int rva_mode = -1; /* mix / album */
370
		if(    !strcasecmp(localcom.description.p, "rva")
371
			 || !strcasecmp(localcom.description.p, "rva_mix")
372
			 || !strcasecmp(localcom.description.p, "rva_track")
373
			 || !strcasecmp(localcom.description.p, "rva_radio") )
374
		rva_mode = 0;
375
		else if(    !strcasecmp(localcom.description.p, "rva_album")
376
		         || !strcasecmp(localcom.description.p, "rva_audiophile")
377
		         || !strcasecmp(localcom.description.p, "rva_user") )
378
		rva_mode = 1;
379
		if((rva_mode > -1) && (fr->rva.level[rva_mode] <= rva_level))
380
		{
381
			/* Only translate the contents in here where we really need them. */
3960 Serge 382
			store_id3_text(&localcom.text, text-1, realsize+1-(text-realdata), NOQUIET, 0);
1905 serge 383
			if(localcom.text.fill > 0)
384
			{
385
				fr->rva.gain[rva_mode] = (float) atof(localcom.text.p);
386
				if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]);
387
				fr->rva.peak[rva_mode] = 0;
388
				fr->rva.level[rva_mode] = rva_level;
389
			}
390
		}
391
	}
392
	/* Make sure to free the local memory... */
393
	free_mpg123_text(&localcom);
394
}
395
 
3960 Serge 396
/* Store a user-defined text frame (TXXX) in the extra list and pick up
   ReplayGain values from it when the description names one.
   fr:        handle that receives the entry (and possibly new RVA values)
   realdata:  frame payload; one byte of it gets overwritten in place to smuggle
              the encoding byte in front of the text
   realsize:  number of payload bytes
   rva_level: significance of RVA info from this frame; it only replaces stored
              values whose level is not higher
   id:        four-character frame ID, copied into the entry */
static void process_extra(mpg123_handle *fr, char* realdata, size_t realsize, int rva_level, char *id)
{
	/* Text encoding          $xx */
	/* Description        ... $00 (00) */
	/* Text ... */
	char encoding;
	char *descr  = realdata+1; /* remember, the encoding is descr[-1] */
	char *text;
	mpg123_text *xex;
	mpg123_text localex;

	if((int)realsize < descr-realdata)
	{
		if(NOQUIET) error1("Invalid frame size of %lu (too small for anything).", (unsigned long)realsize);
		return;
	}
	/* Only look at the payload after we know it is there. */
	encoding = realdata[0];
	/* The encoding selects a table entry in next_text(); refuse junk before using it. */
	if((unsigned char)encoding > mpg123_id3_enc_max)
	{
		if(NOQUIET) error1("Unknown text encoding %u, I take no chances, sorry!", (unsigned char)encoding);
		return;
	}
	text = next_text(descr, encoding, realsize-(descr-realdata));
	if(VERBOSE4) fprintf(stderr, "Note: Storing extra from %s encoding\n", enc_name(realdata[0]));
	if(text == NULL)
	{
		if(NOQUIET) error("No extra frame text / valid description?");
		return;
	}
	xex = add_extra(fr);
	if(xex == NULL)
	{
		if(NOQUIET) error("Unable to attach new extra text!");
		return;
	}
	memcpy(xex->id, id, 4);
	init_mpg123_text(&localex); /* For our local copy. */

	/* The outside storage gets reencoded to UTF-8 only if not requested otherwise.
	   Remember that we really need the -1 here to hand in the encoding byte!*/
	store_id3_text(&xex->description, descr-1, text-descr+1, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT);
	/* Our local copy is always stored in UTF-8! */
	store_id3_text(&localex.description, descr-1, text-descr+1, NOQUIET, 0);
	/* At first, only store the outside copy of the payload. We may not need the local copy. */
	text[-1] = encoding;
	store_id3_text(&xex->text, text-1, realsize-(text-realdata)+1, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT);

	/* Now check if we would like to interpret this extra info for RVA. */
	if(localex.description.fill > 0)
	{
		int is_peak = 0;
		int rva_mode = -1; /* mix / album */

		if(!strncasecmp(localex.description.p, "replaygain_track_",17))
		{
			if(VERBOSE3) fprintf(stderr, "Note: RVA ReplayGain track gain/peak\n");

			rva_mode = 0;
			if(!strcasecmp(localex.description.p, "replaygain_track_peak")) is_peak = 1;
			else if(strcasecmp(localex.description.p, "replaygain_track_gain")) rva_mode = -1;
		}
		else
		if(!strncasecmp(localex.description.p, "replaygain_album_",17))
		{
			if(VERBOSE3) fprintf(stderr, "Note: RVA ReplayGain album gain/peak\n");

			rva_mode = 1;
			if(!strcasecmp(localex.description.p, "replaygain_album_peak")) is_peak = 1;
			else if(strcasecmp(localex.description.p, "replaygain_album_gain")) rva_mode = -1;
		}
		if((rva_mode > -1) && (fr->rva.level[rva_mode] <= rva_level))
		{
			/* Now we need the translated copy of the data. */
			store_id3_text(&localex.text, text-1, realsize-(text-realdata)+1, NOQUIET, 0);
			if(localex.text.fill > 0)
			{
				if(is_peak)
				{
					fr->rva.peak[rva_mode] = (float) atof(localex.text.p);
					if(VERBOSE3) fprintf(stderr, "Note: RVA peak %f\n", fr->rva.peak[rva_mode]);
				}
				else
				{
					fr->rva.gain[rva_mode] = (float) atof(localex.text.p);
					if(VERBOSE3) fprintf(stderr, "Note: RVA gain %fdB\n", fr->rva.gain[rva_mode]);
				}
				fr->rva.level[rva_mode] = rva_level;
			}
		}
	}

	free_mpg123_text(&localex);
}
483
 
484
/* Make a ID3v2.3+ 4-byte ID from a ID3v2.2 3-byte ID
485
   Note that not all frames survived to 2.4; the mapping goes to 2.3 .
486
   A notable miss is the old RVA frame, which is very unspecific anyway.
487
   This function returns -1 when a not known 3 char ID was encountered, 0 otherwise. */
3960 Serge 488
static int promote_framename(mpg123_handle *fr, char *id) /* fr because of VERBOSE macros */
1905 serge 489
{
490
	size_t i;
491
	char *old[] =
492
	{
493
		"COM",  "TAL",  "TBP",  "TCM",  "TCO",  "TCR",  "TDA",  "TDY",  "TEN",  "TFT",
494
		"TIM",  "TKE",  "TLA",  "TLE",  "TMT",  "TOA",  "TOF",  "TOL",  "TOR",  "TOT",
495
		"TP1",  "TP2",  "TP3",  "TP4",  "TPA",  "TPB",  "TRC",  "TDA",  "TRK",  "TSI",
496
		"TSS",  "TT1",  "TT2",  "TT3",  "TXT",  "TXX",  "TYE"
497
	};
498
	char *new[] =
499
	{
500
		"COMM", "TALB", "TBPM", "TCOM", "TCON", "TCOP", "TDAT", "TDLY", "TENC", "TFLT",
501
		"TIME", "TKEY", "TLAN", "TLEN", "TMED", "TOPE", "TOFN", "TOLY", "TORY", "TOAL",
502
		"TPE1", "TPE2", "TPE3", "TPE4", "TPOS", "TPUB", "TSRC", "TRDA", "TRCK", "TSIZ",
503
		"TSSE", "TIT1", "TIT2", "TIT3", "TEXT", "TXXX", "TYER"
504
	};
505
	for(i=0; i
506
	{
507
		if(!strncmp(id, old[i], 3))
508
		{
509
			memcpy(id, new[i], 4);
510
			if(VERBOSE3) fprintf(stderr, "Translated ID3v2.2 frame %s to %s\n", old[i], new[i]);
511
			return 0;
512
		}
513
	}
514
	if(VERBOSE3) fprintf(stderr, "Ignoring untranslated ID3v2.2 frame %c%c%c\n", id[0], id[1], id[2]);
515
	return -1;
516
}
517
 
518
#endif /* NO_ID3V2 */
519
 
520
/*
521
	trying to parse ID3v2.3 and ID3v2.4 tags...
522
 
523
	returns:  0: bad or just unparseable tag
524
	          1: good, (possibly) new tag info
525
	         <0: reader error (may need more data feed, try again)
526
*/
527
int parse_new_id3(mpg123_handle *fr, unsigned long first4bytes)
528
{
529
	#define UNSYNC_FLAG 128
530
	#define EXTHEAD_FLAG 64
531
	#define EXP_FLAG 32
532
	#define FOOTER_FLAG 16
533
	#define UNKNOWN_FLAGS 15 /* 00001111*/
534
	unsigned char buf[6];
535
	unsigned long length=0;
536
	unsigned char flags = 0;
537
	int ret = 1;
538
	int ret2;
539
	unsigned char major = first4bytes & 0xff;
540
	debug1("ID3v2: major tag version: %i", major);
541
	if(major == 0xff) return 0; /* Invalid... */
542
	if((ret2 = fr->rd->read_frame_body(fr, buf, 6)) < 0) /* read more header information */
543
	return ret2;
544
 
545
	if(buf[0] == 0xff) return 0; /* Revision, will never be 0xff. */
546
 
547
	/* second new byte are some nice flags, if these are invalid skip the whole thing */
548
	flags = buf[1];
549
	debug1("ID3v2: flags 0x%08x", flags);
550
	/* use 4 bytes from buf to construct 28bit uint value and return 1; return 0 if bytes are not synchsafe */
551
	#define synchsafe_to_long(buf,res) \
552
	( \
553
		(((buf)[0]|(buf)[1]|(buf)[2]|(buf)[3]) & 0x80) ? 0 : \
554
		(res =  (((unsigned long) (buf)[0]) << 21) \
555
		     | (((unsigned long) (buf)[1]) << 14) \
556
		     | (((unsigned long) (buf)[2]) << 7) \
557
		     |  ((unsigned long) (buf)[3]) \
558
		,1) \
559
	)
560
	/* id3v2.3 does not store synchsafe frame sizes, but synchsafe tag size - doh! */
561
	#define bytes_to_long(buf,res) \
562
	( \
563
		major == 3 ? \
564
		(res =  (((unsigned long) (buf)[0]) << 24) \
565
		     | (((unsigned long) (buf)[1]) << 16) \
566
		     | (((unsigned long) (buf)[2]) << 8) \
567
		     |  ((unsigned long) (buf)[3]) \
568
		,1) : synchsafe_to_long(buf,res) \
569
	)
570
	/* for id3v2.2 only */
571
	#define threebytes_to_long(buf,res) \
572
	( \
573
		res =  (((unsigned long) (buf)[0]) << 16) \
574
		     | (((unsigned long) (buf)[1]) << 8) \
575
		     |  ((unsigned long) (buf)[2]) \
576
		,1 \
577
	)
578
 
579
	/* length-10 or length-20 (footer present); 4 synchsafe integers == 28 bit number  */
580
	/* we have already read 10 bytes, so left are length or length+10 bytes belonging to tag */
581
	if(!synchsafe_to_long(buf+2,length))
582
	{
583
		if(NOQUIET) error4("Bad tag length (not synchsafe): 0x%02x%02x%02x%02x; You got a bad ID3 tag here.", buf[2],buf[3],buf[4],buf[5]);
584
		return 0;
585
	}
586
	debug1("ID3v2: tag data length %lu", length);
587
#ifndef NO_ID3V2
588
	if(VERBOSE2) fprintf(stderr,"Note: ID3v2.%i rev %i tag of %lu bytes\n", major, buf[0], length);
589
	/* skip if unknown version/scary flags, parse otherwise */
3960 Serge 590
	if(fr->p.flags & MPG123_SKIP_ID3V2 || ((flags & UNKNOWN_FLAGS) || (major > 4) || (major < 2)))
1905 serge 591
	{
3960 Serge 592
		if(NOQUIET)
593
		{
594
			if(fr->p.flags & MPG123_SKIP_ID3V2)
595
			{
596
				if(VERBOSE3) fprintf(stderr, "Note: Skipping ID3v2 tag per user request.\n");
597
			}
598
			else /* Must be because of scary Tag properties. */
599
			warning2("ID3v2: Won't parse the ID3v2 tag with major version %u and flags 0x%xu - some extra code may be needed", major, flags);
600
		}
1905 serge 601
#endif
602
		if((ret2 = fr->rd->skip_bytes(fr,length)) < 0) /* will not store data in backbuff! */
603
		ret = ret2;
604
#ifndef NO_ID3V2
605
	}
606
	else
607
	{
3960 Serge 608
		unsigned char* tagdata = NULL;
1905 serge 609
		fr->id3v2.version = major;
610
		/* try to interpret that beast */
611
		if((tagdata = (unsigned char*) malloc(length+1)) != NULL)
612
		{
613
			debug("ID3v2: analysing frames...");
614
			if((ret2 = fr->rd->read_frame_body(fr,tagdata,length)) > 0)
615
			{
616
				unsigned long tagpos = 0;
617
				debug1("ID3v2: have read at all %lu bytes for the tag now", (unsigned long)length+6);
618
				/* going to apply strlen for strings inside frames, make sure that it doesn't overflow! */
619
				tagdata[length] = 0;
620
				if(flags & EXTHEAD_FLAG)
621
				{
622
					debug("ID3v2: skipping extended header");
623
					if(!bytes_to_long(tagdata, tagpos))
624
					{
625
						ret = 0;
626
						if(NOQUIET) error4("Bad (non-synchsafe) tag offset: 0x%02x%02x%02x%02x", tagdata[0], tagdata[1], tagdata[2], tagdata[3]);
627
					}
628
				}
629
				if(ret > 0)
630
				{
631
					char id[5];
632
					unsigned long framesize;
633
					unsigned long fflags; /* need 16 bits, actually */
634
					id[4] = 0;
635
					/* pos now advanced after ext head, now a frame has to follow */
636
					while(tagpos < length-10) /* I want to read at least a full header */
637
					{
638
						int i = 0;
639
						unsigned long pos = tagpos;
640
						int head_part = fr->id3v2.version == 2 ? 3 : 4; /* bytes of frame title and of framesize value */
641
						/* level 1,2,3 - 0 is info from lame/info tag! */
642
						/* rva tags with ascending significance, then general frames */
643
						enum frame_types tt = unknown;
644
						/* we may have entered the padding zone or any other strangeness: check if we have valid frame id characters */
645
						for(i=0; i< head_part; ++i)
646
						if( !( ((tagdata[tagpos+i] > 47) && (tagdata[tagpos+i] < 58))
647
						    || ((tagdata[tagpos+i] > 64) && (tagdata[tagpos+i] < 91)) ) )
648
						{
649
							debug5("ID3v2: real tag data apparently ended after %lu bytes with 0x%02x%02x%02x%02x", tagpos, tagdata[tagpos], tagdata[tagpos+1], tagdata[tagpos+2], tagdata[tagpos+3]);
650
							/* This is no hard error... let's just hope that we got something meaningful already (ret==1 in that case). */
651
							goto tagparse_cleanup; /* Need to escape two loops here. */
652
						}
653
						if(ret > 0)
654
						{
655
							/* 4 or 3 bytes id */
656
							strncpy(id, (char*) tagdata+pos, head_part);
3960 Serge 657
							id[head_part] = 0; /* terminate for 3 or 4 bytes */
1905 serge 658
							pos += head_part;
659
							tagpos += head_part;
660
							/* size as 32 bits or 28 bits */
661
							if(fr->id3v2.version == 2) threebytes_to_long(tagdata+pos, framesize);
662
							else
663
							if(!bytes_to_long(tagdata+pos, framesize))
664
							{
665
								/* Just assume that up to now there was some good data. */
666
								if(NOQUIET) error1("ID3v2: non-syncsafe size of %s frame, skipping the remainder of tag", id);
667
								break;
668
							}
669
							if(VERBOSE3) fprintf(stderr, "Note: ID3v2 %s frame of size %lu\n", id, framesize);
670
							tagpos += head_part + framesize; /* the important advancement in whole tag */
671
							if(tagpos > length)
672
							{
673
								if(NOQUIET) error("Whoa! ID3v2 frame claims to be larger than the whole rest of the tag.");
674
								break;
675
							}
676
							pos += head_part;
677
							if(fr->id3v2.version > 2)
678
							{
679
								fflags  = (((unsigned long) tagdata[pos]) << 8) | ((unsigned long) tagdata[pos+1]);
680
								pos    += 2;
681
								tagpos += 2;
682
							}
683
							else fflags = 0;
684
							/* for sanity, after full parsing tagpos should be == pos */
685
							/* debug4("ID3v2: found %s frame, size %lu (as bytes: 0x%08lx), flags 0x%016lx", id, framesize, framesize, fflags); */
686
							/* %0abc0000 %0h00kmnp */
687
							#define BAD_FFLAGS (unsigned long) 36784
688
							#define PRES_TAG_FFLAG 16384
689
							#define PRES_FILE_FFLAG 8192
690
							#define READ_ONLY_FFLAG 4096
691
							#define GROUP_FFLAG 64
692
							#define COMPR_FFLAG 8
693
							#define ENCR_FFLAG 4
694
							#define UNSYNC_FFLAG 2
695
							#define DATLEN_FFLAG 1
696
							if(head_part < 4 && promote_framename(fr, id) != 0) continue;
697
 
698
							/* shall not or want not handle these */
699
							if(fflags & (BAD_FFLAGS | COMPR_FFLAG | ENCR_FFLAG))
700
							{
701
								if(NOQUIET) warning("ID3v2: skipping invalid/unsupported frame");
702
								continue;
703
							}
704
 
705
							for(i = 0; i < KNOWN_FRAMES; ++i)
706
							if(!strncmp(frame_type[i], id, 4)){ tt = i; break; }
707
 
708
							if(id[0] == 'T' && tt != extra) tt = text;
709
 
710
							if(tt != unknown)
711
							{
712
								int rva_mode = -1; /* mix / album */
713
								unsigned long realsize = framesize;
714
								unsigned char* realdata = tagdata+pos;
715
								if((flags & UNSYNC_FLAG) || (fflags & UNSYNC_FFLAG))
716
								{
717
									unsigned long ipos = 0;
718
									unsigned long opos = 0;
719
									debug("Id3v2: going to de-unsync the frame data");
720
									/* de-unsync: FF00 -> FF; real FF00 is simply represented as FF0000 ... */
721
									/* damn, that means I have to delete bytes from withing the data block... thus need temporal storage */
722
									/* standard mandates that de-unsync should always be safe if flag is set */
723
									realdata = (unsigned char*) malloc(framesize); /* will need <= bytes */
724
									if(realdata == NULL)
725
									{
726
										if(NOQUIET) error("ID3v2: unable to allocate working buffer for de-unsync");
727
										continue;
728
									}
729
									/* now going byte per byte through the data... */
730
									realdata[0] = tagdata[pos];
731
									opos = 1;
732
									for(ipos = pos+1; ipos < pos+framesize; ++ipos)
733
									{
734
										if(!((tagdata[ipos] == 0) && (tagdata[ipos-1] == 0xff)))
735
										{
736
											realdata[opos++] = tagdata[ipos];
737
										}
738
									}
739
									realsize = opos;
740
									debug2("ID3v2: de-unsync made %lu out of %lu bytes", realsize, framesize);
741
								}
742
								pos = 0; /* now at the beginning again... */
743
								switch(tt)
744
								{
745
									case comment:
746
									case uslt:
747
										process_comment(fr, tt, (char*)realdata, realsize, comment+1, id);
748
									break;
749
									case extra: /* perhaps foobar2000's work */
750
										process_extra(fr, (char*)realdata, realsize, extra+1, id);
751
									break;
752
									case rva2: /* "the" RVA tag */
753
									{
754
										/* starts with null-terminated identification */
755
										if(VERBOSE3) fprintf(stderr, "Note: RVA2 identification \"%s\"\n", realdata);
756
										/* default: some individual value, mix mode */
757
										rva_mode = 0;
758
										if( !strncasecmp((char*)realdata, "album", 5)
759
										    || !strncasecmp((char*)realdata, "audiophile", 10)
760
										    || !strncasecmp((char*)realdata, "user", 4))
761
										rva_mode = 1;
762
										if(fr->rva.level[rva_mode] <= rva2+1)
763
										{
764
											pos += strlen((char*) realdata) + 1;
765
											if(realdata[pos] == 1)
766
											{
767
												++pos;
768
												/* only handle master channel */
769
												debug("ID3v2: it is for the master channel");
770
												/* two bytes adjustment, one byte for bits representing peak - n bytes, eh bits, for peak */
771
												/* 16 bit signed integer = dB * 512  ... the double cast is needed to preserve the sign of negative values! */
772
												fr->rva.gain[rva_mode] = (float) ( (((short)((signed char)realdata[pos])) << 8) | realdata[pos+1] ) / 512;
773
												pos += 2;
774
												if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]);
775
												/* heh, the peak value is represented by a number of bits - but in what manner? Skipping that part */
776
												fr->rva.peak[rva_mode] = 0;
777
												fr->rva.level[rva_mode] = rva2+1;
778
											}
779
										}
780
									}
781
									break;
782
									/* non-rva metainfo, simply store... */
783
									case text:
784
										process_text(fr, (char*)realdata, realsize, id);
785
									break;
786
									default: if(NOQUIET) error1("ID3v2: unknown frame type %i", tt);
787
								}
788
								if((flags & UNSYNC_FLAG) || (fflags & UNSYNC_FFLAG)) free(realdata);
789
							}
790
							#undef BAD_FFLAGS
791
							#undef PRES_TAG_FFLAG
792
							#undef PRES_FILE_FFLAG
793
							#undef READ_ONLY_FFLAG
794
							#undef GROUP_FFLAG
795
							#undef COMPR_FFLAG
796
							#undef ENCR_FFLAG
797
							#undef UNSYNC_FFLAG
798
							#undef DATLEN_FFLAG
799
						}
800
						else break;
801
						#undef KNOWN_FRAMES
802
					}
803
				}
804
			}
805
			else
806
			{
3960 Serge 807
				/* There are tags with zero length. Strictly not an error, then. */
808
				if(length > 0 && NOQUIET && ret2 != MPG123_NEED_MORE) error("ID3v2: Duh, not able to read ID3v2 tag data.");
1905 serge 809
				ret = ret2;
810
			}
811
tagparse_cleanup:
812
			free(tagdata);
813
		}
814
		else
815
		{
816
			if(NOQUIET) error1("ID3v2: Arrg! Unable to allocate %lu bytes for interpreting ID3v2 data - trying to skip instead.", length);
817
			if((ret2 = fr->rd->skip_bytes(fr,length)) < 0) ret = ret2; /* will not store data in backbuff! */
818
			else ret = 0;
819
		}
820
	}
821
#endif /* NO_ID3V2 */
822
	/* skip footer if present */
823
	if((ret > 0) && (flags & FOOTER_FLAG) && ((ret2 = fr->rd->skip_bytes(fr,length)) < 0)) ret = ret2;
824
 
825
	return ret;
826
	#undef UNSYNC_FLAG
827
	#undef EXTHEAD_FLAG
828
	#undef EXP_FLAG
829
	#undef FOOTER_FLAG
830
	#undef UNKOWN_FLAGS
831
}
832
 
833
#ifndef NO_ID3V2 /* Disabling all the rest... */
834
 
835
/*
	Convert ISO-8859-1 (Latin-1) text to UTF-8 and store it in sb.

	sb:      destination string; resized to hold the result plus a terminating zero
	s:       source bytes
	l:       number of source bytes
	noquiet: unused here, present to match the text_converter signature

	If resizing sb fails, sb is freed and the function returns without storing anything.
	On success sb->fill counts the converted bytes plus the terminating zero.
*/
static void convert_latin1(mpg123_string *sb, const unsigned char* s, size_t l, const int noquiet)
{
	size_t length = l;
	size_t i;
	unsigned char *p;

	/* Determine the real length: a Latin-1 character takes at most 2 bytes in UTF-8,
	   and exactly those >= 0x80 need the second byte. */
	for(i=0; i<l; ++i)
	if(s[i] >= 0x80) ++length;

	debug1("UTF-8 length: %lu", (unsigned long)length);
	/* one extra zero byte for paranoia */
	if(!mpg123_resize_string(sb, length+1)){ mpg123_free_string(sb); return ; }

	p = (unsigned char*) sb->p; /* Signedness doesn't matter but it shows I thought about the non-issue */
	for(i=0; i<l; ++i)
	if(s[i] < 0x80){ *p = s[i]; ++p; }
	else /* two-byte encoding: 110000xx 10xxxxxx */
	{
		*p     = 0xc0 | (s[i]>>6);
		*(p+1) = 0x80 | (s[i] & 0x3f);
		p+=2;
	}

	sb->p[length] = 0;
	sb->fill = length+1;
}
861
 
862
/*
	Check if we have byte order mark(s) at the start of the data, return:
	-1: little endian
	 0: no BOM
	 1: big endian

	This modifies source and len to indicate the data _after_ the BOM(s).
	Note on nasty data: The last encountered BOM determines the endianness.
	I have seen data with multiple BOMs, namely from "the" id3v2 program.
	Not nice, but what should I do?

	The BOMs are consumed in a loop rather than by recursion, so the stack
	depth does not grow with the number of BOMs in the input.
*/
static int check_bom(const unsigned char** source, size_t *len)
{
	int last_bom = 0;

	/* A BOM needs two bytes; stop as soon as there are fewer left. */
	while(*len >= 2)
	{
		const unsigned char *s = *source;
		int this_bom = 0;

		if(s[0] == 0xff && s[1] == 0xfe)      this_bom = -1;
		else if(s[0] == 0xfe && s[1] == 0xff) this_bom = 1;

		if(this_bom == 0) break; /* Regular data starts here. */

		/* Skip the detected BOM and remember it. The last one wins! */
		*source += 2;
		*len    -= 2;
		last_bom = this_bom;
	}

	return last_bom;
}
898
 
899
#define FULLPOINT(f,s) ( (((f)&0x3ff)<<10) + ((s)&0x3ff) + 0x10000 )
900
/* Remember: There's a limit at 0x1ffff. */
901
#define UTF8LEN(x) ( (x)<0x80 ? 1 : ((x)<0x800 ? 2 : ((x)<0x10000 ? 3 : 4)))
902
static void convert_utf16bom(mpg123_string *sb, const unsigned char* s, size_t l, const int noquiet)
903
{
904
	size_t i;
905
	size_t n; /* number bytes that make up full pairs */
906
	unsigned char *p;
907
	size_t length = 0; /* the resulting UTF-8 length */
908
	/* Determine real length... extreme case can be more than utf-16 length. */
909
	size_t high = 0;
910
	size_t low  = 1;
911
	int bom_endian;
912
 
913
	debug1("convert_utf16 with length %lu", (unsigned long)l);
914
 
915
	bom_endian = check_bom(&s, &l);
3960 Serge 916
	debug1("UTF16 endianness check: %i", bom_endian);
1905 serge 917
 
918
	if(bom_endian == -1) /* little-endian */
919
	{
920
		high = 1; /* The second byte is the high byte. */
921
		low  = 0; /* The first byte is the low byte. */
922
	}
923
 
924
	n = (l/2)*2; /* number bytes that make up full pairs */
925
 
926
	/* first: get length, check for errors -- stop at first one */
927
	for(i=0; i < n; i+=2)
928
	{
929
		unsigned long point = ((unsigned long) s[i+high]<<8) + s[i+low];
930
		if((point & 0xd800) == 0xd800) /* lead surrogate */
931
		{
932
			unsigned short second = (i+3 < l) ? (s[i+2+high]<<8) + s[i+2+low] : 0;
933
			if((second & 0xdc00) == 0xdc00) /* good... */
934
			{
935
				point = FULLPOINT(point,second);
936
				length += UTF8LEN(point); /* possibly 4 bytes */
937
				i+=2; /* We overstepped one word. */
938
			}
939
			else /* if no valid pair, break here */
940
			{
941
				if(noquiet) error2("Invalid UTF16 surrogate pair at %li (0x%04lx).", (unsigned long)i, point);
942
				n = i; /* Forget the half pair, END! */
943
				break;
944
			}
945
		}
946
		else length += UTF8LEN(point); /* 1,2 or 3 bytes */
947
	}
948
 
949
	if(!mpg123_resize_string(sb, length+1)){ mpg123_free_string(sb); return ; }
950
 
951
	/* Now really convert, skip checks as these have been done just before. */
952
	p = (unsigned char*) sb->p; /* Signedness doesn't matter but it shows I thought about the non-issue */
953
	for(i=0; i < n; i+=2)
954
	{
955
		unsigned long codepoint = ((unsigned long) s[i+high]<<8) + s[i+low];
956
		if((codepoint & 0xd800) == 0xd800) /* lead surrogate */
957
		{
958
			unsigned short second = (s[i+2+high]<<8) + s[i+2+low];
959
			codepoint = FULLPOINT(codepoint,second);
960
			i+=2; /* We overstepped one word. */
961
		}
962
		if(codepoint < 0x80) *p++ = (unsigned char) codepoint;
963
		else if(codepoint < 0x800)
964
		{
965
			*p++ = (unsigned char) (0xc0 | (codepoint>>6));
966
			*p++ = (unsigned char) (0x80 | (codepoint & 0x3f));
967
		}
968
		else if(codepoint < 0x10000)
969
		{
970
			*p++ = (unsigned char) (0xe0 | (codepoint>>12));
971
			*p++ = 0x80 | ((codepoint>>6) & 0x3f);
972
			*p++ = 0x80 | (codepoint & 0x3f);
973
		}
974
		else if (codepoint < 0x200000)
975
		{
976
			*p++ = (unsigned char) (0xf0 | codepoint>>18);
977
			*p++ = (unsigned char) (0x80 | ((codepoint>>12) & 0x3f));
978
			*p++ = (unsigned char) (0x80 | ((codepoint>>6) & 0x3f));
979
			*p++ = (unsigned char) (0x80 | (codepoint & 0x3f));
980
		} /* ignore bigger ones (that are not possible here anyway) */
981
	}
982
	sb->p[sb->size-1] = 0; /* paranoia... */
983
	sb->fill = sb->size;
984
}
985
#undef UTF8LEN
986
#undef FULLPOINT
987
 
988
/*
	Store UTF-8 text in sb; no conversion is needed, the bytes are copied as they are.

	sb:      destination string; resized to len+1 bytes
	source:  source bytes
	len:     number of source bytes
	noquiet: unused here, present to match the text_converter signature

	A terminating zero is appended and counted in sb->fill.
	If resizing sb fails, sb is freed instead.
*/
static void convert_utf8(mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet)
{
	if(!mpg123_resize_string(sb, len+1))
	{
		mpg123_free_string(sb);
		return;
	}

	memcpy(sb->p, source, len);
	sb->p[len] = 0;
	sb->fill = len+1;
}
998
 
999
#endif