WebSVN – Kolibri OS – Blame – /programs/develop/libraries/libmpg123/id3.c

Rev	Author	Line No.	Line
1905	serge	1	/*
		2	id3: ID3v2.3 and ID3v2.4 parsing (a relevant subset)
		3
		4	copyright 2006-2008 by the mpg123 project - free software under the terms of the LGPL 2.1
		5	see COPYING and AUTHORS files in distribution or http://mpg123.org
		6	initially written by Thomas Orgis
		7	*/
		8
		9	#include "mpg123lib_intern.h"
		10	#include "id3.h"
		11	#include "debug.h"
		12
		13	#ifndef NO_ID3V2 /* Only the main parsing routine will always be there. */
		14
		15	/* We know the usual text frames plus some specifics. */
		16	#define KNOWN_FRAMES 4
		17	static const char frame_type[KNOWN_FRAMES][5] = { "COMM", "TXXX", "RVA2", "USLT" };
		18	enum frame_types { unknown = -2, text = -1, comment, extra, rva2, uslt };
		19
		20	/* UTF support definitions */
		21
		22	typedef void (text_converter)(mpg123_string sb, const unsigned char* source, size_t len, const int noquiet);
		23
		24	static void convert_latin1 (mpg123_string sb, const unsigned char source, size_t len, const int noquiet);
		25	static void convert_utf16bom(mpg123_string sb, const unsigned char source, size_t len, const int noquiet);
		26	static void convert_utf8 (mpg123_string sb, const unsigned char source, size_t len, const int noquiet);
		27
		28	static const text_converter text_converters[4] =
		29	{
		30	convert_latin1,
		31	/* We always check for (multiple) BOM in 16bit unicode. Without BOM, UTF16 BE is the default.
		32	Errors in encoding are detected anyway. */
		33	convert_utf16bom,
		34	convert_utf16bom,
		35	convert_utf8
		36	};
		37
3960	Serge	38	static const unsigned int encoding_widths[4] = { 1, 2, 2, 1 };
1905	serge	39
		40	/* the code starts here... */
		41
		42	static void null_id3_links(mpg123_handle *fr)
		43	{
		44	fr->id3v2.title = NULL;
		45	fr->id3v2.artist = NULL;
		46	fr->id3v2.album = NULL;
		47	fr->id3v2.year = NULL;
		48	fr->id3v2.genre = NULL;
		49	fr->id3v2.comment = NULL;
		50	}
		51
		52	void init_id3(mpg123_handle *fr)
		53	{
		54	fr->id3v2.version = 0; /* nothing there */
		55	null_id3_links(fr);
		56	fr->id3v2.comments = 0;
		57	fr->id3v2.comment_list = NULL;
		58	fr->id3v2.texts = 0;
		59	fr->id3v2.text = NULL;
		60	fr->id3v2.extras = 0;
		61	fr->id3v2.extra = NULL;
		62	}
		63
		64	/* Managing of the text, comment and extra lists. */
		65
		66	/* Initialize one element. */
		67	static void init_mpg123_text(mpg123_text *txt)
		68	{
		69	mpg123_init_string(&txt->text);
		70	mpg123_init_string(&txt->description);
		71	txt->id[0] = 0;
		72	txt->id[1] = 0;
		73	txt->id[2] = 0;
		74	txt->id[3] = 0;
		75	txt->lang[0] = 0;
		76	txt->lang[1] = 0;
		77	txt->lang[2] = 0;
		78	}
		79
		80	/* Free memory of one element. */
		81	static void free_mpg123_text(mpg123_text *txt)
		82	{
		83	mpg123_free_string(&txt->text);
		84	mpg123_free_string(&txt->description);
		85	}
		86
		87	/* Free memory of whole list. */
		88	#define free_comment(mh) free_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments))
		89	#define free_text(mh) free_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts))
		90	#define free_extra(mh) free_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras))
		91	static void free_id3_text(mpg123_text *list, size_t size)
		92	{
		93	size_t i;
		94	for(i=0; i<size; ++i) free_mpg123_text(&((list)[i]));
		95
		96	free(*list);
		97	*list = NULL;
		98	*size = 0;
		99	}
		100
		101	/* Add items to the list. */
		102	#define add_comment(mh) add_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments))
		103	#define add_text(mh) add_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts))
		104	#define add_extra(mh) add_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras))
		105	static mpg123_text add_id3_text(mpg123_text list, size_t size)
		106	{
		107	mpg123_text x = safe_realloc(list, sizeof(mpg123_text)(size+1));
		108	if(x == NULL) return NULL; /* bad */
		109
		110	*list = x;
		111	*size += 1;
		112	init_mpg123_text(&((list)[size-1]));
		113
		114	return &((list)[size-1]); /* Return pointer to the added text. */
		115	}
		116
		117	/* Remove the last item. */
		118	#define pop_comment(mh) pop_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments))
		119	#define pop_text(mh) pop_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts))
		120	#define pop_extra(mh) pop_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras))
		121	static void pop_id3_text(mpg123_text *list, size_t size)
		122	{
		123	mpg123_text *x;
		124	if(*size < 1) return;
		125
		126	free_mpg123_text(&((list)[size-1]));
		127	if(*size > 1)
		128	{
		129	x = safe_realloc(list, sizeof(mpg123_text)(*size-1));
		130	if(x != NULL){ list = x; size -= 1; }
		131	}
		132	else
		133	{
		134	free(*list);
		135	*list = NULL;
		136	*size = 0;
		137	}
		138	}
		139
		140	/* OK, back t the higher level functions. */
		141
		142	void exit_id3(mpg123_handle *fr)
		143	{
		144	free_comment(fr);
		145	free_extra(fr);
		146	free_text(fr);
		147	}
		148
		149	void reset_id3(mpg123_handle *fr)
		150	{
		151	exit_id3(fr);
		152	init_id3(fr);
		153	}
		154
		155	/* Set the id3v2.artist id3v2.title ... links to elements of the array. */
		156	void id3_link(mpg123_handle *fr)
		157	{
		158	size_t i;
		159	mpg123_id3v2 *v2 = &fr->id3v2;
		160	debug("linking ID3v2");
		161	null_id3_links(fr);
		162	for(i=0; itexts; ++i)
		163	{
		164	mpg123_text *entry = &v2->text[i];
		165	if (!strncmp("TIT2", entry->id, 4)) v2->title = &entry->text;
		166	else if(!strncmp("TALB", entry->id, 4)) v2->album = &entry->text;
		167	else if(!strncmp("TPE1", entry->id, 4)) v2->artist = &entry->text;
		168	else if(!strncmp("TYER", entry->id, 4)) v2->year = &entry->text;
		169	else if(!strncmp("TCON", entry->id, 4)) v2->genre = &entry->text;
		170	}
		171	for(i=0; icomments; ++i)
		172	{
		173	mpg123_text *entry = &v2->comment_list[i];
		174	if(entry->description.fill == 0 \|\| entry->description.p[0] == 0)
		175	v2->comment = &entry->text;
		176	}
		177	/* When no generic comment found, use the last non-generic one. */
		178	if(v2->comment == NULL && v2->comments > 0)
		179	v2->comment = &v2->comment_list[v2->comments-1].text;
		180	}
		181
		182	/*
		183	Store ID3 text data in an mpg123_string; either verbatim copy or everything translated to UTF-8 encoding.
		184	Preserve the zero string separator (I don't need strlen for the total size).
		185
		186	ID3v2 standard says that there should be one text frame of specific type per tag, and subsequent tags overwrite old values.
		187	So, I always replace the text that may be stored already (perhaps with a list of zero-separated strings, though).
		188	*/
3960	Serge	189	static void store_id3_text(mpg123_string sb, char source, size_t source_size, const int noquiet, const int notranslate)
1905	serge	190	{
		191	if(!source_size)
		192	{
		193	debug("Empty id3 data!");
		194	return;
		195	}
		196
		197	/* We shall just copy the data. Client wants to decode itself. */
		198	if(notranslate)
		199	{
		200	/* Future: Add a path for ID3 errors. */
		201	if(!mpg123_resize_string(sb, source_size))
		202	{
		203	if(noquiet) error("Cannot resize target string, out of memory?");
		204	return;
		205	}
		206	memcpy(sb->p, source, source_size);
		207	sb->fill = source_size;
		208	debug1("stored undecoded ID3 text of size %"SIZE_P, (size_p)source_size);
		209	return;
		210	}
		211
		212	id3_to_utf8(sb, ((unsigned char )source)[0], (unsigned char)source+1, source_size-1, noquiet);
		213
		214	if(sb->fill) debug1("UTF-8 string (the first one): %s", sb->p);
		215	else if(noquiet) error("unable to convert string to UTF-8 (out of memory, junk input?)!");
		216	}
		217
		218	/* On error, sb->size is 0. */
		219	void id3_to_utf8(mpg123_string sb, unsigned char encoding, const unsigned char source, size_t source_size, int noquiet)
		220	{
		221	unsigned int bwidth;
		222	debug1("encoding: %u", encoding);
		223	/* A note: ID3v2.3 uses UCS-2 non-variable 16bit encoding, v2.4 uses UTF16.
		224	UTF-16 uses a reserved/private range in UCS-2 to add the magic, so we just always treat it as UTF. */
		225	if(encoding > mpg123_id3_enc_max)
		226	{
		227	if(noquiet) error1("Unknown text encoding %u, I take no chances, sorry!", encoding);
		228
		229	mpg123_free_string(sb);
		230	return;
		231	}
		232	bwidth = encoding_widths[encoding];
		233	/* Hack! I've seen a stray zero byte before BOM. Is that supposed to happen? */
		234	if(encoding != mpg123_id3_utf16be) /* UTF16be _can_ beging with a null byte! */
		235	while(source_size > bwidth && source[0] == 0)
		236	{
		237	--source_size;
		238	++source;
		239	debug("skipped leading zero");
		240	}
		241	if(source_size % bwidth)
		242	{
		243	/* When we need two bytes for a character, it's strange to have an uneven bytestream length. */
		244	if(noquiet) warning2("Weird tag size %d for encoding %u - I will probably trim too early or something but I think the MP3 is broken.", (int)source_size, encoding);
		245	source_size -= source_size % bwidth;
		246	}
		247	text_converters[encoding](sb, source, source_size, noquiet);
		248	}
		249
3960	Serge	250	static char next_text(char prev, int encoding, size_t limit)
1905	serge	251	{
		252	char *text = prev;
		253	size_t width = encoding_widths[encoding];
		254
		255	/* So I go lengths to find zero or double zero...
		256	Remember bug 2834636: Only check for aligned NULLs! */
		257	while(text-prev < (ssize_t)limit)
		258	{
		259	if(text[0] == 0)
		260	{
		261	if(width <= limit-(text-prev))
		262	{
		263	size_t i = 1;
		264	for(; i
		265
		266	if(i == width) /* found a null wide enough! */
		267	{
		268	text += width;
		269	break;
		270	}
		271	}
		272	else return NULL; /* No full character left? This text is broken */
		273	}
		274
		275	text += width;
		276	}
3960	Serge	277	if((size_t)(text-prev) >= limit) text = NULL;
1905	serge	278
		279	return text;
		280	}
		281
		282	static const char *enc_name(int enc)
		283	{
		284	switch(enc)
		285	{
		286	case 0: return "Latin 1";
		287	case 1: return "UTF-16 BOM";
		288	case 2: return "UTF-16 BE";
		289	case 3: return "UTF-8";
		290	default: return "unknown!";
		291	}
		292	}
		293
		294	static void process_text(mpg123_handle fr, char realdata, size_t realsize, char *id)
		295	{
		296	/* Text encoding $xx */
		297	/* The text (encoded) ... */
		298	mpg123_text *t = add_text(fr);
		299	if(VERBOSE4) fprintf(stderr, "Note: Storing text from %s encoding\n", enc_name(realdata[0]));
		300	if(t == NULL)
		301	{
		302	if(NOQUIET) error("Unable to attach new text!");
		303	return;
		304	}
		305	memcpy(t->id, id, 4);
		306	store_id3_text(&t->text, realdata, realsize, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT);
		307	if(VERBOSE4) fprintf(stderr, "Note: ID3v2 %c%c%c%c text frame: %s\n", id[0], id[1], id[2], id[3], t->text.p);
		308	}
		309
		310	/* Store a new comment that perhaps is a RVA / RVA_ALBUM/AUDIOPHILE / RVA_MIX/RADIO one
		311	Special gimmik: It also stores USLT to the texts. Stucture is the same as for comments. */
		312	static void process_comment(mpg123_handle fr, enum frame_types tt, char realdata, size_t realsize, int rva_level, char *id)
		313	{
		314	/* Text encoding $xx */
		315	/* Language $xx xx xx */
		316	/* Short description (encoded!) $00 (00) */
		317	/* Then the comment text (encoded) ... */
		318	char encoding = realdata[0];
		319	char lang = realdata+1; / I'll only use the 3 bytes! */
		320	char *descr = realdata+4;
		321	char *text = NULL;
		322	mpg123_text *xcom = NULL;
		323	mpg123_text localcom; /* UTF-8 variant for local processing. */
		324
		325	if((int)realsize < descr-realdata)
		326	{
		327	if(NOQUIET) error1("Invalid frame size of %lu (too small for anything).", (unsigned long)realsize);
		328	return;
		329	}
		330	xcom = (tt == uslt ? add_text(fr) : add_comment(fr));
		331	if(VERBOSE4) fprintf(stderr, "Note: Storing comment from %s encoding\n", enc_name(realdata[0]));
		332	if(xcom == NULL)
		333	{
		334	if(NOQUIET) error("Unable to attach new comment!");
		335	return;
		336	}
		337	memcpy(xcom->lang, lang, 3);
		338	memcpy(xcom->id, id, 4);
		339	/* Now I can abuse a byte from lang for the encoding. */
		340	descr[-1] = encoding;
		341	/* Be careful with finding the end of description, I have to honor encoding here. */
		342	text = next_text(descr, encoding, realsize-(descr-realdata));
		343	if(text == NULL)
		344	{
		345	if(NOQUIET) error("No comment text / valid description?");
		346	pop_comment(fr);
		347	return;
		348	}
		349
		350	init_mpg123_text(&localcom);
		351	/* Store the text, without translation to UTF-8, but for comments always a local copy in UTF-8.
		352	Reminder: No bailing out from here on without freeing the local comment data! */
		353	store_id3_text(&xcom->description, descr-1, text-descr+1, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT);
		354	if(tt == comment)
3960	Serge	355	store_id3_text(&localcom.description, descr-1, text-descr+1, NOQUIET, 0);
1905	serge	356
		357	text[-1] = encoding; /* Byte abusal for encoding... */
		358	store_id3_text(&xcom->text, text-1, realsize+1-(text-realdata), NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT);
		359	/* Remember: I will probably decode the above (again) for rva comment checking. So no messing around, please. */
		360
		361	if(VERBOSE4) /* Do _not_ print the verbatim text: The encoding might be funny! */
		362	{
		363	fprintf(stderr, "Note: ID3 comm/uslt desc of length %"SIZE_P".\n", (size_p)xcom->description.fill);
		364	fprintf(stderr, "Note: ID3 comm/uslt text of length %"SIZE_P".\n", (size_p)xcom->text.fill);
		365	}
		366	/* Look out for RVA info only when we really deal with a straight comment. */
		367	if(tt == comment && localcom.description.fill > 0)
		368	{
		369	int rva_mode = -1; /* mix / album */
		370	if( !strcasecmp(localcom.description.p, "rva")
		371	\|\| !strcasecmp(localcom.description.p, "rva_mix")
		372	\|\| !strcasecmp(localcom.description.p, "rva_track")
		373	\|\| !strcasecmp(localcom.description.p, "rva_radio") )
		374	rva_mode = 0;
		375	else if( !strcasecmp(localcom.description.p, "rva_album")
		376	\|\| !strcasecmp(localcom.description.p, "rva_audiophile")
		377	\|\| !strcasecmp(localcom.description.p, "rva_user") )
		378	rva_mode = 1;
		379	if((rva_mode > -1) && (fr->rva.level[rva_mode] <= rva_level))
		380	{
		381	/* Only translate the contents in here where we really need them. */
3960	Serge	382	store_id3_text(&localcom.text, text-1, realsize+1-(text-realdata), NOQUIET, 0);
1905	serge	383	if(localcom.text.fill > 0)
		384	{
		385	fr->rva.gain[rva_mode] = (float) atof(localcom.text.p);
		386	if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]);
		387	fr->rva.peak[rva_mode] = 0;
		388	fr->rva.level[rva_mode] = rva_level;
		389	}
		390	}
		391	}
		392	/* Make sure to free the local memory... */
		393	free_mpg123_text(&localcom);
		394	}
		395
3960	Serge	396	static void process_extra(mpg123_handle fr, char realdata, size_t realsize, int rva_level, char *id)
1905	serge	397	{
		398	/* Text encoding $xx */
		399	/* Description ... $00 (00) */
		400	/* Text ... */
		401	char encoding = realdata[0];
		402	char descr = realdata+1; / remember, the encoding is descr[-1] */
		403	char *text;
		404	mpg123_text *xex;
		405	mpg123_text localex;
		406
		407	if((int)realsize < descr-realdata)
		408	{
		409	if(NOQUIET) error1("Invalid frame size of %lu (too small for anything).", (unsigned long)realsize);
		410	return;
		411	}
		412	text = next_text(descr, encoding, realsize-(descr-realdata));
		413	if(VERBOSE4) fprintf(stderr, "Note: Storing extra from %s encoding\n", enc_name(realdata[0]));
		414	if(text == NULL)
		415	{
		416	if(NOQUIET) error("No extra frame text / valid description?");
		417	return;
		418	}
		419	xex = add_extra(fr);
		420	if(xex == NULL)
		421	{
		422	if(NOQUIET) error("Unable to attach new extra text!");
		423	return;
		424	}
		425	memcpy(xex->id, id, 4);
		426	init_mpg123_text(&localex); /* For our local copy. */
3960	Serge	427
		428	/* The outside storage gets reencoded to UTF-8 only if not requested otherwise.
		429	Remember that we really need the -1 here to hand in the encoding byte!*/
1905	serge	430	store_id3_text(&xex->description, descr-1, text-descr+1, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT);
3960	Serge	431	/* Our local copy is always stored in UTF-8! */
		432	store_id3_text(&localex.description, descr-1, text-descr+1, NOQUIET, 0);
		433	/* At first, only store the outside copy of the payload. We may not need the local copy. */
1905	serge	434	text[-1] = encoding;
		435	store_id3_text(&xex->text, text-1, realsize-(text-realdata)+1, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT);
3960	Serge	436
1905	serge	437	/* Now check if we would like to interpret this extra info for RVA. */
		438	if(localex.description.fill > 0)
		439	{
		440	int is_peak = 0;
		441	int rva_mode = -1; /* mix / album */
		442
		443	if(!strncasecmp(localex.description.p, "replaygain_track_",17))
		444	{
		445	if(VERBOSE3) fprintf(stderr, "Note: RVA ReplayGain track gain/peak\n");
		446
		447	rva_mode = 0;
		448	if(!strcasecmp(localex.description.p, "replaygain_track_peak")) is_peak = 1;
		449	else if(strcasecmp(localex.description.p, "replaygain_track_gain")) rva_mode = -1;
		450	}
		451	else
		452	if(!strncasecmp(localex.description.p, "replaygain_album_",17))
		453	{
		454	if(VERBOSE3) fprintf(stderr, "Note: RVA ReplayGain album gain/peak\n");
		455
		456	rva_mode = 1;
		457	if(!strcasecmp(localex.description.p, "replaygain_album_peak")) is_peak = 1;
		458	else if(strcasecmp(localex.description.p, "replaygain_album_gain")) rva_mode = -1;
		459	}
		460	if((rva_mode > -1) && (fr->rva.level[rva_mode] <= rva_level))
		461	{
		462	/* Now we need the translated copy of the data. */
3960	Serge	463	store_id3_text(&localex.text, text-1, realsize-(text-realdata)+1, NOQUIET, 0);
1905	serge	464	if(localex.text.fill > 0)
		465	{
		466	if(is_peak)
		467	{
		468	fr->rva.peak[rva_mode] = (float) atof(localex.text.p);
		469	if(VERBOSE3) fprintf(stderr, "Note: RVA peak %f\n", fr->rva.peak[rva_mode]);
		470	}
		471	else
		472	{
		473	fr->rva.gain[rva_mode] = (float) atof(localex.text.p);
		474	if(VERBOSE3) fprintf(stderr, "Note: RVA gain %fdB\n", fr->rva.gain[rva_mode]);
		475	}
		476	fr->rva.level[rva_mode] = rva_level;
		477	}
		478	}
		479	}
		480
		481	free_mpg123_text(&localex);
		482	}
		483
		484	/* Make a ID3v2.3+ 4-byte ID from a ID3v2.2 3-byte ID
		485	Note that not all frames survived to 2.4; the mapping goes to 2.3 .
		486	A notable miss is the old RVA frame, which is very unspecific anyway.
		487	This function returns -1 when a not known 3 char ID was encountered, 0 otherwise. */
3960	Serge	488	static int promote_framename(mpg123_handle fr, char id) /* fr because of VERBOSE macros */
1905	serge	489	{
		490	size_t i;
		491	char *old[] =
		492	{
		493	"COM", "TAL", "TBP", "TCM", "TCO", "TCR", "TDA", "TDY", "TEN", "TFT",
		494	"TIM", "TKE", "TLA", "TLE", "TMT", "TOA", "TOF", "TOL", "TOR", "TOT",
		495	"TP1", "TP2", "TP3", "TP4", "TPA", "TPB", "TRC", "TDA", "TRK", "TSI",
		496	"TSS", "TT1", "TT2", "TT3", "TXT", "TXX", "TYE"
		497	};
		498	char *new[] =
		499	{
		500	"COMM", "TALB", "TBPM", "TCOM", "TCON", "TCOP", "TDAT", "TDLY", "TENC", "TFLT",
		501	"TIME", "TKEY", "TLAN", "TLEN", "TMED", "TOPE", "TOFN", "TOLY", "TORY", "TOAL",
		502	"TPE1", "TPE2", "TPE3", "TPE4", "TPOS", "TPUB", "TSRC", "TRDA", "TRCK", "TSIZ",
		503	"TSSE", "TIT1", "TIT2", "TIT3", "TEXT", "TXXX", "TYER"
		504	};
		505	for(i=0; i
		506	{
		507	if(!strncmp(id, old[i], 3))
		508	{
		509	memcpy(id, new[i], 4);
		510	if(VERBOSE3) fprintf(stderr, "Translated ID3v2.2 frame %s to %s\n", old[i], new[i]);
		511	return 0;
		512	}
		513	}
		514	if(VERBOSE3) fprintf(stderr, "Ignoring untranslated ID3v2.2 frame %c%c%c\n", id[0], id[1], id[2]);
		515	return -1;
		516	}
		517
		518	#endif /* NO_ID3V2 */
		519
		520	/*
		521	trying to parse ID3v2.3 and ID3v2.4 tags...
		522
		523	returns: 0: bad or just unparseable tag
		524	1: good, (possibly) new tag info
		525	<0: reader error (may need more data feed, try again)
		526	*/
		527	int parse_new_id3(mpg123_handle *fr, unsigned long first4bytes)
		528	{
		529	#define UNSYNC_FLAG 128
		530	#define EXTHEAD_FLAG 64
		531	#define EXP_FLAG 32
		532	#define FOOTER_FLAG 16
		533	#define UNKNOWN_FLAGS 15 /* 00001111*/
		534	unsigned char buf[6];
		535	unsigned long length=0;
		536	unsigned char flags = 0;
		537	int ret = 1;
		538	int ret2;
		539	unsigned char major = first4bytes & 0xff;
		540	debug1("ID3v2: major tag version: %i", major);
		541	if(major == 0xff) return 0; /* Invalid... */
		542	if((ret2 = fr->rd->read_frame_body(fr, buf, 6)) < 0) /* read more header information */
		543	return ret2;
		544
		545	if(buf[0] == 0xff) return 0; /* Revision, will never be 0xff. */
		546
		547	/* second new byte are some nice flags, if these are invalid skip the whole thing */
		548	flags = buf[1];
		549	debug1("ID3v2: flags 0x%08x", flags);
		550	/* use 4 bytes from buf to construct 28bit uint value and return 1; return 0 if bytes are not synchsafe */
		551	#define synchsafe_to_long(buf,res) \
		552	( \
		553	(((buf)[0]\|(buf)[1]\|(buf)[2]\|(buf)[3]) & 0x80) ? 0 : \
		554	(res = (((unsigned long) (buf)[0]) << 21) \
		555	\| (((unsigned long) (buf)[1]) << 14) \
		556	\| (((unsigned long) (buf)[2]) << 7) \
		557	\| ((unsigned long) (buf)[3]) \
		558	,1) \
		559	)
		560	/* id3v2.3 does not store synchsafe frame sizes, but synchsafe tag size - doh! */
		561	#define bytes_to_long(buf,res) \
		562	( \
		563	major == 3 ? \
		564	(res = (((unsigned long) (buf)[0]) << 24) \
		565	\| (((unsigned long) (buf)[1]) << 16) \
		566	\| (((unsigned long) (buf)[2]) << 8) \
		567	\| ((unsigned long) (buf)[3]) \
		568	,1) : synchsafe_to_long(buf,res) \
		569	)
		570	/* for id3v2.2 only */
		571	#define threebytes_to_long(buf,res) \
		572	( \
		573	res = (((unsigned long) (buf)[0]) << 16) \
		574	\| (((unsigned long) (buf)[1]) << 8) \
		575	\| ((unsigned long) (buf)[2]) \
		576	,1 \
		577	)
		578
		579	/* length-10 or length-20 (footer present); 4 synchsafe integers == 28 bit number */
		580	/* we have already read 10 bytes, so left are length or length+10 bytes belonging to tag */
		581	if(!synchsafe_to_long(buf+2,length))
		582	{
		583	if(NOQUIET) error4("Bad tag length (not synchsafe): 0x%02x%02x%02x%02x; You got a bad ID3 tag here.", buf[2],buf[3],buf[4],buf[5]);
		584	return 0;
		585	}
		586	debug1("ID3v2: tag data length %lu", length);
		587	#ifndef NO_ID3V2
		588	if(VERBOSE2) fprintf(stderr,"Note: ID3v2.%i rev %i tag of %lu bytes\n", major, buf[0], length);
		589	/* skip if unknown version/scary flags, parse otherwise */
3960	Serge	590	if(fr->p.flags & MPG123_SKIP_ID3V2 \|\| ((flags & UNKNOWN_FLAGS) \|\| (major > 4) \|\| (major < 2)))
1905	serge	591	{
3960	Serge	592	if(NOQUIET)
		593	{
		594	if(fr->p.flags & MPG123_SKIP_ID3V2)
		595	{
		596	if(VERBOSE3) fprintf(stderr, "Note: Skipping ID3v2 tag per user request.\n");
		597	}
		598	else /* Must be because of scary Tag properties. */
		599	warning2("ID3v2: Won't parse the ID3v2 tag with major version %u and flags 0x%xu - some extra code may be needed", major, flags);
		600	}
1905	serge	601	#endif
		602	if((ret2 = fr->rd->skip_bytes(fr,length)) < 0) /* will not store data in backbuff! */
		603	ret = ret2;
		604	#ifndef NO_ID3V2
		605	}
		606	else
		607	{
3960	Serge	608	unsigned char* tagdata = NULL;
1905	serge	609	fr->id3v2.version = major;
		610	/* try to interpret that beast */
		611	if((tagdata = (unsigned char*) malloc(length+1)) != NULL)
		612	{
		613	debug("ID3v2: analysing frames...");
		614	if((ret2 = fr->rd->read_frame_body(fr,tagdata,length)) > 0)
		615	{
		616	unsigned long tagpos = 0;
		617	debug1("ID3v2: have read at all %lu bytes for the tag now", (unsigned long)length+6);
		618	/* going to apply strlen for strings inside frames, make sure that it doesn't overflow! */
		619	tagdata[length] = 0;
		620	if(flags & EXTHEAD_FLAG)
		621	{
		622	debug("ID3v2: skipping extended header");
		623	if(!bytes_to_long(tagdata, tagpos))
		624	{
		625	ret = 0;
		626	if(NOQUIET) error4("Bad (non-synchsafe) tag offset: 0x%02x%02x%02x%02x", tagdata[0], tagdata[1], tagdata[2], tagdata[3]);
		627	}
		628	}
		629	if(ret > 0)
		630	{
		631	char id[5];
		632	unsigned long framesize;
		633	unsigned long fflags; /* need 16 bits, actually */
		634	id[4] = 0;
		635	/* pos now advanced after ext head, now a frame has to follow */
		636	while(tagpos < length-10) /* I want to read at least a full header */
		637	{
		638	int i = 0;
		639	unsigned long pos = tagpos;
		640	int head_part = fr->id3v2.version == 2 ? 3 : 4; /* bytes of frame title and of framesize value */
		641	/* level 1,2,3 - 0 is info from lame/info tag! */
		642	/* rva tags with ascending significance, then general frames */
		643	enum frame_types tt = unknown;
		644	/* we may have entered the padding zone or any other strangeness: check if we have valid frame id characters */
		645	for(i=0; i< head_part; ++i)
		646	if( !( ((tagdata[tagpos+i] > 47) && (tagdata[tagpos+i] < 58))
		647	\|\| ((tagdata[tagpos+i] > 64) && (tagdata[tagpos+i] < 91)) ) )
		648	{
		649	debug5("ID3v2: real tag data apparently ended after %lu bytes with 0x%02x%02x%02x%02x", tagpos, tagdata[tagpos], tagdata[tagpos+1], tagdata[tagpos+2], tagdata[tagpos+3]);
		650	/* This is no hard error... let's just hope that we got something meaningful already (ret==1 in that case). */
		651	goto tagparse_cleanup; /* Need to escape two loops here. */
		652	}
		653	if(ret > 0)
		654	{
		655	/* 4 or 3 bytes id */
		656	strncpy(id, (char*) tagdata+pos, head_part);
3960	Serge	657	id[head_part] = 0; /* terminate for 3 or 4 bytes */
1905	serge	658	pos += head_part;
		659	tagpos += head_part;
		660	/* size as 32 bits or 28 bits */
		661	if(fr->id3v2.version == 2) threebytes_to_long(tagdata+pos, framesize);
		662	else
		663	if(!bytes_to_long(tagdata+pos, framesize))
		664	{
		665	/* Just assume that up to now there was some good data. */
		666	if(NOQUIET) error1("ID3v2: non-syncsafe size of %s frame, skipping the remainder of tag", id);
		667	break;
		668	}
		669	if(VERBOSE3) fprintf(stderr, "Note: ID3v2 %s frame of size %lu\n", id, framesize);
		670	tagpos += head_part + framesize; /* the important advancement in whole tag */
		671	if(tagpos > length)
		672	{
		673	if(NOQUIET) error("Whoa! ID3v2 frame claims to be larger than the whole rest of the tag.");
		674	break;
		675	}
		676	pos += head_part;
		677	if(fr->id3v2.version > 2)
		678	{
		679	fflags = (((unsigned long) tagdata[pos]) << 8) \| ((unsigned long) tagdata[pos+1]);
		680	pos += 2;
		681	tagpos += 2;
		682	}
		683	else fflags = 0;
		684	/* for sanity, after full parsing tagpos should be == pos */
		685	/* debug4("ID3v2: found %s frame, size %lu (as bytes: 0x%08lx), flags 0x%016lx", id, framesize, framesize, fflags); */
		686	/* %0abc0000 %0h00kmnp */
		687	#define BAD_FFLAGS (unsigned long) 36784
		688	#define PRES_TAG_FFLAG 16384
		689	#define PRES_FILE_FFLAG 8192
		690	#define READ_ONLY_FFLAG 4096
		691	#define GROUP_FFLAG 64
		692	#define COMPR_FFLAG 8
		693	#define ENCR_FFLAG 4
		694	#define UNSYNC_FFLAG 2
		695	#define DATLEN_FFLAG 1
		696	if(head_part < 4 && promote_framename(fr, id) != 0) continue;
		697
		698	/* shall not or want not handle these */
		699	if(fflags & (BAD_FFLAGS \| COMPR_FFLAG \| ENCR_FFLAG))
		700	{
		701	if(NOQUIET) warning("ID3v2: skipping invalid/unsupported frame");
		702	continue;
		703	}
		704
		705	for(i = 0; i < KNOWN_FRAMES; ++i)
		706	if(!strncmp(frame_type[i], id, 4)){ tt = i; break; }
		707
		708	if(id[0] == 'T' && tt != extra) tt = text;
		709
		710	if(tt != unknown)
		711	{
		712	int rva_mode = -1; /* mix / album */
		713	unsigned long realsize = framesize;
		714	unsigned char* realdata = tagdata+pos;
		715	if((flags & UNSYNC_FLAG) \|\| (fflags & UNSYNC_FFLAG))
		716	{
		717	unsigned long ipos = 0;
		718	unsigned long opos = 0;
		719	debug("Id3v2: going to de-unsync the frame data");
		720	/* de-unsync: FF00 -> FF; real FF00 is simply represented as FF0000 ... */
		721	/* damn, that means I have to delete bytes from withing the data block... thus need temporal storage */
		722	/* standard mandates that de-unsync should always be safe if flag is set */
		723	realdata = (unsigned char) malloc(framesize); / will need <= bytes */
		724	if(realdata == NULL)
		725	{
		726	if(NOQUIET) error("ID3v2: unable to allocate working buffer for de-unsync");
		727	continue;
		728	}
		729	/* now going byte per byte through the data... */
		730	realdata[0] = tagdata[pos];
		731	opos = 1;
		732	for(ipos = pos+1; ipos < pos+framesize; ++ipos)
		733	{
		734	if(!((tagdata[ipos] == 0) && (tagdata[ipos-1] == 0xff)))
		735	{
		736	realdata[opos++] = tagdata[ipos];
		737	}
		738	}
		739	realsize = opos;
		740	debug2("ID3v2: de-unsync made %lu out of %lu bytes", realsize, framesize);
		741	}
		742	pos = 0; /* now at the beginning again... */
		743	switch(tt)
		744	{
		745	case comment:
		746	case uslt:
		747	process_comment(fr, tt, (char*)realdata, realsize, comment+1, id);
		748	break;
		749	case extra: /* perhaps foobar2000's work */
		750	process_extra(fr, (char*)realdata, realsize, extra+1, id);
		751	break;
		752	case rva2: /* "the" RVA tag */
		753	{
		754	/* starts with null-terminated identification */
		755	if(VERBOSE3) fprintf(stderr, "Note: RVA2 identification \"%s\"\n", realdata);
		756	/* default: some individual value, mix mode */
		757	rva_mode = 0;
		758	if( !strncasecmp((char*)realdata, "album", 5)
		759	\|\| !strncasecmp((char*)realdata, "audiophile", 10)
		760	\|\| !strncasecmp((char*)realdata, "user", 4))
		761	rva_mode = 1;
		762	if(fr->rva.level[rva_mode] <= rva2+1)
		763	{
		764	pos += strlen((char*) realdata) + 1;
		765	if(realdata[pos] == 1)
		766	{
		767	++pos;
		768	/* only handle master channel */
		769	debug("ID3v2: it is for the master channel");
		770	/* two bytes adjustment, one byte for bits representing peak - n bytes, eh bits, for peak */
		771	/* 16 bit signed integer = dB * 512 ... the double cast is needed to preserve the sign of negative values! */
		772	fr->rva.gain[rva_mode] = (float) ( (((short)((signed char)realdata[pos])) << 8) \| realdata[pos+1] ) / 512;
		773	pos += 2;
		774	if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]);
		775	/* heh, the peak value is represented by a number of bits - but in what manner? Skipping that part */
		776	fr->rva.peak[rva_mode] = 0;
		777	fr->rva.level[rva_mode] = rva2+1;
		778	}
		779	}
		780	}
		781	break;
		782	/* non-rva metainfo, simply store... */
		783	case text:
		784	process_text(fr, (char*)realdata, realsize, id);
		785	break;
		786	default: if(NOQUIET) error1("ID3v2: unknown frame type %i", tt);
		787	}
		788	if((flags & UNSYNC_FLAG) \|\| (fflags & UNSYNC_FFLAG)) free(realdata);
		789	}
		790	#undef BAD_FFLAGS
		791	#undef PRES_TAG_FFLAG
		792	#undef PRES_FILE_FFLAG
		793	#undef READ_ONLY_FFLAG
		794	#undef GROUP_FFLAG
		795	#undef COMPR_FFLAG
		796	#undef ENCR_FFLAG
		797	#undef UNSYNC_FFLAG
		798	#undef DATLEN_FFLAG
		799	}
		800	else break;
		801	#undef KNOWN_FRAMES
		802	}
		803	}
		804	}
		805	else
		806	{
3960	Serge	807	/* There are tags with zero length. Strictly not an error, then. */
		808	if(length > 0 && NOQUIET && ret2 != MPG123_NEED_MORE) error("ID3v2: Duh, not able to read ID3v2 tag data.");
1905	serge	809	ret = ret2;
		810	}
		811	tagparse_cleanup:
		812	free(tagdata);
		813	}
		814	else
		815	{
		816	if(NOQUIET) error1("ID3v2: Arrg! Unable to allocate %lu bytes for interpreting ID3v2 data - trying to skip instead.", length);
		817	if((ret2 = fr->rd->skip_bytes(fr,length)) < 0) ret = ret2; /* will not store data in backbuff! */
		818	else ret = 0;
		819	}
		820	}
		821	#endif /* NO_ID3V2 */
		822	/* skip footer if present */
		823	if((ret > 0) && (flags & FOOTER_FLAG) && ((ret2 = fr->rd->skip_bytes(fr,length)) < 0)) ret = ret2;
		824
		825	return ret;
		826	#undef UNSYNC_FLAG
		827	#undef EXTHEAD_FLAG
		828	#undef EXP_FLAG
		829	#undef FOOTER_FLAG
		830	#undef UNKOWN_FLAGS
		831	}
		832
		833	#ifndef NO_ID3V2 /* Disabling all the rest... */
		834
		835	static void convert_latin1(mpg123_string sb, const unsigned char s, size_t l, const int noquiet)
		836	{
		837	size_t length = l;
		838	size_t i;
		839	unsigned char *p;
		840	/* determine real length, a latin1 character can at most take 2 in UTF8 */
		841	for(i=0; i
		842	if(s[i] >= 0x80) ++length;
		843
		844	debug1("UTF-8 length: %lu", (unsigned long)length);
		845	/* one extra zero byte for paranoia */
		846	if(!mpg123_resize_string(sb, length+1)){ mpg123_free_string(sb); return ; }
		847
		848	p = (unsigned char) sb->p; / Signedness doesn't matter but it shows I thought about the non-issue */
		849	for(i=0; i
		850	if(s[i] < 0x80){ *p = s[i]; ++p; }
		851	else /* two-byte encoding */
		852	{
		853	*p = 0xc0 \| (s[i]>>6);
		854	*(p+1) = 0x80 \| (s[i] & 0x3f);
		855	p+=2;
		856	}
		857
		858	sb->p[length] = 0;
		859	sb->fill = length+1;
		860	}
		861
		862	/*
		863	Check if we have a byte oder mark(s) there, return:
		864	-1: little endian
		865	0: no BOM
		866	1: big endian
		867
		868	This modifies source and len to indicate the data _after_ the BOM(s).
3960	Serge	869	Note on nasty data: The last encountered BOM determines the endianness.
1905	serge	870	I have seen data with multiple BOMS, namely from "the" id3v2 program.
		871	Not nice, but what should I do?
		872	*/
		873	static int check_bom(const unsigned char** source, size_t *len)
		874	{
		875	int this_bom = 0;
		876	int further_bom = 0;
		877
		878	if(*len < 2) return 0;
		879
		880	if((source)[0] == 0xff && (source)[1] == 0xfe)
		881	this_bom = -1;
		882
		883	if((source)[0] == 0xfe && (source)[1] == 0xff)
		884	this_bom = 1;
		885
		886	/* Skip the detected BOM. */
		887	if(this_bom != 0)
		888	{
		889	*source += 2;
		890	*len -= 2;
		891	/* Check for following BOMs. The last one wins! */
		892	further_bom = check_bom(source, len);
		893	if(further_bom == 0) return this_bom; /* End of the recursion. */
		894	else return further_bom;
		895	}
		896	else return 0;
		897	}
		898
		899	#define FULLPOINT(f,s) ( (((f)&0x3ff)<<10) + ((s)&0x3ff) + 0x10000 )
		900	/* Remember: There's a limit at 0x1ffff. */
		901	#define UTF8LEN(x) ( (x)<0x80 ? 1 : ((x)<0x800 ? 2 : ((x)<0x10000 ? 3 : 4)))
		902	static void convert_utf16bom(mpg123_string sb, const unsigned char s, size_t l, const int noquiet)
		903	{
		904	size_t i;
		905	size_t n; /* number bytes that make up full pairs */
		906	unsigned char *p;
		907	size_t length = 0; /* the resulting UTF-8 length */
		908	/* Determine real length... extreme case can be more than utf-16 length. */
		909	size_t high = 0;
		910	size_t low = 1;
		911	int bom_endian;
		912
		913	debug1("convert_utf16 with length %lu", (unsigned long)l);
		914
		915	bom_endian = check_bom(&s, &l);
3960	Serge	916	debug1("UTF16 endianness check: %i", bom_endian);
1905	serge	917
		918	if(bom_endian == -1) /* little-endian */
		919	{
		920	high = 1; /* The second byte is the high byte. */
		921	low = 0; /* The first byte is the low byte. */
		922	}
		923
		924	n = (l/2)2; / number bytes that make up full pairs */
		925
		926	/* first: get length, check for errors -- stop at first one */
		927	for(i=0; i < n; i+=2)
		928	{
		929	unsigned long point = ((unsigned long) s[i+high]<<8) + s[i+low];
		930	if((point & 0xd800) == 0xd800) /* lead surrogate */
		931	{
		932	unsigned short second = (i+3 < l) ? (s[i+2+high]<<8) + s[i+2+low] : 0;
		933	if((second & 0xdc00) == 0xdc00) /* good... */
		934	{
		935	point = FULLPOINT(point,second);
		936	length += UTF8LEN(point); /* possibly 4 bytes */
		937	i+=2; /* We overstepped one word. */
		938	}
		939	else /* if no valid pair, break here */
		940	{
		941	if(noquiet) error2("Invalid UTF16 surrogate pair at %li (0x%04lx).", (unsigned long)i, point);
		942	n = i; /* Forget the half pair, END! */
		943	break;
		944	}
		945	}
		946	else length += UTF8LEN(point); /* 1,2 or 3 bytes */
		947	}
		948
		949	if(!mpg123_resize_string(sb, length+1)){ mpg123_free_string(sb); return ; }
		950
		951	/* Now really convert, skip checks as these have been done just before. */
		952	p = (unsigned char) sb->p; / Signedness doesn't matter but it shows I thought about the non-issue */
		953	for(i=0; i < n; i+=2)
		954	{
		955	unsigned long codepoint = ((unsigned long) s[i+high]<<8) + s[i+low];
		956	if((codepoint & 0xd800) == 0xd800) /* lead surrogate */
		957	{
		958	unsigned short second = (s[i+2+high]<<8) + s[i+2+low];
		959	codepoint = FULLPOINT(codepoint,second);
		960	i+=2; /* We overstepped one word. */
		961	}
		962	if(codepoint < 0x80) *p++ = (unsigned char) codepoint;
		963	else if(codepoint < 0x800)
		964	{
		965	*p++ = (unsigned char) (0xc0 \| (codepoint>>6));
		966	*p++ = (unsigned char) (0x80 \| (codepoint & 0x3f));
		967	}
		968	else if(codepoint < 0x10000)
		969	{
		970	*p++ = (unsigned char) (0xe0 \| (codepoint>>12));
		971	*p++ = 0x80 \| ((codepoint>>6) & 0x3f);
		972	*p++ = 0x80 \| (codepoint & 0x3f);
		973	}
		974	else if (codepoint < 0x200000)
		975	{
		976	*p++ = (unsigned char) (0xf0 \| codepoint>>18);
		977	*p++ = (unsigned char) (0x80 \| ((codepoint>>12) & 0x3f));
		978	*p++ = (unsigned char) (0x80 \| ((codepoint>>6) & 0x3f));
		979	*p++ = (unsigned char) (0x80 \| (codepoint & 0x3f));
		980	} /* ignore bigger ones (that are not possible here anyway) */
		981	}
		982	sb->p[sb->size-1] = 0; /* paranoia... */
		983	sb->fill = sb->size;
		984	}
		985	#undef UTF8LEN
		986	#undef FULLPOINT
		987
		988	static void convert_utf8(mpg123_string sb, const unsigned char source, size_t len, const int noquiet)
		989	{
		990	if(mpg123_resize_string(sb, len+1))
		991	{
		992	memcpy(sb->p, source, len);
		993	sb->p[len] = 0;
		994	sb->fill = len+1;
		995	}
		996	else mpg123_free_string(sb);
		997	}
		998
		999	#endif

Subversion Repositories Kolibri OS

(root)/programs/develop/libraries/libmpg123/id3.c – Rev 3960