WebSVN – Kolibri OS – Blame – /contrib/toolchain/binutils/gas/app.c

Rev	Author	Line No.	Line
5222	serge	1	/* This is the Assembler Pre-Processor
		2	Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
		3	1999, 2000, 2001, 2002, 2003, 2005, 2006, 2007, 2008, 2009, 2010, 2012
		4	Free Software Foundation, Inc.
		5
		6	This file is part of GAS, the GNU Assembler.
		7
		8	GAS is free software; you can redistribute it and/or modify
		9	it under the terms of the GNU General Public License as published by
		10	the Free Software Foundation; either version 3, or (at your option)
		11	any later version.
		12
		13	GAS is distributed in the hope that it will be useful, but WITHOUT
		14	ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
		15	or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
		16	License for more details.
		17
		18	You should have received a copy of the GNU General Public License
		19	along with GAS; see the file COPYING. If not, write to the Free
		20	Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
		21	02110-1301, USA. */
		22
		23	/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90. */
		24	/* App, the assembler pre-processor. This pre-processor strips out
		25	excess spaces, turns single-quoted characters into a decimal
		26	constant, and turns the # in # <number> <filename> <garbage> into a
		27	.linefile. This needs better error-handling. */
		28
		29	#include "as.h"
		30
		31	#if (__STDC__ != 1)
		32	#ifndef const
		33	#define const /* empty */
		34	#endif
		35	#endif
		36
		37	#ifdef H_TICK_HEX
		38	int enable_h_tick_hex = 0;
		39	#endif
		40
		41	#ifdef TC_M68K
		42	/* Whether we are scrubbing in m68k MRI mode. This is different from
		43	flag_m68k_mri, because the two flags will be affected by the .mri
		44	pseudo-op at different times. */
		45	static int scrub_m68k_mri;
		46
		47	/* The pseudo-op which switches in and out of MRI mode. See the
		48	comment in do_scrub_chars. */
		49	static const char mri_pseudo[] = ".mri 0";
		50	#else
		51	#define scrub_m68k_mri 0
		52	#endif
		53
		54	#if defined TC_ARM && defined OBJ_ELF
		55	/* The pseudo-op for which we need to special-case `@' characters.
		56	See the comment in do_scrub_chars. */
		57	static const char symver_pseudo[] = ".symver";
		58	static const char * symver_state;
		59	#endif
		60
		61	static char lex[256];
		62	static const char symbol_chars[] =
		63	"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
		64
		65	#define LEX_IS_SYMBOL_COMPONENT 1
		66	#define LEX_IS_WHITESPACE 2
		67	#define LEX_IS_LINE_SEPARATOR 3
		68	#define LEX_IS_COMMENT_START 4
		69	#define LEX_IS_LINE_COMMENT_START 5
		70	#define LEX_IS_TWOCHAR_COMMENT_1ST 6
		71	#define LEX_IS_STRINGQUOTE 8
		72	#define LEX_IS_COLON 9
		73	#define LEX_IS_NEWLINE 10
		74	#define LEX_IS_ONECHAR_QUOTE 11
		75	#ifdef TC_V850
		76	#define LEX_IS_DOUBLEDASH_1ST 12
		77	#endif
		78	#ifdef TC_M32R
		79	#define DOUBLEBAR_PARALLEL
		80	#endif
		81	#ifdef DOUBLEBAR_PARALLEL
		82	#define LEX_IS_DOUBLEBAR_1ST 13
		83	#endif
		84	#define LEX_IS_PARALLEL_SEPARATOR 14
		85	#ifdef H_TICK_HEX
		86	#define LEX_IS_H 15
		87	#endif
		88	#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
		89	#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
		90	#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
		91	#define IS_PARALLEL_SEPARATOR(c) (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
		92	#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
		93	#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
		94	#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
		95
		96	static int process_escape (int);
		97
		98	/* FIXME-soon: The entire lexer/parser thingy should be
		99	built statically at compile time rather than dynamically
		100	each and every time the assembler is run. xoxorich. */
		101
		102	void
		103	do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
		104	{
		105	const char *p;
		106	int c;
		107
		108	lex[' '] = LEX_IS_WHITESPACE;
		109	lex['\t'] = LEX_IS_WHITESPACE;
		110	lex['\r'] = LEX_IS_WHITESPACE;
		111	lex['\n'] = LEX_IS_NEWLINE;
		112	lex[':'] = LEX_IS_COLON;
		113
		114	#ifdef TC_M68K
		115	scrub_m68k_mri = m68k_mri;
		116
		117	if (! m68k_mri)
		118	#endif
		119	{
		120	lex['"'] = LEX_IS_STRINGQUOTE;
		121
		122	#if ! defined (TC_HPPA) && ! defined (TC_I370)
		123	/* I370 uses single-quotes to delimit integer, float constants. */
		124	lex['\''] = LEX_IS_ONECHAR_QUOTE;
		125	#endif
		126
		127	#ifdef SINGLE_QUOTE_STRINGS
		128	lex['\''] = LEX_IS_STRINGQUOTE;
		129	#endif
		130	}
		131
		132	/* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
		133	in state 5 of do_scrub_chars must be changed. */
		134
		135	/* Note that these override the previous defaults, e.g. if ';' is a
		136	comment char, then it isn't a line separator. */
		137	for (p = symbol_chars; *p; ++p)
		138	lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
		139
		140	for (c = 128; c < 256; ++c)
		141	lex[c] = LEX_IS_SYMBOL_COMPONENT;
		142
		143	#ifdef tc_symbol_chars
		144	/* This macro permits the processor to specify all characters which
		145	may appears in an operand. This will prevent the scrubber from
		146	discarding meaningful whitespace in certain cases. The i386
		147	backend uses this to support prefixes, which can confuse the
		148	scrubber as to whether it is parsing operands or opcodes. */
		149	for (p = tc_symbol_chars; *p; ++p)
		150	lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
		151	#endif
		152
		153	/* The m68k backend wants to be able to change comment_chars. */
		154	#ifndef tc_comment_chars
		155	#define tc_comment_chars comment_chars
		156	#endif
		157	for (p = tc_comment_chars; *p; p++)
		158	lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
		159
		160	for (p = line_comment_chars; *p; p++)
		161	lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
		162
		163	for (p = line_separator_chars; *p; p++)
		164	lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
		165
		166	#ifdef tc_parallel_separator_chars
		167	/* This macro permits the processor to specify all characters which
		168	separate parallel insns on the same line. */
		169	for (p = tc_parallel_separator_chars; *p; p++)
		170	lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
		171	#endif
		172
		173	/* Only allow slash-star comments if slash is not in use.
		174	FIXME: This isn't right. We should always permit them. */
		175	if (lex['/'] == 0)
		176	lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
		177
		178	#ifdef TC_M68K
		179	if (m68k_mri)
		180	{
		181	lex['\''] = LEX_IS_STRINGQUOTE;
		182	lex[';'] = LEX_IS_COMMENT_START;
		183	lex['*'] = LEX_IS_LINE_COMMENT_START;
		184	/* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
		185	then it can't be used in an expression. */
		186	lex['!'] = LEX_IS_LINE_COMMENT_START;
		187	}
		188	#endif
		189
		190	#ifdef TC_V850
		191	lex['-'] = LEX_IS_DOUBLEDASH_1ST;
		192	#endif
		193	#ifdef DOUBLEBAR_PARALLEL
		194	lex['\|'] = LEX_IS_DOUBLEBAR_1ST;
		195	#endif
		196	#ifdef TC_D30V
		197	/* Must do this is we want VLIW instruction with "->" or "<-". */
		198	lex['-'] = LEX_IS_SYMBOL_COMPONENT;
		199	#endif
		200
		201	#ifdef H_TICK_HEX
		202	if (enable_h_tick_hex)
		203	{
		204	lex['h'] = LEX_IS_H;
		205	lex['H'] = LEX_IS_H;
		206	}
		207	#endif
		208	}
		209
		210	/* Saved state of the scrubber. */
		211	static int state;
		212	static int old_state;
		213	static char *out_string;
		214	static char out_buf[20];
		215	static int add_newlines;
		216	static char *saved_input;
		217	static size_t saved_input_len;
		218	static char input_buffer[32 * 1024];
		219	static const char *mri_state;
		220	static char mri_last_ch;
		221
		222	/* Data structure for saving the state of app across #include's. Note that
		223	app is called asynchronously to the parsing of the .include's, so our
		224	state at the time .include is interpreted is completely unrelated.
		225	That's why we have to save it all. */
		226
		227	struct app_save
		228	{
		229	int state;
		230	int old_state;
		231	char * out_string;
		232	char out_buf[sizeof (out_buf)];
		233	int add_newlines;
		234	char * saved_input;
		235	size_t saved_input_len;
		236	#ifdef TC_M68K
		237	int scrub_m68k_mri;
		238	#endif
		239	const char * mri_state;
		240	char mri_last_ch;
		241	#if defined TC_ARM && defined OBJ_ELF
		242	const char * symver_state;
		243	#endif
		244	};
		245
		246	char *
		247	app_push (void)
		248	{
		249	register struct app_save *saved;
		250
		251	saved = (struct app_save ) xmalloc (sizeof (saved));
		252	saved->state = state;
		253	saved->old_state = old_state;
		254	saved->out_string = out_string;
		255	memcpy (saved->out_buf, out_buf, sizeof (out_buf));
		256	saved->add_newlines = add_newlines;
		257	if (saved_input == NULL)
		258	saved->saved_input = NULL;
		259	else
		260	{
		261	saved->saved_input = (char *) xmalloc (saved_input_len);
		262	memcpy (saved->saved_input, saved_input, saved_input_len);
		263	saved->saved_input_len = saved_input_len;
		264	}
		265	#ifdef TC_M68K
		266	saved->scrub_m68k_mri = scrub_m68k_mri;
		267	#endif
		268	saved->mri_state = mri_state;
		269	saved->mri_last_ch = mri_last_ch;
		270	#if defined TC_ARM && defined OBJ_ELF
		271	saved->symver_state = symver_state;
		272	#endif
		273
		274	/* do_scrub_begin() is not useful, just wastes time. */
		275
		276	state = 0;
		277	saved_input = NULL;
		278	add_newlines = 0;
		279
		280	return (char *) saved;
		281	}
		282
		283	void
		284	app_pop (char *arg)
		285	{
		286	register struct app_save saved = (struct app_save ) arg;
		287
		288	/* There is no do_scrub_end (). */
		289	state = saved->state;
		290	old_state = saved->old_state;
		291	out_string = saved->out_string;
		292	memcpy (out_buf, saved->out_buf, sizeof (out_buf));
		293	add_newlines = saved->add_newlines;
		294	if (saved->saved_input == NULL)
		295	saved_input = NULL;
		296	else
		297	{
		298	gas_assert (saved->saved_input_len <= sizeof (input_buffer));
		299	memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
		300	saved_input = input_buffer;
		301	saved_input_len = saved->saved_input_len;
		302	free (saved->saved_input);
		303	}
		304	#ifdef TC_M68K
		305	scrub_m68k_mri = saved->scrub_m68k_mri;
		306	#endif
		307	mri_state = saved->mri_state;
		308	mri_last_ch = saved->mri_last_ch;
		309	#if defined TC_ARM && defined OBJ_ELF
		310	symver_state = saved->symver_state;
		311	#endif
		312
		313	free (arg);
		314	}
		315
		316	/* @@ This assumes that \n &c are the same on host and target. This is not
		317	necessarily true. */
		318
		319	static int
		320	process_escape (int ch)
		321	{
		322	switch (ch)
		323	{
		324	case 'b':
		325	return '\b';
		326	case 'f':
		327	return '\f';
		328	case 'n':
		329	return '\n';
		330	case 'r':
		331	return '\r';
		332	case 't':
		333	return '\t';
		334	case '\'':
		335	return '\'';
		336	case '"':
		337	return '\"';
		338	default:
		339	return ch;
		340	}
		341	}
		342
		343	/* This function is called to process input characters. The GET
		344	parameter is used to retrieve more input characters. GET should
		345	set its parameter to point to a buffer, and return the length of
		346	the buffer; it should return 0 at end of file. The scrubbed output
		347	characters are put into the buffer starting at TOSTART; the TOSTART
		348	buffer is TOLEN bytes in length. The function returns the number
		349	of scrubbed characters put into TOSTART. This will be TOLEN unless
		350	end of file was seen. This function is arranged as a state
		351	machine, and saves its state so that it may return at any point.
		352	This is the way the old code used to work. */
		353
		354	size_t
		355	do_scrub_chars (size_t (get) (char , size_t), char *tostart, size_t tolen)
		356	{
		357	char *to = tostart;
		358	char *toend = tostart + tolen;
		359	char *from;
		360	char *fromend;
		361	size_t fromlen;
		362	register int ch, ch2 = 0;
		363	/* Character that started the string we're working on. */
		364	static char quotechar;
		365
		366	/*State 0: beginning of normal line
		367	1: After first whitespace on line (flush more white)
		368	2: After first non-white (opcode) on line (keep 1white)
		369	3: after second white on line (into operands) (flush white)
		370	4: after putting out a .linefile, put out digits
		371	5: parsing a string, then go to old-state
		372	6: putting out \ escape in a "d string.
		373	7: no longer used
		374	8: no longer used
		375	9: After seeing symbol char in state 3 (keep 1white after symchar)
		376	10: After seeing whitespace in state 9 (keep white before symchar)
		377	11: After seeing a symbol character in state 0 (eg a label definition)
		378	-1: output string in out_string and go to the state in old_state
		379	-2: flush text until a '*' '/' is seen, then go to state old_state
		380	#ifdef TC_V850
		381	12: After seeing a dash, looking for a second dash as a start
		382	of comment.
		383	#endif
		384	#ifdef DOUBLEBAR_PARALLEL
		385	13: After seeing a vertical bar, looking for a second
		386	vertical bar as a parallel expression separator.
		387	#endif
		388	#ifdef TC_PREDICATE_START_CHAR
		389	14: After seeing a predicate start character at state 0, looking
		390	for a predicate end character as predicate.
		391	15: After seeing a predicate start character at state 1, looking
		392	for a predicate end character as predicate.
		393	#endif
		394	#ifdef TC_Z80
		395	16: After seeing an 'a' or an 'A' at the start of a symbol
		396	17: After seeing an 'f' or an 'F' in state 16
		397	#endif
		398	*/
		399
		400	/* I added states 9 and 10 because the MIPS ECOFF assembler uses
		401	constructs like ``.loc 1 20''. This was turning into ``.loc
		402	120''. States 9 and 10 ensure that a space is never dropped in
		403	between characters which could appear in an identifier. Ian
		404	Taylor, ian@cygnus.com.
		405
		406	I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
		407	correctly on the PA (and any other target where colons are optional).
		408	Jeff Law, law@cs.utah.edu.
		409
		410	I added state 13 so that something like "cmp r1, r2 \|\| trap #1" does not
		411	get squashed into "cmp r1,r2\|\|trap#1", with the all important space
		412	between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */
		413
		414	/* This macro gets the next input character. */
		415
		416	#define GET() \
		417	(from < fromend \
		418	? * (unsigned char *) (from++) \
		419	: (saved_input = NULL, \
		420	fromlen = (*get) (input_buffer, sizeof input_buffer), \
		421	from = input_buffer, \
		422	fromend = from + fromlen, \
		423	(fromlen == 0 \
		424	? EOF \
		425	: * (unsigned char *) (from++))))
		426
		427	/* This macro pushes a character back on the input stream. */
		428
		429	#define UNGET(uch) (*--from = (uch))
		430
		431	/* This macro puts a character into the output buffer. If this
		432	character fills the output buffer, this macro jumps to the label
		433	TOFULL. We use this rather ugly approach because we need to
		434	handle two different termination conditions: EOF on the input
		435	stream, and a full output buffer. It would be simpler if we
		436	always read in the entire input stream before processing it, but
		437	I don't want to make such a significant change to the assembler's
		438	memory usage. */
		439
		440	#define PUT(pch) \
		441	do \
		442	{ \
		443	*to++ = (pch); \
		444	if (to >= toend) \
		445	goto tofull; \
		446	} \
		447	while (0)
		448
		449	if (saved_input != NULL)
		450	{
		451	from = saved_input;
		452	fromend = from + saved_input_len;
		453	}
		454	else
		455	{
		456	fromlen = (*get) (input_buffer, sizeof input_buffer);
		457	if (fromlen == 0)
		458	return 0;
		459	from = input_buffer;
		460	fromend = from + fromlen;
		461	}
		462
		463	while (1)
		464	{
		465	/* The cases in this switch end with continue, in order to
		466	branch back to the top of this while loop and generate the
		467	next output character in the appropriate state. */
		468	switch (state)
		469	{
		470	case -1:
		471	ch = *out_string++;
		472	if (*out_string == '\0')
		473	{
		474	state = old_state;
		475	old_state = 3;
		476	}
		477	PUT (ch);
		478	continue;
		479
		480	case -2:
		481	for (;;)
		482	{
		483	do
		484	{
		485	ch = GET ();
		486
		487	if (ch == EOF)
		488	{
		489	as_warn (_("end of file in comment"));
		490	goto fromeof;
		491	}
		492
		493	if (ch == '\n')
		494	PUT ('\n');
		495	}
		496	while (ch != '*');
		497
		498	while ((ch = GET ()) == '*')
		499	;
		500
		501	if (ch == EOF)
		502	{
		503	as_warn (_("end of file in comment"));
		504	goto fromeof;
		505	}
		506
		507	if (ch == '/')
		508	break;
		509
		510	UNGET (ch);
		511	}
		512
		513	state = old_state;
		514	UNGET (' ');
		515	continue;
		516
		517	case 4:
		518	ch = GET ();
		519	if (ch == EOF)
		520	goto fromeof;
		521	else if (ch >= '0' && ch <= '9')
		522	PUT (ch);
		523	else
		524	{
		525	while (ch != EOF && IS_WHITESPACE (ch))
		526	ch = GET ();
		527	if (ch == '"')
		528	{
		529	quotechar = ch;
		530	state = 5;
		531	old_state = 3;
		532	PUT (ch);
		533	}
		534	else
		535	{
		536	while (ch != EOF && ch != '\n')
		537	ch = GET ();
		538	state = 0;
		539	PUT (ch);
		540	}
		541	}
		542	continue;
		543
		544	case 5:
		545	/* We are going to copy everything up to a quote character,
		546	with special handling for a backslash. We try to
		547	optimize the copying in the simple case without using the
		548	GET and PUT macros. */
		549	{
		550	char *s;
		551	ptrdiff_t len;
		552
		553	for (s = from; s < fromend; s++)
		554	{
		555	ch = *s;
		556	if (ch == '\\'
		557	\|\| ch == quotechar
		558	\|\| ch == '\n')
		559	break;
		560	}
		561	len = s - from;
		562	if (len > toend - to)
		563	len = toend - to;
		564	if (len > 0)
		565	{
		566	memcpy (to, from, len);
		567	to += len;
		568	from += len;
		569	if (to >= toend)
		570	goto tofull;
		571	}
		572	}
		573
		574	ch = GET ();
		575	if (ch == EOF)
		576	{
		577	/* This buffer is here specifically so
		578	that the UNGET below will work. */
		579	static char one_char_buf[1];
		580
		581	as_warn (_("end of file in string; '%c' inserted"), quotechar);
		582	state = old_state;
		583	from = fromend = one_char_buf + 1;
		584	fromlen = 1;
		585	UNGET ('\n');
		586	PUT (quotechar);
		587	}
		588	else if (ch == quotechar)
		589	{
		590	state = old_state;
		591	PUT (ch);
		592	}
		593	#ifndef NO_STRING_ESCAPES
		594	else if (ch == '\\')
		595	{
		596	state = 6;
		597	PUT (ch);
		598	}
		599	#endif
		600	else if (scrub_m68k_mri && ch == '\n')
		601	{
		602	/* Just quietly terminate the string. This permits lines like
		603	bne label loop if we haven't reach end yet. */
		604	state = old_state;
		605	UNGET (ch);
		606	PUT ('\'');
		607	}
		608	else
		609	{
		610	PUT (ch);
		611	}
		612	continue;
		613
		614	case 6:
		615	state = 5;
		616	ch = GET ();
		617	switch (ch)
		618	{
		619	/* Handle strings broken across lines, by turning '\n' into
		620	'\\' and 'n'. */
		621	case '\n':
		622	UNGET ('n');
		623	add_newlines++;
		624	PUT ('\\');
		625	continue;
		626
		627	case EOF:
		628	as_warn (_("end of file in string; '%c' inserted"), quotechar);
		629	PUT (quotechar);
		630	continue;
		631
		632	case '"':
		633	case '\\':
		634	case 'b':
		635	case 'f':
		636	case 'n':
		637	case 'r':
		638	case 't':
		639	case 'v':
		640	case 'x':
		641	case 'X':
		642	case '0':
		643	case '1':
		644	case '2':
		645	case '3':
		646	case '4':
		647	case '5':
		648	case '6':
		649	case '7':
		650	break;
		651
		652	default:
		653	#ifdef ONLY_STANDARD_ESCAPES
		654	as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
		655	#endif
		656	break;
		657	}
		658	PUT (ch);
		659	continue;
		660
		661	#ifdef DOUBLEBAR_PARALLEL
		662	case 13:
		663	ch = GET ();
		664	if (ch != '\|')
		665	abort ();
		666
		667	/* Reset back to state 1 and pretend that we are parsing a
		668	line from just after the first white space. */
		669	state = 1;
		670	PUT ('\|');
		671	#ifdef TC_TIC6X
		672	/* "\|\|^" is used for SPMASKed instructions. */
		673	ch = GET ();
		674	if (ch == EOF)
		675	goto fromeof;
		676	else if (ch == '^')
		677	PUT ('^');
		678	else
		679	UNGET (ch);
		680	#endif
		681	continue;
		682	#endif
		683	#ifdef TC_Z80
		684	case 16:
		685	/* We have seen an 'a' at the start of a symbol, look for an 'f'. */
		686	ch = GET ();
		687	if (ch == 'f' \|\| ch == 'F')
		688	{
		689	state = 17;
		690	PUT (ch);
		691	}
		692	else
		693	{
		694	state = 9;
		695	break;
		696	}
		697	case 17:
		698	/* We have seen "af" at the start of a symbol,
		699	a ' here is a part of that symbol. */
		700	ch = GET ();
		701	state = 9;
		702	if (ch == '\'')
		703	/* Change to avoid warning about unclosed string. */
		704	PUT ('`');
		705	else if (ch != EOF)
		706	UNGET (ch);
		707	break;
		708	#endif
		709	}
		710
		711	/* OK, we are somewhere in states 0 through 4 or 9 through 11. */
		712
		713	/* flushchar: */
		714	ch = GET ();
		715
		716	#ifdef TC_PREDICATE_START_CHAR
		717	if (ch == TC_PREDICATE_START_CHAR && (state == 0 \|\| state == 1))
		718	{
		719	state += 14;
		720	PUT (ch);
		721	continue;
		722	}
		723	else if (state == 14 \|\| state == 15)
		724	{
		725	if (ch == TC_PREDICATE_END_CHAR)
		726	{
		727	state -= 14;
		728	PUT (ch);
		729	ch = GET ();
		730	}
		731	else
		732	{
		733	PUT (ch);
		734	continue;
		735	}
		736	}
		737	#endif
		738
		739	recycle:
		740
		741	#if defined TC_ARM && defined OBJ_ELF
		742	/* We need to watch out for .symver directives. See the comment later
		743	in this function. */
		744	if (symver_state == NULL)
		745	{
		746	if ((state == 0 \|\| state == 1) && ch == symver_pseudo[0])
		747	symver_state = symver_pseudo + 1;
		748	}
		749	else
		750	{
		751	/* We advance to the next state if we find the right
		752	character. */
		753	if (ch != '\0' && (*symver_state == ch))
		754	++symver_state;
		755	else if (*symver_state != '\0')
		756	/* We did not get the expected character, or we didn't
		757	get a valid terminating character after seeing the
		758	entire pseudo-op, so we must go back to the beginning. */
		759	symver_state = NULL;
		760	else
		761	{
		762	/* We've read the entire pseudo-op. If this is the end
		763	of the line, go back to the beginning. */
		764	if (IS_NEWLINE (ch))
		765	symver_state = NULL;
		766	}
		767	}
		768	#endif /* TC_ARM && OBJ_ELF */
		769
		770	#ifdef TC_M68K
		771	/* We want to have pseudo-ops which control whether we are in
		772	MRI mode or not. Unfortunately, since m68k MRI mode affects
		773	the scrubber, that means that we need a special purpose
		774	recognizer here. */
		775	if (mri_state == NULL)
		776	{
		777	if ((state == 0 \|\| state == 1)
		778	&& ch == mri_pseudo[0])
		779	mri_state = mri_pseudo + 1;
		780	}
		781	else
		782	{
		783	/* We advance to the next state if we find the right
		784	character, or if we need a space character and we get any
		785	whitespace character, or if we need a '0' and we get a
		786	'1' (this is so that we only need one state to handle
		787	``.mri 0'' and ``.mri 1''). */
		788	if (ch != '\0'
		789	&& (*mri_state == ch
		790	\|\| (*mri_state == ' '
		791	&& lex[ch] == LEX_IS_WHITESPACE)
		792	\|\| (*mri_state == '0'
		793	&& ch == '1')))
		794	{
		795	mri_last_ch = ch;
		796	++mri_state;
		797	}
		798	else if (*mri_state != '\0'
		799	\|\| (lex[ch] != LEX_IS_WHITESPACE
		800	&& lex[ch] != LEX_IS_NEWLINE))
		801	{
		802	/* We did not get the expected character, or we didn't
		803	get a valid terminating character after seeing the
		804	entire pseudo-op, so we must go back to the
		805	beginning. */
		806	mri_state = NULL;
		807	}
		808	else
		809	{
		810	/* We've read the entire pseudo-op. mri_last_ch is
		811	either '0' or '1' indicating whether to enter or
		812	leave MRI mode. */
		813	do_scrub_begin (mri_last_ch == '1');
		814	mri_state = NULL;
		815
		816	/* We continue handling the character as usual. The
		817	main gas reader must also handle the .mri pseudo-op
		818	to control expression parsing and the like. */
		819	}
		820	}
		821	#endif
		822
		823	if (ch == EOF)
		824	{
		825	if (state != 0)
		826	{
		827	as_warn (_("end of file not at end of a line; newline inserted"));
		828	state = 0;
		829	PUT ('\n');
		830	}
		831	goto fromeof;
		832	}
		833
		834	switch (lex[ch])
		835	{
		836	case LEX_IS_WHITESPACE:
		837	do
		838	{
		839	ch = GET ();
		840	}
		841	while (ch != EOF && IS_WHITESPACE (ch));
		842	if (ch == EOF)
		843	goto fromeof;
		844
		845	if (state == 0)
		846	{
		847	/* Preserve a single whitespace character at the
		848	beginning of a line. */
		849	state = 1;
		850	UNGET (ch);
		851	PUT (' ');
		852	break;
		853	}
		854
		855	#ifdef KEEP_WHITE_AROUND_COLON
		856	if (lex[ch] == LEX_IS_COLON)
		857	{
		858	/* Only keep this white if there's no white after the
		859	colon. */
		860	ch2 = GET ();
		861	if (ch2 != EOF)
		862	UNGET (ch2);
		863	if (!IS_WHITESPACE (ch2))
		864	{
		865	state = 9;
		866	UNGET (ch);
		867	PUT (' ');
		868	break;
		869	}
		870	}
		871	#endif
		872	if (IS_COMMENT (ch)
		873	\|\| ch == '/'
		874	\|\| IS_LINE_SEPARATOR (ch)
		875	\|\| IS_PARALLEL_SEPARATOR (ch))
		876	{
		877	if (scrub_m68k_mri)
		878	{
		879	/* In MRI mode, we keep these spaces. */
		880	UNGET (ch);
		881	PUT (' ');
		882	break;
		883	}
		884	goto recycle;
		885	}
		886
		887	/* If we're in state 2 or 11, we've seen a non-white
		888	character followed by whitespace. If the next character
		889	is ':', this is whitespace after a label name which we
		890	normally must ignore. In MRI mode, though, spaces are
		891	not permitted between the label and the colon. */
		892	if ((state == 2 \|\| state == 11)
		893	&& lex[ch] == LEX_IS_COLON
		894	&& ! scrub_m68k_mri)
		895	{
		896	state = 1;
		897	PUT (ch);
		898	break;
		899	}
		900
		901	switch (state)
		902	{
		903	case 1:
		904	/* We can arrive here if we leave a leading whitespace
		905	character at the beginning of a line. */
		906	goto recycle;
		907	case 2:
		908	state = 3;
		909	if (to + 1 < toend)
		910	{
		911	/* Optimize common case by skipping UNGET/GET. */
		912	PUT (' '); /* Sp after opco */
		913	goto recycle;
		914	}
		915	UNGET (ch);
		916	PUT (' ');
		917	break;
		918	case 3:
		919	#ifndef TC_KEEP_OPERAND_SPACES
		920	/* For TI C6X, we keep these spaces as they may separate
		921	functional unit specifiers from operands. */
		922	if (scrub_m68k_mri)
		923	#endif
		924	{
		925	/* In MRI mode, we keep these spaces. */
		926	UNGET (ch);
		927	PUT (' ');
		928	break;
		929	}
		930	goto recycle; /* Sp in operands */
		931	case 9:
		932	case 10:
		933	#ifndef TC_KEEP_OPERAND_SPACES
		934	if (scrub_m68k_mri)
		935	#endif
		936	{
		937	/* In MRI mode, we keep these spaces. */
		938	state = 3;
		939	UNGET (ch);
		940	PUT (' ');
		941	break;
		942	}
		943	state = 10; /* Sp after symbol char */
		944	goto recycle;
		945	case 11:
		946	if (LABELS_WITHOUT_COLONS \|\| flag_m68k_mri)
		947	state = 1;
		948	else
		949	{
		950	/* We know that ch is not ':', since we tested that
		951	case above. Therefore this is not a label, so it
		952	must be the opcode, and we've just seen the
		953	whitespace after it. */
		954	state = 3;
		955	}
		956	UNGET (ch);
		957	PUT (' '); /* Sp after label definition. */
		958	break;
		959	default:
		960	BAD_CASE (state);
		961	}
		962	break;
		963
		964	case LEX_IS_TWOCHAR_COMMENT_1ST:
		965	ch2 = GET ();
		966	if (ch2 == '*')
		967	{
		968	for (;;)
		969	{
		970	do
		971	{
		972	ch2 = GET ();
		973	if (ch2 != EOF && IS_NEWLINE (ch2))
		974	add_newlines++;
		975	}
		976	while (ch2 != EOF && ch2 != '*');
		977
		978	while (ch2 == '*')
		979	ch2 = GET ();
		980
		981	if (ch2 == EOF \|\| ch2 == '/')
		982	break;
		983
		984	/* This UNGET will ensure that we count newlines
		985	correctly. */
		986	UNGET (ch2);
		987	}
		988
		989	if (ch2 == EOF)
		990	as_warn (_("end of file in multiline comment"));
		991
		992	ch = ' ';
		993	goto recycle;
		994	}
		995	#ifdef DOUBLESLASH_LINE_COMMENTS
		996	else if (ch2 == '/')
		997	{
		998	do
		999	{
		1000	ch = GET ();
		1001	}
		1002	while (ch != EOF && !IS_NEWLINE (ch));
		1003	if (ch == EOF)
		1004	as_warn ("end of file in comment; newline inserted");
		1005	state = 0;
		1006	PUT ('\n');
		1007	break;
		1008	}
		1009	#endif
		1010	else
		1011	{
		1012	if (ch2 != EOF)
		1013	UNGET (ch2);
		1014	if (state == 9 \|\| state == 10)
		1015	state = 3;
		1016	PUT (ch);
		1017	}
		1018	break;
		1019
		1020	case LEX_IS_STRINGQUOTE:
		1021	quotechar = ch;
		1022	if (state == 10)
		1023	{
		1024	/* Preserve the whitespace in foo "bar". */
		1025	UNGET (ch);
		1026	state = 3;
		1027	PUT (' ');
		1028
		1029	/* PUT didn't jump out. We could just break, but we
		1030	know what will happen, so optimize a bit. */
		1031	ch = GET ();
		1032	old_state = 3;
		1033	}
		1034	else if (state == 9)
		1035	old_state = 3;
		1036	else
		1037	old_state = state;
		1038	state = 5;
		1039	PUT (ch);
		1040	break;
		1041
		1042	#ifndef IEEE_STYLE
		1043	case LEX_IS_ONECHAR_QUOTE:
		1044	#ifdef H_TICK_HEX
		1045	if (state == 9 && enable_h_tick_hex)
		1046	{
		1047	char c;
		1048
		1049	c = GET ();
		1050	as_warn ("'%c found after symbol", c);
		1051	UNGET (c);
		1052	}
		1053	#endif
		1054	if (state == 10)
		1055	{
		1056	/* Preserve the whitespace in foo 'b'. */
		1057	UNGET (ch);
		1058	state = 3;
		1059	PUT (' ');
		1060	break;
		1061	}
		1062	ch = GET ();
		1063	if (ch == EOF)
		1064	{
		1065	as_warn (_("end of file after a one-character quote; \\0 inserted"));
		1066	ch = 0;
		1067	}
		1068	if (ch == '\\')
		1069	{
		1070	ch = GET ();
		1071	if (ch == EOF)
		1072	{
		1073	as_warn (_("end of file in escape character"));
		1074	ch = '\\';
		1075	}
		1076	else
		1077	ch = process_escape (ch);
		1078	}
		1079	sprintf (out_buf, "%d", (int) (unsigned char) ch);
		1080
		1081	/* None of these 'x constants for us. We want 'x'. */
		1082	if ((ch = GET ()) != '\'')
		1083	{
		1084	#ifdef REQUIRE_CHAR_CLOSE_QUOTE
		1085	as_warn (_("missing close quote; (assumed)"));
		1086	#else
		1087	if (ch != EOF)
		1088	UNGET (ch);
		1089	#endif
		1090	}
		1091	if (strlen (out_buf) == 1)
		1092	{
		1093	PUT (out_buf[0]);
		1094	break;
		1095	}
		1096	if (state == 9)
		1097	old_state = 3;
		1098	else
		1099	old_state = state;
		1100	state = -1;
		1101	out_string = out_buf;
		1102	PUT (*out_string++);
		1103	break;
		1104	#endif
		1105
		1106	case LEX_IS_COLON:
		1107	#ifdef KEEP_WHITE_AROUND_COLON
		1108	state = 9;
		1109	#else
		1110	if (state == 9 \|\| state == 10)
		1111	state = 3;
		1112	else if (state != 3)
		1113	state = 1;
		1114	#endif
		1115	PUT (ch);
		1116	break;
		1117
		1118	case LEX_IS_NEWLINE:
		1119	/* Roll out a bunch of newlines from inside comments, etc. */
		1120	if (add_newlines)
		1121	{
		1122	--add_newlines;
		1123	UNGET (ch);
		1124	}
		1125	/* Fall through. */
		1126
		1127	case LEX_IS_LINE_SEPARATOR:
		1128	state = 0;
		1129	PUT (ch);
		1130	break;
		1131
		1132	case LEX_IS_PARALLEL_SEPARATOR:
		1133	state = 1;
		1134	PUT (ch);
		1135	break;
		1136
		1137	#ifdef TC_V850
		1138	case LEX_IS_DOUBLEDASH_1ST:
		1139	ch2 = GET ();
		1140	if (ch2 != '-')
		1141	{
		1142	if (ch2 != EOF)
		1143	UNGET (ch2);
		1144	goto de_fault;
		1145	}
		1146	/* Read and skip to end of line. */
		1147	do
		1148	{
		1149	ch = GET ();
		1150	}
		1151	while (ch != EOF && ch != '\n');
		1152
		1153	if (ch == EOF)
		1154	as_warn (_("end of file in comment; newline inserted"));
		1155
		1156	state = 0;
		1157	PUT ('\n');
		1158	break;
		1159	#endif
		1160	#ifdef DOUBLEBAR_PARALLEL
		1161	case LEX_IS_DOUBLEBAR_1ST:
		1162	ch2 = GET ();
		1163	if (ch2 != EOF)
		1164	UNGET (ch2);
		1165	if (ch2 != '\|')
		1166	goto de_fault;
		1167
		1168	/* Handle '\|\|' in two states as invoking PUT twice might
		1169	result in the first one jumping out of this loop. We'd
		1170	then lose track of the state and one '\|' char. */
		1171	state = 13;
		1172	PUT ('\|');
		1173	break;
		1174	#endif
		1175	case LEX_IS_LINE_COMMENT_START:
		1176	/* FIXME-someday: The two character comment stuff was badly
		1177	thought out. On i386, we want '/' as line comment start
		1178	AND we want C style comments. hence this hack. The
		1179	whole lexical process should be reworked. xoxorich. */
		1180	if (ch == '/')
		1181	{
		1182	ch2 = GET ();
		1183	if (ch2 == '*')
		1184	{
		1185	old_state = 3;
		1186	state = -2;
		1187	break;
		1188	}
		1189	else
		1190	{
		1191	UNGET (ch2);
		1192	}
		1193	}
		1194
		1195	if (state == 0 \|\| state == 1) /* Only comment at start of line. */
		1196	{
		1197	int startch;
		1198
		1199	startch = ch;
		1200
		1201	do
		1202	{
		1203	ch = GET ();
		1204	}
		1205	while (ch != EOF && IS_WHITESPACE (ch));
		1206
		1207	if (ch == EOF)
		1208	{
		1209	as_warn (_("end of file in comment; newline inserted"));
		1210	PUT ('\n');
		1211	break;
		1212	}
		1213
		1214	if (ch < '0' \|\| ch > '9' \|\| state != 0 \|\| startch != '#')
		1215	{
		1216	/* Not a cpp line. */
		1217	while (ch != EOF && !IS_NEWLINE (ch))
		1218	ch = GET ();
		1219	if (ch == EOF)
		1220	as_warn (_("end of file in comment; newline inserted"));
		1221	state = 0;
		1222	PUT ('\n');
		1223	break;
		1224	}
		1225	/* Looks like `# 123 "filename"' from cpp. */
		1226	UNGET (ch);
		1227	old_state = 4;
		1228	state = -1;
		1229	if (scrub_m68k_mri)
		1230	out_string = "\tlinefile ";
		1231	else
		1232	out_string = "\t.linefile ";
		1233	PUT (*out_string++);
		1234	break;
		1235	}
		1236
		1237	#ifdef TC_D10V
		1238	/* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
		1239	Trap is the only short insn that has a first operand that is
		1240	neither register nor label.
		1241	We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
		1242	We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
		1243	already LEX_IS_LINE_COMMENT_START. However, it is the
		1244	only character in line_comment_chars for d10v, hence we
		1245	can recognize it as such. */
		1246	/* An alternative approach would be to reset the state to 1 when
		1247	we see '||', '<'- or '->', but that seems to be overkill. */
		1248	if (state == 10)
		1249	PUT (' ');
		1250	#endif
		1251	/* We have a line comment character which is not at the
		1252	start of a line. If this is also a normal comment
		1253	character, fall through. Otherwise treat it as a default
		1254	character. */
		1255	if (strchr (tc_comment_chars, ch) == NULL
		1256	&& (! scrub_m68k_mri
		1257	\|\| (ch != '!' && ch != '*')))
		1258	goto de_fault;
		1259	if (scrub_m68k_mri
		1260	&& (ch == '!' \|\| ch == '*' \|\| ch == '#')
		1261	&& state != 1
		1262	&& state != 10)
		1263	goto de_fault;
		1264	/* Fall through. */
		1265	case LEX_IS_COMMENT_START:
		1266	#if defined TC_ARM && defined OBJ_ELF
		1267	/* On the ARM, `@' is the comment character.
		1268	Unfortunately this is also a special character in ELF .symver
		1269	directives (and .type, though we deal with those another way).
		1270	So we check if this line is such a directive, and treat
		1271	the character as default if so. This is a hack. */
		1272	if ((symver_state != NULL) && (*symver_state == 0))
		1273	goto de_fault;
		1274	#endif
		1275
		1276	#ifdef TC_ARM
		1277	/* For the ARM, care is needed not to damage occurrences of \@
		1278	by stripping the @ onwards. Yuck. */
		1279	if (to > tostart && *(to - 1) == '\\')
		1280	/* Do not treat the @ as a start-of-comment. */
		1281	goto de_fault;
		1282	#endif
		1283
		1284	#ifdef WARN_COMMENTS
		1285	if (!found_comment)
		1286	as_where (&found_comment_file, &found_comment);
		1287	#endif
		1288	do
		1289	{
		1290	ch = GET ();
		1291	}
		1292	while (ch != EOF && !IS_NEWLINE (ch));
		1293	if (ch == EOF)
		1294	as_warn (_("end of file in comment; newline inserted"));
		1295	state = 0;
		1296	PUT ('\n');
		1297	break;
		1298
		1299	#ifdef H_TICK_HEX
		1300	case LEX_IS_H:
		1301	/* Look for strings like H'[0-9A-Fa-f] and if found, replace
		1302	the H' with 0x to make them gas-style hex characters. */
		1303	if (enable_h_tick_hex)
		1304	{
		1305	char quot;
		1306
		1307	quot = GET ();
		1308	if (quot == '\'')
		1309	{
		1310	UNGET ('x');
		1311	ch = '0';
		1312	}
		1313	else
		1314	UNGET (quot);
		1315	}
		1316	/* FALL THROUGH */
		1317	#endif
		1318
		1319	case LEX_IS_SYMBOL_COMPONENT:
		1320	if (state == 10)
		1321	{
		1322	/* This is a symbol character following another symbol
		1323	character, with whitespace in between. We skipped
		1324	the whitespace earlier, so output it now. */
		1325	UNGET (ch);
		1326	state = 3;
		1327	PUT (' ');
		1328	break;
		1329	}
		1330
		1331	#ifdef TC_Z80
		1332	/* "af'" is a symbol containing '\''. */
		1333	if (state == 3 && (ch == 'a' \|\| ch == 'A'))
		1334	{
		1335	state = 16;
		1336	PUT (ch);
		1337	ch = GET ();
		1338	if (ch == 'f' \|\| ch == 'F')
		1339	{
		1340	state = 17;
		1341	PUT (ch);
		1342	break;
		1343	}
		1344	else
		1345	{
		1346	state = 9;
		1347	if (ch == EOF \|\| !IS_SYMBOL_COMPONENT (ch))
		1348	{
		1349	if (ch != EOF)
		1350	UNGET (ch);
		1351	break;
		1352	}
		1353	}
		1354	}
		1355	#endif
		1356	if (state == 3)
		1357	state = 9;
		1358
		1359	/* This is a common case. Quickly copy CH and all the
		1360	following symbol component or normal characters. */
		1361	if (to + 1 < toend
		1362	&& mri_state == NULL
		1363	#if defined TC_ARM && defined OBJ_ELF
		1364	&& symver_state == NULL
		1365	#endif
		1366	)
		1367	{
		1368	char *s;
		1369	ptrdiff_t len;
		1370
		1371	for (s = from; s < fromend; s++)
		1372	{
		1373	int type;
		1374
		1375	ch2 = (unsigned char ) s;
		1376	type = lex[ch2];
		1377	if (type != 0
		1378	&& type != LEX_IS_SYMBOL_COMPONENT)
		1379	break;
		1380	}
		1381
		1382	if (s > from)
		1383	/* Handle the last character normally, for
		1384	simplicity. */
		1385	--s;
		1386
		1387	len = s - from;
		1388
		1389	if (len > (toend - to) - 1)
		1390	len = (toend - to) - 1;
		1391
		1392	if (len > 0)
		1393	{
		1394	PUT (ch);
		1395	memcpy (to, from, len);
		1396	to += len;
		1397	from += len;
		1398	if (to >= toend)
		1399	goto tofull;
		1400	ch = GET ();
		1401	}
		1402	}
		1403
		1404	/* Fall through. */
		1405	default:
		1406	de_fault:
		1407	/* Some relatively `normal' character. */
		1408	if (state == 0)
		1409	{
		1410	state = 11; /* Now seeing label definition. */
		1411	}
		1412	else if (state == 1)
		1413	{
		1414	state = 2; /* Ditto. */
		1415	}
		1416	else if (state == 9)
		1417	{
		1418	if (!IS_SYMBOL_COMPONENT (ch))
		1419	state = 3;
		1420	}
		1421	else if (state == 10)
		1422	{
		1423	if (ch == '\\')
		1424	{
		1425	/* Special handling for backslash: a backslash may
		1426	be the beginning of a formal parameter (of a
		1427	macro) following another symbol character, with
		1428	whitespace in between. If that is the case, we
		1429	output a space before the parameter. Strictly
		1430	speaking, correct handling depends upon what the
		1431	macro parameter expands into; if the parameter
		1432	expands into something which does not start with
		1433	an operand character, then we don't want to keep
		1434	the space. We don't have enough information to
		1435	make the right choice, so here we are making the
		1436	choice which is more likely to be correct. */
		1437	if (to + 1 >= toend)
		1438	{
		1439	/* If we're near the end of the buffer, save the
		1440	character for the next time round. Otherwise
		1441	we'll lose our state. */
		1442	UNGET (ch);
		1443	goto tofull;
		1444	}
		1445	*to++ = ' ';
		1446	}
		1447
		1448	state = 3;
		1449	}
		1450	PUT (ch);
		1451	break;
		1452	}
		1453	}
		1454
		1455	/*NOTREACHED*/
		1456
		1457	fromeof:
		1458	/* We have reached the end of the input. */
		1459	return to - tostart;
		1460
		1461	tofull:
		1462	/* The output buffer is full. Save any input we have not yet
		1463	processed. */
		1464	if (fromend > from)
		1465	{
		1466	saved_input = from;
		1467	saved_input_len = fromend - from;
		1468	}
		1469	else
		1470	saved_input = NULL;
		1471
		1472	return to - tostart;
		1473	}

Subversion Repositories Kolibri OS

(root)/contrib/toolchain/binutils/gas/app.c – Rev 5222