WebSVN – Kolibri OS – Blame – /contrib/toolchain/binutils/gas/app.c

Rev	Author	Line No.	Line
5222	serge	1	/* This is the Assembler Pre-Processor
6324	serge	2	Copyright (C) 1987-2015 Free Software Foundation, Inc.
5222	serge	3
		4	This file is part of GAS, the GNU Assembler.
		5
		6	GAS is free software; you can redistribute it and/or modify
		7	it under the terms of the GNU General Public License as published by
		8	the Free Software Foundation; either version 3, or (at your option)
		9	any later version.
		10
		11	GAS is distributed in the hope that it will be useful, but WITHOUT
		12	ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
		13	or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
		14	License for more details.
		15
		16	You should have received a copy of the GNU General Public License
		17	along with GAS; see the file COPYING. If not, write to the Free
		18	Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
		19	02110-1301, USA. */
		20
		21	/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90. */
		22	/* App, the assembler pre-processor. This pre-processor strips out
		23	excess spaces, turns single-quoted characters into a decimal
		24	constant, and turns the # in # <number> <filename> <garbage> into a
		25	.linefile. This needs better error-handling. */
		26
		27	#include "as.h"
		28
		29	#if (__STDC__ != 1)
		30	#ifndef const
		31	#define const /* empty */
		32	#endif
		33	#endif
		34
		35	#ifdef H_TICK_HEX
		36	int enable_h_tick_hex = 0;
		37	#endif
		38
		39	#ifdef TC_M68K
		40	/* Whether we are scrubbing in m68k MRI mode. This is different from
		41	flag_m68k_mri, because the two flags will be affected by the .mri
		42	pseudo-op at different times. */
		43	static int scrub_m68k_mri;
		44
		45	/* The pseudo-op which switches in and out of MRI mode. See the
		46	comment in do_scrub_chars. */
		47	static const char mri_pseudo[] = ".mri 0";
		48	#else
		49	#define scrub_m68k_mri 0
		50	#endif
		51
		52	#if defined TC_ARM && defined OBJ_ELF
		53	/* The pseudo-op for which we need to special-case `@' characters.
		54	See the comment in do_scrub_chars. */
		55	static const char symver_pseudo[] = ".symver";
		56	static const char * symver_state;
		57	#endif
		58
		59	static char lex[256];
		60	static const char symbol_chars[] =
		61	"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
		62
		63	#define LEX_IS_SYMBOL_COMPONENT 1
		64	#define LEX_IS_WHITESPACE 2
		65	#define LEX_IS_LINE_SEPARATOR 3
		66	#define LEX_IS_COMMENT_START 4
		67	#define LEX_IS_LINE_COMMENT_START 5
		68	#define LEX_IS_TWOCHAR_COMMENT_1ST 6
		69	#define LEX_IS_STRINGQUOTE 8
		70	#define LEX_IS_COLON 9
		71	#define LEX_IS_NEWLINE 10
		72	#define LEX_IS_ONECHAR_QUOTE 11
		73	#ifdef TC_V850
		74	#define LEX_IS_DOUBLEDASH_1ST 12
		75	#endif
		76	#ifdef TC_M32R
		77	#define DOUBLEBAR_PARALLEL
		78	#endif
		79	#ifdef DOUBLEBAR_PARALLEL
		80	#define LEX_IS_DOUBLEBAR_1ST 13
		81	#endif
		82	#define LEX_IS_PARALLEL_SEPARATOR 14
		83	#ifdef H_TICK_HEX
		84	#define LEX_IS_H 15
		85	#endif
		86	#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
		87	#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
		88	#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
		89	#define IS_PARALLEL_SEPARATOR(c) (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
		90	#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
		91	#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
		92	#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
		93
		94	static int process_escape (int);
		95
		96	/* FIXME-soon: The entire lexer/parser thingy should be
		97	built statically at compile time rather than dynamically
		98	each and every time the assembler is run. xoxorich. */
		99
		100	void
		101	do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
		102	{
		103	const char *p;
		104	int c;
		105
		106	lex[' '] = LEX_IS_WHITESPACE;
		107	lex['\t'] = LEX_IS_WHITESPACE;
		108	lex['\r'] = LEX_IS_WHITESPACE;
		109	lex['\n'] = LEX_IS_NEWLINE;
		110	lex[':'] = LEX_IS_COLON;
		111
		112	#ifdef TC_M68K
		113	scrub_m68k_mri = m68k_mri;
		114
		115	if (! m68k_mri)
		116	#endif
		117	{
		118	lex['"'] = LEX_IS_STRINGQUOTE;
		119
		120	#if ! defined (TC_HPPA) && ! defined (TC_I370)
		121	/* I370 uses single-quotes to delimit integer, float constants. */
		122	lex['\''] = LEX_IS_ONECHAR_QUOTE;
		123	#endif
		124
		125	#ifdef SINGLE_QUOTE_STRINGS
		126	lex['\''] = LEX_IS_STRINGQUOTE;
		127	#endif
		128	}
		129
		130	/* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
		131	in state 5 of do_scrub_chars must be changed. */
		132
		133	/* Note that these override the previous defaults, e.g. if ';' is a
		134	comment char, then it isn't a line separator. */
		135	for (p = symbol_chars; *p; ++p)
		136	lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
		137
		138	for (c = 128; c < 256; ++c)
		139	lex[c] = LEX_IS_SYMBOL_COMPONENT;
		140
		141	#ifdef tc_symbol_chars
		142	/* This macro permits the processor to specify all characters which
		143	may appears in an operand. This will prevent the scrubber from
		144	discarding meaningful whitespace in certain cases. The i386
		145	backend uses this to support prefixes, which can confuse the
		146	scrubber as to whether it is parsing operands or opcodes. */
		147	for (p = tc_symbol_chars; *p; ++p)
		148	lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
		149	#endif
		150
		151	/* The m68k backend wants to be able to change comment_chars. */
		152	#ifndef tc_comment_chars
		153	#define tc_comment_chars comment_chars
		154	#endif
		155	for (p = tc_comment_chars; *p; p++)
		156	lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
		157
		158	for (p = line_comment_chars; *p; p++)
		159	lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
		160
6324	serge	161	#ifndef tc_line_separator_chars
		162	#define tc_line_separator_chars line_separator_chars
		163	#endif
		164	for (p = tc_line_separator_chars; *p; p++)
5222	serge	165	lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
		166
		167	#ifdef tc_parallel_separator_chars
		168	/* This macro permits the processor to specify all characters which
		169	separate parallel insns on the same line. */
		170	for (p = tc_parallel_separator_chars; *p; p++)
		171	lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
		172	#endif
		173
		174	/* Only allow slash-star comments if slash is not in use.
		175	FIXME: This isn't right. We should always permit them. */
		176	if (lex['/'] == 0)
		177	lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
		178
		179	#ifdef TC_M68K
		180	if (m68k_mri)
		181	{
		182	lex['\''] = LEX_IS_STRINGQUOTE;
		183	lex[';'] = LEX_IS_COMMENT_START;
		184	lex['*'] = LEX_IS_LINE_COMMENT_START;
		185	/* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
		186	then it can't be used in an expression. */
		187	lex['!'] = LEX_IS_LINE_COMMENT_START;
		188	}
		189	#endif
		190
		191	#ifdef TC_V850
		192	lex['-'] = LEX_IS_DOUBLEDASH_1ST;
		193	#endif
		194	#ifdef DOUBLEBAR_PARALLEL
		195	lex['\|'] = LEX_IS_DOUBLEBAR_1ST;
		196	#endif
		197	#ifdef TC_D30V
		198	/* Must do this is we want VLIW instruction with "->" or "<-". */
		199	lex['-'] = LEX_IS_SYMBOL_COMPONENT;
		200	#endif
		201
		202	#ifdef H_TICK_HEX
		203	if (enable_h_tick_hex)
		204	{
		205	lex['h'] = LEX_IS_H;
		206	lex['H'] = LEX_IS_H;
		207	}
		208	#endif
		209	}
		210
		211	/* Saved state of the scrubber. */
		212	static int state;
		213	static int old_state;
		214	static char *out_string;
		215	static char out_buf[20];
		216	static int add_newlines;
		217	static char *saved_input;
		218	static size_t saved_input_len;
		219	static char input_buffer[32 * 1024];
		220	static const char *mri_state;
		221	static char mri_last_ch;
		222
		223	/* Data structure for saving the state of app across #include's. Note that
		224	app is called asynchronously to the parsing of the .include's, so our
		225	state at the time .include is interpreted is completely unrelated.
		226	That's why we have to save it all. */
		227
		228	struct app_save
		229	{
		230	int state;
		231	int old_state;
		232	char * out_string;
		233	char out_buf[sizeof (out_buf)];
		234	int add_newlines;
		235	char * saved_input;
		236	size_t saved_input_len;
		237	#ifdef TC_M68K
		238	int scrub_m68k_mri;
		239	#endif
		240	const char * mri_state;
		241	char mri_last_ch;
		242	#if defined TC_ARM && defined OBJ_ELF
		243	const char * symver_state;
		244	#endif
		245	};
		246
		247	char *
		248	app_push (void)
		249	{
6324	serge	250	struct app_save *saved;
5222	serge	251
		252	saved = (struct app_save ) xmalloc (sizeof (saved));
		253	saved->state = state;
		254	saved->old_state = old_state;
		255	saved->out_string = out_string;
		256	memcpy (saved->out_buf, out_buf, sizeof (out_buf));
		257	saved->add_newlines = add_newlines;
		258	if (saved_input == NULL)
		259	saved->saved_input = NULL;
		260	else
		261	{
		262	saved->saved_input = (char *) xmalloc (saved_input_len);
		263	memcpy (saved->saved_input, saved_input, saved_input_len);
		264	saved->saved_input_len = saved_input_len;
		265	}
		266	#ifdef TC_M68K
		267	saved->scrub_m68k_mri = scrub_m68k_mri;
		268	#endif
		269	saved->mri_state = mri_state;
		270	saved->mri_last_ch = mri_last_ch;
		271	#if defined TC_ARM && defined OBJ_ELF
		272	saved->symver_state = symver_state;
		273	#endif
		274
		275	/* do_scrub_begin() is not useful, just wastes time. */
		276
		277	state = 0;
		278	saved_input = NULL;
		279	add_newlines = 0;
		280
		281	return (char *) saved;
		282	}
		283
		284	void
		285	app_pop (char *arg)
		286	{
6324	serge	287	struct app_save saved = (struct app_save ) arg;
5222	serge	288
		289	/* There is no do_scrub_end (). */
		290	state = saved->state;
		291	old_state = saved->old_state;
		292	out_string = saved->out_string;
		293	memcpy (out_buf, saved->out_buf, sizeof (out_buf));
		294	add_newlines = saved->add_newlines;
		295	if (saved->saved_input == NULL)
		296	saved_input = NULL;
		297	else
		298	{
		299	gas_assert (saved->saved_input_len <= sizeof (input_buffer));
		300	memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
		301	saved_input = input_buffer;
		302	saved_input_len = saved->saved_input_len;
		303	free (saved->saved_input);
		304	}
		305	#ifdef TC_M68K
		306	scrub_m68k_mri = saved->scrub_m68k_mri;
		307	#endif
		308	mri_state = saved->mri_state;
		309	mri_last_ch = saved->mri_last_ch;
		310	#if defined TC_ARM && defined OBJ_ELF
		311	symver_state = saved->symver_state;
		312	#endif
		313
		314	free (arg);
		315	}
		316
		317	/* @@ This assumes that \n &c are the same on host and target. This is not
		318	necessarily true. */
		319
		320	static int
		321	process_escape (int ch)
		322	{
		323	switch (ch)
		324	{
		325	case 'b':
		326	return '\b';
		327	case 'f':
		328	return '\f';
		329	case 'n':
		330	return '\n';
		331	case 'r':
		332	return '\r';
		333	case 't':
		334	return '\t';
		335	case '\'':
		336	return '\'';
		337	case '"':
		338	return '\"';
		339	default:
		340	return ch;
		341	}
		342	}
		343
		344	/* This function is called to process input characters. The GET
		345	parameter is used to retrieve more input characters. GET should
		346	set its parameter to point to a buffer, and return the length of
		347	the buffer; it should return 0 at end of file. The scrubbed output
		348	characters are put into the buffer starting at TOSTART; the TOSTART
		349	buffer is TOLEN bytes in length. The function returns the number
		350	of scrubbed characters put into TOSTART. This will be TOLEN unless
		351	end of file was seen. This function is arranged as a state
		352	machine, and saves its state so that it may return at any point.
		353	This is the way the old code used to work. */
		354
		355	size_t
		356	do_scrub_chars (size_t (get) (char , size_t), char *tostart, size_t tolen)
		357	{
		358	char *to = tostart;
		359	char *toend = tostart + tolen;
		360	char *from;
		361	char *fromend;
		362	size_t fromlen;
6324	serge	363	int ch, ch2 = 0;
5222	serge	364	/* Character that started the string we're working on. */
		365	static char quotechar;
		366
		367	/*State 0: beginning of normal line
		368	1: After first whitespace on line (flush more white)
		369	2: After first non-white (opcode) on line (keep 1white)
		370	3: after second white on line (into operands) (flush white)
		371	4: after putting out a .linefile, put out digits
		372	5: parsing a string, then go to old-state
		373	6: putting out \ escape in a "d string.
		374	7: no longer used
		375	8: no longer used
		376	9: After seeing symbol char in state 3 (keep 1white after symchar)
		377	10: After seeing whitespace in state 9 (keep white before symchar)
		378	11: After seeing a symbol character in state 0 (eg a label definition)
		379	-1: output string in out_string and go to the state in old_state
		380	-2: flush text until a '*' '/' is seen, then go to state old_state
		381	#ifdef TC_V850
		382	12: After seeing a dash, looking for a second dash as a start
		383	of comment.
		384	#endif
		385	#ifdef DOUBLEBAR_PARALLEL
		386	13: After seeing a vertical bar, looking for a second
		387	vertical bar as a parallel expression separator.
		388	#endif
		389	#ifdef TC_PREDICATE_START_CHAR
		390	14: After seeing a predicate start character at state 0, looking
		391	for a predicate end character as predicate.
		392	15: After seeing a predicate start character at state 1, looking
		393	for a predicate end character as predicate.
		394	#endif
		395	#ifdef TC_Z80
		396	16: After seeing an 'a' or an 'A' at the start of a symbol
		397	17: After seeing an 'f' or an 'F' in state 16
		398	#endif
		399	*/
		400
		401	/* I added states 9 and 10 because the MIPS ECOFF assembler uses
		402	constructs like ``.loc 1 20''. This was turning into ``.loc
		403	120''. States 9 and 10 ensure that a space is never dropped in
		404	between characters which could appear in an identifier. Ian
		405	Taylor, ian@cygnus.com.
		406
		407	I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
		408	correctly on the PA (and any other target where colons are optional).
		409	Jeff Law, law@cs.utah.edu.
		410
		411	I added state 13 so that something like "cmp r1, r2 \|\| trap #1" does not
		412	get squashed into "cmp r1,r2\|\|trap#1", with the all important space
		413	between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */
		414
		415	/* This macro gets the next input character. */
		416
		417	#define GET() \
		418	(from < fromend \
		419	? * (unsigned char *) (from++) \
		420	: (saved_input = NULL, \
		421	fromlen = (*get) (input_buffer, sizeof input_buffer), \
		422	from = input_buffer, \
		423	fromend = from + fromlen, \
		424	(fromlen == 0 \
		425	? EOF \
		426	: * (unsigned char *) (from++))))
		427
		428	/* This macro pushes a character back on the input stream. */
		429
		430	#define UNGET(uch) (*--from = (uch))
		431
		432	/* This macro puts a character into the output buffer. If this
		433	character fills the output buffer, this macro jumps to the label
		434	TOFULL. We use this rather ugly approach because we need to
		435	handle two different termination conditions: EOF on the input
		436	stream, and a full output buffer. It would be simpler if we
		437	always read in the entire input stream before processing it, but
		438	I don't want to make such a significant change to the assembler's
		439	memory usage. */
		440
		441	#define PUT(pch) \
		442	do \
		443	{ \
		444	*to++ = (pch); \
		445	if (to >= toend) \
		446	goto tofull; \
		447	} \
		448	while (0)
		449
		450	if (saved_input != NULL)
		451	{
		452	from = saved_input;
		453	fromend = from + saved_input_len;
		454	}
		455	else
		456	{
		457	fromlen = (*get) (input_buffer, sizeof input_buffer);
		458	if (fromlen == 0)
		459	return 0;
		460	from = input_buffer;
		461	fromend = from + fromlen;
		462	}
		463
		464	while (1)
		465	{
		466	/* The cases in this switch end with continue, in order to
		467	branch back to the top of this while loop and generate the
		468	next output character in the appropriate state. */
		469	switch (state)
		470	{
		471	case -1:
		472	ch = *out_string++;
		473	if (*out_string == '\0')
		474	{
		475	state = old_state;
		476	old_state = 3;
		477	}
		478	PUT (ch);
		479	continue;
		480
		481	case -2:
		482	for (;;)
		483	{
		484	do
		485	{
		486	ch = GET ();
		487
		488	if (ch == EOF)
		489	{
		490	as_warn (_("end of file in comment"));
		491	goto fromeof;
		492	}
		493
		494	if (ch == '\n')
		495	PUT ('\n');
		496	}
		497	while (ch != '*');
		498
		499	while ((ch = GET ()) == '*')
		500	;
		501
		502	if (ch == EOF)
		503	{
		504	as_warn (_("end of file in comment"));
		505	goto fromeof;
		506	}
		507
		508	if (ch == '/')
		509	break;
		510
		511	UNGET (ch);
		512	}
		513
		514	state = old_state;
		515	UNGET (' ');
		516	continue;
		517
		518	case 4:
		519	ch = GET ();
		520	if (ch == EOF)
		521	goto fromeof;
		522	else if (ch >= '0' && ch <= '9')
		523	PUT (ch);
		524	else
		525	{
		526	while (ch != EOF && IS_WHITESPACE (ch))
		527	ch = GET ();
		528	if (ch == '"')
		529	{
		530	quotechar = ch;
		531	state = 5;
		532	old_state = 3;
		533	PUT (ch);
		534	}
		535	else
		536	{
		537	while (ch != EOF && ch != '\n')
		538	ch = GET ();
		539	state = 0;
		540	PUT (ch);
		541	}
		542	}
		543	continue;
		544
		545	case 5:
		546	/* We are going to copy everything up to a quote character,
		547	with special handling for a backslash. We try to
		548	optimize the copying in the simple case without using the
		549	GET and PUT macros. */
		550	{
		551	char *s;
		552	ptrdiff_t len;
		553
		554	for (s = from; s < fromend; s++)
		555	{
		556	ch = *s;
		557	if (ch == '\\'
		558	\|\| ch == quotechar
		559	\|\| ch == '\n')
		560	break;
		561	}
		562	len = s - from;
		563	if (len > toend - to)
		564	len = toend - to;
		565	if (len > 0)
		566	{
		567	memcpy (to, from, len);
		568	to += len;
		569	from += len;
		570	if (to >= toend)
		571	goto tofull;
		572	}
		573	}
		574
		575	ch = GET ();
		576	if (ch == EOF)
		577	{
		578	/* This buffer is here specifically so
		579	that the UNGET below will work. */
		580	static char one_char_buf[1];
		581
		582	as_warn (_("end of file in string; '%c' inserted"), quotechar);
		583	state = old_state;
		584	from = fromend = one_char_buf + 1;
		585	fromlen = 1;
		586	UNGET ('\n');
		587	PUT (quotechar);
		588	}
		589	else if (ch == quotechar)
		590	{
		591	state = old_state;
		592	PUT (ch);
		593	}
		594	#ifndef NO_STRING_ESCAPES
		595	else if (ch == '\\')
		596	{
		597	state = 6;
		598	PUT (ch);
		599	}
		600	#endif
		601	else if (scrub_m68k_mri && ch == '\n')
		602	{
		603	/* Just quietly terminate the string. This permits lines like
		604	bne label loop if we haven't reach end yet. */
		605	state = old_state;
		606	UNGET (ch);
		607	PUT ('\'');
		608	}
		609	else
		610	{
		611	PUT (ch);
		612	}
		613	continue;
		614
		615	case 6:
		616	state = 5;
		617	ch = GET ();
		618	switch (ch)
		619	{
		620	/* Handle strings broken across lines, by turning '\n' into
		621	'\\' and 'n'. */
		622	case '\n':
		623	UNGET ('n');
		624	add_newlines++;
		625	PUT ('\\');
		626	continue;
		627
		628	case EOF:
		629	as_warn (_("end of file in string; '%c' inserted"), quotechar);
		630	PUT (quotechar);
		631	continue;
		632
		633	case '"':
		634	case '\\':
		635	case 'b':
		636	case 'f':
		637	case 'n':
		638	case 'r':
		639	case 't':
		640	case 'v':
		641	case 'x':
		642	case 'X':
		643	case '0':
		644	case '1':
		645	case '2':
		646	case '3':
		647	case '4':
		648	case '5':
		649	case '6':
		650	case '7':
		651	break;
		652
		653	default:
		654	#ifdef ONLY_STANDARD_ESCAPES
		655	as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
		656	#endif
		657	break;
		658	}
		659	PUT (ch);
		660	continue;
		661
		662	#ifdef DOUBLEBAR_PARALLEL
		663	case 13:
		664	ch = GET ();
		665	if (ch != '\|')
		666	abort ();
		667
		668	/* Reset back to state 1 and pretend that we are parsing a
		669	line from just after the first white space. */
		670	state = 1;
		671	PUT ('\|');
		672	#ifdef TC_TIC6X
		673	/* "\|\|^" is used for SPMASKed instructions. */
		674	ch = GET ();
		675	if (ch == EOF)
		676	goto fromeof;
		677	else if (ch == '^')
		678	PUT ('^');
		679	else
		680	UNGET (ch);
		681	#endif
		682	continue;
		683	#endif
		684	#ifdef TC_Z80
		685	case 16:
		686	/* We have seen an 'a' at the start of a symbol, look for an 'f'. */
		687	ch = GET ();
		688	if (ch == 'f' \|\| ch == 'F')
		689	{
		690	state = 17;
		691	PUT (ch);
		692	}
		693	else
		694	{
		695	state = 9;
		696	break;
		697	}
		698	case 17:
		699	/* We have seen "af" at the start of a symbol,
		700	a ' here is a part of that symbol. */
		701	ch = GET ();
		702	state = 9;
		703	if (ch == '\'')
		704	/* Change to avoid warning about unclosed string. */
		705	PUT ('`');
		706	else if (ch != EOF)
		707	UNGET (ch);
		708	break;
		709	#endif
		710	}
		711
		712	/* OK, we are somewhere in states 0 through 4 or 9 through 11. */
		713
		714	/* flushchar: */
		715	ch = GET ();
		716
		717	#ifdef TC_PREDICATE_START_CHAR
		718	if (ch == TC_PREDICATE_START_CHAR && (state == 0 \|\| state == 1))
		719	{
		720	state += 14;
		721	PUT (ch);
		722	continue;
		723	}
		724	else if (state == 14 \|\| state == 15)
		725	{
		726	if (ch == TC_PREDICATE_END_CHAR)
		727	{
		728	state -= 14;
		729	PUT (ch);
		730	ch = GET ();
		731	}
		732	else
		733	{
		734	PUT (ch);
		735	continue;
		736	}
		737	}
		738	#endif
		739
		740	recycle:
		741
		742	#if defined TC_ARM && defined OBJ_ELF
		743	/* We need to watch out for .symver directives. See the comment later
		744	in this function. */
		745	if (symver_state == NULL)
		746	{
		747	if ((state == 0 \|\| state == 1) && ch == symver_pseudo[0])
		748	symver_state = symver_pseudo + 1;
		749	}
		750	else
		751	{
		752	/* We advance to the next state if we find the right
		753	character. */
		754	if (ch != '\0' && (*symver_state == ch))
		755	++symver_state;
		756	else if (*symver_state != '\0')
		757	/* We did not get the expected character, or we didn't
		758	get a valid terminating character after seeing the
		759	entire pseudo-op, so we must go back to the beginning. */
		760	symver_state = NULL;
		761	else
		762	{
		763	/* We've read the entire pseudo-op. If this is the end
		764	of the line, go back to the beginning. */
		765	if (IS_NEWLINE (ch))
		766	symver_state = NULL;
		767	}
		768	}
		769	#endif /* TC_ARM && OBJ_ELF */
		770
		771	#ifdef TC_M68K
		772	/* We want to have pseudo-ops which control whether we are in
		773	MRI mode or not. Unfortunately, since m68k MRI mode affects
		774	the scrubber, that means that we need a special purpose
		775	recognizer here. */
		776	if (mri_state == NULL)
		777	{
		778	if ((state == 0 \|\| state == 1)
		779	&& ch == mri_pseudo[0])
		780	mri_state = mri_pseudo + 1;
		781	}
		782	else
		783	{
		784	/* We advance to the next state if we find the right
		785	character, or if we need a space character and we get any
		786	whitespace character, or if we need a '0' and we get a
		787	'1' (this is so that we only need one state to handle
		788	``.mri 0'' and ``.mri 1''). */
		789	if (ch != '\0'
		790	&& (*mri_state == ch
		791	\|\| (*mri_state == ' '
		792	&& lex[ch] == LEX_IS_WHITESPACE)
		793	\|\| (*mri_state == '0'
		794	&& ch == '1')))
		795	{
		796	mri_last_ch = ch;
		797	++mri_state;
		798	}
		799	else if (*mri_state != '\0'
		800	\|\| (lex[ch] != LEX_IS_WHITESPACE
		801	&& lex[ch] != LEX_IS_NEWLINE))
		802	{
		803	/* We did not get the expected character, or we didn't
		804	get a valid terminating character after seeing the
		805	entire pseudo-op, so we must go back to the
		806	beginning. */
		807	mri_state = NULL;
		808	}
		809	else
		810	{
		811	/* We've read the entire pseudo-op. mri_last_ch is
		812	either '0' or '1' indicating whether to enter or
		813	leave MRI mode. */
		814	do_scrub_begin (mri_last_ch == '1');
		815	mri_state = NULL;
		816
		817	/* We continue handling the character as usual. The
		818	main gas reader must also handle the .mri pseudo-op
		819	to control expression parsing and the like. */
		820	}
		821	}
		822	#endif
		823
		824	if (ch == EOF)
		825	{
		826	if (state != 0)
		827	{
		828	as_warn (_("end of file not at end of a line; newline inserted"));
		829	state = 0;
		830	PUT ('\n');
		831	}
		832	goto fromeof;
		833	}
		834
		835	switch (lex[ch])
		836	{
		837	case LEX_IS_WHITESPACE:
		838	do
		839	{
		840	ch = GET ();
		841	}
		842	while (ch != EOF && IS_WHITESPACE (ch));
		843	if (ch == EOF)
		844	goto fromeof;
		845
		846	if (state == 0)
		847	{
		848	/* Preserve a single whitespace character at the
		849	beginning of a line. */
		850	state = 1;
		851	UNGET (ch);
		852	PUT (' ');
		853	break;
		854	}
		855
		856	#ifdef KEEP_WHITE_AROUND_COLON
		857	if (lex[ch] == LEX_IS_COLON)
		858	{
		859	/* Only keep this white if there's no white after the
		860	colon. */
		861	ch2 = GET ();
		862	if (ch2 != EOF)
		863	UNGET (ch2);
		864	if (!IS_WHITESPACE (ch2))
		865	{
		866	state = 9;
		867	UNGET (ch);
		868	PUT (' ');
		869	break;
		870	}
		871	}
		872	#endif
		873	if (IS_COMMENT (ch)
		874	\|\| ch == '/'
		875	\|\| IS_LINE_SEPARATOR (ch)
		876	\|\| IS_PARALLEL_SEPARATOR (ch))
		877	{
		878	if (scrub_m68k_mri)
		879	{
		880	/* In MRI mode, we keep these spaces. */
		881	UNGET (ch);
		882	PUT (' ');
		883	break;
		884	}
		885	goto recycle;
		886	}
		887
		888	/* If we're in state 2 or 11, we've seen a non-white
		889	character followed by whitespace. If the next character
		890	is ':', this is whitespace after a label name which we
		891	normally must ignore. In MRI mode, though, spaces are
		892	not permitted between the label and the colon. */
		893	if ((state == 2 \|\| state == 11)
		894	&& lex[ch] == LEX_IS_COLON
		895	&& ! scrub_m68k_mri)
		896	{
		897	state = 1;
		898	PUT (ch);
		899	break;
		900	}
		901
		902	switch (state)
		903	{
		904	case 1:
		905	/* We can arrive here if we leave a leading whitespace
		906	character at the beginning of a line. */
		907	goto recycle;
		908	case 2:
		909	state = 3;
		910	if (to + 1 < toend)
		911	{
		912	/* Optimize common case by skipping UNGET/GET. */
		913	PUT (' '); /* Sp after opco */
		914	goto recycle;
		915	}
		916	UNGET (ch);
		917	PUT (' ');
		918	break;
		919	case 3:
		920	#ifndef TC_KEEP_OPERAND_SPACES
		921	/* For TI C6X, we keep these spaces as they may separate
		922	functional unit specifiers from operands. */
		923	if (scrub_m68k_mri)
		924	#endif
		925	{
		926	/* In MRI mode, we keep these spaces. */
		927	UNGET (ch);
		928	PUT (' ');
		929	break;
		930	}
		931	goto recycle; /* Sp in operands */
		932	case 9:
		933	case 10:
		934	#ifndef TC_KEEP_OPERAND_SPACES
		935	if (scrub_m68k_mri)
		936	#endif
		937	{
		938	/* In MRI mode, we keep these spaces. */
		939	state = 3;
		940	UNGET (ch);
		941	PUT (' ');
		942	break;
		943	}
		944	state = 10; /* Sp after symbol char */
		945	goto recycle;
		946	case 11:
		947	if (LABELS_WITHOUT_COLONS \|\| flag_m68k_mri)
		948	state = 1;
		949	else
		950	{
		951	/* We know that ch is not ':', since we tested that
		952	case above. Therefore this is not a label, so it
		953	must be the opcode, and we've just seen the
		954	whitespace after it. */
		955	state = 3;
		956	}
		957	UNGET (ch);
		958	PUT (' '); /* Sp after label definition. */
		959	break;
		960	default:
		961	BAD_CASE (state);
		962	}
		963	break;
		964
		965	case LEX_IS_TWOCHAR_COMMENT_1ST:
		966	ch2 = GET ();
		967	if (ch2 == '*')
		968	{
		969	for (;;)
		970	{
		971	do
		972	{
		973	ch2 = GET ();
		974	if (ch2 != EOF && IS_NEWLINE (ch2))
		975	add_newlines++;
		976	}
		977	while (ch2 != EOF && ch2 != '*');
		978
		979	while (ch2 == '*')
		980	ch2 = GET ();
		981
		982	if (ch2 == EOF \|\| ch2 == '/')
		983	break;
		984
		985	/* This UNGET will ensure that we count newlines
		986	correctly. */
		987	UNGET (ch2);
		988	}
		989
		990	if (ch2 == EOF)
		991	as_warn (_("end of file in multiline comment"));
		992
		993	ch = ' ';
		994	goto recycle;
		995	}
		996	#ifdef DOUBLESLASH_LINE_COMMENTS
		997	else if (ch2 == '/')
		998	{
		999	do
		1000	{
		1001	ch = GET ();
		1002	}
		1003	while (ch != EOF && !IS_NEWLINE (ch));
		1004	if (ch == EOF)
		1005	as_warn ("end of file in comment; newline inserted");
		1006	state = 0;
		1007	PUT ('\n');
		1008	break;
		1009	}
		1010	#endif
		1011	else
		1012	{
		1013	if (ch2 != EOF)
		1014	UNGET (ch2);
		1015	if (state == 9 \|\| state == 10)
		1016	state = 3;
		1017	PUT (ch);
		1018	}
		1019	break;
		1020
		1021	case LEX_IS_STRINGQUOTE:
		1022	quotechar = ch;
		1023	if (state == 10)
		1024	{
		1025	/* Preserve the whitespace in foo "bar". */
		1026	UNGET (ch);
		1027	state = 3;
		1028	PUT (' ');
		1029
		1030	/* PUT didn't jump out. We could just break, but we
		1031	know what will happen, so optimize a bit. */
		1032	ch = GET ();
		1033	old_state = 3;
		1034	}
		1035	else if (state == 9)
		1036	old_state = 3;
		1037	else
		1038	old_state = state;
		1039	state = 5;
		1040	PUT (ch);
		1041	break;
		1042
		1043	#ifndef IEEE_STYLE
		1044	case LEX_IS_ONECHAR_QUOTE:
		1045	#ifdef H_TICK_HEX
		1046	if (state == 9 && enable_h_tick_hex)
		1047	{
		1048	char c;
		1049
		1050	c = GET ();
		1051	as_warn ("'%c found after symbol", c);
		1052	UNGET (c);
		1053	}
		1054	#endif
		1055	if (state == 10)
		1056	{
		1057	/* Preserve the whitespace in foo 'b'. */
		1058	UNGET (ch);
		1059	state = 3;
		1060	PUT (' ');
		1061	break;
		1062	}
		1063	ch = GET ();
		1064	if (ch == EOF)
		1065	{
		1066	as_warn (_("end of file after a one-character quote; \\0 inserted"));
		1067	ch = 0;
		1068	}
		1069	if (ch == '\\')
		1070	{
		1071	ch = GET ();
		1072	if (ch == EOF)
		1073	{
		1074	as_warn (_("end of file in escape character"));
		1075	ch = '\\';
		1076	}
		1077	else
		1078	ch = process_escape (ch);
		1079	}
		1080	sprintf (out_buf, "%d", (int) (unsigned char) ch);
		1081
		1082	/* None of these 'x constants for us. We want 'x'. */
		1083	if ((ch = GET ()) != '\'')
		1084	{
		1085	#ifdef REQUIRE_CHAR_CLOSE_QUOTE
		1086	as_warn (_("missing close quote; (assumed)"));
		1087	#else
		1088	if (ch != EOF)
		1089	UNGET (ch);
		1090	#endif
		1091	}
		1092	if (strlen (out_buf) == 1)
		1093	{
		1094	PUT (out_buf[0]);
		1095	break;
		1096	}
		1097	if (state == 9)
		1098	old_state = 3;
		1099	else
		1100	old_state = state;
		1101	state = -1;
		1102	out_string = out_buf;
		1103	PUT (*out_string++);
		1104	break;
		1105	#endif
		1106
		1107	case LEX_IS_COLON:
		1108	#ifdef KEEP_WHITE_AROUND_COLON
		1109	state = 9;
		1110	#else
		1111	if (state == 9 \|\| state == 10)
		1112	state = 3;
		1113	else if (state != 3)
		1114	state = 1;
		1115	#endif
		1116	PUT (ch);
		1117	break;
		1118
		1119	case LEX_IS_NEWLINE:
		1120	/* Roll out a bunch of newlines from inside comments, etc. */
		1121	if (add_newlines)
		1122	{
		1123	--add_newlines;
		1124	UNGET (ch);
		1125	}
		1126	/* Fall through. */
		1127
		1128	case LEX_IS_LINE_SEPARATOR:
		1129	state = 0;
		1130	PUT (ch);
		1131	break;
		1132
		1133	case LEX_IS_PARALLEL_SEPARATOR:
		1134	state = 1;
		1135	PUT (ch);
		1136	break;
		1137
		1138	#ifdef TC_V850
		1139	case LEX_IS_DOUBLEDASH_1ST:
		1140	ch2 = GET ();
		1141	if (ch2 != '-')
		1142	{
		1143	if (ch2 != EOF)
		1144	UNGET (ch2);
		1145	goto de_fault;
		1146	}
		1147	/* Read and skip to end of line. */
		1148	do
		1149	{
		1150	ch = GET ();
		1151	}
		1152	while (ch != EOF && ch != '\n');
		1153
		1154	if (ch == EOF)
		1155	as_warn (_("end of file in comment; newline inserted"));
		1156
		1157	state = 0;
		1158	PUT ('\n');
		1159	break;
		1160	#endif
		1161	#ifdef DOUBLEBAR_PARALLEL
		1162	case LEX_IS_DOUBLEBAR_1ST:
		1163	ch2 = GET ();
		1164	if (ch2 != EOF)
		1165	UNGET (ch2);
		1166	if (ch2 != '\|')
		1167	goto de_fault;
		1168
		1169	/* Handle '\|\|' in two states as invoking PUT twice might
		1170	result in the first one jumping out of this loop. We'd
		1171	then lose track of the state and one '\|' char. */
		1172	state = 13;
		1173	PUT ('\|');
		1174	break;
		1175	#endif
		1176	case LEX_IS_LINE_COMMENT_START:
		1177	/* FIXME-someday: The two character comment stuff was badly
		1178	thought out. On i386, we want '/' as line comment start
		1179	AND we want C style comments. hence this hack. The
		1180	whole lexical process should be reworked. xoxorich. */
		1181	if (ch == '/')
		1182	{
		1183	ch2 = GET ();
		1184	if (ch2 == '*')
		1185	{
		1186	old_state = 3;
		1187	state = -2;
		1188	break;
		1189	}
		1190	else
		1191	{
		1192	UNGET (ch2);
		1193	}
		1194	}
		1195
		1196	if (state == 0 \|\| state == 1) /* Only comment at start of line. */
		1197	{
		1198	int startch;
		1199
		1200	startch = ch;
		1201
		1202	do
		1203	{
		1204	ch = GET ();
		1205	}
		1206	while (ch != EOF && IS_WHITESPACE (ch));
		1207
		1208	if (ch == EOF)
		1209	{
		1210	as_warn (_("end of file in comment; newline inserted"));
		1211	PUT ('\n');
		1212	break;
		1213	}
		1214
		1215	if (ch < '0' \|\| ch > '9' \|\| state != 0 \|\| startch != '#')
		1216	{
		1217	/* Not a cpp line. */
		1218	while (ch != EOF && !IS_NEWLINE (ch))
		1219	ch = GET ();
		1220	if (ch == EOF)
6324	serge	1221	{
5222	serge	1222	as_warn (_("end of file in comment; newline inserted"));
6324	serge	1223	PUT ('\n');
		1224	}
		1225	else /* IS_NEWLINE (ch) */
		1226	{
		1227	/* To process non-zero add_newlines. */
		1228	UNGET (ch);
		1229	}
5222	serge	1230	state = 0;
		1231	break;
		1232	}
		1233	/* Looks like `# 123 "filename"' from cpp. */
		1234	UNGET (ch);
		1235	old_state = 4;
		1236	state = -1;
		1237	if (scrub_m68k_mri)
		1238	out_string = "\tlinefile ";
		1239	else
		1240	out_string = "\t.linefile ";
		1241	PUT (*out_string++);
		1242	break;
		1243	}
		1244
		1245	#ifdef TC_D10V
		1246	/* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
		1247	Trap is the only short insn that has a first operand that is
		1248	neither register nor label.
		1249	We must prevent exef0f ||trap #1 from degenerating to exef0f ||trap#1.
		1250	We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
		1251	already LEX_IS_LINE_COMMENT_START. However, it is the
		1252	only character in line_comment_chars for d10v, hence we
		1253	can recognize it as such. */
		1254	/* An alternative approach would be to reset the state to 1 when
		1255	we see '||', '<-' or '->', but that seems to be overkill. */
		1256	if (state == 10)
		1257	PUT (' ');
		1258	#endif
		1259	/* We have a line comment character which is not at the
		1260	start of a line. If this is also a normal comment
		1261	character, fall through. Otherwise treat it as a default
		1262	character. */
		1263	if (strchr (tc_comment_chars, ch) == NULL
		1264	&& (! scrub_m68k_mri
		1265	\|\| (ch != '!' && ch != '*')))
		1266	goto de_fault;
		1267	if (scrub_m68k_mri
		1268	&& (ch == '!' \|\| ch == '*' \|\| ch == '#')
		1269	&& state != 1
		1270	&& state != 10)
		1271	goto de_fault;
		1272	/* Fall through. */
		1273	case LEX_IS_COMMENT_START:
		1274	#if defined TC_ARM && defined OBJ_ELF
		1275	/* On the ARM, `@' is the comment character.
		1276	Unfortunately this is also a special character in ELF .symver
		1277	directives (and .type, though we deal with those another way).
		1278	So we check if this line is such a directive, and treat
		1279	the character as default if so. This is a hack. */
		1280	if ((symver_state != NULL) && (*symver_state == 0))
		1281	goto de_fault;
		1282	#endif
		1283
		1284	#ifdef TC_ARM
		1285	/* For the ARM, care is needed not to damage occurrences of \@
		1286	by stripping the @ onwards. Yuck. */
		1287	if (to > tostart && *(to - 1) == '\\')
		1288	/* Do not treat the @ as a start-of-comment. */
		1289	goto de_fault;
		1290	#endif
		1291
		1292	#ifdef WARN_COMMENTS
		1293	if (!found_comment)
		1294	as_where (&found_comment_file, &found_comment);
		1295	#endif
		1296	do
		1297	{
		1298	ch = GET ();
		1299	}
		1300	while (ch != EOF && !IS_NEWLINE (ch));
		1301	if (ch == EOF)
		1302	as_warn (_("end of file in comment; newline inserted"));
		1303	state = 0;
		1304	PUT ('\n');
		1305	break;
		1306
		1307	#ifdef H_TICK_HEX
		1308	case LEX_IS_H:
		1309	/* Look for strings like H'[0-9A-Fa-f] and if found, replace
		1310	the H' with 0x to make them gas-style hex characters. */
		1311	if (enable_h_tick_hex)
		1312	{
		1313	char quot;
		1314
		1315	quot = GET ();
		1316	if (quot == '\'')
		1317	{
		1318	UNGET ('x');
		1319	ch = '0';
		1320	}
		1321	else
		1322	UNGET (quot);
		1323	}
		1324	/* FALL THROUGH */
		1325	#endif
		1326
		1327	case LEX_IS_SYMBOL_COMPONENT:
		1328	if (state == 10)
		1329	{
		1330	/* This is a symbol character following another symbol
		1331	character, with whitespace in between. We skipped
		1332	the whitespace earlier, so output it now. */
		1333	UNGET (ch);
		1334	state = 3;
		1335	PUT (' ');
		1336	break;
		1337	}
		1338
		1339	#ifdef TC_Z80
		1340	/* "af'" is a symbol containing '\''. */
		1341	if (state == 3 && (ch == 'a' \|\| ch == 'A'))
		1342	{
		1343	state = 16;
		1344	PUT (ch);
		1345	ch = GET ();
		1346	if (ch == 'f' \|\| ch == 'F')
		1347	{
		1348	state = 17;
		1349	PUT (ch);
		1350	break;
		1351	}
		1352	else
		1353	{
		1354	state = 9;
		1355	if (ch == EOF \|\| !IS_SYMBOL_COMPONENT (ch))
		1356	{
		1357	if (ch != EOF)
		1358	UNGET (ch);
		1359	break;
		1360	}
		1361	}
		1362	}
		1363	#endif
		1364	if (state == 3)
		1365	state = 9;
		1366
		1367	/* This is a common case. Quickly copy CH and all the
		1368	following symbol component or normal characters. */
		1369	if (to + 1 < toend
		1370	&& mri_state == NULL
		1371	#if defined TC_ARM && defined OBJ_ELF
		1372	&& symver_state == NULL
		1373	#endif
		1374	)
		1375	{
		1376	char *s;
		1377	ptrdiff_t len;
		1378
		1379	for (s = from; s < fromend; s++)
		1380	{
		1381	int type;
		1382
		1383	ch2 = (unsigned char ) s;
		1384	type = lex[ch2];
		1385	if (type != 0
		1386	&& type != LEX_IS_SYMBOL_COMPONENT)
		1387	break;
		1388	}
		1389
		1390	if (s > from)
		1391	/* Handle the last character normally, for
		1392	simplicity. */
		1393	--s;
		1394
		1395	len = s - from;
		1396
		1397	if (len > (toend - to) - 1)
		1398	len = (toend - to) - 1;
		1399
		1400	if (len > 0)
		1401	{
		1402	PUT (ch);
		1403	memcpy (to, from, len);
		1404	to += len;
		1405	from += len;
		1406	if (to >= toend)
		1407	goto tofull;
		1408	ch = GET ();
		1409	}
		1410	}
		1411
		1412	/* Fall through. */
		1413	default:
		1414	de_fault:
		1415	/* Some relatively `normal' character. */
		1416	if (state == 0)
		1417	{
		1418	state = 11; /* Now seeing label definition. */
		1419	}
		1420	else if (state == 1)
		1421	{
		1422	state = 2; /* Ditto. */
		1423	}
		1424	else if (state == 9)
		1425	{
		1426	if (!IS_SYMBOL_COMPONENT (ch))
		1427	state = 3;
		1428	}
		1429	else if (state == 10)
		1430	{
		1431	if (ch == '\\')
		1432	{
		1433	/* Special handling for backslash: a backslash may
		1434	be the beginning of a formal parameter (of a
		1435	macro) following another symbol character, with
		1436	whitespace in between. If that is the case, we
		1437	output a space before the parameter. Strictly
		1438	speaking, correct handling depends upon what the
		1439	macro parameter expands into; if the parameter
		1440	expands into something which does not start with
		1441	an operand character, then we don't want to keep
		1442	the space. We don't have enough information to
		1443	make the right choice, so here we are making the
		1444	choice which is more likely to be correct. */
		1445	if (to + 1 >= toend)
		1446	{
		1447	/* If we're near the end of the buffer, save the
		1448	character for the next time round. Otherwise
		1449	we'll lose our state. */
		1450	UNGET (ch);
		1451	goto tofull;
		1452	}
		1453	*to++ = ' ';
		1454	}
		1455
		1456	state = 3;
		1457	}
		1458	PUT (ch);
		1459	break;
		1460	}
		1461	}
		1462
		1463	/*NOTREACHED*/
		1464
		1465	fromeof:
		1466	/* We have reached the end of the input. */
		1467	return to - tostart;
		1468
		1469	tofull:
		1470	/* The output buffer is full. Save any input we have not yet
		1471	processed. */
		1472	if (fromend > from)
		1473	{
		1474	saved_input = from;
		1475	saved_input_len = fromend - from;
		1476	}
		1477	else
		1478	saved_input = NULL;
		1479
		1480	return to - tostart;
		1481	}

Subversion Repositories Kolibri OS

(root)/contrib/toolchain/binutils/gas/app.c – Rev 6324