Subversion Repositories Kolibri OS

Rev

Rev 5222 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
5222 serge 1
/* This is the Assembler Pre-Processor
6324 serge 2
   Copyright (C) 1987-2015 Free Software Foundation, Inc.
5222 serge 3
 
4
   This file is part of GAS, the GNU Assembler.
5
 
6
   GAS is free software; you can redistribute it and/or modify
7
   it under the terms of the GNU General Public License as published by
8
   the Free Software Foundation; either version 3, or (at your option)
9
   any later version.
10
 
11
   GAS is distributed in the hope that it will be useful, but WITHOUT
12
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14
   License for more details.
15
 
16
   You should have received a copy of the GNU General Public License
17
   along with GAS; see the file COPYING.  If not, write to the Free
18
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19
   02110-1301, USA.  */
20
 
21
/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
22
/* App, the assembler pre-processor.  This pre-processor strips out
23
   excess spaces, turns single-quoted characters into a decimal
24
   constant, and turns the # in # <number> <filename> <garbage> into a
25
   .linefile.  This needs better error-handling.  */
26
 
27
#include "as.h"
28
 
29
/* Compilers that do not claim ISO C conformance may not understand
   `const'; make the keyword expand to nothing for them unless it has
   already been defined as a macro.  */
#if (__STDC__ != 1)
#ifndef const
#define const  /* empty */
#endif
#endif

#ifdef H_TICK_HEX
/* When nonzero, do_scrub_begin classifies 'h' and 'H' as LEX_IS_H.
   Defaults to off.  */
int enable_h_tick_hex = 0;
#endif

#ifdef TC_M68K
/* Whether we are scrubbing in m68k MRI mode.  This is different from
   flag_m68k_mri, because the two flags will be affected by the .mri
   pseudo-op at different times.  */
static int scrub_m68k_mri;

/* The pseudo-op which switches in and out of MRI mode.  See the
   comment in do_scrub_chars.  */
static const char mri_pseudo[] = ".mri 0";
#else
/* Without m68k support MRI scrubbing is never active; a constant lets
   the tests on it be compiled away.  */
#define scrub_m68k_mri 0
#endif

#if defined TC_ARM && defined OBJ_ELF
/* The pseudo-op for which we need to special-case `@' characters.
   See the comment in do_scrub_chars.  */
static const char   symver_pseudo[] = ".symver";
/* Progress of the .symver recogniser in do_scrub_chars: NULL when no
   match is in progress, otherwise the next character of symver_pseudo
   that is expected.  */
static const char * symver_state;
#endif
58
 
59
/* Character classification table, indexed by character code and filled
   in by do_scrub_begin.  An entry of zero means the character has no
   special meaning to the scrubber.  */
static char lex[256];

/* Characters that may always appear in a symbol name.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Character classes stored in LEX.  The value 7 is not assigned here.  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define LEX_IS_TWOCHAR_COMMENT_1ST	6
#define LEX_IS_STRINGQUOTE		8
#define LEX_IS_COLON			9
#define LEX_IS_NEWLINE			10
#define LEX_IS_ONECHAR_QUOTE		11
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST		12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST		13
#endif
#define LEX_IS_PARALLEL_SEPARATOR	14
#ifdef H_TICK_HEX
#define LEX_IS_H			15
#endif

/* Classification predicates.  C is used directly as an index into LEX,
   so it must be a valid index (0..255); in particular callers must not
   pass EOF.  */
#define IS_SYMBOL_COMPONENT(c)		(lex[(c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex[(c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex[(c)] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c)	(lex[(c)] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c)			(lex[(c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex[(c)] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)			(lex[(c)] == LEX_IS_NEWLINE)
93
 
94
/* Forward declaration.  Maps the character that follows a backslash to
   the character the escape denotes; do_scrub_chars calls it when it
   handles a one-character quote.  The definition appears later in this
   file.  */
static int process_escape (int);
95
 
96
/* FIXME-soon: The entire lexer/parser thingy should be
97
   built statically at compile time rather than dynamically
98
   each and every time the assembler is run.  xoxorich.  */
99
 
100
void
101
do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
102
{
103
  const char *p;
104
  int c;
105
 
106
  lex[' '] = LEX_IS_WHITESPACE;
107
  lex['\t'] = LEX_IS_WHITESPACE;
108
  lex['\r'] = LEX_IS_WHITESPACE;
109
  lex['\n'] = LEX_IS_NEWLINE;
110
  lex[':'] = LEX_IS_COLON;
111
 
112
#ifdef TC_M68K
113
  scrub_m68k_mri = m68k_mri;
114
 
115
  if (! m68k_mri)
116
#endif
117
    {
118
      lex['"'] = LEX_IS_STRINGQUOTE;
119
 
120
#if ! defined (TC_HPPA) && ! defined (TC_I370)
121
      /* I370 uses single-quotes to delimit integer, float constants.  */
122
      lex['\''] = LEX_IS_ONECHAR_QUOTE;
123
#endif
124
 
125
#ifdef SINGLE_QUOTE_STRINGS
126
      lex['\''] = LEX_IS_STRINGQUOTE;
127
#endif
128
    }
129
 
130
  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
131
     in state 5 of do_scrub_chars must be changed.  */
132
 
133
  /* Note that these override the previous defaults, e.g. if ';' is a
134
     comment char, then it isn't a line separator.  */
135
  for (p = symbol_chars; *p; ++p)
136
    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
137
 
138
  for (c = 128; c < 256; ++c)
139
    lex[c] = LEX_IS_SYMBOL_COMPONENT;
140
 
141
#ifdef tc_symbol_chars
142
  /* This macro permits the processor to specify all characters which
143
     may appears in an operand.  This will prevent the scrubber from
144
     discarding meaningful whitespace in certain cases.  The i386
145
     backend uses this to support prefixes, which can confuse the
146
     scrubber as to whether it is parsing operands or opcodes.  */
147
  for (p = tc_symbol_chars; *p; ++p)
148
    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
149
#endif
150
 
151
  /* The m68k backend wants to be able to change comment_chars.  */
152
#ifndef tc_comment_chars
153
#define tc_comment_chars comment_chars
154
#endif
155
  for (p = tc_comment_chars; *p; p++)
156
    lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
157
 
158
  for (p = line_comment_chars; *p; p++)
159
    lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
160
 
6324 serge 161
#ifndef tc_line_separator_chars
162
#define tc_line_separator_chars line_separator_chars
163
#endif
164
  for (p = tc_line_separator_chars; *p; p++)
5222 serge 165
    lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
166
 
167
#ifdef tc_parallel_separator_chars
168
  /* This macro permits the processor to specify all characters which
169
     separate parallel insns on the same line.  */
170
  for (p = tc_parallel_separator_chars; *p; p++)
171
    lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
172
#endif
173
 
174
  /* Only allow slash-star comments if slash is not in use.
175
     FIXME: This isn't right.  We should always permit them.  */
176
  if (lex['/'] == 0)
177
    lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
178
 
179
#ifdef TC_M68K
180
  if (m68k_mri)
181
    {
182
      lex['\''] = LEX_IS_STRINGQUOTE;
183
      lex[';'] = LEX_IS_COMMENT_START;
184
      lex['*'] = LEX_IS_LINE_COMMENT_START;
185
      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
186
	 then it can't be used in an expression.  */
187
      lex['!'] = LEX_IS_LINE_COMMENT_START;
188
    }
189
#endif
190
 
191
#ifdef TC_V850
192
  lex['-'] = LEX_IS_DOUBLEDASH_1ST;
193
#endif
194
#ifdef DOUBLEBAR_PARALLEL
195
  lex['|'] = LEX_IS_DOUBLEBAR_1ST;
196
#endif
197
#ifdef TC_D30V
198
  /* Must do this is we want VLIW instruction with "->" or "<-".  */
199
  lex['-'] = LEX_IS_SYMBOL_COMPONENT;
200
#endif
201
 
202
#ifdef H_TICK_HEX
203
  if (enable_h_tick_hex)
204
    {
205
      lex['h'] = LEX_IS_H;
206
      lex['H'] = LEX_IS_H;
207
    }
208
#endif
209
}
210
 
211
/* Saved state of the scrubber.  do_scrub_chars may return at any point
   (output buffer full or end of input), so everything it needs in order
   to resume lives in these file-scope variables.  */

/* Current state of the do_scrub_chars state machine.  */
static int state;
/* State to return to once a string, comment or OUT_STRING is done.  */
static int old_state;
/* Next character to emit while in state -1.  */
static char *out_string;
/* Holds the decimal text generated for a one-character quote.  */
static char out_buf[20];
/* Number of newlines swallowed so far that still have to be emitted.  */
static int add_newlines;
/* When non-NULL, input left over from the previous call; do_scrub_chars
   resumes from here instead of fetching fresh input.  */
static char *saved_input;
/* Number of bytes available at SAVED_INPUT.  */
static size_t saved_input_len;
/* Buffer handed to the GET callback to receive raw input.  */
static char input_buffer[32 * 1024];
/* Progress of the ".mri" pseudo-op recogniser; NULL when idle.  */
static const char *mri_state;
/* Last character matched by the ".mri" recogniser.  */
static char mri_last_ch;
222
 
223
/* Data structure for saving the state of app across #include's.  Note that
224
   app is called asynchronously to the parsing of the .include's, so our
225
   state at the time .include is interpreted is completely unrelated.
226
   That's why we have to save it all.  */
227
 
228
struct app_save
229
{
230
  int          state;
231
  int          old_state;
232
  char *       out_string;
233
  char         out_buf[sizeof (out_buf)];
234
  int          add_newlines;
235
  char *       saved_input;
236
  size_t       saved_input_len;
237
#ifdef TC_M68K
238
  int          scrub_m68k_mri;
239
#endif
240
  const char * mri_state;
241
  char         mri_last_ch;
242
#if defined TC_ARM && defined OBJ_ELF
243
  const char * symver_state;
244
#endif
245
};
246
 
247
char *
248
app_push (void)
249
{
6324 serge 250
  struct app_save *saved;
5222 serge 251
 
252
  saved = (struct app_save *) xmalloc (sizeof (*saved));
253
  saved->state = state;
254
  saved->old_state = old_state;
255
  saved->out_string = out_string;
256
  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
257
  saved->add_newlines = add_newlines;
258
  if (saved_input == NULL)
259
    saved->saved_input = NULL;
260
  else
261
    {
262
      saved->saved_input = (char *) xmalloc (saved_input_len);
263
      memcpy (saved->saved_input, saved_input, saved_input_len);
264
      saved->saved_input_len = saved_input_len;
265
    }
266
#ifdef TC_M68K
267
  saved->scrub_m68k_mri = scrub_m68k_mri;
268
#endif
269
  saved->mri_state = mri_state;
270
  saved->mri_last_ch = mri_last_ch;
271
#if defined TC_ARM && defined OBJ_ELF
272
  saved->symver_state = symver_state;
273
#endif
274
 
275
  /* do_scrub_begin() is not useful, just wastes time.  */
276
 
277
  state = 0;
278
  saved_input = NULL;
279
  add_newlines = 0;
280
 
281
  return (char *) saved;
282
}
283
 
284
void
285
app_pop (char *arg)
286
{
6324 serge 287
  struct app_save *saved = (struct app_save *) arg;
5222 serge 288
 
289
  /* There is no do_scrub_end ().  */
290
  state = saved->state;
291
  old_state = saved->old_state;
292
  out_string = saved->out_string;
293
  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
294
  add_newlines = saved->add_newlines;
295
  if (saved->saved_input == NULL)
296
    saved_input = NULL;
297
  else
298
    {
299
      gas_assert (saved->saved_input_len <= sizeof (input_buffer));
300
      memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
301
      saved_input = input_buffer;
302
      saved_input_len = saved->saved_input_len;
303
      free (saved->saved_input);
304
    }
305
#ifdef TC_M68K
306
  scrub_m68k_mri = saved->scrub_m68k_mri;
307
#endif
308
  mri_state = saved->mri_state;
309
  mri_last_ch = saved->mri_last_ch;
310
#if defined TC_ARM && defined OBJ_ELF
311
  symver_state = saved->symver_state;
312
#endif
313
 
314
  free (arg);
315
}
316
 
317
/* @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */

/* Return the character denoted by the escape sequence backslash-CH.
   Only b, f, n, r, t, the single quote and the double quote are
   translated; any other CH is returned unchanged.  */

static int
process_escape (int ch)
{
  switch (ch)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case '\'':
      return '\'';
    case '"':
      return '\"';
    default:
      return ch;
    }
}
343
 
344
/* This function is called to process input characters.  The GET
345
   parameter is used to retrieve more input characters.  GET should
346
   set its parameter to point to a buffer, and return the length of
347
   the buffer; it should return 0 at end of file.  The scrubbed output
348
   characters are put into the buffer starting at TOSTART; the TOSTART
349
   buffer is TOLEN bytes in length.  The function returns the number
350
   of scrubbed characters put into TOSTART.  This will be TOLEN unless
351
   end of file was seen.  This function is arranged as a state
352
   machine, and saves its state so that it may return at any point.
353
   This is the way the old code used to work.  */
354
 
355
size_t
356
do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
357
{
358
  char *to = tostart;
359
  char *toend = tostart + tolen;
360
  char *from;
361
  char *fromend;
362
  size_t fromlen;
6324 serge 363
  int ch, ch2 = 0;
5222 serge 364
  /* Character that started the string we're working on.  */
365
  static char quotechar;
366
 
367
  /*State 0: beginning of normal line
368
	  1: After first whitespace on line (flush more white)
369
	  2: After first non-white (opcode) on line (keep 1white)
370
	  3: after second white on line (into operands) (flush white)
371
	  4: after putting out a .linefile, put out digits
372
	  5: parsing a string, then go to old-state
373
	  6: putting out \ escape in a "d string.
374
	  7: no longer used
375
	  8: no longer used
376
	  9: After seeing symbol char in state 3 (keep 1white after symchar)
377
	 10: After seeing whitespace in state 9 (keep white before symchar)
378
	 11: After seeing a symbol character in state 0 (eg a label definition)
379
	 -1: output string in out_string and go to the state in old_state
380
	 -2: flush text until a '*' '/' is seen, then go to state old_state
381
#ifdef TC_V850
382
	 12: After seeing a dash, looking for a second dash as a start
383
	     of comment.
384
#endif
385
#ifdef DOUBLEBAR_PARALLEL
386
	 13: After seeing a vertical bar, looking for a second
387
	     vertical bar as a parallel expression separator.
388
#endif
389
#ifdef TC_PREDICATE_START_CHAR
390
	 14: After seeing a predicate start character at state 0, looking
391
	     for a predicate end character as predicate.
392
	 15: After seeing a predicate start character at state 1, looking
393
	     for a predicate end character as predicate.
394
#endif
395
#ifdef TC_Z80
396
	 16: After seeing an 'a' or an 'A' at the start of a symbol
397
	 17: After seeing an 'f' or an 'F' in state 16
398
#endif
399
	  */
400
 
401
  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
402
     constructs like ``.loc 1 20''.  This was turning into ``.loc
403
     120''.  States 9 and 10 ensure that a space is never dropped in
404
     between characters which could appear in an identifier.  Ian
405
     Taylor, ian@cygnus.com.
406
 
407
     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
408
     correctly on the PA (and any other target where colons are optional).
409
     Jeff Law, law@cs.utah.edu.
410
 
411
     I added state 13 so that something like "cmp r1, r2 || trap #1" does not
412
     get squashed into "cmp r1,r2||trap#1", with the all important space
413
     between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
414
 
415
  /* This macro gets the next input character.  */
416
 
417
#define GET()							\
418
  (from < fromend						\
419
   ? * (unsigned char *) (from++)				\
420
   : (saved_input = NULL,					\
421
      fromlen = (*get) (input_buffer, sizeof input_buffer),	\
422
      from = input_buffer,					\
423
      fromend = from + fromlen,					\
424
      (fromlen == 0						\
425
       ? EOF							\
426
       : * (unsigned char *) (from++))))
427
 
428
  /* This macro pushes a character back on the input stream.  */
429
 
430
#define UNGET(uch) (*--from = (uch))
431
 
432
  /* This macro puts a character into the output buffer.  If this
433
     character fills the output buffer, this macro jumps to the label
434
     TOFULL.  We use this rather ugly approach because we need to
435
     handle two different termination conditions: EOF on the input
436
     stream, and a full output buffer.  It would be simpler if we
437
     always read in the entire input stream before processing it, but
438
     I don't want to make such a significant change to the assembler's
439
     memory usage.  */
440
 
441
#define PUT(pch)				\
442
  do						\
443
    {						\
444
      *to++ = (pch);				\
445
      if (to >= toend)				\
446
	goto tofull;				\
447
    }						\
448
  while (0)
449
 
450
  if (saved_input != NULL)
451
    {
452
      from = saved_input;
453
      fromend = from + saved_input_len;
454
    }
455
  else
456
    {
457
      fromlen = (*get) (input_buffer, sizeof input_buffer);
458
      if (fromlen == 0)
459
	return 0;
460
      from = input_buffer;
461
      fromend = from + fromlen;
462
    }
463
 
464
  while (1)
465
    {
466
      /* The cases in this switch end with continue, in order to
467
	 branch back to the top of this while loop and generate the
468
	 next output character in the appropriate state.  */
469
      switch (state)
470
	{
471
	case -1:
472
	  ch = *out_string++;
473
	  if (*out_string == '\0')
474
	    {
475
	      state = old_state;
476
	      old_state = 3;
477
	    }
478
	  PUT (ch);
479
	  continue;
480
 
481
	case -2:
482
	  for (;;)
483
	    {
484
	      do
485
		{
486
		  ch = GET ();
487
 
488
		  if (ch == EOF)
489
		    {
490
		      as_warn (_("end of file in comment"));
491
		      goto fromeof;
492
		    }
493
 
494
		  if (ch == '\n')
495
		    PUT ('\n');
496
		}
497
	      while (ch != '*');
498
 
499
	      while ((ch = GET ()) == '*')
500
		;
501
 
502
	      if (ch == EOF)
503
		{
504
		  as_warn (_("end of file in comment"));
505
		  goto fromeof;
506
		}
507
 
508
	      if (ch == '/')
509
		break;
510
 
511
	      UNGET (ch);
512
	    }
513
 
514
	  state = old_state;
515
	  UNGET (' ');
516
	  continue;
517
 
518
	case 4:
519
	  ch = GET ();
520
	  if (ch == EOF)
521
	    goto fromeof;
522
	  else if (ch >= '0' && ch <= '9')
523
	    PUT (ch);
524
	  else
525
	    {
526
	      while (ch != EOF && IS_WHITESPACE (ch))
527
		ch = GET ();
528
	      if (ch == '"')
529
		{
530
		  quotechar = ch;
531
		  state = 5;
532
		  old_state = 3;
533
		  PUT (ch);
534
		}
535
	      else
536
		{
537
		  while (ch != EOF && ch != '\n')
538
		    ch = GET ();
539
		  state = 0;
540
		  PUT (ch);
541
		}
542
	    }
543
	  continue;
544
 
545
	case 5:
546
	  /* We are going to copy everything up to a quote character,
547
	     with special handling for a backslash.  We try to
548
	     optimize the copying in the simple case without using the
549
	     GET and PUT macros.  */
550
	  {
551
	    char *s;
552
	    ptrdiff_t len;
553
 
554
	    for (s = from; s < fromend; s++)
555
	      {
556
		ch = *s;
557
		if (ch == '\\'
558
		    || ch == quotechar
559
		    || ch == '\n')
560
		  break;
561
	      }
562
	    len = s - from;
563
	    if (len > toend - to)
564
	      len = toend - to;
565
	    if (len > 0)
566
	      {
567
		memcpy (to, from, len);
568
		to += len;
569
		from += len;
570
		if (to >= toend)
571
		  goto tofull;
572
	      }
573
	  }
574
 
575
	  ch = GET ();
576
	  if (ch == EOF)
577
	    {
578
	      /* This buffer is here specifically so
579
		 that the UNGET below will work.  */
580
	      static char one_char_buf[1];
581
 
582
	      as_warn (_("end of file in string; '%c' inserted"), quotechar);
583
	      state = old_state;
584
	      from = fromend = one_char_buf + 1;
585
	      fromlen = 1;
586
	      UNGET ('\n');
587
	      PUT (quotechar);
588
	    }
589
	  else if (ch == quotechar)
590
	    {
591
	      state = old_state;
592
	      PUT (ch);
593
	    }
594
#ifndef NO_STRING_ESCAPES
595
	  else if (ch == '\\')
596
	    {
597
	      state = 6;
598
	      PUT (ch);
599
	    }
600
#endif
601
	  else if (scrub_m68k_mri && ch == '\n')
602
	    {
603
	      /* Just quietly terminate the string.  This permits lines like
604
		   bne	label	loop if we haven't reach end yet.  */
605
	      state = old_state;
606
	      UNGET (ch);
607
	      PUT ('\'');
608
	    }
609
	  else
610
	    {
611
	      PUT (ch);
612
	    }
613
	  continue;
614
 
615
	case 6:
616
	  state = 5;
617
	  ch = GET ();
618
	  switch (ch)
619
	    {
620
	      /* Handle strings broken across lines, by turning '\n' into
621
		 '\\' and 'n'.  */
622
	    case '\n':
623
	      UNGET ('n');
624
	      add_newlines++;
625
	      PUT ('\\');
626
	      continue;
627
 
628
	    case EOF:
629
	      as_warn (_("end of file in string; '%c' inserted"), quotechar);
630
	      PUT (quotechar);
631
	      continue;
632
 
633
	    case '"':
634
	    case '\\':
635
	    case 'b':
636
	    case 'f':
637
	    case 'n':
638
	    case 'r':
639
	    case 't':
640
	    case 'v':
641
	    case 'x':
642
	    case 'X':
643
	    case '0':
644
	    case '1':
645
	    case '2':
646
	    case '3':
647
	    case '4':
648
	    case '5':
649
	    case '6':
650
	    case '7':
651
	      break;
652
 
653
	    default:
654
#ifdef ONLY_STANDARD_ESCAPES
655
	      as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
656
#endif
657
	      break;
658
	    }
659
	  PUT (ch);
660
	  continue;
661
 
662
#ifdef DOUBLEBAR_PARALLEL
663
	case 13:
664
	  ch = GET ();
665
	  if (ch != '|')
666
	    abort ();
667
 
668
	  /* Reset back to state 1 and pretend that we are parsing a
669
	     line from just after the first white space.  */
670
	  state = 1;
671
	  PUT ('|');
672
#ifdef TC_TIC6X
673
	  /* "||^" is used for SPMASKed instructions.  */
674
	  ch = GET ();
675
	  if (ch == EOF)
676
	    goto fromeof;
677
	  else if (ch == '^')
678
	    PUT ('^');
679
	  else
680
	    UNGET (ch);
681
#endif
682
	  continue;
683
#endif
684
#ifdef TC_Z80
685
	case 16:
686
	  /* We have seen an 'a' at the start of a symbol, look for an 'f'.  */
687
	  ch = GET ();
688
	  if (ch == 'f' || ch == 'F')
689
	    {
690
	      state = 17;
691
	      PUT (ch);
692
	    }
693
	  else
694
	    {
695
	      state = 9;
696
	      break;
697
	    }
698
	case 17:
699
	  /* We have seen "af" at the start of a symbol,
700
	     a ' here is a part of that symbol.  */
701
	  ch = GET ();
702
	  state = 9;
703
	  if (ch == '\'')
704
	    /* Change to avoid warning about unclosed string.  */
705
	    PUT ('`');
706
	  else if (ch != EOF)
707
	    UNGET (ch);
708
	  break;
709
#endif
710
	}
711
 
712
      /* OK, we are somewhere in states 0 through 4 or 9 through 11.  */
713
 
714
      /* flushchar: */
715
      ch = GET ();
716
 
717
#ifdef TC_PREDICATE_START_CHAR
718
      if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
719
	{
720
	  state += 14;
721
	  PUT (ch);
722
	  continue;
723
	}
724
      else if (state == 14 || state == 15)
725
	{
726
	  if (ch == TC_PREDICATE_END_CHAR)
727
	    {
728
	      state -= 14;
729
	      PUT (ch);
730
	      ch = GET ();
731
	    }
732
	  else
733
	    {
734
	      PUT (ch);
735
	      continue;
736
	    }
737
	}
738
#endif
739
 
740
    recycle:
741
 
742
#if defined TC_ARM && defined OBJ_ELF
743
      /* We need to watch out for .symver directives.  See the comment later
744
	 in this function.  */
745
      if (symver_state == NULL)
746
	{
747
	  if ((state == 0 || state == 1) && ch == symver_pseudo[0])
748
	    symver_state = symver_pseudo + 1;
749
	}
750
      else
751
	{
752
	  /* We advance to the next state if we find the right
753
	     character.  */
754
	  if (ch != '\0' && (*symver_state == ch))
755
	    ++symver_state;
756
	  else if (*symver_state != '\0')
757
	    /* We did not get the expected character, or we didn't
758
	       get a valid terminating character after seeing the
759
	       entire pseudo-op, so we must go back to the beginning.  */
760
	    symver_state = NULL;
761
	  else
762
	    {
763
	      /* We've read the entire pseudo-op.  If this is the end
764
		 of the line, go back to the beginning.  */
765
	      if (IS_NEWLINE (ch))
766
		symver_state = NULL;
767
	    }
768
	}
769
#endif /* TC_ARM && OBJ_ELF */
770
 
771
#ifdef TC_M68K
772
      /* We want to have pseudo-ops which control whether we are in
773
	 MRI mode or not.  Unfortunately, since m68k MRI mode affects
774
	 the scrubber, that means that we need a special purpose
775
	 recognizer here.  */
776
      if (mri_state == NULL)
777
	{
778
	  if ((state == 0 || state == 1)
779
	      && ch == mri_pseudo[0])
780
	    mri_state = mri_pseudo + 1;
781
	}
782
      else
783
	{
784
	  /* We advance to the next state if we find the right
785
	     character, or if we need a space character and we get any
786
	     whitespace character, or if we need a '0' and we get a
787
	     '1' (this is so that we only need one state to handle
788
	     ``.mri 0'' and ``.mri 1'').  */
789
	  if (ch != '\0'
790
	      && (*mri_state == ch
791
		  || (*mri_state == ' '
792
		      && lex[ch] == LEX_IS_WHITESPACE)
793
		  || (*mri_state == '0'
794
		      && ch == '1')))
795
	    {
796
	      mri_last_ch = ch;
797
	      ++mri_state;
798
	    }
799
	  else if (*mri_state != '\0'
800
		   || (lex[ch] != LEX_IS_WHITESPACE
801
		       && lex[ch] != LEX_IS_NEWLINE))
802
	    {
803
	      /* We did not get the expected character, or we didn't
804
		 get a valid terminating character after seeing the
805
		 entire pseudo-op, so we must go back to the
806
		 beginning.  */
807
	      mri_state = NULL;
808
	    }
809
	  else
810
	    {
811
	      /* We've read the entire pseudo-op.  mri_last_ch is
812
		 either '0' or '1' indicating whether to enter or
813
		 leave MRI mode.  */
814
	      do_scrub_begin (mri_last_ch == '1');
815
	      mri_state = NULL;
816
 
817
	      /* We continue handling the character as usual.  The
818
		 main gas reader must also handle the .mri pseudo-op
819
		 to control expression parsing and the like.  */
820
	    }
821
	}
822
#endif
823
 
824
      if (ch == EOF)
825
	{
826
	  if (state != 0)
827
	    {
828
	      as_warn (_("end of file not at end of a line; newline inserted"));
829
	      state = 0;
830
	      PUT ('\n');
831
	    }
832
	  goto fromeof;
833
	}
834
 
835
      switch (lex[ch])
836
	{
837
	case LEX_IS_WHITESPACE:
838
	  do
839
	    {
840
	      ch = GET ();
841
	    }
842
	  while (ch != EOF && IS_WHITESPACE (ch));
843
	  if (ch == EOF)
844
	    goto fromeof;
845
 
846
	  if (state == 0)
847
	    {
848
	      /* Preserve a single whitespace character at the
849
		 beginning of a line.  */
850
	      state = 1;
851
	      UNGET (ch);
852
	      PUT (' ');
853
	      break;
854
	    }
855
 
856
#ifdef KEEP_WHITE_AROUND_COLON
857
	  if (lex[ch] == LEX_IS_COLON)
858
	    {
859
	      /* Only keep this white if there's no white *after* the
860
		 colon.  */
861
	      ch2 = GET ();
862
	      if (ch2 != EOF)
863
		UNGET (ch2);
864
	      if (!IS_WHITESPACE (ch2))
865
		{
866
		  state = 9;
867
		  UNGET (ch);
868
		  PUT (' ');
869
		  break;
870
		}
871
	    }
872
#endif
873
	  if (IS_COMMENT (ch)
874
	      || ch == '/'
875
	      || IS_LINE_SEPARATOR (ch)
876
	      || IS_PARALLEL_SEPARATOR (ch))
877
	    {
878
	      if (scrub_m68k_mri)
879
		{
880
		  /* In MRI mode, we keep these spaces.  */
881
		  UNGET (ch);
882
		  PUT (' ');
883
		  break;
884
		}
885
	      goto recycle;
886
	    }
887
 
888
	  /* If we're in state 2 or 11, we've seen a non-white
889
	     character followed by whitespace.  If the next character
890
	     is ':', this is whitespace after a label name which we
891
	     normally must ignore.  In MRI mode, though, spaces are
892
	     not permitted between the label and the colon.  */
893
	  if ((state == 2 || state == 11)
894
	      && lex[ch] == LEX_IS_COLON
895
	      && ! scrub_m68k_mri)
896
	    {
897
	      state = 1;
898
	      PUT (ch);
899
	      break;
900
	    }
901
 
902
	  switch (state)
903
	    {
904
	    case 1:
905
	      /* We can arrive here if we leave a leading whitespace
906
		 character at the beginning of a line.  */
907
	      goto recycle;
908
	    case 2:
909
	      state = 3;
910
	      if (to + 1 < toend)
911
		{
912
		  /* Optimize common case by skipping UNGET/GET.  */
913
		  PUT (' ');	/* Sp after opco */
914
		  goto recycle;
915
		}
916
	      UNGET (ch);
917
	      PUT (' ');
918
	      break;
919
	    case 3:
920
#ifndef TC_KEEP_OPERAND_SPACES
921
	      /* For TI C6X, we keep these spaces as they may separate
922
		 functional unit specifiers from operands.  */
923
	      if (scrub_m68k_mri)
924
#endif
925
		{
926
		  /* In MRI mode, we keep these spaces.  */
927
		  UNGET (ch);
928
		  PUT (' ');
929
		  break;
930
		}
931
	      goto recycle;	/* Sp in operands */
932
	    case 9:
933
	    case 10:
934
#ifndef TC_KEEP_OPERAND_SPACES
935
	      if (scrub_m68k_mri)
936
#endif
937
		{
938
		  /* In MRI mode, we keep these spaces.  */
939
		  state = 3;
940
		  UNGET (ch);
941
		  PUT (' ');
942
		  break;
943
		}
944
	      state = 10;	/* Sp after symbol char */
945
	      goto recycle;
946
	    case 11:
947
	      if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
948
		state = 1;
949
	      else
950
		{
951
		  /* We know that ch is not ':', since we tested that
952
		     case above.  Therefore this is not a label, so it
953
		     must be the opcode, and we've just seen the
954
		     whitespace after it.  */
955
		  state = 3;
956
		}
957
	      UNGET (ch);
958
	      PUT (' ');	/* Sp after label definition.  */
959
	      break;
960
	    default:
961
	      BAD_CASE (state);
962
	    }
963
	  break;
964
 
965
	case LEX_IS_TWOCHAR_COMMENT_1ST:
966
	  ch2 = GET ();
967
	  if (ch2 == '*')
968
	    {
969
	      for (;;)
970
		{
971
		  do
972
		    {
973
		      ch2 = GET ();
974
		      if (ch2 != EOF && IS_NEWLINE (ch2))
975
			add_newlines++;
976
		    }
977
		  while (ch2 != EOF && ch2 != '*');
978
 
979
		  while (ch2 == '*')
980
		    ch2 = GET ();
981
 
982
		  if (ch2 == EOF || ch2 == '/')
983
		    break;
984
 
985
		  /* This UNGET will ensure that we count newlines
986
		     correctly.  */
987
		  UNGET (ch2);
988
		}
989
 
990
	      if (ch2 == EOF)
991
		as_warn (_("end of file in multiline comment"));
992
 
993
	      ch = ' ';
994
	      goto recycle;
995
	    }
996
#ifdef DOUBLESLASH_LINE_COMMENTS
997
	  else if (ch2 == '/')
998
	    {
999
	      do
1000
		{
1001
		  ch = GET ();
1002
		}
1003
	      while (ch != EOF && !IS_NEWLINE (ch));
1004
	      if (ch == EOF)
1005
		as_warn ("end of file in comment; newline inserted");
1006
	      state = 0;
1007
	      PUT ('\n');
1008
	      break;
1009
	    }
1010
#endif
1011
	  else
1012
	    {
1013
	      if (ch2 != EOF)
1014
		UNGET (ch2);
1015
	      if (state == 9 || state == 10)
1016
		state = 3;
1017
	      PUT (ch);
1018
	    }
1019
	  break;
1020
 
1021
	case LEX_IS_STRINGQUOTE:
1022
	  quotechar = ch;
1023
	  if (state == 10)
1024
	    {
1025
	      /* Preserve the whitespace in foo "bar".  */
1026
	      UNGET (ch);
1027
	      state = 3;
1028
	      PUT (' ');
1029
 
1030
	      /* PUT didn't jump out.  We could just break, but we
1031
		 know what will happen, so optimize a bit.  */
1032
	      ch = GET ();
1033
	      old_state = 3;
1034
	    }
1035
	  else if (state == 9)
1036
	    old_state = 3;
1037
	  else
1038
	    old_state = state;
1039
	  state = 5;
1040
	  PUT (ch);
1041
	  break;
1042
 
1043
#ifndef IEEE_STYLE
1044
	case LEX_IS_ONECHAR_QUOTE:
1045
#ifdef H_TICK_HEX
1046
	  if (state == 9 && enable_h_tick_hex)
1047
	    {
1048
	      char c;
1049
 
1050
	      c = GET ();
1051
	      as_warn ("'%c found after symbol", c);
1052
	      UNGET (c);
1053
	    }
1054
#endif
1055
	  if (state == 10)
1056
	    {
1057
	      /* Preserve the whitespace in foo 'b'.  */
1058
	      UNGET (ch);
1059
	      state = 3;
1060
	      PUT (' ');
1061
	      break;
1062
	    }
1063
	  ch = GET ();
1064
	  if (ch == EOF)
1065
	    {
1066
	      as_warn (_("end of file after a one-character quote; \\0 inserted"));
1067
	      ch = 0;
1068
	    }
1069
	  if (ch == '\\')
1070
	    {
1071
	      ch = GET ();
1072
	      if (ch == EOF)
1073
		{
1074
		  as_warn (_("end of file in escape character"));
1075
		  ch = '\\';
1076
		}
1077
	      else
1078
		ch = process_escape (ch);
1079
	    }
1080
	  sprintf (out_buf, "%d", (int) (unsigned char) ch);
1081
 
1082
	  /* None of these 'x constants for us.  We want 'x'.  */
1083
	  if ((ch = GET ()) != '\'')
1084
	    {
1085
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
1086
	      as_warn (_("missing close quote; (assumed)"));
1087
#else
1088
	      if (ch != EOF)
1089
		UNGET (ch);
1090
#endif
1091
	    }
1092
	  if (strlen (out_buf) == 1)
1093
	    {
1094
	      PUT (out_buf[0]);
1095
	      break;
1096
	    }
1097
	  if (state == 9)
1098
	    old_state = 3;
1099
	  else
1100
	    old_state = state;
1101
	  state = -1;
1102
	  out_string = out_buf;
1103
	  PUT (*out_string++);
1104
	  break;
1105
#endif
1106
 
1107
	case LEX_IS_COLON:
1108
#ifdef KEEP_WHITE_AROUND_COLON
1109
	  state = 9;
1110
#else
1111
	  if (state == 9 || state == 10)
1112
	    state = 3;
1113
	  else if (state != 3)
1114
	    state = 1;
1115
#endif
1116
	  PUT (ch);
1117
	  break;
1118
 
1119
	case LEX_IS_NEWLINE:
1120
	  /* Roll out a bunch of newlines from inside comments, etc.  */
1121
	  if (add_newlines)
1122
	    {
1123
	      --add_newlines;
1124
	      UNGET (ch);
1125
	    }
1126
	  /* Fall through.  */
1127
 
1128
	case LEX_IS_LINE_SEPARATOR:
1129
	  state = 0;
1130
	  PUT (ch);
1131
	  break;
1132
 
1133
	case LEX_IS_PARALLEL_SEPARATOR:
1134
	  state = 1;
1135
	  PUT (ch);
1136
	  break;
1137
 
1138
#ifdef TC_V850
1139
	case LEX_IS_DOUBLEDASH_1ST:
1140
	  ch2 = GET ();
1141
	  if (ch2 != '-')
1142
	    {
1143
	      if (ch2 != EOF)
1144
		UNGET (ch2);
1145
	      goto de_fault;
1146
	    }
1147
	  /* Read and skip to end of line.  */
1148
	  do
1149
	    {
1150
	      ch = GET ();
1151
	    }
1152
	  while (ch != EOF && ch != '\n');
1153
 
1154
	  if (ch == EOF)
1155
	    as_warn (_("end of file in comment; newline inserted"));
1156
 
1157
	  state = 0;
1158
	  PUT ('\n');
1159
	  break;
1160
#endif
1161
#ifdef DOUBLEBAR_PARALLEL
1162
	case LEX_IS_DOUBLEBAR_1ST:
1163
	  ch2 = GET ();
1164
	  if (ch2 != EOF)
1165
	    UNGET (ch2);
1166
	  if (ch2 != '|')
1167
	    goto de_fault;
1168
 
1169
	  /* Handle '||' in two states as invoking PUT twice might
1170
	     result in the first one jumping out of this loop.  We'd
1171
	     then lose track of the state and one '|' char.  */
1172
	  state = 13;
1173
	  PUT ('|');
1174
	  break;
1175
#endif
1176
	case LEX_IS_LINE_COMMENT_START:
1177
	  /* FIXME-someday: The two character comment stuff was badly
1178
	     thought out.  On i386, we want '/' as line comment start
1179
	     AND we want C style comments.  hence this hack.  The
1180
	     whole lexical process should be reworked.  xoxorich.  */
1181
	  if (ch == '/')
1182
	    {
1183
	      ch2 = GET ();
1184
	      if (ch2 == '*')
1185
		{
1186
		  old_state = 3;
1187
		  state = -2;
1188
		  break;
1189
		}
1190
	      else
1191
		{
1192
		  UNGET (ch2);
1193
		}
1194
	    }
1195
 
1196
	  if (state == 0 || state == 1)	/* Only comment at start of line.  */
1197
	    {
1198
	      int startch;
1199
 
1200
	      startch = ch;
1201
 
1202
	      do
1203
		{
1204
		  ch = GET ();
1205
		}
1206
	      while (ch != EOF && IS_WHITESPACE (ch));
1207
 
1208
	      if (ch == EOF)
1209
		{
1210
		  as_warn (_("end of file in comment; newline inserted"));
1211
		  PUT ('\n');
1212
		  break;
1213
		}
1214
 
1215
	      if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1216
		{
1217
		  /* Not a cpp line.  */
1218
		  while (ch != EOF && !IS_NEWLINE (ch))
1219
		    ch = GET ();
1220
		  if (ch == EOF)
6324 serge 1221
		    {
5222 serge 1222
		    as_warn (_("end of file in comment; newline inserted"));
6324 serge 1223
		      PUT ('\n');
1224
		    }
1225
		  else /* IS_NEWLINE (ch) */
1226
		    {
1227
		      /* To process non-zero add_newlines.  */
1228
		      UNGET (ch);
1229
		    }
5222 serge 1230
		  state = 0;
1231
		  break;
1232
		}
1233
	      /* Looks like `# 123 "filename"' from cpp.  */
1234
	      UNGET (ch);
1235
	      old_state = 4;
1236
	      state = -1;
1237
	      if (scrub_m68k_mri)
1238
		out_string = "\tlinefile ";
1239
	      else
1240
		out_string = "\t.linefile ";
1241
	      PUT (*out_string++);
1242
	      break;
1243
	    }
1244
 
1245
#ifdef TC_D10V
1246
	  /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1247
	     Trap is the only short insn that has a first operand that is
1248
	     neither register nor label.
1249
	     We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1250
	     We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1251
	     already LEX_IS_LINE_COMMENT_START.  However, it is the
1252
	     only character in line_comment_chars for d10v, hence we
1253
	     can recognize it as such.  */
1254
	  /* An alternative approach would be to reset the state to 1 when
1255
	     we see '||', '<-' or '->', but that seems to be overkill.  */
1256
	  if (state == 10)
1257
	    PUT (' ');
1258
#endif
1259
	  /* We have a line comment character which is not at the
1260
	     start of a line.  If this is also a normal comment
1261
	     character, fall through.  Otherwise treat it as a default
1262
	     character.  */
1263
	  if (strchr (tc_comment_chars, ch) == NULL
1264
	      && (! scrub_m68k_mri
1265
		  || (ch != '!' && ch != '*')))
1266
	    goto de_fault;
1267
	  if (scrub_m68k_mri
1268
	      && (ch == '!' || ch == '*' || ch == '#')
1269
	      && state != 1
1270
	      && state != 10)
1271
	    goto de_fault;
1272
	  /* Fall through.  */
1273
	case LEX_IS_COMMENT_START:
1274
#if defined TC_ARM && defined OBJ_ELF
1275
	  /* On the ARM, `@' is the comment character.
1276
	     Unfortunately this is also a special character in ELF .symver
1277
	     directives (and .type, though we deal with those another way).
1278
	     So we check if this line is such a directive, and treat
1279
	     the character as default if so.  This is a hack.  */
1280
	  if ((symver_state != NULL) && (*symver_state == 0))
1281
	    goto de_fault;
1282
#endif
1283
 
1284
#ifdef TC_ARM
1285
	  /* For the ARM, care is needed not to damage occurrences of \@
1286
	     by stripping the @ onwards.  Yuck.  */
1287
	  if (to > tostart && *(to - 1) == '\\')
1288
	    /* Do not treat the @ as a start-of-comment.  */
1289
	    goto de_fault;
1290
#endif
1291
 
1292
#ifdef WARN_COMMENTS
1293
	  if (!found_comment)
1294
	    as_where (&found_comment_file, &found_comment);
1295
#endif
1296
	  do
1297
	    {
1298
	      ch = GET ();
1299
	    }
1300
	  while (ch != EOF && !IS_NEWLINE (ch));
1301
	  if (ch == EOF)
1302
	    as_warn (_("end of file in comment; newline inserted"));
1303
	  state = 0;
1304
	  PUT ('\n');
1305
	  break;
1306
 
1307
#ifdef H_TICK_HEX
1308
	case LEX_IS_H:
1309
	  /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1310
	     the H' with 0x to make them gas-style hex characters.  */
1311
	  if (enable_h_tick_hex)
1312
	    {
1313
	      char quot;
1314
 
1315
	      quot = GET ();
1316
	      if (quot == '\'')
1317
		{
1318
		  UNGET ('x');
1319
		  ch = '0';
1320
		}
1321
	      else
1322
		UNGET (quot);
1323
	    }
1324
	  /* FALL THROUGH */
1325
#endif
1326
 
1327
	case LEX_IS_SYMBOL_COMPONENT:
1328
	  if (state == 10)
1329
	    {
1330
	      /* This is a symbol character following another symbol
1331
		 character, with whitespace in between.  We skipped
1332
		 the whitespace earlier, so output it now.  */
1333
	      UNGET (ch);
1334
	      state = 3;
1335
	      PUT (' ');
1336
	      break;
1337
	    }
1338
 
1339
#ifdef TC_Z80
1340
	  /* "af'" is a symbol containing '\''.  */
1341
	  if (state == 3 && (ch == 'a' || ch == 'A'))
1342
	    {
1343
	      state = 16;
1344
	      PUT (ch);
1345
	      ch = GET ();
1346
	      if (ch == 'f' || ch == 'F')
1347
		{
1348
		  state = 17;
1349
		  PUT (ch);
1350
		  break;
1351
		}
1352
	      else
1353
		{
1354
		  state = 9;
1355
		  if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
1356
		    {
1357
		      if (ch != EOF)
1358
			UNGET (ch);
1359
		      break;
1360
		    }
1361
		}
1362
	    }
1363
#endif
1364
	  if (state == 3)
1365
	    state = 9;
1366
 
1367
	  /* This is a common case.  Quickly copy CH and all the
1368
	     following symbol component or normal characters.  */
1369
	  if (to + 1 < toend
1370
	      && mri_state == NULL
1371
#if defined TC_ARM && defined OBJ_ELF
1372
	      && symver_state == NULL
1373
#endif
1374
	      )
1375
	    {
1376
	      char *s;
1377
	      ptrdiff_t len;
1378
 
1379
	      for (s = from; s < fromend; s++)
1380
		{
1381
		  int type;
1382
 
1383
		  ch2 = *(unsigned char *) s;
1384
		  type = lex[ch2];
1385
		  if (type != 0
1386
		      && type != LEX_IS_SYMBOL_COMPONENT)
1387
		    break;
1388
		}
1389
 
1390
	      if (s > from)
1391
		/* Handle the last character normally, for
1392
		   simplicity.  */
1393
		--s;
1394
 
1395
	      len = s - from;
1396
 
1397
	      if (len > (toend - to) - 1)
1398
		len = (toend - to) - 1;
1399
 
1400
	      if (len > 0)
1401
		{
1402
		  PUT (ch);
1403
		  memcpy (to, from, len);
1404
		  to += len;
1405
		  from += len;
1406
		  if (to >= toend)
1407
		    goto tofull;
1408
		  ch = GET ();
1409
		}
1410
	    }
1411
 
1412
	  /* Fall through.  */
1413
	default:
1414
	de_fault:
1415
	  /* Some relatively `normal' character.  */
1416
	  if (state == 0)
1417
	    {
1418
	      state = 11;	/* Now seeing label definition.  */
1419
	    }
1420
	  else if (state == 1)
1421
	    {
1422
	      state = 2;	/* Ditto.  */
1423
	    }
1424
	  else if (state == 9)
1425
	    {
1426
	      if (!IS_SYMBOL_COMPONENT (ch))
1427
		state = 3;
1428
	    }
1429
	  else if (state == 10)
1430
	    {
1431
	      if (ch == '\\')
1432
		{
1433
		  /* Special handling for backslash: a backslash may
1434
		     be the beginning of a formal parameter (of a
1435
		     macro) following another symbol character, with
1436
		     whitespace in between.  If that is the case, we
1437
		     output a space before the parameter.  Strictly
1438
		     speaking, correct handling depends upon what the
1439
		     macro parameter expands into; if the parameter
1440
		     expands into something which does not start with
1441
		     an operand character, then we don't want to keep
1442
		     the space.  We don't have enough information to
1443
		     make the right choice, so here we are making the
1444
		     choice which is more likely to be correct.  */
1445
		  if (to + 1 >= toend)
1446
		    {
1447
		      /* If we're near the end of the buffer, save the
1448
		         character for the next time round.  Otherwise
1449
		         we'll lose our state.  */
1450
		      UNGET (ch);
1451
		      goto tofull;
1452
		    }
1453
		  *to++ = ' ';
1454
		}
1455
 
1456
	      state = 3;
1457
	    }
1458
	  PUT (ch);
1459
	  break;
1460
	}
1461
    }
1462
 
1463
  /*NOTREACHED*/
1464
 
1465
 fromeof:
1466
  /* We have reached the end of the input.  */
1467
  return to - tostart;
1468
 
1469
 tofull:
1470
  /* The output buffer is full.  Save any input we have not yet
1471
     processed.  */
1472
  if (fromend > from)
1473
    {
1474
      saved_input = from;
1475
      saved_input_len = fromend - from;
1476
    }
1477
  else
1478
    saved_input = NULL;
1479
 
1480
  return to - tostart;
1481
}