cppfiles.c (read_and_prescan): Map backslash-newline to '\r' (which cannot otherwise appear in the processed...

1999-03-16 16:06 -0500  Zack Weinberg  <zack@rabi.columbia.edu>
	* cppfiles.c (read_and_prescan): Map backslash-newline to '\r'
	(which cannot otherwise appear in the processed buffer) and
	move it out of tokens that it appears in the middle of.
	Improve performance.
	(find_position): New function.
	* cpplib.c: \r (one character) indicates backslash
	newline, not \\\n (two characters).  It cannot appear in the
	middle of a token.  Call CPP_BUMP_LINE (pfile) whenever
	parsing moves past \n or \r.  Increment pfile->lineno whenever
	a \n is placed into token_buffer.  Only one mark can exist at
	a time, and CPP_BUMP_LINE must not be used while it is
	active.  It is automatically cleared by cpp_pop_buffer and
	parse_goto_mark.  \r is not in is_hor_space or is_space.
	(NEWLINE_FIX, NEWLINE_FIX1, adjust_position,
	update_position, count_newlines, parse_move_mark): Removed.
	(parse_string, copy_comment): New functions.
	(parse_name): Returns void.
	(parse_set_mark, parse_clear_mark, parse_goto_mark): Take only
	one argument, a cpp_reader *.  Change for new marking scheme.
	(skip_comment): Handle CHILL line comments too.  Second
	argument is now first character of comment marker; all callers
	changed.  Issue error for unterminated block comment here.
	(cpp_skip_hspace): Recognize CHILL comments.
	(copy_rest_of_line): Likewise.  Call skip_comment and
	parse_string directly, don't go through cpp_get_token.  Emit
	"/**/" for block comments if -traditional (create_definition
	needs this).
	(do_define): Don't play with put_out_comments.
	(cpp_push_buffer): Initialize ->mark to -1.
	(cpp_buf_line_and_col): Just read out the values in the buffer
	structure.
	(output_line_command): Use cpp_buf_line_and_col.  Fix
	formatting.  Remove stale code.
	(cpp_get_token): Break out string parsing code to
	parse_string.  Use skip_comment for CHILL comments too.  Use
	copy_comment for put_out_comments instead of dinking with
	marks.  Remove stale code.  Don't call output_line_command
	unless it's necessary.
	* cpplib.h (parse_marker): Removed.
	(struct cpp_buffer): line_base is now an unsigned char *; add
	`mark' [long], remove `marks' [struct parse_marker *].
	(parse_set_mark, parse_clear_mark, parse_goto_mark): Update
	prototypes.
	(CPP_BUMP_LINE, CPP_BUMP_BUFFER_LINE): New macros.
	* cppinit.c (is_hor_space, is_space): '\r' is not considered
	whitespace.
	* cppexp.c (cpp_parse_expression): Use cpp_skip_hspace, not
	SKIP_WHITE_SPACE.
	* cpphash.c (macarg): Disable line commands while expanding.

From-SVN: r25802
This commit is contained in:
Zack Weinberg 1999-03-16 13:10:15 +00:00 committed by Dave Brolley
parent d888cbc492
commit 3fdc651fed
7 changed files with 668 additions and 585 deletions

View File

@ -1,3 +1,58 @@
1999-03-16 16:06 -0500 Zack Weinberg <zack@rabi.columbia.edu>
* cppfiles.c (read_and_prescan): Map backslash-newline to '\r'
(which cannot otherwise appear in the processed buffer) and
move it out of tokens that it appears in the middle of.
Improve performance.
(find_position): New function.
* cpplib.c: \r (one character) indicates backslash
newline, not \\\n (two characters). It cannot appear in the
middle of a token. Call CPP_BUMP_LINE (pfile) whenever
parsing moves past \n or \r. Increment pfile->lineno whenever
a \n is placed into token_buffer. Only one mark can exist at
a time, and CPP_BUMP_LINE must not be used while it is
active. It is automatically cleared by cpp_pop_buffer and
parse_goto_mark. \r is not in is_hor_space or is_space.
(NEWLINE_FIX, NEWLINE_FIX1, adjust_position,
update_position, count_newlines, parse_move_mark): Removed.
(parse_string, copy_comment): New functions.
(parse_name): Returns void.
(parse_set_mark, parse_clear_mark, parse_goto_mark): Take only
one argument, a cpp_reader *. Change for new marking scheme.
(skip_comment): Handle CHILL line comments too. Second
argument is now first character of comment marker; all callers
changed. Issue error for unterminated block comment here.
(cpp_skip_hspace): Recognize CHILL comments.
(copy_rest_of_line): Likewise. Call skip_comment and
parse_string directly, don't go through cpp_get_token. Emit
"/**/" for block comments if -traditional (create_definition
needs this).
(do_define): Don't play with put_out_comments.
(cpp_push_buffer): Initialize ->mark to -1.
(cpp_buf_line_and_col): Just read out the values in the buffer
structure.
(output_line_command): Use cpp_buf_line_and_col. Fix
formatting. Remove stale code.
(cpp_get_token): Break out string parsing code to
parse_string. Use skip_comment for CHILL comments too. Use
copy_comment for put_out_comments instead of dinking with
marks. Remove stale code. Don't call output_line_command
unless it's necessary.
* cpplib.h (parse_marker): Removed.
(struct cpp_buffer): line_base is now an unsigned char *; add
`mark' [long], remove `marks' [struct parse_marker *].
(parse_set_mark, parse_clear_mark, parse_goto_mark): Update
prototypes.
(CPP_BUMP_LINE, CPP_BUMP_BUFFER_LINE): New macros.
* cppinit.c (is_hor_space, is_space): '\r' is not considered
whitespace.
* cppexp.c (cpp_parse_expression): Use cpp_skip_hspace, not
SKIP_WHITE_SPACE.
* cpphash.c (macarg): Disable line commands while expanding.
Tue Mar 16 11:30:19 1999 Gavin Romig-Koch <gavin@cygnus.com>
* c-lex.c (yylex) : Remove warning for integer literals being

View File

@ -101,8 +101,6 @@ static HOST_WIDEST_INT right_shift PARAMS ((cpp_reader *, HOST_WIDEST_INT, int,
#define SKIP_OPERAND 8
/*#define UNSIGNEDP 16*/
#define SKIP_WHITE_SPACE(p) do { while (is_hor_space[*p]) p++; } while (0)
struct operation {
short op;
char rprio; /* Priority of op (relative to it right operand). */
@ -444,12 +442,12 @@ cpp_lex (pfile, skip_evaluation)
cpp_buffer *ip = CPP_BUFFER (pfile);
U_CHAR *tok;
SKIP_WHITE_SPACE (ip->cur);
cpp_skip_hspace (pfile);
if (*ip->cur == '(')
{
paren++;
ip->cur++; /* Skip over the paren */
SKIP_WHITE_SPACE (ip->cur);
cpp_skip_hspace (pfile);
}
if (!is_idstart[*ip->cur])
@ -460,7 +458,7 @@ cpp_lex (pfile, skip_evaluation)
while (is_idchar[*ip->cur])
++ip->cur;
len = ip->cur - tok;
SKIP_WHITE_SPACE (ip->cur);
cpp_skip_hspace (pfile);
if (paren)
{
if (*ip->cur != ')')

View File

@ -757,15 +757,51 @@ actual_directory (pfile, fname)
return x;
}
/* Read the entire contents of file DESC into buffer BUF, convert end-of-line
markers to canonical form, and convert trigraphs if enabled. Also, make
sure there is a newline at the end of the file. LEN is how much room we
have to start with (this can be expanded if necessary).
Returns -1 on failure, or the actual length of the data to be scanned.
/* Almost but not quite the same as adjust_position in cpplib.c.
Used only by read_and_prescan.

Scan the bytes in [START, LIMIT) and work out the line and column
reached at LIMIT.

START is the first byte to examine; LIMIT is one past the last.
*LINEP is read as the line number in effect at START and is
overwritten with the line number in effect at LIMIT: it is advanced
once for every '\n' and once for every '\r' seen.
*COLP is never read; it is overwritten with the column count at
LIMIT.

NOTE(review): the column starts at 0 when no line break is seen in
the range but restarts at 1 after a line break, so the two cases
differ by one.  Callers that compute a line base as LIMIT - *COLP
should confirm this is what they expect. */
static void
find_position (start, limit, linep, colp)
U_CHAR *start;
U_CHAR *limit;
long *linep;
long *colp;
{
/* Work on local copies; the results are stored back once at the end. */
long line = *linep, col = 0;
while (start < limit)
{
U_CHAR ch = *start++;
/* Both '\n' and '\r' count as a line break here. */
if (ch == '\n' || ch == '\r')
line++, col = 1;
else
col++;
}
*linep = line, *colp = col;
}
N.B. This function has been rearranged to out-of-line the uncommon cases
as much as possible; this is important to prevent it from being a
performance bottleneck. */
/* Read the entire contents of file DESC into buffer BUF. LEN is how
much memory to allocate initially; more will be allocated if
necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
canonical form (\n). If enabled, convert and/or warn about
trigraphs. Convert backslash-newline to a one-character escape
(\r) and remove it from "embarrassing" places (i.e. the middle of a
token). If there is no newline at the end of the file, add one and
warn. Returns -1 on failure, or the actual length of the data to
be scanned.
This function does a lot of work, and can be a serious performance
bottleneck. It has been tuned heavily; make sure you understand it
before hacking. The common case - no trigraphs, Unix style line
breaks, backslash-newline set off by whitespace, newline at EOF -
has been optimized at the expense of the others. The performance
penalty for DOS style line breaks (\r\n) is about 15%.
Warnings lose particularly heavily since we have to determine the
line number, which involves scanning from the beginning of the file
or from the last warning. The penalty for the absence of a newline
at the end of reload1.c is about 60%. (reload1.c is 329k.)
If your file has more than one kind of end-of-line marker, you
will get messed-up line numbering. */
static long
read_and_prescan (pfile, fp, desc, len)
@ -774,29 +810,47 @@ read_and_prescan (pfile, fp, desc, len)
int desc;
size_t len;
{
U_CHAR *buf = (U_CHAR *) xmalloc (len);
U_CHAR *buf = xmalloc (len);
U_CHAR *ip, *op, *line_base;
U_CHAR *ibase;
unsigned int line;
unsigned int line, deferred_newlines;
int count;
size_t offset;
/* 4096 bytes of buffer proper, 2 to detect running off the end without
address arithmetic all the time, and 2 for pushback in the case there's
a potential trigraph or end-of-line digraph at the end of a block. */
#define INTERMED_BUFFER_SIZE 4096
U_CHAR intermed[INTERMED_BUFFER_SIZE + 2 + 2];
/* PIPE_BUF bytes of buffer proper, 2 to detect running off the end
without address arithmetic all the time, and 2 for pushback in
the case there's a potential trigraph or end-of-line digraph at
the end of a block. */
U_CHAR intermed[PIPE_BUF + 2 + 2];
/* Table of characters that can't be handled in the inner loop.
Keep these contiguous to optimize the performance of the code generated
for the switch that uses them. */
#define SPECCASE_EMPTY 0
#define SPECCASE_NUL 1
#define SPECCASE_CR 2
#define SPECCASE_BACKSLASH 3
#define SPECCASE_QUESTION 4
U_CHAR speccase[256];
offset = 0;
op = buf;
line_base = buf;
line = 1;
ibase = intermed + 2;
deferred_newlines = 0;
memset (speccase, SPECCASE_EMPTY, sizeof (speccase));
speccase['\0'] = SPECCASE_NUL;
speccase['\r'] = SPECCASE_CR;
speccase['\\'] = SPECCASE_BACKSLASH;
if (CPP_OPTIONS (pfile)->trigraphs || CPP_OPTIONS (pfile)->warn_trigraphs)
speccase['?'] = SPECCASE_QUESTION;
for (;;)
{
read_next:
count = read (desc, intermed + 2, INTERMED_BUFFER_SIZE);
count = read (desc, intermed + 2, PIPE_BUF);
if (count < 0)
goto error;
else if (count == 0)
@ -806,16 +860,16 @@ read_and_prescan (pfile, fp, desc, len)
ip = ibase;
ibase = intermed + 2;
ibase[count] = ibase[count+1] = '\0';
if (offset > len)
{
size_t delta_op;
size_t delta_line_base;
len *= 2;
if (offset > len)
/* len overflowed.
This could happen if the file is larger than half the
maximum address space of the machine. */
/* len overflowed.
This could happen if the file is larger than half the
maximum address space of the machine. */
goto too_big;
delta_op = op - buf;
@ -827,93 +881,155 @@ read_and_prescan (pfile, fp, desc, len)
for (;;)
{
unsigned int c;
c = *ip++;
switch (c)
unsigned int span = 0;
/* Deal with \-newline in the middle of a token. */
if (deferred_newlines)
{
/* The default case is at the top so gcc will realize
it's the common case, and leave c in a register.
Also, cache utilization is a little better this way. */
default:
*op++ = c;
break;
case '\0':
while (speccase[ip[span]] == SPECCASE_EMPTY
&& ip[span] != '\n'
&& ip[span] != '\t'
&& ip[span] != ' ')
span++;
memcpy (op, ip, span);
op += span;
ip += span;
if (*ip == '\n' || *ip == '\t'
|| *ip == ' ' || *ip == ' ')
while (deferred_newlines)
deferred_newlines--, *op++ = '\r';
span = 0;
}
/* Copy as much as we can without special treatment. */
while (speccase[ip[span]] == SPECCASE_EMPTY) span++;
memcpy (op, ip, span);
op += span;
ip += span;
switch (speccase[*ip++])
{
case SPECCASE_NUL: /* \0 */
ibase[-1] = op[-1];
goto read_next;
case '\r':
if (*ip == '\n') ip++;
case SPECCASE_CR: /* \r */
if (*ip == '\n')
ip++;
else if (*ip == '\0')
{
--ibase;
intermed[1] = '\r';
goto read_next;
}
else if (ip[-2] == '\n')
continue;
*op++ = '\n';
line++;
line_base = op;
break;
case '\n':
if (*ip == '\r') ip++;
else if (*ip == '\0')
case SPECCASE_BACKSLASH: /* \ */
backslash:
{
/* If we're at the end of the intermediate buffer,
we have to shift the backslash down to the start
and come back next pass. */
if (*ip == '\0')
{
--ibase;
intermed[1] = '\n';
intermed[1] = '\\';
goto read_next;
}
*op++ = '\n';
line++;
line_base = op;
break;
case '?':
if (CPP_OPTIONS (pfile)->trigraphs
|| CPP_OPTIONS (pfile)->warn_trigraphs)
else if (*ip == '\n')
{
unsigned int d;
/* If we're at the end of the intermediate buffer,
we have to shift the ?'s down to the start and
come back next pass. */
d = ip[0];
if (d == '\0')
{
--ibase;
intermed[1] = '?';
goto read_next;
}
if (d != '?')
{
*op++ = '?';
break;
}
d = ip[1];
if (d == '\0')
ip++;
if (*ip == '\r') ip++;
if (*ip == '\n' || *ip == '\t' || *ip == ' ')
*op++ = '\r';
else if (op[-1] == '\t' || op[-1] == ' '
|| op[-1] == '\r' || op[-1] == '\n')
*op++ = '\r';
else
deferred_newlines++;
line++;
line_base = op;
}
else if (*ip == '\r')
{
ip++;
if (*ip == '\n') ip++;
else if (*ip == '\0')
{
ibase -= 2;
intermed[0] = intermed[1] = '?';
intermed[0] = '\\';
intermed[1] = '\r';
goto read_next;
}
if (!trigraph_table[d])
{
*op++ = '?';
break;
}
if (CPP_OPTIONS (pfile)->warn_trigraphs)
cpp_warning_with_line (pfile, line, op-line_base,
"trigraph ??%c encountered", d);
if (CPP_OPTIONS (pfile)->trigraphs)
*op++ = trigraph_table[d];
else if (*ip == '\r' || *ip == '\t' || *ip == ' ')
*op++ = '\r';
else
{
*op++ = '?';
*op++ = '?';
*op++ = d;
}
ip += 2;
deferred_newlines++;
line++;
line_base = op;
}
else
*op++ = c;
*op++ = '\\';
}
break;
case SPECCASE_QUESTION: /* ? */
{
unsigned int d;
/* If we're at the end of the intermediate buffer,
we have to shift the ?'s down to the start and
come back next pass. */
d = ip[0];
if (d == '\0')
{
--ibase;
intermed[1] = '?';
goto read_next;
}
if (d != '?')
{
*op++ = '?';
break;
}
d = ip[1];
if (d == '\0')
{
ibase -= 2;
intermed[0] = intermed[1] = '?';
goto read_next;
}
if (!trigraph_table[d])
{
*op++ = '?';
break;
}
if (CPP_OPTIONS (pfile)->warn_trigraphs)
{
long col;
find_position (line_base, op, &line, &col);
line_base = op - col;
cpp_warning_with_line (pfile, line, col,
"trigraph ??%c encountered", d);
}
if (CPP_OPTIONS (pfile)->trigraphs)
{
if (trigraph_table[d] == '\\')
goto backslash;
else
*op++ = trigraph_table[d];
}
else
{
*op++ = '?';
*op++ = '?';
*op++ = d;
}
ip += 2;
}
}
}
}
@ -922,47 +1038,48 @@ read_and_prescan (pfile, fp, desc, len)
return 0;
/* Deal with pushed-back chars at true EOF.
If two chars were pushed back, they must both be ?'s.
If one was, it might be ?, \r, or \n, and \r needs to
become \n.
This may be any of: ?? ? \ \r \n \\r \\n.
\r must become \n, \\r or \\n must become \r.
We know we have space already. */
if (ibase == intermed)
{
*op++ = '?';
*op++ = '?';
if (*ibase == '?')
{
*op++ = '?';
*op++ = '?';
}
else
*op++ = '\r';
}
else if (ibase == intermed + 1)
{
if (*ibase == '?')
*op++ = '?';
else
if (*ibase == '\r')
*op++ = '\n';
else
*op++ = *ibase;
}
if (op[-1] != '\n' || op[-2] == '\\')
if (op[-1] != '\n')
{
if (CPP_PEDANTIC (pfile))
cpp_pedwarn_with_line (pfile, line, op - line_base,
"no newline at end of file");
if (offset + 2 > len)
long col;
find_position (line_base, op, &line, &col);
cpp_warning_with_line (pfile, line, col, "no newline at end of file\n");
if (offset + 1 > len)
{
len += 2;
if (offset + 2 > len)
len += 1;
if (offset + 1 > len)
goto too_big;
buf = (U_CHAR *) xrealloc (buf, len);
op = buf + offset;
}
if (op[-1] == '\\')
*op++ = '\n';
*op++ = '\n';
}
fp->buf =
(U_CHAR *) ((len - offset < 20) ? (PTR) buf : xrealloc (buf, op - buf));
fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
return op - buf;
too_big:
cpp_error (pfile, "file is too large");
cpp_error (pfile, "file is too large (>%lu bytes)\n", (unsigned long)offset);
free (buf);
return -1;

View File

@ -787,6 +787,7 @@ macarg (pfile, rest_args)
/* Try to parse as much of the argument as exists at this
input stack level. */
pfile->no_macro_expand++;
CPP_OPTIONS (pfile)->no_line_commands++;
for (;;)
{
token = cpp_get_token (pfile);
@ -824,6 +825,7 @@ macarg (pfile, rest_args)
done:
CPP_OPTIONS (pfile)->put_out_comments = save_put_out_comments;
CPP_OPTIONS (pfile)->no_line_commands--;
pfile->no_macro_expand--;
return token;

View File

@ -255,15 +255,16 @@ U_CHAR is_idstart[256] =
['_'] = 1,
};
/* Table to tell if a character is horizontal space. */
/* Table to tell if a character is horizontal space.
\r is magical, so it is not in here. */
U_CHAR is_hor_space[256] =
{
[' '] = 1, ['\t'] = 1, ['\v'] = 1, ['\f'] = 1, ['\r'] = 1
[' '] = 1, ['\t'] = 1, ['\v'] = 1, ['\f'] = 1,
};
/* table to tell if a character is horizontal or vertical space. */
U_CHAR is_space[256] =
{
[' '] = 1, ['\t'] = 1, ['\v'] = 1, ['\f'] = 1, ['\r'] = 1, ['\n'] = 1,
[' '] = 1, ['\t'] = 1, ['\v'] = 1, ['\f'] = 1, ['\n'] = 1,
};
/* Table to handle trigraph conversion, which occurs before all other
processing, everywhere in the file. (This is necessary since one
@ -362,14 +363,12 @@ initialize_char_syntax (dollar_in_ident)
is_hor_space['\t'] = 1;
is_hor_space['\v'] = 1;
is_hor_space['\f'] = 1;
is_hor_space['\r'] = 1;
is_space[' '] = 1;
is_space['\t'] = 1;
is_space['\v'] = 1;
is_space['\f'] = 1;
is_space['\n'] = 1;
is_space['\r'] = 1;
/* trigraph conversion */
trigraph_table['='] = '#'; trigraph_table[')'] = ']';

File diff suppressed because it is too large Load Diff

View File

@ -72,19 +72,9 @@ enum cpp_token {
typedef enum cpp_token (*parse_underflow_t) PARAMS((cpp_reader *));
typedef int (*parse_cleanup_t) PARAMS((cpp_buffer *, cpp_reader *));
/* A parse_marker indicates a previous position,
which we can backtrack to. */
struct parse_marker {
cpp_buffer *buf;
struct parse_marker *next;
int position;
};
extern void parse_set_mark PARAMS ((struct parse_marker *, cpp_reader *));
extern void parse_clear_mark PARAMS ((struct parse_marker *));
extern void parse_goto_mark PARAMS((struct parse_marker *, cpp_reader *));
extern void parse_move_mark PARAMS((struct parse_marker *, cpp_reader *));
extern void parse_set_mark PARAMS ((cpp_reader *));
extern void parse_clear_mark PARAMS ((cpp_reader *));
extern void parse_goto_mark PARAMS ((cpp_reader *));
extern int cpp_handle_option PARAMS ((cpp_reader *, int, char **));
extern int cpp_handle_options PARAMS ((cpp_reader *, int, char **));
@ -95,15 +85,13 @@ extern enum cpp_token cpp_get_non_space_token PARAMS ((cpp_reader *));
/* This frees resources used by PFILE. */
extern void cpp_cleanup PARAMS ((cpp_reader *PFILE));
/* If we have a huge buffer, may need to cache more recent counts */
#define CPP_LINE_BASE(BUF) ((BUF)->buf + (BUF)->line_base)
struct cpp_buffer
{
unsigned char *cur; /* current position */
unsigned char *rlimit; /* end of valid data */
unsigned char *buf; /* entire buffer */
unsigned char *alimit; /* end of allocated buffer */
unsigned char *line_base; /* start of current line */
struct cpp_buffer *prev;
@ -120,13 +108,13 @@ struct cpp_buffer
to record control macros. */
struct include_hash *ihash;
long line_base;
long lineno; /* Line number at CPP_LINE_BASE. */
long colno; /* Column number at CPP_LINE_BASE. */
long mark; /* Saved position for lengthy backtrack. */
parse_underflow_t underflow;
parse_cleanup_t cleanup;
void *data;
struct parse_marker *marks;
/* Value of if_stack at start of this file.
Used to prohibit unmatched #endif (etc) in an include file. */
struct if_stack *if_stack;
@ -289,8 +277,12 @@ struct cpp_reader
#define CPP_ADJUST_WRITTEN(PFILE,DELTA) ((PFILE)->limit += (DELTA))
#define CPP_SET_WRITTEN(PFILE,N) ((PFILE)->limit = (PFILE)->token_buffer + (N))
#define CPP_OPTIONS(PFILE) ((PFILE)->opts)
/* Advance the current line by one. */
#define CPP_BUMP_BUFFER_LINE(PBUF) ((PBUF)->lineno++,\
(PBUF)->line_base = (PBUF)->cur)
#define CPP_BUMP_LINE(PFILE) CPP_BUMP_BUFFER_LINE(CPP_BUFFER(PFILE))
#define CPP_OPTIONS(PFILE) ((PFILE)->opts)
#define CPP_BUFFER(PFILE) ((PFILE)->buffer)
#define CPP_PREV_BUFFER(BUFFER) ((BUFFER)->prev)
/* The bottom of the buffer stack. */