cppfiles.c: Read files in, using mmap if possible, then prescan them separately.

* cppfiles.c: Read files in, using mmap if possible, then
	prescan them separately.
	(read_file, read_with_read): New functions.
	* cpplex.c: Don't define UCHAR_MAX.
	(_cpp_read_and_prescan): Rename to _cpp_prescan.  Don't read
	the file here.

	* cppinit.c (handle_option): Automatically define __cplusplus,
	__OBJC__, __ASSEMBLER__, _LANGUAGE_FORTRAN here when we see
	the respective -lang switch.

	* cpphash.h (enum node_type, struct hashnode, _cpp_lookup
	prototype): Move to...
	* cpplib.h: ... here.  Rename struct hashnode to struct
	cpp_hashnode and give it a typedef.  Rename _cpp_lookup to
	cpp_lookup.  Add 'fe_value' slot, a union tree_node *.

From-SVN: r34228
This commit is contained in:
Zack Weinberg 2000-05-28 05:56:38 +00:00 committed by Zack Weinberg
parent e79f71f73d
commit f8f769ea4e
9 changed files with 373 additions and 320 deletions

View File

@ -1,3 +1,22 @@
2000-05-27 Zack Weinberg <zack@wolery.cumb.org>
* cppfiles.c: Read files in, using mmap if possible, then
prescan them separately.
(read_file, read_with_read): New functions.
* cpplex.c: Don't define UCHAR_MAX.
(_cpp_read_and_prescan): Rename to _cpp_prescan. Don't read
the file here.
* cppinit.c (handle_option): Automatically define __cplusplus,
__OBJC__, __ASSEMBLER__, _LANGUAGE_FORTRAN here when we see
the respective -lang switch.
* cpphash.h (enum node_type, struct hashnode, _cpp_lookup
prototype): Move to...
* cpplib.h: ... here. Rename struct hashnode to struct
cpp_hashnode and give it a typedef. Rename _cpp_lookup to
cpp_lookup. Add 'fe_value' slot, a union tree_node *.
2000-05-27 Geoffrey Keating <geoffk@cygnus.com>
* config/rs6000/rs6000.md (movsi): Constify 'name'.

View File

@ -394,7 +394,7 @@ parse_assertion (pfile)
cpp_reader *pfile;
{
struct operation op;
HASHNODE *hp;
cpp_hashnode *hp;
struct predicate *pred;
cpp_toklist query;
enum cpp_ttype type;
@ -414,7 +414,7 @@ parse_assertion (pfile)
tok = pfile->token_buffer + old_written;
len = CPP_WRITTEN (pfile) - old_written;
hp = _cpp_lookup (pfile, tok, len);
hp = cpp_lookup (pfile, tok, len);
/* Look ahead for an open paren. */
_cpp_skip_hspace (pfile);

View File

@ -28,6 +28,17 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
#include "intl.h"
#include "mkdeps.h"
#ifdef HAVE_MMAP_FILE
# include <sys/mman.h>
# ifndef MMAP_THRESHOLD
# define MMAP_THRESHOLD 3 /* Minimum page count to mmap the file. */
# endif
#else /* No MMAP_FILE */
# undef MMAP_THRESHOLD
# define MMAP_THRESHOLD 0
#endif
static IHASH *redundant_include_p PARAMS ((cpp_reader *, IHASH *,
struct file_name_list *));
static IHASH *make_IHASH PARAMS ((const char *, const char *,
@ -45,8 +56,10 @@ static int eq_IHASH PARAMS ((const void *, const void *));
static int find_include_file PARAMS ((cpp_reader *, const char *,
struct file_name_list *,
IHASH **, int *));
static int read_include_file PARAMS ((cpp_reader *, int, IHASH *));
static inline int open_include_file PARAMS ((cpp_reader *, const char *));
static int read_include_file PARAMS ((cpp_reader *, int, IHASH *));
static ssize_t read_with_read PARAMS ((cpp_buffer *, int, ssize_t));
static ssize_t read_file PARAMS ((cpp_buffer *, int, ssize_t));
#if 0
static void hack_vms_include_specification PARAMS ((char *));
@ -678,8 +691,7 @@ read_include_file (pfile, fd, ihash)
IHASH *ihash;
{
struct stat st;
size_t st_size;
long length;
ssize_t length;
cpp_buffer *fp;
fp = cpp_push_buffer (pfile, NULL, 0);
@ -690,35 +702,37 @@ read_include_file (pfile, fd, ihash)
if (fstat (fd, &st) < 0)
goto perror_fail;
/* If fd points to a plain file, we know how big it is, so we can
allocate the buffer all at once. If fd is a pipe or terminal, we
can't. Most C source files are 4k or less, so we guess that. If
fd is something weird, like a directory, we don't want to read it
at all.
/* If fd points to a plain file, we might be able to mmap it; we can
definitely allocate the buffer all at once. If fd is a pipe or
terminal, we can't do either. If fd is something weird, like a
block device or a directory, we don't want to read it at all.
Unfortunately, different systems use different st.st_mode values
for pipes: some have S_ISFIFO, some S_ISSOCK, some are buggy and
zero the entire struct stat except a couple fields. Hence we don't
even try to figure out what something is, except for plain files,
directories, and block devices.
In all cases, read_and_prescan will resize the buffer if it
turns out there's more data than we thought. */
directories, and block devices. */
if (S_ISREG (st.st_mode))
{
/* off_t might have a wider range than size_t - in other words,
ssize_t st_size;
/* off_t might have a wider range than ssize_t - in other words,
the max size of a file might be bigger than the address
space. We can't handle a file that large. (Anyone with
a single source file bigger than 4GB needs to rethink
a single source file bigger than 2GB needs to rethink
their coding style.) */
st_size = (size_t) st.st_size;
if ((unsigned HOST_WIDEST_INT) st_size
!= (unsigned HOST_WIDEST_INT) st.st_size)
if (st.st_size > SSIZE_MAX)
{
cpp_error (pfile, "file `%s' is too large", ihash->name);
cpp_error (pfile, "%s is too large", ihash->name);
goto fail;
}
st_size = st.st_size;
length = read_file (fp, fd, st_size);
if (length == -1)
goto perror_fail;
if (length < st_size)
cpp_warning (pfile, "%s is shorter than expected\n", ihash->name);
}
else if (S_ISBLK (st.st_mode))
{
@ -732,25 +746,28 @@ read_include_file (pfile, fd, ihash)
}
else
{
/* We don't know how big this is. 4k is a decent first guess. */
st_size = 4096;
/* 8 kilobytes is a sensible starting size. It ought to be
bigger than the kernel pipe buffer, and it's definitely
bigger than the majority of C source files. */
length = read_with_read (fp, fd, 8 * 1024);
if (length == -1)
goto perror_fail;
}
/* Read the file, converting end-of-line characters and trigraphs
(if enabled). */
/* These must be set before prescan. */
fp->ihash = ihash;
fp->nominal_fname = ihash->name;
length = _cpp_read_and_prescan (pfile, fp, fd, st_size);
if (length < 0)
goto fail;
if (length == 0)
ihash->control_macro = U""; /* never re-include */
else
/* Temporary - I hope. */
length = _cpp_prescan (pfile, fp, length);
close (fd);
fp->rlimit = fp->buf + length;
fp->cur = fp->buf;
if (ihash->foundhere != ABSOLUTE_PATH)
fp->system_header_p = ihash->foundhere->sysp;
fp->system_header_p = ihash->foundhere->sysp;
fp->lineno = 1;
fp->line_base = fp->buf;
@ -761,6 +778,7 @@ read_include_file (pfile, fd, ihash)
pfile->input_stack_listing_current = 0;
pfile->only_seen_white = 2;
close (fd);
return 1;
perror_fail:
@ -772,6 +790,74 @@ read_include_file (pfile, fd, ihash)
return 0;
}
/* Load the contents of the file open on FD, whose size is SIZE bytes,
   into FP.

   If SIZE is zero, return 0 immediately and leave FP untouched.

   When mmap support is compiled in (MMAP_THRESHOLD is nonzero) and the
   file spans at least MMAP_THRESHOLD pages, try to map it read-only.
   On success FP->buf points at the mapping, FP->mapped is set to 1 and
   SIZE is returned.

   In every other case (mmap not compiled in, file below the threshold,
   or mmap failed) the work is handed to read_with_read and its return
   value is passed straight back to the caller.  */
static ssize_t
read_file (fp, fd, size)
     cpp_buffer *fp;
     int fd;
     ssize_t size;
{
  /* An empty file needs neither a mapping nor a heap buffer.  */
  if (size == 0)
    return 0;

#if MMAP_THRESHOLD
  {
    /* Page size, queried once and cached across calls.  It is only
       needed to decide whether to mmap, so it lives inside the
       conditional: hosts without mmap never call getpagesize.  */
    static int pagesize = -1;

    if (pagesize == -1)
      pagesize = getpagesize ();

    if (size / pagesize >= MMAP_THRESHOLD)
      {
        const U_CHAR *result
          = (const U_CHAR *) mmap (0, size, PROT_READ, MAP_PRIVATE, fd, 0);
        /* mmap signals failure with an all-ones pointer.  */
        if (result != (const U_CHAR *)-1)
          {
            fp->buf = result;
            fp->mapped = 1;
            return size;
          }
      }
    /* If mmap fails, try read.  If there's really a problem, read will
       fail too.  */
  }
#endif

  return read_with_read (fp, fd, size);
}
/* Read everything available on FD into a freshly allocated buffer
   using plain read calls.  SIZE is the initial allocation; the buffer
   is doubled each time it fills up.

   Returns -1 if read reports an error, and 0 if no bytes at all were
   read; in both cases the buffer is freed and FP is left untouched.
   Otherwise the buffer is trimmed to the number of bytes read, stored
   in FP->buf, FP->mapped is cleared, and the byte count is returned.  */
static ssize_t
read_with_read (fp, fd, size)
     cpp_buffer *fp;
     int fd;
     ssize_t size;
{
  U_CHAR *data = (U_CHAR *) xmalloc (size);
  ssize_t used = 0;
  ssize_t got;

  for (;;)
    {
      got = read (fd, data + used, size - used);
      if (got <= 0)
        break;

      used += got;
      /* Buffer full: double it so the next read has room.  */
      if (used == size)
        {
          size *= 2;
          data = xrealloc (data, size);
        }
    }

  /* Either a read error or an empty input: nothing to hand back.  */
  if (got < 0 || used == 0)
    {
      free (data);
      return got < 0 ? -1 : 0;
    }

  /* Give back the unused tail of the allocation.  */
  if (used < size)
    data = xrealloc (data, used);

  fp->buf = data;
  fp->mapped = 0;
  return used;
}
/* Given a path FNAME, extract the directory component and place it
onto the actual_dirs list. Return a pointer to the allocated
file_name_list structure. These structures are used to implement

View File

@ -110,7 +110,7 @@ struct hashdummy
static unsigned int hash_HASHNODE PARAMS ((const void *));
static int eq_HASHNODE PARAMS ((const void *, const void *));
static void del_HASHNODE PARAMS ((void *));
static HASHNODE *make_HASHNODE PARAMS ((const U_CHAR *, size_t,
static cpp_hashnode *make_HASHNODE PARAMS ((const U_CHAR *, size_t,
enum node_type, unsigned int));
static void dump_funlike_macro PARAMS ((cpp_reader *,
@ -118,10 +118,10 @@ static void dump_funlike_macro PARAMS ((cpp_reader *,
static int dump_hash_helper PARAMS ((void **, void *));
static void push_macro_expansion PARAMS ((cpp_reader *, const U_CHAR *,
int, HASHNODE *));
int, cpp_hashnode *));
static int unsafe_chars PARAMS ((cpp_reader *, int, int));
static enum cpp_ttype macarg PARAMS ((cpp_reader *, int));
static void special_symbol PARAMS ((cpp_reader *, HASHNODE *));
static void special_symbol PARAMS ((cpp_reader *, cpp_hashnode *));
static int compare_defs PARAMS ((cpp_reader *,
const struct funct_defn *,
const struct funct_defn *));
@ -195,7 +195,7 @@ static void scan_arguments PARAMS ((cpp_reader *,
const struct funct_defn *,
struct argdata *, const U_CHAR *));
static void stringify PARAMS ((cpp_reader *, struct argdata *));
static void funlike_macroexpand PARAMS ((cpp_reader *, HASHNODE *,
static void funlike_macroexpand PARAMS ((cpp_reader *, cpp_hashnode *,
struct argdata *));
/* Calculate hash of a string of length LEN. */
@ -213,16 +213,16 @@ _cpp_calc_hash (str, len)
return r + len;
}
/* Calculate hash of a HASHNODE structure. */
/* Calculate hash of a cpp_hashnode structure. */
static unsigned int
hash_HASHNODE (x)
const void *x;
{
const HASHNODE *h = (const HASHNODE *)x;
const cpp_hashnode *h = (const cpp_hashnode *)x;
return h->hash;
}
/* Compare a HASHNODE structure (already in the table) with a
/* Compare a cpp_hashnode structure (already in the table) with a
hashdummy structure (not yet in the table). This relies on the
rule that the existing entry is the first argument, the potential
entry the second. It also relies on the comparison function never
@ -233,36 +233,36 @@ eq_HASHNODE (x, y)
const void *x;
const void *y;
{
const HASHNODE *a = (const HASHNODE *)x;
const cpp_hashnode *a = (const cpp_hashnode *)x;
const struct hashdummy *b = (const struct hashdummy *)y;
return (a->length == b->length
&& !ustrncmp (a->name, b->name, a->length));
}
/* Destroy a HASHNODE. */
/* Destroy a cpp_hashnode. */
static void
del_HASHNODE (x)
void *x;
{
HASHNODE *h = (HASHNODE *)x;
cpp_hashnode *h = (cpp_hashnode *)x;
_cpp_free_definition (h);
free (h);
}
/* Allocate and initialize a HASHNODE structure.
/* Allocate and initialize a cpp_hashnode structure.
Caller must fill in the value field. */
static HASHNODE *
static cpp_hashnode *
make_HASHNODE (name, len, type, hash)
const U_CHAR *name;
size_t len;
enum node_type type;
unsigned int hash;
{
HASHNODE *hp = (HASHNODE *) xmalloc (sizeof (HASHNODE) + len);
U_CHAR *p = (U_CHAR *)hp + offsetof (HASHNODE, name);
cpp_hashnode *hp = (cpp_hashnode *) xmalloc (sizeof (cpp_hashnode) + len);
U_CHAR *p = (U_CHAR *)hp + offsetof (cpp_hashnode, name);
hp->type = type;
hp->length = len;
@ -277,21 +277,21 @@ make_HASHNODE (name, len, type, hash)
/* Find the hash node for name "name", of length LEN. */
HASHNODE *
_cpp_lookup (pfile, name, len)
cpp_hashnode *
cpp_lookup (pfile, name, len)
cpp_reader *pfile;
const U_CHAR *name;
int len;
{
struct hashdummy dummy;
HASHNODE *new, **slot;
cpp_hashnode *new, **slot;
unsigned int hash;
dummy.name = name;
dummy.length = len;
hash = _cpp_calc_hash (name, len);
slot = (HASHNODE **)
slot = (cpp_hashnode **)
htab_find_slot_with_hash (pfile->hashtab, (void *)&dummy, hash, INSERT);
if (*slot)
return *slot;
@ -315,7 +315,7 @@ _cpp_init_macro_hash (pfile)
void
_cpp_free_definition (h)
HASHNODE *h;
cpp_hashnode *h;
{
if (h->type == T_XCONST)
free ((PTR) h->value.cpval);
@ -860,7 +860,7 @@ int
_cpp_create_definition (pfile, list, hp)
cpp_reader *pfile;
cpp_toklist *list;
HASHNODE *hp;
cpp_hashnode *hp;
{
struct funct_defn *fdefn = 0;
struct object_defn *odefn = 0;
@ -1102,7 +1102,7 @@ _cpp_quote_string (pfile, src)
static void
special_symbol (pfile, hp)
cpp_reader *pfile;
HASHNODE *hp;
cpp_hashnode *hp;
{
const U_CHAR *buf;
cpp_buffer *ip;
@ -1187,12 +1187,12 @@ special_symbol (pfile, hp)
{
time_t tt = time (NULL);
struct tm *tb = localtime (&tt);
HASHNODE *d, *t;
cpp_hashnode *d, *t;
if (hp->type == T_DATE)
d = hp, t = _cpp_lookup (pfile, DSC("__TIME__"));
d = hp, t = cpp_lookup (pfile, DSC("__TIME__"));
else
t = hp, d = _cpp_lookup (pfile, DSC("__DATE__"));
t = hp, d = cpp_lookup (pfile, DSC("__DATE__"));
d->value.cpval = xmalloc (sizeof "'Oct 11 1347'");
sprintf ((char *)d->value.cpval, "\"%s %2d %4d\"",
@ -1229,7 +1229,7 @@ special_symbol (pfile, hp)
void
_cpp_macroexpand (pfile, hp)
cpp_reader *pfile;
HASHNODE *hp;
cpp_hashnode *hp;
{
const struct funct_defn *defn;
struct argdata *args;
@ -1481,7 +1481,7 @@ stringify (pfile, arg)
static void
funlike_macroexpand (pfile, hp, args)
cpp_reader *pfile;
HASHNODE *hp;
cpp_hashnode *hp;
struct argdata *args;
{
const struct funct_defn *defn = hp->value.fdefn;
@ -1731,7 +1731,7 @@ push_macro_expansion (pfile, xbuf, len, hp)
cpp_reader *pfile;
const U_CHAR *xbuf;
int len;
HASHNODE *hp;
cpp_hashnode *hp;
{
cpp_buffer *mbuf;
int advance_cur = 0;
@ -1840,7 +1840,7 @@ compare_defs (pfile, d1, d2)
void
_cpp_dump_definition (pfile, hp)
cpp_reader *pfile;
HASHNODE *hp;
cpp_hashnode *hp;
{
CPP_RESERVE (pfile, hp->length + sizeof "#define ");
CPP_PUTS_Q (pfile, "#define ", sizeof "#define " - 1);
@ -1945,7 +1945,7 @@ dump_hash_helper (h, p)
void **h;
void *p;
{
HASHNODE *hp = (HASHNODE *)*h;
cpp_hashnode *hp = (cpp_hashnode *)*h;
cpp_reader *pfile = (cpp_reader *)p;
if (hp->type == T_MACRO || hp->type == T_FMACRO

View File

@ -25,52 +25,6 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
typedef unsigned char U_CHAR;
#define U (const U_CHAR *) /* Intended use: U"string" */
/* The structure of a node in the hash table. The hash table
has entries for all tokens defined by #define commands (type T_MACRO),
plus some special tokens like __LINE__ (these each have their own
type, and the appropriate code is run when that type of node is seen.
It does not contain control words like "#define", which are recognized
by a separate piece of code. */
/* different flavors of hash nodes */
enum node_type
{
T_VOID = 0, /* no definition yet */
T_SPECLINE, /* `__LINE__' */
T_DATE, /* `__DATE__' */
T_FILE, /* `__FILE__' */
T_BASE_FILE, /* `__BASE_FILE__' */
T_INCLUDE_LEVEL, /* `__INCLUDE_LEVEL__' */
T_TIME, /* `__TIME__' */
T_STDC, /* `__STDC__' */
T_CONST, /* Constant string, used by `__SIZE_TYPE__' etc */
T_XCONST, /* Ditto, but the string is malloced memory */
T_POISON, /* poisoned identifier */
T_MACRO, /* object-like macro */
T_FMACRO, /* function-like macro */
T_IDENTITY, /* macro defined to itself */
T_EMPTY, /* macro defined to nothing */
T_ASSERTION /* predicate for #assert */
};
typedef struct hashnode HASHNODE;
struct hashnode
{
unsigned int hash; /* cached hash value */
unsigned short length; /* length of name */
ENUM_BITFIELD(node_type) type : 8; /* node type */
char disabled; /* macro turned off for rescan? */
union {
const U_CHAR *cpval; /* some predefined macros */
const struct object_defn *odefn; /* #define foo bar */
const struct funct_defn *fdefn; /* #define foo(x) bar(x) */
struct predicate *pred; /* #assert */
} value;
const U_CHAR name[1]; /* name[length] */
};
/* Structure used for assertion predicates. */
struct predicate
{
@ -109,14 +63,13 @@ struct ihash
struct ihash *next_this_file;
/* Location of the file in the include search path.
Used for include_next */
Used for include_next and to detect redundant includes. */
struct file_name_list *foundhere;
unsigned int hash; /* save hash value for future reference */
const char *nshort; /* name of file as referenced in #include;
points into name[] */
const U_CHAR *control_macro; /* macro, if any, preventing reinclusion -
see redundant_include_p */
const U_CHAR *control_macro; /* macro, if any, preventing reinclusion. */
const char name[1]; /* (partial) pathname of file */
};
typedef struct ihash IHASH;
@ -224,14 +177,12 @@ extern unsigned char _cpp_IStable[256];
/* In cpphash.c */
extern unsigned int _cpp_calc_hash PARAMS ((const U_CHAR *, size_t));
extern HASHNODE *_cpp_lookup PARAMS ((cpp_reader *,
const U_CHAR *, int));
extern void _cpp_free_definition PARAMS ((HASHNODE *));
extern int _cpp_create_definition PARAMS ((cpp_reader *,
cpp_toklist *, HASHNODE *));
extern void _cpp_dump_definition PARAMS ((cpp_reader *, HASHNODE *));
extern void _cpp_free_definition PARAMS ((cpp_hashnode *));
extern int _cpp_create_definition PARAMS ((cpp_reader *, cpp_toklist *,
cpp_hashnode *));
extern void _cpp_dump_definition PARAMS ((cpp_reader *, cpp_hashnode *));
extern void _cpp_quote_string PARAMS ((cpp_reader *, const U_CHAR *));
extern void _cpp_macroexpand PARAMS ((cpp_reader *, HASHNODE *));
extern void _cpp_macroexpand PARAMS ((cpp_reader *, cpp_hashnode *));
extern void _cpp_init_macro_hash PARAMS ((cpp_reader *));
extern void _cpp_dump_macro_hash PARAMS ((cpp_reader *));
@ -254,8 +205,8 @@ extern void _cpp_expand_to_buffer PARAMS ((cpp_reader *,
const unsigned char *, int));
extern int _cpp_parse_assertion PARAMS ((cpp_reader *));
extern enum cpp_ttype _cpp_lex_token PARAMS ((cpp_reader *));
extern long _cpp_read_and_prescan PARAMS ((cpp_reader *, cpp_buffer *,
int, size_t));
extern ssize_t _cpp_prescan PARAMS ((cpp_reader *, cpp_buffer *,
ssize_t));
extern void _cpp_init_input_buffer PARAMS ((cpp_reader *));
extern void _cpp_grow_token_buffer PARAMS ((cpp_reader *, long));
extern enum cpp_ttype _cpp_get_directive_token

View File

@ -670,7 +670,7 @@ initialize_builtins (pfile)
{
const struct builtin *b;
const U_CHAR *val;
HASHNODE *hp;
cpp_hashnode *hp;
for(b = builtin_array; b < builtin_array_end; b++)
{
if (b->type == T_STDC && CPP_TRADITIONAL (pfile))
@ -686,7 +686,7 @@ initialize_builtins (pfile)
else
val = b->value;
hp = _cpp_lookup (pfile, b->name, b->len);
hp = cpp_lookup (pfile, b->name, b->len);
hp->value.cpval = val;
hp->type = b->type;
@ -1252,6 +1252,7 @@ handle_option (pfile, argc, argv)
char **argv;
{
int i = 0;
struct cpp_pending *pend = CPP_OPTION (pfile, pending);
if (argv[i][0] != '-')
{
@ -1354,7 +1355,7 @@ handle_option (pfile, argc, argv)
CPP_OPTION (pfile, print_include_names) = 1;
break;
case OPT_D:
new_pending_directive (CPP_OPTION (pfile, pending), arg, cpp_define);
new_pending_directive (pend, arg, cpp_define);
break;
case OPT_pedantic_errors:
CPP_OPTION (pfile, pedantic_errors) = 1;
@ -1396,8 +1397,7 @@ handle_option (pfile, argc, argv)
CPP_OPTION (pfile, c99) = 0;
CPP_OPTION (pfile, objc) = 0;
CPP_OPTION (pfile, trigraphs) = 1;
new_pending_directive (CPP_OPTION (pfile, pending),
"__STRICT_ANSI__", cpp_define);
new_pending_directive (pend, "__STRICT_ANSI__", cpp_define);
break;
case OPT_lang_cplusplus:
CPP_OPTION (pfile, cplusplus) = 1;
@ -1405,21 +1405,29 @@ handle_option (pfile, argc, argv)
CPP_OPTION (pfile, c89) = 0;
CPP_OPTION (pfile, c99) = 0;
CPP_OPTION (pfile, objc) = 0;
new_pending_directive (pend, "__cplusplus", cpp_define);
break;
case OPT_lang_objc:
case OPT_lang_objcplusplus:
CPP_OPTION (pfile, cplusplus) = opt_code == OPT_lang_objcplusplus;
CPP_OPTION (pfile, cplusplus) = 1;
new_pending_directive (pend, "__cplusplus", cpp_define);
/* fall through */
case OPT_lang_objc:
CPP_OPTION (pfile, cplusplus_comments) = 1;
CPP_OPTION (pfile, c89) = 0;
CPP_OPTION (pfile, c99) = 0;
CPP_OPTION (pfile, objc) = 1;
new_pending_directive (pend, "__OBJC__", cpp_define);
break;
case OPT_lang_asm:
CPP_OPTION (pfile, lang_asm) = 1;
CPP_OPTION (pfile, dollars_in_ident) = 0;
new_pending_directive (pend, "__ASSEMBLER__", cpp_define);
break;
case OPT_lang_fortran:
CPP_OPTION (pfile, lang_fortran) = 1;
CPP_OPTION (pfile, traditional) = 1;
CPP_OPTION (pfile, cplusplus_comments) = 0;
new_pending_directive (pend, "_LANGUAGE_FORTRAN", cpp_define);
break;
case OPT_lang_chill:
CPP_OPTION (pfile, objc) = 0;

View File

@ -26,6 +26,10 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
#include "cpplib.h"
#include "cpphash.h"
#ifdef HAVE_MMAP_FILE
# include <sys/mman.h>
#endif
#define PEEKBUF(BUFFER, N) \
((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
#define GETBUF(BUFFER) \
@ -224,7 +228,7 @@ cpp_pop_buffer (pfile)
}
else if (buf->macro)
{
HASHNODE *m = buf->macro;
cpp_hashnode *m = buf->macro;
m->disabled = 0;
if ((m->type == T_FMACRO && buf->mapped)
@ -1622,9 +1626,9 @@ maybe_macroexpand (pfile, written)
{
U_CHAR *macro = pfile->token_buffer + written;
size_t len = CPP_WRITTEN (pfile) - written;
HASHNODE *hp = _cpp_lookup (pfile, macro, len);
cpp_hashnode *hp = cpp_lookup (pfile, macro, len);
/* _cpp_lookup never returns null. */
/* cpp_lookup never returns null. */
if (hp->type == T_VOID)
return 0;
if (hp->disabled || hp->type == T_IDENTITY)
@ -1892,14 +1896,10 @@ find_position (start, limit, linep)
return lbase;
}
/* The following table is used by _cpp_read_and_prescan. If we have
/* The following table is used by _cpp_prescan. If we have
designated initializers, it can be constant data; otherwise, it is
set up at runtime by _cpp_init_input_buffer. */
#ifndef UCHAR_MAX
#define UCHAR_MAX 255 /* assume 8-bit bytes */
#endif
#if (GCC_VERSION >= 2007)
#define init_chartab() /* nothing */
#define CHARTAB __extension__ static const U_CHAR chartab[UCHAR_MAX + 1] = {
@ -1937,9 +1937,10 @@ END
#define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
#define NONTRI(c) ((c) <= SPECCASE_QUESTION)
/* Read the entire contents of file DESC into buffer BUF. LEN is how
much memory to allocate initially; more will be allocated if
necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
/* Prescan pass over a file already loaded into BUF. This is
translation phases 1 and 2 (C99 5.1.1.2).
Convert end-of-line markers (\n, \r, \r\n, \n\r) to
canonical form (\n). If enabled, convert and/or warn about
trigraphs. Convert backslash-newline to a one-character escape
(\r) and remove it from "embarrassing" places (i.e. the middle of a
@ -1960,204 +1961,153 @@ END
at the end of reload1.c is about 60%. (reload1.c is 329k.)
If your file has more than one kind of end-of-line marker, you
will get messed-up line numbering.
So that the cases of the switch statement do not have to concern
themselves with the complications of reading beyond the end of the
buffer, the buffer is guaranteed to have at least 3 characters in
it (or however many are left in the file, if less) on entry to the
switch. This is enough to handle trigraphs and the "\\\n\r" and
"\\\r\n" cases.
The end of the buffer is marked by a '\\', which, being a special
character, guarantees we will exit the fast-scan loops and perform
a refill. */
long
_cpp_read_and_prescan (pfile, fp, desc, len)
will get messed-up line numbering. */
ssize_t
_cpp_prescan (pfile, fp, len)
cpp_reader *pfile;
cpp_buffer *fp;
int desc;
size_t len;
ssize_t len;
{
U_CHAR *buf = (U_CHAR *) xmalloc (len);
U_CHAR *ip, *op, *line_base;
U_CHAR *ibase;
U_CHAR *buf, *op;
const U_CHAR *ibase, *ip, *ilimit;
U_CHAR *line_base;
unsigned long line;
unsigned int deferred_newlines;
size_t offset;
int count = 0;
offset = 0;
deferred_newlines = 0;
op = buf;
line_base = buf;
/* Allocate an extra byte in case we must add a trailing \n. */
buf = (U_CHAR *) xmalloc (len + 1);
line_base = op = buf;
ip = ibase = fp->buf;
ilimit = ibase + len;
line = 1;
ibase = pfile->input_buffer + 3;
ip = ibase;
ip[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */
deferred_newlines = 0;
for (;;)
{
U_CHAR *near_buff_end;
const U_CHAR *iq;
count = read (desc, ibase, pfile->input_buffer_len);
if (count < 0)
goto error;
ibase[count] = '\\'; /* Marks end of buffer */
if (count)
/* Deal with \-newline, potentially in the middle of a token. */
if (deferred_newlines)
{
near_buff_end = pfile->input_buffer + count;
offset += count;
if (offset > len)
if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
{
size_t delta_op;
size_t delta_line_base;
len = offset * 2;
if (offset > len)
/* len overflowed.
This could happen if the file is larger than half the
maximum address space of the machine. */
goto too_big;
delta_op = op - buf;
delta_line_base = line_base - buf;
buf = (U_CHAR *) xrealloc (buf, len);
op = buf + delta_op;
line_base = buf + delta_line_base;
/* Previous was not white space. Skip to white
space, if we can, before outputting the \r's */
iq = ip;
while (iq < ilimit
&& *iq != ' '
&& *iq != '\t'
&& *iq != '\n'
&& NORMAL(*iq))
iq++;
memcpy (op, ip, iq - ip);
op += iq - ip;
ip += iq - ip;
if (! NORMAL(*ip))
goto do_speccase;
}
}
else
{
if (ip == ibase)
break;
/* Allow normal processing of the (at most 2) remaining
characters. The end-of-buffer marker is still present
and prevents false matches within the switch. */
near_buff_end = ibase - 1;
while (deferred_newlines)
deferred_newlines--, *op++ = '\r';
}
for (;;)
/* Copy as much as we can without special treatment. */
iq = ip;
while (iq < ilimit && NORMAL (*iq)) iq++;
memcpy (op, ip, iq - ip);
op += iq - ip;
ip += iq - ip;
do_speccase:
if (ip >= ilimit)
break;
switch (chartab[*ip++])
{
unsigned int span;
/* Deal with \-newline, potentially in the middle of a token. */
if (deferred_newlines)
case SPECCASE_CR: /* \r */
if (ip[-2] != '\n')
{
if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
{
/* Previous was not white space. Skip to white
space, if we can, before outputting the \r's */
span = 0;
while (ip[span] != ' '
&& ip[span] != '\t'
&& ip[span] != '\n'
&& NORMAL(ip[span]))
span++;
memcpy (op, ip, span);
op += span;
ip += span;
if (! NORMAL(ip[0]))
goto do_speccase;
}
while (deferred_newlines)
deferred_newlines--, *op++ = '\r';
if (ip < ilimit && *ip == '\n')
ip++;
*op++ = '\n';
}
break;
/* Copy as much as we can without special treatment. */
span = 0;
while (NORMAL (ip[span])) span++;
memcpy (op, ip, span);
op += span;
ip += span;
do_speccase:
if (ip > near_buff_end) /* Do we have enough chars? */
break;
switch (chartab[*ip++])
case SPECCASE_BACKSLASH: /* \ */
backslash:
if (ip < ilimit)
{
case SPECCASE_CR: /* \r */
if (ip[-2] != '\n')
{
if (*ip == '\n')
ip++;
*op++ = '\n';
}
break;
case SPECCASE_BACKSLASH: /* \ */
if (*ip == '\n')
{
deferred_newlines++;
ip++;
if (*ip == '\r') ip++;
break;
}
else if (*ip == '\r')
{
deferred_newlines++;
ip++;
if (*ip == '\n') ip++;
break;
}
else
*op++ = '\\';
}
*op++ = '\\';
break;
case SPECCASE_QUESTION: /* ? */
{
unsigned int d, t;
*op++ = '?'; /* Normal non-trigraph case */
if (ip > ilimit - 2 || ip[0] != '?')
break;
d = ip[1];
t = chartab[d];
if (NONTRI (t))
break;
case SPECCASE_QUESTION: /* ? */
if (CPP_OPTION (pfile, warn_trigraphs))
{
unsigned int d, t;
*op++ = '?'; /* Normal non-trigraph case */
if (ip[0] != '?')
break;
d = ip[1];
t = chartab[d];
if (NONTRI (t))
break;
if (CPP_OPTION (pfile, warn_trigraphs))
{
unsigned long col;
line_base = find_position (line_base, op, &line);
col = op - line_base + 1;
if (CPP_OPTION (pfile, trigraphs))
cpp_warning_with_line (pfile, line, col,
"trigraph ??%c converted to %c", d, t);
else
cpp_warning_with_line (pfile, line, col,
"trigraph ??%c ignored", d);
}
ip += 2;
unsigned long col;
line_base = find_position (line_base, op, &line);
col = op - line_base + 1;
if (CPP_OPTION (pfile, trigraphs))
{
op[-1] = t; /* Overwrite '?' */
if (t == '\\')
{
op--;
*--ip = '\\';
goto do_speccase; /* May need buffer refill */
}
}
cpp_warning_with_line (pfile, line, col,
"trigraph ??%c converted to %c", d, t);
else
cpp_warning_with_line (pfile, line, col,
"trigraph ??%c ignored", d);
}
ip += 2;
if (CPP_OPTION (pfile, trigraphs))
{
op[-1] = t; /* Overwrite '?' */
if (t == '\\')
{
*op++ = '?';
*op++ = d;
op--;
goto backslash;
}
}
break;
}
else
{
*op++ = '?';
*op++ = d;
}
}
break;
}
/* Copy previous char plus unprocessed (at most 2) chars
to beginning of buffer, refill it with another
read(), and continue processing */
memmove (ip - count - 1, ip - 1, 4 - (ip - near_buff_end));
ip -= count;
}
if (offset == 0)
return 0;
#ifdef HAVE_MMAP_FILE
if (fp->mapped)
munmap ((caddr_t) fp->buf, len);
else
#endif
free ((PTR) fp->buf);
if (op[-1] != '\n')
{
@ -2165,30 +2115,11 @@ _cpp_read_and_prescan (pfile, fp, desc, len)
line_base = find_position (line_base, op, &line);
col = op - line_base + 1;
cpp_warning_with_line (pfile, line, col, "no newline at end of file");
if (offset + 1 > len)
{
len += 1;
if (offset + 1 > len)
goto too_big;
buf = (U_CHAR *) xrealloc (buf, len);
op = buf + offset;
}
*op++ = '\n';
}
fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
fp->buf = buf;
return op - buf;
too_big:
cpp_notice (pfile, "%s is too large (>%lu bytes)", fp->ihash->name,
(unsigned long)offset);
free (buf);
return -1;
error:
cpp_error_from_errno (pfile, fp->ihash->name);
free (buf);
return -1;
}
/* Allocate pfile->input_buffer, and initialize chartab[]

View File

@ -339,7 +339,7 @@ static int
do_define (pfile)
cpp_reader *pfile;
{
HASHNODE *node;
cpp_hashnode *node;
int len;
const U_CHAR *sym;
cpp_toklist *list = &pfile->directbuf;
@ -371,7 +371,7 @@ do_define (pfile)
goto out;
}
node = _cpp_lookup (pfile, sym, len);
node = cpp_lookup (pfile, sym, len);
/* Check for poisoned identifiers now. All other checks
are done in cpphash.c. */
if (node->type == T_POISON)
@ -681,7 +681,7 @@ do_undef (pfile)
cpp_reader *pfile;
{
int len;
HASHNODE *hp;
cpp_hashnode *hp;
U_CHAR *name;
long here = CPP_WRITTEN (pfile);
enum cpp_ttype token;
@ -708,7 +708,7 @@ do_undef (pfile)
name = pfile->token_buffer + here;
CPP_SET_WRITTEN (pfile, here);
hp = _cpp_lookup (pfile, name, len);
hp = cpp_lookup (pfile, name, len);
if (hp->type == T_VOID)
; /* Not defined in the first place - do nothing. */
else if (hp->type == T_POISON)
@ -941,7 +941,7 @@ do_pragma_poison (pfile)
/* Poison these symbols so that all subsequent usage produces an
error message. */
U_CHAR *p;
HASHNODE *hp;
cpp_hashnode *hp;
long written;
size_t len;
enum cpp_ttype token;
@ -968,7 +968,7 @@ do_pragma_poison (pfile)
p = pfile->token_buffer + written;
len = CPP_PWRITTEN (pfile) - p;
hp = _cpp_lookup (pfile, p, len);
hp = cpp_lookup (pfile, p, len);
if (hp->type == T_POISON)
; /* It is allowed to poison the same identifier twice. */
else
@ -1528,7 +1528,7 @@ do_assert (pfile)
long old_written;
U_CHAR *sym;
size_t len;
HASHNODE *hp;
cpp_hashnode *hp;
struct predicate *pred = 0;
enum cpp_ttype type;
@ -1544,7 +1544,7 @@ do_assert (pfile)
sym = pfile->token_buffer + old_written;
len = CPP_WRITTEN (pfile) - old_written;
hp = _cpp_lookup (pfile, sym, len);
hp = cpp_lookup (pfile, sym, len);
if (_cpp_get_directive_token (pfile) != CPP_OPEN_PAREN)
ERROR ("missing token-sequence in #assert");
@ -1606,7 +1606,7 @@ do_unassert (pfile)
long old_written;
U_CHAR *sym;
size_t len;
HASHNODE *hp;
cpp_hashnode *hp;
cpp_toklist ans;
enum cpp_ttype type;
int specific = 0;
@ -1620,7 +1620,7 @@ do_unassert (pfile)
sym = pfile->token_buffer + old_written;
len = CPP_WRITTEN (pfile) - old_written;
hp = _cpp_lookup (pfile, sym, len);
hp = cpp_lookup (pfile, sym, len);
type = _cpp_get_directive_token (pfile);
if (type == CPP_OPEN_PAREN)
@ -1784,7 +1784,7 @@ cpp_defined (pfile, id, len)
const U_CHAR *id;
int len;
{
HASHNODE *hp = _cpp_lookup (pfile, id, len);
cpp_hashnode *hp = cpp_lookup (pfile, id, len);
if (hp->type == T_POISON)
{
cpp_error (pfile, "attempt to use poisoned `%s'", hp->name);

View File

@ -35,6 +35,7 @@ typedef struct cpp_printer cpp_printer;
typedef struct cpp_token cpp_token;
typedef struct cpp_toklist cpp_toklist;
typedef struct cpp_name cpp_name;
typedef struct cpp_hashnode cpp_hashnode;
/* The first two groups, apart from '=', can appear in preprocessor
expressions. This allows a lookup table to be implemented in
@ -238,7 +239,7 @@ struct cpp_buffer
/* If the buffer is the expansion of a macro, this points to the
macro's hash table entry. */
struct hashnode *macro;
struct cpp_hashnode *macro;
/* Value of if_stack at start of this file.
Used to prohibit unmatched #endif (etc) in an include file. */
@ -590,6 +591,61 @@ struct cpp_printer
/* Name under which this program was invoked. */
extern const char *progname;
/* The structure of a node in the hash table. The hash table
has entries for all tokens defined by #define commands (type T_MACRO),
plus some special tokens like __LINE__ (these each have their own
type, and the appropriate code is run when that type of node is seen).
It does not contain control words like "#define", which are recognized
by a separate piece of code. */
/* The different flavors of hash nodes. The chosen value is kept in
the 8-bit `type' bit-field of struct cpp_hashnode, so every
enumerator must fit in 8 bits. T_VOID is deliberately zero. */
enum node_type
{
T_VOID = 0, /* no definition yet */
T_SPECLINE, /* `__LINE__' */
T_DATE, /* `__DATE__' */
T_FILE, /* `__FILE__' */
T_BASE_FILE, /* `__BASE_FILE__' */
T_INCLUDE_LEVEL, /* `__INCLUDE_LEVEL__' */
T_TIME, /* `__TIME__' */
T_STDC, /* `__STDC__' */
T_CONST, /* Constant string, used by `__SIZE_TYPE__' etc */
T_XCONST, /* Ditto, but the string is malloced memory */
T_POISON, /* poisoned identifier */
T_MACRO, /* object-like macro */
T_FMACRO, /* function-like macro */
T_IDENTITY, /* macro defined to itself */
T_EMPTY, /* macro defined to nothing */
T_ASSERTION /* predicate for #assert */
};
/* There is a slot in the hashnode for use by front ends when integrated
with cpplib. It holds a tree (see tree.h) but we mustn't drag that
header into every user of cpplib.h. cpplib does not do anything with
this slot except clear it when a new node is created. */
union tree_node;
/* One entry in cpplib's identifier hash table.

   Nodes are allocated by make_HASHNODE in cpphash.c as
   sizeof (struct cpp_hashnode) + length bytes, and the identifier
   text is written starting at offsetof (cpp_hashnode, name). NAME
   must therefore remain the last member, declared with one element. */
struct cpp_hashnode
{
unsigned int hash; /* cached hash value */
unsigned short length; /* length of name */
ENUM_BITFIELD(node_type) type : 8; /* node type */
char disabled; /* macro turned off for rescan? */
/* Payload of the node; which member is meaningful depends on TYPE. */
union {
const unsigned char *cpval; /* some predefined macros */
const struct object_defn *odefn; /* #define foo bar */
const struct funct_defn *fdefn; /* #define foo(x) bar(x) */
struct predicate *pred; /* #assert */
} value;
union tree_node *fe_value; /* front end value */
const unsigned char name[1]; /* name[length] */
};
extern void _cpp_lex_file PARAMS((cpp_reader *));
extern int cpp_handle_options PARAMS ((cpp_reader *, int, char **));
extern enum cpp_ttype cpp_get_token PARAMS ((cpp_reader *));
@ -653,6 +709,8 @@ extern int cpp_idcmp PARAMS ((const unsigned char *,
/* In cpphash.c */
extern int cpp_defined PARAMS ((cpp_reader *,
const unsigned char *, int));
extern cpp_hashnode *cpp_lookup PARAMS ((cpp_reader *,
const unsigned char *, int));
/* In cppfiles.c */
extern int cpp_included PARAMS ((cpp_reader *, const char *));