gcc/libgfortran/io/read.c

1035 lines
20 KiB
C

/* Copyright (C) 2002, 2003, 2005, 2007, 2008, 2009 Free Software Foundation, Inc.
Contributed by Andy Vaught
F2003 I/O support contributed by Jerry DeLisle
This file is part of the GNU Fortran 95 runtime library (libgfortran).
Libgfortran is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgfortran is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "io.h"
#include <string.h>
#include <errno.h>
#include <ctype.h>
#include <stdlib.h>
#include <assert.h>
typedef unsigned char uchar;
/* read.c -- Deal with formatted reads */
/* set_integer()-- All of the integer assignments come here to
* actually place the value into memory. */
void
set_integer (void *dest, GFC_INTEGER_LARGEST value, int length)
{
switch (length)
{
#ifdef HAVE_GFC_INTEGER_16
case 16:
{
GFC_INTEGER_16 tmp = value;
memcpy (dest, (void *) &tmp, length);
}
break;
#endif
case 8:
{
GFC_INTEGER_8 tmp = value;
memcpy (dest, (void *) &tmp, length);
}
break;
case 4:
{
GFC_INTEGER_4 tmp = value;
memcpy (dest, (void *) &tmp, length);
}
break;
case 2:
{
GFC_INTEGER_2 tmp = value;
memcpy (dest, (void *) &tmp, length);
}
break;
case 1:
{
GFC_INTEGER_1 tmp = value;
memcpy (dest, (void *) &tmp, length);
}
break;
default:
internal_error (NULL, "Bad integer kind");
}
}
/* max_value()-- Given a length (kind), return the maximum signed or
* unsigned value */
GFC_UINTEGER_LARGEST
max_value (int length, int signed_flag)
{
GFC_UINTEGER_LARGEST value;
#if defined HAVE_GFC_REAL_16 || defined HAVE_GFC_REAL_10
int n;
#endif
switch (length)
{
#if defined HAVE_GFC_REAL_16 || defined HAVE_GFC_REAL_10
case 16:
case 10:
value = 1;
for (n = 1; n < 4 * length; n++)
value = (value << 2) + 3;
if (! signed_flag)
value = 2*value+1;
break;
#endif
case 8:
value = signed_flag ? 0x7fffffffffffffff : 0xffffffffffffffff;
break;
case 4:
value = signed_flag ? 0x7fffffff : 0xffffffff;
break;
case 2:
value = signed_flag ? 0x7fff : 0xffff;
break;
case 1:
value = signed_flag ? 0x7f : 0xff;
break;
default:
internal_error (NULL, "Bad integer kind");
}
return value;
}
/* convert_real()-- Convert a character representation of a floating
* point number to the machine number. Returns nonzero if there is a
* range problem during conversion. Note: many architectures
* (e.g. IA-64, HP-PA) require that the storage pointed to by the dest
* argument is properly aligned for the type in question. TODO:
* handle not-a-numbers and infinities. */
int
convert_real (st_parameter_dt *dtp, void *dest, const char *buffer, int length)
{
errno = 0;
switch (length)
{
case 4:
*((GFC_REAL_4*) dest) =
#if defined(HAVE_STRTOF)
strtof (buffer, NULL);
#else
(GFC_REAL_4) strtod (buffer, NULL);
#endif
break;
case 8:
*((GFC_REAL_8*) dest) = strtod (buffer, NULL);
break;
#if defined(HAVE_GFC_REAL_10) && defined (HAVE_STRTOLD)
case 10:
*((GFC_REAL_10*) dest) = strtold (buffer, NULL);
break;
#endif
#if defined(HAVE_GFC_REAL_16) && defined (HAVE_STRTOLD)
case 16:
*((GFC_REAL_16*) dest) = strtold (buffer, NULL);
break;
#endif
default:
internal_error (&dtp->common, "Unsupported real kind during IO");
}
if (errno == EINVAL)
{
generate_error (&dtp->common, LIBERROR_READ_VALUE,
"Error during floating point read");
next_record (dtp, 1);
return 1;
}
return 0;
}
/* read_l()-- Read a logical value */
void
read_l (st_parameter_dt *dtp, const fnode *f, char *dest, int length)
{
char *p;
int w;
w = f->u.w;
p = read_block_form (dtp, &w);
if (p == NULL)
return;
while (*p == ' ')
{
if (--w == 0)
goto bad;
p++;
}
if (*p == '.')
{
if (--w == 0)
goto bad;
p++;
}
switch (*p)
{
case 't':
case 'T':
set_integer (dest, (GFC_INTEGER_LARGEST) 1, length);
break;
case 'f':
case 'F':
set_integer (dest, (GFC_INTEGER_LARGEST) 0, length);
break;
default:
bad:
generate_error (&dtp->common, LIBERROR_READ_VALUE,
"Bad value on logical read");
next_record (dtp, 1);
break;
}
}
static gfc_char4_t
read_utf8 (st_parameter_dt *dtp, int *nbytes)
{
static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 };
static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
int i, nb, nread;
gfc_char4_t c;
char *s;
*nbytes = 1;
s = read_block_form (dtp, nbytes);
if (s == NULL)
return 0;
/* If this is a short read, just return. */
if (*nbytes == 0)
return 0;
c = (uchar) s[0];
if (c < 0x80)
return c;
/* The number of leading 1-bits in the first byte indicates how many
bytes follow. */
for (nb = 2; nb < 7; nb++)
if ((c & ~masks[nb-1]) == patns[nb-1])
goto found;
goto invalid;
found:
c = (c & masks[nb-1]);
nread = nb - 1;
s = read_block_form (dtp, &nread);
if (s == NULL)
return 0;
/* Decode the bytes read. */
for (i = 1; i < nb; i++)
{
gfc_char4_t n = *s++;
if ((n & 0xC0) != 0x80)
goto invalid;
c = ((c << 6) + (n & 0x3F));
}
/* Make sure the shortest possible encoding was used. */
if (c <= 0x7F && nb > 1) goto invalid;
if (c <= 0x7FF && nb > 2) goto invalid;
if (c <= 0xFFFF && nb > 3) goto invalid;
if (c <= 0x1FFFFF && nb > 4) goto invalid;
if (c <= 0x3FFFFFF && nb > 5) goto invalid;
/* Make sure the character is valid. */
if (c > 0x7FFFFFFF || (c >= 0xD800 && c <= 0xDFFF))
goto invalid;
return c;
invalid:
generate_error (&dtp->common, LIBERROR_READ_VALUE, "Invalid UTF-8 encoding");
return (gfc_char4_t) '?';
}
static void
read_utf8_char1 (st_parameter_dt *dtp, char *p, int len, int width)
{
gfc_char4_t c;
char *dest;
int nbytes;
int i, j;
len = (width < len) ? len : width;
dest = (char *) p;
/* Proceed with decoding one character at a time. */
for (j = 0; j < len; j++, dest++)
{
c = read_utf8 (dtp, &nbytes);
/* Check for a short read and if so, break out. */
if (nbytes == 0)
break;
*dest = c > 255 ? '?' : (uchar) c;
}
/* If there was a short read, pad the remaining characters. */
for (i = j; i < len; i++)
*dest++ = ' ';
return;
}
static void
read_default_char1 (st_parameter_dt *dtp, char *p, int len, int width)
{
char *s;
int m, n;
s = read_block_form (dtp, &width);
if (s == NULL)
return;
if (width > len)
s += (width - len);
m = (width > len) ? len : width;
memcpy (p, s, m);
n = len - width;
if (n > 0)
memset (p + m, ' ', n);
}
static void
read_utf8_char4 (st_parameter_dt *dtp, void *p, int len, int width)
{
gfc_char4_t *dest;
int nbytes;
int i, j;
len = (width < len) ? len : width;
dest = (gfc_char4_t *) p;
/* Proceed with decoding one character at a time. */
for (j = 0; j < len; j++, dest++)
{
*dest = read_utf8 (dtp, &nbytes);
/* Check for a short read and if so, break out. */
if (nbytes == 0)
break;
}
/* If there was a short read, pad the remaining characters. */
for (i = j; i < len; i++)
*dest++ = (gfc_char4_t) ' ';
return;
}
static void
read_default_char4 (st_parameter_dt *dtp, char *p, int len, int width)
{
char *s;
gfc_char4_t *dest;
int m, n;
s = read_block_form (dtp, &width);
if (s == NULL)
return;
if (width > len)
s += (width - len);
m = ((int) width > len) ? len : (int) width;
dest = (gfc_char4_t *) p;
for (n = 0; n < m; n++, dest++, s++)
*dest = (unsigned char ) *s;
for (n = 0; n < len - (int) width; n++, dest++)
*dest = (unsigned char) ' ';
}
/* read_a()-- Read a character record into a KIND=1 character destination,
processing UTF-8 encoding if necessary. */
void
read_a (st_parameter_dt *dtp, const fnode *f, char *p, int length)
{
int wi;
int w;
wi = f->u.w;
if (wi == -1) /* '(A)' edit descriptor */
wi = length;
w = wi;
/* Read in w characters, treating comma as not a separator. */
dtp->u.p.sf_read_comma = 0;
if (dtp->u.p.current_unit->flags.encoding == ENCODING_UTF8)
read_utf8_char1 (dtp, p, length, w);
else
read_default_char1 (dtp, p, length, w);
dtp->u.p.sf_read_comma =
dtp->u.p.current_unit->decimal_status == DECIMAL_COMMA ? 0 : 1;
}
/* read_a_char4()-- Read a character record into a KIND=4 character destination,
processing UTF-8 encoding if necessary. */
void
read_a_char4 (st_parameter_dt *dtp, const fnode *f, char *p, int length)
{
int w;
w = f->u.w;
if (w == -1) /* '(A)' edit descriptor */
w = length;
/* Read in w characters, treating comma as not a separator. */
dtp->u.p.sf_read_comma = 0;
if (dtp->u.p.current_unit->flags.encoding == ENCODING_UTF8)
read_utf8_char4 (dtp, p, length, w);
else
read_default_char4 (dtp, p, length, w);
dtp->u.p.sf_read_comma =
dtp->u.p.current_unit->decimal_status == DECIMAL_COMMA ? 0 : 1;
}
/* eat_leading_spaces()-- Given a character pointer and a width,
* ignore the leading spaces. */
static char *
eat_leading_spaces (int *width, char *p)
{
for (;;)
{
if (*width == 0 || *p != ' ')
break;
(*width)--;
p++;
}
return p;
}
static char
next_char (st_parameter_dt *dtp, char **p, int *w)
{
char c, *q;
if (*w == 0)
return '\0';
q = *p;
c = *q++;
*p = q;
(*w)--;
if (c != ' ')
return c;
if (dtp->u.p.blank_status != BLANK_UNSPECIFIED)
return ' '; /* return a blank to signal a null */
/* At this point, the rest of the field has to be trailing blanks */
while (*w > 0)
{
if (*q++ != ' ')
return '?';
(*w)--;
}
*p = q;
return '\0';
}
/* read_decimal()-- Read a decimal integer value. The values here are
* signed values. */
void
read_decimal (st_parameter_dt *dtp, const fnode *f, char *dest, int length)
{
GFC_UINTEGER_LARGEST value, maxv, maxv_10;
GFC_INTEGER_LARGEST v;
int w, negative;
char c, *p;
w = f->u.w;
p = read_block_form (dtp, &w);
if (p == NULL)
return;
p = eat_leading_spaces (&w, p);
if (w == 0)
{
set_integer (dest, (GFC_INTEGER_LARGEST) 0, length);
return;
}
maxv = max_value (length, 1);
maxv_10 = maxv / 10;
negative = 0;
value = 0;
switch (*p)
{
case '-':
negative = 1;
/* Fall through */
case '+':
p++;
if (--w == 0)
goto bad;
/* Fall through */
default:
break;
}
/* At this point we have a digit-string */
value = 0;
for (;;)
{
c = next_char (dtp, &p, &w);
if (c == '\0')
break;
if (c == ' ')
{
if (dtp->u.p.blank_status == BLANK_NULL) continue;
if (dtp->u.p.blank_status == BLANK_ZERO) c = '0';
}
if (c < '0' || c > '9')
goto bad;
if (value > maxv_10 && compile_options.range_check == 1)
goto overflow;
c -= '0';
value = 10 * value;
if (value > maxv - c && compile_options.range_check == 1)
goto overflow;
value += c;
}
v = value;
if (negative)
v = -v;
set_integer (dest, v, length);
return;
bad:
generate_error (&dtp->common, LIBERROR_READ_VALUE,
"Bad value during integer read");
next_record (dtp, 1);
return;
overflow:
generate_error (&dtp->common, LIBERROR_READ_OVERFLOW,
"Value overflowed during integer read");
next_record (dtp, 1);
}
/* read_radix()-- This function reads values for non-decimal radixes.
* The difference here is that we treat the values here as unsigned
* values for the purposes of overflow. If minus sign is present and
* the top bit is set, the value will be incorrect. */
void
read_radix (st_parameter_dt *dtp, const fnode *f, char *dest, int length,
int radix)
{
GFC_UINTEGER_LARGEST value, maxv, maxv_r;
GFC_INTEGER_LARGEST v;
int w, negative;
char c, *p;
w = f->u.w;
p = read_block_form (dtp, &w);
if (p == NULL)
return;
p = eat_leading_spaces (&w, p);
if (w == 0)
{
set_integer (dest, (GFC_INTEGER_LARGEST) 0, length);
return;
}
maxv = max_value (length, 0);
maxv_r = maxv / radix;
negative = 0;
value = 0;
switch (*p)
{
case '-':
negative = 1;
/* Fall through */
case '+':
p++;
if (--w == 0)
goto bad;
/* Fall through */
default:
break;
}
/* At this point we have a digit-string */
value = 0;
for (;;)
{
c = next_char (dtp, &p, &w);
if (c == '\0')
break;
if (c == ' ')
{
if (dtp->u.p.blank_status == BLANK_NULL) continue;
if (dtp->u.p.blank_status == BLANK_ZERO) c = '0';
}
switch (radix)
{
case 2:
if (c < '0' || c > '1')
goto bad;
break;
case 8:
if (c < '0' || c > '7')
goto bad;
break;
case 16:
switch (c)
{
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
break;
case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
c = c - 'a' + '9' + 1;
break;
case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
c = c - 'A' + '9' + 1;
break;
default:
goto bad;
}
break;
}
if (value > maxv_r)
goto overflow;
c -= '0';
value = radix * value;
if (maxv - c < value)
goto overflow;
value += c;
}
v = value;
if (negative)
v = -v;
set_integer (dest, v, length);
return;
bad:
generate_error (&dtp->common, LIBERROR_READ_VALUE,
"Bad value during integer read");
next_record (dtp, 1);
return;
overflow:
generate_error (&dtp->common, LIBERROR_READ_OVERFLOW,
"Value overflowed during integer read");
next_record (dtp, 1);
}
/* read_f()-- Read a floating point number with F-style editing, which
is what all of the other floating point descriptors behave as. The
tricky part is that optional spaces are allowed after an E or D,
and the implicit decimal point if a decimal point is not present in
the input. */
void
read_f (st_parameter_dt *dtp, const fnode *f, char *dest, int length)
{
int w, seen_dp, exponent;
int exponent_sign;
const char *p;
char *buffer;
char *out;
int seen_int_digit; /* Seen a digit before the decimal point? */
int seen_dec_digit; /* Seen a digit after the decimal point? */
seen_dp = 0;
seen_int_digit = 0;
seen_dec_digit = 0;
exponent_sign = 1;
exponent = 0;
w = f->u.w;
/* Read in the next block. */
p = read_block_form (dtp, &w);
if (p == NULL)
return;
p = eat_leading_spaces (&w, (char*) p);
if (w == 0)
goto zero;
/* In this buffer we're going to re-format the number cleanly to be parsed
by convert_real in the end; this assures we're using strtod from the
C library for parsing and thus probably get the best accuracy possible.
This process may add a '+0.0' in front of the number as well as change the
exponent because of an implicit decimal point or the like. Thus allocating
strlen ("+0.0e-1000") == 10 characters plus one for NUL more than the
original buffer had should be enough. */
buffer = gfc_alloca (w + 11);
out = buffer;
/* Optional sign */
if (*p == '-' || *p == '+')
{
if (*p == '-')
*(out++) = '-';
++p;
--w;
}
p = eat_leading_spaces (&w, (char*) p);
if (w == 0)
goto zero;
/* Process the mantissa string. */
while (w > 0)
{
switch (*p)
{
case ',':
if (dtp->u.p.current_unit->decimal_status != DECIMAL_COMMA)
goto bad_float;
/* Fall through. */
case '.':
if (seen_dp)
goto bad_float;
if (!seen_int_digit)
*(out++) = '0';
*(out++) = '.';
seen_dp = 1;
break;
case ' ':
if (dtp->u.p.blank_status == BLANK_ZERO)
{
*(out++) = '0';
goto found_digit;
}
else if (dtp->u.p.blank_status == BLANK_NULL)
break;
else
/* TODO: Should we check instead that there are only trailing
blanks here, as is done below for exponents? */
goto done;
/* Fall through. */
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
*(out++) = *p;
found_digit:
if (!seen_dp)
seen_int_digit = 1;
else
seen_dec_digit = 1;
break;
case '-':
case '+':
goto exponent;
case 'e':
case 'E':
case 'd':
case 'D':
++p;
--w;
goto exponent;
default:
goto bad_float;
}
++p;
--w;
}
/* No exponent has been seen, so we use the current scale factor. */
exponent = - dtp->u.p.scale_factor;
goto done;
/* At this point the start of an exponent has been found. */
exponent:
p = eat_leading_spaces (&w, (char*) p);
if (*p == '-' || *p == '+')
{
if (*p == '-')
exponent_sign = -1;
++p;
--w;
}
/* At this point a digit string is required. We calculate the value
of the exponent in order to take account of the scale factor and
the d parameter before explict conversion takes place. */
if (w == 0)
goto bad_float;
if (dtp->u.p.blank_status == BLANK_UNSPECIFIED)
{
while (w > 0 && isdigit (*p))
{
exponent *= 10;
exponent += *p - '0';
++p;
--w;
}
/* Only allow trailing blanks. */
while (w > 0)
{
if (*p != ' ')
goto bad_float;
++p;
--w;
}
}
else /* BZ or BN status is enabled. */
{
while (w > 0)
{
if (*p == ' ')
{
if (dtp->u.p.blank_status == BLANK_ZERO)
exponent *= 10;
else
assert (dtp->u.p.blank_status == BLANK_NULL);
}
else if (!isdigit (*p))
goto bad_float;
else
{
exponent *= 10;
exponent += *p - '0';
}
++p;
--w;
}
}
exponent *= exponent_sign;
done:
/* Use the precision specified in the format if no decimal point has been
seen. */
if (!seen_dp)
exponent -= f->u.real.d;
/* Output a trailing '0' after decimal point if not yet found. */
if (seen_dp && !seen_dec_digit)
*(out++) = '0';
/* Print out the exponent to finish the reformatted number. Maximum 4
digits for the exponent. */
if (exponent != 0)
{
int dig;
*(out++) = 'e';
if (exponent < 0)
{
*(out++) = '-';
exponent = - exponent;
}
assert (exponent < 10000);
for (dig = 3; dig >= 0; --dig)
{
out[dig] = (char) ('0' + exponent % 10);
exponent /= 10;
}
out += 4;
}
*(out++) = '\0';
/* Do the actual conversion. */
convert_real (dtp, dest, buffer, length);
return;
/* The value read is zero. */
zero:
switch (length)
{
case 4:
*((GFC_REAL_4 *) dest) = 0.0;
break;
case 8:
*((GFC_REAL_8 *) dest) = 0.0;
break;
#ifdef HAVE_GFC_REAL_10
case 10:
*((GFC_REAL_10 *) dest) = 0.0;
break;
#endif
#ifdef HAVE_GFC_REAL_16
case 16:
*((GFC_REAL_16 *) dest) = 0.0;
break;
#endif
default:
internal_error (&dtp->common, "Unsupported real kind during IO");
}
return;
bad_float:
generate_error (&dtp->common, LIBERROR_READ_VALUE,
"Bad value during floating point read");
next_record (dtp, 1);
return;
}
/* read_x()-- Deal with the X/TR descriptor. We just read some data
* and never look at it. */
void
read_x (st_parameter_dt * dtp, int n)
{
if ((dtp->u.p.current_unit->pad_status == PAD_NO || is_internal_unit (dtp))
&& dtp->u.p.current_unit->bytes_left < n)
n = dtp->u.p.current_unit->bytes_left;
dtp->u.p.sf_read_comma = 0;
if (n > 0)
read_sf (dtp, &n, 1);
dtp->u.p.sf_read_comma = 1;
dtp->u.p.current_unit->strm_pos += (gfc_offset) n;
}