Fix Linux getcwd for long paths

The getcwd syscall (so far?) can only handle paths up to one page
in size.  There is no limit on directory hierarchy depth, though,
and the POSIX getcwd is supposed to handle this.  When the syscall
fails with ENAMETOOLONG, fall back to the generic getcwd.

Additionally, optimize the generic getcwd to use openat when possible
to change the asymptotic performance from O(n^2) to O(n).
This commit is contained in:
Ulrich Drepper 2011-05-08 08:37:19 -04:00
parent 28377d1bf5
commit 7fb90fb89b
11 changed files with 320 additions and 159 deletions

View File

@ -1,5 +1,18 @@
2011-05-08 Ulrich Drepper <drepper@gmail.com>
[BZ #12713]
* sysdeps/unix/sysv/linux/getcwd.c: If getcwd syscall reports
ENAMETOOLONG use generic getcwd.
* sysdeps/posix/getcwd.c: Add support to use openat.
* sysdeps/unix/sysv/linux/Makefile [subdir=elf] (sysdep-rtld-routines):
Add dl-getcwd.
* sysdeps/unix/sysv/linux/dl-getcwd.c: New file.
* include/sys/stat.h: Define __fstatat macro.
* include/dirent.h: Add libc_hidden_proto for rewinddir.
* dirent/rewinddir.c: Add libc_hidden_def.
* sysdeps/mach/hurd/rewinddir.c: Likewise.
* sysdeps/unix/rewinddir.c: Likewise.
* include/dirent.h (__alloc_dir): Add flags parameter.
* sysdeps/unix/fdopendir.c (__fdopendir): Pass flags to __alloc_dir.
* sysdeps/unix/opendir.c (__opendir): Pass 0 in new parameter to

6
NEWS
View File

@ -1,4 +1,4 @@
GNU C Library NEWS -- history of user-visible changes. 2011-5-7
GNU C Library NEWS -- history of user-visible changes. 2011-5-8
Copyright (C) 1992-2009, 2010, 2011 Free Software Foundation, Inc.
See the end for copying conditions.
@ -23,8 +23,8 @@ Version 2.14
* The following bugs are resolved with this release:
11724, 12393, 12420, 12445, 12454, 12460, 12469, 12489, 12509, 12510,
12518, 12583, 12587, 12597, 12631, 12650, 12653, 12655, 12685, 12714,
12717, 12723, 12734
12518, 12583, 12587, 12597, 12631, 12650, 12653, 12655, 12685, 12713,
12714, 12717, 12723, 12734
Version 2.13

View File

@ -1,4 +1,4 @@
/* Copyright (C) 1991, 1995, 1996, 1997 Free Software Foundation, Inc.
/* Copyright (C) 1991, 1995, 1996, 1997, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@ -29,6 +29,7 @@ rewinddir (dirp)
__set_errno (ENOSYS);
/* No way to indicate failure. */
}
libc_hidden_def (rewinddir)
stub_warning (rewinddir)

View File

@ -32,4 +32,6 @@ extern DIR *__alloc_dir (int fd, bool close_fd, int flags,
const struct stat64 *statp)
internal_function;
libc_hidden_proto (rewinddir)
#endif

View File

@ -48,4 +48,6 @@ libc_hidden_proto (__fxstatat64)
#define fstat64(fd, buf) __fxstat64 (_STAT_VER, fd, buf)
#define fstat(fd, buf) __fxstat (_STAT_VER, fd, buf)
#define __fstat(fd, buf) __fxstat (_STAT_VER, fd, buf)
#define __fstatat(dfd, fname, buf, flag) \
__fxstatat (_STAT_VER, dfd, fname, buf, flag)
#endif

View File

@ -1,4 +1,4 @@
/* Copyright (C) 1994, 1997 Free Software Foundation, Inc.
/* Copyright (C) 1994, 1997, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@ -22,10 +22,10 @@
#include <unistd.h>
/* Rewind DIRP to the beginning of the directory. */
/* XXX should be __rewinddir ? */
void
rewinddir (dirp)
DIR *dirp;
{
seekdir (dirp, (off_t) 0L);
}
libc_hidden_def (rewinddir)

View File

@ -1,4 +1,4 @@
/* Copyright (C) 1991,92,93,94,95,96,97,98,99 Free Software Foundation, Inc.
/* Copyright (C) 1991,92,93,94,95,96,97,98,99,11 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@ -35,6 +35,7 @@
#endif
#include <errno.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
@ -171,6 +172,13 @@ extern char *alloca ();
# include <sys/param.h>
#endif
#if defined _LIBC && !defined NOT_IN_libc
# include <not-cancel.h>
#else
# define openat_not_cancel_3(dfd, name, mode) openat (dfd, name, mode)
# define close_not_cancel_no_status(fd) close (fd)
#endif
#ifndef PATH_MAX
# ifdef MAXPATHLEN
# define PATH_MAX MAXPATHLEN
@ -200,6 +208,12 @@ extern char *alloca ();
# define GETCWD_RETURN_TYPE char *
#endif
#ifdef __ASSUME_ATFCTS
# define have_openat 1
#else
static int have_openat = 0;
#endif
/* Get the pathname of the current working directory, and put it in SIZE
bytes of BUF. Returns NULL if the directory couldn't be determined or
SIZE was too small. If successful, returns BUF. In GNU, if BUF is
@ -211,6 +225,7 @@ __getcwd (buf, size)
char *buf;
size_t size;
{
#ifndef __ASSUME_ATFCTS
static const char dots[]
= "../../../../../../../../../../../../../../../../../../../../../../../\
../../../../../../../../../../../../../../../../../../../../../../../../../../\
@ -218,14 +233,15 @@ __getcwd (buf, size)
const char *dotp = &dots[sizeof (dots)];
const char *dotlist = dots;
size_t dotsize = sizeof (dots) - 1;
dev_t rootdev, thisdev;
ino_t rootino, thisino;
char *path;
register char *pathp;
struct stat st;
#endif
int prev_errno = errno;
size_t allocated = size;
DIR *dirstream = NULL;
bool fd_needs_closing = false;
int fd = AT_FDCWD;
char *path;
#ifndef NO_ALLOCATION
size_t allocated = size;
if (size == 0)
{
if (buf != NULL)
@ -237,189 +253,285 @@ __getcwd (buf, size)
allocated = PATH_MAX + 1;
}
if (buf != NULL)
path = buf;
else
if (buf == NULL)
{
path = malloc (allocated);
if (path == NULL)
return NULL;
}
else
#else
# define allocated size
#endif
path = buf;
pathp = path + allocated;
char *pathp = path + allocated;
*--pathp = '\0';
struct stat st;
if (__lstat (".", &st) < 0)
goto lose2;
thisdev = st.st_dev;
thisino = st.st_ino;
goto lose;
dev_t thisdev = st.st_dev;
ino_t thisino = st.st_ino;
if (__lstat ("/", &st) < 0)
goto lose2;
rootdev = st.st_dev;
rootino = st.st_ino;
goto lose;
dev_t rootdev = st.st_dev;
ino_t rootino = st.st_ino;
while (!(thisdev == rootdev && thisino == rootino))
{
register DIR *dirstream;
struct dirent *d;
dev_t dotdev;
ino_t dotino;
char mount_point;
/* Look at the parent directory. */
if (dotp == dotlist)
if (have_openat >= 0)
{
/* My, what a deep directory tree you have, Grandma. */
char *new;
if (dotlist == dots)
{
new = malloc (dotsize * 2 + 1);
if (new == NULL)
goto lose;
#ifdef HAVE_MEMPCPY
dotp = mempcpy (new, dots, dotsize);
#else
memcpy (new, dots, dotsize);
dotp = &new[dotsize];
int mode = O_RDONLY;
#ifdef O_CLOEXEC
mode |= O_CLOEXEC;
#endif
}
else
{
new = realloc ((__ptr_t) dotlist, dotsize * 2 + 1);
if (new == NULL)
goto lose;
dotp = &new[dotsize];
}
#ifdef HAVE_MEMPCPY
*((char *) mempcpy ((char *) dotp, new, dotsize)) = '\0';
dotsize *= 2;
#else
memcpy ((char *) dotp, new, dotsize);
dotsize *= 2;
new[dotsize] = '\0';
#endif
dotlist = new;
fd = openat_not_cancel_3 (fd, "..", mode);
}
else
fd = -1;
if (fd >= 0)
{
fd_needs_closing = true;
if (__fstat (fd, &st) < 0)
goto lose;
}
#ifndef __ASSUME_ATFCTS
else if (errno == ENOSYS)
{
have_openat = -1;
dotp -= 3;
/* Look at the parent directory. */
if (dotp == dotlist)
{
# ifdef NO_ALLOCATION
__set_errno (ENOMEM);
goto lose;
# else
/* My, what a deep directory tree you have, Grandma. */
char *new;
if (dotlist == dots)
{
new = malloc (dotsize * 2 + 1);
if (new == NULL)
goto lose;
# ifdef HAVE_MEMPCPY
dotp = mempcpy (new, dots, dotsize);
# else
memcpy (new, dots, dotsize);
dotp = &new[dotsize];
# endif
}
else
{
new = realloc ((__ptr_t) dotlist, dotsize * 2 + 1);
if (new == NULL)
goto lose;
dotp = &new[dotsize];
}
# ifdef HAVE_MEMPCPY
*((char *) mempcpy ((char *) dotp, new, dotsize)) = '\0';
dotsize *= 2;
# else
memcpy ((char *) dotp, new, dotsize);
dotsize *= 2;
new[dotsize] = '\0';
# endif
dotlist = new;
# endif
}
/* Figure out if this directory is a mount point. */
if (__lstat (dotp, &st) < 0)
dotp -= 3;
/* Figure out if this directory is a mount point. */
if (__lstat (dotp, &st) < 0)
goto lose;
}
#endif
else
goto lose;
dotdev = st.st_dev;
dotino = st.st_ino;
mount_point = dotdev != thisdev;
if (dirstream && __closedir (dirstream) != 0)
{
dirstream = NULL;
goto lose;
}
dev_t dotdev = st.st_dev;
ino_t dotino = st.st_ino;
bool mount_point = dotdev != thisdev;
/* Search for the last directory. */
dirstream = __opendir (dotp);
if (have_openat >= 0)
dirstream = __fdopendir (fd);
#ifndef __ASSUME_ATFCTS
else
dirstream = __opendir (dotp);
#endif
if (dirstream == NULL)
goto lose;
/* Clear errno to distinguish EOF from error if readdir returns
NULL. */
__set_errno (0);
while ((d = __readdir (dirstream)) != NULL)
fd_needs_closing = false;
struct dirent *d;
bool use_d_ino = true;
while (1)
{
if (d->d_name[0] == '.' &&
(d->d_name[1] == '\0' ||
(d->d_name[1] == '.' && d->d_name[2] == '\0')))
continue;
if (mount_point || (ino_t) d->d_ino == thisino)
/* Clear errno to distinguish EOF from error if readdir returns
NULL. */
__set_errno (0);
d = __readdir (dirstream);
if (d == NULL)
{
if (errno == 0)
{
/* When we've iterated through all directory entries
without finding one with a matching d_ino, rewind the
stream and consider each name again, but this time, using
lstat. This is necessary in a chroot on at least one
system. */
if (use_d_ino)
{
use_d_ino = false;
rewinddir (dirstream);
continue;
}
/* EOF on dirstream, which means that the current directory
has been removed. */
__set_errno (ENOENT);
}
goto lose;
}
if (d->d_type != DT_DIR && d->d_type != DT_UNKNOWN)
continue;
if (d->d_name[0] == '.'
&& (d->d_name[1] == '\0'
|| (d->d_name[1] == '.' && d->d_name[2] == '\0')))
continue;
if (use_d_ino && !mount_point && (ino_t) d->d_ino != thisino)
continue;
if (have_openat >= 0)
{
char name[dotlist + dotsize - dotp + 1 + _D_ALLOC_NAMLEN (d)];
#ifdef HAVE_MEMPCPY
char *tmp = mempcpy (name, dotp, dotlist + dotsize - dotp);
*tmp++ = '/';
strcpy (tmp, d->d_name);
#else
memcpy (name, dotp, dotlist + dotsize - dotp);
name[dotlist + dotsize - dotp] = '/';
strcpy (&name[dotlist + dotsize - dotp + 1], d->d_name);
#endif
/* We don't fail here if we cannot stat() a directory entry.
This can happen when (network) filesystems fail. If this
entry is in fact the one we are looking for we will find
out soon as we reach the end of the directory without
having found anything. */
if (__lstat (name, &st) >= 0
&& st.st_dev == thisdev && st.st_ino == thisino)
break;
if (__fstatat (fd, d->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
continue;
}
}
if (d == NULL)
{
int save = errno;
(void) __closedir (dirstream);
if (save == 0)
/* EOF on dirstream, which means that the current directory
has been removed. */
save = ENOENT;
__set_errno (save);
goto lose;
}
else
{
size_t namlen = _D_EXACT_NAMLEN (d);
if ((size_t) (pathp - path) <= namlen)
#ifndef __ASSUME_ATFCTS
else
{
if (size != 0)
{
(void) __closedir (dirstream);
__set_errno (ERANGE);
goto lose;
}
else
{
char *tmp;
size_t oldsize = allocated;
allocated = 2 * MAX (allocated, namlen);
tmp = realloc (path, allocated);
if (tmp == NULL)
{
(void) __closedir (dirstream);
__set_errno (ENOMEM);/* closedir might have changed it.*/
goto lose;
}
/* Move current contents up to the end of the buffer.
This is guaranteed to be non-overlapping. */
pathp = memcpy (tmp + allocated - (path + oldsize - pathp),
tmp + (pathp - path),
path + oldsize - pathp);
path = tmp;
}
char name[dotlist + dotsize - dotp + 1 + _D_ALLOC_NAMLEN (d)];
# ifdef HAVE_MEMPCPY
char *tmp = mempcpy (name, dotp, dotlist + dotsize - dotp);
*tmp++ = '/';
strcpy (tmp, d->d_name);
# else
memcpy (name, dotp, dotlist + dotsize - dotp);
name[dotlist + dotsize - dotp] = '/';
strcpy (&name[dotlist + dotsize - dotp + 1], d->d_name);
# endif
/* We don't fail here if we cannot stat() a directory entry.
This can happen when (network) filesystems fail. If this
entry is in fact the one we are looking for we will find
out soon as we reach the end of the directory without
having found anything. */
if (__lstat (name, &st) < 0)
continue;
}
pathp -= namlen;
(void) memcpy (pathp, d->d_name, namlen);
*--pathp = '/';
(void) __closedir (dirstream);
#endif
if (S_ISDIR (st.st_mode)
&& st.st_dev == thisdev && st.st_ino == thisino)
break;
}
size_t namlen = _D_EXACT_NAMLEN (d);
if ((size_t) (pathp - path) <= namlen)
{
#ifndef NO_ALLOCATION
if (size == 0)
{
size_t oldsize = allocated;
allocated = 2 * MAX (allocated, namlen);
char *tmp = realloc (path, allocated);
if (tmp == NULL)
goto lose;
/* Move current contents up to the end of the buffer.
This is guaranteed to be non-overlapping. */
pathp = memcpy (tmp + allocated - (path + oldsize - pathp),
tmp + (pathp - path),
path + oldsize - pathp);
path = tmp;
}
else
#endif
{
__set_errno (ERANGE);
goto lose;
}
}
pathp -= namlen;
(void) memcpy (pathp, d->d_name, namlen);
*--pathp = '/';
thisdev = dotdev;
thisino = dotino;
}
if (dirstream != NULL && __closedir (dirstream) != 0)
{
dirstream = NULL;
goto lose;
}
if (pathp == &path[allocated - 1])
*--pathp = '/';
#ifndef __ASSUME_ATFCTS
if (dotlist != dots)
free ((__ptr_t) dotlist);
#endif
memmove (path, pathp, path + allocated - pathp);
size_t used = path + allocated - pathp;
memmove (path, pathp, used);
if (size == 0)
/* Ensure that the buffer is only as large as necessary. */
buf = realloc (path, used);
if (buf == NULL)
/* Either buf was NULL all along, or `realloc' failed but
we still have the original string. */
buf = path;
/* Restore errno on successful return. */
__set_errno (prev_errno);
return path;
return buf;
lose:
lose:;
int save_errno = errno;
#ifndef __ASSUME_ATFCTS
if (dotlist != dots)
free ((__ptr_t) dotlist);
lose2:
#endif
if (dirstream != NULL)
__closedir (dirstream);
if (fd_needs_closing)
close_not_cancel_no_status (fd);
#ifndef NO_ALLOCATION
if (buf == NULL)
free (path);
#endif
__set_errno (save_errno);
return NULL;
}

View File

@ -1,4 +1,4 @@
/* Copyright (C) 1991, 1995-1998, 2005 Free Software Foundation, Inc.
/* Copyright (C) 1991, 1995-1998, 2005, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@ -35,3 +35,4 @@ rewinddir (dirp)
dirp->size = 0;
__libc_lock_unlock (dirp->lock);
}
libc_hidden_def (rewinddir)

View File

@ -147,7 +147,7 @@ sysdep_routines += xstatconv internal_statvfs internal_statvfs64 \
endif
ifeq ($(subdir),elf)
sysdep-rtld-routines += dl-brk dl-sbrk
sysdep-rtld-routines += dl-brk dl-sbrk dl-getcwd
CPPFLAGS-lddlibc4 += -DNOT_IN_libc
endif

View File

@ -0,0 +1 @@
#include "getcwd.c"

View File

@ -1,5 +1,5 @@
/* Determine current working directory. Linux version.
Copyright (C) 1997,1998,1999,2000,2002,2003,2006
Copyright (C) 1997,1998,1999,2000,2002,2003,2006,2011
Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
@ -45,20 +45,13 @@
compiling under 2.1.92+ the libc still runs under older kernels. */
# define no_syscall_getcwd 0
# define have_new_dcache 1
/* This is a trick since we don't define generic_getcwd. */
# define generic_getcwd getcwd
#else
/* The "proc" filesystem provides an easy method to retrieve the value.
For each process, the corresponding directory contains a symbolic link
named `cwd'. Reading the content of this link immediately gives us the
information. But we have to take care for systems which do not have
the proc filesystem mounted. Use the POSIX implementation in this case. */
static char *generic_getcwd (char *buf, size_t size) internal_function;
# if __NR_getcwd
/* Kernel 2.1.92 introduced a third way to get the current working
directory: a syscall. We've got to be careful that even when
compiling under 2.1.92+ the libc still runs under older kernels. */
compiling under 2.1.92+ the libc still runs under older kernels.
An additional problem is that the system call does not return
the path of directories longer than one page. */
static int no_syscall_getcwd;
static int have_new_dcache;
# else
@ -67,6 +60,13 @@ static int have_new_dcache = 1;
# endif
#endif
/* The "proc" filesystem provides an easy method to retrieve the value.
For each process, the corresponding directory contains a symbolic link
named `cwd'. Reading the content of this link immediately gives us the
information. But we have to take care for systems which do not have
the proc filesystem mounted. Use the POSIX implementation in this case. */
static char *generic_getcwd (char *buf, size_t size) internal_function;
char *
__getcwd (char *buf, size_t size)
{
@ -124,6 +124,33 @@ __getcwd (char *buf, size_t size)
return buf;
}
// XXX This should not be necessary but the full getcwd implementation
// drags in too much for the current build process of ld.so to handle.
#ifndef NOT_IN_libc
/* The system call cannot handle paths longer than a page.
Neither can the magic symlink in /proc/self. Just use the
generic implementation right away. */
if (errno == ENAMETOOLONG)
{
# ifndef NO_ALLOCATION
if (buf == NULL && size == 0)
{
free (path);
path = NULL;
}
# endif
result = generic_getcwd (path, size);
# ifndef NO_ALLOCATION
if (result == NULL && buf == NULL && size != 0)
free (path);
# endif
return result;
}
#endif
# if __ASSUME_GETCWD_SYSCALL
/* It should never happen that the `getcwd' syscall failed because
the buffer is too small if we allocated the buffer ourselves
@ -196,7 +223,7 @@ __getcwd (char *buf, size_t size)
#ifndef NO_ALLOCATION
/* Don't put restrictions on the length of the path unless the user does. */
if (size == 0)
if (buf == NULL && size == 0)
{
free (path);
path = NULL;
@ -214,9 +241,11 @@ __getcwd (char *buf, size_t size)
}
weak_alias (__getcwd, getcwd)
#if __ASSUME_GETCWD_SYSCALL == 0
// XXX This should not be necessary but the full getcwd implementation
// drags in too much for the current build process of ld.so to handle.
#ifndef NOT_IN_libc
/* Get the code for the generic version. */
# define GETCWD_RETURN_TYPE static char * internal_function
# define __getcwd generic_getcwd
# include <sysdeps/posix/getcwd.c>
#define GETCWD_RETURN_TYPE static char * internal_function
#define __getcwd generic_getcwd
#include <sysdeps/posix/getcwd.c>
#endif