Linux: Add memfd_create system call wrapper

The system call is somewhat obscure because it is closely related
to file descriptor sealing.  However, it is also the recommended
way to create alias mappings, which is why it has more general use.

No emulation is provided.  Except for the name of the
/proc/self/fd links, it would be possible to implement an
approximation using O_TMPFILE and tmpfs, but this does not appear
to be worth the added complexity.

Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Reviewed-by: Christian Brauner <christian.brauner@ubuntu.com>
This commit is contained in:
Florian Weimer 2017-11-23 10:00:40 +01:00
parent 0a9d1d62b3
commit 59d2cbb1fe
36 changed files with 249 additions and 1 deletions

View File

@ -1,3 +1,17 @@
2017-11-23 Florian Weimer <fweimer@redhat.com>
Linux: Add memfd_create system call wrapper
* sysdeps/unix/sysv/linux/Makefile [misc] (tests): Add
tst-memfd_create.
* sysdeps/unix/sysv/linux/bits/mman-linux.h [__USE_GNU]
(MFD_CLOEXEC, MFD_ALLOW_SEALING, MFD_HUGETLB): Define.
[__USE_GNU] (memfd_create): Declare.
* sysdeps/unix/sysv/linux/Versions (GLIBC_2.27): Add memfd_create.
* sysdeps/unix/sysv/linux/syscalls.list (memfd_create): Add.
* sysdeps/unix/sysv/linux/tst-memfd_create.c: New file.
* sysdeps/unix/sysv/linux/**.abilist: Update.
* manual/llio.texi (Memory-mapped I/O): Document memfd_create.
2017-11-22 Joseph Myers <joseph@codesourcery.com>
* localedata/gen-locale.sh: Fix typo in variable name.

2
NEWS
View File

@ -35,6 +35,8 @@ Major new features:
are the same interfaces added in version 2.26 for some platforms where
this format is supported but is not the format of long double.
* glibc now implements the memfd_create function on Linux.
Deprecated and removed features, and other changes affecting compatibility:
* On GNU/Linux, the obsolete Linux constant PTRACE_SEIZE_DEVEL is no longer

View File

@ -1801,6 +1801,69 @@ the given @var{name} previously created by @code{shm_open}.
On failure @code{errno} is set.
@end deftypefn
@deftypefun int memfd_create (const char *@var{name}, unsigned int @var{flags})
@standards{Linux, sys/mman.h}
@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}}
The @code{memfd_create} function returns a file descriptor which can be
used to create memory mappings using the @code{mmap} function. It is
similar to the @code{shm_open} function in the sense that these mappings
are not backed by actual files. However, the descriptor returned by
@code{memfd_create} does not correspond to a named object; the
@var{name} argument is used for debugging purposes only (e.g., it will
appear in @file{/proc}), and separate invocations of @code{memfd_create}
with the same @var{name} will not return descriptors for the same region
of memory. The descriptor can also be used to create alias mappings
within the same process.
The descriptor initially refers to a zero-length file. Before mappings
can be created which are backed by memory, the file size needs to be
increased with the @code{ftruncate} function. @xref{File Size}.
The @var{flags} argument can be a combination of the following flags:
@vtable @code
@item MFD_CLOEXEC
@standards{Linux, sys/mman.h}
The descriptor is created with the @code{O_CLOEXEC} flag.
@item MFD_ALLOW_SEALING
@standards{Linux, sys/mman.h}
The descriptor supports the addition of seals using the @code{fcntl}
function.
@item MFD_HUGETLB
@standards{Linux, sys/mman.h}
This requests that mappings created using the returned file descriptor
use a larger page size. See @code{MAP_HUGETLB} above for details.
This flag is incompatible with @code{MFD_ALLOW_SEALING}.
@end vtable
@code{memfd_create} returns a file descriptor on success, and @math{-1}
on failure.
The following @code{errno} error conditions are defined for this
function:
@table @code
@item EINVAL
An invalid combination is specified in @var{flags}, or @var{name} is
too long.
@item EFAULT
The @var{name} argument does not point to a string.
@item EMFILE
The operation would exceed the file descriptor limit for this process.
@item ENFILE
The operation would exceed the system-wide file descriptor limit.
@item ENOMEM
There is not enough memory for the operation.
@end table
@end deftypefun
@node Waiting for I/O
@section Waiting for Input or Output
@cindex waiting for input or output

View File

@ -44,7 +44,7 @@ sysdep_headers += sys/mount.h sys/acct.h sys/sysctl.h \
tests += tst-clone tst-clone2 tst-clone3 tst-fanotify tst-personality \
tst-quota tst-sync_file_range tst-sysconf-iov_max tst-ttyname \
test-errno-linux
test-errno-linux tst-memfd_create
# Generate the list of SYS_* macros for the system calls (__NR_*
# macros). The file syscall-names.list contains all possible system

View File

@ -166,6 +166,9 @@ libc {
GLIBC_2.15 {
process_vm_readv; process_vm_writev;
}
GLIBC_2.27 {
memfd_create;
}
GLIBC_PRIVATE {
# functions used in other libraries
__syscall_rt_sigqueueinfo;

View File

@ -2106,6 +2106,7 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F
GLIBC_2.27 strfromf128 F
GLIBC_2.27 strtof128 F
GLIBC_2.27 strtof128_l F

View File

@ -2017,6 +2017,7 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F
GLIBC_2.27 strfromf128 F
GLIBC_2.27 strtof128 F
GLIBC_2.27 strtof128_l F

View File

@ -107,6 +107,7 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F
GLIBC_2.4 GLIBC_2.4 A
GLIBC_2.4 _Exit F
GLIBC_2.4 _IO_2_1_stderr_ D 0xa0

View File

@ -111,3 +111,19 @@
# define MCL_ONFAULT 4 /* Lock all pages that are
faulted in. */
#endif
#ifdef __USE_GNU
/* Flags for memfd_create. */
/* Create the descriptor with the close-on-exec flag set. */
# define MFD_CLOEXEC 1U
/* Allow seals to be added to the descriptor using fcntl. */
# define MFD_ALLOW_SEALING 2U
/* Request a larger page size for mappings created from the
descriptor. Documented as incompatible with MFD_ALLOW_SEALING. */
# define MFD_HUGETLB 4U
__BEGIN_DECLS
/* Create a new memory file descriptor. NAME is a name for debugging.
FLAGS is a combination of the MFD_* constants. Returns the new file
descriptor on success, or -1 on failure with errno set. */
int memfd_create (const char *__name, unsigned int __flags) __THROW;
__END_DECLS
#endif /* __USE_GNU */

View File

@ -1871,6 +1871,7 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F
GLIBC_2.3 GLIBC_2.3 A
GLIBC_2.3 __ctype_b_loc F
GLIBC_2.3 __ctype_tolower_loc F

View File

@ -2036,6 +2036,7 @@ GLIBC_2.26 wcstof128_l F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F
GLIBC_2.3 GLIBC_2.3 A
GLIBC_2.3 __ctype_b_loc F
GLIBC_2.3 __ctype_tolower_loc F

View File

@ -1900,6 +1900,7 @@ GLIBC_2.26 wcstof128_l F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F
GLIBC_2.3 GLIBC_2.3 A
GLIBC_2.3 __ctype_b_loc F
GLIBC_2.3 __ctype_tolower_loc F

View File

@ -108,6 +108,7 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F
GLIBC_2.4 GLIBC_2.4 A
GLIBC_2.4 _Exit F
GLIBC_2.4 _IO_2_1_stderr_ D 0x98

View File

@ -1985,6 +1985,7 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F
GLIBC_2.3 GLIBC_2.3 A
GLIBC_2.3 __ctype_b_loc F
GLIBC_2.3 __ctype_tolower_loc F

View File

@ -2106,3 +2106,4 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F

View File

@ -1960,6 +1960,7 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F
GLIBC_2.3 GLIBC_2.3 A
GLIBC_2.3 __ctype_b_loc F
GLIBC_2.3 __ctype_tolower_loc F

View File

@ -1958,6 +1958,7 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F
GLIBC_2.3 GLIBC_2.3 A
GLIBC_2.3 __ctype_b_loc F
GLIBC_2.3 __ctype_tolower_loc F

View File

@ -1956,6 +1956,7 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F
GLIBC_2.27 strfromf128 F
GLIBC_2.27 strtof128 F
GLIBC_2.27 strtof128_l F

View File

@ -1951,6 +1951,7 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F
GLIBC_2.27 strfromf128 F
GLIBC_2.27 strtof128 F
GLIBC_2.27 strtof128_l F

View File

@ -2147,3 +2147,4 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F

View File

@ -1989,6 +1989,7 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F
GLIBC_2.3 GLIBC_2.3 A
GLIBC_2.3 __ctype_b_loc F
GLIBC_2.3 __ctype_tolower_loc F

View File

@ -1994,6 +1994,7 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F
GLIBC_2.3 GLIBC_2.3 A
GLIBC_2.3 __ctype_b_loc F
GLIBC_2.3 __ctype_tolower_loc F

View File

@ -2201,3 +2201,4 @@ GLIBC_2.26 wcstof128_l F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F

View File

@ -108,6 +108,7 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F
GLIBC_2.3 GLIBC_2.3 A
GLIBC_2.3 _Exit F
GLIBC_2.3 _IO_2_1_stderr_ D 0xe0

View File

@ -1989,6 +1989,7 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F
GLIBC_2.27 strfromf128 F
GLIBC_2.27 strtof128 F
GLIBC_2.27 strtof128_l F

View File

@ -1890,6 +1890,7 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F
GLIBC_2.27 strfromf128 F
GLIBC_2.27 strtof128 F
GLIBC_2.27 strtof128_l F

View File

@ -1875,6 +1875,7 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F
GLIBC_2.3 GLIBC_2.3 A
GLIBC_2.3 __ctype_b_loc F
GLIBC_2.3 __ctype_tolower_loc F

View File

@ -1982,6 +1982,7 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F
GLIBC_2.27 strfromf128 F
GLIBC_2.27 strtof128 F
GLIBC_2.27 strtof128_l F

View File

@ -1919,6 +1919,7 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F
GLIBC_2.27 strfromf128 F
GLIBC_2.27 strtof128 F
GLIBC_2.27 strtof128_l F

View File

@ -109,3 +109,4 @@ setns EXTRA setns i:ii setns
process_vm_readv EXTRA process_vm_readv i:ipipii process_vm_readv
process_vm_writev EXTRA process_vm_writev i:ipipii process_vm_writev
memfd_create EXTRA memfd_create i:si memfd_create

View File

@ -2113,3 +2113,4 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F

View File

@ -2113,3 +2113,4 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F

View File

@ -2113,3 +2113,4 @@ GLIBC_2.26 reallocarray F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F

View File

@ -0,0 +1,121 @@
/* Test for the memfd_create system call.
Copyright (C) 2017 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <support/check.h>
#include <support/support.h>
#include <support/test-driver.h>
#include <support/xunistd.h>
#include <sys/mman.h>
/* Query the descriptor flags of FD with F_GETFD and report whether
   FD_CLOEXEC is among them.  The fcntl result is checked with
   TEST_VERIFY.  */
static bool
is_cloexec (int fd)
{
  int fd_flags = fcntl (fd, F_GETFD);
  TEST_VERIFY (fd_flags >= 0);
  return (fd_flags & FD_CLOEXEC) != 0;
}
/* Fetch the current seal set of FD using F_GET_SEALS.  The fcntl
   result is checked with TEST_VERIFY and then returned unchanged.  */
static int
get_seals (int fd)
{
  int seals = fcntl (fd, F_GET_SEALS);
  TEST_VERIFY (seals >= 0);
  return seals;
}
/* Report whether F_SEAL_SEAL is part of the seal set of FD, as
   obtained from get_seals.  */
static bool
is_sealed (int fd)
{
  int seals = get_seals (fd);
  return (seals & F_SEAL_SEAL) != 0;
}
/* Exercise memfd_create with every combination of MFD_CLOEXEC and
   MFD_ALLOW_SEALING.  For each descriptor obtained, check the name of
   the /proc/self/fd link, the close-on-exec state, and the sealing
   state.  Returns EXIT_UNSUPPORTED if memfd_create failed with ENOSYS,
   and 0 otherwise.  Any other memfd_create failure is reported through
   FAIL_EXIT1.  */
static int
do_test (void)
{
  /* Initialized by the first call to memfd_create to 0 (memfd_create
     unsupported) or 1 (memfd_create is implemented in the kernel).
     Subsequent iterations check that the success/failure state is
     consistent.  */
  int supported = -1;

  for (int do_cloexec = 0; do_cloexec < 2; ++do_cloexec)
    for (int do_sealing = 0; do_sealing < 2; ++do_sealing)
      {
        /* Unsigned to match both the memfd_create prototype and the
           %x conversion below.  */
        unsigned int flags = 0;
        if (do_cloexec)
          flags |= MFD_CLOEXEC;
        if (do_sealing)
          flags |= MFD_ALLOW_SEALING;
        if (test_verbose > 0)
          printf ("info: memfd_create with flags=0x%x\n", flags);

        int fd = memfd_create ("tst-memfd_create", flags);
        if (fd < 0)
          {
            if (errno != ENOSYS)
              FAIL_EXIT1 ("memfd_create: %m");
            if (supported < 0)
              {
                printf ("warning: memfd_create is unsupported\n");
                supported = 0;
              }
            /* An earlier iteration must not have succeeded.  */
            TEST_VERIFY (supported == 0);
            continue;
          }

        if (supported < 0)
          supported = 1;
        /* An earlier iteration must not have reported ENOSYS.  */
        TEST_VERIFY (supported > 0);

        char *fd_path = xasprintf ("/proc/self/fd/%d", fd);
        char *link = xreadlink (fd_path);
        if (test_verbose > 0)
          printf ("info: memfd link: %s\n", link);
        /* The link target is expected to embed the name passed to
           memfd_create, prefixed with "memfd:".  The kernel may add
           further decoration around it (such as a leading slash or a
           " (deleted)" suffix), so search for the substring instead
           of requiring an exact match.  Note that a bare strcmp
           result must not be used as the condition here: strcmp
           returns 0 when the strings are equal.  */
        TEST_VERIFY (strstr (link, "memfd:tst-memfd_create") != NULL);

        TEST_VERIFY (is_cloexec (fd) == do_cloexec);
        /* Without MFD_ALLOW_SEALING, the descriptor is expected to
           start out with F_SEAL_SEAL already set.  */
        TEST_VERIFY (is_sealed (fd) == !do_sealing);
        if (do_sealing)
          {
            /* Adding a seal other than F_SEAL_SEAL must leave the
               seal set open for further additions.  */
            TEST_VERIFY (fcntl (fd, F_ADD_SEALS, F_SEAL_WRITE) == 0);
            TEST_VERIFY (!is_sealed (fd));
            TEST_VERIFY (get_seals (fd) & F_SEAL_WRITE);
            TEST_VERIFY (fcntl (fd, F_ADD_SEALS, F_SEAL_SEAL) == 0);
            TEST_VERIFY (is_sealed (fd));
          }

        xclose (fd);
        free (fd_path);
        free (link);
      }

  if (supported == 0)
    return EXIT_UNSUPPORTED;
  return 0;
}
#include <support/test-driver.c>

View File

@ -1877,6 +1877,7 @@ GLIBC_2.26 wcstof128_l F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F
GLIBC_2.3 GLIBC_2.3 A
GLIBC_2.3 __ctype_b_loc F
GLIBC_2.3 __ctype_tolower_loc F

View File

@ -2120,3 +2120,4 @@ GLIBC_2.26 wcstof128_l F
GLIBC_2.27 GLIBC_2.27 A
GLIBC_2.27 glob F
GLIBC_2.27 glob64 F
GLIBC_2.27 memfd_create F