16e5726269
Since commit 7361c36c52
(af_unix: Allow credentials to work across
user and pid namespaces) af_unix performance dropped a lot.
This is because we now take a reference on pid and cred in each write(),
and release them in read(), usually done from another process,
possibly on another cpu. This triggers false sharing.
# Events: 154K cycles
#
# Overhead Command Shared Object Symbol
# ........ ....... .................. .........................
#
10.40% hackbench [kernel.kallsyms] [k] put_pid
8.60% hackbench [kernel.kallsyms] [k] unix_stream_recvmsg
7.87% hackbench [kernel.kallsyms] [k] unix_stream_sendmsg
6.11% hackbench [kernel.kallsyms] [k] do_raw_spin_lock
4.95% hackbench [kernel.kallsyms] [k] unix_scm_to_skb
4.87% hackbench [kernel.kallsyms] [k] pid_nr_ns
4.34% hackbench [kernel.kallsyms] [k] cred_to_ucred
2.39% hackbench [kernel.kallsyms] [k] unix_destruct_scm
2.24% hackbench [kernel.kallsyms] [k] sub_preempt_count
1.75% hackbench [kernel.kallsyms] [k] fget_light
1.51% hackbench [kernel.kallsyms] [k]
__mutex_lock_interruptible_slowpath
1.42% hackbench [kernel.kallsyms] [k] sock_alloc_send_pskb
This patch includes SCM_CREDENTIALS information in an af_unix message/skb
only if requested by the sender [see man 7 unix for details on how to include
ancillary data using the sendmsg() system call].
Note: This might break buggy applications that expected SCM_CREDENTIALS
from an unaware write() system call, with the receiver not using the
SO_PASSCRED socket option.
If SOCK_PASSCRED is set on source or destination socket, we still
include credentials for mere write() syscalls.
Performance boost in hackbench : more than 50% gain on a 16 thread
machine (2 quad-core cpus, 2 threads per core)
hackbench 20 thread 2000
4.228 sec instead of 9.102 sec
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
347 lines
7.7 KiB
C
347 lines
7.7 KiB
C
/* scm.c - Socket level control messages processing.
|
|
*
|
|
* Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
|
|
* Alignment and value checking mods by Craig Metz
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*/
|
|
|
|
#include <linux/module.h>
|
|
#include <linux/signal.h>
|
|
#include <linux/capability.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/stat.h>
|
|
#include <linux/socket.h>
|
|
#include <linux/file.h>
|
|
#include <linux/fcntl.h>
|
|
#include <linux/net.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/netdevice.h>
|
|
#include <linux/security.h>
|
|
#include <linux/pid.h>
|
|
#include <linux/nsproxy.h>
|
|
#include <linux/slab.h>
|
|
|
|
#include <asm/system.h>
|
|
#include <asm/uaccess.h>
|
|
|
|
#include <net/protocol.h>
|
|
#include <linux/skbuff.h>
|
|
#include <net/sock.h>
|
|
#include <net/compat.h>
|
|
#include <net/scm.h>
|
|
|
|
|
|
/*
|
|
* Only allow a user to send credentials, that they could set with
|
|
* setu(g)id.
|
|
*/
|
|
|
|
static __inline__ int scm_check_creds(struct ucred *creds)
|
|
{
|
|
const struct cred *cred = current_cred();
|
|
|
|
if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) &&
|
|
((creds->uid == cred->uid || creds->uid == cred->euid ||
|
|
creds->uid == cred->suid) || capable(CAP_SETUID)) &&
|
|
((creds->gid == cred->gid || creds->gid == cred->egid ||
|
|
creds->gid == cred->sgid) || capable(CAP_SETGID))) {
|
|
return 0;
|
|
}
|
|
return -EPERM;
|
|
}
|
|
|
|
static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
|
|
{
|
|
int *fdp = (int*)CMSG_DATA(cmsg);
|
|
struct scm_fp_list *fpl = *fplp;
|
|
struct file **fpp;
|
|
int i, num;
|
|
|
|
num = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))/sizeof(int);
|
|
|
|
if (num <= 0)
|
|
return 0;
|
|
|
|
if (num > SCM_MAX_FD)
|
|
return -EINVAL;
|
|
|
|
if (!fpl)
|
|
{
|
|
fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
|
|
if (!fpl)
|
|
return -ENOMEM;
|
|
*fplp = fpl;
|
|
fpl->count = 0;
|
|
fpl->max = SCM_MAX_FD;
|
|
}
|
|
fpp = &fpl->fp[fpl->count];
|
|
|
|
if (fpl->count + num > fpl->max)
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* Verify the descriptors and increment the usage count.
|
|
*/
|
|
|
|
for (i=0; i< num; i++)
|
|
{
|
|
int fd = fdp[i];
|
|
struct file *file;
|
|
|
|
if (fd < 0 || !(file = fget_raw(fd)))
|
|
return -EBADF;
|
|
*fpp++ = file;
|
|
fpl->count++;
|
|
}
|
|
return num;
|
|
}
|
|
|
|
void __scm_destroy(struct scm_cookie *scm)
|
|
{
|
|
struct scm_fp_list *fpl = scm->fp;
|
|
int i;
|
|
|
|
if (fpl) {
|
|
scm->fp = NULL;
|
|
if (current->scm_work_list) {
|
|
list_add_tail(&fpl->list, current->scm_work_list);
|
|
} else {
|
|
LIST_HEAD(work_list);
|
|
|
|
current->scm_work_list = &work_list;
|
|
|
|
list_add(&fpl->list, &work_list);
|
|
while (!list_empty(&work_list)) {
|
|
fpl = list_first_entry(&work_list, struct scm_fp_list, list);
|
|
|
|
list_del(&fpl->list);
|
|
for (i=fpl->count-1; i>=0; i--)
|
|
fput(fpl->fp[i]);
|
|
kfree(fpl);
|
|
}
|
|
|
|
current->scm_work_list = NULL;
|
|
}
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(__scm_destroy);
|
|
|
|
int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
|
|
{
|
|
struct cmsghdr *cmsg;
|
|
int err;
|
|
|
|
for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg))
|
|
{
|
|
err = -EINVAL;
|
|
|
|
/* Verify that cmsg_len is at least sizeof(struct cmsghdr) */
|
|
/* The first check was omitted in <= 2.2.5. The reasoning was
|
|
that parser checks cmsg_len in any case, so that
|
|
additional check would be work duplication.
|
|
But if cmsg_level is not SOL_SOCKET, we do not check
|
|
for too short ancillary data object at all! Oops.
|
|
OK, let's add it...
|
|
*/
|
|
if (!CMSG_OK(msg, cmsg))
|
|
goto error;
|
|
|
|
if (cmsg->cmsg_level != SOL_SOCKET)
|
|
continue;
|
|
|
|
switch (cmsg->cmsg_type)
|
|
{
|
|
case SCM_RIGHTS:
|
|
if (!sock->ops || sock->ops->family != PF_UNIX)
|
|
goto error;
|
|
err=scm_fp_copy(cmsg, &p->fp);
|
|
if (err<0)
|
|
goto error;
|
|
break;
|
|
case SCM_CREDENTIALS:
|
|
if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred)))
|
|
goto error;
|
|
memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred));
|
|
err = scm_check_creds(&p->creds);
|
|
if (err)
|
|
goto error;
|
|
|
|
if (!p->pid || pid_vnr(p->pid) != p->creds.pid) {
|
|
struct pid *pid;
|
|
err = -ESRCH;
|
|
pid = find_get_pid(p->creds.pid);
|
|
if (!pid)
|
|
goto error;
|
|
put_pid(p->pid);
|
|
p->pid = pid;
|
|
}
|
|
|
|
if (!p->cred ||
|
|
(p->cred->euid != p->creds.uid) ||
|
|
(p->cred->egid != p->creds.gid)) {
|
|
struct cred *cred;
|
|
err = -ENOMEM;
|
|
cred = prepare_creds();
|
|
if (!cred)
|
|
goto error;
|
|
|
|
cred->uid = cred->euid = p->creds.uid;
|
|
cred->gid = cred->egid = p->creds.gid;
|
|
if (p->cred)
|
|
put_cred(p->cred);
|
|
p->cred = cred;
|
|
}
|
|
break;
|
|
default:
|
|
goto error;
|
|
}
|
|
}
|
|
|
|
if (p->fp && !p->fp->count)
|
|
{
|
|
kfree(p->fp);
|
|
p->fp = NULL;
|
|
}
|
|
return 0;
|
|
|
|
error:
|
|
scm_destroy(p);
|
|
return err;
|
|
}
|
|
EXPORT_SYMBOL(__scm_send);
|
|
|
|
int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
|
|
{
|
|
struct cmsghdr __user *cm
|
|
= (__force struct cmsghdr __user *)msg->msg_control;
|
|
struct cmsghdr cmhdr;
|
|
int cmlen = CMSG_LEN(len);
|
|
int err;
|
|
|
|
if (MSG_CMSG_COMPAT & msg->msg_flags)
|
|
return put_cmsg_compat(msg, level, type, len, data);
|
|
|
|
if (cm==NULL || msg->msg_controllen < sizeof(*cm)) {
|
|
msg->msg_flags |= MSG_CTRUNC;
|
|
return 0; /* XXX: return error? check spec. */
|
|
}
|
|
if (msg->msg_controllen < cmlen) {
|
|
msg->msg_flags |= MSG_CTRUNC;
|
|
cmlen = msg->msg_controllen;
|
|
}
|
|
cmhdr.cmsg_level = level;
|
|
cmhdr.cmsg_type = type;
|
|
cmhdr.cmsg_len = cmlen;
|
|
|
|
err = -EFAULT;
|
|
if (copy_to_user(cm, &cmhdr, sizeof cmhdr))
|
|
goto out;
|
|
if (copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr)))
|
|
goto out;
|
|
cmlen = CMSG_SPACE(len);
|
|
if (msg->msg_controllen < cmlen)
|
|
cmlen = msg->msg_controllen;
|
|
msg->msg_control += cmlen;
|
|
msg->msg_controllen -= cmlen;
|
|
err = 0;
|
|
out:
|
|
return err;
|
|
}
|
|
EXPORT_SYMBOL(put_cmsg);
|
|
|
|
void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
|
|
{
|
|
struct cmsghdr __user *cm
|
|
= (__force struct cmsghdr __user*)msg->msg_control;
|
|
|
|
int fdmax = 0;
|
|
int fdnum = scm->fp->count;
|
|
struct file **fp = scm->fp->fp;
|
|
int __user *cmfptr;
|
|
int err = 0, i;
|
|
|
|
if (MSG_CMSG_COMPAT & msg->msg_flags) {
|
|
scm_detach_fds_compat(msg, scm);
|
|
return;
|
|
}
|
|
|
|
if (msg->msg_controllen > sizeof(struct cmsghdr))
|
|
fdmax = ((msg->msg_controllen - sizeof(struct cmsghdr))
|
|
/ sizeof(int));
|
|
|
|
if (fdnum < fdmax)
|
|
fdmax = fdnum;
|
|
|
|
for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i<fdmax;
|
|
i++, cmfptr++)
|
|
{
|
|
int new_fd;
|
|
err = security_file_receive(fp[i]);
|
|
if (err)
|
|
break;
|
|
err = get_unused_fd_flags(MSG_CMSG_CLOEXEC & msg->msg_flags
|
|
? O_CLOEXEC : 0);
|
|
if (err < 0)
|
|
break;
|
|
new_fd = err;
|
|
err = put_user(new_fd, cmfptr);
|
|
if (err) {
|
|
put_unused_fd(new_fd);
|
|
break;
|
|
}
|
|
/* Bump the usage count and install the file. */
|
|
get_file(fp[i]);
|
|
fd_install(new_fd, fp[i]);
|
|
}
|
|
|
|
if (i > 0)
|
|
{
|
|
int cmlen = CMSG_LEN(i*sizeof(int));
|
|
err = put_user(SOL_SOCKET, &cm->cmsg_level);
|
|
if (!err)
|
|
err = put_user(SCM_RIGHTS, &cm->cmsg_type);
|
|
if (!err)
|
|
err = put_user(cmlen, &cm->cmsg_len);
|
|
if (!err) {
|
|
cmlen = CMSG_SPACE(i*sizeof(int));
|
|
msg->msg_control += cmlen;
|
|
msg->msg_controllen -= cmlen;
|
|
}
|
|
}
|
|
if (i < fdnum || (fdnum && fdmax <= 0))
|
|
msg->msg_flags |= MSG_CTRUNC;
|
|
|
|
/*
|
|
* All of the files that fit in the message have had their
|
|
* usage counts incremented, so we just free the list.
|
|
*/
|
|
__scm_destroy(scm);
|
|
}
|
|
EXPORT_SYMBOL(scm_detach_fds);
|
|
|
|
struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
|
|
{
|
|
struct scm_fp_list *new_fpl;
|
|
int i;
|
|
|
|
if (!fpl)
|
|
return NULL;
|
|
|
|
new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]),
|
|
GFP_KERNEL);
|
|
if (new_fpl) {
|
|
for (i = 0; i < fpl->count; i++)
|
|
get_file(fpl->fp[i]);
|
|
new_fpl->max = new_fpl->count;
|
|
}
|
|
return new_fpl;
|
|
}
|
|
EXPORT_SYMBOL(scm_fp_dup);
|