io_uring-5.6-2020-02-28
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAl5ZXkgQHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgprZqEACOvhiprH9Q75Pp+ZwQknM3xGyJRWI3Mbj9 ZOyTVTK0qhTeaq6rN4MLSYevXOh+L68x5WRt1YJ1UnQRE0i8+ZQyZczqLKxxl8gF trhbYDXjvXIWr9zvdtiL01PKKu4Vjjp6eZAomrbxCTFku0qn76fo9wDgGPRGL+Kx lNO/6QvCXr9EjDniEUhlQsxTad5xc4sL0cnL4s2i7RlTCYtW4WJXJMC/4Gkg69j+ W5GBZyjJDa8Sj3pEbLjtDtA4ooE9VMaldb7ZvR62ONUVwGpftPsbN7UhVlhyhpW+ 8v4ZEf07CxB246+hj7oL0RvEW3+/nB2hym1ySMXyBzpbx4O1JOUG7hQtNgdLRbCZ 27IOg2O36qbUKM1hUwn7Qm3XAfBPQdFpVmqE2+E9MEOKzigLzhRP6Bu5d9x9VQGh JDxsm3B8PRHFJVAasiYu0p7mlx/+BCLjB84UrMB3I9UCBuVfk4mtmuwZX+mcK2PR pV1xJlEMYKme3cz2/u6uB8p3Nq6ipE1nSVrI6AnfEvJbQ9sFL61KaG4wHKPvtb0y mlNgc4seSjiWcBR2/84561a4CSmlXAn9dWMIGdHFFA43mTPYGc5omTcM8FwcEDkW cTFGB8sFukcTNmOw62HUHYI1vPpowX6apV08lEQrScz7GiK5piTYqTFNneqEzcwZ 3bIMisH3Gg== =WheR -----END PGP SIGNATURE----- Merge tag 'io_uring-5.6-2020-02-28' of git://git.kernel.dk/linux-block Pull io_uring fixes from Jens Axboe: - Fix for a race with IOPOLL used with SQPOLL (Xiaoguang) - Only show ->fdinfo if procfs is enabled (Tobias) - Fix for a chain with multiple personalities in the SQEs - Fix for a missing free of personality idr on exit - Removal of the spin-for-work optimization - Fix for next work lookup on request completion - Fix for non-vec read/write result propagation in case of links - Fix for fileset references on switch - Fix for a recvmsg/sendmsg 32-bit compatibility mode * tag 'io_uring-5.6-2020-02-28' of git://git.kernel.dk/linux-block: io_uring: fix 32-bit compatability with sendmsg/recvmsg io_uring: define and set show_fdinfo only if procfs is enabled io_uring: drop file set ref put/get on switch io_uring: import_single_range() returns 0/-ERROR io_uring: pick up link work on submit reference drop io-wq: ensure work->task_pid is cleared on init io-wq: remove spin-for-work optimization io_uring: fix poll_list race for SETUP_IOPOLL|SETUP_SQPOLL io_uring: fix personality idr leak io_uring: handle multiple personalities in
link chains
This commit is contained in:
commit
74dea5d99d
19
fs/io-wq.c
19
fs/io-wq.c
@ -535,42 +535,23 @@ next:
|
||||
} while (1);
|
||||
}
|
||||
|
||||
static inline void io_worker_spin_for_work(struct io_wqe *wqe)
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
while (++i < 1000) {
|
||||
if (io_wqe_run_queue(wqe))
|
||||
break;
|
||||
if (need_resched())
|
||||
break;
|
||||
cpu_relax();
|
||||
}
|
||||
}
|
||||
|
||||
static int io_wqe_worker(void *data)
|
||||
{
|
||||
struct io_worker *worker = data;
|
||||
struct io_wqe *wqe = worker->wqe;
|
||||
struct io_wq *wq = wqe->wq;
|
||||
bool did_work;
|
||||
|
||||
io_worker_start(wqe, worker);
|
||||
|
||||
did_work = false;
|
||||
while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
loop:
|
||||
if (did_work)
|
||||
io_worker_spin_for_work(wqe);
|
||||
spin_lock_irq(&wqe->lock);
|
||||
if (io_wqe_run_queue(wqe)) {
|
||||
__set_current_state(TASK_RUNNING);
|
||||
io_worker_handle_work(worker);
|
||||
did_work = true;
|
||||
goto loop;
|
||||
}
|
||||
did_work = false;
|
||||
/* drops the lock on success, retry */
|
||||
if (__io_worker_idle(wqe, worker)) {
|
||||
__release(&wqe->lock);
|
||||
|
14
fs/io-wq.h
14
fs/io-wq.h
@ -79,16 +79,10 @@ struct io_wq_work {
|
||||
pid_t task_pid;
|
||||
};
|
||||
|
||||
#define INIT_IO_WORK(work, _func) \
|
||||
do { \
|
||||
(work)->list.next = NULL; \
|
||||
(work)->func = _func; \
|
||||
(work)->files = NULL; \
|
||||
(work)->mm = NULL; \
|
||||
(work)->creds = NULL; \
|
||||
(work)->fs = NULL; \
|
||||
(work)->flags = 0; \
|
||||
} while (0) \
|
||||
#define INIT_IO_WORK(work, _func) \
|
||||
do { \
|
||||
*(work) = (struct io_wq_work){ .func = _func }; \
|
||||
} while (0) \
|
||||
|
||||
typedef void (get_work_fn)(struct io_wq_work *);
|
||||
typedef void (put_work_fn)(struct io_wq_work *);
|
||||
|
132
fs/io_uring.c
132
fs/io_uring.c
@ -183,17 +183,12 @@ struct fixed_file_table {
|
||||
struct file **files;
|
||||
};
|
||||
|
||||
enum {
|
||||
FFD_F_ATOMIC,
|
||||
};
|
||||
|
||||
struct fixed_file_data {
|
||||
struct fixed_file_table *table;
|
||||
struct io_ring_ctx *ctx;
|
||||
|
||||
struct percpu_ref refs;
|
||||
struct llist_head put_llist;
|
||||
unsigned long state;
|
||||
struct work_struct ref_work;
|
||||
struct completion done;
|
||||
};
|
||||
@ -1483,10 +1478,10 @@ static void io_free_req(struct io_kiocb *req)
|
||||
__attribute__((nonnull))
|
||||
static void io_put_req_find_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
|
||||
{
|
||||
io_req_find_next(req, nxtptr);
|
||||
|
||||
if (refcount_dec_and_test(&req->refs))
|
||||
if (refcount_dec_and_test(&req->refs)) {
|
||||
io_req_find_next(req, nxtptr);
|
||||
__io_free_req(req);
|
||||
}
|
||||
}
|
||||
|
||||
static void io_put_req(struct io_kiocb *req)
|
||||
@ -1821,6 +1816,10 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
|
||||
list_add(&req->list, &ctx->poll_list);
|
||||
else
|
||||
list_add_tail(&req->list, &ctx->poll_list);
|
||||
|
||||
if ((ctx->flags & IORING_SETUP_SQPOLL) &&
|
||||
wq_has_sleeper(&ctx->sqo_wait))
|
||||
wake_up(&ctx->sqo_wait);
|
||||
}
|
||||
|
||||
static void io_file_put(struct io_submit_state *state)
|
||||
@ -2071,7 +2070,7 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
|
||||
ssize_t ret;
|
||||
ret = import_single_range(rw, buf, sqe_len, *iovec, iter);
|
||||
*iovec = NULL;
|
||||
return ret;
|
||||
return ret < 0 ? ret : sqe_len;
|
||||
}
|
||||
|
||||
if (req->io) {
|
||||
@ -3002,6 +3001,11 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
|
||||
sr->len = READ_ONCE(sqe->len);
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
if (req->ctx->compat)
|
||||
sr->msg_flags |= MSG_CMSG_COMPAT;
|
||||
#endif
|
||||
|
||||
if (!io || req->opcode == IORING_OP_SEND)
|
||||
return 0;
|
||||
/* iovec is already imported */
|
||||
@ -3154,6 +3158,11 @@ static int io_recvmsg_prep(struct io_kiocb *req,
|
||||
sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
|
||||
sr->len = READ_ONCE(sqe->len);
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
if (req->ctx->compat)
|
||||
sr->msg_flags |= MSG_CMSG_COMPAT;
|
||||
#endif
|
||||
|
||||
if (!io || req->opcode == IORING_OP_RECV)
|
||||
return 0;
|
||||
/* iovec is already imported */
|
||||
@ -4705,11 +4714,21 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
{
|
||||
struct io_kiocb *linked_timeout;
|
||||
struct io_kiocb *nxt = NULL;
|
||||
const struct cred *old_creds = NULL;
|
||||
int ret;
|
||||
|
||||
again:
|
||||
linked_timeout = io_prep_linked_timeout(req);
|
||||
|
||||
if (req->work.creds && req->work.creds != current_cred()) {
|
||||
if (old_creds)
|
||||
revert_creds(old_creds);
|
||||
if (old_creds == req->work.creds)
|
||||
old_creds = NULL; /* restored original creds */
|
||||
else
|
||||
old_creds = override_creds(req->work.creds);
|
||||
}
|
||||
|
||||
ret = io_issue_sqe(req, sqe, &nxt, true);
|
||||
|
||||
/*
|
||||
@ -4735,7 +4754,7 @@ punt:
|
||||
|
||||
err:
|
||||
/* drop submission reference */
|
||||
io_put_req(req);
|
||||
io_put_req_find_next(req, &nxt);
|
||||
|
||||
if (linked_timeout) {
|
||||
if (!ret)
|
||||
@ -4759,6 +4778,8 @@ done_req:
|
||||
goto punt;
|
||||
goto again;
|
||||
}
|
||||
if (old_creds)
|
||||
revert_creds(old_creds);
|
||||
}
|
||||
|
||||
static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
@ -4803,7 +4824,6 @@ static inline void io_queue_link_head(struct io_kiocb *req)
|
||||
static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
|
||||
struct io_submit_state *state, struct io_kiocb **link)
|
||||
{
|
||||
const struct cred *old_creds = NULL;
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
unsigned int sqe_flags;
|
||||
int ret, id;
|
||||
@ -4818,14 +4838,12 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
|
||||
|
||||
id = READ_ONCE(sqe->personality);
|
||||
if (id) {
|
||||
const struct cred *personality_creds;
|
||||
|
||||
personality_creds = idr_find(&ctx->personality_idr, id);
|
||||
if (unlikely(!personality_creds)) {
|
||||
req->work.creds = idr_find(&ctx->personality_idr, id);
|
||||
if (unlikely(!req->work.creds)) {
|
||||
ret = -EINVAL;
|
||||
goto err_req;
|
||||
}
|
||||
old_creds = override_creds(personality_creds);
|
||||
get_cred(req->work.creds);
|
||||
}
|
||||
|
||||
/* same numerical values with corresponding REQ_F_*, safe to copy */
|
||||
@ -4837,8 +4855,6 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
|
||||
err_req:
|
||||
io_cqring_add_event(req, ret);
|
||||
io_double_put_req(req);
|
||||
if (old_creds)
|
||||
revert_creds(old_creds);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -4899,8 +4915,6 @@ err_req:
|
||||
}
|
||||
}
|
||||
|
||||
if (old_creds)
|
||||
revert_creds(old_creds);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -5081,9 +5095,8 @@ static int io_sq_thread(void *data)
|
||||
const struct cred *old_cred;
|
||||
mm_segment_t old_fs;
|
||||
DEFINE_WAIT(wait);
|
||||
unsigned inflight;
|
||||
unsigned long timeout;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
|
||||
complete(&ctx->completions[1]);
|
||||
|
||||
@ -5091,39 +5104,19 @@ static int io_sq_thread(void *data)
|
||||
set_fs(USER_DS);
|
||||
old_cred = override_creds(ctx->creds);
|
||||
|
||||
ret = timeout = inflight = 0;
|
||||
timeout = jiffies + ctx->sq_thread_idle;
|
||||
while (!kthread_should_park()) {
|
||||
unsigned int to_submit;
|
||||
|
||||
if (inflight) {
|
||||
if (!list_empty(&ctx->poll_list)) {
|
||||
unsigned nr_events = 0;
|
||||
|
||||
if (ctx->flags & IORING_SETUP_IOPOLL) {
|
||||
/*
|
||||
* inflight is the count of the maximum possible
|
||||
* entries we submitted, but it can be smaller
|
||||
* if we dropped some of them. If we don't have
|
||||
* poll entries available, then we know that we
|
||||
* have nothing left to poll for. Reset the
|
||||
* inflight count to zero in that case.
|
||||
*/
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
if (!list_empty(&ctx->poll_list))
|
||||
io_iopoll_getevents(ctx, &nr_events, 0);
|
||||
else
|
||||
inflight = 0;
|
||||
mutex_unlock(&ctx->uring_lock);
|
||||
} else {
|
||||
/*
|
||||
* Normal IO, just pretend everything completed.
|
||||
* We don't have to poll completions for that.
|
||||
*/
|
||||
nr_events = inflight;
|
||||
}
|
||||
|
||||
inflight -= nr_events;
|
||||
if (!inflight)
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
if (!list_empty(&ctx->poll_list))
|
||||
io_iopoll_getevents(ctx, &nr_events, 0);
|
||||
else
|
||||
timeout = jiffies + ctx->sq_thread_idle;
|
||||
mutex_unlock(&ctx->uring_lock);
|
||||
}
|
||||
|
||||
to_submit = io_sqring_entries(ctx);
|
||||
@ -5152,7 +5145,7 @@ static int io_sq_thread(void *data)
|
||||
* more IO, we should wait for the application to
|
||||
* reap events and wake us up.
|
||||
*/
|
||||
if (inflight ||
|
||||
if (!list_empty(&ctx->poll_list) ||
|
||||
(!time_after(jiffies, timeout) && ret != -EBUSY &&
|
||||
!percpu_ref_is_dying(&ctx->refs))) {
|
||||
cond_resched();
|
||||
@ -5162,6 +5155,19 @@ static int io_sq_thread(void *data)
|
||||
prepare_to_wait(&ctx->sqo_wait, &wait,
|
||||
TASK_INTERRUPTIBLE);
|
||||
|
||||
/*
|
||||
* While doing polled IO, before going to sleep, we need
|
||||
* to check if there are new reqs added to poll_list, it
|
||||
* is because reqs may have been punted to io worker and
|
||||
* will be added to poll_list later, hence check the
|
||||
* poll_list again.
|
||||
*/
|
||||
if ((ctx->flags & IORING_SETUP_IOPOLL) &&
|
||||
!list_empty_careful(&ctx->poll_list)) {
|
||||
finish_wait(&ctx->sqo_wait, &wait);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Tell userspace we may need a wakeup call */
|
||||
ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP;
|
||||
/* make sure to read SQ tail after writing flags */
|
||||
@ -5189,8 +5195,7 @@ static int io_sq_thread(void *data)
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
ret = io_submit_sqes(ctx, to_submit, NULL, -1, &cur_mm, true);
|
||||
mutex_unlock(&ctx->uring_lock);
|
||||
if (ret > 0)
|
||||
inflight += ret;
|
||||
timeout = jiffies + ctx->sq_thread_idle;
|
||||
}
|
||||
|
||||
set_fs(old_fs);
|
||||
@ -5595,7 +5600,6 @@ static void io_ring_file_ref_switch(struct work_struct *work)
|
||||
|
||||
data = container_of(work, struct fixed_file_data, ref_work);
|
||||
io_ring_file_ref_flush(data);
|
||||
percpu_ref_get(&data->refs);
|
||||
percpu_ref_switch_to_percpu(&data->refs);
|
||||
}
|
||||
|
||||
@ -5771,8 +5775,13 @@ static void io_atomic_switch(struct percpu_ref *ref)
|
||||
{
|
||||
struct fixed_file_data *data;
|
||||
|
||||
/*
|
||||
* Juggle reference to ensure we hit zero, if needed, so we can
|
||||
* switch back to percpu mode
|
||||
*/
|
||||
data = container_of(ref, struct fixed_file_data, refs);
|
||||
clear_bit(FFD_F_ATOMIC, &data->state);
|
||||
percpu_ref_put(&data->refs);
|
||||
percpu_ref_get(&data->refs);
|
||||
}
|
||||
|
||||
static bool io_queue_file_removal(struct fixed_file_data *data,
|
||||
@ -5795,11 +5804,7 @@ static bool io_queue_file_removal(struct fixed_file_data *data,
|
||||
llist_add(&pfile->llist, &data->put_llist);
|
||||
|
||||
if (pfile == &pfile_stack) {
|
||||
if (!test_and_set_bit(FFD_F_ATOMIC, &data->state)) {
|
||||
percpu_ref_put(&data->refs);
|
||||
percpu_ref_switch_to_atomic(&data->refs,
|
||||
io_atomic_switch);
|
||||
}
|
||||
percpu_ref_switch_to_atomic(&data->refs, io_atomic_switch);
|
||||
wait_for_completion(&done);
|
||||
flush_work(&data->ref_work);
|
||||
return false;
|
||||
@ -5873,10 +5878,8 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
|
||||
up->offset++;
|
||||
}
|
||||
|
||||
if (ref_switch && !test_and_set_bit(FFD_F_ATOMIC, &data->state)) {
|
||||
percpu_ref_put(&data->refs);
|
||||
if (ref_switch)
|
||||
percpu_ref_switch_to_atomic(&data->refs, io_atomic_switch);
|
||||
}
|
||||
|
||||
return done ? done : err;
|
||||
}
|
||||
@ -6334,6 +6337,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
|
||||
io_sqe_buffer_unregister(ctx);
|
||||
io_sqe_files_unregister(ctx);
|
||||
io_eventfd_unregister(ctx);
|
||||
idr_destroy(&ctx->personality_idr);
|
||||
|
||||
#if defined(CONFIG_UNIX)
|
||||
if (ctx->ring_sock) {
|
||||
@ -6647,6 +6651,7 @@ out_fput:
|
||||
return submitted ? submitted : ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
static int io_uring_show_cred(int id, void *p, void *data)
|
||||
{
|
||||
const struct cred *cred = p;
|
||||
@ -6720,6 +6725,7 @@ static void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
|
||||
percpu_ref_put(&ctx->refs);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static const struct file_operations io_uring_fops = {
|
||||
.release = io_uring_release,
|
||||
@ -6731,7 +6737,9 @@ static const struct file_operations io_uring_fops = {
|
||||
#endif
|
||||
.poll = io_uring_poll,
|
||||
.fasync = io_uring_fasync,
|
||||
#ifdef CONFIG_PROC_FS
|
||||
.show_fdinfo = io_uring_show_fdinfo,
|
||||
#endif
|
||||
};
|
||||
|
||||
static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
|
||||
|
Loading…
Reference in New Issue
Block a user