diff --git a/fs/io_uring.c b/fs/io_uring.c index fdc18321d70c..310f8d17c53e 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -231,7 +231,6 @@ struct io_ring_ctx { struct task_struct *sqo_thread; /* if using sq thread polling */ struct mm_struct *sqo_mm; wait_queue_head_t sqo_wait; - unsigned sqo_stop; struct { /* CQ ring */ @@ -329,9 +328,8 @@ struct io_kiocb { #define REQ_F_IOPOLL_COMPLETED 2 /* polled IO has completed */ #define REQ_F_FIXED_FILE 4 /* ctx owns file */ #define REQ_F_SEQ_PREV 8 /* sequential with previous */ -#define REQ_F_PREPPED 16 /* prep already done */ -#define REQ_F_IO_DRAIN 32 /* drain existing IO first */ -#define REQ_F_IO_DRAINED 64 /* drain done */ +#define REQ_F_IO_DRAIN 16 /* drain existing IO first */ +#define REQ_F_IO_DRAINED 32 /* drain done */ u64 user_data; u32 error; /* iopoll result from callback */ u32 sequence; @@ -490,7 +488,7 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx) } static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data, - long res, unsigned ev_flags) + long res) { struct io_uring_cqe *cqe; @@ -503,7 +501,7 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data, if (cqe) { WRITE_ONCE(cqe->user_data, ki_user_data); WRITE_ONCE(cqe->res, res); - WRITE_ONCE(cqe->flags, ev_flags); + WRITE_ONCE(cqe->flags, 0); } else { unsigned overflow = READ_ONCE(ctx->cq_ring->overflow); @@ -522,12 +520,12 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) } static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 user_data, - long res, unsigned ev_flags) + long res) { unsigned long flags; spin_lock_irqsave(&ctx->completion_lock, flags); - io_cqring_fill_event(ctx, user_data, res, ev_flags); + io_cqring_fill_event(ctx, user_data, res); io_commit_cqring(ctx); spin_unlock_irqrestore(&ctx->completion_lock, flags); @@ -629,7 +627,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, req = list_first_entry(done, struct io_kiocb, list); list_del(&req->list); - io_cqring_fill_event(ctx, req->user_data, req->error, 0); + io_cqring_fill_event(ctx, req->user_data, req->error); (*nr_events)++; if (refcount_dec_and_test(&req->refs)) { @@ -777,7 +775,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2) kiocb_end_write(kiocb); - io_cqring_add_event(req->ctx, req->user_data, res, 0); + io_cqring_add_event(req->ctx, req->user_data, res); io_put_req(req); } @@ -896,9 +894,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, if (!req->file) return -EBADF; - /* For -EAGAIN retry, everything is already prepped */ - if (req->flags & REQ_F_PREPPED) - return 0; if (force_nonblock && !io_file_supports_async(req->file)) force_nonblock = false; @@ -941,7 +936,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, return -EINVAL; kiocb->ki_complete = io_complete_rw; } - req->flags |= REQ_F_PREPPED; return 0; } @@ -1216,7 +1210,7 @@ static int io_nop(struct io_kiocb *req, u64 user_data) if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; - io_cqring_add_event(ctx, user_data, err, 0); + io_cqring_add_event(ctx, user_data, err); io_put_req(req); return 0; } @@ -1227,16 +1221,12 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (!req->file) return -EBADF; - /* Prep already done (EAGAIN retry) */ - if (req->flags & REQ_F_PREPPED) - return 0; if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index)) return -EINVAL; - req->flags |= REQ_F_PREPPED; return 0; } @@ -1265,7 +1255,7 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe, end > 0 ? end : LLONG_MAX, fsync_flags & IORING_FSYNC_DATASYNC); - io_cqring_add_event(req->ctx, sqe->user_data, ret, 0); + io_cqring_add_event(req->ctx, sqe->user_data, ret); io_put_req(req); return 0; } @@ -1277,16 +1267,12 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (!req->file) return -EBADF; - /* Prep already done (EAGAIN retry) */ - if (req->flags & REQ_F_PREPPED) - return 0; if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index)) return -EINVAL; - req->flags |= REQ_F_PREPPED; return ret; } @@ -1313,7 +1299,7 @@ static int io_sync_file_range(struct io_kiocb *req, ret = sync_file_range(req->rw.ki_filp, sqe_off, sqe_len, flags); - io_cqring_add_event(req->ctx, sqe->user_data, ret, 0); + io_cqring_add_event(req->ctx, sqe->user_data, ret); io_put_req(req); return 0; } @@ -1371,7 +1357,7 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe) } spin_unlock_irq(&ctx->completion_lock); - io_cqring_add_event(req->ctx, sqe->user_data, ret, 0); + io_cqring_add_event(req->ctx, sqe->user_data, ret); io_put_req(req); return 0; } @@ -1380,7 +1366,7 @@ static void io_poll_complete(struct io_ring_ctx *ctx, struct io_kiocb *req, __poll_t mask) { req->poll.done = true; - io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask), 0); + io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask)); io_commit_cqring(ctx); } @@ -1700,7 +1686,7 @@ restart: io_put_req(req); if (ret) { - io_cqring_add_event(ctx, sqe->user_data, ret, 0); + io_cqring_add_event(ctx, sqe->user_data, ret); io_put_req(req); } @@ -2005,7 +1991,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes, continue; } - io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret, 0); + io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret); } if (statep) @@ -2028,7 +2014,7 @@ static int io_sq_thread(void *data) set_fs(USER_DS); timeout = inflight = 0; - while (!kthread_should_stop() && !ctx->sqo_stop) { + while (!kthread_should_park()) { bool all_fixed, mm_fault = false; int i; @@ -2090,7 +2076,7 @@ static int io_sq_thread(void *data) smp_mb(); if (!io_get_sqring(ctx, &sqes[0])) { - if (kthread_should_stop()) { + if (kthread_should_park()) { finish_wait(&ctx->sqo_wait, &wait); break; } @@ -2140,8 +2126,7 @@ static int io_sq_thread(void *data) mmput(cur_mm); } - if (kthread_should_park()) - kthread_parkme(); + kthread_parkme(); return 0; } @@ -2170,7 +2155,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) ret = io_submit_sqe(ctx, &s, statep); if (ret) - io_cqring_add_event(ctx, s.sqe->user_data, ret, 0); + io_cqring_add_event(ctx, s.sqe->user_data, ret); } io_commit_sqring(ctx); @@ -2182,6 +2167,8 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) static unsigned io_cqring_events(struct io_cq_ring *ring) { + /* See comment at the top of this file */ + smp_rmb(); return READ_ONCE(ring->r.tail) - READ_ONCE(ring->r.head); } @@ -2194,11 +2181,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, { struct io_cq_ring *ring = ctx->cq_ring; sigset_t ksigmask, sigsaved; - DEFINE_WAIT(wait); int ret; - /* See comment at the top of this file */ - smp_rmb(); if (io_cqring_events(ring) >= min_events) return 0; @@ -2216,23 +2200,9 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, return ret; } - do { - prepare_to_wait(&ctx->wait, &wait, TASK_INTERRUPTIBLE); - - ret = 0; - /* See comment at the top of this file */ - smp_rmb(); - if (io_cqring_events(ring) >= min_events) - break; - - schedule(); - + ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events); + if (ret == -ERESTARTSYS) ret = -EINTR; - if (signal_pending(current)) - break; - } while (1); - - finish_wait(&ctx->wait, &wait); if (sig) restore_user_sigmask(sig, &sigsaved); @@ -2273,8 +2243,11 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) static void io_sq_thread_stop(struct io_ring_ctx *ctx) { if (ctx->sqo_thread) { - ctx->sqo_stop = 1; - mb(); + /* + * The park is a bit of a work-around, without it we get + * warning spews on shutdown with SQPOLL set and affinity + * set to a single CPU. + */ kthread_park(ctx->sqo_thread); kthread_stop(ctx->sqo_thread); ctx->sqo_thread = NULL; @@ -2467,10 +2440,11 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, ctx->sq_thread_idle = HZ; if (p->flags & IORING_SETUP_SQ_AFF) { - int cpu = array_index_nospec(p->sq_thread_cpu, - nr_cpu_ids); + int cpu = p->sq_thread_cpu; ret = -EINVAL; + if (cpu >= nr_cpu_ids) + goto err; if (!cpu_online(cpu)) goto err;