Merge tag 'for-linus-20190516' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:
 "A small set of fixes for io_uring.

  This contains:

   - smp_rmb() cleanup for io_cqring_events() (Jackie)

   - io_cqring_wait() simplification (Jackie)

   - removal of dead 'ev_flags' passing (me)

   - SQ poll CPU affinity verification fix (me; see the bounds-check sketch below)

   - SQ poll wait fix (Roman; see the park/stop sketch below)

   - SQE command prep cleanup and fix (Stefan)"
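
For context on the SQ poll wait fix: kthread_park() blocks until the
target thread calls kthread_parkme(). The old SQPOLL loop exited on
kthread_should_stop()/sqo_stop and only parked conditionally, so it could
exit without ever parking and leave the parker waiting forever. A minimal
sketch of the fixed handshake (illustrative names, not the upstream code):

	#include <linux/kthread.h>
	#include <linux/sched.h>

	static int sq_thread_sketch(void *data)
	{
		/* Loop until asked to park, rather than to stop. */
		while (!kthread_should_park())
			cond_resched();	/* submit/poll work would go here */

		/* Unconditional rendezvous with kthread_park(). */
		kthread_parkme();
		return 0;
	}

	static void sq_thread_stop_sketch(struct task_struct *thread)
	{
		kthread_park(thread);	/* returns once the thread has parked */
		kthread_stop(thread);	/* unparks it and waits for it to exit */
	}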

* tag 'for-linus-20190516' of git://git.kernel.dk/linux-block:
  io_uring: use wait_event_interruptible for cq_wait conditional wait
  io_uring: adjust smp_rmb inside io_cqring_events
  io_uring: fix infinite wait in kthread_park() on io_finish_async()
  io_uring: remove 'ev_flags' argument
  io_uring: fix failure to verify SQ_AFF cpu
  io_uring: fix race condition reading SQE data
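
For context on the SQ_AFF fix below: array_index_nospec() only clamps an
index so it cannot be used speculatively out of bounds; it does not reject
invalid values. An out-of-range sq_thread_cpu was therefore silently
clamped instead of failing, so the value must be range-checked explicitly.
A minimal sketch (illustrative, not the upstream code):

	#include <linux/cpumask.h>
	#include <linux/errno.h>

	/* Validate a user-supplied CPU before binding the SQPOLL thread. */
	static int validate_sq_cpu_sketch(unsigned int cpu)
	{
		if (cpu >= nr_cpu_ids)	/* explicit bounds check first */
			return -EINVAL;
		if (!cpu_online(cpu))	/* the CPU must also be online */
			return -EINVAL;
		return 0;
	}
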
diff --git a/fs/io_uring.c b/fs/io_uring.c
index fdc1832..310f8d1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -231,7 +231,6 @@
 	struct task_struct	*sqo_thread;	/* if using sq thread polling */
 	struct mm_struct	*sqo_mm;
 	wait_queue_head_t	sqo_wait;
-	unsigned		sqo_stop;
 
 	struct {
 		/* CQ ring */
@@ -329,9 +328,8 @@
 #define REQ_F_IOPOLL_COMPLETED	2	/* polled IO has completed */
 #define REQ_F_FIXED_FILE	4	/* ctx owns file */
 #define REQ_F_SEQ_PREV		8	/* sequential with previous */
-#define REQ_F_PREPPED		16	/* prep already done */
-#define REQ_F_IO_DRAIN		32	/* drain existing IO first */
-#define REQ_F_IO_DRAINED	64	/* drain done */
+#define REQ_F_IO_DRAIN		16	/* drain existing IO first */
+#define REQ_F_IO_DRAINED	32	/* drain done */
 	u64			user_data;
 	u32			error;	/* iopoll result from callback */
 	u32			sequence;
@@ -490,7 +488,7 @@
 }
 
 static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
-				 long res, unsigned ev_flags)
+				 long res)
 {
 	struct io_uring_cqe *cqe;
 
@@ -503,7 +501,7 @@
 	if (cqe) {
 		WRITE_ONCE(cqe->user_data, ki_user_data);
 		WRITE_ONCE(cqe->res, res);
-		WRITE_ONCE(cqe->flags, ev_flags);
+		WRITE_ONCE(cqe->flags, 0);
 	} else {
 		unsigned overflow = READ_ONCE(ctx->cq_ring->overflow);
 
@@ -522,12 +520,12 @@
 }
 
 static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 user_data,
-				long res, unsigned ev_flags)
+				long res)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&ctx->completion_lock, flags);
-	io_cqring_fill_event(ctx, user_data, res, ev_flags);
+	io_cqring_fill_event(ctx, user_data, res);
 	io_commit_cqring(ctx);
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
@@ -629,7 +627,7 @@
 		req = list_first_entry(done, struct io_kiocb, list);
 		list_del(&req->list);
 
-		io_cqring_fill_event(ctx, req->user_data, req->error, 0);
+		io_cqring_fill_event(ctx, req->user_data, req->error);
 		(*nr_events)++;
 
 		if (refcount_dec_and_test(&req->refs)) {
@@ -777,7 +775,7 @@
 
 	kiocb_end_write(kiocb);
 
-	io_cqring_add_event(req->ctx, req->user_data, res, 0);
+	io_cqring_add_event(req->ctx, req->user_data, res);
 	io_put_req(req);
 }
 
@@ -896,9 +894,6 @@
 
 	if (!req->file)
 		return -EBADF;
-	/* For -EAGAIN retry, everything is already prepped */
-	if (req->flags & REQ_F_PREPPED)
-		return 0;
 
 	if (force_nonblock && !io_file_supports_async(req->file))
 		force_nonblock = false;
@@ -941,7 +936,6 @@
 			return -EINVAL;
 		kiocb->ki_complete = io_complete_rw;
 	}
-	req->flags |= REQ_F_PREPPED;
 	return 0;
 }
 
@@ -1216,7 +1210,7 @@
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 
-	io_cqring_add_event(ctx, user_data, err, 0);
+	io_cqring_add_event(ctx, user_data, err);
 	io_put_req(req);
 	return 0;
 }
@@ -1227,16 +1221,12 @@
 
 	if (!req->file)
 		return -EBADF;
-	/* Prep already done (EAGAIN retry) */
-	if (req->flags & REQ_F_PREPPED)
-		return 0;
 
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
 		return -EINVAL;
 
-	req->flags |= REQ_F_PREPPED;
 	return 0;
 }
 
@@ -1265,7 +1255,7 @@
 				end > 0 ? end : LLONG_MAX,
 				fsync_flags & IORING_FSYNC_DATASYNC);
 
-	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
+	io_cqring_add_event(req->ctx, sqe->user_data, ret);
 	io_put_req(req);
 	return 0;
 }
@@ -1277,16 +1267,12 @@
 
 	if (!req->file)
 		return -EBADF;
-	/* Prep already done (EAGAIN retry) */
-	if (req->flags & REQ_F_PREPPED)
-		return 0;
 
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
 		return -EINVAL;
 
-	req->flags |= REQ_F_PREPPED;
 	return ret;
 }
 
@@ -1313,7 +1299,7 @@
 
 	ret = sync_file_range(req->rw.ki_filp, sqe_off, sqe_len, flags);
 
-	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
+	io_cqring_add_event(req->ctx, sqe->user_data, ret);
 	io_put_req(req);
 	return 0;
 }
@@ -1371,7 +1357,7 @@
 	}
 	spin_unlock_irq(&ctx->completion_lock);
 
-	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
+	io_cqring_add_event(req->ctx, sqe->user_data, ret);
 	io_put_req(req);
 	return 0;
 }
@@ -1380,7 +1366,7 @@
 			     __poll_t mask)
 {
 	req->poll.done = true;
-	io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask), 0);
+	io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask));
 	io_commit_cqring(ctx);
 }
 
@@ -1700,7 +1686,7 @@
 		io_put_req(req);
 
 		if (ret) {
-			io_cqring_add_event(ctx, sqe->user_data, ret, 0);
+			io_cqring_add_event(ctx, sqe->user_data, ret);
 			io_put_req(req);
 		}
 
@@ -2005,7 +1991,7 @@
 			continue;
 		}
 
-		io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret, 0);
+		io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret);
 	}
 
 	if (statep)
@@ -2028,7 +2014,7 @@
 	set_fs(USER_DS);
 
 	timeout = inflight = 0;
-	while (!kthread_should_stop() && !ctx->sqo_stop) {
+	while (!kthread_should_park()) {
 		bool all_fixed, mm_fault = false;
 		int i;
 
@@ -2090,7 +2076,7 @@
 			smp_mb();
 
 			if (!io_get_sqring(ctx, &sqes[0])) {
-				if (kthread_should_stop()) {
+				if (kthread_should_park()) {
 					finish_wait(&ctx->sqo_wait, &wait);
 					break;
 				}
@@ -2140,8 +2126,7 @@
 		mmput(cur_mm);
 	}
 
-	if (kthread_should_park())
-		kthread_parkme();
+	kthread_parkme();
 
 	return 0;
 }
@@ -2170,7 +2155,7 @@
 
 		ret = io_submit_sqe(ctx, &s, statep);
 		if (ret)
-			io_cqring_add_event(ctx, s.sqe->user_data, ret, 0);
+			io_cqring_add_event(ctx, s.sqe->user_data, ret);
 	}
 	io_commit_sqring(ctx);
 
@@ -2182,6 +2167,8 @@
 
 static unsigned io_cqring_events(struct io_cq_ring *ring)
 {
+	/* See comment at the top of this file */
+	smp_rmb();
 	return READ_ONCE(ring->r.tail) - READ_ONCE(ring->r.head);
 }
 
@@ -2194,11 +2181,8 @@
 {
 	struct io_cq_ring *ring = ctx->cq_ring;
 	sigset_t ksigmask, sigsaved;
-	DEFINE_WAIT(wait);
 	int ret;
 
-	/* See comment at the top of this file */
-	smp_rmb();
 	if (io_cqring_events(ring) >= min_events)
 		return 0;
 
@@ -2216,23 +2200,10 @@
 			return ret;
 	}
 
-	do {
-		prepare_to_wait(&ctx->wait, &wait, TASK_INTERRUPTIBLE);
-
-		ret = 0;
-		/* See comment at the top of this file */
-		smp_rmb();
-		if (io_cqring_events(ring) >= min_events)
-			break;
-
-		schedule();
-
+	ret = wait_event_interruptible(ctx->wait,
+				io_cqring_events(ring) >= min_events);
+	if (ret == -ERESTARTSYS)
 		ret = -EINTR;
-		if (signal_pending(current))
-			break;
-	} while (1);
-
-	finish_wait(&ctx->wait, &wait);
 
 	if (sig)
 		restore_user_sigmask(sig, &sigsaved);
@@ -2273,8 +2243,11 @@
 static void io_sq_thread_stop(struct io_ring_ctx *ctx)
 {
 	if (ctx->sqo_thread) {
-		ctx->sqo_stop = 1;
-		mb();
+		/*
+		 * The park is a bit of a work-around, without it we get
+		 * warning spews on shutdown with SQPOLL set and affinity
+		 * set to a single CPU.
+		 */
 		kthread_park(ctx->sqo_thread);
 		kthread_stop(ctx->sqo_thread);
 		ctx->sqo_thread = NULL;
@@ -2467,10 +2440,11 @@
 			ctx->sq_thread_idle = HZ;
 
 		if (p->flags & IORING_SETUP_SQ_AFF) {
-			int cpu = array_index_nospec(p->sq_thread_cpu,
-							nr_cpu_ids);
+			int cpu = p->sq_thread_cpu;
 
 			ret = -EINVAL;
+			if (cpu >= nr_cpu_ids)
+				goto err;
 			if (!cpu_online(cpu))
 				goto err;