rpma: gpspm: introduce the busy_wait_polling toggle

The performance of the librpma_gpspm engine depends heavily
on how much CPU power it can use to do its work.
One may want either to take all available CPU power
and see what the maximum possible performance is,
or to configure it less aggressively and collect the results
when the CPU is not solely dedicated to doing this one task.

The librpma_gpspm engine allows toggling between these two modes
by either waiting for incoming requests in the kernel
using rpma_conn_completion_wait() (busy_wait_polling=0)
or trying to collect the completion as soon as it appears
by polling all the time using rpma_conn_completion_get()
(busy_wait_polling=1).

Signed-off-by: Oksana Salyk <oksana.salyk@intel.com>
diff --git a/HOWTO b/HOWTO
index e6078c5..889526d 100644
--- a/HOWTO
+++ b/HOWTO
@@ -2237,6 +2237,11 @@
 	Set to 1 only when Direct Write to PMem from the remote host is possible.
 	Otherwise, set to 0.
 
+.. option:: busy_wait_polling=bool : [librpma_*_server]
+
+	Set to 0 to wait for completion instead of busy-wait polling completion.
+	Default: 1.
+
 .. option:: interface=str : [netsplice] [net]
 
 	The IP address of the network interface used to send or receive UDP
diff --git a/engines/librpma_fio.c b/engines/librpma_fio.c
index 810b55e..3d605ed 100644
--- a/engines/librpma_fio.c
+++ b/engines/librpma_fio.c
@@ -50,6 +50,17 @@
 		.group	= FIO_OPT_G_LIBRPMA,
 	},
 	{
+		.name	= "busy_wait_polling",
+		.lname	= "Set to 0 to wait for completion instead of busy-wait polling completion.",
+		.type	= FIO_OPT_BOOL,
+		.off1	= offsetof(struct librpma_fio_options_values,
+					busy_wait_polling),
+		.help	= "Set to false if you want to reduce CPU usage",
+		.def	= "1",
+		.category = FIO_OPT_C_ENGINE,
+		.group	= FIO_OPT_G_LIBRPMA,
+	},
+	{
 		.name	= NULL,
 	},
 };
diff --git a/engines/librpma_fio.h b/engines/librpma_fio.h
index 8cfb2e2..fb89d99 100644
--- a/engines/librpma_fio.h
+++ b/engines/librpma_fio.h
@@ -41,6 +41,8 @@
 	char *port;
 	/* Direct Write to PMem is possible */
 	unsigned int direct_write_to_pmem;
+	/* Set to 0 to wait for completion instead of busy-wait polling completion. */
+	unsigned int busy_wait_polling;
 };
 
 extern struct fio_option librpma_fio_options[];
diff --git a/engines/librpma_gpspm.c b/engines/librpma_gpspm.c
index ac614f4..7414770 100644
--- a/engines/librpma_gpspm.c
+++ b/engines/librpma_gpspm.c
@@ -683,12 +683,33 @@
 	struct librpma_fio_server_data *csd = td->io_ops_data;
 	struct server_data *sd = csd->server_data;
 	struct rpma_completion *cmpl = &sd->msgs_queued[sd->msg_queued_nr];
+	struct librpma_fio_options_values *o = td->eo;
 	int ret;
 
 	ret = rpma_conn_completion_get(csd->conn, cmpl);
 	if (ret == RPMA_E_NO_COMPLETION) {
-		/* lack of completion is not an error */
-		return 0;
+		if (o->busy_wait_polling == 0) {
+			ret = rpma_conn_completion_wait(csd->conn);
+			if (ret == RPMA_E_NO_COMPLETION) {
+				/* lack of completion is not an error */
+				return 0;
+			} else if (ret != 0) {
+				librpma_td_verror(td, ret, "rpma_conn_completion_wait");
+				goto err_terminate;
+			}
+
+			ret = rpma_conn_completion_get(csd->conn, cmpl);
+			if (ret == RPMA_E_NO_COMPLETION) {
+				/* lack of completion is not an error */
+				return 0;
+			} else if (ret != 0) {
+				librpma_td_verror(td, ret, "rpma_conn_completion_get");
+				goto err_terminate;
+			}
+		} else {
+			/* lack of completion is not an error */
+			return 0;
+		}
 	} else if (ret != 0) {
 		librpma_td_verror(td, ret, "rpma_conn_completion_get");
 		goto err_terminate;
diff --git a/examples/librpma_gpspm-server.fio b/examples/librpma_gpspm-server.fio
index d618f2d..67e92a2 100644
--- a/examples/librpma_gpspm-server.fio
+++ b/examples/librpma_gpspm-server.fio
@@ -20,6 +20,8 @@
 # set to 1 (true) ONLY when Direct Write to PMem from the remote host is possible
 # (https://pmem.io/rpma/documentation/basic-direct-write-to-pmem.html)
 direct_write_to_pmem=0
+# set to 0 (false) to wait for completion instead of busy-wait polling completion.
+busy_wait_polling=1
 numjobs=1 # number of expected incomming connections
 iodepth=2 # number of parallel GPSPM requests
 size=100MiB # size of workspace for a single connection
diff --git a/fio.1 b/fio.1
index 18dc156..c391616 100644
--- a/fio.1
+++ b/fio.1
@@ -1999,6 +1999,10 @@
 .BI (librpma_*_server)direct_write_to_pmem \fR=\fPbool
 Set to 1 only when Direct Write to PMem from the remote host is possible. Otherwise, set to 0.
 .TP
+.BI (librpma_*_server)busy_wait_polling \fR=\fPbool
+Set to 0 to wait for completion instead of busy-wait polling completion.
+Default: 1.
+.TP
 .BI (netsplice,net)interface \fR=\fPstr
 The IP address of the network interface used to send or receive UDP
 multicast.