/*
* Software iWARP device driver for Linux
*
* Authors: Bernard Metzler <bmt@zurich.ibm.com>
* Fredy Neeser <nfd@zurich.ibm.com>
*
* Copyright (c) 2008-2010, IBM Corporation
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of IBM nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/net.h>
#include <linux/file.h>
#include <linux/scatterlist.h>
#include <linux/highmem.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/tcp.h>
#include <rdma/iw_cm.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_umem.h>
#include "siw.h"
#include "siw_obj.h"
#include "siw_cm.h"
char siw_qp_state_to_string[SIW_QP_STATE_COUNT][sizeof "TERMINATE"] = {
[SIW_QP_STATE_IDLE] = "IDLE",
[SIW_QP_STATE_RTR] = "RTR",
[SIW_QP_STATE_RTS] = "RTS",
[SIW_QP_STATE_CLOSING] = "CLOSING",
[SIW_QP_STATE_TERMINATE] = "TERMINATE",
[SIW_QP_STATE_ERROR] = "ERROR",
[SIW_QP_STATE_MORIBUND] = "MORIBUND",
[SIW_QP_STATE_UNDEF] = "UNDEF"
};
/*
* iWARP (RDMAP, DDP and MPA) parameters as well as Softiwarp settings on a
* per-RDMAP-message basis. Please keep the order of initializers: the table
* is indexed by RDMAP opcode. All MPA length fields are initialized to the
* minimum packet size.
*/
struct iwarp_msg_info iwarp_pktinfo[RDMAP_TERMINATE + 1] =
{ {
.hdr_len = sizeof(struct iwarp_rdma_write),
.ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_write) - 2),
.ctrl.dv = DDP_VERSION,
.ctrl.opcode = RDMAP_RDMA_WRITE,
.ctrl.rv = RDMAP_VERSION,
.ctrl.t = 1,
.ctrl.l = 1,
.proc_data = siw_proc_write
},
{
.hdr_len = sizeof(struct iwarp_rdma_rreq),
.ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rreq) - 2),
.ctrl.dv = DDP_VERSION,
.ctrl.opcode = RDMAP_RDMA_READ_REQ,
.ctrl.rv = RDMAP_VERSION,
.ctrl.t = 0,
.ctrl.l = 1,
.proc_data = siw_proc_rreq
},
{
.hdr_len = sizeof(struct iwarp_rdma_rresp),
.ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rresp) - 2),
.ctrl.dv = DDP_VERSION,
.ctrl.opcode = RDMAP_RDMA_READ_RESP,
.ctrl.rv = RDMAP_VERSION,
.ctrl.t = 1,
.ctrl.l = 1,
.proc_data = siw_proc_rresp
},
{
.hdr_len = sizeof(struct iwarp_send),
.ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
.ctrl.dv = DDP_VERSION,
.ctrl.opcode = RDMAP_SEND,
.ctrl.rv = RDMAP_VERSION,
.ctrl.t = 0,
.ctrl.l = 1,
.proc_data = siw_proc_send
},
{
.hdr_len = sizeof(struct iwarp_send_inv),
.ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
.ctrl.dv = DDP_VERSION,
.ctrl.opcode = RDMAP_SEND_INVAL,
.ctrl.rv = RDMAP_VERSION,
.ctrl.t = 0,
.ctrl.l = 1,
.proc_data = siw_proc_unsupp
},
{
.hdr_len = sizeof(struct iwarp_send),
.ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
.ctrl.dv = DDP_VERSION,
.ctrl.opcode = RDMAP_SEND_SE,
.ctrl.rv = RDMAP_VERSION,
.ctrl.t = 0,
.ctrl.l = 1,
.proc_data = siw_proc_send
},
{
.hdr_len = sizeof(struct iwarp_send_inv),
.ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
.ctrl.dv = DDP_VERSION,
.ctrl.opcode = RDMAP_SEND_SE_INVAL,
.ctrl.rv = RDMAP_VERSION,
.ctrl.t = 0,
.ctrl.l = 1,
.proc_data = siw_proc_unsupp
},
{
.hdr_len = sizeof(struct iwarp_terminate),
.ctrl.mpa_len = htons(sizeof(struct iwarp_terminate) - 2),
.ctrl.dv = DDP_VERSION,
.ctrl.opcode = RDMAP_TERMINATE,
.ctrl.rv = RDMAP_VERSION,
.ctrl.t = 0,
.ctrl.l = 1,
.proc_data = siw_proc_terminate
} };
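/*
* siw_qp_llp_data_ready()
*
* Socket data_ready callback: if the QP is in RTS state, let
* tcp_read_sock() feed received TCP payload into the iWARP
* receive path via siw_tcp_rx_data().
*/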
static void siw_qp_llp_data_ready(struct sock *sk, int flags)
{
struct siw_qp *qp;
read_lock(&sk->sk_callback_lock);
if (unlikely(!sk->sk_user_data || !sk_to_qp(sk))) {
dprint(DBG_ON, " No QP: %p\n", sk->sk_user_data);
goto done;
}
qp = sk_to_qp(sk);
if (down_read_trylock(&qp->state_lock)) {
read_descriptor_t rd_desc = {.arg.data = qp, .count = 1};
dprint(DBG_SK|DBG_RX, "(QP%d): "
"state (before tcp_read_sock)=%d, flags=%x\n",
QP_ID(qp), qp->attrs.state, flags);
if (likely(qp->attrs.state == SIW_QP_STATE_RTS))
/*
* Implements data receive operation during
* socket callback. TCP gracefully catches
* the case where there is nothing to receive
* (not calling siw_tcp_rx_data() then).
*/
tcp_read_sock(sk, &rd_desc, siw_tcp_rx_data);
dprint(DBG_SK|DBG_RX, "(QP%d): "
"state (after tcp_read_sock)=%d, flags=%x\n",
QP_ID(qp), qp->attrs.state, flags);
up_read(&qp->state_lock);
} else {
dprint(DBG_SK|DBG_RX, "(QP%d): "
"Unable to acquire state_lock\n", QP_ID(qp));
}
done:
read_unlock(&sk->sk_callback_lock);
}
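/*
* siw_qp_llp_close()
*
* Handle closing of the LLP (TCP) connection: suspend RX and TX,
* detach the socket and move the QP into ERROR or IDLE state,
* depending on its current state. Flushes SQ and RQ.
*/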
void siw_qp_llp_close(struct siw_qp *qp)
{
dprint(DBG_CM, "(QP%d): Enter: SIW QP state = %s, cep=0x%p\n",
QP_ID(qp), siw_qp_state_to_string[qp->attrs.state],
qp->cep);
down_write(&qp->state_lock);
qp->rx_ctx.rx_suspend = 1;
qp->tx_ctx.tx_suspend = 1;
qp->attrs.llp_stream_handle = NULL;
switch (qp->attrs.state) {
case SIW_QP_STATE_RTS:
case SIW_QP_STATE_RTR:
case SIW_QP_STATE_IDLE:
case SIW_QP_STATE_TERMINATE:
qp->attrs.state = SIW_QP_STATE_ERROR;
break;
/*
* SIW_QP_STATE_CLOSING:
*
* This is a forced close. Shall the QP be moved to
* ERROR or IDLE?
*/
case SIW_QP_STATE_CLOSING:
if (!TX_IDLE(qp))
qp->attrs.state = SIW_QP_STATE_ERROR;
else
qp->attrs.state = SIW_QP_STATE_IDLE;
break;
default:
dprint(DBG_CM, " No state transition needed: %d\n",
qp->attrs.state);
break;
}
siw_sq_flush(qp);
siw_rq_flush(qp);
up_write(&qp->state_lock);
dprint(DBG_CM, "(QP%d): Exit: SIW QP state = %s\n",
QP_ID(qp), siw_qp_state_to_string[qp->attrs.state]);
}
/*
* socket callback routine informing about newly available send space.
* Function schedules SQ work for processing SQ items.
*/
static void siw_qp_llp_write_space(struct sock *sk)
{
struct siw_qp *qp = sk_to_qp(sk);
/*
* TODO:
* Mirror the sk_stream_write_space() logic under iWARP constraints:
* clear SOCK_NOSPACE only if the send space can hold a reasonably
* sized FPDU.
*/
#ifdef SIW_TX_FULLSEGS
struct socket *sock = sk->sk_socket;
if (sk_stream_wspace(sk) >= (int)qp->tx_ctx.fpdu_len && sock) {
clear_bit(SOCK_NOSPACE, &sock->flags);
siw_sq_queue_work(qp);
}
#else
sk_stream_write_space(sk);
if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
siw_sq_queue_work(qp);
#endif
}
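/*
* siw_qp_socket_assoc()
*
* Associate a socket with the QP and install siw's
* data_ready and write_space callbacks.
*/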
static void siw_qp_socket_assoc(struct socket *s, struct siw_qp *qp)
{
struct sock *sk = s->sk;
write_lock_bh(&sk->sk_callback_lock);
qp->attrs.llp_stream_handle = s;
s->sk->sk_data_ready = siw_qp_llp_data_ready;
s->sk->sk_write_space = siw_qp_llp_write_space;
write_unlock_bh(&sk->sk_callback_lock);
}
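/*
* siw_qp_irq_init()
*
* Preallocate WQEs for inbound RDMA Read Response processing
* and put them on the QP's WQE free list. @i is the IRD.
*/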
static int siw_qp_irq_init(struct siw_qp *qp, int i)
{
struct siw_wqe *wqe;
dprint(DBG_CM|DBG_WR, "(QP%d): irq size: %d\n", QP_ID(qp), i);
INIT_LIST_HEAD(&qp->wqe_freelist);
/*
* Give the IRD one extra entry, since after sending
* an RResponse the peer may trigger another RRequest
* before that RResponse's WQE goes back to the free list.
*/
i++;
while (i--) {
wqe = kzalloc(sizeof(struct siw_wqe), GFP_KERNEL);
if (!wqe) {
siw_qp_freeq_flush(qp);
return -ENOMEM;
}
INIT_LIST_HEAD(&wqe->list);
wr_type(wqe) = SIW_WR_RDMA_READ_RESP;
list_add(&wqe->list, &qp->wqe_freelist);
}
return 0;
}
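/*
* siw_send_terminate()
*
* Placeholder for sending an iWARP TERMINATE message
* (not yet implemented).
*/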
static void siw_send_terminate(struct siw_qp *qp)
{
struct iwarp_terminate pkt;
memset(&pkt, 0, sizeof pkt);
/*
* TODO: send TERMINATE
*/
dprint(DBG_CM, "(QP%d): Todo\n", QP_ID(qp));
}
/*
* siw_qp_modify()
*
* Change QP attributes and/or drive the SIW QP state machine
* according to @mask. Where a state change requires dropping
* the LLP connection, siw_qp_cm_drop() is called.
*
* Caller holds qp->state_lock.
*/
int
siw_qp_modify(struct siw_qp *qp, struct siw_qp_attrs *attrs,
enum siw_qp_attr_mask mask)
{
int drop_conn, rv;
if (!mask)
return 0;
dprint(DBG_CM, "(QP%d)\n", QP_ID(qp));
if (mask != SIW_QP_ATTR_STATE) {
/*
* changes of qp attributes (maybe state, too)
*/
if (mask & SIW_QP_ATTR_ACCESS_FLAGS) {
if (attrs->flags & SIW_RDMA_BIND_ENABLED)
qp->attrs.flags |= SIW_RDMA_BIND_ENABLED;
else
qp->attrs.flags &= ~SIW_RDMA_BIND_ENABLED;
if (attrs->flags & SIW_RDMA_WRITE_ENABLED)
qp->attrs.flags |= SIW_RDMA_WRITE_ENABLED;
else
qp->attrs.flags &= ~SIW_RDMA_WRITE_ENABLED;
if (attrs->flags & SIW_RDMA_READ_ENABLED)
qp->attrs.flags |= SIW_RDMA_READ_ENABLED;
else
qp->attrs.flags &= ~SIW_RDMA_READ_ENABLED;
}
/*
* TODO: what else ??
*/
}
if (!(mask & SIW_QP_ATTR_STATE))
return 0;
dprint(DBG_CM, "(QP%d): SIW QP state: %s => %s\n", QP_ID(qp),
siw_qp_state_to_string[qp->attrs.state],
siw_qp_state_to_string[attrs->state]);
drop_conn = 0;
switch (qp->attrs.state) {
case SIW_QP_STATE_IDLE:
case SIW_QP_STATE_RTR:
switch (attrs->state) {
case SIW_QP_STATE_RTS:
if (!(mask & SIW_QP_ATTR_LLP_HANDLE)) {
dprint(DBG_ON, "(QP%d): socket?\n", QP_ID(qp));
return -EINVAL;
}
if (!(mask & SIW_QP_ATTR_MPA)) {
dprint(DBG_ON, "(QP%d): MPA?\n", QP_ID(qp));
return -EINVAL;
}
dprint(DBG_CM, "(QP%d): Enter RTS: "
"peer 0x%08x, local 0x%08x\n", QP_ID(qp),
qp->cep->llp.raddr.sin_addr.s_addr,
qp->cep->llp.laddr.sin_addr.s_addr);
/*
* Initialize global iWARP TX state
*/
qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 0;
qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 0;
qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 0;
/*
* Initialize global iWARP RX state
*/
qp->rx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 1;
qp->rx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 1;
qp->rx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 1;
/*
* init IRD freequeue, caller has already checked
* limits
*/
rv = siw_qp_irq_init(qp, attrs->ird);
if (rv)
return rv;
atomic_set(&qp->orq_space, attrs->ord);
qp->attrs.ord = attrs->ord;
qp->attrs.ird = attrs->ird;
qp->attrs.mpa = attrs->mpa;
/*
* move socket rx and tx under qp's control
*/
siw_qp_socket_assoc(attrs->llp_stream_handle, qp);
qp->attrs.state = SIW_QP_STATE_RTS;
/*
* set initial mss
*/
qp->tx_ctx.tcp_seglen =
get_tcp_mss(attrs->llp_stream_handle->sk);
break;
case SIW_QP_STATE_ERROR:
siw_rq_flush(qp);
qp->attrs.state = SIW_QP_STATE_ERROR;
drop_conn = 1;
break;
case SIW_QP_STATE_RTR:
/* ignore */
break;
default:
dprint(DBG_CM,
" QP state transition undefined: %s => %s\n",
siw_qp_state_to_string[qp->attrs.state],
siw_qp_state_to_string[attrs->state]);
break;
}
break;
case SIW_QP_STATE_RTS:
switch (attrs->state) {
case SIW_QP_STATE_CLOSING:
/*
* Verbs: move to IDLE if SQ and ORQ are empty.
* Move to ERROR otherwise. But first of all we must
* close the connection. So we keep CLOSING or ERROR
* as a transient state, schedule connection drop work
* and wait for the socket state change upcall to
* come back closed.
*/
if (TX_IDLE(qp))
qp->attrs.state = SIW_QP_STATE_CLOSING;
else {
qp->attrs.state = SIW_QP_STATE_ERROR;
siw_sq_flush(qp);
}
siw_rq_flush(qp);
drop_conn = 1;
break;
case SIW_QP_STATE_TERMINATE:
qp->attrs.state = SIW_QP_STATE_TERMINATE;
siw_send_terminate(qp);
drop_conn = 1;
break;
case SIW_QP_STATE_ERROR:
/*
* This is an emergency close.
*
* Any in-progress transmit operation will get
* cancelled. This will likely result in a
* protocol failure if a TX operation is in
* transit. The caller could unconditionally wait
* to give the current operation a chance to
* complete.
* Especially, how should the non-empty IRQ case
* be handled? The peer was asking for data
* transfer at a valid point in time.
*/
siw_sq_flush(qp);
siw_rq_flush(qp);
qp->attrs.state = SIW_QP_STATE_ERROR;
drop_conn = 1;
break;
default:
dprint(DBG_ON,
" QP state transition undefined: %s => %s\n",
siw_qp_state_to_string[qp->attrs.state],
siw_qp_state_to_string[attrs->state]);
break;
}
break;
case SIW_QP_STATE_TERMINATE:
switch (attrs->state) {
case SIW_QP_STATE_ERROR:
siw_rq_flush(qp);
qp->attrs.state = SIW_QP_STATE_ERROR;
if (!TX_IDLE(qp))
siw_sq_flush(qp);
break;
default:
dprint(DBG_ON,
" QP state transition undefined: %s => %s\n",
siw_qp_state_to_string[qp->attrs.state],
siw_qp_state_to_string[attrs->state]);
}
break;
case SIW_QP_STATE_CLOSING:
switch (attrs->state) {
case SIW_QP_STATE_IDLE:
BUG_ON(!TX_IDLE(qp));
qp->attrs.state = SIW_QP_STATE_IDLE;
break;
case SIW_QP_STATE_CLOSING:
/*
* The LLP may have already moved the QP to CLOSING
* due to a graceful peer close initiation.
*/
break;
case SIW_QP_STATE_ERROR:
/*
* The QP was moved to CLOSING by an LLP event
* not yet seen by the user.
*/
qp->attrs.state = SIW_QP_STATE_ERROR;
if (!TX_IDLE(qp))
siw_sq_flush(qp);
siw_rq_flush(qp);
break;
default:
dprint(DBG_CM,
" QP state transition undefined: %s => %s\n",
siw_qp_state_to_string[qp->attrs.state],
siw_qp_state_to_string[attrs->state]);
return -ECONNABORTED;
}
break;
default:
dprint(DBG_CM, " NOP: State: %d\n", qp->attrs.state);
break;
}
if (drop_conn)
siw_qp_cm_drop(qp, 0);
return 0;
}
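/*
* siw_get_ofaqp()
*
* Resolve an OFA QP ID to the embedded struct ib_qp of the
* corresponding SIW QP, or return NULL if no such QP exists.
*/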
struct ib_qp *siw_get_ofaqp(struct ib_device *dev, int id)
{
struct siw_qp *qp = siw_qp_id2obj(siw_dev_ofa2siw(dev), id);
dprint(DBG_OBJ, ": dev_name: %s, OFA QPID: %d, QP: %p\n",
dev->name, id, qp);
if (qp) {
/*
* siw_qp_id2obj() increments object reference count
*/
siw_qp_put(qp);
dprint(DBG_OBJ, " QPID: %d\n", QP_ID(qp));
return &qp->ofa_qp;
}
return NULL;
}
/*
* siw_check_mem()
*
* Check protection domain, STAG state, access permissions and
* address range for memory object.
*
* @pd: Protection Domain memory should belong to
* @mem: memory to be checked
* @addr: starting address of the memory access to be checked
* @perms: requested access permissions
* @len: len of memory interval to be checked
*
*/
int siw_check_mem(struct siw_pd *pd, struct siw_mem *mem, u64 addr,
enum siw_access_flags perms, int len)
{
if (siw_mem2mr(mem)->pd != pd) {
dprint(DBG_WR|DBG_ON, "(PD%d): PD mismatch %p : %p\n",
OBJ_ID(pd),
siw_mem2mr(mem)->pd, pd);
return -EINVAL;
}
if (mem->stag_state == STAG_INVALID) {
dprint(DBG_WR|DBG_ON, "(PD%d): STAG 0x%08x invalid\n",
OBJ_ID(pd), OBJ_ID(mem));
return -EPERM;
}
/*
* check access permissions
*/
if ((mem->perms & perms) < perms) {
dprint(DBG_WR|DBG_ON, "(PD%d): "
"INSUFFICIENT permissions 0x%08x : 0x%08x\n",
OBJ_ID(pd), mem->perms, perms);
return -EPERM;
}
/*
* Check address interval: we relax the check to allow memory shrunk
* from the start address _after_ placing or fetching len bytes.
* TODO: this relaxation is probably overdone
*/
if (addr < mem->va || addr + len > mem->va + mem->len) {
dprint(DBG_WR|DBG_ON, "(PD%d): MEM interval len %d "
"[0x%016llx, 0x%016llx) out of bounds "
"[0x%016llx, 0x%016llx) for LKey=0x%08x\n",
OBJ_ID(pd), len, (unsigned long long)addr,
(unsigned long long)(addr + len),
(unsigned long long)mem->va,
(unsigned long long)(mem->va + mem->len),
OBJ_ID(mem));
return -EINVAL;
}
return 0;
}
/*
* siw_check_sge()
*
* Check SGE for access rights in given interval
*
* @pd: Protection Domain memory should belong to
* @sge: SGE to be checked
* @perms: requested access permissions
* @off: starting offset in SGE
* @len: len of memory interval to be checked
*
* NOTE: The function references the SGE's memory object (sge->mem)
* if not yet done. The new reference is kept if the check succeeds
* and released if it fails. If sge->mem is already valid, no new
* lookup is done and mem is not released if the check fails.
*/
int
siw_check_sge(struct siw_pd *pd, struct siw_sge *sge,
enum siw_access_flags perms, u32 off, int len)
{
struct siw_dev *dev = pd->hdr.dev;
struct siw_mem *mem;
int new_ref = 0, rv = 0;
if (len + off > sge->len) {
rv = -EPERM;
goto fail;
}
if (sge->mem.obj == NULL) {
mem = siw_mem_id2obj(dev, sge->lkey >> 8);
if (!mem) {
rv = -EINVAL;
goto fail;
}
sge->mem.obj = mem;
new_ref = 1;
} else {
mem = sge->mem.obj;
new_ref = 0;
}
rv = siw_check_mem(pd, mem, sge->addr + off, perms, len);
if (rv)
goto fail;
return 0;
fail:
if (new_ref) {
siw_mem_put(mem);
sge->mem.obj = NULL;
}
return rv;
}
/*
* siw_check_sgl()
*
* Check permissions for a list of SGE's (SGL)
*
* @pd: Protection Domain SGL should belong to
* @sge: List of SGE to be checked
* @perms: requested access permissions
* @off: starting offset in SGL
* @len: len of memory interval to be checked
*
* The function checks only the subinterval of the SGL described by
* byte length @len; the check starts at byte offset @off, which must
* be within the length of the first SGE.
*
* The caller is responsible for keeping @len + @off within the
* total byte length of the SGL.
*/
int siw_check_sgl(struct siw_pd *pd, struct siw_sge *sge,
enum siw_access_flags perms, u32 off, int len)
{
int rv = 0;
dprint(DBG_WR, "(PD%d): Enter\n", OBJ_ID(pd));
BUG_ON(off >= sge->len);
while (len > 0) {
dprint(DBG_WR, "(PD%d): sge=%p, perms=0x%x, "
"len=%d, off=%u, sge->len=%d\n",
OBJ_ID(pd), sge, perms, len, off, sge->len);
/*
* rdma verbs: do not check stag for a zero length sge
*/
if (sge->len == 0) {
sge++;
continue;
}
rv = siw_check_sge(pd, sge, perms, off, sge->len - off);
if (rv)
break;
len -= sge->len - off;
off = 0;
sge++;
}
return rv;
}
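/*
* siw_crc_array()
*
* Add a flat buffer of @len bytes to the running CRC
* computation of @desc.
*/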
int siw_crc_array(struct hash_desc *desc, u8 *start, size_t len)
{
struct scatterlist sg;
sg_init_one(&sg, start, len);
return crypto_hash_update(desc, &sg, len);
}
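/*
* siw_crc_sg()
*
* Add @len bytes of scatterlist @sg, starting at offset @off,
* to the running CRC computation of @desc.
*/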
int siw_crc_sg(struct hash_desc *desc, struct scatterlist *sg,
int off, int len)
{
int rv;
if (off == 0)
rv = crypto_hash_update(desc, sg, len);
else {
struct scatterlist t_sg;
sg_init_table(&t_sg, 1);
sg_set_page(&t_sg, sg_page(sg), len, off);
rv = crypto_hash_update(desc, &t_sg, len);
}
return rv;
}
/*
* siw_qp_freeq_flush()
*
* Flush any WQE on the QP's free list
*/
void siw_qp_freeq_flush(struct siw_qp *qp)
{
struct list_head *pos, *n;
struct siw_wqe *wqe;
dprint(DBG_OBJ|DBG_CM|DBG_WR, "(QP%d): Enter\n", QP_ID(qp));
if (list_empty(&qp->wqe_freelist))
return;
list_for_each_safe(pos, n, &qp->wqe_freelist) {
wqe = list_entry_wqe(pos);
list_del(&wqe->list);
kfree(wqe);
}
}
/*
* siw_sq_flush()
*
* Flush SQ and ORRQ entries to CQ.
* IRRQ entries are silently dropped.
*
* TODO: Add termination code for in-progress WQE.
* TODO: An in-progress WQE may have been partially
* processed. It should be enforced that transmission
* of a started DDP segment is completed whenever
* possible.
*
* Must be called with qp state write lock held.
* Therefore, SQ and ORQ lock must not be taken.
*/
void siw_sq_flush(struct siw_qp *qp)
{
struct list_head *pos, *n;
struct siw_wqe *wqe = tx_wqe(qp);
struct siw_cq *cq = qp->scq;
int async_event = 0;
dprint(DBG_OBJ|DBG_CM|DBG_WR, "(QP%d): Enter\n", QP_ID(qp));
/*
* Flush the in-progress WQE, if any.
*/
if (wqe) {
/*
* TODO: Add iWARP Termination code
*/
tx_wqe(qp) = NULL;
dprint(DBG_WR,
" (QP%d): Flush current WQE %p, type %d\n",
QP_ID(qp), wqe, wr_type(wqe));
if (wr_type(wqe) == SIW_WR_RDMA_READ_RESP) {
siw_wqe_put(wqe);
wqe = NULL;
} else if (wr_type(wqe) != SIW_WR_RDMA_READ_REQ)
/*
* An RREQUEST is already on the ORQ
*/
list_add_tail(&wqe->list, &qp->orq);
}
if (!list_empty(&qp->irq))
list_for_each_safe(pos, n, &qp->irq) {
wqe = list_entry_wqe(pos);
dprint(DBG_WR,
" (QP%d): Flush IRQ WQE %p, status %d\n",
QP_ID(qp), wqe, wqe->wr_status);
list_del(&wqe->list);
siw_wqe_put(wqe);
}
if (!list_empty(&qp->orq))
list_for_each_safe(pos, n, &qp->orq) {
wqe = list_entry_wqe(pos);
dprint(DBG_WR,
" (QP%d): Flush ORQ WQE %p, type %d,"
" status %d\n", QP_ID(qp), wqe, wr_type(wqe),
wqe->wr_status);
if (wqe->wr_status != SR_WR_DONE) {
async_event = 1;
wqe->wc_status = IB_WC_WR_FLUSH_ERR;
wqe->wr_status = SR_WR_DONE;
}
if (cq) {
lock_cq(cq);
list_move_tail(&wqe->list, &cq->queue);
/* TODO: enforce CQ limits */
atomic_inc(&cq->qlen);
unlock_cq(cq);
} else {
list_del(&wqe->list);
siw_wqe_put(wqe);
}
}
if (!list_empty(&qp->sq))
async_event = 1;
list_for_each_safe(pos, n, &qp->sq) {
wqe = list_entry_wqe(pos);
dprint(DBG_WR,
" (QP%d): Flush SQ WQE %p, type %d\n",
QP_ID(qp), wqe, wr_type(wqe));
if (cq) {
wqe->wc_status = IB_WC_WR_FLUSH_ERR;
wqe->wr_status = SR_WR_DONE;
lock_cq(cq);
list_move_tail(&wqe->list, &cq->queue);
/* TODO: enforce CQ limits */
atomic_inc(&cq->qlen);
unlock_cq(cq);
} else {
list_del(&wqe->list);
siw_wqe_put(wqe);
}
}
atomic_set(&qp->sq_space, qp->attrs.sq_size);
if (wqe != NULL && cq != NULL && cq->ofa_cq.comp_handler != NULL)
(*cq->ofa_cq.comp_handler)(&cq->ofa_cq, cq->ofa_cq.cq_context);
if (async_event)
siw_async_ev(qp, NULL, IB_EVENT_SQ_DRAINED);
}
/*
* siw_rq_flush()
*
* Flush recv queue entries to cq. An in-progress WQE may have some bytes
* processed (wqe->processed).
*
* Must be called with qp state write lock held.
* Therefore, RQ lock must not be taken.
*/
void siw_rq_flush(struct siw_qp *qp)
{
struct list_head *pos, *n;
struct siw_wqe *wqe;
struct siw_cq *cq;
dprint(DBG_OBJ|DBG_CM|DBG_WR, "(QP%d): Enter\n", QP_ID(qp));
/*
* Flush an in-progress WQE, if present
*/
if (rx_wqe(qp)) {
if (qp->rx_ctx.hdr.ctrl.opcode != RDMAP_RDMA_WRITE)
list_add(&rx_wqe(qp)->list, &qp->rq);
else
siw_mem_put(rx_mem(qp));
rx_wqe(qp) = NULL;
}
if (list_empty(&qp->rq))
return;
cq = qp->rcq;
list_for_each_safe(pos, n, &qp->rq) {
wqe = list_entry_wqe(pos);
list_del_init(&wqe->list);
if (cq) {
wqe->wc_status = IB_WC_WR_FLUSH_ERR;
lock_cq(cq);
list_add_tail(&wqe->list, &cq->queue);
/* TODO: enforce CQ limits */
atomic_inc(&cq->qlen);
unlock_cq(cq);
} else
siw_wqe_put(wqe);
if (!qp->srq)
atomic_inc(&qp->rq_space);
else
atomic_inc(&qp->srq->space);
}
if (cq != NULL && cq->ofa_cq.comp_handler != NULL)
(*cq->ofa_cq.comp_handler)(&cq->ofa_cq, cq->ofa_cq.cq_context);
}