/*
* Software iWARP device driver for Linux
*
* Authors: Bernard Metzler <bmt@zurich.ibm.com>
* Fredy Neeser <nfd@zurich.ibm.com>
*
* Copyright (c) 2008-2010, IBM Corporation
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of IBM nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/net.h>
#include <linux/scatterlist.h>
#include <linux/highmem.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/tcp.h>
#include <rdma/iw_cm.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_umem.h>
#include "siw.h"
#include "siw_obj.h"
#include "siw_cm.h"
/*
* ----------------------------
* DDP reassembly for Softiwarp
* ----------------------------
* For the ordering of transmitted DDP segments, the relevant iWARP ordering
* rules are as follows:
*
* - RDMAP (RFC 5040): Section 7.5, Rule 17:
* "RDMA Read Response Message processing at the Remote Peer (reading
* the specified Tagged Buffer) MUST be started only after the RDMA
* Read Request Message has been Delivered by the DDP layer (thus,
* all previous RDMA Messages have been properly submitted for
* ordered Placement)."
*
* - DDP (RFC 5041): Section 5.3:
* "At the Data Source, DDP:
* o MUST transmit DDP Messages in the order they were submitted to
* the DDP layer,
* o SHOULD transmit DDP Segments within a DDP Message in increasing
* MO order for Untagged DDP Messages, and in increasing TO order
* for Tagged DDP Messages."
*
* Combining these rules implies that, although RDMAP does not provide
* ordering between operations that are generated from the two ends of an
* RDMAP stream, DDP *must not* transmit an RDMA Read Response Message before
* it has finished transmitting SQ operations that were already submitted
* to the DDP layer. It follows that an iWARP transmitter must fully
* serialize RDMAP messages belonging to the same QP.
*
* Given that a TCP socket receives DDP segments in peer transmit order,
* we obtain the following ordering of received DDP segments:
*
* (i) the received DDP segments of RDMAP messages for the same QP
* cannot be interleaved
* (ii) the received DDP segments of a single RDMAP message *should*
* arrive in order.
*
* The Softiwarp transmitter obeys rule #2 in DDP Section 5.3.
* With this property, the "should" becomes a "must" in (ii) above,
* which simplifies DDP reassembly considerably.
* The Softiwarp receiver currently relies on this property
* and reports an error if DDP segments of the same RDMAP message
* do not arrive in sequence.
*/
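/*
* siw_crc_rxhdr()
*
* Re-initialize the MPA CRC state and feed the iWARP header bytes
* received so far into it.
*/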
static inline int siw_crc_rxhdr(struct siw_iwarp_rx *ctx)
{
crypto_hash_init(&ctx->mpa_crc_hd);
return siw_crc_array(&ctx->mpa_crc_hd, (u8 *)&ctx->hdr,
ctx->fpdu_part_rcvd);
}
/*
* siw_rx_umem_init()
*
* Given memory region @mr and tagged offset @t_off within @mr,
* resolve the corresponding ib_umem_chunk pointer and update the
* receive context variables to point at the receive position.
* Returns 0 on success, a negative error code otherwise.
*
* NOTE: This function expects virtual addresses.
* TODO: Function needs generalization to support relative addressing
* aka "ZBVA".
*
* @rctx: Receive Context to be updated
* @mr: Memory Region
* @t_off: Offset within Memory Region
*
*/
static int siw_rx_umem_init(struct siw_iwarp_rx *rctx, struct siw_mr *mr,
u64 t_off)
{
struct ib_umem_chunk *chunk;
u64 off_mr; /* offset into MR */
int psge_idx; /* Index of PSGE */
off_mr = t_off - (mr->mem.va & PAGE_MASK);
/*
* Equivalent to
* off_mr = t_off - mr->mem.va;
* off_mr += mr->umem->offset;
*/
/* Skip pages not referenced by t_off */
psge_idx = off_mr >> PAGE_SHIFT;
list_for_each_entry(chunk, &mr->umem->chunk_list, list) {
if (psge_idx < chunk->nents)
break;
psge_idx -= chunk->nents;
}
if (psge_idx >= chunk->nents) {
dprint(DBG_MM|DBG_ON, "(QP%d): Short chunk list\n",
RX_QPID(rctx));
return -EINVAL;
}
rctx->pg_idx = psge_idx;
rctx->pg_off = off_mr & ~PAGE_MASK;
rctx->umem_chunk = chunk;
dprint(DBG_MM, "(QP%d): New chunk, idx %d\n", RX_QPID(rctx), psge_idx);
return 0;
}
/*
* siw_rx_umem()
*
* Receive data of @len into target referenced by @rctx.
* This function does not check whether the umem referenced by
* @rctx can hold @len bytes; the caller must have validated the
* target memory. If @umem_ends is set, the chunk position pointers
* are not advanced past the current receive position.
*
* @rctx: Receive Context
* @len: Number of bytes to place
* @umem_ends: 1 if the rctx chunk pointer must not be advanced past @len bytes.
*/
static int siw_rx_umem(struct siw_iwarp_rx *rctx, int len, int umem_ends)
{
struct scatterlist *p_list;
void *dest;
struct ib_umem_chunk *chunk = rctx->umem_chunk;
int pg_off = rctx->pg_off,
copied = 0,
bytes,
rv;
while (len) {
bytes = min(len, (int)PAGE_SIZE - pg_off);
p_list = &chunk->page_list[rctx->pg_idx];
dest = kmap_atomic(sg_page(p_list), KM_SOFTIRQ0);
rv = skb_copy_bits(rctx->skb, rctx->skb_offset, dest + pg_off,
bytes);
dprint(DBG_RX, "(QP%d): Page #%d, "
"bytes=%u, rv=%d returned by skb_copy_bits()\n",
RX_QPID(rctx), rctx->pg_idx, bytes, rv);
if (likely(!rv)) {
if (rctx->crc_enabled)
rv = siw_crc_sg(&rctx->mpa_crc_hd, p_list,
pg_off, bytes);
rctx->skb_offset += bytes;
copied += bytes;
len -= bytes;
pg_off += bytes;
}
kunmap_atomic(dest, KM_SOFTIRQ0);
if (unlikely(rv)) {
rctx->skb_copied += copied;
rctx->skb_new -= copied;
copied = -EFAULT;
dprint(DBG_RX|DBG_ON, "(QP%d): failed with %d\n",
RX_QPID(rctx), rv);
goto out;
}
if (pg_off == PAGE_SIZE) {
/*
* end of page
*/
pg_off = 0;
/*
* reference next page chunk if
* - all pages in chunk used AND
* - current loop fills more into this umem
* OR the next receive will go into this umem
* starting at the position where we are leaving
* the routine.
*/
if (++rctx->pg_idx == chunk->nents &&
(len > 0 || !umem_ends)) {
rctx->pg_idx = 0;
chunk = mem_chunk_next(chunk);
}
}
}
/*
* store chunk position for resume
*/
rctx->umem_chunk = chunk;
rctx->pg_off = pg_off;
rctx->skb_copied += copied;
rctx->skb_new -= copied;
out:
return copied;
}
/*
* siw_rresp_check_ntoh()
*
* Check incoming RRESP fragment header against expected
* header values and update expected values for potential next
* fragment.
*
* NOTE: This function must be called only if a RRESP DDP segment
* starts but not for fragmented consecutive pieces of an
* already started DDP segment.
*/
static inline int siw_rresp_check_ntoh(struct siw_iwarp_rx *rctx)
{
struct iwarp_rdma_rresp *rresp = &rctx->hdr.rresp;
struct siw_wqe *wqe = rctx->dest.wqe;
rresp->sink_stag = be32_to_cpu(rresp->sink_stag);
rresp->sink_to = be64_to_cpu(rresp->sink_to);
if (rctx->first_ddp_seg) {
rctx->ddp_stag = wqe->wr.rread.sge[0].lkey;
rctx->ddp_to = wqe->wr.rread.sge[0].addr;
}
if (rctx->ddp_stag != rresp->sink_stag) {
dprint(DBG_RX|DBG_ON,
" received STAG=%08x, expected STAG=%08x\n",
rresp->sink_stag, rctx->ddp_stag);
/*
* Verbs: RI_EVENT_QP_LLP_INTEGRITY_ERROR_BAD_FPDU
*/
return -EINVAL;
}
if (rctx->ddp_to != rresp->sink_to) {
dprint(DBG_RX|DBG_ON,
" received TO=%016llx, expected TO=%016llx\n",
(unsigned long long)rresp->sink_to,
(unsigned long long)rctx->ddp_to);
/*
* Verbs: RI_EVENT_QP_LLP_INTEGRITY_ERROR_BAD_FPDU
*/
return -EINVAL;
}
if (rctx->more_ddp_segs)
rctx->ddp_to += rctx->fpdu_part_rem;
else if (wqe->processed + rctx->fpdu_part_rem != wqe->bytes) {
dprint(DBG_RX|DBG_ON,
" RRESP length does not match RREQ, "
"peer sent=%d, expected %d\n",
wqe->processed + rctx->fpdu_part_rem, wqe->bytes);
return -EINVAL;
}
return 0;
}
/*
* siw_write_check_ntoh()
*
* Check incoming WRITE fragment header against expected
* header values and update expected values for potential next
* fragment
*
* NOTE: This function must be called only if a WRITE DDP segment
* starts but not for fragmented consecutive pieces of an
* already started DDP segment.
*/
static inline int siw_write_check_ntoh(struct siw_iwarp_rx *rctx)
{
struct iwarp_rdma_write *write = &rctx->hdr.rwrite;
write->sink_stag = be32_to_cpu(write->sink_stag);
write->sink_to = be64_to_cpu(write->sink_to);
if (rctx->first_ddp_seg) {
rctx->ddp_stag = write->sink_stag;
rctx->ddp_to = write->sink_to;
} else {
if (rctx->ddp_stag != write->sink_stag) {
dprint(DBG_RX|DBG_ON,
" received STAG=%08x, expected STAG=%08x\n",
write->sink_stag, rctx->ddp_stag);
/*
* Verbs: RI_EVENT_QP_LLP_INTEGRITY_ERROR_BAD_FPDU
*/
return -EINVAL;
}
if (rctx->ddp_to != write->sink_to) {
dprint(DBG_RX|DBG_ON,
" received TO=%016llx, expected TO=%016llx\n",
(unsigned long long)write->sink_to,
(unsigned long long)rctx->ddp_to);
/*
* Verbs: RI_EVENT_QP_LLP_INTEGRITY_ERROR_BAD_FPDU
*/
return -EINVAL;
}
}
/*
* Update expected target offset for next incoming DDP segment
*/
if (rctx->more_ddp_segs != 0)
rctx->ddp_to += rctx->fpdu_part_rem;
return 0;
}
/*
* siw_send_check_ntoh()
*
* Check incoming SEND fragment header against expected
* header values and update expected MSN if no next
* fragment expected
*
* NOTE: This function must be called only if a SEND DDP segment
* starts but not for fragmented consecutive pieces of an
* already started DDP segment.
*/
static inline int siw_send_check_ntoh(struct siw_iwarp_rx *rctx)
{
struct iwarp_send *send = &rctx->hdr.send;
struct siw_wqe *wqe = rctx->dest.wqe;
send->ddp_msn = be32_to_cpu(send->ddp_msn);
send->ddp_mo = be32_to_cpu(send->ddp_mo);
send->ddp_qn = be32_to_cpu(send->ddp_qn);
if (send->ddp_qn != RDMAP_UNTAGGED_QN_SEND) {
dprint(DBG_RX|DBG_ON, " Invalid DDP QN %d for SEND\n",
send->ddp_qn);
return -EINVAL;
}
if (send->ddp_msn != rctx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]) {
dprint(DBG_RX|DBG_ON, " received MSN=%d, expected MSN=%d\n",
send->ddp_msn, rctx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]);
/*
* TODO: Error handling
* async_event= RI_EVENT_QP_RQ_PROTECTION_ERROR_MSN_GAP;
* cmpl_status= RI_WC_STATUS_LOCAL_QP_CATASTROPHIC;
*/
return -EINVAL;
}
if (send->ddp_mo != wqe->processed) {
dprint(DBG_RX|DBG_ON, " Received MO=%u, expected MO=%u\n",
send->ddp_mo, wqe->processed);
/*
* Verbs: RI_EVENT_QP_LLP_INTEGRITY_ERROR_BAD_FPDU
*/
return -EINVAL;
}
if (rctx->first_ddp_seg) {
/* initialize user memory write position */
rctx->sge_idx = 0;
rctx->sge_off = 0;
}
if (wqe->bytes < wqe->processed + rctx->fpdu_part_rem) {
dprint(DBG_RX|DBG_ON, " Receive space short: %d < %d\n",
wqe->bytes - wqe->processed, rctx->fpdu_part_rem);
wqe->wc_status = IB_WC_LOC_LEN_ERR;
return -EINVAL;
}
return 0;
}
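/*
* siw_get_rqe()
*
* Fetch the next receive WQE, either from the QP's receive queue or,
* if a SRQ is attached, from the SRQ. Returns NULL if no receive
* buffer is currently posted.
*/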
static inline struct siw_wqe *siw_get_rqe(struct siw_qp *qp)
{
struct siw_wqe *wqe = NULL;
if (!qp->srq) {
lock_rq(qp);
if (!list_empty(&qp->rq)) {
wqe = list_first_wqe(&qp->rq);
list_del_init(&wqe->list);
unlock_rq(qp);
} else {
unlock_rq(qp);
dprint(DBG_RX, " QP(%d): RQ empty!\n", QP_ID(qp));
}
} else {
wqe = siw_srq_fetch_wqe(qp);
if (!wqe)
dprint(DBG_RX, " QP(%d): SRQ empty!\n", QP_ID(qp));
}
return wqe;
}
/*
* siw_proc_send:
*
* Process one incoming SEND and place data into memory referenced by
* receive wqe.
*
* Function supports partially received sends (suspending/resuming
* current receive wqe processing)
*
* return value:
* 0: reached the end of a DDP segment
* -EAGAIN: to be called again to finish the DDP segment
*/
int siw_proc_send(struct siw_qp *qp, struct siw_iwarp_rx *rctx)
{
struct siw_wqe *wqe;
struct siw_sge *sge;
struct siw_mr *mr;
u32 data_bytes, /* all data bytes available */
rcvd_bytes; /* sum of data bytes rcvd */
int rv = 0;
if (rctx->first_ddp_seg) {
WARN_ON(rx_wqe(qp) != NULL);
wqe = siw_get_rqe(qp);
if (!wqe)
return -ENOENT;
rx_wqe(qp) = wqe;
wqe->wr_status = SR_WR_INPROGRESS;
} else {
wqe = rx_wqe(qp);
if (!wqe) {
/*
* this is a siw bug!
*/
dprint(DBG_ON, "QP(%d): RQ failure\n", QP_ID(qp));
return -EPROTO;
}
}
if (rctx->state == SIW_GET_DATA_START) {
rv = siw_send_check_ntoh(rctx);
if (rv) {
siw_async_ev(qp, NULL, IB_EVENT_QP_FATAL);
return rv;
}
if (!rctx->fpdu_part_rem) /* zero length SEND */
return 0;
}
data_bytes = min(rctx->fpdu_part_rem, rctx->skb_new);
rcvd_bytes = 0;
while (data_bytes) {
struct siw_pd *pd;
u32 sge_bytes; /* data bytes avail for SGE */
int umem_ends; /* 1 if umem ends with current rcv */
sge = &wqe->wr.sgl.sge[rctx->sge_idx];
if (!sge->len) {
/* just skip empty sge's */
rctx->sge_idx++;
rctx->sge_off = 0;
continue;
}
sge_bytes = min(data_bytes, sge->len - rctx->sge_off);
/*
* check with QP's PD if no SRQ present, SRQ's PD otherwise
*/
pd = qp->srq == NULL ? qp->pd : qp->srq->pd;
rv = siw_check_sge(pd, sge, SR_MEM_LWRITE, rctx->sge_off,
sge_bytes);
if (rv) {
siw_async_ev(qp, NULL, IB_EVENT_QP_ACCESS_ERR);
break;
}
mr = siw_mem2mr(sge->mem.obj);
if (rctx->sge_off == 0) {
/*
* started a new sge: update receive pointers
*/
rv = siw_rx_umem_init(rctx, mr, sge->addr);
if (rv)
break;
}
/*
* Are we going to finish placing
* - the last fragment of the current SGE or
* - the last DDP segment (L=1) of the current RDMAP message?
*
* siw_rx_umem() must advance the umem page chunk position
* after a successful receive only if receiving into the current
* umem does not end here. The umem ends if:
* - current SGE gets completely filled, OR
* - current MPA FPDU is last AND gets consumed now
*/
umem_ends = ((sge_bytes + rctx->sge_off == sge->len) ||
(!rctx->more_ddp_segs &&
rctx->fpdu_part_rcvd + sge_bytes ==
rctx->fpdu_part_rem)) ? 1 : 0;
rv = siw_rx_umem(rctx, sge_bytes, umem_ends);
if (rv != sge_bytes) {
/*
* siw_rx_umem() must have updated
* skb_new and skb_copied
*/
wqe->processed += rcvd_bytes;
return -EINVAL;
}
rctx->sge_off += rv;
if (rctx->sge_off == sge->len) {
rctx->sge_idx++;
rctx->sge_off = 0;
}
data_bytes -= rv;
rcvd_bytes += rv;
rctx->fpdu_part_rem -= rv;
rctx->fpdu_part_rcvd += rv;
}
wqe->processed += rcvd_bytes;
if (!rctx->fpdu_part_rem)
return 0;
return (rv < 0) ? rv : -EAGAIN;
}
/*
* siw_proc_write:
*
* Place incoming WRITE after referencing and checking target buffer
* Function supports partially received WRITEs (suspending/resuming
* current receive processing)
*
* return value:
* 0: reached the end of a DDP segment
* -EAGAIN: to be called again to finish the DDP segment
*/
int siw_proc_write(struct siw_qp *qp, struct siw_iwarp_rx *rctx)
{
struct siw_dev *dev = qp->hdr.dev;
struct iwarp_rdma_write *write = &rctx->hdr.rwrite;
struct siw_mem *mem;
int bytes,
last_write,
rv;
if (rctx->state == SIW_GET_DATA_START) {
if (!rctx->fpdu_part_rem) /* zero length WRITE */
return 0;
rv = siw_write_check_ntoh(rctx);
if (rv) {
siw_async_ev(qp, NULL, IB_EVENT_QP_FATAL);
return rv;
}
}
bytes = min(rctx->fpdu_part_rem, rctx->skb_new);
/*
* NOTE: bytes is always > 0, since this routine is only
* called when payload data is available.
*/
if (rctx->first_ddp_seg) {
/* DEBUG Code, to be removed */
if (rx_mem(qp) != 0) {
dprint(DBG_RX|DBG_ON, "(QP%d): Stale rctx state!\n",
QP_ID(qp));
return -EFAULT;
}
rx_mem(qp) = siw_mem_id2obj(dev, rctx->ddp_stag >> 8);
}
if (rx_mem(qp) == NULL) {
dprint(DBG_RX|DBG_ON, "(QP%d): "
"Sink STag not found or invalid, STag=0x%08x\n",
QP_ID(qp), rctx->ddp_stag);
return -EINVAL;
}
mem = rx_mem(qp);
/*
* Rtag not checked against mem's tag again because
* hdr check guarantees same tag as before if fragmented
*/
rv = siw_check_mem(qp->pd, mem, write->sink_to + rctx->fpdu_part_rcvd,
SR_MEM_RWRITE, bytes);
if (rv) {
siw_async_ev(qp, NULL, IB_EVENT_QP_ACCESS_ERR);
return rv;
}
if (rctx->first_ddp_seg) {
rv = siw_rx_umem_init(rctx, siw_mem2mr(mem), write->sink_to);
if (rv)
return -EINVAL;
} else if (!rctx->umem_chunk) {
/*
* This should never happen.
*
* TODO: Remove tentative debug aid.
*/
dprint(DBG_RX|DBG_ON, "(QP%d): "
"Umem chunk not resolved!\n", QP_ID(qp));
return -EINVAL;
}
/*
* Are we going to place the last piece of the last
* DDP segment of the current RDMAP message?
*
* It is last if:
* - the remaining payload of this DDP segment fits into the skb
*   (rctx->fpdu_part_rem <= rctx->skb_new) AND
* - no more DDP segments of this RDMAP message follow (L bit set)
*/
last_write = ((rctx->fpdu_part_rem <= rctx->skb_new) &&
!rctx->more_ddp_segs) ? 1 : 0;
rv = siw_rx_umem(rctx, bytes, last_write);
if (rv != bytes)
return -EINVAL;
rctx->fpdu_part_rem -= rv;
rctx->fpdu_part_rcvd += rv;
if (!rctx->fpdu_part_rem)
return 0;
return (rv < 0) ? rv : -EAGAIN;
}
/*
* inbound RREQ's cannot carry user data.
*/
int siw_proc_rreq(struct siw_qp *qp, struct siw_iwarp_rx *rctx)
{
if (!rctx->fpdu_part_rem)
return 0;
dprint(DBG_ON|DBG_RX, "(QP%d): RREQ with MPA len %d\n", QP_ID(qp),
rctx->hdr.ctrl.mpa_len);
return -EPROTO;
}
/*
* siw_init_rresp:
*
* Process inbound RDMA READ REQ. Produce a pseudo READ RESPONSE WQE.
* Put it at the tail of the IRQ, if there is another WQE currently in
* transmit processing. If not, make it the current WQE to be processed
* and schedule transmit processing.
*
* Can be called from softirq context and from process
* context (RREAD socket loopback case!)
*
* return value:
* 0: success,
* failure code otherwise
*/
int siw_init_rresp(struct siw_qp *qp, struct siw_iwarp_rx *rctx)
{
struct siw_wqe *rsp;
rsp = siw_wqe_get(qp, SIW_WR_RDMA_READ_RESP);
if (rsp) {
rsp->wr.rresp.sge.len = be32_to_cpu(rctx->hdr.rreq.read_size);
rsp->bytes = rsp->wr.rresp.sge.len; /* redundant */
rsp->processed = 0;
rsp->wr.rresp.sge.addr = be64_to_cpu(rctx->hdr.rreq.source_to);
rsp->wr.rresp.num_sge = rsp->bytes ? 1 : 0;
rsp->wr.rresp.sge.mem.obj = NULL; /* defer lookup */
rsp->wr.rresp.sge.lkey =
be32_to_cpu(rctx->hdr.rreq.source_stag);
rsp->wr.rresp.raddr = be64_to_cpu(rctx->hdr.rreq.sink_to);
rsp->wr.rresp.rtag = rctx->hdr.rreq.sink_stag; /* NBO */
} else {
dprint(DBG_RX|DBG_ON, "(QP%d): IRD exceeded!\n", QP_ID(qp));
return -EPROTO;
}
rsp->wr_status = SR_WR_QUEUED;
/*
* Insert into IRQ
*
* TODO: Revisit ordering of genuine SQ WRs and Read Response
* pseudo-WRs. RDMAP specifies that there is no ordering among
* the two directions of transmission, so there is a degree of
* freedom.
*
* The current logic favours Read Responses over SQ work requests
* that are queued but not already in progress.
*/
lock_sq(qp);
if (!tx_wqe(qp)) {
tx_wqe(qp) = rsp;
unlock_sq(qp);
/*
* schedule TX work, even if the SQ was suspended due to the
* ORD limit: it is always OK (and may even prevent the peer
* application from blocking) to send RRESPONSEs
*/
siw_sq_queue_work(qp);
} else {
list_add_tail(&rsp->list, &qp->irq);
unlock_sq(qp);
}
return 0;
}
/*
* siw_proc_rresp:
*
* Place incoming RRESP data into memory referenced by RREQ WQE.
*
* Function supports partially received RRESP's (suspending/resuming
* current receive processing)
*/
int siw_proc_rresp(struct siw_qp *qp, struct siw_iwarp_rx *rctx)
{
struct siw_wqe *wqe;
struct siw_mr *mr;
struct siw_sge *sge;
int bytes,
is_last,
rv;
if (rctx->first_ddp_seg) {
WARN_ON(rx_wqe(qp) != NULL);
/*
* fetch pending RREQ from orq
*/
lock_orq(qp);
if (!list_empty(&qp->orq)) {
wqe = list_first_entry(&qp->orq, struct siw_wqe, list);
list_del_init(&wqe->list);
} else {
unlock_orq(qp);
dprint(DBG_RX|DBG_ON, "(QP%d): ORQ empty\n",
QP_ID(qp));
/*
* TODO: Should generate an async error
*/
rv = -ENODATA; /* or -ENOENT ? */
goto done;
}
unlock_orq(qp);
rx_wqe(qp) = wqe;
if (wr_type(wqe) != SIW_WR_RDMA_READ_REQ || wqe->processed) {
WARN_ON(wqe->processed);
WARN_ON(wr_type(wqe) != SIW_WR_RDMA_READ_REQ);
rv = -EINVAL;
goto done;
}
wqe->wr_status = SR_WR_INPROGRESS;
rv = siw_rresp_check_ntoh(rctx);
if (rv) {
siw_async_ev(qp, NULL, IB_EVENT_QP_FATAL);
goto done;
}
} else {
wqe = rx_wqe(qp);
if (!wqe) {
WARN_ON(1);
rv = -ENODATA;
goto done;
}
}
if (!rctx->fpdu_part_rem) /* zero length RRESPONSE */
return 0;
bytes = min(rctx->fpdu_part_rem, rctx->skb_new);
sge = wqe->wr.rread.sge; /* there is only one */
/*
* check target memory which resolves memory on first fragment
*/
rv = siw_check_sge(qp->pd, sge, SR_MEM_LWRITE, wqe->processed, bytes);
if (rv) {
dprint(DBG_RX|DBG_ON, "(QP%d): siw_check_sge failed: %d\n",
QP_ID(qp), rv);
wqe->wc_status = IB_WC_LOC_PROT_ERR;
siw_async_ev(qp, NULL, IB_EVENT_QP_ACCESS_ERR);
goto done;
}
mr = siw_mem2mr(sge->mem.obj);
if (rctx->first_ddp_seg) {
rv = siw_rx_umem_init(rctx, mr, sge->addr);
if (rv) {
wqe->wc_status = IB_WC_LOC_PROT_ERR;
goto done;
}
} else if (!rctx->umem_chunk) {
/*
* This should never happen.
*
* TODO: Remove tentative debug aid.
*/
dprint(DBG_RX|DBG_ON, "(QP%d): No target mem!\n", QP_ID(qp));
wqe->wc_status = IB_WC_GENERAL_ERR;
rv = -EPROTO;
goto done;
}
/*
* Are we going to finish placing the last DDP segment (L=1)
* of the current RDMAP message?
*
* NOTE: siw_rresp_check_ntoh() guarantees that the
* last inbound RDMAP Read Response message exactly matches
* with the RREQ WR.
*/
is_last = (bytes + wqe->processed == wqe->bytes) ? 1 : 0;
rv = siw_rx_umem(rctx, bytes, is_last);
if (rv != bytes) {
wqe->wc_status = IB_WC_GENERAL_ERR;
rv = -EINVAL;
goto done;
}
rctx->fpdu_part_rem -= rv;
rctx->fpdu_part_rcvd += rv;
wqe->processed += rv;
if (!rctx->fpdu_part_rem)
return 0;
done:
return (rv < 0) ? rv : -EAGAIN;
}
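/*
* siw_drain_pkt()
*
* Discard the remaining payload of the current DDP segment from the
* socket buffer while keeping the receive context's skb accounting
* consistent. Used for TERMINATE and unsupported opcodes.
*/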
static void siw_drain_pkt(struct siw_qp *qp, struct siw_iwarp_rx *rctx)
{
char buf[4096];
int len;
dprint(DBG_ON|DBG_RX, " (QP%d): drain %d bytes\n",
QP_ID(qp), rctx->fpdu_part_rem);
while (rctx->fpdu_part_rem) {
len = min(rctx->fpdu_part_rem, 4096);
skb_copy_bits(rctx->skb, rctx->skb_offset, buf, len);
rctx->skb_copied += len;
rctx->skb_offset += len;
rctx->skb_new -= len;
rctx->fpdu_part_rem -= len;
}
}
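/*
* Drain the payload of a DDP segment carrying an unexpected or
* unsupported opcode.
*/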
int siw_proc_unsupp(struct siw_qp *qp, struct siw_iwarp_rx *rctx)
{
WARN_ON(1);
siw_drain_pkt(qp, rctx);
return 0;
}
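/*
* Report an inbound TERMINATE message and drain its payload.
* The terminate payload is currently not interpreted beyond the
* control fields.
*/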
int siw_proc_terminate(struct siw_qp *qp, struct siw_iwarp_rx *rctx)
{
struct iwarp_terminate *term = &rctx->hdr.terminate;
printk(KERN_INFO "(QP%d): RX Terminate: etype=%d, layer=%d, ecode=%d\n",
QP_ID(qp), term->term_ctrl.etype, term->term_ctrl.layer,
term->term_ctrl.ecode);
siw_drain_pkt(qp, rctx);
return 0;
}
static int siw_get_trailer(struct siw_qp *qp, struct siw_iwarp_rx *rctx)
{
struct sk_buff *skb = rctx->skb;
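/*
* Collect pad bytes (if any) directly in front of the CRC field so
* that the padding can be fed into the CRC check once the trailer
* is complete.
*/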
u8 *tbuf = (u8 *)&rctx->trailer.crc - rctx->pad;
int avail;
avail = min(rctx->skb_new, rctx->fpdu_part_rem);
skb_copy_bits(skb, rctx->skb_offset,
tbuf + rctx->fpdu_part_rcvd, avail);
rctx->fpdu_part_rcvd += avail;
rctx->fpdu_part_rem -= avail;
rctx->skb_new -= avail;
rctx->skb_offset += avail;
rctx->skb_copied += avail;
dprint(DBG_RX, " (QP%d): %d remaining (%d)\n", QP_ID(qp),
rctx->fpdu_part_rem, avail);
if (!rctx->fpdu_part_rem) {
u32 crc_in, crc_own = 0;
/*
* check crc if required
*/
if (!rctx->crc_enabled)
return 0;
if (rctx->pad && siw_crc_array(&rctx->mpa_crc_hd,
tbuf, rctx->pad) != 0)
return -EINVAL;
crypto_hash_final(&rctx->mpa_crc_hd, (u8 *)&crc_own);
/*
* CRC32 is computed, transmitted and received directly in NBO,
* so there's never a reason to convert byte order.
*/
crc_in = rctx->trailer.crc;
if (crc_in != crc_own) {
dprint(DBG_RX|DBG_ON,
" (QP%d): CRC ERROR in:=%08x, own=%08x\n",
QP_ID(qp), crc_in, crc_own);
return -EINVAL;
}
return 0;
}
return -EAGAIN;
}
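/*
* siw_get_hdr()
*
* Assemble the iWARP header of the current FPDU. The fixed part
* (struct iwarp_ctrl) is copied first to learn and validate opcode
* and protocol versions; the opcode then determines how many more
* header bytes must be collected.
*
* Returns 0 once the complete header is received, -EAGAIN if more
* data is needed, or a negative error code on a malformed header.
*/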
static int siw_get_hdr(struct siw_iwarp_rx *rctx)
{
struct sk_buff *skb = rctx->skb;
struct iwarp_ctrl *c_hdr = &rctx->hdr.ctrl;
int bytes;
if (rctx->fpdu_part_rcvd < sizeof(struct iwarp_ctrl)) {
/*
* copy first fix part of iwarp hdr
*/
bytes = min_t(int, rctx->skb_new,
sizeof(struct iwarp_ctrl) - rctx->fpdu_part_rcvd);
skb_copy_bits(skb, rctx->skb_offset,
(char *)c_hdr + rctx->fpdu_part_rcvd, bytes);
rctx->fpdu_part_rcvd += bytes;
rctx->skb_new -= bytes;
rctx->skb_offset += bytes;
rctx->skb_copied += bytes;
if (!rctx->skb_new ||
rctx->fpdu_part_rcvd < sizeof(struct iwarp_ctrl)) {
return -EAGAIN;
}
if (c_hdr->opcode > RDMAP_TERMINATE) {
dprint(DBG_RX|DBG_ON, " opcode %d\n", c_hdr->opcode);
return -EINVAL;
}
if (c_hdr->dv != DDP_VERSION) {
dprint(DBG_RX|DBG_ON, " dversion %d\n", c_hdr->dv);
return -EINVAL;
}
if (c_hdr->rv != RDMAP_VERSION) {
dprint(DBG_RX|DBG_ON, " rversion %d\n", c_hdr->rv);
return -EINVAL;
}
dprint(DBG_RX, "(QP%d): New Header, opcode:%d\n",
RX_QPID(rctx), c_hdr->opcode);
}
/*
* figure out len of current hdr: variable length of
* iwarp hdr forces us to copy hdr information
*/
bytes = min(rctx->skb_new,
iwarp_pktinfo[c_hdr->opcode].hdr_len - rctx->fpdu_part_rcvd);
skb_copy_bits(skb, rctx->skb_offset,
(char *)c_hdr + rctx->fpdu_part_rcvd, bytes);
rctx->fpdu_part_rcvd += bytes;
rctx->skb_new -= bytes;
rctx->skb_offset += bytes;
rctx->skb_copied += bytes;
if (rctx->fpdu_part_rcvd == iwarp_pktinfo[c_hdr->opcode].hdr_len) {
/*
* HDR receive completed. Check if the current DDP segment
* starts a new RDMAP message or continues a previously
* started RDMAP message.
*
* Note well from the comments on DDP reassembly:
* - Support for unordered reception of DDP segments
* (or FPDUs) from different RDMAP messages is not needed.
* - Unordered reception of DDP segments of the same
* RDMAP message is not supported. It is probably not
* needed with most peers.
*/
siw_dprint_hdr(&rctx->hdr, RX_QPID(rctx), "HDR received");
if (rctx->more_ddp_segs != 0) {
rctx->first_ddp_seg = 0;
if (rctx->prev_ddp_opcode != c_hdr->opcode) {
dprint(DBG_ON,
"packet intersection: %d <> %d\n",
rctx->prev_ddp_opcode, c_hdr->opcode);
return -EPROTO;
}
} else {
rctx->prev_ddp_opcode = c_hdr->opcode;
rctx->first_ddp_seg = 1;
}
rctx->more_ddp_segs = (c_hdr->l == 0) ? 1 : 0;
return 0;
}
return -EAGAIN;
}
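/*
* Remaining DDP payload of the current FPDU: the MPA length field
* counts the ULPDU (DDP segment) but not the 2-byte MPA header itself,
* while fpdu_part_rcvd holds the header bytes already consumed from
* the start of the FPDU.
*/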
static inline int siw_fpdu_payload_len(struct siw_iwarp_rx *rctx)
{
return ((int)(rctx->hdr.ctrl.mpa_len) - rctx->fpdu_part_rcvd)
+ MPA_HDR_SIZE;
}
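/*
* Trailer length of the current FPDU: 0-3 pad bytes rounding the FPDU
* up to a 4-byte multiple, followed by the 4-byte MPA CRC.
*/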
static inline int siw_fpdu_trailer_len(struct siw_iwarp_rx *rctx)
{
int mpa_len = (int)rctx->hdr.ctrl.mpa_len + MPA_HDR_SIZE;
return MPA_CRC_SIZE + (-mpa_len & 0x3);
}
/*
* siw_rreq_complete()
*
* Complete the current READ REQUEST after READ RESPONSE processing.
* It may complete consecutive WQE's which were already SQ
* processed before but are awaiting completion due to completion
* ordering (see verbs 8.2.2.2).
* The READ RESPONSE may also resume SQ processing if it was stalled
* due to ORD exhaustion (see verbs 8.2.2.18).
* Completion stops at the next READ REQUEST or when the ORQ is empty.
*/
static void siw_rreq_complete(struct siw_wqe *wqe, int error)
{
struct siw_qp *qp = wqe->qp;
int num_wc = 1;
enum ib_send_flags flags;
LIST_HEAD(c_list);
flags = wr_flags(wqe);
if (flags & IB_SEND_SIGNALED)
list_add(&wqe->list, &c_list);
else {
atomic_inc(&qp->sq_space);
siw_wqe_put(wqe);
num_wc = 0;
}
lock_orq(qp);
/* More WQE's to complete following this RREQ? */
if (!list_empty(&qp->orq)) {
struct list_head *pos, *n;
list_for_each_safe(pos, n, &qp->orq) {
wqe = list_entry_wqe(pos);
if (wr_type(wqe) == SIW_WR_RDMA_READ_REQ)
break;
flags |= wr_flags(wqe);
num_wc++;
dprint(DBG_WR|DBG_ON,
"(QP%d): Resume completion, wr_type %d\n",
QP_ID(qp), wr_type(wqe));
list_move_tail(pos, &c_list);
}
}
unlock_orq(qp);
if (num_wc)
siw_sq_complete(&c_list, qp, num_wc, flags);
/*
* Check if SQ processing was stalled due to ORD limit
*/
if (ORD_SUSPEND_SQ(qp)) {
lock_sq(qp);
wqe = siw_next_tx_wqe(qp);
if (wqe && !tx_wqe(qp)) {
WARN_ON(wr_type(wqe) != SIW_WR_RDMA_READ_REQ);
list_del_init(&wqe->list);
tx_wqe(qp) = wqe;
list_add_tail(&wqe->list, &qp->orq);
unlock_sq(qp);
dprint(DBG_RX, "(QP%d): SQ resume (%d)\n",
QP_ID(qp), atomic_read(&qp->sq_space));
siw_sq_queue_work(qp);
} else {
/* only new ORQ space if not next RREQ queued */
atomic_inc(&qp->orq_space);
unlock_sq(qp);
}
} else
atomic_inc(&qp->orq_space);
}
/*
* siw_rdmap_complete()
*
* Complete processing of an RDMAP message after receiving all
* of its DDP segments:
*
* o SENDs + RRESPs need work completion,
* o RREQs need READ RESPONSE initialization,
* o WRITEs need memory dereferencing
*
* TODO: Could siw_[s,r]_complete() fail? (CQ full)
*/
static inline int siw_rdmap_complete(struct siw_qp *qp,
struct siw_iwarp_rx *rctx)
{
struct siw_wqe *wqe;
int rv = 0;
switch (rctx->hdr.ctrl.opcode) {
case RDMAP_SEND_SE:
wr_flags(rx_wqe(qp)) |= IB_SEND_SOLICITED;
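/* FALLTHROUGH: a solicited SEND completes like a plain SEND */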
case RDMAP_SEND:
rctx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]++;
wqe = rx_wqe(qp);
wqe->wc_status = IB_WC_SUCCESS;
wqe->wr_status = SR_WR_DONE;
siw_rq_complete(wqe, qp);
break;
case RDMAP_RDMA_READ_RESP:
rctx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ]++;
wqe = rx_wqe(qp);
wqe->wc_status = IB_WC_SUCCESS;
wqe->wr_status = SR_WR_DONE;
siw_rreq_complete(wqe, 0);
break;
case RDMAP_RDMA_READ_REQ:
rv = siw_init_rresp(qp, rctx);
break;
case RDMAP_RDMA_WRITE:
/*
* Free References from memory object if
* attached to receive context (inbound WRITE)
* While a zero-length WRITE is allowed, the
* current implementation does not create
* a memory reference (it is unclear if memory
* rights should be checked in that case!).
*
* TODO: check zero length WRITE semantics
*/
if (rx_mem(qp))
siw_mem_put(rx_mem(qp));
break;
default:
break;
}
rctx->umem_chunk = NULL; /* DEBUG aid, tentatively */
rx_wqe(qp) = NULL; /* also clears MEM object for WRITE */
return rv;
}
/*
* siw_rdmap_error()
*
* Abort processing of RDMAP message after failure.
* SENDs + RRESPs need receive completion, if
* already started.
*
* TODO: WRITEs need a local error to be surfaced.
*
*/
static inline void
siw_rdmap_error(struct siw_qp *qp, struct siw_iwarp_rx *rctx, int status)
{
struct siw_wqe *wqe;
switch (rctx->hdr.ctrl.opcode) {
case RDMAP_SEND_SE:
case RDMAP_SEND:
rctx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]++;
wqe = rx_wqe(qp);
if (!wqe)
return;
if (rctx->hdr.ctrl.opcode == RDMAP_SEND_SE)
wr_flags(wqe) |= IB_SEND_SOLICITED;
if (!wqe->wc_status)
wqe->wc_status = IB_WC_GENERAL_ERR;
wqe->wr_status = SR_WR_DONE;
siw_rq_complete(wqe, qp);
break;
case RDMAP_RDMA_READ_RESP:
/*
* A READ RESPONSE may flush consecutive WQE's
* which were SQ processed before
*/
rctx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ]++;
if (rctx->state == SIW_GET_HDR || status == -ENODATA)
/* eventual RREQ left untouched */
break;
wqe = rx_wqe(qp);
if (wqe) {
if (status)
wqe->wc_status = status;
else
wqe->wc_status = IB_WC_GENERAL_ERR;
wqe->wr_status = SR_WR_DONE;
/*
* All errors turn the wqe into signalled.
*/
wr_flags(wqe) |= IB_SEND_SIGNALED;
siw_rreq_complete(wqe, status);
}
break;
case RDMAP_RDMA_WRITE:
/*
* Free References from memory object if
* attached to receive context (inbound WRITE)
* While a zero-length WRITE is allowed, the
* current implementation does not create
* a memory reference (it is unclear if memory
* rights should be checked in that case!).
*
* TODO: check zero length WRITE semantics
*/
if (rx_mem(qp))
siw_mem_put(rx_mem(qp));
break;
default:
break;
}
rctx->umem_chunk = NULL; /* DEBUG aid, tentatively */
rx_wqe(qp) = NULL; /* also clears MEM object for WRITE */
}
/*
* siw_tcp_rx_data()
*
* Main routine to consume inbound TCP payload
*
* @rd_desc: read descriptor
* @skb: socket buffer
* @off: offset in skb
* @len: skb->len - offset : payload in skb
*/
int siw_tcp_rx_data(read_descriptor_t *rd_desc, struct sk_buff *skb,
unsigned int off, size_t len)
{
struct siw_qp *qp = rd_desc->arg.data;
struct siw_iwarp_rx *rctx = &qp->rx_ctx;
int rv;
rctx->skb = skb;
rctx->skb_new = skb->len - off;
rctx->skb_offset = off;
rctx->skb_copied = 0;
dprint(DBG_RX, "(QP%d): new data %d, rx-state %d\n", QP_ID(qp),
rctx->skb_new, rctx->state);
if (unlikely(rctx->rx_suspend == 1 ||
qp->attrs.state != SIW_QP_STATE_RTS)) {
dprint(DBG_RX|DBG_ON, "(QP%d): failed. state rx:%d, qp:%d\n",
QP_ID(qp), qp->rx_ctx.state, qp->attrs.state);
return 0;
}
while (rctx->skb_new) {
switch (rctx->state) {
case SIW_GET_HDR:
rv = siw_get_hdr(rctx);
if (!rv) {
if (rctx->crc_enabled &&
siw_crc_rxhdr(rctx) != 0) {
rv = -EINVAL;
break;
}
rctx->hdr.ctrl.mpa_len =
ntohs(rctx->hdr.ctrl.mpa_len);
rctx->fpdu_part_rem =
siw_fpdu_payload_len(rctx);
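/*
* MPA pads each FPDU to a 4-byte boundary; remember the
* pad length so the trailer (pad + CRC) can be consumed
* and checked later.
*/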
if (rctx->fpdu_part_rem)
rctx->pad = -rctx->fpdu_part_rem & 0x3;
else
rctx->pad = 0;
rctx->state = SIW_GET_DATA_START;
rctx->fpdu_part_rcvd = 0;
}
break;
case SIW_GET_DATA_MORE:
/*
* Another data fragment of the same DDP segment.
* Headers will not be checked again by the
* opcode-specific data receive function below.
* Setting first_ddp_seg = 0 avoids repeating
* initializations that may occur only once per
* DDP segment.
*/
rctx->first_ddp_seg = 0;
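/* FALLTHROUGH: continue with data placement */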
case SIW_GET_DATA_START:
/*
* Headers will be checked by the opcode-specific
* data receive function below.
*/
rv = siw_rx_data(qp, rctx);
if (!rv) {
rctx->fpdu_part_rem =
siw_fpdu_trailer_len(rctx);
rctx->fpdu_part_rcvd = 0;
rctx->state = SIW_GET_TRAILER;
} else
rctx->state = SIW_GET_DATA_MORE;
break;
case SIW_GET_TRAILER:
/*
* read CRC + any padding
*/
rv = siw_get_trailer(qp, rctx);
if (!rv) {
/*
* FPDU completed.
* complete RDMAP message if last fragment
*/
rctx->state = SIW_GET_HDR;
rctx->fpdu_part_rcvd = 0;
if (!rctx->hdr.ctrl.l)
/* more frags */
break;
rv = siw_rdmap_complete(qp, rctx);
if (rv)
break;
}
break;
default:
WARN_ON(1);
rv = -EAGAIN;
}
if (unlikely(rv != 0 && rv != -EAGAIN)) {
/*
* TODO: implement graceful error handling including
* generation (and processing) of TERMINATE
* messages.
*
* For now we are left with a bogus RX status,
* unable to receive any further bytes.
* BUT: the code must distinguish between
*
* o protocol syntax errors (FATAL, framing lost)
* o CRC errors (FATAL, framing lost since we do not
*   trust the packet header (??))
* o local resource errors (maybe non fatal, framing
*   not lost)
*/
siw_rdmap_error(qp, rctx, rv);
dprint(DBG_RX|DBG_ON,
"(QP%d): RX ERROR %d at RX state %d\n",
QP_ID(qp), rv, rctx->state);
siw_dprint_rctx(rctx);
/*
* Calling siw_cm_queue_work() is safe without
* releasing qp->state_lock because the QP state
* will be transitioned to SIW_QP_STATE_ERROR
* by the siw_work_handler() workqueue handler
* after we return from siw_qp_llp_data_ready().
*/
siw_qp_cm_drop(qp, 1);
break;
}
if (rv) {
dprint(DBG_RX, "(QP%d): "
"Misaligned FPDU: State: %d, missing: %d\n",
QP_ID(qp), rctx->state, rctx->fpdu_part_rem);
break;
}
}
return rctx->skb_copied;
}