Copyright (c) 2006 The Regents of the University of Michigan
All rights reserved.

Permission is granted to use, copy, create derivative works and
redistribute this software and such derivative works for any purpose,
so long as the name of the University of Michigan is not used in
any advertising or publicity pertaining to the use or distribution
of this software without specific, written prior authorization.  If
the above copyright notice or any other identification of the
university of michigan is included in any copy of any portion of
this software, then the disclaimer below must also be included.

This software is provided as is, without representation from the
University of Michigan as to its fitness for any purpose, and without
warranty by the university of michigan of any kind, either express
or implied, including without limitation the implied warranties of
merchantability and fitness for a particular purpose.  The Regents
of the University of Michigan shall not be liable for any damages,
including special, indirect, incidental, or consequential damages,
with respect to any claim arising out or in connection with the use
of the software, even if it has been or is hereafter advised of the
possibility of such damages.

Signed-off-by: George Dunlap <dunlapg@umich.edu>

diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/fs/Kconfig linux-2.6.14.3-RPCRDMA/fs/Kconfig
--- linux-2.6.14.3/fs/Kconfig	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/fs/Kconfig	2005-12-08 11:50:23.000000000 -0500
@@ -1447,6 +1447,15 @@ config NFSD_TCP
 	  TCP connections usually perform better than the default UDP when
 	  the network is lossy or congested.  If unsure, say Y.
 
+config NFSD_RDMA
+	tristate "Provide NFS server over RDMA support"
+	select SUNRPC_RDMA
+	default n
+	help
+	  If you want your NFS server to support RDMA connections, say Y here.
+	  Automatically selects SUNRPC_RDMA as well.
+	  If unsure, say N.
+
 config ROOT_NFS
 	bool "Root file system on NFS"
 	depends on NFS_FS=y && IP_PNP
@@ -1471,6 +1480,9 @@ config LOCKD_V4
 
 config EXPORTFS
 	tristate
+
+config SUNRPC_RDMA
+	tristate
 
 config NFS_ACL_SUPPORT
 	tristate
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/fs/nfs/callback.c linux-2.6.14.3-RPCRDMA/fs/nfs/callback.c
--- linux-2.6.14.3/fs/nfs/callback.c	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/fs/nfs/callback.c	2005-12-08 11:50:23.000000000 -0500
@@ -81,7 +81,7 @@ static void nfs_callback_svc(struct svc_
 int nfs_callback_up(void)
 {
 	struct svc_serv *serv;
-	struct svc_sock *svsk;
+	struct svc_xprt *svsk;
 	int ret = 0;
 
 	lock_kernel();
@@ -98,9 +98,9 @@ int nfs_callback_up(void)
 	ret = svc_makesock(serv, IPPROTO_TCP, 0);
 	if (ret < 0)
 		goto out_destroy;
-	if (!list_empty(&serv->sv_permsocks)) {
-		svsk = list_entry(serv->sv_permsocks.next,
-				struct svc_sock, sk_list);
+	if (!list_empty(&serv->sv_permxprts)) {
+		svsk = list_entry(serv->sv_permxprts.next,
+				struct svc_xprt, sk_list);
 		nfs_callback_tcpport = ntohs(inet_sk(svsk->sk_sk)->sport);
 		dprintk ("Callback port = 0x%x\n", nfs_callback_tcpport);
 	} else
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/fs/nfsd/nfs3proc.c linux-2.6.14.3-RPCRDMA/fs/nfsd/nfs3proc.c
--- linux-2.6.14.3/fs/nfsd/nfs3proc.c	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/fs/nfsd/nfs3proc.c	2005-12-08 11:50:23.000000000 -0500
@@ -547,6 +547,8 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqst
 
 	nfserr = fh_verify(rqstp, &argp->fh, 0, MAY_NOP);
 
+	dprintk("%s: post fh_verify nfserr = %d\n", __FUNCTION__, nfserr);
+
 	/* Check special features of the file system. May request
 	 * different read/write sizes for file systems known to have
 	 * problems with large blocks */
@@ -561,6 +563,7 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqst
 	}
 
 	fh_put(&argp->fh);
+	dprintk("%s: nfserr = %d\n", __FUNCTION__, nfserr);
 	RETURN_STATUS(nfserr);
 }
 
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/fs/nfsd/nfs3xdr.c linux-2.6.14.3-RPCRDMA/fs/nfsd/nfs3xdr.c
--- linux-2.6.14.3/fs/nfsd/nfs3xdr.c	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/fs/nfsd/nfs3xdr.c	2005-12-08 11:50:23.000000000 -0500
@@ -1032,8 +1032,12 @@ int
 nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, u32 *p,
 					struct nfsd3_fsinfores *resp)
 {
+	dprintk("%s: writing response starting at %p\n", __FUNCTION__, p);
+
 	*p++ = xdr_zero;	/* no post_op_attr */
 
+	dprintk("%s: resp->status = %d\n", __FUNCTION__, resp->status);
+
 	if (resp->status == 0) {
 		*p++ = htonl(resp->f_rtmax);
 		*p++ = htonl(resp->f_rtpref);
@@ -1048,6 +1052,8 @@ nfs3svc_encode_fsinfores(struct svc_rqst
 		*p++ = htonl(resp->f_properties);
 	}
 
+	dprintk("%s: response ending at %p\n", __FUNCTION__, p);
+
 	return xdr_ressize_check(rqstp, p);
 }
 
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/fs/nfsd/nfsfh.c linux-2.6.14.3-RPCRDMA/fs/nfsd/nfsfh.c
--- linux-2.6.14.3/fs/nfsd/nfsfh.c	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/fs/nfsd/nfsfh.c	2005-12-08 11:50:23.000000000 -0500
@@ -130,6 +130,7 @@ fh_verify(struct svc_rqst *rqstp, struct
 		int data_left = fh->fh_size/4;
 
 		error = nfserr_stale;
+		dprintk("%s: rqstp->rq_client = %p\n", __FUNCTION__, NULL);
 		if (rqstp->rq_client == NULL)
 			goto out;
 		if (rqstp->rq_vers > 2)
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/fs/nfsd/nfssvc.c linux-2.6.14.3-RPCRDMA/fs/nfsd/nfssvc.c
--- linux-2.6.14.3/fs/nfsd/nfssvc.c	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/fs/nfsd/nfssvc.c	2005-12-08 11:50:23.000000000 -0500
@@ -104,15 +104,28 @@ nfsd_svc(unsigned short port, int nrserv
 		nfsd_serv = svc_create(&nfsd_program, NFSD_BUFSIZE);
 		if (nfsd_serv == NULL)
 			goto out;
+		dprintk("nfsd: calling svc_makesock for UDP\n");
 		error = svc_makesock(nfsd_serv, IPPROTO_UDP, port);
+		dprintk("%s: error = %d\n", __FUNCTION__, error);
 		if (error < 0)
 			goto failure;
 
 #ifdef CONFIG_NFSD_TCP
+		dprintk("nfsd: calling svc_makesock for TCP\n");
 		error = svc_makesock(nfsd_serv, IPPROTO_TCP, port);
+		dprintk("%s: error = %d\n", __FUNCTION__, error);
 		if (error < 0)
 			goto failure;
 #endif
+/* XXX There should be a condition on CONFIG_NFSD_RDMA here */
+		/* XXX we need a new protocol constant for RDMA */
+		dprintk("nfsd: calling svc_makesock for RDMA\n");
+		error = svc_makesock(nfsd_serv, IPPROTO_MAX + 1, port);
+/* 		error = svc_makexprt(nfsd_serv, IPPROTO_MAX + 1, port); */
+		dprintk("%s: error = %d\n", __FUNCTION__, error);
+		if (error < 0)
+			goto failure;
+
 		do_gettimeofday(&nfssvc_boot);		/* record boot time */
 	} else
 		nfsd_serv->sv_nrthreads++;
@@ -349,6 +362,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, u3
 	 */
 	if (!(nfserr && rqstp->rq_vers == 2)) {
 		xdr = proc->pc_encode;
+
 		if (xdr && !xdr(rqstp, nfserrp,
 				rqstp->rq_resp)) {
 			/* Failed to encode result. Release cache entry */
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/include/linux/sunrpc/svc.h linux-2.6.14.3-RPCRDMA/include/linux/sunrpc/svc.h
--- linux-2.6.14.3/include/linux/sunrpc/svc.h	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/include/linux/sunrpc/svc.h	2005-12-08 11:50:23.000000000 -0500
@@ -26,6 +26,9 @@
  * a list of idle threads waiting for input.
  *
  * We currently do not support more than one RPC program per daemon.
+ *
+ * Deprecated fields to remove
+ * - sv_sockets
  */
 struct svc_serv {
 	struct list_head	sv_threads;	/* idle server threads */
@@ -37,8 +40,8 @@ struct svc_serv {
 	unsigned int		sv_bufsz;	/* datagram buffer size */
 	unsigned int		sv_xdrsize;	/* XDR buffer size */
 
-	struct list_head	sv_permsocks;	/* all permanent sockets */
-	struct list_head	sv_tempsocks;	/* all temporary sockets */
+	struct list_head	sv_permxprts;	/* all permanent transports */
+	struct list_head	sv_tempxprts;	/* all temporary transports */
 	int			sv_tmpcnt;	/* count of temporary sockets */
 
 	char *			sv_name;	/* service name */
@@ -110,7 +113,7 @@ static inline void svc_putu32(struct kve
  */
 struct svc_rqst {
 	struct list_head	rq_list;	/* idle list */
-	struct svc_sock *	rq_sock;	/* socket */
+	struct svc_xprt *	rq_sock;	/* socket */
 	struct sockaddr_in	rq_addr;	/* peer address */
 	int			rq_addrlen;
 
@@ -244,7 +247,7 @@ static inline void svc_free_allpages(str
 struct svc_deferred_req {
 	u32			prot;	/* protocol (UDP or TCP) */
 	struct sockaddr_in	addr;
-	struct svc_sock		*svsk;	/* where reply must go */
+	struct svc_xprt		*svsk;	/* where reply must go */
 	struct cache_deferred_req handle;
 	int			argslen;
 	u32			args[0];
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/include/linux/sunrpc/svcsock.h linux-2.6.14.3-RPCRDMA/include/linux/sunrpc/svcsock.h
--- linux-2.6.14.3/include/linux/sunrpc/svcsock.h	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/include/linux/sunrpc/svcsock.h	2005-12-08 11:50:23.000000000 -0500
@@ -1,7 +1,7 @@
 /*
  * linux/include/linux/sunrpc/svcsock.h
  *
- * RPC server socket I/O.
+ * RPC server transport-independent I/O.
  *
  * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
  */
@@ -12,15 +12,18 @@
 #include <linux/sunrpc/svc.h>
 
 /*
- * RPC server socket.
+ * RPC transport instance
  */
-struct svc_sock {
+struct svc_xprt {
 	struct list_head	sk_ready;	/* list of ready sockets */
-	struct list_head	sk_list;	/* list of all sockets */
+	struct list_head	sk_list;	/* list of all transports */
+
+	void *			sx_data;	/* transport-specific data */
+
 	struct socket *		sk_sock;	/* berkeley socket layer */
 	struct sock *		sk_sk;		/* INET layer */
 
-	struct svc_serv *	sk_server;	/* service for this socket */
+	struct svc_serv *	sx_server;	/* service for this socket */
 	unsigned int		sk_inuse;	/* use count */
 	unsigned long		sk_flags;
 #define	SK_BUSY		0			/* enqueued/receiving */
@@ -38,14 +41,22 @@ struct svc_sock {
 						 * be revisted */
 	struct semaphore        sk_sem;		/* to serialize sending data */
 
-	int			(*sk_recvfrom)(struct svc_rqst *rqstp);
-	int			(*sk_sendto)(struct svc_rqst *rqstp);
+	int			(*sx_recvfrom)(struct svc_rqst *rqstp);
+	int			(*sx_sendto)(struct svc_rqst *rqstp);
 
+	void                    (*sx_delete)(struct svc_xprt *xprt);
+	void			(*sx_put)(struct svc_xprt *xprt);
+	/* return 0 if insufficient space to write reply */
+	int			(*sx_has_wspace)(struct svc_xprt *xprt);
+
 	/* We keep the old state_change and data_ready CB's here */
 	void			(*sk_ostate)(struct sock *);
 	void			(*sk_odata)(struct sock *, int bytes);
 	void			(*sk_owspace)(struct sock *);
 
+	/* if non-zero, the transport manages XDR buffers */
+	int			sx_manages_buffers;
+
 	/* private TCP part */
 	int			sk_reclen;	/* length of record */
 	int			sk_tcplen;	/* current read length */
@@ -56,10 +67,12 @@ struct svc_sock {
  * Function prototypes.
  */
 int		svc_makesock(struct svc_serv *, int, unsigned short);
-void		svc_delete_socket(struct svc_sock *);
+int		svc_makexprt(struct svc_serv *, int, unsigned short);
+void		svc_delete_socket(struct svc_xprt *);
 int		svc_recv(struct svc_serv *, struct svc_rqst *, long);
 int		svc_send(struct svc_rqst *);
 void		svc_drop(struct svc_rqst *);
 void		svc_sock_update_bufs(struct svc_serv *serv);
+void		svc_sock_enqueue(struct svc_xprt *svsk);
 
 #endif /* SUNRPC_SVCSOCK_H */
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/include/linux/sunrpc/svcxprt_rdma.h linux-2.6.14.3-RPCRDMA/include/linux/sunrpc/svcxprt_rdma.h
--- linux-2.6.14.3/include/linux/sunrpc/svcxprt_rdma.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/include/linux/sunrpc/svcxprt_rdma.h	2005-12-08 11:50:23.000000000 -0500
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2006 The Regents of the University of Michigan
+ * All rights reserved.
+ *
+ * Permission is granted to use, copy, create derivative works and
+ * redistribute this software and such derivative works for any purpose,
+ * so long as the name of the University of Michigan is not used in
+ * any advertising or publicity pertaining to the use or distribution
+ * of this software without specific, written prior authorization.  If
+ * the above copyright notice or any other identification of the
+ * university of michigan is included in any copy of any portion of
+ * this software, then the disclaimer below must also be included.
+ *
+ * This software is provided as is, without representation from the
+ * University of Michigan as to its fitness for any purpose, and without
+ * warranty by the university of michigan of any kind, either express
+ * or implied, including without limitation the implied warranties of
+ * merchantability and fitness for a particular purpose.  The Regents
+ * of the University of Michigan shall not be liable for any damages,
+ * including special, indirect, incidental, or consequential damages,
+ * with respect to any claim arising out or in connection with the use
+ * of the software, even if it has been or is hereafter advised of the
+ * possibility of such damages.
+ */
+
+#ifndef SVCXPRT_RDMA_H
+#define SVCXPRT_RDMA_H
+
+int svc_create_rdma_xprt(struct svc_serv *serv, int protocol, unsigned short port);
+
+#endif /* SVCXPRT_RDMA_H */
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/include/linux/sunrpc/svcxprt_rdma_kdapl.h linux-2.6.14.3-RPCRDMA/include/linux/sunrpc/svcxprt_rdma_kdapl.h
--- linux-2.6.14.3/include/linux/sunrpc/svcxprt_rdma_kdapl.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/include/linux/sunrpc/svcxprt_rdma_kdapl.h	2005-12-08 11:50:23.000000000 -0500
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2006 The Regents of the University of Michigan
+ * All rights reserved.
+ *
+ * Permission is granted to use, copy, create derivative works and
+ * redistribute this software and such derivative works for any purpose,
+ * so long as the name of the University of Michigan is not used in
+ * any advertising or publicity pertaining to the use or distribution
+ * of this software without specific, written prior authorization.  If
+ * the above copyright notice or any other identification of the
+ * university of michigan is included in any copy of any portion of
+ * this software, then the disclaimer below must also be included.
+ *
+ * This software is provided as is, without representation from the
+ * University of Michigan as to its fitness for any purpose, and without
+ * warranty by the university of michigan of any kind, either express
+ * or implied, including without limitation the implied warranties of
+ * merchantability and fitness for a particular purpose.  The Regents
+ * of the University of Michigan shall not be liable for any damages,
+ * including special, indirect, incidental, or consequential damages,
+ * with respect to any claim arising out or in connection with the use
+ * of the software, even if it has been or is hereafter advised of the
+ * possibility of such damages.
+ *
+ * Defines types and functions used for the implementation of an RDMA RPC
+ * transport.
+ *
+ * This header file depends on the inclusion of the following others:
+ * - dat/kdat.h: KDAT API types
+ * - linux/sunrpc/svcsock.h: struct svc_xprt
+ */
+
+#ifndef SVCXPRT_RDMA_KDAPL_H
+#define SVCXPRT_RDMA_KDAPL_H
+
+#define RDMA_MAX_PAGES 1 /* Max # pages posted for RDMA Recv */
+#define RDMA_CREDIT_PAGES 4 /* # pages for each credit buffer */
+
+enum rdma_proc {
+	RDMA_MSG = 0,
+	RDMA_NOMSG = 1,
+	RDMA_MSGP = 2,
+	RDMA_DONE = 3,
+	RDMA_ERROR = 4
+};
+
+/* 
+ * Public Service Point & associated structures
+ */
+struct rdma_psp {
+	DAT_PSP_HANDLE rp_psp_handle;
+	DAT_EVD_HANDLE rp_cr_evd;
+};
+
+/*
+ * Interface Adapter & associated structures
+ */
+struct rdma_ia {
+	struct list_head	ri_endpoints;
+	spinlock_t		ri_lock;
+	unsigned int		ri_initialized;
+	DAT_IA_HANDLE		ri_ia_handle;
+	DAT_EVD_HANDLE		ri_async_evd;    
+	DAT_IA_ATTR		ri_ia_attr;		
+	DAT_PROVIDER_ATTR       ri_pv_attr;		
+	struct rdma_psp		ri_psp;
+	DAT_PZ_HANDLE		ri_pz_handle;
+	DAT_LMR_HANDLE		ri_lmr_handle;
+	DAT_LMR_TRIPLET		ri_lmr_iov;
+#if 0 /* JFL: fixes compiler error */
+	DAT_CR_HANDLE *		ri_conn_req;		/* XXX should be list... */
+#else
+	DAT_CR_HANDLE		ri_conn_req;
+#endif
+/*	
+	int		ri_memreg_strategy;
+#if RPCRDMA_DEBUG
+	DAT_RMR_CONTEXT	ri_bind_rmr;
+#endif
+*/	
+};
+
+/*
+ * RDMA Credit
+ * Represents the storage corresponding to an RDMA credit
+ */
+struct rdma_credit {
+	struct page *		rb_pages[RDMA_CREDIT_PAGES];
+};
+
+/*
+ * RDMA endpoint
+ */
+struct rdma_ep {
+	struct list_head	re_list;	/* list of all endpoints */
+	struct rdma_credit 	re_credits[1];	/* XXX will be list */
+	int			re_connected;
+	int			re_closing;
+        wait_queue_head_t	re_connect_wait;
+	DAT_EP_HANDLE		re_handle;
+	DAT_EVD_HANDLE		re_conn_evd;
+	DAT_EVD_HANDLE		re_in_dto_evd;
+	DAT_EVD_HANDLE		re_out_dto_evd;
+	struct rdma_ia *	re_ia;
+#if 0 /* JFL: store the address data rather than a pointer to the provider's */
+      /*      address data, which the provider might delete on us.           */
+	struct sockaddr_in *	re_raddr;	/* the remote IA address */
+#else
+	struct sockaddr_in      re_raddr;
+#endif
+	struct svc_xprt *	re_xprt;
+	DAT_LMR_TRIPLET		re_resp[3];	/* head, page data & tail */
+	DAT_VLEN		re_recvlen;
+	struct page *		re_pages[RDMA_MAX_PAGES + 1];	/* 1 page for reply */
+};
+
+int rdma_init_ia(struct rdma_ia *ia, const DAT_NAME_PTR ia_name, DAT_COUNT evd_min_qlen);
+int rdma_create_pub_svc_point(struct svc_xprt *xprt, DAT_CONN_QUAL connection_qualifier);	 
+void rdma_close_ia(struct rdma_ia *ia);
+/* void rdma_close_ep(struct rdma_ep *ep); */
+int svc_rdma_ia_recvfrom(struct svc_rqst *rqstp);
+int svc_rdma_ep_recvfrom(struct svc_rqst *rqstp);
+
+#endif /* SVCXPRT_RDMA_KDAPL_H */
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/include/rdma/ib_addr.h linux-2.6.14.3-RPCRDMA/include/rdma/ib_addr.h
--- linux-2.6.14.3/include/rdma/ib_addr.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/include/rdma/ib_addr.h	2005-12-08 11:16:53.000000000 -0500
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
+ * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ */
+
+#if !defined(IB_ADDR_H)
+#define IB_ADDR_H
+
+#include <linux/socket.h>
+#include <rdma/ib_verbs.h>
+
+struct ib_addr {
+	union ib_gid	sgid;
+	union ib_gid	dgid;
+	u16		pkey;
+};
+
+/**
+ * ib_translate_addr - Translate a local IP address to an Infiniband GID and
+ *   PKey.
+ */
+int ib_translate_addr(struct sockaddr *addr, union ib_gid *gid, u16 *pkey);
+
+/**
+ * ib_resolve_addr - Resolve source and destination IP addresses to
+ *   Infiniband network addresses.
+ * @src_addr: An optional source address to use in the resolution.  If a
+ *   source address is not provided, a usable address will be returned via
+ *   the callback.
+ * @dst_addr: The destination address to resolve.
+ * @addr: A reference to a data location that will receive the resolved
+ *   addresses.  The data location must remain valid until the callback has
+ *   been invoked.
+ * @timeout_ms: Amount of time to wait for the address resolution to complete.
+ * @callback: Call invoked once address resolution has completed, timed out,
+ *   or been canceled.  A status of 0 indicates success.
+ * @context: User-specified context associated with the call.
+ */
+int ib_resolve_addr(struct sockaddr *src_addr, struct sockaddr *dst_addr,
+		    struct ib_addr *addr, int timeout_ms,
+		    void (*callback)(int status, struct sockaddr *src_addr,
+				     struct ib_addr *addr, void *context),
+		    void *context);
+
+void ib_addr_cancel(struct ib_addr *addr);
+
+#endif /* IB_ADDR_H */
+
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/include/rdma/ib_at.h linux-2.6.14.3-RPCRDMA/include/rdma/ib_at.h
--- linux-2.6.14.3/include/rdma/ib_at.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/include/rdma/ib_at.h	2005-12-08 11:16:53.000000000 -0500
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 2004,2005 Voltaire Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *
+ * $Id: ib_at.h 3202 2005-08-26 17:11:34Z roland $
+ */
+
+#if !defined( IB_AT_H )
+#define IB_AT_H
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_sa.h>
+
+enum ib_at_multipathing_type {
+	IB_AT_PATH_SAME_PORT	= 0,
+	IB_AT_PATH_SAME_HCA	= 1,	/* but different ports if applicable */
+	IB_AT_PATH_SAME_SYSTEM	= 2,	/* but different ports if applicable */
+	IB_AT_PATH_INDEPENDENT_HCA = 3,
+	IB_AT_PATH_SRC_ROUTE	= 4,	/* application controlled multipathing */
+};
+
+enum ib_at_route_flags {
+	IB_AT_ROUTE_USE_DEFAULTS	= 0,
+	IB_AT_ROUTE_FORCE_ATS		= 1,
+	IB_AT_ROUTE_FORCE_ARP		= 2,
+	IB_AT_ROUTE_FORCE_RESOLVE	= 4,
+};
+
+struct ib_at_path_attr {
+	u16 qos_tag;
+	__be16 pkey;
+	u8 multi_path_type;
+};
+
+struct ib_at_ib_route {
+	union ib_gid sgid;
+	union ib_gid dgid;
+	struct ib_device *out_dev;
+	int out_port;
+	struct ib_at_path_attr attr;
+};
+
+enum ib_at_op_status {
+	IB_AT_STATUS_INVALID	= 0,
+	IB_AT_STATUS_PENDING	= 1,
+	IB_AT_STATUS_COMPLETED	= 2,
+	IB_AT_STATUS_ERROR	= 3,
+	IB_AT_STATUS_CANCELED	= 4,
+};
+
+/*
+ * ib_at_completion structure - callback function parameters structure
+ * @completion: completion call back function
+ * @context: user defined context pointer
+ * @req_id: asynchronous request ID - optional, out
+ *
+ * The asynchronous resolution function behavior is as follows:
+ *	If the resolve operation can be fulfilled immediately, then the output
+ *	structures are set and the number of filled structures is returned.
+ *
+ *	If the resolve operation cannot be fulfilled immediately and
+ *	an ib_at_completion structure is not provided,
+ *	then the function immediately returns -EWOULDBLOCK.
+ *
+ * 	If ib_at_completion structure is provided and an asynchronous 
+ *	operation is started, the function immediately returns zero,
+ *	and the request ID field (req_id) is set if the pointer is
+ *	non NULL. This request ID may be used to cancel the operation,
+ *	or to poll its status.
+ *
+ *	When an asynchronous operation completes (successfully or not), 
+ *	the callback function is called, passing the request ID, 
+ *	the supplied user context and the number of output structures.
+ *	If the asynchronous operation did not complete, a negative 
+ *	error code is returned as the 'rec_num'.
+ *	Valid error codes are:
+ *		-EINTR: operation is canceled
+ *		-EIO:	request send failed
+ *		-ETIMEDOUT: operation timed out
+ *
+ *	Returned value of zero records means that the resolution process
+ *	completed, but the given address could not be resolved at this time.
+ */
+struct ib_at_completion {
+	void (*fn)(u64 req_id, void *context, int rec_num);
+	void *context;
+	u64 req_id;
+};
+
+/**
+ * ib_at_route_by_ip - asynchronously resolve ip address to ib route
+ * @dst_ip: destination ip
+ * @src_ip: source ip - optional
+ * @tos: ip type of service
+ * @flags: ib_at_route_flags
+ * @ib_route: out structure
+ * @async_comp: asynchronous callback structure - optional
+ *
+ * Resolve the specified dst_ip to a &struct ib_route structure.
+ * src_ip can be provided to force specific output interface.
+ * flags can be used to select resolving method; currently IB-ARP or ATS.
+ *
+ * See ib_at_completion structure documentation for asynchronous
+ * operation details.
+ */
+int ib_at_route_by_ip(u32 dst_ip, u32 src_ip, int tos, u16 flags,
+		     struct ib_at_ib_route *ib_route,
+		     struct ib_at_completion *async_comp);
+
+/**
+ * ib_at_paths_by_route - asynchronously resolve ib route to ib path records
+ * @ib_route: ib route to resolve
+ * @mpath_type: ib_at_multipathing_type
+ * @path_arr: SA path record array - out
+ * @npath: maximal number of paths to return
+ * @async_comp: asynchronous callback structure - optional
+ *
+ * Resolve the specified ib_route to a SA path record array.
+ * Number of returned paths will not exceed npath.
+ * Multipathing type may be used to obtain redundant paths for APM,
+ * other failover schemes, bandwidth aggregation or source based routing.
+ * Note that multipathing request is meaningless unless npath is greater than 1.
+ *
+ * Returned ib_route structure includes the recommended pkey and qos_tag for
+ * this route.
+ *
+ * See ib_at_completion structure documentation for asynchronous operation
+ * details.
+ */
+int ib_at_paths_by_route(struct ib_at_ib_route *ib_route, u32 mpath_type,
+			struct ib_sa_path_rec *path_arr, int npath,
+			struct ib_at_completion *async_comp);
+
+/**
+ * ib_at_ips_by_gid - asynchronously resolve GID to IP addresses
+ * @gid: GID to resolve
+ * @dst_ips: array of IPs, out
+ * @nips: number of IP entries in dst_ips array
+ * @async_comp: asynchronous callback structure - optional
+ *
+ * Resolve the gid to IP addresses, but not more than nips.
+ * This function relies on the IB-ATS mechanism.
+ *
+ * See ib_at_completion structure documentation for asynchronous
+ * operation details.
+ */
+int ib_at_ips_by_gid(union ib_gid *gid, u32 *dst_ips, int nips,
+		    struct ib_at_completion *async_comp);
+
+/**
+ * ib_at_ips_by_subnet - return local IP addresses by IP subnet
+ * @network: network to resolve - optional
+ * @netmask: subnet net mask - optional
+ * @dst_ips: array of IPs, out
+ * @nips: number of IP entries in dst_ips array
+ *
+ * Return local IP addresses matching the network and netmask,
+ * but not more than nips.
+ * 
+ * Note that network and netmask as 0x0 or 0xffffffff returns all local IPs.
+ */
+int ib_at_ips_by_subnet(u32 network, u32 netmask, u32 *dst_ips, int nips);
+
+/**
+ * ib_at_invalidate_paths - invalidate possibly cached paths keyed by ib_route
+ * @ib_route: paths key - optional
+ *
+ * Returns number of invalidated paths.
+ * If ib_route is NULL, then the entire cache will be flushed.
+ */
+int ib_at_invalidate_paths(struct ib_at_ib_route *ib_route);
+
+/**
+ * ib_at_cancel - cancel possible active asynchronous operation
+ * @req_id: asynchronous request ID
+ *
+ * Return 0 if canceled, -1 if cancel failed (e.g. bad ID)
+ */
+int ib_at_cancel(u64 req_id);
+
+/**
+ * ib_at_status - poll asynchronous operation's status
+ * @req_id: asynchronous request ID ib_at_op_status
+ *
+ * Return non-negative ib_at_op_status value, 
+ * or -EINVAL if the request ID is invalid.
+ */
+int ib_at_status(u64 req_id);
+
+#endif /* IB_AT_H */
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/include/rdma/ib_cache.h linux-2.6.14.3-RPCRDMA/include/rdma/ib_cache.h
--- linux-2.6.14.3/include/rdma/ib_cache.h	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/include/rdma/ib_cache.h	2005-12-08 11:16:53.000000000 -0500
@@ -31,7 +31,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: ib_cache.h 1349 2004-12-16 21:09:43Z roland $
+ * $Id: ib_cache.h 3202 2005-08-26 17:11:34Z roland $
  */
 
 #ifndef _IB_CACHE_H
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/include/rdma/ib_cm.h linux-2.6.14.3-RPCRDMA/include/rdma/ib_cm.h
--- linux-2.6.14.3/include/rdma/ib_cm.h	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/include/rdma/ib_cm.h	2005-12-08 11:16:53.000000000 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
  * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
@@ -32,7 +32,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: ib_cm.h 2730 2005-06-28 16:43:03Z sean.hefty $
+ * $Id: ib_cm.h 4311 2005-12-05 18:42:01Z sean.hefty $
  */
 #if !defined(IB_CM_H)
 #define IB_CM_H
@@ -102,14 +102,14 @@ enum ib_cm_data_size {
 	IB_CM_APR_INFO_LENGTH		 = 72,
 	IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE = 216,
 	IB_CM_SIDR_REP_PRIVATE_DATA_SIZE = 136,
-	IB_CM_SIDR_REP_INFO_LENGTH	 = 72
+	IB_CM_SIDR_REP_INFO_LENGTH	 = 72,
+	IB_CM_PRIVATE_DATA_COMPARE_SIZE	 = 64
 };
 
 struct ib_cm_id;
 
 struct ib_cm_req_event_param {
 	struct ib_cm_id		*listen_id;
-	struct ib_device	*device;
 	u8			port;
 
 	struct ib_sa_path_rec	*primary_path;
@@ -220,7 +220,6 @@ struct ib_cm_apr_event_param {
 
 struct ib_cm_sidr_req_event_param {
 	struct ib_cm_id		*listen_id;
-	struct ib_device	*device;
 	u8			port;
 	u16			pkey;
 };
@@ -240,7 +239,6 @@ struct ib_cm_sidr_rep_event_param {
 	u32			qpn;
 	void			*info;
 	u8			info_len;
-
 };
 
 struct ib_cm_event {
@@ -284,6 +282,7 @@ typedef int (*ib_cm_handler)(struct ib_c
 struct ib_cm_id {
 	ib_cm_handler		cm_handler;
 	void			*context;
+	struct ib_device	*device;
 	__be64			service_id;
 	__be64			service_mask;
 	enum ib_cm_state	state;		/* internal CM/debug use */
@@ -295,6 +294,8 @@ struct ib_cm_id {
 
 /**
  * ib_create_cm_id - Allocate a communication identifier.
+ * @device: Device associated with the cm_id.  All related communication will
+ * be associated with the specified device.
  * @cm_handler: Callback invoked to notify the user of CM events.
  * @context: User specified context associated with the communication
  *   identifier.
@@ -302,7 +303,8 @@ struct ib_cm_id {
  * Communication identifiers are used to track connection states, service
  * ID resolution requests, and listen requests.
  */
-struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler,
+struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
+				 ib_cm_handler cm_handler,
 				 void *context);
 
 /**
@@ -316,6 +318,11 @@ void ib_destroy_cm_id(struct ib_cm_id *c
 #define IB_SERVICE_ID_AGN_MASK	__constant_cpu_to_be64(0xFF00000000000000ULL)
 #define IB_CM_ASSIGN_SERVICE_ID __constant_cpu_to_be64(0x0200000000000000ULL)
 
+struct ib_cm_private_data_compare {
+	u8  data[IB_CM_PRIVATE_DATA_COMPARE_SIZE];
+	u8  mask[IB_CM_PRIVATE_DATA_COMPARE_SIZE];
+};
+
 /**
  * ib_cm_listen - Initiates listening on the specified service ID for
  *   connection and service ID resolution requests.
@@ -328,10 +335,12 @@ void ib_destroy_cm_id(struct ib_cm_id *c
  *   range of service IDs.  If set to 0, the service ID is matched
  *   exactly.  This parameter is ignored if %service_id is set to
  *   IB_CM_ASSIGN_SERVICE_ID.
+ * @compare_data: This parameter is optional.  It specifies data that must
+ *   appear in the private data of a connection request for the specified
+ *   listen request.
  */
-int ib_cm_listen(struct ib_cm_id *cm_id,
-		 __be64 service_id,
-		 __be64 service_mask);
+int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
+		 struct ib_cm_private_data_compare *compare_data);
 
 struct ib_cm_req_param {
 	struct ib_sa_path_rec	*primary_path;
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/include/rdma/ib_fmr_pool.h linux-2.6.14.3-RPCRDMA/include/rdma/ib_fmr_pool.h
--- linux-2.6.14.3/include/rdma/ib_fmr_pool.h	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/include/rdma/ib_fmr_pool.h	2005-12-08 11:16:53.000000000 -0500
@@ -30,7 +30,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: ib_fmr_pool.h 2730 2005-06-28 16:43:03Z sean.hefty $
+ * $Id: ib_fmr_pool.h 3202 2005-08-26 17:11:34Z roland $
  */
 
 #if !defined(IB_FMR_POOL_H)
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/include/rdma/ib_mad.h linux-2.6.14.3-RPCRDMA/include/rdma/ib_mad.h
--- linux-2.6.14.3/include/rdma/ib_mad.h	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/include/rdma/ib_mad.h	2005-12-08 11:16:53.000000000 -0500
@@ -33,7 +33,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: ib_mad.h 2775 2005-07-02 13:42:12Z halr $
+ * $Id: ib_mad.h 3925 2005-10-31 23:03:17Z roland $
  */
 
 #if !defined( IB_MAD_H )
@@ -109,10 +109,14 @@
 #define IB_QP_SET_QKEY	0x80000000
 
 enum {
+	IB_MGMT_MAD_HDR = 24,
 	IB_MGMT_MAD_DATA = 232,
+	IB_MGMT_RMPP_HDR = 36,
 	IB_MGMT_RMPP_DATA = 220,
+	IB_MGMT_VENDOR_HDR = 40,
 	IB_MGMT_VENDOR_DATA = 216,
-	IB_MGMT_SA_DATA = 200
+	IB_MGMT_SA_HDR = 56,
+	IB_MGMT_SA_DATA = 200,
 };
 
 struct ib_mad_hdr {
@@ -203,26 +207,25 @@ struct ib_class_port_info
 
 /**
  * ib_mad_send_buf - MAD data buffer and work request for sends.
- * @mad: References an allocated MAD data buffer.  The size of the data
- *   buffer is specified in the @send_wr.length field.
- * @mapping: DMA mapping information.
+ * @next: A pointer used to chain together MADs for posting.
+ * @mad: References an allocated MAD data buffer.
  * @mad_agent: MAD agent that allocated the buffer.
+ * @ah: The address handle to use when sending the MAD.
  * @context: User-controlled context fields.
- * @send_wr: An initialized work request structure used when sending the MAD.
- *   The wr_id field of the work request is initialized to reference this
- *   data structure.
- * @sge: A scatter-gather list referenced by the work request.
+ * @timeout_ms: Time to wait for a response.
+ * @retries: Number of times to retry a request for a response.
  *
  * Users are responsible for initializing the MAD buffer itself, with the
  * exception of specifying the payload length field in any RMPP MAD.
  */
 struct ib_mad_send_buf {
-	struct ib_mad		*mad;
-	DECLARE_PCI_UNMAP_ADDR(mapping)
+	struct ib_mad_send_buf	*next;
+	void			*mad;
 	struct ib_mad_agent	*mad_agent;
+	struct ib_ah		*ah;
 	void			*context[2];
-	struct ib_send_wr	send_wr;
-	struct ib_sge		sge;
+	int			timeout_ms;
+	int			retries;
 };
 
 /**
@@ -287,7 +290,7 @@ typedef void (*ib_mad_send_handler)(stru
  * or @mad_send_wc.
  */
 typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
-				     struct ib_send_wr *send_wr,
+				     struct ib_mad_send_buf *send_buf,
 				     struct ib_mad_send_wc *mad_send_wc);
 
 /**
@@ -296,7 +299,7 @@ typedef void (*ib_mad_snoop_handler)(str
  * @mad_recv_wc: Received work completion information on the received MAD.
  *
  * MADs received in response to a send request operation will be handed to
- * the user after the send operation completes.  All data buffers given
+ * the user before the send operation completes.  All data buffers given
  * to registered agents through this routine are owned by the receiving
  * client, except for snooping agents.  Clients snooping MADs should not
  * modify the data referenced by @mad_recv_wc.
@@ -334,13 +337,13 @@ struct ib_mad_agent {
 
 /**
  * ib_mad_send_wc - MAD send completion information.
- * @wr_id: Work request identifier associated with the send MAD request.
+ * @send_buf: Send MAD data buffer associated with the send MAD request.
  * @status: Completion status.
  * @vendor_err: Optional vendor error information returned with a failed
  *   request.
  */
 struct ib_mad_send_wc {
-	u64			wr_id;
+	struct ib_mad_send_buf	*send_buf;
 	enum ib_wc_status	status;
 	u32			vendor_err;
 };
@@ -366,7 +369,7 @@ struct ib_mad_recv_buf {
  * @rmpp_list: Specifies a list of RMPP reassembled received MAD buffers.
  * @mad_len: The length of the received MAD, without duplicated headers.
  *
- * For received response, the wr_id field of the wc is set to the wr_id
+ * For received response, the wr_id contains a pointer to the ib_mad_send_buf
  *   for the corresponding send request.
  */
 struct ib_mad_recv_wc {
@@ -463,9 +466,9 @@ int ib_unregister_mad_agent(struct ib_ma
 /**
  * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
  *   with the registered client.
- * @mad_agent: Specifies the associated registration to post the send to.
- * @send_wr: Specifies the information needed to send the MAD(s).
- * @bad_send_wr: Specifies the MAD on which an error was encountered.
+ * @send_buf: Specifies the information needed to send the MAD(s).
+ * @bad_send_buf: Specifies the MAD on which an error was encountered.  This
+ *   parameter is optional if only a single MAD is posted.
  *
  * Sent MADs are not guaranteed to complete in the order that they were posted.
  *
@@ -479,9 +482,8 @@ int ib_unregister_mad_agent(struct ib_ma
  * defined data being transferred.  The paylen_newwin field should be
  * specified in network-byte order.
  */
-int ib_post_send_mad(struct ib_mad_agent *mad_agent,
-		     struct ib_send_wr *send_wr,
-		     struct ib_send_wr **bad_send_wr);
+int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
+		     struct ib_mad_send_buf **bad_send_buf);
 
 /**
  * ib_coalesce_recv_mad - Coalesces received MAD data into a single buffer.
@@ -507,23 +509,25 @@ void ib_free_recv_mad(struct ib_mad_recv
 /**
  * ib_cancel_mad - Cancels an outstanding send MAD operation.
  * @mad_agent: Specifies the registration associated with sent MAD.
- * @wr_id: Indicates the work request identifier of the MAD to cancel.
+ * @send_buf: Indicates the MAD to cancel.
  *
  * MADs will be returned to the user through the corresponding
  * ib_mad_send_handler.
  */
-void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id);
+void ib_cancel_mad(struct ib_mad_agent *mad_agent,
+		   struct ib_mad_send_buf *send_buf);
 
 /**
  * ib_modify_mad - Modifies an outstanding send MAD operation.
  * @mad_agent: Specifies the registration associated with sent MAD.
- * @wr_id: Indicates the work request identifier of the MAD to modify.
+ * @send_buf: Indicates the MAD to modify.
  * @timeout_ms: New timeout value for sent MAD.
  *
  * This call will reset the timeout value for a sent MAD to the specified
  * value.
  */
-int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms);
+int ib_modify_mad(struct ib_mad_agent *mad_agent,
+		  struct ib_mad_send_buf *send_buf, u32 timeout_ms);
 
 /**
  * ib_redirect_mad_qp - Registers a QP for MAD services.
@@ -572,7 +576,6 @@ int ib_process_mad_wc(struct ib_mad_agen
  * @remote_qpn: Specifies the QPN of the receiving node.
  * @pkey_index: Specifies which PKey the MAD will be sent using.  This field
  *   is valid only if the remote_qpn is QP 1.
- * @ah: References the address handle used to transfer to the remote node.
  * @rmpp_active: Indicates if the send will enable RMPP.
  * @hdr_len: Indicates the size of the data header of the MAD.  This length
  *   should include the common MAD header, RMPP header, plus any class
@@ -582,11 +585,10 @@ int ib_process_mad_wc(struct ib_mad_agen
  *   additional padding that may be necessary.
  * @gfp_mask: GFP mask used for the memory allocation.
  *
- * This is a helper routine that may be used to allocate a MAD.  Users are
- * not required to allocate outbound MADs using this call.  The returned
- * MAD send buffer will reference a data buffer usable for sending a MAD, along
+ * This routine allocates a MAD for sending.  The returned MAD send buffer
+ * will reference a data buffer usable for sending a MAD, along
  * with an initialized work request structure.  Users may modify the returned
- * MAD data buffer or work request before posting the send.
+ * MAD data buffer before posting the send.
  *
  * The returned data buffer will be cleared.  Users are responsible for
  * initializing the common MAD and any class specific headers.  If @rmpp_active
@@ -594,7 +596,7 @@ int ib_process_mad_wc(struct ib_mad_agen
  */
 struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 					    u32 remote_qpn, u16 pkey_index,
-					    struct ib_ah *ah, int rmpp_active,
+					    int rmpp_active,
 					    int hdr_len, int data_len,
 					    gfp_t gfp_mask);
 
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/include/rdma/ib_marshall.h linux-2.6.14.3-RPCRDMA/include/rdma/ib_marshall.h
--- linux-2.6.14.3/include/rdma/ib_marshall.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/include/rdma/ib_marshall.h	2005-12-08 11:16:53.000000000 -0500
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(IB_USER_MARSHALL_H)
+#define IB_USER_MARSHALL_H
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_sa.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_user_sa.h>
+
+void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
+			     struct ib_qp_attr *src);
+
+void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
+			      struct ib_sa_path_rec *src);
+
+void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
+				struct ib_user_path_rec *src);
+
+#endif /* IB_USER_MARSHALL_H */
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/include/rdma/ib_pack.h linux-2.6.14.3-RPCRDMA/include/rdma/ib_pack.h
--- linux-2.6.14.3/include/rdma/ib_pack.h	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/include/rdma/ib_pack.h	2005-12-08 11:16:53.000000000 -0500
@@ -29,7 +29,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: ib_pack.h 1349 2004-12-16 21:09:43Z roland $
+ * $Id: ib_pack.h 3202 2005-08-26 17:11:34Z roland $
  */
 
 #ifndef IB_PACK_H
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/include/rdma/ib_sa.h linux-2.6.14.3-RPCRDMA/include/rdma/ib_sa.h
--- linux-2.6.14.3/include/rdma/ib_sa.h	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/include/rdma/ib_sa.h	2005-12-08 11:16:53.000000000 -0500
@@ -30,7 +30,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: ib_sa.h 2811 2005-07-06 18:11:43Z halr $
+ * $Id: ib_sa.h 3925 2005-10-31 23:03:17Z roland $
  */
 
 #ifndef IB_SA_H
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/include/rdma/ib_smi.h linux-2.6.14.3-RPCRDMA/include/rdma/ib_smi.h
--- linux-2.6.14.3/include/rdma/ib_smi.h	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/include/rdma/ib_smi.h	2005-12-08 11:16:53.000000000 -0500
@@ -33,7 +33,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: ib_smi.h 1389 2004-12-27 22:56:47Z roland $
+ * $Id: ib_smi.h 3202 2005-08-26 17:11:34Z roland $
  */
 
 #if !defined( IB_SMI_H )
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/include/rdma/ib_user_at.h linux-2.6.14.3-RPCRDMA/include/rdma/ib_user_at.h
--- linux-2.6.14.3/include/rdma/ib_user_at.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/include/rdma/ib_user_at.h	2005-12-08 11:16:53.000000000 -0500
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: ib_user_at.h 3202 2005-08-26 17:11:34Z roland $
+ */
+
+#ifndef IB_USER_AT_H
+#define IB_USER_AT_H
+
+#include <linux/types.h>
+#include <rdma/ib_verbs.h>
+
+#define IB_USER_AT_ABI_VERSION 1
+
+enum {
+	IB_USER_AT_CMD_ROUTE_BY_IP,
+	IB_USER_AT_CMD_PATHS_BY_ROUTE,
+	IB_USER_AT_CMD_IPS_BY_GID,
+	IB_USER_AT_CMD_IPS_BY_SUBNET,
+	IB_USER_AT_CMD_INVALIDATE_PATHS,
+	IB_USER_AT_CMD_CANCEL,
+	IB_USER_AT_CMD_STATUS,
+
+	IB_USER_AT_CMD_EVENT,
+};
+
+/*
+ * command ABI structures.
+ */
+struct ib_uat_cmd_hdr {
+	__u32 cmd;
+	__u16 in;
+	__u16 out;
+};
+
+enum ib_uat_multipathing_type {
+        IB_USER_AT_PATH_SAME_PORT    = 0,
+        IB_USER_AT_PATH_SAME_HCA     = 1,	/* but different ports if applicable */
+        IB_USER_AT_PATH_SAME_SYSTEM  = 2,	/* but different ports if applicable */
+        IB_USER_AT_PATH_INDEPENDENT_HCA = 3,
+        IB_USER_AT_PATH_SRC_ROUTE    = 4,	/* application controlled multipathing */
+};
+
+enum ib_uat_route_flags {
+        IB_USER_AT_ROUTE_USE_DEFAULTS	= 0,
+        IB_USER_AT_ROUTE_FORCE_ATS	= 1,
+        IB_USER_AT_ROUTE_FORCE_ARP	= 2,
+        IB_USER_AT_ROUTE_FORCE_RESOLVE	= 4,
+};
+
+struct ib_uat_path_attr {
+	__u16 qos_tag;
+	__u16 pkey;
+	__u8  multi_path_type;
+};
+
+struct ib_uat_ib_route {
+	__u8 sgid[16];
+	__u8 dgid[16];
+	struct ibv_device *out_dev;
+	int out_port;
+	struct ib_uat_path_attr attr;
+};
+
+enum ib_uat_op_status {
+        IB_USER_AT_STATUS_INVALID	= 0,
+        IB_USER_AT_STATUS_PENDING	= 1,
+        IB_USER_AT_STATUS_COMPLETED	= 2,
+        IB_USER_AT_STATUS_CANCELED	= 3,
+};
+
+struct ib_uat_completion {
+	void (*fn)(__u64 req_id, void *context, int rec_num);
+	void *context;
+	__u64 req_id;
+};
+
+struct ib_uat_paths_by_route_req {
+	struct ib_uat_ib_route *ib_route;
+	__u32 mpath_type;
+	struct ib_sa_path_rec *path_arr;
+	int npath;
+	struct ib_uat_completion *async_comp;
+	__u64 response;
+};
+
+struct ib_uat_paths_by_route_resp {
+	__u64 req_id;
+};
+
+struct ib_uat_route_by_ip_req {
+	__u32 dst_ip;
+	__u32 src_ip;
+	int   tos;
+	__u16 flags;
+	struct ib_uat_ib_route *ib_route;
+	struct ib_uat_completion *async_comp;
+	__u64 response;
+};
+
+struct ib_uat_route_by_ip_resp {
+	__u64 req_id;
+};
+
+struct ib_uat_ips_by_gid_req {
+	union ibv_gid *gid;
+	__u32 *dst_ips;
+	int    nips;
+	struct ib_uat_completion *async_comp;
+	__u64 response;
+};
+
+struct ib_uat_ips_by_gid_resp {
+	__u64 req_id;
+};
+
+struct ib_uat_ips_by_subnet_req {
+	__u32 network;
+	__u32 netmask;
+	__u32 *dst_ips;
+	int nips;
+};
+
+struct ib_uat_invalidate_paths_req {
+	struct ib_uat_ib_route *ib_route;
+};
+
+struct ib_uat_cancel_req {
+	__u64 req_id;
+};
+
+struct ib_uat_status_req {
+	__u64 req_id;
+};
+
+/*
+ * event notification ABI structures.
+ */
+struct ib_uat_event_get {
+	__u64 response;
+};
+
+struct ib_uat_event_resp {
+	__u64 callback;
+	__u64 context;
+	__u64 req_id;
+	int   rec_num;
+};
+#endif /* IB_USER_AT_H */
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/include/rdma/ib_user_cm.h linux-2.6.14.3-RPCRDMA/include/rdma/ib_user_cm.h
--- linux-2.6.14.3/include/rdma/ib_user_cm.h	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/include/rdma/ib_user_cm.h	2005-12-08 11:16:53.000000000 -0500
@@ -30,15 +30,15 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: ib_user_cm.h 2576 2005-06-09 17:00:30Z libor $
+ * $Id: ib_user_cm.h 4019 2005-11-11 00:33:09Z sean.hefty $
  */
 
 #ifndef IB_USER_CM_H
 #define IB_USER_CM_H
 
-#include <linux/types.h>
+#include <rdma/ib_user_sa.h>
 
-#define IB_USER_CM_ABI_VERSION 2
+#define IB_USER_CM_ABI_VERSION 4
 
 enum {
 	IB_USER_CM_CMD_CREATE_ID,
@@ -84,6 +84,7 @@ struct ib_ucm_create_id_resp {
 struct ib_ucm_destroy_id {
 	__u64 response;
 	__u32 id;
+	__u32 reserved;
 };
 
 struct ib_ucm_destroy_id_resp {
@@ -93,6 +94,7 @@ struct ib_ucm_destroy_id_resp {
 struct ib_ucm_attr_id {
 	__u64 response;
 	__u32 id;
+	__u32 reserved;
 };
 
 struct ib_ucm_attr_id_resp {
@@ -108,62 +110,11 @@ struct ib_ucm_init_qp_attr {
 	__u32 qp_state;
 };
 
-struct ib_ucm_ah_attr {
-	__u8	grh_dgid[16];
-	__u32	grh_flow_label;
-	__u16	dlid;
-	__u16	reserved;
-	__u8	grh_sgid_index;
-	__u8	grh_hop_limit;
-	__u8	grh_traffic_class;
-	__u8	sl;
-	__u8	src_path_bits;
-	__u8	static_rate;
-	__u8	is_global;
-	__u8	port_num;
-};
-
-struct ib_ucm_init_qp_attr_resp {
-	__u32	qp_attr_mask;
-	__u32	qp_state;
-	__u32	cur_qp_state;
-	__u32	path_mtu;
-	__u32	path_mig_state;
-	__u32	qkey;
-	__u32	rq_psn;
-	__u32	sq_psn;
-	__u32	dest_qp_num;
-	__u32	qp_access_flags;
-
-	struct ib_ucm_ah_attr	ah_attr;
-	struct ib_ucm_ah_attr	alt_ah_attr;
-
-	/* ib_qp_cap */
-	__u32	max_send_wr;
-	__u32	max_recv_wr;
-	__u32	max_send_sge;
-	__u32	max_recv_sge;
-	__u32	max_inline_data;
-
-	__u16	pkey_index;
-	__u16	alt_pkey_index;
-	__u8	en_sqd_async_notify;
-	__u8	sq_draining;
-	__u8	max_rd_atomic;
-	__u8	max_dest_rd_atomic;
-	__u8	min_rnr_timer;
-	__u8	port_num;
-	__u8	timeout;
-	__u8	retry_cnt;
-	__u8	rnr_retry;
-	__u8	alt_port_num;
-	__u8	alt_timeout;
-};
-
 struct ib_ucm_listen {
 	__be64 service_id;
 	__be64 service_mask;
 	__u32 id;
+	__u32 reserved;
 };
 
 struct ib_ucm_establish {
@@ -177,28 +128,6 @@ struct ib_ucm_private_data {
 	__u8  reserved[3];
 };
 
-struct ib_ucm_path_rec {
-	__u8  dgid[16];
-	__u8  sgid[16];
-	__be16 dlid;
-	__be16 slid;
-	__u32 raw_traffic;
-	__be32 flow_label;
-	__u32 reversible;
-	__u32 mtu;
-	__be16 pkey;
-	__u8  hop_limit;
-	__u8  traffic_class;
-	__u8  numb_path;
-	__u8  sl;
-	__u8  mtu_selector;
-	__u8  rate_selector;
-	__u8  rate;
-	__u8  packet_life_time_selector;
-	__u8  packet_life_time;
-	__u8  preference;
-};
-
 struct ib_ucm_req {
 	__u32 id;
 	__u32 qpn;
@@ -219,7 +148,7 @@ struct ib_ucm_req {
 	__u8  rnr_retry_count;
 	__u8  max_cm_retries;
 	__u8  srq;
-	__u8  reserved[1];
+	__u8  reserved[5];
 };
 
 struct ib_ucm_rep {
@@ -236,6 +165,7 @@ struct ib_ucm_rep {
 	__u8  flow_control;
 	__u8  rnr_retry_count;
 	__u8  srq;
+	__u8  reserved[4];
 };
 
 struct ib_ucm_info {
@@ -245,7 +175,7 @@ struct ib_ucm_info {
 	__u64 data;
 	__u8  info_len;
 	__u8  data_len;
-	__u8  reserved[2];
+	__u8  reserved[6];
 };
 
 struct ib_ucm_mra {
@@ -273,6 +203,7 @@ struct ib_ucm_sidr_req {
 	__u16 pkey;
 	__u8  len;
 	__u8  max_cm_retries;
+	__u8  reserved[4];
 };
 
 struct ib_ucm_sidr_rep {
@@ -284,7 +215,7 @@ struct ib_ucm_sidr_rep {
 	__u64 data;
 	__u8  info_len;
 	__u8  data_len;
-	__u8  reserved[2];
+	__u8  reserved[6];
 };
 /*
  * event notification ABI structures.
@@ -295,14 +226,12 @@ struct ib_ucm_event_get {
 	__u64 info;
 	__u8  data_len;
 	__u8  info_len;
-	__u8  reserved[2];
+	__u8  reserved[6];
 };
 
 struct ib_ucm_req_event_resp {
-	/* device */
-	/* port */
-	struct ib_ucm_path_rec primary_path;
-	struct ib_ucm_path_rec alternate_path;
+	struct ib_user_path_rec primary_path;
+	struct ib_user_path_rec alternate_path;
 	__be64                 remote_ca_guid;
 	__u32                  remote_qkey;
 	__u32                  remote_qpn;
@@ -316,6 +245,8 @@ struct ib_ucm_req_event_resp {
 	__u8  retry_count;
 	__u8  rnr_retry_count;
 	__u8  srq;
+	__u8  port;
+	__u8  reserved[7];
 };
 
 struct ib_ucm_rep_event_resp {
@@ -330,7 +261,7 @@ struct ib_ucm_rep_event_resp {
 	__u8  flow_control;
 	__u8  rnr_retry_count;
 	__u8  srq;
-	__u8  reserved[1];
+	__u8  reserved[5];
 };
 
 struct ib_ucm_rej_event_resp {
@@ -344,7 +275,7 @@ struct ib_ucm_mra_event_resp {
 };
 
 struct ib_ucm_lap_event_resp {
-	struct ib_ucm_path_rec path;
+	struct ib_user_path_rec path;
 };
 
 struct ib_ucm_apr_event_resp {
@@ -353,10 +284,9 @@ struct ib_ucm_apr_event_resp {
 };
 
 struct ib_ucm_sidr_req_event_resp {
-	/* device */
-	/* port */
 	__u16 pkey;
-	__u8  reserved[2];
+	__u8  port;
+	__u8  reserved;
 };
 
 struct ib_ucm_sidr_rep_event_resp {
@@ -376,6 +306,7 @@ struct ib_ucm_event_resp {
 	__u32 id;
 	__u32 event;
 	__u32 present;
+	__u32 reserved;
 	union {
 		struct ib_ucm_req_event_resp req_resp;
 		struct ib_ucm_rep_event_resp rep_resp;
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/include/rdma/ib_user_mad.h linux-2.6.14.3-RPCRDMA/include/rdma/ib_user_mad.h
--- linux-2.6.14.3/include/rdma/ib_user_mad.h	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/include/rdma/ib_user_mad.h	2005-12-08 11:16:53.000000000 -0500
@@ -30,7 +30,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: ib_user_mad.h 2814 2005-07-06 19:14:09Z halr $
+ * $Id: ib_user_mad.h 3202 2005-08-26 17:11:34Z roland $
  */
 
 #ifndef IB_USER_MAD_H
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/include/rdma/ib_user_sa.h linux-2.6.14.3-RPCRDMA/include/rdma/ib_user_sa.h
--- linux-2.6.14.3/include/rdma/ib_user_sa.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/include/rdma/ib_user_sa.h	2005-12-08 11:16:53.000000000 -0500
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_USER_SA_H
+#define IB_USER_SA_H
+
+#include <linux/types.h>
+
+struct ib_user_path_rec {
+	__u8	dgid[16];
+	__u8	sgid[16];
+	__be16	dlid;
+	__be16	slid;
+	__u32	raw_traffic;
+	__be32	flow_label;
+	__u32	reversible;
+	__u32	mtu;
+	__be16	pkey;
+	__u8	hop_limit;
+	__u8	traffic_class;
+	__u8	numb_path;
+	__u8	sl;
+	__u8	mtu_selector;
+	__u8	rate_selector;
+	__u8	rate;
+	__u8	packet_life_time_selector;
+	__u8	packet_life_time;
+	__u8	preference;
+};
+
+#endif /* IB_USER_SA_H */
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/include/rdma/ib_user_verbs.h linux-2.6.14.3-RPCRDMA/include/rdma/ib_user_verbs.h
--- linux-2.6.14.3/include/rdma/ib_user_verbs.h	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/include/rdma/ib_user_verbs.h	2005-12-08 11:16:53.000000000 -0500
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -30,7 +31,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: ib_user_verbs.h 2708 2005-06-24 17:27:21Z roland $
+ * $Id: ib_user_verbs.h 4019 2005-11-11 00:33:09Z sean.hefty $
  */
 
 #ifndef IB_USER_VERBS_H
@@ -42,15 +43,12 @@
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define IB_USER_VERBS_ABI_VERSION	2
+#define IB_USER_VERBS_ABI_VERSION	4
 
 enum {
-	IB_USER_VERBS_CMD_QUERY_PARAMS,
 	IB_USER_VERBS_CMD_GET_CONTEXT,
 	IB_USER_VERBS_CMD_QUERY_DEVICE,
 	IB_USER_VERBS_CMD_QUERY_PORT,
-	IB_USER_VERBS_CMD_QUERY_GID,
-	IB_USER_VERBS_CMD_QUERY_PKEY,
 	IB_USER_VERBS_CMD_ALLOC_PD,
 	IB_USER_VERBS_CMD_DEALLOC_PD,
 	IB_USER_VERBS_CMD_CREATE_AH,
@@ -65,6 +63,7 @@ enum {
 	IB_USER_VERBS_CMD_ALLOC_MW,
 	IB_USER_VERBS_CMD_BIND_MW,
 	IB_USER_VERBS_CMD_DEALLOC_MW,
+	IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL,
 	IB_USER_VERBS_CMD_CREATE_CQ,
 	IB_USER_VERBS_CMD_RESIZE_CQ,
 	IB_USER_VERBS_CMD_DESTROY_CQ,
@@ -90,8 +89,11 @@ enum {
  * Make sure that all structs defined in this file remain laid out so
  * that they pack the same way on 32-bit and 64-bit architectures (to
  * avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
- * instead.
+ * Specifically:
+ *  - Do not use pointer types -- pass pointers in __u64 instead.
+ *  - Make sure that any structure larger than 4 bytes is padded to a
+ *    multiple of 8 bytes.  Otherwise the structure size will be
+ *    different between 32-bit and 64-bit architectures.
  */
 
 struct ib_uverbs_async_event_desc {
@@ -118,27 +120,14 @@ struct ib_uverbs_cmd_hdr {
 	__u16 out_words;
 };
 
-/*
- * No driver_data for "query params" command, since this is intended
- * to be a core function with no possible device dependence.
- */
-struct ib_uverbs_query_params {
-	__u64 response;
-};
-
-struct ib_uverbs_query_params_resp {
-	__u32 num_cq_events;
-};
-
 struct ib_uverbs_get_context {
 	__u64 response;
-	__u64 cq_fd_tab;
 	__u64 driver_data[0];
 };
 
 struct ib_uverbs_get_context_resp {
 	__u32 async_fd;
-	__u32 reserved;
+	__u32 num_comp_vectors;
 };
 
 struct ib_uverbs_query_device {
@@ -220,31 +209,6 @@ struct ib_uverbs_query_port_resp {
 	__u8  reserved[3];
 };
 
-struct ib_uverbs_query_gid {
-	__u64 response;
-	__u8  port_num;
-	__u8  index;
-	__u8  reserved[6];
-	__u64 driver_data[0];
-};
-
-struct ib_uverbs_query_gid_resp {
-	__u8  gid[16];
-};
-
-struct ib_uverbs_query_pkey {
-	__u64 response;
-	__u8  port_num;
-	__u8  index;
-	__u8  reserved[6];
-	__u64 driver_data[0];
-};
-
-struct ib_uverbs_query_pkey_resp {
-	__u16 pkey;
-	__u16 reserved;
-};
-
 struct ib_uverbs_alloc_pd {
 	__u64 response;
 	__u64 driver_data[0];
@@ -278,11 +242,21 @@ struct ib_uverbs_dereg_mr {
 	__u32 mr_handle;
 };
 
+struct ib_uverbs_create_comp_channel {
+	__u64 response;
+};
+
+struct ib_uverbs_create_comp_channel_resp {
+	__u32 fd;
+};
+
 struct ib_uverbs_create_cq {
 	__u64 response;
 	__u64 user_handle;
 	__u32 cqe;
-	__u32 event_handler;
+	__u32 comp_vector;
+	__s32 comp_channel;
+	__u32 reserved;
 	__u64 driver_data[0];
 };
 
@@ -291,6 +265,41 @@ struct ib_uverbs_create_cq_resp {
 	__u32 cqe;
 };
 
+struct ib_uverbs_poll_cq {
+	__u64 response;
+	__u32 cq_handle;
+	__u32 ne;
+};
+
+struct ib_uverbs_wc {
+	__u64 wr_id;
+	__u32 status;
+	__u32 opcode;
+	__u32 vendor_err;
+	__u32 byte_len;
+	__u32 imm_data;
+	__u32 qp_num;
+	__u32 src_qp;
+	__u32 wc_flags;
+	__u16 pkey_index;
+	__u16 slid;
+	__u8 sl;
+	__u8 dlid_path_bits;
+	__u8 port_num;
+	__u8 reserved;
+};
+
+struct ib_uverbs_poll_cq_resp {
+	__u32 count;
+	__u32 reserved;
+	struct ib_uverbs_wc wc[0];
+};
+
+struct ib_uverbs_req_notify_cq {
+	__u32 cq_handle;
+	__u32 solicited_only;
+};
+
 struct ib_uverbs_destroy_cq {
 	__u64 response;
 	__u32 cq_handle;
@@ -302,6 +311,64 @@ struct ib_uverbs_destroy_cq_resp {
 	__u32 async_events_reported;
 };
 
+struct ib_uverbs_global_route {
+	__u8  dgid[16];
+	__u32 flow_label;    
+	__u8  sgid_index;
+	__u8  hop_limit;
+	__u8  traffic_class;
+	__u8  reserved;
+};
+
+struct ib_uverbs_ah_attr {
+	struct ib_uverbs_global_route grh;
+	__u16 dlid;
+	__u8  sl;
+	__u8  src_path_bits;
+	__u8  static_rate;
+	__u8  is_global;
+	__u8  port_num;
+	__u8  reserved;
+};
+
+struct ib_uverbs_qp_attr {
+	__u32	qp_attr_mask;
+	__u32	qp_state;
+	__u32	cur_qp_state;
+	__u32	path_mtu;
+	__u32	path_mig_state;
+	__u32	qkey;
+	__u32	rq_psn;
+	__u32	sq_psn;
+	__u32	dest_qp_num;
+	__u32	qp_access_flags;
+
+	struct ib_uverbs_ah_attr ah_attr;
+	struct ib_uverbs_ah_attr alt_ah_attr;
+
+	/* ib_qp_cap */
+	__u32	max_send_wr;
+	__u32	max_recv_wr;
+	__u32	max_send_sge;
+	__u32	max_recv_sge;
+	__u32	max_inline_data;
+
+	__u16	pkey_index;
+	__u16	alt_pkey_index;
+	__u8	en_sqd_async_notify;
+	__u8	sq_draining;
+	__u8	max_rd_atomic;
+	__u8	max_dest_rd_atomic;
+	__u8	min_rnr_timer;
+	__u8	port_num;
+	__u8	timeout;
+	__u8	retry_cnt;
+	__u8	rnr_retry;
+	__u8	alt_port_num;
+	__u8	alt_timeout;
+	__u8	reserved[5];
+};
+
 struct ib_uverbs_create_qp {
 	__u64 response;
 	__u64 user_handle;
@@ -324,6 +391,11 @@ struct ib_uverbs_create_qp {
 struct ib_uverbs_create_qp_resp {
 	__u32 qp_handle;
 	__u32 qpn;
+	__u32 max_send_wr;
+	__u32 max_recv_wr;
+	__u32 max_send_sge;
+	__u32 max_recv_sge;
+	__u32 max_inline_data;
 };
 
 /*
@@ -388,6 +460,107 @@ struct ib_uverbs_destroy_qp_resp {
 	__u32 events_reported;
 };
 
+/*
+ * The ib_uverbs_sge structure isn't used anywhere, since we assume
+ * the ib_sge structure is packed the same way on 32-bit and 64-bit
+ * architectures in both kernel and user space.  It's just here to
+ * document the ABI.
+ */
+struct ib_uverbs_sge {
+	__u64 addr;
+	__u32 length;
+	__u32 lkey;
+};
+
+struct ib_uverbs_send_wr {
+	__u64 wr_id; 
+	__u32 num_sge;
+	__u32 opcode;
+	__u32 send_flags;
+	__u32 imm_data;
+	union {
+		struct {
+			__u64 remote_addr;
+			__u32 rkey;
+			__u32 reserved;
+		} rdma;
+		struct {
+			__u64 remote_addr;
+			__u64 compare_add;
+			__u64 swap;
+			__u32 rkey;
+			__u32 reserved;
+		} atomic;
+		struct {
+			__u32 ah;
+			__u32 remote_qpn;
+			__u32 remote_qkey;
+			__u32 reserved;
+		} ud;
+	} wr;
+};
+
+struct ib_uverbs_post_send {
+	__u64 response;
+	__u32 qp_handle;
+	__u32 wr_count;
+	__u32 sge_count;
+	__u32 wqe_size;
+	struct ib_uverbs_send_wr send_wr[0];
+};
+
+struct ib_uverbs_post_send_resp {
+	__u32 bad_wr;
+};
+
+struct ib_uverbs_recv_wr {
+	__u64 wr_id;
+	__u32 num_sge;
+	__u32 reserved;
+};
+
+struct ib_uverbs_post_recv {
+	__u64 response;
+	__u32 qp_handle;
+	__u32 wr_count;
+	__u32 sge_count;
+	__u32 wqe_size;
+	struct ib_uverbs_recv_wr recv_wr[0];
+};
+
+struct ib_uverbs_post_recv_resp {
+	__u32 bad_wr;
+};
+
+struct ib_uverbs_post_srq_recv {
+	__u64 response;
+	__u32 srq_handle;
+	__u32 wr_count;
+	__u32 sge_count;
+	__u32 wqe_size;
+	struct ib_uverbs_recv_wr recv[0];
+};
+
+struct ib_uverbs_post_srq_recv_resp {
+	__u32 bad_wr;
+};
+
+struct ib_uverbs_create_ah {
+	__u64 response;
+	__u64 user_handle;
+	__u32 pd_handle;
+	__u32 reserved;
+	struct ib_uverbs_ah_attr attr;
+};
+
+struct ib_uverbs_create_ah_resp {
+	__u32 ah_handle;
+};
+
+struct ib_uverbs_destroy_ah {
+	__u32 ah_handle;
+};
+
 struct ib_uverbs_attach_mcast {
 	__u8  gid[16];
 	__u32 qp_handle;
@@ -422,9 +595,7 @@ struct ib_uverbs_modify_srq {
 	__u32 srq_handle;
 	__u32 attr_mask;
 	__u32 max_wr;
-	__u32 max_sge;
 	__u32 srq_limit;
-	__u32 reserved;
 	__u64 driver_data[0];
 };
 
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/include/rdma/ib_verbs.h linux-2.6.14.3-RPCRDMA/include/rdma/ib_verbs.h
--- linux-2.6.14.3/include/rdma/ib_verbs.h	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/include/rdma/ib_verbs.h	2005-12-08 11:16:53.000000000 -0500
@@ -35,7 +35,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: ib_verbs.h 1349 2004-12-16 21:09:43Z roland $
+ * $Id: ib_verbs.h 4030 2005-11-13 04:48:01Z roland $
  */
 
 #if !defined(IB_VERBS_H)
@@ -48,6 +48,14 @@
 #include <asm/scatterlist.h>
 #include <asm/uaccess.h>
 
+/* XXX remove this compatibility hack when 2.6.15 is released */
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15)
+#define class_device_create(cls, parent, devt, device, fmt, arg...) \
+	class_device_create(cls, devt, device, fmt, ## arg)
+#endif /* XXX end of hack */
+
 union ib_gid {
 	u8	raw[16];
 	struct {
@@ -595,11 +603,8 @@ struct ib_send_wr {
 		} atomic;
 		struct {
 			struct ib_ah *ah;
-			struct ib_mad_hdr *mad_hdr;
 			u32	remote_qpn;
 			u32	remote_qkey;
-			int	timeout_ms; /* valid for MADs only */
-			int	retries;    /* valid for MADs only */
 			u16	pkey_index; /* valid for GSI only */
 			u8	port_num;   /* valid for DR SMPs on switch only */
 		} ud;
@@ -884,7 +889,7 @@ struct ib_device {
 						struct ib_ucontext *context,
 						struct ib_udata *udata);
 	int                        (*destroy_cq)(struct ib_cq *cq);
-	int                        (*resize_cq)(struct ib_cq *cq, int *cqe);
+	int                        (*resize_cq)(struct ib_cq *cq, int cqe);
 	int                        (*poll_cq)(struct ib_cq *cq, int num_entries,
 					      struct ib_wc *wc);
 	int                        (*peek_cq)(struct ib_cq *cq, int wc_cnt);
@@ -951,6 +956,10 @@ struct ib_device {
 		IB_DEV_UNREGISTERED
 	}                            reg_state;
 
+	u64			     uverbs_cmd_mask;
+	int			     uverbs_abi_ver;
+
+	__be64			     node_guid;
 	u8                           node_type;
 	u8                           phys_port_cnt;
 };
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/include/rdma/rdma_cm.h linux-2.6.14.3-RPCRDMA/include/rdma/rdma_cm.h
--- linux-2.6.14.3/include/rdma/rdma_cm.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/include/rdma/rdma_cm.h	2005-12-08 11:16:53.000000000 -0500
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
+ * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ */
+
+#if !defined(RDMA_CM_H)
+#define RDMA_CM_H
+
+#include <linux/socket.h>
+#include <linux/in6.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_sa.h>
+
+/*
+ * Upon receiving a device removal event, users must destroy the associated
+ * RDMA identifier and release all resources allocated with the device.
+ */
+enum rdma_cm_event_type {
+	RDMA_CM_EVENT_ADDR_RESOLVED,
+	RDMA_CM_EVENT_ADDR_ERROR,
+	RDMA_CM_EVENT_ROUTE_RESOLVED,
+	RDMA_CM_EVENT_ROUTE_ERROR,
+	RDMA_CM_EVENT_CONNECT_REQUEST,
+	RDMA_CM_EVENT_CONNECT_RESPONSE,
+	RDMA_CM_EVENT_CONNECT_ERROR,
+	RDMA_CM_EVENT_UNREACHABLE,
+	RDMA_CM_EVENT_REJECTED,
+	RDMA_CM_EVENT_ESTABLISHED,
+	RDMA_CM_EVENT_DISCONNECTED,
+	RDMA_CM_EVENT_DEVICE_REMOVAL,
+};
+
+struct rdma_addr {
+	struct sockaddr src_addr;
+	u8		src_pad[sizeof(struct sockaddr_in6) -
+				sizeof(struct sockaddr)];
+	struct sockaddr dst_addr;
+	u8		dst_pad[sizeof(struct sockaddr_in6) -
+				sizeof(struct sockaddr)];
+	union {
+		struct ib_addr	ibaddr;
+	} addr;
+};
+
+struct rdma_route {
+	struct rdma_addr addr;
+	struct ib_sa_path_rec *path_rec;
+	int num_paths;
+};
+
+struct rdma_cm_event {
+	enum rdma_cm_event_type	 event;
+	int			 status;
+	void			*private_data;
+	u8			 private_data_len;
+};
+
+struct rdma_cm_id;
+
+/**
+ * rdma_cm_event_handler - Callback used to report user events.
+ *
+ * Notes: Users may not call rdma_destroy_id from this callback to destroy
+ *   the passed in id, or a corresponding listen id.  Returning a
+ *   non-zero value from the callback will destroy the corresponding id.
+ */
+typedef int (*rdma_cm_event_handler)(struct rdma_cm_id *id,
+				     struct rdma_cm_event *event);
+
+struct rdma_cm_id {
+	struct ib_device	*device;
+	void			*context;
+	struct ib_qp		*qp;
+	rdma_cm_event_handler	 event_handler;
+	struct rdma_route	 route;
+	u8			 port_num;
+};
+
+struct rdma_cm_id* rdma_create_id(rdma_cm_event_handler event_handler,
+				  void *context);
+
+void rdma_destroy_id(struct rdma_cm_id *id);
+
+/**
+ * rdma_bind_addr - Bind an RDMA identifier to a source address and
+ *   associated RDMA device, if needed.
+ *
+ * @id: RDMA identifier.
+ * @addr: Local address information.  Wildcard values are permitted.
+ *
+ * This associates a source address with the RDMA identifier before calling
+ * rdma_listen.  If a specific local address is given, the RDMA identifier will
+ * be bound to a local RDMA device.
+ */
+int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr);
+
+/**
+ * rdma_resolve_addr - Resolve destination and optional source addresses
+ *   from IP addresses to an RDMA address.  If successful, the specified
+ *   rdma_cm_id will be bound to a local device.
+ *
+ * @id: RDMA identifier.
+ * @src_addr: Source address information.  This parameter may be NULL.
+ * @dst_addr: Destination address information.
+ * @timeout_ms: Time to wait for resolution to complete.
+ */
+int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+		      struct sockaddr *dst_addr, int timeout_ms);
+
+/**
+ * rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier
+ *   into route information needed to establish a connection.
+ *
+ * This is called on the client side of a connection.
+ * Users must have first called rdma_resolve_addr to resolve a dst_addr
+ * into an RDMA address before calling this routine.
+ */
+int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms);
+
+/**
+ * rdma_create_qp - Allocate a QP and associate it with the specified RDMA
+ * identifier.
+ *
+ * QPs allocated to an rdma_cm_id will automatically be transitioned by the CMA
+ * through their states.
+ */
+int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
+		   struct ib_qp_init_attr *qp_init_attr);
+
+/**
+ * rdma_destroy_qp - Deallocate the QP associated with the specified RDMA
+ * identifier.
+ *
+ * Users must destroy any QP associated with an RDMA identifier before
+ * destroying the RDMA ID.
+ */
+void rdma_destroy_qp(struct rdma_cm_id *id);
+
+/**
+ * rdma_init_qp_attr - Initializes the QP attributes for use in transitioning
+ *   to a specified QP state.
+ * @id: Communication identifier associated with the QP attributes to
+ *   initialize.
+ * @qp_attr: On input, specifies the desired QP state.  On output, the
+ *   mandatory and desired optional attributes will be set in order to
+ *   modify the QP to the specified state.
+ * @qp_attr_mask: The QP attribute mask that may be used to transition the
+ *   QP to the specified state.
+ *
+ * Users must set the @qp_attr->qp_state to the desired QP state.  This call
+ * will set all required attributes for the given transition, along with
+ * known optional attributes.  Users may override the attributes returned from
+ * this call before calling ib_modify_qp.
+ *
+ * Users that wish to have their QP automatically transitioned through its
+ * states can associate a QP with the rdma_cm_id by calling rdma_create_qp().
+ */
+int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
+		       int *qp_attr_mask);
+
+struct rdma_conn_param {
+	const void *private_data;
+	u8 private_data_len;
+	u8 responder_resources;
+	u8 initiator_depth;
+	u8 flow_control;
+	u8 retry_count;		/* ignored when accepting */
+	u8 rnr_retry_count;
+	/* Fields below ignored if a QP is created on the rdma_cm_id. */
+	u8 srq;
+	u32 qp_num;
+	enum ib_qp_type qp_type;
+};
+
+/**
+ * rdma_connect - Initiate an active connection request.
+ *
+ * Users must have resolved a route for the rdma_cm_id to connect with
+ * by having called rdma_resolve_route before calling this routine.
+ */
+int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
+
+/**
+ * rdma_listen - This function is called by the passive side to
+ *   listen for incoming connection requests.
+ *
+ * Users must have bound the rdma_cm_id to a local address by calling
+ * rdma_bind_addr before calling this routine.
+ */
+int rdma_listen(struct rdma_cm_id *id, int backlog);
+
+/**
+ * rdma_accept - Called to accept a connection request or response.
+ * @id: Connection identifier associated with the request.
+ * @conn_param: Information needed to establish the connection.  This must be
+ *   provided if accepting a connection request.  If accepting a connection
+ *   response, this parameter must be NULL.
+ *
+ * Typically, this routine is only called by the listener to accept a connection
+ * request.  It must also be called on the active side of a connection if the
+ * user is performing their own QP transitions.
+ */
+int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
+
+/**
+ * rdma_reject - Called on the passive side to reject a connection request.
+ */
+int rdma_reject(struct rdma_cm_id *id, const void *private_data,
+		u8 private_data_len);
+
+/**
+ * rdma_disconnect - This function disconnects the associated QP.
+ */
+int rdma_disconnect(struct rdma_cm_id *id);
+
+#endif /* RDMA_CM_H */
+
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/include/rdma/rdma_user_cm.h linux-2.6.14.3-RPCRDMA/include/rdma/rdma_user_cm.h
--- linux-2.6.14.3/include/rdma/rdma_user_cm.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/include/rdma/rdma_user_cm.h	2005-12-08 11:16:53.000000000 -0500
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RDMA_USER_CM_H
+#define RDMA_USER_CM_H
+
+#include <linux/types.h>
+#include <linux/in6.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_user_sa.h>
+
+#define RDMA_USER_CM_ABI_VERSION 1
+
+#define RDMA_MAX_PRIVATE_DATA		256
+
+enum {
+	RDMA_USER_CM_CMD_CREATE_ID,
+	RDMA_USER_CM_CMD_DESTROY_ID,
+	RDMA_USER_CM_CMD_BIND_ADDR,
+	RDMA_USER_CM_CMD_RESOLVE_ADDR,
+	RDMA_USER_CM_CMD_RESOLVE_ROUTE,
+	RDMA_USER_CM_CMD_QUERY_ROUTE,
+	RDMA_USER_CM_CMD_CONNECT,
+	RDMA_USER_CM_CMD_LISTEN,
+	RDMA_USER_CM_CMD_ACCEPT,
+	RDMA_USER_CM_CMD_REJECT,
+	RDMA_USER_CM_CMD_DISCONNECT,
+	RDMA_USER_CM_CMD_INIT_QP_ATTR,
+	RDMA_USER_CM_CMD_GET_EVENT
+};
+
+/*
+ * command ABI structures.
+ */
+struct rdma_ucm_cmd_hdr {
+	__u32 cmd;
+	__u16 in;
+	__u16 out;
+};
+
+struct rdma_ucm_create_id {
+	__u64 uid;
+	__u64 response;
+};
+
+struct rdma_ucm_create_id_resp {
+	__u32 id;
+};
+
+struct rdma_ucm_destroy_id {
+	__u64 response;
+	__u32 id;
+	__u32 reserved;
+};
+
+struct rdma_ucm_destroy_id_resp {
+	__u32 events_reported;
+};
+
+struct rdma_ucm_bind_addr {
+	__u64 response;
+	struct sockaddr_in6 addr;
+	__u32 id;
+};
+
+struct rdma_ucm_resolve_addr {
+	struct sockaddr_in6 src_addr;
+	struct sockaddr_in6 dst_addr;
+	__u32 id;
+	__u32 timeout_ms;
+};
+
+struct rdma_ucm_resolve_route {
+	__u32 id;
+	__u32 timeout_ms;
+};
+
+struct rdma_ucm_query_route {
+	__u64 response;
+	__u32 id;
+	__u32 reserved;
+};
+
+struct rdma_ucm_query_route_resp {
+	__u64 node_guid;
+	struct ib_user_path_rec ib_route[2];
+	struct sockaddr_in6 src_addr;
+	struct sockaddr_in6 dst_addr;
+	__u32 num_paths;
+	__u8 port_num;
+	__u8 reserved[3];
+};
+
+struct rdma_ucm_conn_param {
+	__u32 qp_num;
+	__u32 qp_type;
+	__u8  private_data[RDMA_MAX_PRIVATE_DATA];
+	__u8  private_data_len;
+	__u8  srq;
+	__u8  responder_resources;
+	__u8  initiator_depth;
+	__u8  flow_control;
+	__u8  retry_count;
+	__u8  rnr_retry_count;
+	__u8  valid;
+};
+
+struct rdma_ucm_connect {
+	struct rdma_ucm_conn_param conn_param;
+	__u32 id;
+	__u32 reserved;
+};
+
+struct rdma_ucm_listen {
+	__u32 id;
+	__u32 backlog;
+};
+
+struct rdma_ucm_accept {
+	__u64 uid;
+	struct rdma_ucm_conn_param conn_param;
+	__u32 id;
+	__u32 reserved;
+};
+
+struct rdma_ucm_reject {
+	__u32 id;
+	__u8  private_data_len;
+	__u8  reserved[3];
+	__u8  private_data[RDMA_MAX_PRIVATE_DATA];
+};
+
+struct rdma_ucm_disconnect {
+	__u32 id;
+};
+
+struct rdma_ucm_init_qp_attr {
+	__u64 response;
+	__u32 id;
+	__u32 qp_state;
+};
+
+struct rdma_ucm_get_event {
+	__u64 response;
+};
+
+struct rdma_ucm_event_resp {
+	__u64 uid;
+	__u32 id;
+	__u32 event;
+	__u32 status;
+	__u8  private_data_len;
+	__u8  reserved[3];
+	__u8  private_data[RDMA_MAX_PRIVATE_DATA];
+};
+
+#endif /* RDMA_USER_CM_H */
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/net/ipv4/fib_frontend.c linux-2.6.14.3-RPCRDMA/net/ipv4/fib_frontend.c
--- linux-2.6.14.3/net/ipv4/fib_frontend.c	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/net/ipv4/fib_frontend.c	2005-12-08 11:25:14.000000000 -0500
@@ -661,4 +661,5 @@ void __init ip_fib_init(void)
 }
 
 EXPORT_SYMBOL(inet_addr_type);
+EXPORT_SYMBOL(ip_dev_find);
 EXPORT_SYMBOL(ip_rt_ioctl);
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/net/sunrpc/Makefile linux-2.6.14.3-RPCRDMA/net/sunrpc/Makefile
--- linux-2.6.14.3/net/sunrpc/Makefile	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/net/sunrpc/Makefile	2005-12-08 11:51:27.000000000 -0500
@@ -2,6 +2,7 @@
 # Makefile for Linux kernel SUN RPC
 #
 
+EXTRA_CFLAGS += -Idrivers/infiniband/ulp/kdapl/
 
 obj-$(CONFIG_SUNRPC) += sunrpc.o
 obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
@@ -10,6 +11,8 @@ sunrpc-y := clnt.o xprt.o sched.o \
 	    auth.o auth_null.o auth_unix.o \
 	    svc.o svcsock.o svcauth.o svcauth_unix.o \
 	    pmap_clnt.o timer.o xdr.o \
-	    sunrpc_syms.o cache.o rpc_pipe.o
+	    sunrpc_syms.o cache.o rpc_pipe.o \
+	    svcxprt_rdma.o svcxprt_rdma_kdapl.o
 sunrpc-$(CONFIG_PROC_FS) += stats.o
 sunrpc-$(CONFIG_SYSCTL) += sysctl.o
+sunrpc-$(CONFIG_SUNRPC_RDMA) += svcxprt_rdma.o svcxprt_rdma_kdapl.o
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/net/sunrpc/svc.c linux-2.6.14.3-RPCRDMA/net/sunrpc/svc.c
--- linux-2.6.14.3/net/sunrpc/svc.c	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/net/sunrpc/svc.c	2005-12-08 11:50:23.000000000 -0500
@@ -56,8 +56,8 @@ svc_create(struct svc_program *prog, uns
 	serv->sv_xdrsize   = xdrsize;
 	INIT_LIST_HEAD(&serv->sv_threads);
 	INIT_LIST_HEAD(&serv->sv_sockets);
-	INIT_LIST_HEAD(&serv->sv_tempsocks);
-	INIT_LIST_HEAD(&serv->sv_permsocks);
+	INIT_LIST_HEAD(&serv->sv_tempxprts);
+	INIT_LIST_HEAD(&serv->sv_permxprts);
 	spin_lock_init(&serv->sv_lock);
 
 	/* Remove any stale portmap registrations */
@@ -72,7 +72,7 @@ svc_create(struct svc_program *prog, uns
 void
 svc_destroy(struct svc_serv *serv)
 {
-	struct svc_sock	*svsk;
+	struct svc_xprt *xprt;
 
 	dprintk("RPC: svc_destroy(%s, %d)\n",
 				serv->sv_program->pg_name,
@@ -86,17 +86,28 @@ svc_destroy(struct svc_serv *serv)
 	} else
 		printk("svc_destroy: no threads for serv=%p!\n", serv);
 
-	while (!list_empty(&serv->sv_tempsocks)) {
-		svsk = list_entry(serv->sv_tempsocks.next,
-				  struct svc_sock,
+	while (!list_empty(&serv->sv_tempxprts)) {
+		xprt = list_entry(serv->sv_tempxprts.next,
+				  struct svc_xprt,
 				  sk_list);
-		svc_delete_socket(svsk);
+		dprintk("%s: deleting temp xprt %p\n", __FUNCTION__, xprt);
+		if (xprt->sx_delete) {
+			xprt->sx_delete(xprt);
+		} else {
+			dprintk("%s: xprt->sx_delete is NULL\n", __FUNCTION__);
+		}
 	}
-	while (!list_empty(&serv->sv_permsocks)) {
-		svsk = list_entry(serv->sv_permsocks.next,
-				  struct svc_sock,
-				  sk_list);
-		svc_delete_socket(svsk);
+
+	while (!list_empty(&serv->sv_permxprts)) {
+		xprt = list_entry(serv->sv_permxprts.next,
+				   struct svc_xprt,
+				   sk_list);
+		dprintk("%s: deleting perm xprt %p\n", __FUNCTION__, xprt);
+		if (xprt->sx_delete) {
+			xprt->sx_delete(xprt);
+		} else {
+			dprintk("%s: xprt->sx_delete is NULL\n", __FUNCTION__);
+		}
 	}
 	
 	cache_clean_deferred(serv);
@@ -273,18 +284,20 @@ svc_process(struct svc_serv *serv, struc
 	if (argv->iov_len < 6*4)
 		goto err_short_len;
 
-	/* setup response xdr_buf.
-	 * Initially it has just one page 
-	 */
-	svc_take_page(rqstp); /* must succeed */
-	resv->iov_base = page_address(rqstp->rq_respages[0]);
-	resv->iov_len = 0;
-	rqstp->rq_res.pages = rqstp->rq_respages+1;
-	rqstp->rq_res.len = 0;
-	rqstp->rq_res.page_base = 0;
-	rqstp->rq_res.page_len = 0;
-	rqstp->rq_res.buflen = PAGE_SIZE;
-	rqstp->rq_res.tail[0].iov_len = 0;
+	if (!rqstp->rq_sock->sx_manages_buffers) {
+		/* setup response xdr_buf.
+		 * Initially it has just one page 
+		 */
+		svc_take_page(rqstp); /* must succeed */
+		resv->iov_base = page_address(rqstp->rq_respages[0]);
+		resv->iov_len = 0;
+		rqstp->rq_res.pages = rqstp->rq_respages+1;
+		rqstp->rq_res.len = 0;
+		rqstp->rq_res.page_base = 0;
+		rqstp->rq_res.page_len = 0;
+		rqstp->rq_res.tail[0].iov_len = 0;
+	}
+
 	/* tcp needs a space for the record length... */
 	if (rqstp->rq_prot == IPPROTO_TCP)
 		svc_putu32(resv, 0);
@@ -324,6 +337,11 @@ svc_process(struct svc_serv *serv, struc
 		auth_stat = rpc_autherr_badcred;
 		auth_res = progp->pg_authenticate(rqstp);
 	}
+
+ 	if (auth_res != SVC_OK) {
+	  dprintk("%s: auth_res = %u\n", __FUNCTION__, auth_res);
+	}
+
 	switch (auth_res) {
 	case SVC_OK:
 		break;
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/net/sunrpc/svcsock.c linux-2.6.14.3-RPCRDMA/net/sunrpc/svcsock.c
--- linux-2.6.14.3/net/sunrpc/svcsock.c	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/net/sunrpc/svcsock.c	2005-12-08 11:50:23.000000000 -0500
@@ -43,6 +43,10 @@
 #include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/stats.h>
 
+/* XXX move to module? */
+#include <linux/sunrpc/svcxprt_rdma.h>
+
+
 /* SMP locking strategy:
  *
  * 	svc_serv->sv_lock protects most stuff for that service.
@@ -63,13 +67,13 @@
 #define RPCDBG_FACILITY	RPCDBG_SVCSOCK
 
 
-static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
+static struct svc_xprt *svc_setup_socket(struct svc_serv *, struct socket *,
 					 int *errp, int pmap_reg);
 static void		svc_udp_data_ready(struct sock *, int);
 static int		svc_udp_recvfrom(struct svc_rqst *);
 static int		svc_udp_sendto(struct svc_rqst *);
 
-static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
+static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *svsk);
 static int svc_deferred_recv(struct svc_rqst *rqstp);
 static struct cache_deferred_req *svc_defer(struct cache_req *req);
 
@@ -119,7 +123,7 @@ svc_release_skb(struct svc_rqst *rqstp)
  * Any space to write?
  */
 static inline unsigned long
-svc_sock_wspace(struct svc_sock *svsk)
+svc_sock_wspace(struct svc_xprt *svsk)
 {
 	int wspace;
 
@@ -131,22 +135,67 @@ svc_sock_wspace(struct svc_sock *svsk)
 	return wspace;
 }
 
+/**
+ * svc_sock_has_write_space - Checks if there is enough space for the response
+ * @xprt: the transport to write on
+ * @wspace: the number of bytes available for writing
+ */
+static inline int
+svc_sock_has_write_space(struct svc_xprt *xprt, long int wspace) {
+	struct svc_serv	*serv = xprt->sx_server;
+
+	set_bit(SOCK_NOSPACE, &xprt->sk_sock->flags);
+	if (((xprt->sk_reserved + serv->sv_bufsz)*2 > wspace)
+	    && !test_bit(SK_CLOSE, &xprt->sk_flags)
+	    && !test_bit(SK_CONN, &xprt->sk_flags)) {
+		/* Don't enqueue while not enough space for reply */
+		dprintk("svc: socket %p  no space, %d*2 > %ld, not enqueued\n",
+			xprt->sk_sk, xprt->sk_reserved+serv->sv_bufsz, wspace);
+		return 0;
+	}
+	clear_bit(SOCK_NOSPACE, &xprt->sk_sock->flags);
+
+	return 1;
+}
+
+/**
+ * svc_tcp_has_write_space - Checks if there is enough space for the response
+ * @xprt: The transport instance the reply will be written to
+ **/
+static inline int
+svc_tcp_has_write_space(struct svc_xprt *xprt) {
+	return svc_sock_has_write_space(xprt, sk_stream_wspace(xprt->sk_sk));
+}
+
+/**
+ * svc_udp_has_write_space - Checks if there is enough space for the response
+ * @xprt: The transport instance the reply will be written to
+ **/
+static inline int
+svc_udp_has_write_space(struct svc_xprt *xprt) {
+	return svc_sock_has_write_space(xprt, sock_wspace(xprt->sk_sk));
+}
+
 /*
  * Queue up a socket with data pending. If there are idle nfsd
  * processes, wake 'em up.
  *
  */
-static void
-svc_sock_enqueue(struct svc_sock *svsk)
+void
+svc_sock_enqueue(struct svc_xprt *xprt)
 {
-	struct svc_serv	*serv = svsk->sk_server;
+	struct svc_serv	*serv = xprt->sx_server;
 	struct svc_rqst	*rqstp;
 
-	if (!(svsk->sk_flags &
+	dprintk("%s: xprt = %p, serv = %p\n", __FUNCTION__, xprt, serv);
+
+	if (!(xprt->sk_flags &
 	      ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) ))
 		return;
-	if (test_bit(SK_DEAD, &svsk->sk_flags))
+	if (test_bit(SK_DEAD, &xprt->sk_flags)) {
+		dprintk("%s: xprt is dead, will not be enqueued\n", __FUNCTION__);
 		return;
+	}
 
 	spin_lock_bh(&serv->sv_lock);
 
@@ -155,56 +204,47 @@ svc_sock_enqueue(struct svc_sock *svsk)
 		printk(KERN_ERR
 			"svc_sock_enqueue: threads and sockets both waiting??\n");
 
-	if (test_bit(SK_DEAD, &svsk->sk_flags)) {
+	if (test_bit(SK_DEAD, &xprt->sk_flags)) {
 		/* Don't enqueue dead sockets */
-		dprintk("svc: socket %p is dead, not enqueued\n", svsk->sk_sk);
+		dprintk("svc: socket %p is dead, not enqueued\n", xprt->sk_sk);
 		goto out_unlock;
 	}
 
-	if (test_bit(SK_BUSY, &svsk->sk_flags)) {
+	if (test_bit(SK_BUSY, &xprt->sk_flags)) {
 		/* Don't enqueue socket while daemon is receiving */
-		dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk);
+		dprintk("svc: socket %p busy, not enqueued\n", xprt->sk_sk);
 		goto out_unlock;
 	}
 
-	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
-	if (((svsk->sk_reserved + serv->sv_bufsz)*2
-	     > svc_sock_wspace(svsk))
-	    && !test_bit(SK_CLOSE, &svsk->sk_flags)
-	    && !test_bit(SK_CONN, &svsk->sk_flags)) {
-		/* Don't enqueue while not enough space for reply */
-		dprintk("svc: socket %p  no space, %d*2 > %ld, not enqueued\n",
-			svsk->sk_sk, svsk->sk_reserved+serv->sv_bufsz,
-			svc_sock_wspace(svsk));
+	if (xprt->sx_has_wspace && !xprt->sx_has_wspace(xprt)) {
 		goto out_unlock;
 	}
-	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
 
 	/* Mark socket as busy. It will remain in this state until the
 	 * server has processed all pending data and put the socket back
 	 * on the idle list.
 	 */
-	set_bit(SK_BUSY, &svsk->sk_flags);
+	set_bit(SK_BUSY, &xprt->sk_flags);
 
 	if (!list_empty(&serv->sv_threads)) {
 		rqstp = list_entry(serv->sv_threads.next,
 				   struct svc_rqst,
 				   rq_list);
 		dprintk("svc: socket %p served by daemon %p\n",
-			svsk->sk_sk, rqstp);
+			xprt->sk_sk, rqstp);
 		svc_serv_dequeue(serv, rqstp);
 		if (rqstp->rq_sock)
 			printk(KERN_ERR 
 				"svc_sock_enqueue: server %p, rq_sock=%p!\n",
 				rqstp, rqstp->rq_sock);
-		rqstp->rq_sock = svsk;
-		svsk->sk_inuse++;
+		rqstp->rq_sock = xprt;
+		xprt->sk_inuse++;
 		rqstp->rq_reserved = serv->sv_bufsz;
-		svsk->sk_reserved += rqstp->rq_reserved;
+		xprt->sk_reserved += rqstp->rq_reserved;
 		wake_up(&rqstp->rq_wait);
 	} else {
-		dprintk("svc: socket %p put into queue\n", svsk->sk_sk);
-		list_add_tail(&svsk->sk_ready, &serv->sv_sockets);
+		dprintk("svc: socket %p put into queue\n", xprt->sk_sk);
+		list_add_tail(&xprt->sk_ready, &serv->sv_sockets);
 	}
 
 out_unlock:
@@ -214,16 +254,16 @@ out_unlock:
 /*
  * Dequeue the first socket.  Must be called with the serv->sv_lock held.
  */
-static inline struct svc_sock *
+static inline struct svc_xprt *
 svc_sock_dequeue(struct svc_serv *serv)
 {
-	struct svc_sock	*svsk;
+	struct svc_xprt	*svsk;
 
 	if (list_empty(&serv->sv_sockets))
 		return NULL;
 
 	svsk = list_entry(serv->sv_sockets.next,
-			  struct svc_sock, sk_ready);
+			  struct svc_xprt, sk_ready);
 	list_del_init(&svsk->sk_ready);
 
 	dprintk("svc: socket %p dequeued, inuse=%d\n",
@@ -239,7 +279,7 @@ svc_sock_dequeue(struct svc_serv *serv)
  * no (or insufficient) data.
  */
 static inline void
-svc_sock_received(struct svc_sock *svsk)
+svc_sock_received(struct svc_xprt *svsk)
 {
 	clear_bit(SK_BUSY, &svsk->sk_flags);
 	svc_sock_enqueue(svsk);
@@ -261,11 +301,11 @@ void svc_reserve(struct svc_rqst *rqstp,
 	space += rqstp->rq_res.head[0].iov_len;
 
 	if (space < rqstp->rq_reserved) {
-		struct svc_sock *svsk = rqstp->rq_sock;
-		spin_lock_bh(&svsk->sk_server->sv_lock);
+		struct svc_xprt *svsk = rqstp->rq_sock;
+		spin_lock_bh(&svsk->sx_server->sv_lock);
 		svsk->sk_reserved -= (rqstp->rq_reserved - space);
 		rqstp->rq_reserved = space;
-		spin_unlock_bh(&svsk->sk_server->sv_lock);
+		spin_unlock_bh(&svsk->sx_server->sv_lock);
 
 		svc_sock_enqueue(svsk);
 	}
@@ -275,9 +315,9 @@ void svc_reserve(struct svc_rqst *rqstp,
  * Release a socket after use.
  */
 static inline void
-svc_sock_put(struct svc_sock *svsk)
+svc_sock_put(struct svc_xprt *svsk)
 {
-	struct svc_serv *serv = svsk->sk_server;
+	struct svc_serv *serv = svsk->sx_server;
 
 	spin_lock_bh(&serv->sv_lock);
 	if (!--(svsk->sk_inuse) && test_bit(SK_DEAD, &svsk->sk_flags)) {
@@ -290,10 +330,20 @@ svc_sock_put(struct svc_sock *svsk)
 		spin_unlock_bh(&serv->sv_lock);
 }
 
+/**
+ * svc_xprt_release - prepares thread context for processing next request
+ * @rqstp: context of a server thread containing request/reply data
+ *
+ * This function removes the association between a particular server thread
+ * and a particular transport instance as well as returning the server
+ * thread context to a pristine state for handling the next request.
+ */
 static void
-svc_sock_release(struct svc_rqst *rqstp)
+svc_xprt_release(struct svc_rqst *rqstp)
 {
-	struct svc_sock	*svsk = rqstp->rq_sock;
+	struct svc_xprt	*svsk = rqstp->rq_sock;
+
+	dprintk("%s: rqstp = %p\n", __FUNCTION__, rqstp);
 
 	svc_release_skb(rqstp);
 
@@ -316,7 +366,7 @@ svc_sock_release(struct svc_rqst *rqstp)
 	svc_reserve(rqstp, 0);
 	rqstp->rq_sock = NULL;
 
-	svc_sock_put(svsk);
+	if (svsk->sx_put) svsk->sx_put(svsk);
 }
 
 /*
@@ -348,7 +398,7 @@ svc_wake_up(struct svc_serv *serv)
 static int
 svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
 {
-	struct svc_sock	*svsk = rqstp->rq_sock;
+	struct svc_xprt	*svsk = rqstp->rq_sock;
 	struct socket	*sock = svsk->sk_sock;
 	int		slen;
 	char 		buffer[CMSG_SPACE(sizeof(struct in_pktinfo))];
@@ -432,7 +482,7 @@ out:
  * Check input queue length
  */
 static int
-svc_recv_available(struct svc_sock *svsk)
+svc_recv_available(struct svc_xprt *svsk)
 {
 	mm_segment_t	oldfs;
 	struct socket	*sock = svsk->sk_sock;
@@ -468,7 +518,7 @@ svc_recvfrom(struct svc_rqst *rqstp, str
 	len = kernel_recvmsg(sock, &msg, iov, nr, buflen, MSG_DONTWAIT);
 
 	/* sock_recvmsg doesn't fill in the name/namelen, so we must..
-	 * possibly we should cache this in the svc_sock structure
+	 * possibly we should cache this in the svc_xprt structure
 	 * at accept time. FIXME
 	 */
 	alen = sizeof(rqstp->rq_addr);
@@ -512,7 +562,7 @@ svc_sock_setbufsize(struct socket *sock,
 static void
 svc_udp_data_ready(struct sock *sk, int count)
 {
-	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;
+	struct svc_xprt	*svsk = (struct svc_xprt *)sk->sk_user_data;
 
 	if (svsk) {
 		dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n",
@@ -530,7 +580,7 @@ svc_udp_data_ready(struct sock *sk, int 
 static void
 svc_write_space(struct sock *sk)
 {
-	struct svc_sock	*svsk = (struct svc_sock *)(sk->sk_user_data);
+	struct svc_xprt	*svsk = (struct svc_xprt *)(sk->sk_user_data);
 
 	if (svsk) {
 		dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
@@ -554,8 +604,8 @@ csum_partial_copy_to_xdr(struct xdr_buf 
 static int
 svc_udp_recvfrom(struct svc_rqst *rqstp)
 {
-	struct svc_sock	*svsk = rqstp->rq_sock;
-	struct svc_serv	*serv = svsk->sk_server;
+	struct svc_xprt	*svsk = rqstp->rq_sock;
+	struct svc_serv	*serv = svsk->sx_server;
 	struct sk_buff	*skb;
 	int		err, len;
 
@@ -665,20 +715,23 @@ svc_udp_sendto(struct svc_rqst *rqstp)
 }
 
 static void
-svc_udp_init(struct svc_sock *svsk)
+svc_udp_init(struct svc_xprt *svsk)
 {
 	svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
 	svsk->sk_sk->sk_write_space = svc_write_space;
-	svsk->sk_recvfrom = svc_udp_recvfrom;
-	svsk->sk_sendto = svc_udp_sendto;
+	svsk->sx_sendto = svc_udp_sendto;
+	svsk->sx_recvfrom = svc_udp_recvfrom;
+	svsk->sx_delete = svc_delete_socket;
+	svsk->sx_put = svc_sock_put;
+	svsk->sx_has_wspace = svc_udp_has_write_space;
 
 	/* initialise setting must have enough space to
 	 * receive and respond to one request.  
 	 * svc_udp_recvfrom will re-adjust if necessary
 	 */
 	svc_sock_setbufsize(svsk->sk_sock,
-			    3 * svsk->sk_server->sv_bufsz,
-			    3 * svsk->sk_server->sv_bufsz);
+			    3 * svsk->sx_server->sv_bufsz,
+			    3 * svsk->sx_server->sv_bufsz);
 
 	set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */
 	set_bit(SK_CHNGBUF, &svsk->sk_flags);
@@ -691,7 +744,7 @@ svc_udp_init(struct svc_sock *svsk)
 static void
 svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
 {
-	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;
+	struct svc_xprt	*svsk = (struct svc_xprt *)sk->sk_user_data;
 
 	dprintk("svc: socket %p TCP (listen) state change %d\n",
 		sk, sk->sk_state);
@@ -724,7 +777,7 @@ svc_tcp_listen_data_ready(struct sock *s
 static void
 svc_tcp_state_change(struct sock *sk)
 {
-	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;
+	struct svc_xprt	*svsk = (struct svc_xprt *)sk->sk_user_data;
 
 	dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
 		sk, sk->sk_state, sk->sk_user_data);
@@ -742,7 +795,7 @@ svc_tcp_state_change(struct sock *sk)
 static void
 svc_tcp_data_ready(struct sock *sk, int count)
 {
-	struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
+	struct svc_xprt *svsk = (struct svc_xprt *)sk->sk_user_data;
 
 	dprintk("svc: socket %p TCP data ready (svsk %p)\n",
 		sk, sk->sk_user_data);
@@ -758,14 +811,14 @@ svc_tcp_data_ready(struct sock *sk, int 
  * Accept a TCP connection
  */
 static void
-svc_tcp_accept(struct svc_sock *svsk)
+svc_tcp_accept(struct svc_xprt *svsk)
 {
 	struct sockaddr_in sin;
-	struct svc_serv	*serv = svsk->sk_server;
+	struct svc_serv	*serv = svsk->sx_server;
 	struct socket	*sock = svsk->sk_sock;
 	struct socket	*newsock;
 	struct proto_ops *ops;
-	struct svc_sock	*newsvsk;
+	struct svc_xprt	*newsvsk;
 	int		err, slen;
 
 	dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
@@ -834,12 +887,12 @@ svc_tcp_accept(struct svc_sock *svsk)
 	 *
 	 * The only somewhat efficient mechanism would be if drop
 	 * old connections from the same IP first. But right now
-	 * we don't even record the client IP in svc_sock.
+	 * we don't even record the client IP in svc_xprt.
 	 */
 	if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
-		struct svc_sock *svsk = NULL;
+		struct svc_xprt *svsk = NULL;
 		spin_lock_bh(&serv->sv_lock);
-		if (!list_empty(&serv->sv_tempsocks)) {
+		if (!list_empty(&serv->sv_tempxprts)) {
 			if (net_ratelimit()) {
 				/* Try to help the admin */
 				printk(KERN_NOTICE "%s: too many open TCP "
@@ -856,8 +909,8 @@ svc_tcp_accept(struct svc_sock *svsk)
 			 * Always select the oldest socket. It's not fair,
 			 * but so is life
 			 */
-			svsk = list_entry(serv->sv_tempsocks.prev,
-					  struct svc_sock,
+			svsk = list_entry(serv->sv_tempxprts.prev,
+					  struct svc_xprt,
 					  sk_list);
 			set_bit(SK_CLOSE, &svsk->sk_flags);
 			svsk->sk_inuse ++;
@@ -866,7 +919,7 @@ svc_tcp_accept(struct svc_sock *svsk)
 
 		if (svsk) {
 			svc_sock_enqueue(svsk);
-			svc_sock_put(svsk);
+			if (svsk->sx_put) svsk->sx_put(svsk);
 		}
 
 	}
@@ -887,8 +940,8 @@ failed:
 static int
 svc_tcp_recvfrom(struct svc_rqst *rqstp)
 {
-	struct svc_sock	*svsk = rqstp->rq_sock;
-	struct svc_serv	*serv = svsk->sk_server;
+	struct svc_xprt	*svsk = rqstp->rq_sock;
+	struct svc_serv	*serv = svsk->sx_server;
 	int		len;
 	struct kvec vec[RPCSVC_MAXPAGES];
 	int pnum, vlen;
@@ -1031,7 +1084,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		svc_sock_received(svsk);
 	} else {
 		printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
-					svsk->sk_server->sv_name, -len);
+					svsk->sx_server->sv_name, -len);
 		svc_sock_received(svsk);
 	}
 
@@ -1061,7 +1114,7 @@ svc_tcp_sendto(struct svc_rqst *rqstp)
 	sent = svc_sendto(rqstp, &rqstp->rq_res);
 	if (sent != xbufp->len) {
 		printk(KERN_NOTICE "rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n",
-		       rqstp->rq_sock->sk_server->sv_name,
+		       rqstp->rq_sock->sx_server->sv_name,
 		       (sent<0)?"got error":"sent only",
 		       sent, xbufp->len);
 		svc_delete_socket(rqstp->rq_sock);
@@ -1071,13 +1124,16 @@ svc_tcp_sendto(struct svc_rqst *rqstp)
 }
 
 static void
-svc_tcp_init(struct svc_sock *svsk)
+svc_tcp_init(struct svc_xprt *svsk)
 {
 	struct sock	*sk = svsk->sk_sk;
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	svsk->sk_recvfrom = svc_tcp_recvfrom;
-	svsk->sk_sendto = svc_tcp_sendto;
+	svsk->sx_recvfrom = svc_tcp_recvfrom;
+	svsk->sx_sendto = svc_tcp_sendto;
+	svsk->sx_delete = svc_delete_socket;
+	svsk->sx_put = svc_sock_put;
+	svsk->sx_has_wspace = svc_tcp_has_write_space;
 
 	if (sk->sk_state == TCP_LISTEN) {
 		dprintk("setting up TCP socket for listening\n");
@@ -1099,8 +1155,8 @@ svc_tcp_init(struct svc_sock *svsk)
 		 * svc_tcp_recvfrom will re-adjust if necessary
 		 */
 		svc_sock_setbufsize(svsk->sk_sock,
-				    3 * svsk->sk_server->sv_bufsz,
-				    3 * svsk->sk_server->sv_bufsz);
+				    3 * svsk->sx_server->sv_bufsz,
+				    3 * svsk->sx_server->sv_bufsz);
 
 		set_bit(SK_CHNGBUF, &svsk->sk_flags);
 		set_bit(SK_DATA, &svsk->sk_flags);
@@ -1119,14 +1175,14 @@ svc_sock_update_bufs(struct svc_serv *se
 	struct list_head *le;
 
 	spin_lock_bh(&serv->sv_lock);
-	list_for_each(le, &serv->sv_permsocks) {
-		struct svc_sock *svsk = 
-			list_entry(le, struct svc_sock, sk_list);
+	list_for_each(le, &serv->sv_permxprts) {
+		struct svc_xprt *svsk = 
+			list_entry(le, struct svc_xprt, sk_list);
 		set_bit(SK_CHNGBUF, &svsk->sk_flags);
 	}
-	list_for_each(le, &serv->sv_tempsocks) {
-		struct svc_sock *svsk =
-			list_entry(le, struct svc_sock, sk_list);
+	list_for_each(le, &serv->sv_tempxprts) {
+		struct svc_xprt *svsk =
+			list_entry(le, struct svc_xprt, sk_list);
 		set_bit(SK_CHNGBUF, &svsk->sk_flags);
 	}
 	spin_unlock_bh(&serv->sv_lock);
@@ -1138,7 +1194,7 @@ svc_sock_update_bufs(struct svc_serv *se
 int
 svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout)
 {
-	struct svc_sock		*svsk =NULL;
+	struct svc_xprt		*svsk =NULL;
 	int			len;
 	int 			pages;
 	struct xdr_buf		*arg;
@@ -1188,9 +1244,9 @@ svc_recv(struct svc_serv *serv, struct s
 		return -EINTR;
 
 	spin_lock_bh(&serv->sv_lock);
-	if (!list_empty(&serv->sv_tempsocks)) {
-		svsk = list_entry(serv->sv_tempsocks.next,
-				  struct svc_sock, sk_list);
+	if (!list_empty(&serv->sv_tempxprts)) {
+		svsk = list_entry(serv->sv_tempxprts.next,
+				  struct svc_xprt, sk_list);
 		/* apparently the "standard" is that clients close
 		 * idle connections after 5 minutes, servers after
 		 * 6 minutes
@@ -1222,7 +1278,9 @@ svc_recv(struct svc_serv *serv, struct s
 		add_wait_queue(&rqstp->rq_wait, &wait);
 		spin_unlock_bh(&serv->sv_lock);
 
+		dprintk("%s: going to sleep...\n", __FUNCTION__);
 		schedule_timeout(timeout);
+		dprintk("%s: ...woke up\n", __FUNCTION__);
 
 		try_to_freeze();
 
@@ -1240,13 +1298,27 @@ svc_recv(struct svc_serv *serv, struct s
 
 	dprintk("svc: server %p, socket %p, inuse=%d\n",
 		 rqstp, svsk, svsk->sk_inuse);
-	len = svsk->sk_recvfrom(rqstp);
+	len = svsk->sx_recvfrom(rqstp);
 	dprintk("svc: got len=%d\n", len);
 
+	{
+		unsigned char *recv_buf = (unsigned char *) rqstp->rq_arg.head[0].iov_base;
+		int i;
+
+		for (i = 0; i < len; i += 4) {
+			dprintk("%p: %.2x %.2x %.2x %.2x\n",
+				&recv_buf[i],
+				recv_buf[i],
+				recv_buf[i + 1],
+				recv_buf[i + 2],
+				recv_buf[i + 3]);
+		}
+	}
+
 	/* No data, incomplete (TCP) read, or accept() */
 	if (len == 0 || len == -EAGAIN) {
 		rqstp->rq_res.len = 0;
-		svc_sock_release(rqstp);
+		svc_xprt_release(rqstp);
 		return -EAGAIN;
 	}
 	svsk->sk_lastrecv = get_seconds();
@@ -1254,7 +1326,7 @@ svc_recv(struct svc_serv *serv, struct s
 		/* push active sockets to end of list */
 		spin_lock_bh(&serv->sv_lock);
 		if (!list_empty(&svsk->sk_list))
-			list_move_tail(&svsk->sk_list, &serv->sv_tempsocks);
+			list_move_tail(&svsk->sk_list, &serv->sv_tempxprts);
 		spin_unlock_bh(&serv->sv_lock);
 	}
 
@@ -1273,7 +1345,7 @@ void
 svc_drop(struct svc_rqst *rqstp)
 {
 	dprintk("svc: socket %p dropped request\n", rqstp->rq_sock);
-	svc_sock_release(rqstp);
+	svc_xprt_release(rqstp);
 }
 
 /*
@@ -1282,7 +1354,7 @@ svc_drop(struct svc_rqst *rqstp)
 int
 svc_send(struct svc_rqst *rqstp)
 {
-	struct svc_sock	*svsk;
+	struct svc_xprt	*svsk;
 	int		len;
 	struct xdr_buf	*xb;
 
@@ -1301,29 +1373,77 @@ svc_send(struct svc_rqst *rqstp)
 		xb->page_len +
 		xb->tail[0].iov_len;
 
+	{
+		unsigned char *buf;
+		int i;
+
+		dprintk("%s: response length = %u (%u head + %u page + %u tail)\n",
+			__FUNCTION__,
+			xb->len, xb->head[0].iov_len, xb->page_len, xb->tail[0].iov_len);
+
+		buf = xb->head[0].iov_base;
+		dprintk("%s: reply head:\n", __FUNCTION__);
+		for (i = 0; i < xb->head[0].iov_len; i += 4) {
+			dprintk("%p: %.2x %.2x %.2x %.2x\n",
+				&buf[i],
+				buf[i],
+				buf[i + 1],
+				buf[i + 2],
+				buf[i + 3]);
+		}
+
+		if (xb->page_len != 0) {
+			buf = page_address(xb->pages[0]) + xb->page_base;
+			dprintk("%s: reply page data:\n", __FUNCTION__);
+			for (i = 0; i < xb->page_len; i += 4) {
+				dprintk("%p: %.2x %.2x %.2x %.2x\n",
+					&buf[i],
+					buf[i],
+					buf[i + 1],
+					buf[i + 2],
+					buf[i + 3]);
+			}
+		}
+	
+		if (xb->tail[0].iov_len != 0) {
+			buf = xb->tail[0].iov_base;
+			dprintk("%s: reply tail:\n", __FUNCTION__);
+			for (i = 0; i < xb->tail[0].iov_len; i += 4) {
+				dprintk("%p: %.2x %.2x %.2x %.2x\n",
+					&buf[i],
+					buf[i],
+					buf[i + 1],
+					buf[i + 2],
+					buf[i + 3]);
+			}
+		}
+
+	}
+
 	/* Grab svsk->sk_sem to serialize outgoing data. */
 	down(&svsk->sk_sem);
 	if (test_bit(SK_DEAD, &svsk->sk_flags))
 		len = -ENOTCONN;
 	else
-		len = svsk->sk_sendto(rqstp);
+		len = svsk->sx_sendto(rqstp);
 	up(&svsk->sk_sem);
-	svc_sock_release(rqstp);
+	svc_xprt_release(rqstp);
 
 	if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
 		return 0;
+	dprintk("%s: sent %d bytes\n", __FUNCTION__, len);
 	return len;
 }
 
 /*
- * Initialize socket for RPC use and create svc_sock struct
+ * Initialize socket for RPC use and create svc_xprt struct
  * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF.
  */
-static struct svc_sock *
+static struct svc_xprt *
 svc_setup_socket(struct svc_serv *serv, struct socket *sock,
 					int *errp, int pmap_register)
 {
-	struct svc_sock	*svsk;
+	struct svc_xprt	*svsk;
 	struct sock	*inet;
 
 	dprintk("svc: svc_setup_socket %p\n", sock);
@@ -1352,7 +1472,7 @@ svc_setup_socket(struct svc_serv *serv, 
 	svsk->sk_ostate = inet->sk_state_change;
 	svsk->sk_odata = inet->sk_data_ready;
 	svsk->sk_owspace = inet->sk_write_space;
-	svsk->sk_server = serv;
+	svsk->sx_server = serv;
 	svsk->sk_lastrecv = get_seconds();
 	INIT_LIST_HEAD(&svsk->sk_deferred);
 	INIT_LIST_HEAD(&svsk->sk_ready);
@@ -1367,11 +1487,11 @@ svc_setup_socket(struct svc_serv *serv, 
 	spin_lock_bh(&serv->sv_lock);
 	if (!pmap_register) {
 		set_bit(SK_TEMP, &svsk->sk_flags);
-		list_add(&svsk->sk_list, &serv->sv_tempsocks);
+		list_add(&svsk->sk_list, &serv->sv_tempxprts);
 		serv->sv_tmpcnt++;
 	} else {
 		clear_bit(SK_TEMP, &svsk->sk_flags);
-		list_add(&svsk->sk_list, &serv->sv_permsocks);
+		list_add(&svsk->sk_list, &serv->sv_permxprts);
 	}
 	spin_unlock_bh(&serv->sv_lock);
 
@@ -1389,7 +1509,7 @@ svc_setup_socket(struct svc_serv *serv, 
 static int
 svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin)
 {
-	struct svc_sock	*svsk;
+	struct svc_xprt	*svsk;
 	struct socket	*sock;
 	int		error;
 	int		type;
@@ -1436,14 +1556,14 @@ bummer:
  * Remove a dead socket
  */
 void
-svc_delete_socket(struct svc_sock *svsk)
+svc_delete_socket(struct svc_xprt *svsk)
 {
 	struct svc_serv	*serv;
 	struct sock	*sk;
 
 	dprintk("svc: svc_delete_socket(%p)\n", svsk);
 
-	serv = svsk->sk_server;
+	serv = svsk->sx_server;
 	sk = svsk->sk_sk;
 
 	sk->sk_state_change = svsk->sk_ostate;
@@ -1465,23 +1585,60 @@ svc_delete_socket(struct svc_sock *svsk)
 	} else {
 		spin_unlock_bh(&serv->sv_lock);
 		dprintk(KERN_NOTICE "svc: server socket destroy delayed\n");
-		/* svsk->sk_server = NULL; */
+		/* svsk->sx_server = NULL; */
 	}
 }
 
 /*
  * Make a socket for nfsd and lockd
+ * @deprecated switch to svc_makexprt
  */
 int
 svc_makesock(struct svc_serv *serv, int protocol, unsigned short port)
 {
 	struct sockaddr_in	sin;
 
-	dprintk("svc: creating socket proto = %d\n", protocol);
-	sin.sin_family      = AF_INET;
-	sin.sin_addr.s_addr = INADDR_ANY;
-	sin.sin_port        = htons(port);
-	return svc_create_socket(serv, protocol, &sin);
+/* 	dprintk("svc: creating socket proto = %d\n", protocol); */
+/* 	sin.sin_family      = AF_INET; */
+/* 	sin.sin_addr.s_addr = INADDR_ANY; */
+/* 	sin.sin_port        = htons(port); */
+/* 	return svc_create_socket(serv, protocol, &sin); */
+
+	
+	if (protocol == IPPROTO_UDP || protocol == IPPROTO_TCP) {
+		dprintk("svc: creating socket proto = %d\n", protocol);
+		sin.sin_family      = AF_INET;
+		sin.sin_addr.s_addr = INADDR_ANY;
+		sin.sin_port        = htons(port);
+		return svc_create_socket(serv, protocol, &sin);
+	} else { /* XXX need RDMA protocol constant */
+		dprintk("svc: creating RDMA transport\n");
+		return svc_create_rdma_xprt(serv, protocol, htons(port));
+	}
+}
+
+/*
+ * Make a transport instance for nfsd and lockd
+ * @param serv the RPC service this instance will belong to
+ * @param protocol the protocol for the instance
+ * @param port the port to listen on
+ * @return 0 on success, negative value for errors
+ */
+int
+svc_makexprt(struct svc_serv *serv, int protocol, unsigned short port)
+{
+	struct sockaddr_in	sin;
+	
+	if (protocol == IPPROTO_UDP || protocol == IPPROTO_TCP) {
+		dprintk("svc: creating socket proto = %d\n", protocol);
+		sin.sin_family      = AF_INET;
+		sin.sin_addr.s_addr = INADDR_ANY;
+		sin.sin_port        = htons(port);
+		return svc_create_socket(serv, protocol, &sin);
+	} else { /* XXX need RDMA protocol constant */
+		dprintk("svc: creating RDMA transport\n");
+		return svc_create_rdma_xprt(serv, protocol, port);
+	}
 }
 
 /*
@@ -1492,10 +1649,10 @@ static void svc_revisit(struct cache_def
 {
 	struct svc_deferred_req *dr = container_of(dreq, struct svc_deferred_req, handle);
 	struct svc_serv *serv = dreq->owner;
-	struct svc_sock *svsk;
+	struct svc_xprt *svsk;
 
 	if (too_many) {
-		svc_sock_put(dr->svsk);
+		if (dr->svsk->sx_put) dr->svsk->sx_put(dr->svsk);
 		kfree(dr);
 		return;
 	}
@@ -1507,7 +1664,7 @@ static void svc_revisit(struct cache_def
 	spin_unlock_bh(&serv->sv_lock);
 	set_bit(SK_DEFERRED, &svsk->sk_flags);
 	svc_sock_enqueue(svsk);
-	svc_sock_put(svsk);
+	if (svsk->sx_put) svsk->sx_put(svsk);
 }
 
 static struct cache_deferred_req *
@@ -1561,10 +1718,10 @@ static int svc_deferred_recv(struct svc_
 }
 
 
-static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk)
+static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *svsk)
 {
 	struct svc_deferred_req *dr = NULL;
-	struct svc_serv	*serv = svsk->sk_server;
+	struct svc_serv	*serv = svsk->sx_server;
 	
 	if (!test_bit(SK_DEFERRED, &svsk->sk_flags))
 		return NULL;
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/net/sunrpc/svcxprt_rdma.c linux-2.6.14.3-RPCRDMA/net/sunrpc/svcxprt_rdma.c
--- linux-2.6.14.3/net/sunrpc/svcxprt_rdma.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/net/sunrpc/svcxprt_rdma.c	2005-12-08 12:37:37.000000000 -0500
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2006 The Regents of the University of Michigan
+ * All rights reserved.
+ *
+ * Permission is granted to use, copy, create derivative works and
+ * redistribute this software and such derivative works for any purpose,
+ * so long as the name of the University of Michigan is not used in
+ * any advertising or publicity pertaining to the use or distribution
+ * of this software without specific, written prior authorization.  If
+ * the above copyright notice or any other identification of the
+ * university of michigan is included in any copy of any portion of
+ * this software, then the disclaimer below must also be included.
+ *
+ * This software is provided as is, without representation from the
+ * University of Michigan as to its fitness for any purpose, and without
+ * warranty by the university of michigan of any kind, either express
+ * or implied, including without limitation the implied warranties of
+ * merchantability and fitness for a particular purpose.  The Regents
+ * of the University of Michigan shall not be liable for any damages,
+ * including special, indirect, incidental, or consequential damages,
+ * with respect to any claim arising out or in connection with the use
+ * of the software, even if it has been or is hereafter advised of the
+ * possibility of such damages.
+ *
+ * RDMA Interface layer for RPC
+ * This file uses kDAPL-specific knowledge to implement an abstract transport
+ * implementation for RPC to use without needing to include kDAPL information.
+ */
+
+#if 1 /* JFL: needed for spin_lock_bh() and spin_unlock_bh() */
+#include <linux/interrupt.h>
+#endif
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svcsock.h> /* change to svcxprt.h later */
+#ifdef KDAPL12
+#include <dat/kdat.h>
+#else
+#include <kdat.h>
+#endif
+#include <linux/sunrpc/svcxprt_rdma.h>
+#include <linux/sunrpc/svcxprt_rdma_kdapl.h>
+#include <linux/sunrpc/debug.h>
+
+# define RPCDBG_FACILITY     RPCDBG_XPRT
+
+static struct rdma_ia default_ia = { .ri_initialized = 0 };
+
+/* XXX these are hard-wired for now */
+/* JFL: OpenIB kDAPL provider is called mthcaXy where */
+/* X=HCA number=0,1,... and y=port number=a,b,...     */
+static const DAT_NAME_PTR ia_name = "mthca0a";
+/* static const DAT_NAME_PTR ia_name = "ccil0"; */
+static DAT_COUNT evd_min_queue_length = 4;
+
+/**
+ * svc_rdma_ia_destroy - Remove the RDMA IA transport from the RPC service
+ * @xprt: the transport instance representing the RDMA IA
+ *
+ * Does a clean teardown of all the RDMA structures and removes the reference
+ * to them from the RPC service
+ **/
+void
+svc_rdma_ia_destroy(struct svc_xprt *xprt) {
+	dprintk("%s: removing the RDMA IA transport from RPC\n", __FUNCTION__);
+	list_del_init(&xprt->sk_list);
+	rdma_close_ia(xprt->sx_data);
+	kfree(xprt);
+}
+
+/**
+ * svc_create_rdma_xprt - Create an RDMA PSP for the RPC service
+ * @serv: the RPC service the PSP will belong to
+ * @protocol: currently unused
+ * @port: the port to listen on in network byte-order
+ *
+ * Returns 0 on success,
+ *        -1 if the interface adapter can't be initialized,
+ *        -2 if the public service point can't be created
+ **/
+int
+svc_create_rdma_xprt(struct svc_serv *serv, int protocol, unsigned short port)
+{
+	DAT_CONN_QUAL connection_qualifier = (DAT_CONN_QUAL) htons(port);
+	int err;
+	struct svc_xprt *xprt = NULL;
+	
+	dprintk("%s: default_ia.ri_initialized = %d\n", __FUNCTION__,
+	        default_ia.ri_initialized);
+
+	if (!default_ia.ri_initialized) {
+		if (rdma_init_ia(&default_ia, ia_name, evd_min_queue_length)) {
+			err = -1;
+			goto fail;
+		}
+	}
+	
+	if (!(xprt = kmalloc(sizeof(*xprt), GFP_KERNEL))) {
+		printk("%s: Not enough memory for transport structure\n", __FUNCTION__);
+		return -ENOMEM;
+	}
+	memset(xprt, 0, sizeof(*xprt));
+
+	set_bit(SK_BUSY, &xprt->sk_flags);
+	xprt->sx_server = serv;
+	xprt->sk_lastrecv = get_seconds();
+	xprt->sx_data = &default_ia;
+	xprt->sx_delete = svc_rdma_ia_destroy;
+	xprt->sx_recvfrom = svc_rdma_ia_recvfrom;
+	INIT_LIST_HEAD(&xprt->sk_deferred);
+	INIT_LIST_HEAD(&xprt->sk_ready);
+	sema_init(&xprt->sk_sem, 1);
+
+	/* XXX more stuff to set? */
+
+	spin_lock_bh(&serv->sv_lock);
+	list_add(&xprt->sk_list, &serv->sv_permxprts);
+	spin_unlock_bh(&serv->sv_lock);
+
+	clear_bit(SK_BUSY, &xprt->sk_flags);
+
+	dprintk("%s: created RDMA IA transport: %p\n", __FUNCTION__, xprt);
+
+	if (rdma_create_pub_svc_point(xprt, connection_qualifier)) {
+		err = -2;
+		goto fail;
+	}
+	
+	return 0;
+ fail:
+	svc_rdma_ia_destroy(xprt);
+	return err;
+}
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/net/sunrpc/svcxprt_rdma_kdapl.c linux-2.6.14.3-RPCRDMA/net/sunrpc/svcxprt_rdma_kdapl.c
--- linux-2.6.14.3/net/sunrpc/svcxprt_rdma_kdapl.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/net/sunrpc/svcxprt_rdma_kdapl.c	2005-12-19 23:07:06.000000000 -0500
@@ -0,0 +1,1288 @@
+/*
+ * Copyright (c) 2006 The Regents of the University of Michigan
+ * All rights reserved.
+ *
+ * Permission is granted to use, copy, create derivative works and
+ * redistribute this software and such derivative works for any purpose,
+ * so long as the name of the University of Michigan is not used in
+ * any advertising or publicity pertaining to the use or distribution
+ * of this software without specific, written prior authorization.  If
+ * the above copyright notice or any other identification of the
+ * university of michigan is included in any copy of any portion of
+ * this software, then the disclaimer below must also be included.
+ *
+ * This software is provided as is, without representation from the
+ * University of Michigan as to its fitness for any purpose, and without
+ * warranty by the university of michigan of any kind, either express
+ * or implied, including without limitation the implied warranties of
+ * merchantability and fitness for a particular purpose.  The Regents
+ * of the University of Michigan shall not be liable for any damages,
+ * including special, indirect, incidental, or consequential damages,
+ * with respect to any claim arising out or in connection with the use
+ * of the software, even if it has been or is hereafter advised of the
+ * possibility of such damages.
+ */
+#ifdef KDAPL12
+#include <dat/kdat.h>
+#else
+#include <kdat.h>
+#endif
+#include <linux/module.h>
+#include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/svcxprt_rdma_kdapl.h>
+#include <linux/sunrpc/debug.h>
+#include <linux/mm.h>		/* num_physpages */
+#include <linux/spinlock.h>     /* spinlock_t, spinlock functions */
+#include <linux/net.h>		/* SOCK_NOSPACE */
+#include <net/sock.h>		/* sk_stream_wspace, sock_wspace */
+#include <asm/io.h>		/* memset() */
+
+# define RPCDBG_FACILITY     RPCDBG_XPRT
+
+/* handle operations in tasklet context, using a single, global list */
+static void rdma_op_tasklet(unsigned long data);
+DECLARE_TASKLET(rdma_op_tasklets, rdma_op_tasklet, 0UL);
+
+static spinlock_t rdma_ops_lock = SPIN_LOCK_UNLOCKED;
+static LIST_HEAD(rdma_ops);
+
+struct rdma_op {
+	struct list_head list;
+	struct svc_xprt *xprt;
+};
+
+static const char *
+ststatus(unsigned int status)
+{
+	static const char * const ststrings[] = {
+		"success",
+		"flushed",
+		"local length error",
+		"local endpoint error",
+		"local protection error",
+		"bad response",
+		"remote access error",
+		"remote responder error",
+		"transport error",
+		"receiver not ready",
+		"partial packet",
+		"rmr operation error"
+
+	};
+	if (status <= 11)
+		return ststrings[status];
+	return "unknown";
+}
+
+static void
+dprintk_dat_err(const char *message, const char *function, DAT_RETURN datstatus) {
+	const char *major_message = NULL;
+	const char *minor_message = NULL;
+
+	if (dat_strerror(datstatus, &major_message, &minor_message) == DAT_SUCCESS) {
+		dprintk("%s: %s. %s: %s\n", function, message, major_message, minor_message);
+	} else {
+		dprintk("%s: %s\n", function, message);
+	}
+}
+
+/**
+ * async_evd_upcall - Upcall for "catastrophic" errors on the interface adapter
+ * @instance_data: pointer to the &rdma_ia struct
+ * @event: the event that triggered the upcall
+ * @more_events: whether there are additional events on the EVD
+ **/
+static void
+async_evd_upcall(DAT_PVOID instance_data,
+                 const DAT_EVENT *event,
+		 DAT_BOOLEAN more_events)
+{
+	printk("%s: got event %#.8x\n", __FUNCTION__, event->event_number);
+}
+
+static inline void rdma_schedule_op_tasklet(struct svc_xprt *xprt)
+{
+        unsigned long lock_flags;
+	struct rdma_op *op;
+
+	op = kmalloc(sizeof *op, GFP_ATOMIC);
+	op->xprt = xprt;
+
+        spin_lock_irqsave(&rdma_ops_lock, lock_flags);
+        list_add_tail(&op->list, &rdma_ops);
+        spin_unlock_irqrestore(&rdma_ops_lock, lock_flags);
+        tasklet_schedule(&rdma_op_tasklets);
+}
+
+static void rdma_op_tasklet(unsigned long data)
+{
+        unsigned long lock_flags;
+	struct rdma_op *op;
+	struct svc_xprt *xprt;
+
+	dprintk("%s: entering\n", __FUNCTION__);
+        spin_lock_irqsave(&rdma_ops_lock, lock_flags);
+        while (!list_empty(&rdma_ops)) {
+                op = list_entry(rdma_ops.next, struct rdma_op, list);
+                list_del(&op->list);
+                spin_unlock_irqrestore(&rdma_ops_lock, lock_flags);
+
+                xprt = op->xprt;
+		kfree(op);		
+
+		set_bit(SK_DATA, &xprt->sk_flags);
+		svc_sock_enqueue(xprt);
+
+                spin_lock_irqsave(&rdma_ops_lock, lock_flags);
+        }
+        spin_unlock_irqrestore(&rdma_ops_lock, lock_flags);
+	dprintk("%s: leaving\n", __FUNCTION__);
+}
+
+/**
+ * dto_event_upcall - Handles data transfer events
+ *
+ * @instance_data: pointer to the rdma_ia struct
+ * @event: the event that triggered the upcall
+ * @more_events: whether there are additional events on the EVD
+ *
+ * This upcall handles DTO, (recv, send, bind and unbind) events.
+ * It is reentrant but has been specified using DAT_UPCALL_SINGLE_INSTANCE
+ * in order to maintain ordering of receives to keep server credits.
+ * It must also be prepared to be called from interrupt context,
+ * so it must not block or perform blocking calls.
+ *
+ * It is the responsibility of the scheduled tasklet to return
+ * recv buffers to the pool. NOTE: this affects synchronization of
+ * connection shutdown. That is, the structures required for
+ * the completion of the reply handler must remain intact until
+ * all memory has been reclaimed. There is some work here TBD.
+ *
+ * Note that send events are suppressed and do not result in an upcall.
+ **/
+static void
+in_dto_event_upcall(DAT_PVOID instance_data,
+		    const DAT_EVENT *event, DAT_BOOLEAN more_events)
+{
+	const DAT_DTO_COMPLETION_EVENT_DATA *dto_data = NULL;
+	struct svc_xprt *xprt;
+	struct rdma_ep *ep;
+
+	if (!event) {
+		dprintk("%s: event is NULL\n", __FUNCTION__);
+		return;
+	}
+
+	switch (event->event_number) {
+	case DAT_DTO_COMPLETION_EVENT:
+		dto_data = &event->event_data.dto_completion_event_data;
+
+		dprintk("%s: got DAT_DTO_COMPLETION_EVENT; status: %s\n",
+		       __FUNCTION__,
+		       ststatus(dto_data->status));
+		if (dto_data->status != DAT_DTO_SUCCESS) break;
+
+		if (!dto_data->user_cookie.as_ptr) {
+			dprintk("%s: cookie is NULL\n", __FUNCTION__);
+			return;
+		}
+
+		ep = (struct rdma_ep *) dto_data->user_cookie.as_ptr;
+		xprt = (struct svc_xprt *) ep->re_xprt;
+		ep->re_recvlen = dto_data->transfered_length;
+		dprintk("%s: received %llu bytes on ep %p\n", __FUNCTION__, ep->re_recvlen, ep);
+		rdma_schedule_op_tasklet(xprt);
+
+		return;
+	default:
+		dprintk("%s: got event %#.8x\n", __FUNCTION__, event->event_number);
+		return;
+	}
+}
+
+static void
+out_dto_event_upcall(DAT_PVOID instance_data,
+		 const DAT_EVENT *event, DAT_BOOLEAN more_events) 
+{
+	const DAT_DTO_COMPLETION_EVENT_DATA dto_data =
+		event->event_data.dto_completion_event_data;
+
+	if (dto_data.status == DAT_DTO_SUCCESS) {
+		dprintk("%s: succesfully sent %llu bytes\n", __FUNCTION__,
+			dto_data.transfered_length);
+	} else {
+                dprintk("%s: got DAT_DTO_COMPLETION_EVENT; status: %s\n",
+			__FUNCTION__,
+			ststatus(dto_data.status));
+	}
+}
+
+/**
+ * free_evd - Cleans up an EVD to allow it to be reclaimed.
+ * @evd_handle: pointer to the handle of the EVD to be retired
+ *
+ * Modifies the upcall policy prior to calling dat_evd_free to 
+ * ensure graceful retirement. After freeing, sets the handle
+ * to DAT_HANDLE_NULL to prevent subsequent use.
+ **/
+static void
+free_evd(DAT_EVD_HANDLE *evd_handle) {
+	DAT_RETURN datstatus;
+	DAT_EVENT junk;
+
+	if (*evd_handle == DAT_HANDLE_NULL) {
+		return;
+	}
+	
+	/* XXX do we need to synchronize this? */
+	datstatus = dat_evd_modify_upcall(*evd_handle,
+	                                  DAT_UPCALL_TEARDOWN,
+					  DAT_UPCALL_SAME);
+	if (datstatus != DAT_SUCCESS) {
+		dprintk_dat_err("dat_evd_modify_upcall failed", __FUNCTION__, datstatus);
+		return;
+	}
+
+	/* drain any remaining events from the EVD */
+	while ((datstatus = dat_evd_dequeue(*evd_handle, &junk)) == DAT_SUCCESS) {
+		dprintk("%s: drained outstanding event\n", __FUNCTION__);
+	}
+
+	if (DAT_GET_TYPE(datstatus) == DAT_QUEUE_EMPTY) {
+		dprintk("%s: EVD queue empty\n", __FUNCTION__);
+	} else {
+		dprintk_dat_err("dat_evd_dequeue failed", __FUNCTION__, datstatus);
+	}
+
+	datstatus = dat_evd_free(*evd_handle);
+	if (datstatus != DAT_SUCCESS) {
+		dprintk_dat_err("dat_evd_free failed", __FUNCTION__, datstatus);
+	} else {
+		*evd_handle = DAT_HANDLE_NULL;
+	}
+}
+
+/**
+ * rdma_close_ep - closes and deallocates an RDMA endpoint structure
+ * @ep: the endpoint to close
+ *
+ * The memory associated with the EP is freed, then dat_ep_disconnect
+ * is called and we wait for the disconnect event. Finally, before
+ * calling dat_ep_free, any remaining DTOs are drained.
+ *
+ * NOTE(review): the receive/reply pages are released *before* the
+ * disconnect completes; if a recv DTO is still outstanding the
+ * adapter could DMA into freed pages — confirm ordering is safe.
+ **/
+static void
+rdma_close_ep(struct rdma_ep *ep) {
+	u32 i;
+	DAT_RETURN datstatus;
+
+	dprintk("%s: closing rdma_ep %p\n", __FUNCTION__, ep);
+
+	/* tells connection_event_upcall not to re-enqueue this xprt */
+	ep->re_closing = 1; /* XXX synchronize this? */
+
+	list_del_init(&ep->re_list); /* remove from IA list of EPs */
+	for (i = 0; i < RDMA_MAX_PAGES && ep->re_pages[i]; i++) {
+		free_page((unsigned long) ep->re_pages[i]);
+	}
+	free_page((unsigned long) ep->re_pages[RDMA_MAX_PAGES]); /* reply page */
+
+	/* XXX synchronize the connected variable */
+	if (ep->re_connected) {
+		datstatus = dat_ep_disconnect(ep->re_handle, DAT_CLOSE_ABRUPT_FLAG);
+		if (datstatus != DAT_SUCCESS) {
+			printk("%s: dat_ep_disconnect failed (%x)\n",
+			       __FUNCTION__, datstatus);
+		} else {
+			dprintk("%s: dat_ep_disconnect on %p succeeded\n",
+				__FUNCTION__, ep->re_handle);
+		}
+
+		/* re_connected is cleared (and we are woken) by
+		 * connection_event_upcall; bounded by a 5 second timeout */
+		dprintk("%s: waiting for disconnect event\n", __FUNCTION__);
+		wait_event_interruptible_timeout(ep->re_connect_wait,
+						 ep->re_connected == 0, 5 * HZ);
+		dprintk("%s: disconnect event recieved\n", __FUNCTION__);
+	} else {
+		dprintk("%s: EP already disconnected by peer\n", __FUNCTION__);
+	}
+
+ 	free_evd(&ep->re_conn_evd);
+ 	free_evd(&ep->re_in_dto_evd);
+ 	free_evd(&ep->re_out_dto_evd);
+
+	dprintk("%s: EP EVDs freed\n", __FUNCTION__);
+
+	datstatus = dat_ep_free(ep->re_handle);
+
+	/* NOTE(review): the status is reported three times below
+	 * (dprintk, dprintk_dat_err, then printk/dprintk); one
+	 * conditional report would suffice. re_handle is also not
+	 * cleared after a successful free. */
+	dprintk("%s: datstatus = %x\n", __FUNCTION__, datstatus);
+	dprintk_dat_err("dat_ep_free", __FUNCTION__, datstatus);
+
+        if (datstatus != DAT_SUCCESS) {
+                printk("%s: dat_ep_free failed (%x)\n", __FUNCTION__, datstatus);
+        } else {
+                dprintk("%s: dat_ep_free on %p succeeded\n", __FUNCTION__, ep->re_handle);
+        }
+
+	/* rdma_ep structure is freed by svc_rdma_ep_destroy */
+}
+
+/**
+ * svc_rdma_ep_destroy - Remove an RDMA endpoint transport instance
+ * @xprt: the transport instance representing the RDMA endpoint
+ *
+ * Closes the endpoint and deallocates the associated structures.
+ * Ownership split: the rdma_ep hanging off sx_data is freed here;
+ * the svc_xprt itself is marked SK_DEAD and freed later by
+ * svc_rdma_ep_put once the last user drops it.
+ * XXX This probably doesn't correctly handle the closing of a xprt
+ * that is currently in use
+ **/
+void
+svc_rdma_ep_destroy(struct svc_xprt *xprt) {
+	struct svc_serv *serv = xprt->sx_server;
+	struct svc_xprt *xprt1 = NULL;
+	struct svc_xprt *temp = NULL;
+
+	dprintk("%s: removing EP xprt %p from server %p\n",
+		__FUNCTION__, xprt, serv);
+
+	/* debug only: dump the temp-transport list before removal */
+	dprintk("%s: temp xprts for server %p\n", __FUNCTION__, serv);
+	list_for_each_entry_safe(xprt1, temp, &serv->sv_tempxprts, sk_list) {
+		dprintk("\t%p\n", xprt1);
+	}
+
+	/* disconnect + free the underlying DAT endpoint and its pages */
+	rdma_close_ep(xprt->sx_data);
+
+	spin_lock_bh(&serv->sv_lock);
+
+	list_del_init(&xprt->sk_list);
+	list_del_init(&xprt->sk_ready);
+
+	dprintk("%s: after list_del_init for %p\n", __FUNCTION__, xprt);
+	list_for_each_entry_safe(xprt1, temp, &serv->sv_tempxprts, sk_list) {
+		dprintk("\t%p\n", xprt1);
+	}
+
+	set_bit(SK_DEAD, &xprt->sk_flags);
+	serv->sv_tmpcnt--;
+
+	/* XXX there may be some additional work to check if the transport is in use */
+
+	spin_unlock_bh(&serv->sv_lock);
+
+	kfree(xprt->sx_data);
+}
+
+/*
+ * svc_rdma_ep_put - Release an endpoint structure after use.
+ * @xprt: the endpoint structure
+ *
+ * Frees the svc_xprt only if svc_rdma_ep_destroy has already marked
+ * it SK_DEAD (its sx_data was freed there); otherwise this is a no-op.
+ * The sv_lock is held only to read the flag consistently.
+ */
+static inline void
+svc_rdma_ep_put(struct svc_xprt *xprt)
+{
+	struct svc_serv *serv = xprt->sx_server;
+
+	spin_lock_bh(&serv->sv_lock);
+	if (test_bit(SK_DEAD, &xprt->sk_flags)) {
+		spin_unlock_bh(&serv->sv_lock);
+		dprintk("svc: releasing dead RDMA endpoint\n");
+		kfree(xprt);
+	}
+	else
+		spin_unlock_bh(&serv->sv_lock);
+}
+
+/**
+ * handle_disconnect_event - finds the appropriate EP structure and destroys it
+ * @ia: the rdma_ia structure representing the IA the event occurred on
+ * @event_data: the DAT_EVENT_DATA for the disconnect event
+ */
+/* static void */
+/* handle_disconnect_event(struct rdma_ia *ia, DAT_EVENT_DATA *event_data) { */
+/* 	DAT_EP_HANDLE disconnected_ep_handle = */
+/* 		event_data->connect_event_data.ep_handle; */
+/* 	struct rdma_ep *ep = NULL; */
+/* 	struct rdma_ep *temp = NULL; */
+	
+/* 	dprintk("%s: ia = %p, ep_handle = %p\n", __FUNCTION__, ia, disconnected_ep_handle); */
+
+/* 	list_for_each_entry_safe(ep, temp, &ia->ri_endpoints, re_list) { */
+/* 		if (ep->re_handle == disconnected_ep_handle) { */
+/* 			dprintk("%s: destroying RDMA EP xprt %p\n", */
+/* 				__FUNCTION__, ep->re_xprt); */
+/* 			/\* XXX is this too much work for the upcall? *\/ */
+/* 			svc_rdma_ep_destroy(ep->re_xprt); */
+/* 		} */
+/* 	} */
+/* } */
+
+/**
+ * get_ep_for_handle - finds the endpoint structure given the handle
+ * @ia: represents the IA the event occurred on
+ * @ep_handle: the DAT_EP_HANDLE corresponding the the endpoint struct we want
+ *
+ * Since EVDs are valid across the entire IA, we must search the list of
+ * endpoints belonging to the IA, and find the one that corresponds to the
+ * given handle. The structure for that endpoint is returned, or NULL if
+ * no matching endpoint is found
+ */
+/* static struct rdma_ep * */
+/* get_ep_for_handle(struct rdma_ia *ia, DAT_EP_HANDLE ep_handle) { */
+/* 	struct rdma_ep *ep = NULL; */
+/* 	struct rdma_ep *temp = NULL; */
+	
+/* 	list_for_each_entry_safe(ep, temp, &ia->ri_endpoints, re_list) { */
+/* 		if (ep->re_handle == ep_handle) { */
+/* 			return ep; */
+/* 		} */
+/* 	} */
+	
+/* 	dprintk("%s: No endpoint found matching handle %p\n", */
+/* 		__FUNCTION__, ep_handle); */
+/* 	return NULL; */
+/* } */
+
+/**
+ * connection_request_event_upcall - Handles connection requests for an IA
+ * @instance_data: pointer to the svc_xprt struct containing the rdma_ia struct
+ * @event: the event that triggered the upcall
+ * @more_events: whether there are additional events on the EVD
+ * 
+ * In the case of a DAT_CONNECTION_REQUEST_EVENT, the CR handle is parked in
+ * ia->ri_conn_req (a single-slot "queue") and the IA transport is enqueued
+ * with SK_CONN set; svc_rdma_ia_recvfrom later accepts the connection.
+ * If a request is already pending, the new one is rejected outright.
+ *
+ * NOTE(review): ri_conn_req is tested/assigned here and cleared in
+ * svc_rdma_ia_recvfrom with no visible locking — confirm the upcall
+ * and the server thread cannot race on it.
+ */
+static void
+connection_request_event_upcall(DAT_PVOID instance_data,
+				const DAT_EVENT *event,
+				DAT_BOOLEAN more_events)
+{
+	struct svc_xprt *xprt = (struct svc_xprt *) instance_data;
+	struct rdma_ia *ia = (struct rdma_ia *) xprt->sx_data;
+	const DAT_EVENT_DATA *event_data = &event->event_data;
+ 	DAT_CR_HANDLE cr_handle;
+
+	/* this EVD was created with DAT_EVD_CR_FLAG only; anything else
+	 * is unexpected and ignored */
+	if (event->event_number != DAT_CONNECTION_REQUEST_EVENT) {
+		dprintk("%s: received unexpected event %x\n",
+			__FUNCTION__, event->event_number);
+		return;
+	}
+
+#ifdef KDAPL12
+	cr_handle = event_data->cr_arrival_event_data.cr_handle;
+#else
+	cr_handle = event_data->cr_arrival_event_data.cr;
+#endif
+	dprintk("%s: got DAT_CONNECTION_REQUEST_EVENT\n", __FUNCTION__);
+
+	if (ia->ri_conn_req != DAT_HANDLE_NULL) {
+		/* XXX not much of a queue just yet */
+		dprintk("%s: Connect request queue full\n", __FUNCTION__);
+		/* XXX is this really OK to call here? */
+		/* Should be rare if we have a real queue or reasonable size */
+		if (dat_cr_reject(cr_handle) == DAT_SUCCESS) {
+			dprintk("%s: connection rejected\n", __FUNCTION__);
+		} else {
+			dprintk("%s: dat_cr_reject failed\n", __FUNCTION__);
+		}
+	} else {
+		ia->ri_conn_req = cr_handle;
+		set_bit(SK_CONN, &xprt->sk_flags);
+		dprintk("%s: enqueueing waiting connection\n", __FUNCTION__);
+		svc_sock_enqueue(xprt);
+		/* XXX do we need a wake-up like svc_tcp_data_ready here? */
+	}
+
+	return;
+}
+
+/**
+ * connection_event_upcall - Handles connection events for an endpoint
+ * @instance_data: pointer to the rdma_ep struct
+ * @event: the event that triggered the upcall
+ * @more_events: whether there are additional events on the EVD
+ *
+ * ESTABLISHED marks the endpoint connected. DISCONNECTED/BROKEN mark
+ * it disconnected, wake any waiter in rdma_close_ep, and — when the
+ * teardown was peer initiated (re_closing not set) — flag the xprt
+ * SK_CLOSE and enqueue it for destruction. Unknown events are logged
+ * and ignored.
+ **/
+static void
+connection_event_upcall(DAT_PVOID instance_data,
+			const DAT_EVENT *event,
+			DAT_BOOLEAN more_events)
+{
+	struct rdma_ep	*ep = (struct rdma_ep *) instance_data;
+	struct svc_xprt	*xprt = ep->re_xprt;
+
+	switch (event->event_number) {
+	case DAT_CONNECTION_EVENT_ESTABLISHED:
+		dprintk("%s: got DAT_CONNECTION_EVENT_ESTABLISHED\n", __FUNCTION__);
+		ep->re_connected = 1; /* XXX unsynchronized; see rdma_close_ep */
+		break;
+	case DAT_CONNECTION_EVENT_DISCONNECTED:
+	case DAT_CONNECTION_EVENT_BROKEN:
+		dprintk("%s: got %s\n", __FUNCTION__,
+			event->event_number == DAT_CONNECTION_EVENT_DISCONNECTED ?
+			"DAT_CONNECTION_EVENT_DISCONNECTED" :
+			"DAT_CONNECTION_EVENT_BROKEN");
+		ep->re_connected = 0;
+		/* rdma_close_ep may be blocked waiting for this transition */
+		wake_up_all(&ep->re_connect_wait);
+
+		/* If the disconnect was client initiated, queue this xprt
+		 * to be destroyed */
+		if (!ep->re_closing) {
+			set_bit(SK_CLOSE, &xprt->sk_flags);
+			svc_sock_enqueue(xprt);
+		}
+		break;
+	default:
+		dprintk("%s: got unknown event %#.8x\n", __FUNCTION__, event->event_number);
+		break;
+	}
+}
+
+/**
+ * post_recv - posts a receive buffer for the given endpoint
+ * @ep: the endpoint that will service the request
+ *
+ * Posts the endpoint's first receive page via dat_ep_post_recv and
+ * returns that call's status. re_recvlen is reset here; it is set
+ * again when the receive DTO completes.
+ *
+ * Fix: only a single DAT_LMR_TRIPLET is built on the stack, so
+ * num_segments must be 1. The previous code passed RDMA_MAX_PAGES,
+ * which told the provider to read RDMA_MAX_PAGES triplets starting
+ * at &local_iov — i.e. uninitialized stack memory past the lone
+ * triplet.
+ **/
+static DAT_RETURN
+post_recv(struct rdma_ep *ep) {
+	DAT_LMR_TRIPLET local_iov;
+	DAT_DTO_COOKIE recv_cookie;
+
+	ep->re_recvlen = 0; /* set when the DTO occurs */
+
+	/* one PAGE_SIZE segment, covered by the IA-wide LMR */
+	local_iov.lmr_context = ep->re_ia->ri_lmr_iov.lmr_context;
+	local_iov.pad = 0;
+	local_iov.virtual_address = page_to_phys(ep->re_pages[0]);
+	local_iov.segment_length = PAGE_SIZE;
+	/* cookie lets the DTO upcall find the endpoint again */
+	recv_cookie.as_ptr = (DAT_PVOID) ep;
+
+	dprintk("%s: posting recv: %lld bytes at %#.8llx\n",
+		__FUNCTION__,
+		local_iov.segment_length,
+		local_iov.virtual_address);
+	return dat_ep_post_recv(ep->re_handle,
+				1, /* one segment: &local_iov is a single triplet */
+				&local_iov,
+				recv_cookie,
+				DAT_COMPLETION_DEFAULT_FLAG);
+}
+
+/**
+ * rdma_close_ia - close interface adapter after cleaning up associated objects
+ * @ia: pointer to interface adapter struct to close
+ *
+ * Teardown order: any leftover endpoints, the public service point,
+ * the CR EVD, the pre-registered LMR, the protection zone, and
+ * finally the IA itself (graceful close, falling back to abrupt).
+ * Each handle is set to DAT_HANDLE_NULL only on successful free.
+ **/
+void
+rdma_close_ia(struct rdma_ia *ia)
+{
+	DAT_RETURN datstatus;
+	struct rdma_psp * psp = &ia->ri_psp;
+	
+	dprintk("%s: closing ia %p\n", __FUNCTION__, ia);
+
+	/* check that endpoints are all deleted */
+	while (!list_empty(&ia->ri_endpoints)) {
+		struct rdma_ep *ep;
+		dprintk("%s: endpoints remain!\n", __FUNCTION__);
+	
+		ep = list_entry(ia->ri_endpoints.next,
+				struct rdma_ep,
+				re_list);
+		/* removes ep from ri_endpoints, so this loop terminates */
+		svc_rdma_ep_destroy(ep->re_xprt);
+	}
+	
+	/* Release Public Service Point */
+	
+	if (psp->rp_psp_handle != DAT_HANDLE_NULL) {
+		datstatus = dat_psp_free(psp->rp_psp_handle);
+		if (datstatus == DAT_SUCCESS) {
+			psp->rp_psp_handle = DAT_HANDLE_NULL;
+			dprintk("%s: freed public service point\n", __FUNCTION__);
+		} else {
+			dprintk("%s: dat_psp_free failed (%x)\n", __FUNCTION__, datstatus);
+		}
+	}
+
+	/* Release Event Dispatchers */
+
+	/* the async evd was created by the call to dat_ia_open, so it is destroyed
+	 * by dat_ia_close */
+	free_evd(&psp->rp_cr_evd);
+		
+	/* Free Pre-bound Memory */
+	
+	if (ia->ri_lmr_handle != DAT_HANDLE_NULL) {
+		datstatus = dat_lmr_free(ia->ri_lmr_handle);
+		if (datstatus == DAT_SUCCESS) {
+			ia->ri_lmr_handle = DAT_HANDLE_NULL;
+			dprintk("%s: LMR successfully freed\n", __FUNCTION__);
+		} else {
+			dprintk("%s: rdma_ia_close: dat_lmr_free failed\n", __FUNCTION__);
+		}
+	}
+	
+	/* Release the Protection Zone */
+
+	if (ia->ri_pz_handle != DAT_HANDLE_NULL) {
+		datstatus = dat_pz_free(ia->ri_pz_handle);
+		if (datstatus == DAT_SUCCESS) {
+			ia->ri_pz_handle = DAT_HANDLE_NULL;
+			dprintk("%s: dat_pz_free successful\n", __FUNCTION__);
+		} else {
+			dprintk("%s: dat_pz_free failed\n", __FUNCTION__);
+		}
+	}
+
+	/* Close the IA */
+
+	if (ia->ri_ia_handle != DAT_HANDLE_NULL) {
+		datstatus = dat_ia_close(ia->ri_ia_handle, DAT_CLOSE_GRACEFUL_FLAG);
+		if (datstatus == DAT_SUCCESS) {
+			ia->ri_ia_handle = DAT_HANDLE_NULL;
+			dprintk("%s: IA gracefully closed\n", __FUNCTION__);
+		} else {
+			/* NOTE(review): if the abrupt close also fails,
+			 * ri_ia_handle is left set while ri_initialized is
+			 * cleared below — confirm that is intended. */
+			dprintk("%s: graceful close failed (%x), trying abrupt\n", __FUNCTION__, datstatus);
+			datstatus = dat_ia_close(ia->ri_ia_handle, DAT_CLOSE_ABRUPT_FLAG);
+		}
+	}
+
+	ia->ri_initialized = 0;
+}
+
+/**
+ * rdma_init_ia - open and initialize the interface adapter, do some setup
+ * @ia: pointer to IA struct that will be written into by this function
+ * @ia_name: the IA name, e.g. "ib0", "ccil0"
+ * @evd_min_qlen: minimum length for the asynchronous event dispatcher queue
+ *
+ * In addition to basic initialization, the asynchronous event dispatcher is
+ * set, a protection zone is created, and memory is preregistered for the
+ * interface adapter.
+ *
+ * Returns 0 on success, 2 if the IA was already initialized, and -1 on
+ * failure (after tearing down anything partially created).
+ **/
+int
+rdma_init_ia(struct rdma_ia * ia,
+             const DAT_NAME_PTR ia_name,
+	     DAT_COUNT evd_min_qlen)
+{
+	DAT_RETURN		datstatus;
+	DAT_UPCALL_OBJECT       upcall_obj;
+	DAT_REGION_DESCRIPTION	region;
+	DAT_VLEN		length;
+	DAT_MEM_PRIV_FLAGS	mem_priv;
+
+	dprintk("rdma_init_ia: initializing interface %s\n", ia_name);
+
+	if (ia->ri_initialized) {
+		dprintk("%s: adapter already initialized\n", __FUNCTION__);
+		return 2;
+	}
+
+	memset(ia, 0, sizeof *ia);
+
+	INIT_LIST_HEAD(&ia->ri_endpoints);
+	spin_lock_init(&ia->ri_lock);
+	ia->ri_async_evd = DAT_HANDLE_NULL;
+	
+	datstatus = dat_ia_open(ia_name,
+	                        evd_min_qlen,
+				&ia->ri_async_evd,
+				&ia->ri_ia_handle);
+				
+	if (datstatus != DAT_SUCCESS) {
+		dprintk("%s: dat_ia_open failed (%x)\n", __FUNCTION__, datstatus);
+		goto out;
+	}
+	
+	/* XXX need to set up upcall function */
+	
+	/* route async events from the EVD created by dat_ia_open to us */
+	upcall_obj.instance_data = ia;
+	upcall_obj.upcall_func = async_evd_upcall;
+	datstatus = dat_evd_modify_upcall(ia->ri_async_evd,
+	                                  DAT_UPCALL_SINGLE_INSTANCE,
+					  &upcall_obj);
+	if (datstatus != DAT_SUCCESS) {
+		dprintk("%s: dat_evd_modify_upcall failed\n", __FUNCTION__);
+		/* not fatal, don't goto out */
+	}
+	
+	datstatus = dat_ia_query(ia->ri_ia_handle,
+	                         &ia->ri_async_evd,
+#ifdef KDAPL12
+				 DAT_IA_ALL,
+#endif
+				 &ia->ri_ia_attr,
+#ifdef KDAPL12
+				 DAT_PROVIDER_FIELD_ALL, 
+#endif
+				 &ia->ri_pv_attr);
+	if (datstatus != DAT_SUCCESS) {
+		dprintk("%s: dat_ia_query failed\n", __FUNCTION__);
+		goto out;
+	}
+	
+	/* Create protection zone */
+
+ 	datstatus = dat_pz_create(ia->ri_ia_handle, &ia->ri_pz_handle);
+	if (datstatus != DAT_SUCCESS) {
+		dprintk("%s: dat_pz_create failed\n", __FUNCTION__);
+		goto out;
+	} else {
+		dprintk("%s: PZ created successfully!\n", __FUNCTION__);
+	}
+
+	/* Register all physical memory. This is not as gaping a security hole
+	 * as it sounds, since all RDMA operations are server initiated. No
+	 * client is ever allowed to read from or write to server memory.
+	 */
+	region.for_pa = (DAT_PADDR) 0;
+	/* round up pgcount by 1MB (x86) */
+	length = (((DAT_VLEN) num_physpages + 0xFFULL) & ~0xFFULL) * PAGE_SIZE;
+	mem_priv = DAT_MEM_PRIV_LOCAL_READ_FLAG | DAT_MEM_PRIV_LOCAL_WRITE_FLAG;
+
+	/* XXX Debugging only */
+	/* NOTE(review): this override grants ALL privileges (including
+	 * remote access) and discards the restricted flags computed
+	 * above — must be removed before production use. */
+	mem_priv = DAT_MEM_PRIV_ALL_FLAG;
+
+	datstatus = dat_lmr_kcreate(ia->ri_ia_handle,
+	                            DAT_MEM_TYPE_IA,
+				    region,
+				    length,
+				    ia->ri_pz_handle,
+				    mem_priv,
+				    DAT_MEM_OPTIMIZE_IA,
+				    &ia->ri_lmr_handle,
+				    &ia->ri_lmr_iov.lmr_context,
+				    NULL,
+				    &ia->ri_lmr_iov.segment_length,
+				    &ia->ri_lmr_iov.virtual_address);
+	if (datstatus != DAT_SUCCESS) {
+		dprintk("%s: dat_lmr_kcreate failed\n", __FUNCTION__);
+		goto out;
+	} else {
+		/* NOTE(review): the "(start->end)" range below prints
+		 * segment_length - 1 as the end value, not
+		 * virtual_address + segment_length - 1 — looks like a
+		 * debug-message bug; confirm intent. */
+		dprintk("%s: LMR created successfully! Registered %llu physical pages (%#llx->%#llx, %lluMB)\n",
+			__FUNCTION__,
+			ia->ri_lmr_iov.segment_length >> PAGE_SHIFT,
+			ia->ri_lmr_iov.virtual_address,
+			ia->ri_lmr_iov.segment_length - 1,
+			ia->ri_lmr_iov.segment_length >> 20);
+	}
+
+out:
+	if (datstatus == DAT_SUCCESS) {
+		dprintk("rdma_init_ia: success, handle =  %p\n", ia->ri_ia_handle);
+		ia->ri_initialized = 1;
+		return 0;
+	} else if (ia->ri_ia_handle != DAT_HANDLE_NULL) {
+		rdma_close_ia(ia); /* sets handle to NULL */
+	}
+	dprintk("rdma_init_ia: failure\n");
+	return -1;
+}
+
+/**
+ * rdma_create_pub_svc_point - Create a public service point and add upcall
+ * @xprt: the transport to create the PSP for; contains the rdma_ia structure
+ * @connection_qualifier: the connection qualifier to use for
+ *        the service point
+ *	
+ * Creates a public service point, creates an EVD for it with appropriate
+ * upcall attached for connection request events. On dat_psp_create
+ * failure the freshly created CR EVD is released again.
+ * Returns 0 on success, -1 on failure.
+ **/
+int
+rdma_create_pub_svc_point(struct svc_xprt *xprt,
+                          DAT_CONN_QUAL connection_qualifier)
+{
+	DAT_RETURN datstatus;
+	DAT_UPCALL_OBJECT cr_upcall_obj;
+	struct rdma_ia *ia = (struct rdma_ia *) xprt->sx_data;
+	struct rdma_psp *psp = &ia->ri_psp;
+
+	dprintk("%s: IA handle =  %p\n", __FUNCTION__, ia->ri_ia_handle);
+	dprintk("%s: psp = %p %d\n", __FUNCTION__, psp, (u32) connection_qualifier);
+	
+	/* create EVD for handling Connection Requested Events */
+	
+	/* the IA-level xprt is the upcall context so the upcall can reach
+	 * both the rdma_ia (via sx_data) and the transport flags */
+	cr_upcall_obj.instance_data = xprt;
+	cr_upcall_obj.upcall_func = connection_request_event_upcall;
+	
+	datstatus = dat_evd_kcreate(ia->ri_ia_handle,
+	                            4,
+				    DAT_UPCALL_SINGLE_INSTANCE,
+				    &cr_upcall_obj,
+				    DAT_EVD_CR_FLAG,
+				    &psp->rp_cr_evd);
+	if (datstatus != DAT_SUCCESS) {
+		dprintk("%s:%d: %s: dat_evd_kcreate failed\n", __FILE__, __LINE__, __FUNCTION__);
+		return -1;
+	}
+
+	dprintk("%s: Created Connection EVD =  %p\n", __FUNCTION__, psp->rp_cr_evd);
+
+	datstatus = dat_psp_create(ia->ri_ia_handle,
+	                           connection_qualifier, /* XXX */
+				   psp->rp_cr_evd,
+				   DAT_PSP_PROVIDER_FLAG,
+				   &psp->rp_psp_handle);
+	if (datstatus != DAT_SUCCESS) {
+		dprintk("%s:%d: dat_psp_create failed: ", __FUNCTION__, __LINE__);
+		if (datstatus == DAT_CONN_QUAL_IN_USE) {
+			dprintk("Connection Qualifier in use\n");
+		} else {
+			/* NOTE(review): no trailing newline on this one */
+			dprintk("%x", datstatus);
+		}
+		free_evd(&psp->rp_cr_evd);
+		return -1;
+	}
+	
+	dprintk("%s: success\n", __FUNCTION__);
+	return 0;
+}
+
+/**
+ * svc_rdma_ep_recvfrom - receives data for RPC from an RDMA endpoint
+ * @rqstp: the request structure defining the thread context
+ *
+ * Interprets the RPCRDMA header & generates the reply header before
+ * passing off to RPC for further processing. Returns the number of 
+ * bytes received.
+ *
+ * NOTE(review): the read/write/reply chunk lists are parsed and
+ * logged but their contents are discarded — only inline RDMA_MSG
+ * payloads are actually serviced here.
+ **/
+int
+svc_rdma_ep_recvfrom(struct svc_rqst *rqstp) {
+	struct svc_xprt *xprt = rqstp->rq_sock;
+	struct rdma_ep *ep = (struct rdma_ep *) xprt->sx_data;
+	struct kvec *argv = &rqstp->rq_arg.head[0];
+	struct kvec *resv = &rqstp->rq_res.head[0];
+	u32 xid;
+	u32 vers;
+	u32 credit;
+	u32 proc;
+	u32 pos;
+	u32 handle;
+	u32 len;
+	u64 offset;
+/* 	u32 offset[2]; */
+	u32 i = 0;
+
+	dprintk("svc: rdma_recv %p data %d conn %d close %d\n",
+		xprt, test_bit(SK_DATA, &xprt->sk_flags),
+		test_bit(SK_CONN, &xprt->sk_flags),
+		test_bit(SK_CLOSE, &xprt->sk_flags));
+
+	/* peer-initiated teardown: destroy the transport instead of reading */
+	if (test_bit(SK_CLOSE, &xprt->sk_flags)) {
+		dprintk("%s: got xprt %p in close state, destroying...\n",
+			__FUNCTION__, xprt);
+		svc_rdma_ep_destroy(xprt);
+		return 0;
+	}
+
+	/* no completed receive DTO yet; nothing to parse */
+	if (ep->re_recvlen == 0) {
+		return 0;
+	}
+
+	clear_bit(SK_DATA, &xprt->sk_flags);
+	memcpy(&rqstp->rq_addr, &ep->re_raddr, sizeof(rqstp->rq_addr));
+	dprintk("%s: received %llu bytes from %u.%u.%u.%u\n",  __FUNCTION__,
+		ep->re_recvlen,
+		NIPQUAD(rqstp->rq_addr.sin_addr.s_addr));
+	rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
+
+	/* the request is in the first pre-posted receive page */
+	argv->iov_base = page_address(ep->re_pages[0]);
+	argv->iov_len = ep->re_recvlen;
+
+	/* setup reply buffers */
+	svc_take_page(rqstp); /* must succeed */
+	resv->iov_base = page_address(ep->re_pages[RDMA_MAX_PAGES]); /* XXX 1 page for now */
+	resv->iov_len = 0;
+	rqstp->rq_res.pages = rqstp->rq_respages+1;
+	rqstp->rq_res.len = 0;
+	rqstp->rq_res.page_base = 0;
+	rqstp->rq_res.page_len = 0;
+	rqstp->rq_res.tail[0].iov_len = 0;
+
+	/* XID is echoed back in network order, so no byte swap needed */
+	xid = svc_getu32(argv);
+	svc_putu32(resv, xid);
+
+	vers = ntohl(svc_getu32(argv));
+	if (vers != 1) {
+		/* XXX create version mismatch reply */
+		return 0;
+	} 
+	svc_putu32(resv, htonl(vers));
+
+	credit = ntohl(svc_getu32(argv));
+	svc_putu32(resv, htonl(RDMA_MAX_PAGES)); /* XXX static credits for now */
+
+	dprintk("%s: RPCRDMA header:\n\tXID = %.8x\n\tversion = %u\n\trequested credits = %u\n",
+		__FUNCTION__, xid, vers, credit);
+
+	switch (proc = ntohl(svc_getu32(argv))) {
+	case RDMA_MSG:
+		/* each list entry is preceded by a nonzero discriminator;
+		 * iov_base/iov_len are advanced past each parsed entry */
+		while (svc_getu32(argv)) { /* read list */
+			pos = ntohl(svc_getu32(argv));
+			handle = svc_getu32(argv);
+			len = ntohl(svc_getu32(argv));
+			argv->iov_base = xdr_decode_hyper(argv->iov_base, &offset);
+			argv->iov_len -= sizeof(u64);
+			dprintk("%s: got read chunk: pos = %u, handle = %.8x, len = %u, offset = %llu\n",
+				__FUNCTION__, pos, handle, len, (u64) offset);
+		}
+		
+		while (svc_getu32(argv)) { /* write list */
+			for (i = ntohl(svc_getu32(argv)); i > 0; i--) {
+				handle = svc_getu32(argv);
+				len = ntohl(svc_getu32(argv));
+				argv->iov_base = xdr_decode_hyper(argv->iov_base, &offset);
+				argv->iov_len -= sizeof(u64);
+				dprintk("%s: got write chunk: handle = %.8x, len = %u, offset = %llu\n",
+					__FUNCTION__, handle, len, (u64) offset);
+			}
+		}
+		
+		if (svc_getu32(argv)) { /* reply chunk */
+			for (i = ntohl(svc_getu32(argv)); i > 0; i--) {
+				handle = svc_getu32(argv);
+				len = ntohl(svc_getu32(argv));
+				argv->iov_base = xdr_decode_hyper(argv->iov_base, &offset);
+				argv->iov_len -= sizeof(u64);
+				dprintk("%s: got reply segment: handle = %.8x, len = %u, offset = %llu\n",
+					__FUNCTION__, handle, len, (u64) offset);
+			}
+			
+		}
+
+		break;
+	default:
+		dprintk("%s: rdma_proc %u not handled yet\n", __FUNCTION__, proc);
+		return 0;
+	}
+
+	
+	/* debug only: hex dump of the consumed RPCRDMA header bytes */
+	{
+		unsigned char *recv_buf = page_address(ep->re_pages[0]);
+
+		dprintk("%s: RPCRDMA header\n", __FUNCTION__);
+		for (i = 0; &recv_buf[i] < (unsigned char *) argv->iov_base; i += 4) {
+			dprintk("%p: %.2x %.2x %.2x %.2x\n",
+				&recv_buf[i],
+				recv_buf[i],
+				recv_buf[i + 1],
+				recv_buf[i + 2],
+				recv_buf[i + 3]);
+		}
+	}
+
+	/* NOTE(review): iov_len is size_t but printed with %u — fine on
+	 * 32-bit, mismatched on 64-bit; confirm against printk-formats */
+	dprintk("%s: received %llu total bytes; remaining %u are RPC data\n",
+		__FUNCTION__, ep->re_recvlen, argv->iov_len);
+
+	/* Write remaining RPCRDMA reply header */
+	svc_putu32(resv, htonl(RDMA_MSG));
+	svc_putu32(resv, xdr_zero); /* no read list */
+	svc_putu32(resv, xdr_zero); /* no write list */
+	svc_putu32(resv, xdr_zero); /* no reply chunk */
+
+	return argv->iov_len;
+}
+
+/**
+ * svc_rdma_ep_sendto - sends RPC reply data on an RDMA endpoint
+ * @rqstp: the request structure containing the data to send
+ *
+ * First, the just-used recv buffer is re-posted, then the
+ * reply is sent and the number of bytes sent is returned
+ **/
+int
+svc_rdma_ep_sendto(struct svc_rqst *rqstp) {
+	struct svc_xprt *xprt = rqstp->rq_sock;
+	struct rdma_ep *ep = (struct rdma_ep *) xprt->sx_data;
+	struct xdr_buf *xb = &rqstp->rq_res;
+	DAT_RETURN datstatus;
+	DAT_DTO_COOKIE cookie = { .as_ptr = NULL };
+	int i;
+
+	/* re-post the buffer we used for this request */
+	datstatus = post_recv(ep);
+	if (datstatus != DAT_SUCCESS) {
+		dprintk("%s: post_recv failed (%#.8x)\n", __FUNCTION__, datstatus);
+		/* XXX What the heck should we do here? Close the connection? */
+	} else {
+		dprintk("%s: post_recv SUCCEEDED!\n", __FUNCTION__);
+	}
+
+	/* build up to three send segments (head, first page, tail) in
+	 * ep->re_resp; i counts the segments actually used */
+	i = 0;
+	if ( xb->head[0].iov_len != 0 ) {
+	  ep->re_resp[i].virtual_address = virt_to_phys(xb->head[0].iov_base);
+	  ep->re_resp[i].segment_length = xb->head[0].iov_len;
+	  ep->re_resp[i].lmr_context = ep->re_ia->ri_lmr_iov.lmr_context;
+	  i++;
+	}
+
+	if (xb->page_len != 0) {
+	  /* NOTE(review): %x may truncate a 64-bit physical address */
+	  dprintk("%s: xb->page_base              = %x\n", __FUNCTION__, xb->page_base);
+	  dprintk("%s: page_address(xb->pages[0]) = %p\n", __FUNCTION__, page_address(xb->pages[0]));
+	  dprintk("%s: page_to_phys(xb->pages[0]) = %x\n", __FUNCTION__, page_to_phys(xb->pages[0]));
+
+	  /* NOTE(review): only pages[0] is sent, the start address does
+	   * not add page_base, yet the length subtracts it — confirm
+	   * this matches how the reply xdr_buf is laid out here */
+	  ep->re_resp[i].virtual_address = page_to_phys(xb->pages[0]);
+	  ep->re_resp[i].segment_length = xb->page_len - xb->page_base;
+	  ep->re_resp[i].lmr_context = ep->re_ia->ri_lmr_iov.lmr_context;
+	  i++;
+	}
+
+	if (xb->tail[0].iov_len != 0) {
+	  ep->re_resp[i].virtual_address = virt_to_phys(xb->tail[0].iov_base);
+	  ep->re_resp[i].segment_length = xb->tail[0].iov_len;
+	  ep->re_resp[i].lmr_context = ep->re_ia->ri_lmr_iov.lmr_context;
+	  i++;
+	}
+
+	if (i != 0) {
+		datstatus = dat_ep_post_send(ep->re_handle, i, ep->re_resp,
+				             cookie, 
+					     DAT_COMPLETION_DEFAULT_FLAG);
+		if (datstatus != DAT_SUCCESS) {
+			dprintk("%s: dat_ep_post_send failed %x\n", __FUNCTION__, datstatus);
+			return 0;
+		} else {
+			dprintk("%s: dat_ep_post_send SUCCESS\n", __FUNCTION__);
+		}
+	}
+
+	/* The transport is no longer busy, and can go back to the pool */
+	clear_bit(SK_BUSY, &xprt->sk_flags);
+
+	return xb->len;
+}
+
+/**
+ * svc_setup_endpoint - initializes an endpoint-backed transport instance
+ * @ia_xprt: The transport instance representing the IA; the parent
+ * @ep_xprt: The new transport instance, representing an RDMA endpoint
+ * @ep: the rdma_ep to attach to the new transport via sx_data
+ *
+ * The fields in the svc_xprt structure are initialized and the new transport
+ * is added to the list of temporary transports. It is not yet enqueued, as
+ * no data is pending. This occurs when a DTO arrives.
+ **/
+void
+svc_setup_endpoint(struct svc_xprt *ia_xprt, struct svc_xprt *ep_xprt, struct rdma_ep *ep) {
+	struct rdma_ia *ia = (struct rdma_ia *) ia_xprt->sx_data;
+	struct svc_serv *serv = ia_xprt->sx_server;
+	
+	/* hold SK_BUSY while wiring the xprt up so it cannot be
+	 * scheduled half-initialized; cleared at the end */
+	set_bit(SK_BUSY, &ep_xprt->sk_flags);
+	ep_xprt->sx_server = serv;
+	ep_xprt->sk_lastrecv = get_seconds();
+	ep_xprt->sx_data = ep;
+	ep_xprt->sx_delete = svc_rdma_ep_destroy;
+	ep_xprt->sx_recvfrom = svc_rdma_ep_recvfrom;
+	ep_xprt->sx_sendto = svc_rdma_ep_sendto;
+	ep_xprt->sx_put = svc_rdma_ep_put;
+
+	INIT_LIST_HEAD(&ep_xprt->sk_deferred);
+	INIT_LIST_HEAD(&ep_xprt->sk_ready);
+	sema_init(&ep_xprt->sk_sem, 1);
+	ep_xprt->sx_manages_buffers = 1;
+	/* XXX more stuff to set? */
+
+	/* rdma_close_ep sleeps on this queue for the disconnect event */
+	init_waitqueue_head(&ep->re_connect_wait);
+
+	spin_lock_bh(&serv->sv_lock);
+	list_add(&ep_xprt->sk_list, &serv->sv_tempxprts);
+	spin_unlock_bh(&serv->sv_lock);
+
+	spin_lock_bh(&ia->ri_lock);
+	list_add(&ep->re_list, &ia->ri_endpoints);
+	spin_unlock_bh(&ia->ri_lock);
+
+	ep->re_xprt = ep_xprt;
+	
+	clear_bit(SK_BUSY, &ep_xprt->sk_flags);
+}
+
+/**
+ * svc_rdma_ia_recvfrom - accepts a connection to the interface adapter
+ * @rqstp: the request structure defining the thread context
+ *
+ * The interface adapter structure is encapsulated within the svc_xprt
+ * structure. The waiting connection event is processed, and a new endpoint
+ * is created and associated with a new svc_xprt structure. This new transport
+ * is added to the list of temporary transports. 0 is returned since no
+ * data is actually read.
+ *
+ * Fix: the fail path used to dereference ep unconditionally; if the
+ * newxprt allocation failed, ep was still NULL and the error path
+ * itself oopsed. All ep accesses on the fail path are now guarded.
+ **/
+int
+svc_rdma_ia_recvfrom(struct svc_rqst *rqstp) {
+	DAT_RETURN datstatus;
+	struct svc_xprt *xprt = rqstp->rq_sock;
+	struct svc_xprt *newxprt = NULL;
+	struct rdma_ia *ia = (struct rdma_ia *) xprt->sx_data;
+	DAT_CR_HANDLE cr_handle = ia->ri_conn_req;
+	DAT_UPCALL_OBJECT conn_upcall_obj;
+	DAT_UPCALL_OBJECT in_dto_upcall_obj;
+	DAT_UPCALL_OBJECT out_dto_upcall_obj;
+	DAT_CR_PARAM cr_param;
+	DAT_EP_PARAM ep_param;
+	struct rdma_ep *ep = NULL;
+	u32 i;
+
+	dprintk("%s: cr_handle = %p\n", __FUNCTION__, cr_handle);
+
+	if (test_bit(SK_CONN, &xprt->sk_flags)) {
+		dprintk("%s: connection waiting\n", __FUNCTION__);
+	}
+
+	/* XXX is it OK to allocate memory here? Should this be ATOMIC? */
+	if (!(newxprt = kmalloc(sizeof(*newxprt), GFP_KERNEL))) {
+		printk("%s: Not enough memory for transport structure\n", __FUNCTION__);
+		goto fail;
+	}
+	memset(newxprt, 0, sizeof(*newxprt));
+	if (!(ep = kmalloc(sizeof(struct rdma_ep), GFP_NOFS))) {
+		printk("%s: Not enough memory for endpoint\n", __FUNCTION__);
+		goto fail;
+	}
+	/* zeroing makes the re_pages[] cleanup on the fail path safe:
+	 * unallocated slots are NULL */
+	memset(ep, 0, sizeof(*ep));
+
+	/* XXX +1 for the extra reply page */
+	for (i = 0; i < RDMA_MAX_PAGES + 1; i++) {
+		if (!(ep->re_pages[i] = alloc_page(GFP_NOFS))) {
+			printk("%s: Not enough memory to post recv buffers\n", __FUNCTION__);
+			goto fail;
+		}
+	}
+
+	datstatus = dat_cr_query(cr_handle,
+#ifdef KDAPL12
+				 DAT_CR_FIELD_ALL,
+#endif
+				 &cr_param);
+	if (datstatus != DAT_SUCCESS) {
+		printk("%s: cr_query failed %x\n", __FUNCTION__, datstatus);
+		goto fail;
+	}
+
+	/* Create EVDs for events on this endpoint */
+
+	conn_upcall_obj.instance_data = ep;
+	conn_upcall_obj.upcall_func = connection_event_upcall;
+	datstatus = dat_evd_kcreate(ia->ri_ia_handle,
+				    4,
+				    DAT_UPCALL_SINGLE_INSTANCE,
+				    &conn_upcall_obj,
+				    DAT_EVD_CONNECTION_FLAG,
+				    &ep->re_conn_evd);
+	if (datstatus != DAT_SUCCESS) {
+		printk("%s: dat_evd_kcreate failed (%x)\n", __FUNCTION__, datstatus);
+		goto fail;
+	}
+
+	in_dto_upcall_obj.instance_data = ep;
+	in_dto_upcall_obj.upcall_func = in_dto_event_upcall;
+	datstatus = dat_evd_kcreate(ia->ri_ia_handle,
+				    4,
+				    DAT_UPCALL_SINGLE_INSTANCE,
+				    &in_dto_upcall_obj,
+				    DAT_EVD_DTO_FLAG,
+				    &ep->re_in_dto_evd);
+	if (datstatus != DAT_SUCCESS) {
+		printk("%s: dat_evd_kcreate failed (%x)\n", __FUNCTION__, datstatus);
+		goto fail;
+	}
+
+	out_dto_upcall_obj.instance_data = ep;
+	out_dto_upcall_obj.upcall_func = out_dto_event_upcall;
+	datstatus = dat_evd_kcreate(ia->ri_ia_handle,
+				    4,
+				    DAT_UPCALL_SINGLE_INSTANCE,
+				    &out_dto_upcall_obj,
+				    DAT_EVD_DTO_FLAG,
+				    &ep->re_out_dto_evd);
+	if (datstatus != DAT_SUCCESS) {
+		printk("%s: dat_evd_kcreate failed (%x)\n", __FUNCTION__, datstatus);
+		goto fail;
+	}
+
+#ifdef KDAPL12
+	ep->re_handle = cr_param.local_ep_handle;
+#else
+	ep->re_handle = cr_param.local_ep;
+#endif
+	ep->re_ia = ia;
+
+	/* attach our PZ and the three EVDs to the provider-created EP */
+	memset(&ep_param, 0, sizeof ep_param);
+#ifdef KDAPL12
+	ep_param.pz_handle = ia->ri_pz_handle;
+	ep_param.recv_evd_handle = ep->re_in_dto_evd;
+	ep_param.request_evd_handle = ep->re_out_dto_evd;
+	ep_param.connect_evd_handle = ep->re_conn_evd;
+#else
+	ep_param.pz = ia->ri_pz_handle;
+	ep_param.recv_evd = ep->re_in_dto_evd;
+	ep_param.request_evd = ep->re_out_dto_evd;
+	ep_param.connect_evd = ep->re_conn_evd;
+#endif
+
+	dprintk("%s: re_handle = %p, pz_handle = %p, in_dto_evd = %p, out_dto_evd = %p, conn_evd = %p\n",
+		__FUNCTION__, ep->re_handle, ia->ri_pz_handle,
+		ep->re_in_dto_evd, ep->re_out_dto_evd, ep->re_conn_evd);
+
+	datstatus = dat_ep_modify(ep->re_handle,
+				  DAT_EP_FIELD_PZ_HANDLE |
+				  DAT_EP_FIELD_RECV_EVD_HANDLE |
+				  DAT_EP_FIELD_REQUEST_EVD_HANDLE |
+				  DAT_EP_FIELD_CONNECT_EVD_HANDLE,
+				  &ep_param);
+	if (datstatus != DAT_SUCCESS) {
+		printk("%s: dat_ep_modify failed %x\n", __FUNCTION__, datstatus);
+		goto fail;
+	}
+
+	/* a recv buffer must be posted before accepting, so the client's
+	 * first request has somewhere to land */
+	datstatus = post_recv(ep);
+	if (datstatus != DAT_SUCCESS) {
+		dprintk("%s: post_recv failed (%#.8x)\n", __FUNCTION__, datstatus);
+		goto fail;
+	}
+
+	svc_setup_endpoint(xprt, newxprt, ep);
+	dprintk("%s: created RDMA EP transport: %p\n", __FUNCTION__, newxprt);
+
+	datstatus = dat_cr_accept(cr_handle, DAT_HANDLE_NULL, (DAT_COUNT) 0, NULL);
+	if (datstatus != DAT_SUCCESS) {
+		dprintk("%s: dat_cr_accept failed\n", __FUNCTION__);
+		if (datstatus == DAT_INVALID_HANDLE) {
+			dprintk("\treason: DAT_INVALID_HANDLE\n");
+		} else if (datstatus == DAT_INVALID_PARAMETER) {
+			dprintk("\treason: DAT_INVALID_PARAMETER\n");		
+		} else {
+			dprintk("\treason: unknown (%x)\n", datstatus);		
+		}
+		/* NOTE(review): at this point svc_setup_endpoint has
+		 * already linked newxprt/ep into the server and IA
+		 * lists; the fail path frees them without unlinking —
+		 * confirm and fix list removal here. */
+		goto fail;
+	}
+	dprintk("%s: dat_cr_accept SUCCEEDED! Hells, yeah\n", __FUNCTION__);
+	dprintk("BOO\n");
+	
+	ia->ri_conn_req = NULL; /* XXX should be a dequeue, when the connections are queued */
+
+	/* get the remote address */
+	memcpy(&ep->re_raddr, cr_param.remote_ia_address_ptr,
+	       sizeof ep->re_raddr);
+        
+	dprintk("%s: remote IA address: %u.%u.%u.%u\n", __FUNCTION__,
+		NIPQUAD(ep->re_raddr.sin_addr.s_addr));
+
+/* 	clear_bit(SK_CONN, &xprt->sk_flags); */
+/* 	clear_bit(SK_BUSY, &xprt->sk_flags); */
+	return 0;
+
+ fail:
+	/* ep is NULL when the transport or endpoint allocation failed;
+	 * guard every dereference or the error path itself oopses */
+	if (ep) {
+		free_evd(&ep->re_conn_evd);
+		free_evd(&ep->re_in_dto_evd);
+		free_evd(&ep->re_out_dto_evd);
+		for (i = 0; i < RDMA_MAX_PAGES && ep->re_pages[i]; i++) {
+			free_page((unsigned long) ep->re_pages[i]);
+		}
+		free_page((unsigned long) ep->re_pages[RDMA_MAX_PAGES]); /* reply page */
+		kfree(ep);
+	}
+	kfree(newxprt); /* kfree(NULL) is a no-op */
+	return 0; /* XXX better return value? */
+}
+
diff -purN -x '#*#' -x infiniband -x include/rdma -x config -X linux-2.6.14.3/Documentation/dontdiff linux-2.6.14.3/scripts/Makefile.modinst linux-2.6.14.3-RPCRDMA/scripts/Makefile.modinst
--- linux-2.6.14.3/scripts/Makefile.modinst	2005-11-24 17:10:21.000000000 -0500
+++ linux-2.6.14.3-RPCRDMA/scripts/Makefile.modinst	2005-12-08 11:50:23.000000000 -0500
@@ -20,7 +20,7 @@ quiet_cmd_modules_install = INSTALL $@
       cmd_modules_install = mkdir -p $(2); cp $@ $(2)
 
 # Modules built outside the kernel source tree go into extra by default
-INSTALL_MOD_DIR ?= extra
+INSTALL_MOD_DIR ?= extra/
 ext-mod-dir = $(INSTALL_MOD_DIR)$(subst $(KBUILD_EXTMOD),,$(@D))
 
 modinst_dir = $(if $(KBUILD_EXTMOD),$(ext-mod-dir),kernel/$(@D))
