Fixed MTP to work with TWRP

commit f6dfaef42e
awab228 2018-06-19 23:16:04 +02:00
50820 changed files with 20846062 additions and 0 deletions

net/sunrpc/Kconfig (new file)
@@ -0,0 +1,72 @@
config SUNRPC
tristate
config SUNRPC_GSS
tristate
select OID_REGISTRY
config SUNRPC_BACKCHANNEL
bool
depends on SUNRPC
config SUNRPC_SWAP
bool
depends on SUNRPC
config RPCSEC_GSS_KRB5
tristate "Secure RPC: Kerberos V mechanism"
depends on SUNRPC && CRYPTO
depends on CRYPTO_MD5 && CRYPTO_DES && CRYPTO_CBC && CRYPTO_CTS
depends on CRYPTO_ECB && CRYPTO_HMAC && CRYPTO_SHA1 && CRYPTO_AES
depends on CRYPTO_ARC4
default y
select SUNRPC_GSS
help
Choose Y here to enable Secure RPC using the Kerberos version 5
GSS-API mechanism (RFC 1964).
Secure RPC calls with Kerberos require an auxiliary user-space
daemon which may be found in the Linux nfs-utils package
available from http://linux-nfs.org/. In addition, user-space
Kerberos support should be installed.
If unsure, say Y.
config SUNRPC_DEBUG
bool "RPC: Enable dprintk debugging"
depends on SUNRPC && SYSCTL
help
This option enables a sysctl-based debugging interface
that is used by the 'rpcdebug' utility to turn on or off
logging of different aspects of the kernel RPC activity.
Disabling this option will make your kernel slightly smaller,
but makes troubleshooting NFS issues significantly harder.
If unsure, say Y.
config SUNRPC_XPRT_RDMA_CLIENT
tristate "RPC over RDMA Client Support"
depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
default SUNRPC && INFINIBAND
help
This option allows the NFS client to support an RDMA-enabled
transport.
To compile RPC client RDMA transport support as a module,
choose M here: the module will be called xprtrdma.
If unsure, say N.
config SUNRPC_XPRT_RDMA_SERVER
tristate "RPC over RDMA Server Support"
depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
default SUNRPC && INFINIBAND
help
This option allows the NFS server to support an RDMA-enabled
transport.
To compile RPC server RDMA transport support as a module,
choose M here: the module will be called svcrdma.
If unsure, say N.

net/sunrpc/Makefile (new file)
@@ -0,0 +1,19 @@
#
# Makefile for Linux kernel SUN RPC
#
obj-$(CONFIG_SUNRPC) += sunrpc.o
obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
obj-y += xprtrdma/
sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
auth.o auth_null.o auth_unix.o auth_generic.o \
svc.o svcsock.o svcauth.o svcauth_unix.o \
addr.o rpcb_clnt.o timer.o xdr.o \
sunrpc_syms.o cache.o rpc_pipe.o \
svc_xprt.o
sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o
sunrpc-$(CONFIG_PROC_FS) += stats.o
sunrpc-$(CONFIG_SYSCTL) += sysctl.o

net/sunrpc/addr.c (new file)
@@ -0,0 +1,357 @@
/*
* Copyright 2009, Oracle. All rights reserved.
*
* Convert socket addresses to presentation addresses and universal
* addresses, and vice versa.
*
* Universal addresses are introduced by RFC 1833 and further refined by
* recent RFCs describing NFSv4. The universal address format is part
* of the external (network) interface provided by rpcbind version 3
* and 4, and by NFSv4. Such an address is a string containing a
* presentation format IP address followed by a port number in
* "hibyte.lobyte" format.
*
* IPv6 addresses can also include a scope ID, typically denoted by
* a '%' followed by a device name or a non-negative integer. Refer to
* RFC 4291, Section 2.2 for details on IPv6 presentation formats.
*/
#include <net/ipv6.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/msg_prot.h>
#include <linux/slab.h>
#include <linux/export.h>
#if IS_ENABLED(CONFIG_IPV6)
static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap,
char *buf, const int buflen)
{
const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
const struct in6_addr *addr = &sin6->sin6_addr;
/*
* RFC 4291, Section 2.2.2
*
* Shorthanded ANY address
*/
if (ipv6_addr_any(addr))
return snprintf(buf, buflen, "::");
/*
* RFC 4291, Section 2.2.2
*
* Shorthanded loopback address
*/
if (ipv6_addr_loopback(addr))
return snprintf(buf, buflen, "::1");
/*
* RFC 4291, Section 2.2.3
*
* Special presentation address format for mapped v4
* addresses.
*/
if (ipv6_addr_v4mapped(addr))
return snprintf(buf, buflen, "::ffff:%pI4",
&addr->s6_addr32[3]);
/*
* RFC 4291, Section 2.2.1
*/
return snprintf(buf, buflen, "%pI6c", addr);
}
static size_t rpc_ntop6(const struct sockaddr *sap,
char *buf, const size_t buflen)
{
const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
char scopebuf[IPV6_SCOPE_ID_LEN];
size_t len;
int rc;
len = rpc_ntop6_noscopeid(sap, buf, buflen);
if (unlikely(len == 0))
return len;
if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL))
return len;
if (sin6->sin6_scope_id == 0)
return len;
rc = snprintf(scopebuf, sizeof(scopebuf), "%c%u",
IPV6_SCOPE_DELIMITER, sin6->sin6_scope_id);
if (unlikely((size_t)rc > sizeof(scopebuf)))
return 0;
len += rc;
if (unlikely(len > buflen))
return 0;
strcat(buf, scopebuf);
return len;
}
#else /* !IS_ENABLED(CONFIG_IPV6) */
static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap,
char *buf, const int buflen)
{
return 0;
}
static size_t rpc_ntop6(const struct sockaddr *sap,
char *buf, const size_t buflen)
{
return 0;
}
#endif /* !IS_ENABLED(CONFIG_IPV6) */
static int rpc_ntop4(const struct sockaddr *sap,
char *buf, const size_t buflen)
{
const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
return snprintf(buf, buflen, "%pI4", &sin->sin_addr);
}
/**
* rpc_ntop - construct a presentation address in @buf
* @sap: socket address
* @buf: construction area
* @buflen: size of @buf, in bytes
*
* Plants a %NUL-terminated string in @buf and returns the length
* of the string, excluding the %NUL. Otherwise zero is returned.
*/
size_t rpc_ntop(const struct sockaddr *sap, char *buf, const size_t buflen)
{
switch (sap->sa_family) {
case AF_INET:
return rpc_ntop4(sap, buf, buflen);
case AF_INET6:
return rpc_ntop6(sap, buf, buflen);
}
return 0;
}
EXPORT_SYMBOL_GPL(rpc_ntop);
static size_t rpc_pton4(const char *buf, const size_t buflen,
struct sockaddr *sap, const size_t salen)
{
struct sockaddr_in *sin = (struct sockaddr_in *)sap;
u8 *addr = (u8 *)&sin->sin_addr.s_addr;
if (buflen > INET_ADDRSTRLEN || salen < sizeof(struct sockaddr_in))
return 0;
memset(sap, 0, sizeof(struct sockaddr_in));
if (in4_pton(buf, buflen, addr, '\0', NULL) == 0)
return 0;
sin->sin_family = AF_INET;
return sizeof(struct sockaddr_in);
}
#if IS_ENABLED(CONFIG_IPV6)
static int rpc_parse_scope_id(struct net *net, const char *buf,
const size_t buflen, const char *delim,
struct sockaddr_in6 *sin6)
{
char *p;
size_t len;
if ((buf + buflen) == delim)
return 1;
if (*delim != IPV6_SCOPE_DELIMITER)
return 0;
if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL))
return 0;
len = (buf + buflen) - delim - 1;
p = kstrndup(delim + 1, len, GFP_KERNEL);
if (p) {
u32 scope_id = 0;
struct net_device *dev;
dev = dev_get_by_name(net, p);
if (dev != NULL) {
scope_id = dev->ifindex;
dev_put(dev);
} else {
if (kstrtou32(p, 10, &scope_id) == 0) {
kfree(p);
return 0;
}
}
kfree(p);
sin6->sin6_scope_id = scope_id;
return 1;
}
return 0;
}
static size_t rpc_pton6(struct net *net, const char *buf, const size_t buflen,
struct sockaddr *sap, const size_t salen)
{
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
u8 *addr = (u8 *)&sin6->sin6_addr.in6_u;
const char *delim;
if (buflen > (INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN) ||
salen < sizeof(struct sockaddr_in6))
return 0;
memset(sap, 0, sizeof(struct sockaddr_in6));
if (in6_pton(buf, buflen, addr, IPV6_SCOPE_DELIMITER, &delim) == 0)
return 0;
if (!rpc_parse_scope_id(net, buf, buflen, delim, sin6))
return 0;
sin6->sin6_family = AF_INET6;
return sizeof(struct sockaddr_in6);
}
#else
static size_t rpc_pton6(struct net *net, const char *buf, const size_t buflen,
struct sockaddr *sap, const size_t salen)
{
return 0;
}
#endif
/**
* rpc_pton - Construct a sockaddr in @sap
* @net: applicable network namespace
* @buf: C string containing presentation format IP address
* @buflen: length of presentation address in bytes
* @sap: buffer into which to plant socket address
* @salen: size of buffer in bytes
*
* Returns the size of the socket address if successful; otherwise
* zero is returned.
*
* Plants a socket address in @sap and returns the size of the
* socket address, if successful. Returns zero if an error
* occurred.
*/
size_t rpc_pton(struct net *net, const char *buf, const size_t buflen,
struct sockaddr *sap, const size_t salen)
{
unsigned int i;
for (i = 0; i < buflen; i++)
if (buf[i] == ':')
return rpc_pton6(net, buf, buflen, sap, salen);
return rpc_pton4(buf, buflen, sap, salen);
}
EXPORT_SYMBOL_GPL(rpc_pton);
/**
* rpc_sockaddr2uaddr - Construct a universal address string from @sap.
* @sap: socket address
* @gfp_flags: allocation mode
*
* Returns a %NUL-terminated string in dynamically allocated memory;
* otherwise NULL is returned if an error occurred. Caller must
* free the returned string.
*/
char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags)
{
char portbuf[RPCBIND_MAXUADDRPLEN];
char addrbuf[RPCBIND_MAXUADDRLEN];
unsigned short port;
switch (sap->sa_family) {
case AF_INET:
if (rpc_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0)
return NULL;
port = ntohs(((struct sockaddr_in *)sap)->sin_port);
break;
case AF_INET6:
if (rpc_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0)
return NULL;
port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port);
break;
default:
return NULL;
}
if (snprintf(portbuf, sizeof(portbuf),
".%u.%u", port >> 8, port & 0xff) > (int)sizeof(portbuf))
return NULL;
if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) > sizeof(addrbuf))
return NULL;
return kstrdup(addrbuf, gfp_flags);
}
/**
* rpc_uaddr2sockaddr - convert a universal address to a socket address.
* @net: applicable network namespace
* @uaddr: C string containing universal address to convert
* @uaddr_len: length of universal address string
* @sap: buffer into which to plant socket address
* @salen: size of buffer
*
* @uaddr does not have to be '\0'-terminated, but kstrtou8() and
* rpc_pton() require proper string termination to be successful.
*
* Returns the size of the socket address if successful; otherwise
* zero is returned.
*/
size_t rpc_uaddr2sockaddr(struct net *net, const char *uaddr,
const size_t uaddr_len, struct sockaddr *sap,
const size_t salen)
{
char *c, buf[RPCBIND_MAXUADDRLEN + sizeof('\0')];
u8 portlo, porthi;
unsigned short port;
if (uaddr_len > RPCBIND_MAXUADDRLEN)
return 0;
memcpy(buf, uaddr, uaddr_len);
buf[uaddr_len] = '\0';
c = strrchr(buf, '.');
if (unlikely(c == NULL))
return 0;
if (unlikely(kstrtou8(c + 1, 10, &portlo) != 0))
return 0;
*c = '\0';
c = strrchr(buf, '.');
if (unlikely(c == NULL))
return 0;
if (unlikely(kstrtou8(c + 1, 10, &porthi) != 0))
return 0;
port = (unsigned short)((porthi << 8) | portlo);
*c = '\0';
if (rpc_pton(net, buf, strlen(buf), sap, salen) == 0)
return 0;
switch (sap->sa_family) {
case AF_INET:
((struct sockaddr_in *)sap)->sin_port = htons(port);
return sizeof(struct sockaddr_in);
case AF_INET6:
((struct sockaddr_in6 *)sap)->sin6_port = htons(port);
return sizeof(struct sockaddr_in6);
}
return 0;
}
EXPORT_SYMBOL_GPL(rpc_uaddr2sockaddr);
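The file above implements the universal address format described in its header comment: a presentation-format IP address with the port appended as two decimal octets, "hibyte.lobyte". The short standalone userspace sketch below illustrates only that encoding; it is not kernel code, and the helper names in it are invented for the demonstration.

/*
 * Userspace illustration of the universal-address port encoding that
 * rpc_sockaddr2uaddr() appends and rpc_uaddr2sockaddr() strips off.
 * Assumes well-formed input; all names here are demo-only.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void make_uaddr(const char *ip, unsigned short port,
		       char *buf, size_t buflen)
{
	/* same ".%u.%u" idea as rpc_sockaddr2uaddr() above */
	snprintf(buf, buflen, "%s.%u.%u", ip,
		 (unsigned int)(port >> 8), (unsigned int)(port & 0xff));
}

static unsigned short parse_uaddr_port(char *uaddr)
{
	/* same walk as rpc_uaddr2sockaddr(): strip the last two ".N" fields */
	char *c = strrchr(uaddr, '.');
	unsigned int lo = (unsigned int)strtoul(c + 1, NULL, 10);
	unsigned int hi;

	*c = '\0';
	c = strrchr(uaddr, '.');
	hi = (unsigned int)strtoul(c + 1, NULL, 10);
	*c = '\0';			/* uaddr now holds only the IP part */
	return (unsigned short)((hi << 8) | lo);
}

int main(void)
{
	char buf[64];

	make_uaddr("192.0.2.7", 2049, buf, sizeof(buf));
	printf("uaddr: %s\n", buf);			/* 192.0.2.7.8.1 */
	printf("port:  %u, host: %s\n", parse_uaddr_port(buf), buf);
	return 0;
}

Port 2049 (0x0801) splits into the trailing ".8.1", which is exactly the pair of %u fields rpc_sockaddr2uaddr() writes and the two kstrtou8() calls in rpc_uaddr2sockaddr() read back.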

net/sunrpc/auth.c (new file)
@@ -0,0 +1,891 @@
/*
* linux/net/sunrpc/auth.c
*
* Generic RPC client authentication API.
*
* Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
*/
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/hash.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/gss_api.h>
#include <linux/spinlock.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
#define RPC_CREDCACHE_DEFAULT_HASHBITS (4)
struct rpc_cred_cache {
struct hlist_head *hashtable;
unsigned int hashbits;
spinlock_t lock;
};
static unsigned int auth_hashbits = RPC_CREDCACHE_DEFAULT_HASHBITS;
static DEFINE_SPINLOCK(rpc_authflavor_lock);
static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
&authnull_ops, /* AUTH_NULL */
&authunix_ops, /* AUTH_UNIX */
NULL, /* others can be loadable modules */
};
static LIST_HEAD(cred_unused);
static unsigned long number_cred_unused;
#define MAX_HASHTABLE_BITS (14)
static int param_set_hashtbl_sz(const char *val, const struct kernel_param *kp)
{
unsigned long num;
unsigned int nbits;
int ret;
if (!val)
goto out_inval;
ret = kstrtoul(val, 0, &num);
if (ret == -EINVAL)
goto out_inval;
nbits = fls(num);
if (num > (1U << nbits))
nbits++;
if (nbits > MAX_HASHTABLE_BITS || nbits < 2)
goto out_inval;
*(unsigned int *)kp->arg = nbits;
return 0;
out_inval:
return -EINVAL;
}
static int param_get_hashtbl_sz(char *buffer, const struct kernel_param *kp)
{
unsigned int nbits;
nbits = *(unsigned int *)kp->arg;
return sprintf(buffer, "%u", 1U << nbits);
}
#define param_check_hashtbl_sz(name, p) __param_check(name, p, unsigned int);
static struct kernel_param_ops param_ops_hashtbl_sz = {
.set = param_set_hashtbl_sz,
.get = param_get_hashtbl_sz,
};
module_param_named(auth_hashtable_size, auth_hashbits, hashtbl_sz, 0644);
MODULE_PARM_DESC(auth_hashtable_size, "RPC credential cache hashtable size");
static unsigned long auth_max_cred_cachesize = ULONG_MAX;
module_param(auth_max_cred_cachesize, ulong, 0644);
MODULE_PARM_DESC(auth_max_cred_cachesize, "RPC credential maximum total cache size");
static u32
pseudoflavor_to_flavor(u32 flavor) {
if (flavor > RPC_AUTH_MAXFLAVOR)
return RPC_AUTH_GSS;
return flavor;
}
int
rpcauth_register(const struct rpc_authops *ops)
{
rpc_authflavor_t flavor;
int ret = -EPERM;
if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
return -EINVAL;
spin_lock(&rpc_authflavor_lock);
if (auth_flavors[flavor] == NULL) {
auth_flavors[flavor] = ops;
ret = 0;
}
spin_unlock(&rpc_authflavor_lock);
return ret;
}
EXPORT_SYMBOL_GPL(rpcauth_register);
int
rpcauth_unregister(const struct rpc_authops *ops)
{
rpc_authflavor_t flavor;
int ret = -EPERM;
if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
return -EINVAL;
spin_lock(&rpc_authflavor_lock);
if (auth_flavors[flavor] == ops) {
auth_flavors[flavor] = NULL;
ret = 0;
}
spin_unlock(&rpc_authflavor_lock);
return ret;
}
EXPORT_SYMBOL_GPL(rpcauth_unregister);
/**
* rpcauth_get_pseudoflavor - check if security flavor is supported
* @flavor: a security flavor
* @info: a GSS mech OID, quality of protection, and service value
*
* Verifies that an appropriate kernel module is available or already loaded.
* Returns an equivalent pseudoflavor, or RPC_AUTH_MAXFLAVOR if "flavor" is
* not supported locally.
*/
rpc_authflavor_t
rpcauth_get_pseudoflavor(rpc_authflavor_t flavor, struct rpcsec_gss_info *info)
{
const struct rpc_authops *ops;
rpc_authflavor_t pseudoflavor;
ops = auth_flavors[flavor];
if (ops == NULL)
request_module("rpc-auth-%u", flavor);
spin_lock(&rpc_authflavor_lock);
ops = auth_flavors[flavor];
if (ops == NULL || !try_module_get(ops->owner)) {
spin_unlock(&rpc_authflavor_lock);
return RPC_AUTH_MAXFLAVOR;
}
spin_unlock(&rpc_authflavor_lock);
pseudoflavor = flavor;
if (ops->info2flavor != NULL)
pseudoflavor = ops->info2flavor(info);
module_put(ops->owner);
return pseudoflavor;
}
EXPORT_SYMBOL_GPL(rpcauth_get_pseudoflavor);
/**
* rpcauth_get_gssinfo - find GSS tuple matching a GSS pseudoflavor
* @pseudoflavor: GSS pseudoflavor to match
* @info: rpcsec_gss_info structure to fill in
*
* Returns zero and fills in "info" if pseudoflavor matches a
* supported mechanism.
*/
int
rpcauth_get_gssinfo(rpc_authflavor_t pseudoflavor, struct rpcsec_gss_info *info)
{
rpc_authflavor_t flavor = pseudoflavor_to_flavor(pseudoflavor);
const struct rpc_authops *ops;
int result;
if (flavor >= RPC_AUTH_MAXFLAVOR)
return -EINVAL;
ops = auth_flavors[flavor];
if (ops == NULL)
request_module("rpc-auth-%u", flavor);
spin_lock(&rpc_authflavor_lock);
ops = auth_flavors[flavor];
if (ops == NULL || !try_module_get(ops->owner)) {
spin_unlock(&rpc_authflavor_lock);
return -ENOENT;
}
spin_unlock(&rpc_authflavor_lock);
result = -ENOENT;
if (ops->flavor2info != NULL)
result = ops->flavor2info(pseudoflavor, info);
module_put(ops->owner);
return result;
}
EXPORT_SYMBOL_GPL(rpcauth_get_gssinfo);
/**
* rpcauth_list_flavors - discover registered flavors and pseudoflavors
* @array: array to fill in
* @size: size of "array"
*
* Returns the number of array items filled in, or a negative errno.
*
* The returned array is not sorted by any policy. Callers should not
* rely on the order of the items in the returned array.
*/
int
rpcauth_list_flavors(rpc_authflavor_t *array, int size)
{
rpc_authflavor_t flavor;
int result = 0;
spin_lock(&rpc_authflavor_lock);
for (flavor = 0; flavor < RPC_AUTH_MAXFLAVOR; flavor++) {
const struct rpc_authops *ops = auth_flavors[flavor];
rpc_authflavor_t pseudos[4];
int i, len;
if (result >= size) {
result = -ENOMEM;
break;
}
if (ops == NULL)
continue;
if (ops->list_pseudoflavors == NULL) {
array[result++] = ops->au_flavor;
continue;
}
len = ops->list_pseudoflavors(pseudos, ARRAY_SIZE(pseudos));
if (len < 0) {
result = len;
break;
}
for (i = 0; i < len; i++) {
if (result >= size) {
result = -ENOMEM;
break;
}
array[result++] = pseudos[i];
}
}
spin_unlock(&rpc_authflavor_lock);
dprintk("RPC: %s returns %d\n", __func__, result);
return result;
}
EXPORT_SYMBOL_GPL(rpcauth_list_flavors);
struct rpc_auth *
rpcauth_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
struct rpc_auth *auth;
const struct rpc_authops *ops;
u32 flavor = pseudoflavor_to_flavor(args->pseudoflavor);
auth = ERR_PTR(-EINVAL);
if (flavor >= RPC_AUTH_MAXFLAVOR)
goto out;
if ((ops = auth_flavors[flavor]) == NULL)
request_module("rpc-auth-%u", flavor);
spin_lock(&rpc_authflavor_lock);
ops = auth_flavors[flavor];
if (ops == NULL || !try_module_get(ops->owner)) {
spin_unlock(&rpc_authflavor_lock);
goto out;
}
spin_unlock(&rpc_authflavor_lock);
auth = ops->create(args, clnt);
module_put(ops->owner);
if (IS_ERR(auth))
return auth;
if (clnt->cl_auth)
rpcauth_release(clnt->cl_auth);
clnt->cl_auth = auth;
out:
return auth;
}
EXPORT_SYMBOL_GPL(rpcauth_create);
void
rpcauth_release(struct rpc_auth *auth)
{
if (!atomic_dec_and_test(&auth->au_count))
return;
auth->au_ops->destroy(auth);
}
static DEFINE_SPINLOCK(rpc_credcache_lock);
static void
rpcauth_unhash_cred_locked(struct rpc_cred *cred)
{
hlist_del_rcu(&cred->cr_hash);
smp_mb__before_atomic();
clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
}
static int
rpcauth_unhash_cred(struct rpc_cred *cred)
{
spinlock_t *cache_lock;
int ret;
cache_lock = &cred->cr_auth->au_credcache->lock;
spin_lock(cache_lock);
ret = atomic_read(&cred->cr_count) == 0;
if (ret)
rpcauth_unhash_cred_locked(cred);
spin_unlock(cache_lock);
return ret;
}
/*
* Initialize RPC credential cache
*/
int
rpcauth_init_credcache(struct rpc_auth *auth)
{
struct rpc_cred_cache *new;
unsigned int hashsize;
new = kmalloc(sizeof(*new), GFP_KERNEL);
if (!new)
goto out_nocache;
new->hashbits = auth_hashbits;
hashsize = 1U << new->hashbits;
new->hashtable = kcalloc(hashsize, sizeof(new->hashtable[0]), GFP_KERNEL);
if (!new->hashtable)
goto out_nohashtbl;
spin_lock_init(&new->lock);
auth->au_credcache = new;
return 0;
out_nohashtbl:
kfree(new);
out_nocache:
return -ENOMEM;
}
EXPORT_SYMBOL_GPL(rpcauth_init_credcache);
/*
* Setup a credential key lifetime timeout notification
*/
int
rpcauth_key_timeout_notify(struct rpc_auth *auth, struct rpc_cred *cred)
{
if (!cred->cr_auth->au_ops->key_timeout)
return 0;
return cred->cr_auth->au_ops->key_timeout(auth, cred);
}
EXPORT_SYMBOL_GPL(rpcauth_key_timeout_notify);
bool
rpcauth_cred_key_to_expire(struct rpc_cred *cred)
{
if (!cred->cr_ops->crkey_to_expire)
return false;
return cred->cr_ops->crkey_to_expire(cred);
}
EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire);
char *
rpcauth_stringify_acceptor(struct rpc_cred *cred)
{
if (!cred->cr_ops->crstringify_acceptor)
return NULL;
return cred->cr_ops->crstringify_acceptor(cred);
}
EXPORT_SYMBOL_GPL(rpcauth_stringify_acceptor);
/*
* Destroy a list of credentials
*/
static inline
void rpcauth_destroy_credlist(struct list_head *head)
{
struct rpc_cred *cred;
while (!list_empty(head)) {
cred = list_entry(head->next, struct rpc_cred, cr_lru);
list_del_init(&cred->cr_lru);
put_rpccred(cred);
}
}
/*
* Clear the RPC credential cache, and delete those credentials
* that are not referenced.
*/
void
rpcauth_clear_credcache(struct rpc_cred_cache *cache)
{
LIST_HEAD(free);
struct hlist_head *head;
struct rpc_cred *cred;
unsigned int hashsize = 1U << cache->hashbits;
int i;
spin_lock(&rpc_credcache_lock);
spin_lock(&cache->lock);
for (i = 0; i < hashsize; i++) {
head = &cache->hashtable[i];
while (!hlist_empty(head)) {
cred = hlist_entry(head->first, struct rpc_cred, cr_hash);
get_rpccred(cred);
if (!list_empty(&cred->cr_lru)) {
list_del(&cred->cr_lru);
number_cred_unused--;
}
list_add_tail(&cred->cr_lru, &free);
rpcauth_unhash_cred_locked(cred);
}
}
spin_unlock(&cache->lock);
spin_unlock(&rpc_credcache_lock);
rpcauth_destroy_credlist(&free);
}
/*
* Destroy the RPC credential cache
*/
void
rpcauth_destroy_credcache(struct rpc_auth *auth)
{
struct rpc_cred_cache *cache = auth->au_credcache;
if (cache) {
auth->au_credcache = NULL;
rpcauth_clear_credcache(cache);
kfree(cache->hashtable);
kfree(cache);
}
}
EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache);
#define RPC_AUTH_EXPIRY_MORATORIUM (60 * HZ)
/*
* Remove stale credentials. Avoid sleeping inside the loop.
*/
static long
rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
{
spinlock_t *cache_lock;
struct rpc_cred *cred, *next;
unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM;
long freed = 0;
list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) {
if (nr_to_scan-- == 0)
break;
/*
* Enforce a 60 second garbage collection moratorium
* Note that the cred_unused list must be time-ordered.
*/
if (time_in_range(cred->cr_expire, expired, jiffies) &&
test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
break;
list_del_init(&cred->cr_lru);
number_cred_unused--;
freed++;
if (atomic_read(&cred->cr_count) != 0)
continue;
cache_lock = &cred->cr_auth->au_credcache->lock;
spin_lock(cache_lock);
if (atomic_read(&cred->cr_count) == 0) {
get_rpccred(cred);
list_add_tail(&cred->cr_lru, free);
rpcauth_unhash_cred_locked(cred);
}
spin_unlock(cache_lock);
}
return freed;
}
static unsigned long
rpcauth_cache_do_shrink(int nr_to_scan)
{
LIST_HEAD(free);
unsigned long freed;
spin_lock(&rpc_credcache_lock);
freed = rpcauth_prune_expired(&free, nr_to_scan);
spin_unlock(&rpc_credcache_lock);
rpcauth_destroy_credlist(&free);
return freed;
}
/*
* Run memory cache shrinker.
*/
static unsigned long
rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
{
if ((sc->gfp_mask & GFP_KERNEL) != GFP_KERNEL)
return SHRINK_STOP;
/* nothing left, don't come back */
if (list_empty(&cred_unused))
return SHRINK_STOP;
return rpcauth_cache_do_shrink(sc->nr_to_scan);
}
static unsigned long
rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
}
static void
rpcauth_cache_enforce_limit(void)
{
unsigned long diff;
unsigned int nr_to_scan;
if (number_cred_unused <= auth_max_cred_cachesize)
return;
diff = number_cred_unused - auth_max_cred_cachesize;
nr_to_scan = 100;
if (diff < nr_to_scan)
nr_to_scan = diff;
rpcauth_cache_do_shrink(nr_to_scan);
}
/*
* Look up a process' credentials in the authentication cache
*/
struct rpc_cred *
rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
int flags)
{
LIST_HEAD(free);
struct rpc_cred_cache *cache = auth->au_credcache;
struct rpc_cred *cred = NULL,
*entry, *new;
unsigned int nr;
nr = hash_long(from_kuid(&init_user_ns, acred->uid), cache->hashbits);
rcu_read_lock();
hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) {
if (!entry->cr_ops->crmatch(acred, entry, flags))
continue;
if (flags & RPCAUTH_LOOKUP_RCU) {
if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) &&
!test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags))
cred = entry;
break;
}
spin_lock(&cache->lock);
if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) {
spin_unlock(&cache->lock);
continue;
}
cred = get_rpccred(entry);
spin_unlock(&cache->lock);
break;
}
rcu_read_unlock();
if (cred != NULL)
goto found;
if (flags & RPCAUTH_LOOKUP_RCU)
return ERR_PTR(-ECHILD);
new = auth->au_ops->crcreate(auth, acred, flags);
if (IS_ERR(new)) {
cred = new;
goto out;
}
spin_lock(&cache->lock);
hlist_for_each_entry(entry, &cache->hashtable[nr], cr_hash) {
if (!entry->cr_ops->crmatch(acred, entry, flags))
continue;
cred = get_rpccred(entry);
break;
}
if (cred == NULL) {
cred = new;
set_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
hlist_add_head_rcu(&cred->cr_hash, &cache->hashtable[nr]);
} else
list_add_tail(&new->cr_lru, &free);
spin_unlock(&cache->lock);
rpcauth_cache_enforce_limit();
found:
if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) &&
cred->cr_ops->cr_init != NULL &&
!(flags & RPCAUTH_LOOKUP_NEW)) {
int res = cred->cr_ops->cr_init(auth, cred);
if (res < 0) {
put_rpccred(cred);
cred = ERR_PTR(res);
}
}
rpcauth_destroy_credlist(&free);
out:
return cred;
}
EXPORT_SYMBOL_GPL(rpcauth_lookup_credcache);
struct rpc_cred *
rpcauth_lookupcred(struct rpc_auth *auth, int flags)
{
struct auth_cred acred;
struct rpc_cred *ret;
const struct cred *cred = current_cred();
dprintk("RPC: looking up %s cred\n",
auth->au_ops->au_name);
memset(&acred, 0, sizeof(acred));
acred.uid = cred->fsuid;
acred.gid = cred->fsgid;
acred.group_info = cred->group_info;
ret = auth->au_ops->lookup_cred(auth, &acred, flags);
return ret;
}
EXPORT_SYMBOL_GPL(rpcauth_lookupcred);
void
rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
struct rpc_auth *auth, const struct rpc_credops *ops)
{
INIT_HLIST_NODE(&cred->cr_hash);
INIT_LIST_HEAD(&cred->cr_lru);
atomic_set(&cred->cr_count, 1);
cred->cr_auth = auth;
cred->cr_ops = ops;
cred->cr_expire = jiffies;
#ifdef RPC_DEBUG
cred->cr_magic = RPCAUTH_CRED_MAGIC;
#endif
cred->cr_uid = acred->uid;
}
EXPORT_SYMBOL_GPL(rpcauth_init_cred);
struct rpc_cred *
rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags)
{
dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid,
cred->cr_auth->au_ops->au_name, cred);
return get_rpccred(cred);
}
EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred);
static struct rpc_cred *
rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags)
{
struct rpc_auth *auth = task->tk_client->cl_auth;
struct auth_cred acred = {
.uid = GLOBAL_ROOT_UID,
.gid = GLOBAL_ROOT_GID,
};
dprintk("RPC: %5u looking up %s cred\n",
task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
return auth->au_ops->lookup_cred(auth, &acred, lookupflags);
}
static struct rpc_cred *
rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags)
{
struct rpc_auth *auth = task->tk_client->cl_auth;
dprintk("RPC: %5u looking up %s cred\n",
task->tk_pid, auth->au_ops->au_name);
return rpcauth_lookupcred(auth, lookupflags);
}
static int
rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_cred *new;
int lookupflags = 0;
if (flags & RPC_TASK_ASYNC)
lookupflags |= RPCAUTH_LOOKUP_NEW;
if (cred != NULL)
new = cred->cr_ops->crbind(task, cred, lookupflags);
else if (flags & RPC_TASK_ROOTCREDS)
new = rpcauth_bind_root_cred(task, lookupflags);
else
new = rpcauth_bind_new_cred(task, lookupflags);
if (IS_ERR(new))
return PTR_ERR(new);
if (req->rq_cred != NULL)
put_rpccred(req->rq_cred);
req->rq_cred = new;
return 0;
}
void
put_rpccred(struct rpc_cred *cred)
{
/* Fast path for unhashed credentials */
if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) == 0) {
if (atomic_dec_and_test(&cred->cr_count))
cred->cr_ops->crdestroy(cred);
return;
}
if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock))
return;
if (!list_empty(&cred->cr_lru)) {
number_cred_unused--;
list_del_init(&cred->cr_lru);
}
if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) {
cred->cr_expire = jiffies;
list_add_tail(&cred->cr_lru, &cred_unused);
number_cred_unused++;
goto out_nodestroy;
}
if (!rpcauth_unhash_cred(cred)) {
/* We were hashed and someone looked us up... */
goto out_nodestroy;
}
}
spin_unlock(&rpc_credcache_lock);
cred->cr_ops->crdestroy(cred);
return;
out_nodestroy:
spin_unlock(&rpc_credcache_lock);
}
EXPORT_SYMBOL_GPL(put_rpccred);
__be32 *
rpcauth_marshcred(struct rpc_task *task, __be32 *p)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
dprintk("RPC: %5u marshaling %s cred %p\n",
task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
return cred->cr_ops->crmarshal(task, p);
}
__be32 *
rpcauth_checkverf(struct rpc_task *task, __be32 *p)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
dprintk("RPC: %5u validating %s cred %p\n",
task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
return cred->cr_ops->crvalidate(task, p);
}
static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
__be32 *data, void *obj)
{
struct xdr_stream xdr;
xdr_init_encode(&xdr, &rqstp->rq_snd_buf, data);
encode(rqstp, &xdr, obj);
}
int
rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp,
__be32 *data, void *obj)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
dprintk("RPC: %5u using %s cred %p to wrap rpc data\n",
task->tk_pid, cred->cr_ops->cr_name, cred);
if (cred->cr_ops->crwrap_req)
return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj);
/* By default, we encode the arguments normally. */
rpcauth_wrap_req_encode(encode, rqstp, data, obj);
return 0;
}
static int
rpcauth_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
__be32 *data, void *obj)
{
struct xdr_stream xdr;
xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, data);
return decode(rqstp, &xdr, obj);
}
int
rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp,
__be32 *data, void *obj)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
dprintk("RPC: %5u using %s cred %p to unwrap rpc data\n",
task->tk_pid, cred->cr_ops->cr_name, cred);
if (cred->cr_ops->crunwrap_resp)
return cred->cr_ops->crunwrap_resp(task, decode, rqstp,
data, obj);
/* By default, we decode the arguments normally. */
return rpcauth_unwrap_req_decode(decode, rqstp, data, obj);
}
int
rpcauth_refreshcred(struct rpc_task *task)
{
struct rpc_cred *cred;
int err;
cred = task->tk_rqstp->rq_cred;
if (cred == NULL) {
err = rpcauth_bindcred(task, task->tk_msg.rpc_cred, task->tk_flags);
if (err < 0)
goto out;
cred = task->tk_rqstp->rq_cred;
}
dprintk("RPC: %5u refreshing %s cred %p\n",
task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
err = cred->cr_ops->crrefresh(task);
out:
if (err < 0)
task->tk_status = err;
return err;
}
void
rpcauth_invalcred(struct rpc_task *task)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
dprintk("RPC: %5u invalidating %s cred %p\n",
task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
if (cred)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
}
int
rpcauth_uptodatecred(struct rpc_task *task)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
return cred == NULL ||
test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0;
}
static struct shrinker rpc_cred_shrinker = {
.count_objects = rpcauth_cache_shrink_count,
.scan_objects = rpcauth_cache_shrink_scan,
.seeks = DEFAULT_SEEKS,
};
int __init rpcauth_init_module(void)
{
int err;
err = rpc_init_authunix();
if (err < 0)
goto out1;
err = rpc_init_generic_auth();
if (err < 0)
goto out2;
register_shrinker(&rpc_cred_shrinker);
return 0;
out2:
rpc_destroy_authunix();
out1:
return err;
}
void rpcauth_remove_module(void)
{
rpc_destroy_authunix();
rpc_destroy_generic_auth();
unregister_shrinker(&rpc_cred_shrinker);
}
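param_set_hashtbl_sz() above stores the auth_hashtable_size module parameter as a number of hash bits, so the credential cache always ends up with a power-of-two bucket count (the default of 4 bits gives 16 buckets, and anything outside 2..14 bits is rejected with -EINVAL). The standalone userspace sketch below mirrors that conversion; my_fls() is a local stand-in for the kernel's fls() (highest set bit, 1-based), and everything else is invented for the demonstration.

/*
 * Userspace sketch of the auth_hashtable_size -> hash-bits conversion done
 * by param_set_hashtbl_sz() above.  Not kernel code; my_fls() replaces the
 * kernel's fls() for the demo.
 */
#include <stdio.h>

#define MAX_HASHTABLE_BITS 14	/* same limit as in auth.c above */

static unsigned int my_fls(unsigned long num)
{
	unsigned int bit = 0;

	while (num) {
		num >>= 1;
		bit++;
	}
	return bit;
}

static int hashtbl_bits(unsigned long requested, unsigned int *nbits)
{
	unsigned int bits = my_fls(requested);

	if (requested > (1UL << bits))
		bits++;
	if (bits > MAX_HASHTABLE_BITS || bits < 2)
		return -1;		/* auth.c rejects this with -EINVAL */
	*nbits = bits;
	return 0;
}

int main(void)
{
	unsigned long req[] = { 10, 100, 1000 };
	unsigned int nbits;
	int i;

	for (i = 0; i < 3; i++)
		if (hashtbl_bits(req[i], &nbits) == 0)
			printf("auth_hashtable_size=%lu -> %u bits (%u buckets)\n",
			       req[i], nbits, 1U << nbits);
	return 0;
}

For example, requesting 100 entries yields a 7-bit, 128-bucket table, which is also the rounded value param_get_hashtbl_sz() reports back when the parameter is read.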

net/sunrpc/auth_generic.c (new file)
@@ -0,0 +1,290 @@
/*
* Generic RPC credential
*
* Copyright (C) 2008, Trond Myklebust <Trond.Myklebust@netapp.com>
*/
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/sunrpc/auth.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/sched.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
#define RPC_MACHINE_CRED_USERID GLOBAL_ROOT_UID
#define RPC_MACHINE_CRED_GROUPID GLOBAL_ROOT_GID
struct generic_cred {
struct rpc_cred gc_base;
struct auth_cred acred;
};
static struct rpc_auth generic_auth;
static const struct rpc_credops generic_credops;
/*
* Public call interface
*/
struct rpc_cred *rpc_lookup_cred(void)
{
return rpcauth_lookupcred(&generic_auth, 0);
}
EXPORT_SYMBOL_GPL(rpc_lookup_cred);
struct rpc_cred *rpc_lookup_cred_nonblock(void)
{
return rpcauth_lookupcred(&generic_auth, RPCAUTH_LOOKUP_RCU);
}
EXPORT_SYMBOL_GPL(rpc_lookup_cred_nonblock);
/*
* Public call interface for looking up machine creds.
*/
struct rpc_cred *rpc_lookup_machine_cred(const char *service_name)
{
struct auth_cred acred = {
.uid = RPC_MACHINE_CRED_USERID,
.gid = RPC_MACHINE_CRED_GROUPID,
.principal = service_name,
.machine_cred = 1,
};
dprintk("RPC: looking up machine cred for service %s\n",
service_name);
return generic_auth.au_ops->lookup_cred(&generic_auth, &acred, 0);
}
EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred);
static struct rpc_cred *generic_bind_cred(struct rpc_task *task,
struct rpc_cred *cred, int lookupflags)
{
struct rpc_auth *auth = task->tk_client->cl_auth;
struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred;
return auth->au_ops->lookup_cred(auth, acred, lookupflags);
}
/*
* Lookup generic creds for current process
*/
static struct rpc_cred *
generic_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{
return rpcauth_lookup_credcache(&generic_auth, acred, flags);
}
static struct rpc_cred *
generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{
struct generic_cred *gcred;
gcred = kmalloc(sizeof(*gcred), GFP_KERNEL);
if (gcred == NULL)
return ERR_PTR(-ENOMEM);
rpcauth_init_cred(&gcred->gc_base, acred, &generic_auth, &generic_credops);
gcred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
gcred->acred.uid = acred->uid;
gcred->acred.gid = acred->gid;
gcred->acred.group_info = acred->group_info;
gcred->acred.ac_flags = 0;
if (gcred->acred.group_info != NULL)
get_group_info(gcred->acred.group_info);
gcred->acred.machine_cred = acred->machine_cred;
gcred->acred.principal = acred->principal;
dprintk("RPC: allocated %s cred %p for uid %d gid %d\n",
gcred->acred.machine_cred ? "machine" : "generic",
gcred,
from_kuid(&init_user_ns, acred->uid),
from_kgid(&init_user_ns, acred->gid));
return &gcred->gc_base;
}
static void
generic_free_cred(struct rpc_cred *cred)
{
struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base);
dprintk("RPC: generic_free_cred %p\n", gcred);
if (gcred->acred.group_info != NULL)
put_group_info(gcred->acred.group_info);
kfree(gcred);
}
static void
generic_free_cred_callback(struct rcu_head *head)
{
struct rpc_cred *cred = container_of(head, struct rpc_cred, cr_rcu);
generic_free_cred(cred);
}
static void
generic_destroy_cred(struct rpc_cred *cred)
{
call_rcu(&cred->cr_rcu, generic_free_cred_callback);
}
static int
machine_cred_match(struct auth_cred *acred, struct generic_cred *gcred, int flags)
{
if (!gcred->acred.machine_cred ||
gcred->acred.principal != acred->principal ||
!uid_eq(gcred->acred.uid, acred->uid) ||
!gid_eq(gcred->acred.gid, acred->gid))
return 0;
return 1;
}
/*
* Match credentials against current process creds.
*/
static int
generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
{
struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base);
int i;
if (acred->machine_cred)
return machine_cred_match(acred, gcred, flags);
if (!uid_eq(gcred->acred.uid, acred->uid) ||
!gid_eq(gcred->acred.gid, acred->gid) ||
gcred->acred.machine_cred != 0)
goto out_nomatch;
/* Optimisation in the case where pointers are identical... */
if (gcred->acred.group_info == acred->group_info)
goto out_match;
/* Slow path... */
if (gcred->acred.group_info->ngroups != acred->group_info->ngroups)
goto out_nomatch;
for (i = 0; i < gcred->acred.group_info->ngroups; i++) {
if (!gid_eq(GROUP_AT(gcred->acred.group_info, i),
GROUP_AT(acred->group_info, i)))
goto out_nomatch;
}
out_match:
return 1;
out_nomatch:
return 0;
}
int __init rpc_init_generic_auth(void)
{
return rpcauth_init_credcache(&generic_auth);
}
void rpc_destroy_generic_auth(void)
{
rpcauth_destroy_credcache(&generic_auth);
}
/*
* Test the current time (now) against the underlying credential key expiry
* minus a timeout and set up notification.
*
* The normal case:
* If 'now' is before the key expiry minus RPC_KEY_EXPIRE_TIMEO, set
* the RPC_CRED_NOTIFY_TIMEOUT flag to setup the underlying credential
* rpc_credops crmatch routine to notify this generic cred when its key
* expiration is within RPC_KEY_EXPIRE_TIMEO, and return 0.
*
* The error case:
* If the underlying cred lookup fails, return -EACCES.
*
* The 'almost' error case:
* If 'now' is within key expiry minus RPC_KEY_EXPIRE_TIMEO, but not within
* key expiry minus RPC_KEY_EXPIRE_FAIL, set the RPC_CRED_EXPIRE_SOON bit
* on the acred ac_flags and return 0.
*/
static int
generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred)
{
struct auth_cred *acred = &container_of(cred, struct generic_cred,
gc_base)->acred;
struct rpc_cred *tcred;
int ret = 0;
/* Fast track for non crkey_timeout (no key) underlying credentials */
if (test_bit(RPC_CRED_NO_CRKEY_TIMEOUT, &acred->ac_flags))
return 0;
/* Fast track for the normal case */
if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags))
return 0;
/* lookup_cred either returns a valid referenced rpc_cred, or PTR_ERR */
tcred = auth->au_ops->lookup_cred(auth, acred, 0);
if (IS_ERR(tcred))
return -EACCES;
if (!tcred->cr_ops->crkey_timeout) {
set_bit(RPC_CRED_NO_CRKEY_TIMEOUT, &acred->ac_flags);
ret = 0;
goto out_put;
}
/* Test for the almost error case */
ret = tcred->cr_ops->crkey_timeout(tcred);
if (ret != 0) {
set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
ret = 0;
} else {
/* In case underlying cred key has been reset */
if (test_and_clear_bit(RPC_CRED_KEY_EXPIRE_SOON,
&acred->ac_flags))
dprintk("RPC: UID %d Credential key reset\n",
from_kuid(&init_user_ns, tcred->cr_uid));
/* set up fasttrack for the normal case */
set_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags);
}
out_put:
put_rpccred(tcred);
return ret;
}
static const struct rpc_authops generic_auth_ops = {
.owner = THIS_MODULE,
.au_name = "Generic",
.lookup_cred = generic_lookup_cred,
.crcreate = generic_create_cred,
.key_timeout = generic_key_timeout,
};
static struct rpc_auth generic_auth = {
.au_ops = &generic_auth_ops,
.au_count = ATOMIC_INIT(0),
};
static bool generic_key_to_expire(struct rpc_cred *cred)
{
struct auth_cred *acred = &container_of(cred, struct generic_cred,
gc_base)->acred;
bool ret;
get_rpccred(cred);
ret = test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
put_rpccred(cred);
return ret;
}
static const struct rpc_credops generic_credops = {
.cr_name = "Generic cred",
.crdestroy = generic_destroy_cred,
.crbind = generic_bind_cred,
.crmatch = generic_match,
.crkey_to_expire = generic_key_to_expire,
};
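The block comment above generic_key_timeout() distinguishes a normal case, an error case, and an "almost" error case. The standalone userspace sketch below models that decision flow (omitting the two fast-path flag checks) so the three outcomes are easy to see side by side; the flag names in the comments refer to the ones used in auth_generic.c, and everything else is invented for the illustration.

/*
 * Userspace model of the three generic_key_timeout() outcomes documented
 * above.  Not kernel code; struct and enum names are demo-only.
 */
#include <stdio.h>
#include <stdbool.h>

enum outcome { KEY_OK, KEY_EXPIRE_SOON, KEY_LOOKUP_FAILED };

struct demo_cred {
	bool lookup_fails;	/* underlying lookup_cred() returns an error */
	bool has_key_timeout;	/* underlying cred implements crkey_timeout() */
	bool expires_soon;	/* crkey_timeout() reports expiry inside the window */
};

static enum outcome key_timeout(const struct demo_cred *c)
{
	if (c->lookup_fails)
		return KEY_LOOKUP_FAILED;	/* -EACCES in the real code */
	if (!c->has_key_timeout)
		return KEY_OK;			/* RPC_CRED_NO_CRKEY_TIMEOUT set */
	if (c->expires_soon)
		return KEY_EXPIRE_SOON;		/* RPC_CRED_KEY_EXPIRE_SOON set */
	return KEY_OK;				/* RPC_CRED_NOTIFY_TIMEOUT set */
}

int main(void)
{
	struct demo_cred normal = { false, true, false };
	struct demo_cred almost = { false, true, true };

	printf("normal: %d, almost-error: %d\n",
	       key_timeout(&normal), key_timeout(&almost));
	return 0;
}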

net/sunrpc/auth_gss/Makefile (new file)
@@ -0,0 +1,14 @@
#
# Makefile for Linux kernel rpcsec_gss implementation
#
obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
auth_rpcgss-y := auth_gss.o gss_generic_token.o \
gss_mech_switch.o svcauth_gss.o \
gss_rpc_upcall.o gss_rpc_xdr.o
obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
rpcsec_gss_krb5-y := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o

File diff suppressed because it is too large.

net/sunrpc/auth_gss/gss_generic_token.c (new file)
@@ -0,0 +1,234 @@
/*
* linux/net/sunrpc/gss_generic_token.c
*
* Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/generic/util_token.c
*
* Copyright (c) 2000 The Regents of the University of Michigan.
* All rights reserved.
*
* Andy Adamson <andros@umich.edu>
*/
/*
* Copyright 1993 by OpenVision Technologies, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appears in all copies and
* that both that copyright notice and this permission notice appear in
* supporting documentation, and that the name of OpenVision not be used
* in advertising or publicity pertaining to distribution of the software
* without specific, written prior permission. OpenVision makes no
* representations about the suitability of this software for any
* purpose. It is provided "as is" without express or implied warranty.
*
* OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
* USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
#include <linux/types.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/gss_asn1.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
/* TWRITE_STR from gssapiP_generic.h */
#define TWRITE_STR(ptr, str, len) \
memcpy((ptr), (char *) (str), (len)); \
(ptr) += (len);
/* XXXX this code currently makes the assumption that a mech oid will
never be longer than 127 bytes. This assumption is not inherent in
the interfaces, so the code can be fixed if the OSI namespace
balloons unexpectedly. */
/* Each token looks like this:
0x60 tag for APPLICATION 0, SEQUENCE
(constructed, definite-length)
<length> possible multiple bytes, need to parse/generate
0x06 tag for OBJECT IDENTIFIER
<moid_length> compile-time constant string (assume 1 byte)
<moid_bytes> compile-time constant string
<inner_bytes> the ANY containing the application token
bytes 0,1 are the token type
bytes 2,n are the token data
For the purposes of this abstraction, the token "header" consists of
the sequence tag and length octets, the mech OID DER encoding, and the
first two inner bytes, which indicate the token type. The token
"body" consists of everything else.
*/
static int
der_length_size( int length)
{
if (length < (1<<7))
return 1;
else if (length < (1<<8))
return 2;
#if (SIZEOF_INT == 2)
else
return 3;
#else
else if (length < (1<<16))
return 3;
else if (length < (1<<24))
return 4;
else
return 5;
#endif
}
static void
der_write_length(unsigned char **buf, int length)
{
if (length < (1<<7)) {
*(*buf)++ = (unsigned char) length;
} else {
*(*buf)++ = (unsigned char) (der_length_size(length)+127);
#if (SIZEOF_INT > 2)
if (length >= (1<<24))
*(*buf)++ = (unsigned char) (length>>24);
if (length >= (1<<16))
*(*buf)++ = (unsigned char) ((length>>16)&0xff);
#endif
if (length >= (1<<8))
*(*buf)++ = (unsigned char) ((length>>8)&0xff);
*(*buf)++ = (unsigned char) (length&0xff);
}
}
/* returns decoded length, or < 0 on failure. Advances buf and
decrements bufsize */
static int
der_read_length(unsigned char **buf, int *bufsize)
{
unsigned char sf;
int ret;
if (*bufsize < 1)
return -1;
sf = *(*buf)++;
(*bufsize)--;
if (sf & 0x80) {
if ((sf &= 0x7f) > ((*bufsize)-1))
return -1;
if (sf > SIZEOF_INT)
return -1;
ret = 0;
for (; sf; sf--) {
ret = (ret<<8) + (*(*buf)++);
(*bufsize)--;
}
} else {
ret = sf;
}
return ret;
}
/* returns the length of a token, given the mech oid and the body size */
int
g_token_size(struct xdr_netobj *mech, unsigned int body_size)
{
/* set body_size to sequence contents size */
body_size += 2 + (int) mech->len; /* NEED overflow check */
return 1 + der_length_size(body_size) + body_size;
}
EXPORT_SYMBOL_GPL(g_token_size);
/* fills in a buffer with the token header. The buffer is assumed to
be the right size. buf is advanced past the token header */
void
g_make_token_header(struct xdr_netobj *mech, int body_size, unsigned char **buf)
{
*(*buf)++ = 0x60;
der_write_length(buf, 2 + mech->len + body_size);
*(*buf)++ = 0x06;
*(*buf)++ = (unsigned char) mech->len;
TWRITE_STR(*buf, mech->data, ((int) mech->len));
}
EXPORT_SYMBOL_GPL(g_make_token_header);
/*
* Given a buffer containing a token, reads and verifies the token,
* leaving buf advanced past the token header, and setting body_size
* to the number of remaining bytes. Returns 0 on success,
* G_BAD_TOK_HEADER for a variety of errors, and G_WRONG_MECH if the
* mechanism in the token does not match the mech argument. buf and
* *body_size are left unmodified on error.
*/
u32
g_verify_token_header(struct xdr_netobj *mech, int *body_size,
unsigned char **buf_in, int toksize)
{
unsigned char *buf = *buf_in;
int seqsize;
struct xdr_netobj toid;
int ret = 0;
if ((toksize-=1) < 0)
return G_BAD_TOK_HEADER;
if (*buf++ != 0x60)
return G_BAD_TOK_HEADER;
if ((seqsize = der_read_length(&buf, &toksize)) < 0)
return G_BAD_TOK_HEADER;
if (seqsize != toksize)
return G_BAD_TOK_HEADER;
if ((toksize-=1) < 0)
return G_BAD_TOK_HEADER;
if (*buf++ != 0x06)
return G_BAD_TOK_HEADER;
if ((toksize-=1) < 0)
return G_BAD_TOK_HEADER;
toid.len = *buf++;
if ((toksize-=toid.len) < 0)
return G_BAD_TOK_HEADER;
toid.data = buf;
buf+=toid.len;
if (! g_OID_equal(&toid, mech))
ret = G_WRONG_MECH;
/* G_WRONG_MECH is not returned immediately because it's more important
to return G_BAD_TOK_HEADER if the token header is in fact bad */
if ((toksize-=2) < 0)
return G_BAD_TOK_HEADER;
if (ret)
return ret;
if (!ret) {
*buf_in = buf;
*body_size = toksize;
}
return ret;
}
EXPORT_SYMBOL_GPL(g_verify_token_header);
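The token framing above is plain DER: a 0x60 APPLICATION-0 tag, a definite length, then the 0x06 OID tag, the mech OID, and a two-byte token type before the body. As a quick illustration of just the definite-length rule implemented by der_write_length()/der_read_length(), here is a self-contained userspace sketch; it assumes a well-formed buffer, skips the bounds checks the kernel code performs, and uses invented names.

/*
 * Userspace round-trip of the DER definite-length encoding used by
 * g_make_token_header()/g_verify_token_header() above: lengths < 128 are a
 * single byte, longer lengths are (0x80 + n) followed by n big-endian bytes.
 * Demo-only; no bounds checking.
 */
#include <stdio.h>

static void write_length(unsigned char **buf, int length)
{
	if (length < (1 << 7)) {
		*(*buf)++ = (unsigned char)length;
	} else {
		int n = (length < (1 << 8)) ? 1 : (length < (1 << 16)) ? 2 :
			(length < (1 << 24)) ? 3 : 4;

		*(*buf)++ = (unsigned char)(0x80 + n);
		while (n--)
			*(*buf)++ = (unsigned char)((length >> (8 * n)) & 0xff);
	}
}

static int read_length(unsigned char **buf, int *bufsize)
{
	unsigned char sf = *(*buf)++;
	int ret = 0;

	(*bufsize)--;
	if (!(sf & 0x80))
		return sf;
	for (sf &= 0x7f; sf; sf--) {
		ret = (ret << 8) + *(*buf)++;
		(*bufsize)--;
	}
	return ret;
}

int main(void)
{
	unsigned char tok[8], *p = tok;
	int size, i;

	write_length(&p, 1500);		/* encodes as 82 05 dc */
	size = (int)(p - tok);
	for (i = 0; i < size; i++)
		printf("%02x ", tok[i]);
	p = tok;
	printf("-> %d\n", read_length(&p, &size));
	return 0;
}

A length of 1500 encodes as the three bytes 82 05 dc and decodes back to 1500, matching what g_verify_token_header() expects to find right after the 0x60 tag.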

net/sunrpc/auth_gss/gss_krb5_crypto.c (new file)
@@ -0,0 +1,988 @@
/*
* linux/net/sunrpc/gss_krb5_crypto.c
*
* Copyright (c) 2000-2008 The Regents of the University of Michigan.
* All rights reserved.
*
* Andy Adamson <andros@umich.edu>
* Bruce Fields <bfields@umich.edu>
*/
/*
* Copyright (C) 1998 by the FundsXpress, INC.
*
* All rights reserved.
*
* Export of this software from the United States of America may require
* a specific license from the United States Government. It is the
* responsibility of any person or organization contemplating export to
* obtain such a license before exporting.
*
* WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
* distribute this software and its documentation for any purpose and
* without fee is hereby granted, provided that the above copyright
* notice appear in all copies and that both that copyright notice and
* this permission notice appear in supporting documentation, and that
* the name of FundsXpress. not be used in advertising or publicity pertaining
* to distribution of the software without specific, written prior
* permission. FundsXpress makes no representations about the suitability of
* this software for any purpose. It is provided "as is" without express
* or implied warranty.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
#include <linux/err.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/crypto.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/random.h>
#include <linux/sunrpc/gss_krb5.h>
#include <linux/sunrpc/xdr.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
u32
krb5_encrypt(
struct crypto_blkcipher *tfm,
void * iv,
void * in,
void * out,
int length)
{
u32 ret = -EINVAL;
struct scatterlist sg[1];
u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
if (length % crypto_blkcipher_blocksize(tfm) != 0)
goto out;
if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
dprintk("RPC: gss_k5encrypt: tfm iv size too large %d\n",
crypto_blkcipher_ivsize(tfm));
goto out;
}
if (iv)
memcpy(local_iv, iv, crypto_blkcipher_ivsize(tfm));
memcpy(out, in, length);
sg_init_one(sg, out, length);
ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, length);
out:
dprintk("RPC: krb5_encrypt returns %d\n", ret);
return ret;
}
u32
krb5_decrypt(
struct crypto_blkcipher *tfm,
void * iv,
void * in,
void * out,
int length)
{
u32 ret = -EINVAL;
struct scatterlist sg[1];
u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
if (length % crypto_blkcipher_blocksize(tfm) != 0)
goto out;
if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
dprintk("RPC: gss_k5decrypt: tfm iv size too large %d\n",
crypto_blkcipher_ivsize(tfm));
goto out;
}
if (iv)
memcpy(local_iv,iv, crypto_blkcipher_ivsize(tfm));
memcpy(out, in, length);
sg_init_one(sg, out, length);
ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, length);
out:
dprintk("RPC: gss_k5decrypt returns %d\n",ret);
return ret;
}
static int
checksummer(struct scatterlist *sg, void *data)
{
struct hash_desc *desc = data;
return crypto_hash_update(desc, sg, sg->length);
}
static int
arcfour_hmac_md5_usage_to_salt(unsigned int usage, u8 salt[4])
{
unsigned int ms_usage;
switch (usage) {
case KG_USAGE_SIGN:
ms_usage = 15;
break;
case KG_USAGE_SEAL:
ms_usage = 13;
break;
default:
return -EINVAL;
}
salt[0] = (ms_usage >> 0) & 0xff;
salt[1] = (ms_usage >> 8) & 0xff;
salt[2] = (ms_usage >> 16) & 0xff;
salt[3] = (ms_usage >> 24) & 0xff;
return 0;
}
static u32
make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
struct xdr_buf *body, int body_offset, u8 *cksumkey,
unsigned int usage, struct xdr_netobj *cksumout)
{
struct hash_desc desc;
struct scatterlist sg[1];
int err;
u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
u8 rc4salt[4];
struct crypto_hash *md5;
struct crypto_hash *hmac_md5;
if (cksumkey == NULL)
return GSS_S_FAILURE;
if (cksumout->len < kctx->gk5e->cksumlength) {
dprintk("%s: checksum buffer length, %u, too small for %s\n",
__func__, cksumout->len, kctx->gk5e->name);
return GSS_S_FAILURE;
}
if (arcfour_hmac_md5_usage_to_salt(usage, rc4salt)) {
dprintk("%s: invalid usage value %u\n", __func__, usage);
return GSS_S_FAILURE;
}
md5 = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(md5))
return GSS_S_FAILURE;
hmac_md5 = crypto_alloc_hash(kctx->gk5e->cksum_name, 0,
CRYPTO_ALG_ASYNC);
if (IS_ERR(hmac_md5)) {
crypto_free_hash(md5);
return GSS_S_FAILURE;
}
desc.tfm = md5;
desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
err = crypto_hash_init(&desc);
if (err)
goto out;
sg_init_one(sg, rc4salt, 4);
err = crypto_hash_update(&desc, sg, 4);
if (err)
goto out;
sg_init_one(sg, header, hdrlen);
err = crypto_hash_update(&desc, sg, hdrlen);
if (err)
goto out;
err = xdr_process_buf(body, body_offset, body->len - body_offset,
checksummer, &desc);
if (err)
goto out;
err = crypto_hash_final(&desc, checksumdata);
if (err)
goto out;
desc.tfm = hmac_md5;
desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
err = crypto_hash_init(&desc);
if (err)
goto out;
err = crypto_hash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength);
if (err)
goto out;
sg_init_one(sg, checksumdata, crypto_hash_digestsize(md5));
err = crypto_hash_digest(&desc, sg, crypto_hash_digestsize(md5),
checksumdata);
if (err)
goto out;
memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
cksumout->len = kctx->gk5e->cksumlength;
out:
crypto_free_hash(md5);
crypto_free_hash(hmac_md5);
return err ? GSS_S_FAILURE : 0;
}
/*
* checksum the plaintext data and hdrlen bytes of the token header
* The checksum is performed over the first 8 bytes of the
* gss token header and then over the data body
*/
u32
make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
struct xdr_buf *body, int body_offset, u8 *cksumkey,
unsigned int usage, struct xdr_netobj *cksumout)
{
struct hash_desc desc;
struct scatterlist sg[1];
int err;
u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
unsigned int checksumlen;
if (kctx->gk5e->ctype == CKSUMTYPE_HMAC_MD5_ARCFOUR)
return make_checksum_hmac_md5(kctx, header, hdrlen,
body, body_offset,
cksumkey, usage, cksumout);
if (cksumout->len < kctx->gk5e->cksumlength) {
dprintk("%s: checksum buffer length, %u, too small for %s\n",
__func__, cksumout->len, kctx->gk5e->name);
return GSS_S_FAILURE;
}
desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(desc.tfm))
return GSS_S_FAILURE;
desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
checksumlen = crypto_hash_digestsize(desc.tfm);
if (cksumkey != NULL) {
err = crypto_hash_setkey(desc.tfm, cksumkey,
kctx->gk5e->keylength);
if (err)
goto out;
}
err = crypto_hash_init(&desc);
if (err)
goto out;
sg_init_one(sg, header, hdrlen);
err = crypto_hash_update(&desc, sg, hdrlen);
if (err)
goto out;
err = xdr_process_buf(body, body_offset, body->len - body_offset,
checksummer, &desc);
if (err)
goto out;
err = crypto_hash_final(&desc, checksumdata);
if (err)
goto out;
switch (kctx->gk5e->ctype) {
case CKSUMTYPE_RSA_MD5:
err = kctx->gk5e->encrypt(kctx->seq, NULL, checksumdata,
checksumdata, checksumlen);
if (err)
goto out;
memcpy(cksumout->data,
checksumdata + checksumlen - kctx->gk5e->cksumlength,
kctx->gk5e->cksumlength);
break;
case CKSUMTYPE_HMAC_SHA1_DES3:
memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
break;
default:
BUG();
break;
}
cksumout->len = kctx->gk5e->cksumlength;
out:
crypto_free_hash(desc.tfm);
return err ? GSS_S_FAILURE : 0;
}
/*
* checksum the plaintext data and hdrlen bytes of the token header
* Per rfc4121, sec. 4.2.4, the checksum is performed over the data
* body then over the first 16 octets of the MIC token
* Inclusion of the header data in the calculation of the
* checksum is optional.
*/
u32
make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
struct xdr_buf *body, int body_offset, u8 *cksumkey,
unsigned int usage, struct xdr_netobj *cksumout)
{
struct hash_desc desc;
struct scatterlist sg[1];
int err;
u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
unsigned int checksumlen;
if (kctx->gk5e->keyed_cksum == 0) {
dprintk("%s: expected keyed hash for %s\n",
__func__, kctx->gk5e->name);
return GSS_S_FAILURE;
}
if (cksumkey == NULL) {
dprintk("%s: no key supplied for %s\n",
__func__, kctx->gk5e->name);
return GSS_S_FAILURE;
}
desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0,
CRYPTO_ALG_ASYNC);
if (IS_ERR(desc.tfm))
return GSS_S_FAILURE;
checksumlen = crypto_hash_digestsize(desc.tfm);
desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
err = crypto_hash_setkey(desc.tfm, cksumkey, kctx->gk5e->keylength);
if (err)
goto out;
err = crypto_hash_init(&desc);
if (err)
goto out;
err = xdr_process_buf(body, body_offset, body->len - body_offset,
checksummer, &desc);
if (err)
goto out;
if (header != NULL) {
sg_init_one(sg, header, hdrlen);
err = crypto_hash_update(&desc, sg, hdrlen);
if (err)
goto out;
}
err = crypto_hash_final(&desc, checksumdata);
if (err)
goto out;
cksumout->len = kctx->gk5e->cksumlength;
switch (kctx->gk5e->ctype) {
case CKSUMTYPE_HMAC_SHA1_96_AES128:
case CKSUMTYPE_HMAC_SHA1_96_AES256:
/* note that this truncates the hash */
memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
break;
default:
BUG();
break;
}
out:
crypto_free_hash(desc.tfm);
return err ? GSS_S_FAILURE : 0;
}
struct encryptor_desc {
u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
struct blkcipher_desc desc;
int pos;
struct xdr_buf *outbuf;
struct page **pages;
struct scatterlist infrags[4];
struct scatterlist outfrags[4];
int fragno;
int fraglen;
};
static int
encryptor(struct scatterlist *sg, void *data)
{
struct encryptor_desc *desc = data;
struct xdr_buf *outbuf = desc->outbuf;
struct page *in_page;
int thislen = desc->fraglen + sg->length;
int fraglen, ret;
int page_pos;
/* Worst case is 4 fragments: head, end of page 1, start
* of page 2, tail. Anything more is a bug. */
BUG_ON(desc->fragno > 3);
page_pos = desc->pos - outbuf->head[0].iov_len;
if (page_pos >= 0 && page_pos < outbuf->page_len) {
/* pages are not in place: */
int i = (page_pos + outbuf->page_base) >> PAGE_CACHE_SHIFT;
in_page = desc->pages[i];
} else {
in_page = sg_page(sg);
}
sg_set_page(&desc->infrags[desc->fragno], in_page, sg->length,
sg->offset);
sg_set_page(&desc->outfrags[desc->fragno], sg_page(sg), sg->length,
sg->offset);
desc->fragno++;
desc->fraglen += sg->length;
desc->pos += sg->length;
fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1);
thislen -= fraglen;
if (thislen == 0)
return 0;
sg_mark_end(&desc->infrags[desc->fragno - 1]);
sg_mark_end(&desc->outfrags[desc->fragno - 1]);
ret = crypto_blkcipher_encrypt_iv(&desc->desc, desc->outfrags,
desc->infrags, thislen);
if (ret)
return ret;
sg_init_table(desc->infrags, 4);
sg_init_table(desc->outfrags, 4);
if (fraglen) {
sg_set_page(&desc->outfrags[0], sg_page(sg), fraglen,
sg->offset + sg->length - fraglen);
desc->infrags[0] = desc->outfrags[0];
sg_assign_page(&desc->infrags[0], in_page);
desc->fragno = 1;
desc->fraglen = fraglen;
} else {
desc->fragno = 0;
desc->fraglen = 0;
}
return 0;
}
int
gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
int offset, struct page **pages)
{
int ret;
struct encryptor_desc desc;
BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0);
memset(desc.iv, 0, sizeof(desc.iv));
desc.desc.tfm = tfm;
desc.desc.info = desc.iv;
desc.desc.flags = 0;
desc.pos = offset;
desc.outbuf = buf;
desc.pages = pages;
desc.fragno = 0;
desc.fraglen = 0;
sg_init_table(desc.infrags, 4);
sg_init_table(desc.outfrags, 4);
ret = xdr_process_buf(buf, offset, buf->len - offset, encryptor, &desc);
return ret;
}
struct decryptor_desc {
u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
struct blkcipher_desc desc;
struct scatterlist frags[4];
int fragno;
int fraglen;
};
static int
decryptor(struct scatterlist *sg, void *data)
{
struct decryptor_desc *desc = data;
int thislen = desc->fraglen + sg->length;
int fraglen, ret;
/* Worst case is 4 fragments: head, end of page 1, start
* of page 2, tail. Anything more is a bug. */
BUG_ON(desc->fragno > 3);
sg_set_page(&desc->frags[desc->fragno], sg_page(sg), sg->length,
sg->offset);
desc->fragno++;
desc->fraglen += sg->length;
fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1);
thislen -= fraglen;
if (thislen == 0)
return 0;
sg_mark_end(&desc->frags[desc->fragno - 1]);
ret = crypto_blkcipher_decrypt_iv(&desc->desc, desc->frags,
desc->frags, thislen);
if (ret)
return ret;
sg_init_table(desc->frags, 4);
if (fraglen) {
sg_set_page(&desc->frags[0], sg_page(sg), fraglen,
sg->offset + sg->length - fraglen);
desc->fragno = 1;
desc->fraglen = fraglen;
} else {
desc->fragno = 0;
desc->fraglen = 0;
}
return 0;
}
int
gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
int offset)
{
struct decryptor_desc desc;
/* XXXJBF: */
BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0);
memset(desc.iv, 0, sizeof(desc.iv));
desc.desc.tfm = tfm;
desc.desc.info = desc.iv;
desc.desc.flags = 0;
desc.fragno = 0;
desc.fraglen = 0;
sg_init_table(desc.frags, 4);
return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc);
}
/*
* This function makes the assumption that it was ultimately called
* from gss_wrap().
*
* The client auth_gss code moves any existing tail data into a
* separate page before calling gss_wrap.
* The server svcauth_gss code ensures that both the head and the
* tail have slack space of RPC_MAX_AUTH_SIZE before calling gss_wrap.
*
* Even with that guarantee, this function may be called more than
* once in the processing of gss_wrap(). The best we can do is
* verify at compile-time (see GSS_KRB5_SLACK_CHECK) that the
* largest expected shift will fit within RPC_MAX_AUTH_SIZE.
* At run-time we can verify that a single invocation of this
 * function doesn't attempt to use more than RPC_MAX_AUTH_SIZE.
*/
int
xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen)
{
u8 *p;
if (shiftlen == 0)
return 0;
BUILD_BUG_ON(GSS_KRB5_MAX_SLACK_NEEDED > RPC_MAX_AUTH_SIZE);
BUG_ON(shiftlen > RPC_MAX_AUTH_SIZE);
p = buf->head[0].iov_base + base;
memmove(p + shiftlen, p, buf->head[0].iov_len - base);
buf->head[0].iov_len += shiftlen;
buf->len += shiftlen;
return 0;
}
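As a rough user-space illustration of the shift that xdr_extend_head() performs (not part of the original file, and using made-up buffer contents and sizes), the sketch below opens a shiftlen-byte gap at base exactly the way the memmove above does, so a confounder can later be dropped into that gap:

#include <assert.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* pretend head buffer with slack at the end, as gss_wrap() guarantees */
	char head[32] = "RPCHDRpayload";
	size_t base = 6;	/* shift everything from here onward */
	size_t used = 13;	/* bytes currently valid in head     */
	size_t shiftlen = 8;	/* e.g. an 8-byte confounder         */

	/* same move as xdr_extend_head(): open a shiftlen-byte gap at base */
	memmove(head + base + shiftlen, head + base, used - base);
	memset(head + base, '?', shiftlen);	/* confounder would land here */
	used += shiftlen;

	assert(memcmp(head + base + shiftlen, "payload", 7) == 0);
	printf("%.*s\n", (int)used, head);
	return 0;
}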
static u32
gss_krb5_cts_crypt(struct crypto_blkcipher *cipher, struct xdr_buf *buf,
u32 offset, u8 *iv, struct page **pages, int encrypt)
{
u32 ret;
struct scatterlist sg[1];
struct blkcipher_desc desc = { .tfm = cipher, .info = iv };
u8 data[GSS_KRB5_MAX_BLOCKSIZE * 2];
struct page **save_pages;
u32 len = buf->len - offset;
if (len > ARRAY_SIZE(data)) {
WARN_ON(1);	/* len exceeds the two-block on-stack buffer */
return -ENOMEM;
}
/*
* For encryption, we want to read from the cleartext
* page cache pages, and write the encrypted data to
* the supplied xdr_buf pages.
*/
save_pages = buf->pages;
if (encrypt)
buf->pages = pages;
ret = read_bytes_from_xdr_buf(buf, offset, data, len);
buf->pages = save_pages;
if (ret)
goto out;
sg_init_one(sg, data, len);
if (encrypt)
ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, len);
else
ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, len);
if (ret)
goto out;
ret = write_bytes_to_xdr_buf(buf, offset, data, len);
out:
return ret;
}
u32
gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
struct xdr_buf *buf, struct page **pages)
{
u32 err;
struct xdr_netobj hmac;
u8 *cksumkey;
u8 *ecptr;
struct crypto_blkcipher *cipher, *aux_cipher;
int blocksize;
struct page **save_pages;
int nblocks, nbytes;
struct encryptor_desc desc;
u32 cbcbytes;
unsigned int usage;
if (kctx->initiate) {
cipher = kctx->initiator_enc;
aux_cipher = kctx->initiator_enc_aux;
cksumkey = kctx->initiator_integ;
usage = KG_USAGE_INITIATOR_SEAL;
} else {
cipher = kctx->acceptor_enc;
aux_cipher = kctx->acceptor_enc_aux;
cksumkey = kctx->acceptor_integ;
usage = KG_USAGE_ACCEPTOR_SEAL;
}
blocksize = crypto_blkcipher_blocksize(cipher);
/* hide the gss token header and insert the confounder */
offset += GSS_KRB5_TOK_HDR_LEN;
if (xdr_extend_head(buf, offset, kctx->gk5e->conflen))
return GSS_S_FAILURE;
gss_krb5_make_confounder(buf->head[0].iov_base + offset, kctx->gk5e->conflen);
offset -= GSS_KRB5_TOK_HDR_LEN;
if (buf->tail[0].iov_base != NULL) {
ecptr = buf->tail[0].iov_base + buf->tail[0].iov_len;
} else {
buf->tail[0].iov_base = buf->head[0].iov_base
+ buf->head[0].iov_len;
buf->tail[0].iov_len = 0;
ecptr = buf->tail[0].iov_base;
}
/* copy plaintext gss token header after filler (if any) */
memcpy(ecptr, buf->head[0].iov_base + offset, GSS_KRB5_TOK_HDR_LEN);
buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN;
buf->len += GSS_KRB5_TOK_HDR_LEN;
/* Do the HMAC */
hmac.len = GSS_KRB5_MAX_CKSUM_LEN;
hmac.data = buf->tail[0].iov_base + buf->tail[0].iov_len;
/*
* When we are called, pages points to the real page cache
* data -- which we can't go and encrypt! buf->pages points
* to scratch pages which we are going to send off to the
* client/server. Swap in the plaintext pages to calculate
* the hmac.
*/
save_pages = buf->pages;
buf->pages = pages;
err = make_checksum_v2(kctx, NULL, 0, buf,
offset + GSS_KRB5_TOK_HDR_LEN,
cksumkey, usage, &hmac);
buf->pages = save_pages;
if (err)
return GSS_S_FAILURE;
nbytes = buf->len - offset - GSS_KRB5_TOK_HDR_LEN;
nblocks = (nbytes + blocksize - 1) / blocksize;
cbcbytes = 0;
if (nblocks > 2)
cbcbytes = (nblocks - 2) * blocksize;
memset(desc.iv, 0, sizeof(desc.iv));
if (cbcbytes) {
desc.pos = offset + GSS_KRB5_TOK_HDR_LEN;
desc.fragno = 0;
desc.fraglen = 0;
desc.pages = pages;
desc.outbuf = buf;
desc.desc.info = desc.iv;
desc.desc.flags = 0;
desc.desc.tfm = aux_cipher;
sg_init_table(desc.infrags, 4);
sg_init_table(desc.outfrags, 4);
err = xdr_process_buf(buf, offset + GSS_KRB5_TOK_HDR_LEN,
cbcbytes, encryptor, &desc);
if (err)
goto out_err;
}
/* Make sure IV carries forward from any CBC results. */
err = gss_krb5_cts_crypt(cipher, buf,
offset + GSS_KRB5_TOK_HDR_LEN + cbcbytes,
desc.iv, pages, 1);
if (err) {
err = GSS_S_FAILURE;
goto out_err;
}
/* Now update buf to account for HMAC */
buf->tail[0].iov_len += kctx->gk5e->cksumlength;
buf->len += kctx->gk5e->cksumlength;
out_err:
if (err)
err = GSS_S_FAILURE;
return err;
}
u32
gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
u32 *headskip, u32 *tailskip)
{
struct xdr_buf subbuf;
u32 ret = 0;
u8 *cksum_key;
struct crypto_blkcipher *cipher, *aux_cipher;
struct xdr_netobj our_hmac_obj;
u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN];
u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN];
int nblocks, blocksize, cbcbytes;
struct decryptor_desc desc;
unsigned int usage;
if (kctx->initiate) {
cipher = kctx->acceptor_enc;
aux_cipher = kctx->acceptor_enc_aux;
cksum_key = kctx->acceptor_integ;
usage = KG_USAGE_ACCEPTOR_SEAL;
} else {
cipher = kctx->initiator_enc;
aux_cipher = kctx->initiator_enc_aux;
cksum_key = kctx->initiator_integ;
usage = KG_USAGE_INITIATOR_SEAL;
}
blocksize = crypto_blkcipher_blocksize(cipher);
/* create a segment skipping the header and leaving out the checksum */
xdr_buf_subsegment(buf, &subbuf, offset + GSS_KRB5_TOK_HDR_LEN,
(buf->len - offset - GSS_KRB5_TOK_HDR_LEN -
kctx->gk5e->cksumlength));
nblocks = (subbuf.len + blocksize - 1) / blocksize;
cbcbytes = 0;
if (nblocks > 2)
cbcbytes = (nblocks - 2) * blocksize;
memset(desc.iv, 0, sizeof(desc.iv));
if (cbcbytes) {
desc.fragno = 0;
desc.fraglen = 0;
desc.desc.info = desc.iv;
desc.desc.flags = 0;
desc.desc.tfm = aux_cipher;
sg_init_table(desc.frags, 4);
ret = xdr_process_buf(&subbuf, 0, cbcbytes, decryptor, &desc);
if (ret)
goto out_err;
}
/* Make sure IV carries forward from any CBC results. */
ret = gss_krb5_cts_crypt(cipher, &subbuf, cbcbytes, desc.iv, NULL, 0);
if (ret)
goto out_err;
/* Calculate our hmac over the plaintext data */
our_hmac_obj.len = sizeof(our_hmac);
our_hmac_obj.data = our_hmac;
ret = make_checksum_v2(kctx, NULL, 0, &subbuf, 0,
cksum_key, usage, &our_hmac_obj);
if (ret)
goto out_err;
/* Get the packet's hmac value */
ret = read_bytes_from_xdr_buf(buf, buf->len - kctx->gk5e->cksumlength,
pkt_hmac, kctx->gk5e->cksumlength);
if (ret)
goto out_err;
if (memcmp(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) {
ret = GSS_S_BAD_SIG;
goto out_err;
}
*headskip = kctx->gk5e->conflen;
*tailskip = kctx->gk5e->cksumlength;
out_err:
if (ret && ret != GSS_S_BAD_SIG)
ret = GSS_S_FAILURE;
return ret;
}
/*
* Compute Kseq given the initial session key and the checksum.
* Set the key of the given cipher.
*/
int
krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
unsigned char *cksum)
{
struct crypto_hash *hmac;
struct hash_desc desc;
struct scatterlist sg[1];
u8 Kseq[GSS_KRB5_MAX_KEYLEN];
u32 zeroconstant = 0;
int err;
dprintk("%s: entered\n", __func__);
hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(hmac)) {
dprintk("%s: error %ld, allocating hash '%s'\n",
__func__, PTR_ERR(hmac), kctx->gk5e->cksum_name);
return PTR_ERR(hmac);
}
desc.tfm = hmac;
desc.flags = 0;
err = crypto_hash_init(&desc);
if (err)
goto out_err;
/* Compute intermediate Kseq from session key */
err = crypto_hash_setkey(hmac, kctx->Ksess, kctx->gk5e->keylength);
if (err)
goto out_err;
sg_init_table(sg, 1);
sg_set_buf(sg, &zeroconstant, 4);
err = crypto_hash_digest(&desc, sg, 4, Kseq);
if (err)
goto out_err;
/* Compute final Kseq from the checksum and intermediate Kseq */
err = crypto_hash_setkey(hmac, Kseq, kctx->gk5e->keylength);
if (err)
goto out_err;
sg_set_buf(sg, cksum, 8);
err = crypto_hash_digest(&desc, sg, 8, Kseq);
if (err)
goto out_err;
err = crypto_blkcipher_setkey(cipher, Kseq, kctx->gk5e->keylength);
if (err)
goto out_err;
err = 0;
out_err:
crypto_free_hash(hmac);
dprintk("%s: returning %d\n", __func__, err);
return err;
}
/*
* Compute Kcrypt given the initial session key and the plaintext seqnum.
* Set the key of cipher kctx->enc.
*/
int
krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
s32 seqnum)
{
struct crypto_hash *hmac;
struct hash_desc desc;
struct scatterlist sg[1];
u8 Kcrypt[GSS_KRB5_MAX_KEYLEN];
u8 zeroconstant[4] = {0};
u8 seqnumarray[4];
int err, i;
dprintk("%s: entered, seqnum %u\n", __func__, seqnum);
hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(hmac)) {
dprintk("%s: error %ld, allocating hash '%s'\n",
__func__, PTR_ERR(hmac), kctx->gk5e->cksum_name);
return PTR_ERR(hmac);
}
desc.tfm = hmac;
desc.flags = 0;
err = crypto_hash_init(&desc);
if (err)
goto out_err;
/* Compute intermediate Kcrypt from session key */
for (i = 0; i < kctx->gk5e->keylength; i++)
Kcrypt[i] = kctx->Ksess[i] ^ 0xf0;
err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength);
if (err)
goto out_err;
sg_init_table(sg, 1);
sg_set_buf(sg, zeroconstant, 4);
err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
if (err)
goto out_err;
/* Compute final Kcrypt from the seqnum and intermediate Kcrypt */
err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength);
if (err)
goto out_err;
seqnumarray[0] = (unsigned char) ((seqnum >> 24) & 0xff);
seqnumarray[1] = (unsigned char) ((seqnum >> 16) & 0xff);
seqnumarray[2] = (unsigned char) ((seqnum >> 8) & 0xff);
seqnumarray[3] = (unsigned char) ((seqnum >> 0) & 0xff);
sg_set_buf(sg, seqnumarray, 4);
err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
if (err)
goto out_err;
err = crypto_blkcipher_setkey(cipher, Kcrypt, kctx->gk5e->keylength);
if (err)
goto out_err;
err = 0;
out_err:
crypto_free_hash(hmac);
dprintk("%s: returning %d\n", __func__, err);
return err;
}

327
net/sunrpc/auth_gss/gss_krb5_keys.c Normal file
View file

@ -0,0 +1,327 @@
/*
* COPYRIGHT (c) 2008
* The Regents of the University of Michigan
* ALL RIGHTS RESERVED
*
* Permission is granted to use, copy, create derivative works
* and redistribute this software and such derivative works
* for any purpose, so long as the name of The University of
* Michigan is not used in any advertising or publicity
* pertaining to the use of distribution of this software
* without specific, written prior authorization. If the
* above copyright notice or any other identification of the
* University of Michigan is included in any copy of any
* portion of this software, then the disclaimer below must
* also be included.
*
* THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION
* FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY
* PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF
* MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
* WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
* REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE
* FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR
* CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING
* OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
* IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGES.
*/
/*
* Copyright (C) 1998 by the FundsXpress, INC.
*
* All rights reserved.
*
* Export of this software from the United States of America may require
* a specific license from the United States Government. It is the
* responsibility of any person or organization contemplating export to
* obtain such a license before exporting.
*
* WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
* distribute this software and its documentation for any purpose and
* without fee is hereby granted, provided that the above copyright
* notice appear in all copies and that both that copyright notice and
* this permission notice appear in supporting documentation, and that
* the name of FundsXpress. not be used in advertising or publicity pertaining
* to distribution of the software without specific, written prior
* permission. FundsXpress makes no representations about the suitability of
* this software for any purpose. It is provided "as is" without express
* or implied warranty.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
#include <linux/err.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/sunrpc/gss_krb5.h>
#include <linux/sunrpc/xdr.h>
#include <linux/lcm.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
/*
* This is the n-fold function as described in rfc3961, sec 5.1
* Taken from MIT Kerberos and modified.
*/
static void krb5_nfold(u32 inbits, const u8 *in,
u32 outbits, u8 *out)
{
unsigned long ulcm;
int byte, i, msbit;
/* the code below is more readable if I make these bytes
instead of bits */
inbits >>= 3;
outbits >>= 3;
/* first compute lcm(n,k) */
ulcm = lcm(inbits, outbits);
/* now do the real work */
memset(out, 0, outbits);
byte = 0;
/* this will end up cycling through k lcm(k,n)/k times, which
is correct */
for (i = ulcm-1; i >= 0; i--) {
/* compute the msbit in k which gets added into this byte */
msbit = (
/* first, start with the msbit in the first,
* unrotated byte */
((inbits << 3) - 1)
/* then, for each byte, shift to the right
* for each repetition */
+ (((inbits << 3) + 13) * (i/inbits))
/* last, pick out the correct byte within
* that shifted repetition */
+ ((inbits - (i % inbits)) << 3)
) % (inbits << 3);
/* pull out the byte value itself */
byte += (((in[((inbits - 1) - (msbit >> 3)) % inbits] << 8)|
(in[((inbits) - (msbit >> 3)) % inbits]))
>> ((msbit & 7) + 1)) & 0xff;
/* do the addition */
byte += out[i % outbits];
out[i % outbits] = byte & 0xff;
/* keep around the carry bit, if any */
byte >>= 8;
}
/* if there's a carry bit left over, add it back in */
if (byte) {
for (i = outbits - 1; i >= 0; i--) {
/* do the addition */
byte += out[i];
out[i] = byte & 0xff;
/* keep around the carry bit, if any */
byte >>= 8;
}
}
}
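A quick way to exercise the routine above is against the RFC 3961 appendix A.1 test vectors, e.g. 64-fold("kerberos") = 6b 65 72 62 65 72 6f 73. The user-space sketch below is not part of the original file; it assumes krb5_nfold() has been copied into the same translation unit with its static qualifier dropped and a trivial lcm() supplied in place of <linux/lcm.h>:

#include <assert.h>
#include <string.h>

typedef unsigned int u32;
typedef unsigned char u8;

/* the routine above, assumed compiled into this unit without 'static' */
void krb5_nfold(u32 inbits, const u8 *in, u32 outbits, u8 *out);

int main(void)
{
	/* RFC 3961 A.1: 64-fold("kerberos") reproduces the input bytes */
	static const u8 expect[8] =
		{ 0x6b, 0x65, 0x72, 0x62, 0x65, 0x72, 0x6f, 0x73 };
	u8 out[8];

	krb5_nfold(64, (const u8 *)"kerberos", 64, out);
	assert(memcmp(out, expect, sizeof(out)) == 0);
	return 0;
}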
/*
* This is the DK (derive_key) function as described in rfc3961, sec 5.1
* Taken from MIT Kerberos and modified.
*/
u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
const struct xdr_netobj *inkey,
struct xdr_netobj *outkey,
const struct xdr_netobj *in_constant,
gfp_t gfp_mask)
{
size_t blocksize, keybytes, keylength, n;
unsigned char *inblockdata, *outblockdata, *rawkey;
struct xdr_netobj inblock, outblock;
struct crypto_blkcipher *cipher;
u32 ret = EINVAL;
blocksize = gk5e->blocksize;
keybytes = gk5e->keybytes;
keylength = gk5e->keylength;
if ((inkey->len != keylength) || (outkey->len != keylength))
goto err_return;
cipher = crypto_alloc_blkcipher(gk5e->encrypt_name, 0,
CRYPTO_ALG_ASYNC);
if (IS_ERR(cipher))
goto err_return;
if (crypto_blkcipher_setkey(cipher, inkey->data, inkey->len))
goto err_return;
/* allocate and set up buffers */
ret = ENOMEM;
inblockdata = kmalloc(blocksize, gfp_mask);
if (inblockdata == NULL)
goto err_free_cipher;
outblockdata = kmalloc(blocksize, gfp_mask);
if (outblockdata == NULL)
goto err_free_in;
rawkey = kmalloc(keybytes, gfp_mask);
if (rawkey == NULL)
goto err_free_out;
inblock.data = (char *) inblockdata;
inblock.len = blocksize;
outblock.data = (char *) outblockdata;
outblock.len = blocksize;
/* initialize the input block */
if (in_constant->len == inblock.len) {
memcpy(inblock.data, in_constant->data, inblock.len);
} else {
krb5_nfold(in_constant->len * 8, in_constant->data,
inblock.len * 8, inblock.data);
}
/* loop encrypting the blocks until enough key bytes are generated */
n = 0;
while (n < keybytes) {
(*(gk5e->encrypt))(cipher, NULL, inblock.data,
outblock.data, inblock.len);
if ((keybytes - n) <= outblock.len) {
memcpy(rawkey + n, outblock.data, (keybytes - n));
break;
}
memcpy(rawkey + n, outblock.data, outblock.len);
memcpy(inblock.data, outblock.data, outblock.len);
n += outblock.len;
}
/* postprocess the key */
inblock.data = (char *) rawkey;
inblock.len = keybytes;
BUG_ON(gk5e->mk_key == NULL);
ret = (*(gk5e->mk_key))(gk5e, &inblock, outkey);
if (ret) {
dprintk("%s: got %d from mk_key function for '%s'\n",
__func__, ret, gk5e->encrypt_name);
goto err_free_raw;
}
/* clean memory, free resources and exit */
ret = 0;
err_free_raw:
memset(rawkey, 0, keybytes);
kfree(rawkey);
err_free_out:
memset(outblockdata, 0, blocksize);
kfree(outblockdata);
err_free_in:
memset(inblockdata, 0, blocksize);
kfree(inblockdata);
err_free_cipher:
crypto_free_blkcipher(cipher);
err_return:
return ret;
}
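To see how often the block-filling loop above runs the cipher for the enctypes defined later in this commit: des3 needs 21 key bytes from 8-byte blocks (three encryptions, 8 + 8 + 5), aes128 needs a single 16-byte block, and aes256 needs two. A throwaway sketch of that arithmetic (not part of the original file):

#include <stdio.h>

static unsigned int dk_rounds(unsigned int keybytes, unsigned int blocksize)
{
	/* number of times the while-loop in krb5_derive_key() runs the cipher */
	return (keybytes + blocksize - 1) / blocksize;
}

int main(void)
{
	printf("des3:   %u\n", dk_rounds(21, 8));	/* 3 */
	printf("aes128: %u\n", dk_rounds(16, 16));	/* 1 */
	printf("aes256: %u\n", dk_rounds(32, 16));	/* 2 */
	return 0;
}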
#define smask(step) ((1<<step)-1)
#define pstep(x, step) (((x)&smask(step))^(((x)>>step)&smask(step)))
#define parity_char(x) pstep(pstep(pstep((x), 4), 2), 1)
static void mit_des_fixup_key_parity(u8 key[8])
{
int i;
for (i = 0; i < 8; i++) {
key[i] &= 0xfe;
key[i] |= 1^parity_char(key[i]);
}
}
/*
* This is the des3 key derivation postprocess function
*/
u32 gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e,
struct xdr_netobj *randombits,
struct xdr_netobj *key)
{
int i;
u32 ret = EINVAL;
if (key->len != 24) {
dprintk("%s: key->len is %d\n", __func__, key->len);
goto err_out;
}
if (randombits->len != 21) {
dprintk("%s: randombits->len is %d\n",
__func__, randombits->len);
goto err_out;
}
/* take the seven bytes, move them around into the top 7 bits of the
8 key bytes, then compute the parity bits. Do this three times. */
for (i = 0; i < 3; i++) {
memcpy(key->data + i*8, randombits->data + i*7, 7);
key->data[i*8+7] = (((key->data[i*8]&1)<<1) |
((key->data[i*8+1]&1)<<2) |
((key->data[i*8+2]&1)<<3) |
((key->data[i*8+3]&1)<<4) |
((key->data[i*8+4]&1)<<5) |
((key->data[i*8+5]&1)<<6) |
((key->data[i*8+6]&1)<<7));
mit_des_fixup_key_parity(key->data + i*8);
}
ret = 0;
err_out:
return ret;
}
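To make the 7-to-8 byte expansion above concrete: the low bit of each of the seven random bytes is collected into bits 1..7 of the eighth byte, and then every byte has its own low bit rewritten so the whole byte has odd parity, as DES key schedules expect. The user-space sketch below is not part of the original file and re-implements the parity fix-up rather than reusing the static helper above:

#include <stdio.h>
#include <string.h>

static unsigned char fix_parity(unsigned char b)
{
	/* force bit 0 so the byte ends up with an odd number of set bits */
	unsigned char p = b >> 1, parity = 0;

	for (; p; p >>= 1)
		parity ^= p & 1;
	return (unsigned char)((b & 0xfe) | (parity ^ 1));
}

int main(void)
{
	const unsigned char rnd[7] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40 };
	unsigned char key[8];
	int i;

	memcpy(key, rnd, 7);
	key[7] = (unsigned char)(((rnd[0] & 1) << 1) | ((rnd[1] & 1) << 2) |
				 ((rnd[2] & 1) << 3) | ((rnd[3] & 1) << 4) |
				 ((rnd[4] & 1) << 5) | ((rnd[5] & 1) << 6) |
				 ((rnd[6] & 1) << 7));
	for (i = 0; i < 8; i++)
		printf("%02x ", fix_parity(key[i]));
	printf("\n");	/* prints: 01 02 04 08 10 20 40 02 */
	return 0;
}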
/*
* This is the aes key derivation postprocess function
*/
u32 gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e,
struct xdr_netobj *randombits,
struct xdr_netobj *key)
{
u32 ret = EINVAL;
if (key->len != 16 && key->len != 32) {
dprintk("%s: key->len is %d\n", __func__, key->len);
goto err_out;
}
if (randombits->len != 16 && randombits->len != 32) {
dprintk("%s: randombits->len is %d\n",
__func__, randombits->len);
goto err_out;
}
if (randombits->len != key->len) {
dprintk("%s: randombits->len is %d, key->len is %d\n",
__func__, randombits->len, key->len);
goto err_out;
}
memcpy(key->data, randombits->data, key->len);
ret = 0;
err_out:
return ret;
}

788
net/sunrpc/auth_gss/gss_krb5_mech.c Normal file
View file

@ -0,0 +1,788 @@
/*
* linux/net/sunrpc/gss_krb5_mech.c
*
* Copyright (c) 2001-2008 The Regents of the University of Michigan.
* All rights reserved.
*
* Andy Adamson <andros@umich.edu>
* J. Bruce Fields <bfields@umich.edu>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/err.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/sunrpc/auth.h>
#include <linux/sunrpc/gss_krb5.h>
#include <linux/sunrpc/xdr.h>
#include <linux/crypto.h>
#include <linux/sunrpc/gss_krb5_enctypes.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
static struct gss_api_mech gss_kerberos_mech; /* forward declaration */
static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
/*
* DES (All DES enctypes are mapped to the same gss functionality)
*/
{
.etype = ENCTYPE_DES_CBC_RAW,
.ctype = CKSUMTYPE_RSA_MD5,
.name = "des-cbc-crc",
.encrypt_name = "cbc(des)",
.cksum_name = "md5",
.encrypt = krb5_encrypt,
.decrypt = krb5_decrypt,
.mk_key = NULL,
.signalg = SGN_ALG_DES_MAC_MD5,
.sealalg = SEAL_ALG_DES,
.keybytes = 7,
.keylength = 8,
.blocksize = 8,
.conflen = 8,
.cksumlength = 8,
.keyed_cksum = 0,
},
/*
* RC4-HMAC
*/
{
.etype = ENCTYPE_ARCFOUR_HMAC,
.ctype = CKSUMTYPE_HMAC_MD5_ARCFOUR,
.name = "rc4-hmac",
.encrypt_name = "ecb(arc4)",
.cksum_name = "hmac(md5)",
.encrypt = krb5_encrypt,
.decrypt = krb5_decrypt,
.mk_key = NULL,
.signalg = SGN_ALG_HMAC_MD5,
.sealalg = SEAL_ALG_MICROSOFT_RC4,
.keybytes = 16,
.keylength = 16,
.blocksize = 1,
.conflen = 8,
.cksumlength = 8,
.keyed_cksum = 1,
},
/*
* 3DES
*/
{
.etype = ENCTYPE_DES3_CBC_RAW,
.ctype = CKSUMTYPE_HMAC_SHA1_DES3,
.name = "des3-hmac-sha1",
.encrypt_name = "cbc(des3_ede)",
.cksum_name = "hmac(sha1)",
.encrypt = krb5_encrypt,
.decrypt = krb5_decrypt,
.mk_key = gss_krb5_des3_make_key,
.signalg = SGN_ALG_HMAC_SHA1_DES3_KD,
.sealalg = SEAL_ALG_DES3KD,
.keybytes = 21,
.keylength = 24,
.blocksize = 8,
.conflen = 8,
.cksumlength = 20,
.keyed_cksum = 1,
},
/*
* AES128
*/
{
.etype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
.ctype = CKSUMTYPE_HMAC_SHA1_96_AES128,
.name = "aes128-cts",
.encrypt_name = "cts(cbc(aes))",
.cksum_name = "hmac(sha1)",
.encrypt = krb5_encrypt,
.decrypt = krb5_decrypt,
.mk_key = gss_krb5_aes_make_key,
.encrypt_v2 = gss_krb5_aes_encrypt,
.decrypt_v2 = gss_krb5_aes_decrypt,
.signalg = -1,
.sealalg = -1,
.keybytes = 16,
.keylength = 16,
.blocksize = 16,
.conflen = 16,
.cksumlength = 12,
.keyed_cksum = 1,
},
/*
* AES256
*/
{
.etype = ENCTYPE_AES256_CTS_HMAC_SHA1_96,
.ctype = CKSUMTYPE_HMAC_SHA1_96_AES256,
.name = "aes256-cts",
.encrypt_name = "cts(cbc(aes))",
.cksum_name = "hmac(sha1)",
.encrypt = krb5_encrypt,
.decrypt = krb5_decrypt,
.mk_key = gss_krb5_aes_make_key,
.encrypt_v2 = gss_krb5_aes_encrypt,
.decrypt_v2 = gss_krb5_aes_decrypt,
.signalg = -1,
.sealalg = -1,
.keybytes = 32,
.keylength = 32,
.blocksize = 16,
.conflen = 16,
.cksumlength = 12,
.keyed_cksum = 1,
},
};
static const int num_supported_enctypes =
ARRAY_SIZE(supported_gss_krb5_enctypes);
static int
supported_gss_krb5_enctype(int etype)
{
int i;
for (i = 0; i < num_supported_enctypes; i++)
if (supported_gss_krb5_enctypes[i].etype == etype)
return 1;
return 0;
}
static const struct gss_krb5_enctype *
get_gss_krb5_enctype(int etype)
{
int i;
for (i = 0; i < num_supported_enctypes; i++)
if (supported_gss_krb5_enctypes[i].etype == etype)
return &supported_gss_krb5_enctypes[i];
return NULL;
}
static const void *
simple_get_bytes(const void *p, const void *end, void *res, int len)
{
const void *q = (const void *)((const char *)p + len);
if (unlikely(q > end || q < p))
return ERR_PTR(-EFAULT);
memcpy(res, p, len);
return q;
}
static const void *
simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
{
const void *q;
unsigned int len;
p = simple_get_bytes(p, end, &len, sizeof(len));
if (IS_ERR(p))
return p;
q = (const void *)((const char *)p + len);
if (unlikely(q > end || q < p))
return ERR_PTR(-EFAULT);
res->data = kmemdup(p, len, GFP_NOFS);
if (unlikely(res->data == NULL))
return ERR_PTR(-ENOMEM);
res->len = len;
return q;
}
static inline const void *
get_key(const void *p, const void *end,
struct krb5_ctx *ctx, struct crypto_blkcipher **res)
{
struct xdr_netobj key;
int alg;
p = simple_get_bytes(p, end, &alg, sizeof(alg));
if (IS_ERR(p))
goto out_err;
switch (alg) {
case ENCTYPE_DES_CBC_CRC:
case ENCTYPE_DES_CBC_MD4:
case ENCTYPE_DES_CBC_MD5:
/* Map all these key types to ENCTYPE_DES_CBC_RAW */
alg = ENCTYPE_DES_CBC_RAW;
break;
}
if (!supported_gss_krb5_enctype(alg)) {
printk(KERN_WARNING "gss_kerberos_mech: unsupported "
"encryption key algorithm %d\n", alg);
p = ERR_PTR(-EINVAL);
goto out_err;
}
p = simple_get_netobj(p, end, &key);
if (IS_ERR(p))
goto out_err;
*res = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
CRYPTO_ALG_ASYNC);
if (IS_ERR(*res)) {
printk(KERN_WARNING "gss_kerberos_mech: unable to initialize "
"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
*res = NULL;
goto out_err_free_key;
}
if (crypto_blkcipher_setkey(*res, key.data, key.len)) {
printk(KERN_WARNING "gss_kerberos_mech: error setting key for "
"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
goto out_err_free_tfm;
}
kfree(key.data);
return p;
out_err_free_tfm:
crypto_free_blkcipher(*res);
out_err_free_key:
kfree(key.data);
p = ERR_PTR(-EINVAL);
out_err:
return p;
}
static int
gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
{
int tmp;
p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
if (IS_ERR(p))
goto out_err;
/* Old format supports only DES! Any other enctype uses new format */
ctx->enctype = ENCTYPE_DES_CBC_RAW;
ctx->gk5e = get_gss_krb5_enctype(ctx->enctype);
if (ctx->gk5e == NULL) {
p = ERR_PTR(-EINVAL);
goto out_err;
}
/* The downcall format was designed before we completely understood
 * the uses of the context fields; so it includes some fields that
 * we give only minimal sanity-checking, and some that we ignore
 * completely (like the next twenty bytes): */
if (unlikely(p + 20 > end || p + 20 < p)) {
p = ERR_PTR(-EFAULT);
goto out_err;
}
p += 20;
p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
if (IS_ERR(p))
goto out_err;
if (tmp != SGN_ALG_DES_MAC_MD5) {
p = ERR_PTR(-ENOSYS);
goto out_err;
}
p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
if (IS_ERR(p))
goto out_err;
if (tmp != SEAL_ALG_DES) {
p = ERR_PTR(-ENOSYS);
goto out_err;
}
p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
if (IS_ERR(p))
goto out_err;
p = simple_get_bytes(p, end, &ctx->seq_send, sizeof(ctx->seq_send));
if (IS_ERR(p))
goto out_err;
p = simple_get_netobj(p, end, &ctx->mech_used);
if (IS_ERR(p))
goto out_err;
p = get_key(p, end, ctx, &ctx->enc);
if (IS_ERR(p))
goto out_err_free_mech;
p = get_key(p, end, ctx, &ctx->seq);
if (IS_ERR(p))
goto out_err_free_key1;
if (p != end) {
p = ERR_PTR(-EFAULT);
goto out_err_free_key2;
}
return 0;
out_err_free_key2:
crypto_free_blkcipher(ctx->seq);
out_err_free_key1:
crypto_free_blkcipher(ctx->enc);
out_err_free_mech:
kfree(ctx->mech_used.data);
out_err:
return PTR_ERR(p);
}
static struct crypto_blkcipher *
context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key)
{
struct crypto_blkcipher *cp;
cp = crypto_alloc_blkcipher(cname, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(cp)) {
dprintk("gss_kerberos_mech: unable to initialize "
"crypto algorithm %s\n", cname);
return NULL;
}
if (crypto_blkcipher_setkey(cp, key, ctx->gk5e->keylength)) {
dprintk("gss_kerberos_mech: error setting key for "
"crypto algorithm %s\n", cname);
crypto_free_blkcipher(cp);
return NULL;
}
return cp;
}
static inline void
set_cdata(u8 cdata[GSS_KRB5_K5CLENGTH], u32 usage, u8 seed)
{
cdata[0] = (usage>>24)&0xff;
cdata[1] = (usage>>16)&0xff;
cdata[2] = (usage>>8)&0xff;
cdata[3] = usage&0xff;
cdata[4] = seed;
}
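For example, the five-byte constant handed to krb5_derive_key() for the initiator seal encryption key (see context_derive_keys_new() below) would be 00 00 00 18 aa, assuming the usual RFC 4121 key-usage number (initiator seal = 24) and the RFC 3961 "encryption" seed byte 0xAA. A throwaway user-space check, not part of the original file:

#include <assert.h>

typedef unsigned char u8;
typedef unsigned int u32;

static void set_cdata_copy(u8 cdata[5], u32 usage, u8 seed)
{
	/* same encoding as set_cdata() above: big-endian usage, then seed byte */
	cdata[0] = (usage >> 24) & 0xff;
	cdata[1] = (usage >> 16) & 0xff;
	cdata[2] = (usage >> 8) & 0xff;
	cdata[3] = usage & 0xff;
	cdata[4] = seed;
}

int main(void)
{
	u8 c[5];

	set_cdata_copy(c, 24 /* initiator seal, per RFC 4121 */, 0xaa /* Ke */);
	assert(c[0] == 0 && c[1] == 0 && c[2] == 0 && c[3] == 0x18 && c[4] == 0xaa);
	return 0;
}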
static int
context_derive_keys_des3(struct krb5_ctx *ctx, gfp_t gfp_mask)
{
struct xdr_netobj c, keyin, keyout;
u8 cdata[GSS_KRB5_K5CLENGTH];
u32 err;
c.len = GSS_KRB5_K5CLENGTH;
c.data = cdata;
keyin.data = ctx->Ksess;
keyin.len = ctx->gk5e->keylength;
keyout.len = ctx->gk5e->keylength;
/* seq uses the raw key */
ctx->seq = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name,
ctx->Ksess);
if (ctx->seq == NULL)
goto out_err;
ctx->enc = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name,
ctx->Ksess);
if (ctx->enc == NULL)
goto out_free_seq;
/* derive cksum */
set_cdata(cdata, KG_USAGE_SIGN, KEY_USAGE_SEED_CHECKSUM);
keyout.data = ctx->cksum;
err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
if (err) {
dprintk("%s: Error %d deriving cksum key\n",
__func__, err);
goto out_free_enc;
}
return 0;
out_free_enc:
crypto_free_blkcipher(ctx->enc);
out_free_seq:
crypto_free_blkcipher(ctx->seq);
out_err:
return -EINVAL;
}
/*
* Note that RC4 depends on deriving keys using the sequence
* number or the checksum of a token. Therefore, the final keys
* cannot be calculated until the token is being constructed!
*/
static int
context_derive_keys_rc4(struct krb5_ctx *ctx)
{
struct crypto_hash *hmac;
char sigkeyconstant[] = "signaturekey";
int slen = strlen(sigkeyconstant) + 1; /* include null terminator */
struct hash_desc desc;
struct scatterlist sg[1];
int err;
dprintk("RPC: %s: entered\n", __func__);
/*
* derive cksum (aka Ksign) key
*/
hmac = crypto_alloc_hash(ctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(hmac)) {
dprintk("%s: error %ld allocating hash '%s'\n",
__func__, PTR_ERR(hmac), ctx->gk5e->cksum_name);
err = PTR_ERR(hmac);
goto out_err;
}
err = crypto_hash_setkey(hmac, ctx->Ksess, ctx->gk5e->keylength);
if (err)
goto out_err_free_hmac;
sg_init_table(sg, 1);
sg_set_buf(sg, sigkeyconstant, slen);
desc.tfm = hmac;
desc.flags = 0;
err = crypto_hash_init(&desc);
if (err)
goto out_err_free_hmac;
err = crypto_hash_digest(&desc, sg, slen, ctx->cksum);
if (err)
goto out_err_free_hmac;
/*
* allocate hash, and blkciphers for data and seqnum encryption
*/
ctx->enc = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
CRYPTO_ALG_ASYNC);
if (IS_ERR(ctx->enc)) {
err = PTR_ERR(ctx->enc);
goto out_err_free_hmac;
}
ctx->seq = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
CRYPTO_ALG_ASYNC);
if (IS_ERR(ctx->seq)) {
crypto_free_blkcipher(ctx->enc);
err = PTR_ERR(ctx->seq);
goto out_err_free_hmac;
}
dprintk("RPC: %s: returning success\n", __func__);
err = 0;
out_err_free_hmac:
crypto_free_hash(hmac);
out_err:
dprintk("RPC: %s: returning %d\n", __func__, err);
return err;
}
static int
context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask)
{
struct xdr_netobj c, keyin, keyout;
u8 cdata[GSS_KRB5_K5CLENGTH];
u32 err;
c.len = GSS_KRB5_K5CLENGTH;
c.data = cdata;
keyin.data = ctx->Ksess;
keyin.len = ctx->gk5e->keylength;
keyout.len = ctx->gk5e->keylength;
/* initiator seal encryption */
set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
keyout.data = ctx->initiator_seal;
err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
if (err) {
dprintk("%s: Error %d deriving initiator_seal key\n",
__func__, err);
goto out_err;
}
ctx->initiator_enc = context_v2_alloc_cipher(ctx,
ctx->gk5e->encrypt_name,
ctx->initiator_seal);
if (ctx->initiator_enc == NULL)
goto out_err;
/* acceptor seal encryption */
set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
keyout.data = ctx->acceptor_seal;
err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
if (err) {
dprintk("%s: Error %d deriving acceptor_seal key\n",
__func__, err);
goto out_free_initiator_enc;
}
ctx->acceptor_enc = context_v2_alloc_cipher(ctx,
ctx->gk5e->encrypt_name,
ctx->acceptor_seal);
if (ctx->acceptor_enc == NULL)
goto out_free_initiator_enc;
/* initiator sign checksum */
set_cdata(cdata, KG_USAGE_INITIATOR_SIGN, KEY_USAGE_SEED_CHECKSUM);
keyout.data = ctx->initiator_sign;
err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
if (err) {
dprintk("%s: Error %d deriving initiator_sign key\n",
__func__, err);
goto out_free_acceptor_enc;
}
/* acceptor sign checksum */
set_cdata(cdata, KG_USAGE_ACCEPTOR_SIGN, KEY_USAGE_SEED_CHECKSUM);
keyout.data = ctx->acceptor_sign;
err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
if (err) {
dprintk("%s: Error %d deriving acceptor_sign key\n",
__func__, err);
goto out_free_acceptor_enc;
}
/* initiator seal integrity */
set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_INTEGRITY);
keyout.data = ctx->initiator_integ;
err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
if (err) {
dprintk("%s: Error %d deriving initiator_integ key\n",
__func__, err);
goto out_free_acceptor_enc;
}
/* acceptor seal integrity */
set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_INTEGRITY);
keyout.data = ctx->acceptor_integ;
err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
if (err) {
dprintk("%s: Error %d deriving acceptor_integ key\n",
__func__, err);
goto out_free_acceptor_enc;
}
switch (ctx->enctype) {
case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
ctx->initiator_enc_aux =
context_v2_alloc_cipher(ctx, "cbc(aes)",
ctx->initiator_seal);
if (ctx->initiator_enc_aux == NULL)
goto out_free_acceptor_enc;
ctx->acceptor_enc_aux =
context_v2_alloc_cipher(ctx, "cbc(aes)",
ctx->acceptor_seal);
if (ctx->acceptor_enc_aux == NULL) {
crypto_free_blkcipher(ctx->initiator_enc_aux);
goto out_free_acceptor_enc;
}
}
return 0;
out_free_acceptor_enc:
crypto_free_blkcipher(ctx->acceptor_enc);
out_free_initiator_enc:
crypto_free_blkcipher(ctx->initiator_enc);
out_err:
return -EINVAL;
}
static int
gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
gfp_t gfp_mask)
{
int keylen;
p = simple_get_bytes(p, end, &ctx->flags, sizeof(ctx->flags));
if (IS_ERR(p))
goto out_err;
ctx->initiate = ctx->flags & KRB5_CTX_FLAG_INITIATOR;
p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
if (IS_ERR(p))
goto out_err;
p = simple_get_bytes(p, end, &ctx->seq_send64, sizeof(ctx->seq_send64));
if (IS_ERR(p))
goto out_err;
/* set seq_send for use by "older" enctypes */
ctx->seq_send = ctx->seq_send64;
if (ctx->seq_send64 != ctx->seq_send) {
dprintk("%s: seq_send64 %lx, seq_send %x overflow?\n", __func__,
(unsigned long)ctx->seq_send64, ctx->seq_send);
p = ERR_PTR(-EINVAL);
goto out_err;
}
p = simple_get_bytes(p, end, &ctx->enctype, sizeof(ctx->enctype));
if (IS_ERR(p))
goto out_err;
/* Map ENCTYPE_DES3_CBC_SHA1 to ENCTYPE_DES3_CBC_RAW */
if (ctx->enctype == ENCTYPE_DES3_CBC_SHA1)
ctx->enctype = ENCTYPE_DES3_CBC_RAW;
ctx->gk5e = get_gss_krb5_enctype(ctx->enctype);
if (ctx->gk5e == NULL) {
dprintk("gss_kerberos_mech: unsupported krb5 enctype %u\n",
ctx->enctype);
p = ERR_PTR(-EINVAL);
goto out_err;
}
keylen = ctx->gk5e->keylength;
p = simple_get_bytes(p, end, ctx->Ksess, keylen);
if (IS_ERR(p))
goto out_err;
if (p != end) {
p = ERR_PTR(-EINVAL);
goto out_err;
}
ctx->mech_used.data = kmemdup(gss_kerberos_mech.gm_oid.data,
gss_kerberos_mech.gm_oid.len, gfp_mask);
if (unlikely(ctx->mech_used.data == NULL)) {
p = ERR_PTR(-ENOMEM);
goto out_err;
}
ctx->mech_used.len = gss_kerberos_mech.gm_oid.len;
switch (ctx->enctype) {
case ENCTYPE_DES3_CBC_RAW:
return context_derive_keys_des3(ctx, gfp_mask);
case ENCTYPE_ARCFOUR_HMAC:
return context_derive_keys_rc4(ctx);
case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
return context_derive_keys_new(ctx, gfp_mask);
default:
return -EINVAL;
}
out_err:
return PTR_ERR(p);
}
static int
gss_import_sec_context_kerberos(const void *p, size_t len,
struct gss_ctx *ctx_id,
time_t *endtime,
gfp_t gfp_mask)
{
const void *end = (const void *)((const char *)p + len);
struct krb5_ctx *ctx;
int ret;
ctx = kzalloc(sizeof(*ctx), gfp_mask);
if (ctx == NULL)
return -ENOMEM;
if (len == 85)
ret = gss_import_v1_context(p, end, ctx);
else
ret = gss_import_v2_context(p, end, ctx, gfp_mask);
if (ret == 0) {
ctx_id->internal_ctx_id = ctx;
if (endtime)
*endtime = ctx->endtime;
} else
kfree(ctx);
dprintk("RPC: %s: returning %d\n", __func__, ret);
return ret;
}
static void
gss_delete_sec_context_kerberos(void *internal_ctx) {
struct krb5_ctx *kctx = internal_ctx;
crypto_free_blkcipher(kctx->seq);
crypto_free_blkcipher(kctx->enc);
crypto_free_blkcipher(kctx->acceptor_enc);
crypto_free_blkcipher(kctx->initiator_enc);
crypto_free_blkcipher(kctx->acceptor_enc_aux);
crypto_free_blkcipher(kctx->initiator_enc_aux);
kfree(kctx->mech_used.data);
kfree(kctx);
}
static const struct gss_api_ops gss_kerberos_ops = {
.gss_import_sec_context = gss_import_sec_context_kerberos,
.gss_get_mic = gss_get_mic_kerberos,
.gss_verify_mic = gss_verify_mic_kerberos,
.gss_wrap = gss_wrap_kerberos,
.gss_unwrap = gss_unwrap_kerberos,
.gss_delete_sec_context = gss_delete_sec_context_kerberos,
};
static struct pf_desc gss_kerberos_pfs[] = {
[0] = {
.pseudoflavor = RPC_AUTH_GSS_KRB5,
.qop = GSS_C_QOP_DEFAULT,
.service = RPC_GSS_SVC_NONE,
.name = "krb5",
},
[1] = {
.pseudoflavor = RPC_AUTH_GSS_KRB5I,
.qop = GSS_C_QOP_DEFAULT,
.service = RPC_GSS_SVC_INTEGRITY,
.name = "krb5i",
},
[2] = {
.pseudoflavor = RPC_AUTH_GSS_KRB5P,
.qop = GSS_C_QOP_DEFAULT,
.service = RPC_GSS_SVC_PRIVACY,
.name = "krb5p",
},
};
MODULE_ALIAS("rpc-auth-gss-krb5");
MODULE_ALIAS("rpc-auth-gss-krb5i");
MODULE_ALIAS("rpc-auth-gss-krb5p");
MODULE_ALIAS("rpc-auth-gss-390003");
MODULE_ALIAS("rpc-auth-gss-390004");
MODULE_ALIAS("rpc-auth-gss-390005");
MODULE_ALIAS("rpc-auth-gss-1.2.840.113554.1.2.2");
static struct gss_api_mech gss_kerberos_mech = {
.gm_name = "krb5",
.gm_owner = THIS_MODULE,
.gm_oid = { 9, "\x2a\x86\x48\x86\xf7\x12\x01\x02\x02" },
.gm_ops = &gss_kerberos_ops,
.gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs),
.gm_pfs = gss_kerberos_pfs,
.gm_upcall_enctypes = KRB5_SUPPORTED_ENCTYPES,
};
static int __init init_kerberos_module(void)
{
int status;
status = gss_mech_register(&gss_kerberos_mech);
if (status)
printk("Failed to register kerberos gss mechanism!\n");
return status;
}
static void __exit cleanup_kerberos_module(void)
{
gss_mech_unregister(&gss_kerberos_mech);
}
MODULE_LICENSE("GPL");
module_init(init_kerberos_module);
module_exit(cleanup_kerberos_module);

229
net/sunrpc/auth_gss/gss_krb5_seal.c Normal file
View file

@ -0,0 +1,229 @@
/*
* linux/net/sunrpc/gss_krb5_seal.c
*
* Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5seal.c
*
* Copyright (c) 2000-2008 The Regents of the University of Michigan.
* All rights reserved.
*
* Andy Adamson <andros@umich.edu>
* J. Bruce Fields <bfields@umich.edu>
*/
/*
* Copyright 1993 by OpenVision Technologies, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appears in all copies and
* that both that copyright notice and this permission notice appear in
* supporting documentation, and that the name of OpenVision not be used
* in advertising or publicity pertaining to distribution of the software
* without specific, written prior permission. OpenVision makes no
* representations about the suitability of this software for any
* purpose. It is provided "as is" without express or implied warranty.
*
* OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
* USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
/*
* Copyright (C) 1998 by the FundsXpress, INC.
*
* All rights reserved.
*
* Export of this software from the United States of America may require
* a specific license from the United States Government. It is the
* responsibility of any person or organization contemplating export to
* obtain such a license before exporting.
*
* WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
* distribute this software and its documentation for any purpose and
* without fee is hereby granted, provided that the above copyright
* notice appear in all copies and that both that copyright notice and
* this permission notice appear in supporting documentation, and that
* the name of FundsXpress. not be used in advertising or publicity pertaining
* to distribution of the software without specific, written prior
* permission. FundsXpress makes no representations about the suitability of
* this software for any purpose. It is provided "as is" without express
* or implied warranty.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
#include <linux/types.h>
#include <linux/jiffies.h>
#include <linux/sunrpc/gss_krb5.h>
#include <linux/random.h>
#include <linux/crypto.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
DEFINE_SPINLOCK(krb5_seq_lock);
static void *
setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token)
{
u16 *ptr;
void *krb5_hdr;
int body_size = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
token->len = g_token_size(&ctx->mech_used, body_size);
ptr = (u16 *)token->data;
g_make_token_header(&ctx->mech_used, body_size, (unsigned char **)&ptr);
/* ptr now at start of header described in rfc 1964, section 1.2.1: */
krb5_hdr = ptr;
*ptr++ = KG_TOK_MIC_MSG;
/*
* signalg is stored as if it were converted from LE to host endian, even
* though it's an opaque pair of bytes according to the RFC.
*/
*ptr++ = (__force u16)cpu_to_le16(ctx->gk5e->signalg);
*ptr++ = SEAL_ALG_NONE;
*ptr = 0xffff;
return krb5_hdr;
}
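For the des-cbc-crc enctype defined in gss_krb5_mech.c above, the eight bytes laid down here would come out as shown in the illustrative array below (not part of the original file, and assuming the usual constant values KG_TOK_MIC_MSG = 0x0101, SGN_ALG_DES_MAC_MD5 = 0 and SEAL_ALG_NONE = 0xffff); the 8-byte sequence number and the checksum follow it in the token:

/* illustrative only: RFC 1964 MIC token header for the des-cbc-crc enctype */
static const unsigned char mic_v1_hdr[8] = {
	0x01, 0x01,	/* TOK_ID: KG_TOK_MIC_MSG            */
	0x00, 0x00,	/* SGN_ALG: DES MAC MD5, stored "LE" */
	0xff, 0xff,	/* SEAL_ALG_NONE                     */
	0xff, 0xff,	/* filler                            */
};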
static void *
setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
{
u16 *ptr;
void *krb5_hdr;
u8 *p, flags = 0x00;
if ((ctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0)
flags |= 0x01;
if (ctx->flags & KRB5_CTX_FLAG_ACCEPTOR_SUBKEY)
flags |= 0x04;
/* Per rfc 4121, sec 4.2.6.1, there is no header,
* just start the token */
krb5_hdr = ptr = (u16 *)token->data;
*ptr++ = KG2_TOK_MIC;
p = (u8 *)ptr;
*p++ = flags;
*p++ = 0xff;
ptr = (u16 *)p;
*ptr++ = 0xffff;
*ptr = 0xffff;
token->len = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
return krb5_hdr;
}
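The resulting 16-byte header (RFC 4121, sec. 4.2.6.1) for a token sent by the initiator without an acceptor subkey would look like the illustrative array below, which is not part of the original file and assumes KG2_TOK_MIC = 0x0404; gss_get_mic_v2() fills in the big-endian sequence number at offset 8 before appending the checksum:

/* illustrative only: RFC 4121 MIC token header as built above */
static const unsigned char mic_v2_hdr[16] = {
	0x04, 0x04,			/* TOK_ID: KG2_TOK_MIC            */
	0x00,				/* flags: initiator, no subkey    */
	0xff, 0xff, 0xff, 0xff, 0xff,	/* filler                         */
	0x00, 0x00, 0x00, 0x00,		/* 64-bit big-endian SND_SEQ,     */
	0x00, 0x00, 0x00, 0x00,		/*   written later by the caller  */
};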
static u32
gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
struct xdr_netobj *token)
{
char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
.data = cksumdata};
void *ptr;
s32 now;
u32 seq_send;
u8 *cksumkey;
dprintk("RPC: %s\n", __func__);
BUG_ON(ctx == NULL);
now = get_seconds();
ptr = setup_token(ctx, token);
if (ctx->gk5e->keyed_cksum)
cksumkey = ctx->cksum;
else
cksumkey = NULL;
if (make_checksum(ctx, ptr, 8, text, 0, cksumkey,
KG_USAGE_SIGN, &md5cksum))
return GSS_S_FAILURE;
memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
spin_lock(&krb5_seq_lock);
seq_send = ctx->seq_send++;
spin_unlock(&krb5_seq_lock);
if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff,
seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))
return GSS_S_FAILURE;
return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
}
static u32
gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
struct xdr_netobj *token)
{
char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
struct xdr_netobj cksumobj = { .len = sizeof(cksumdata),
.data = cksumdata};
void *krb5_hdr;
s32 now;
u64 seq_send;
u8 *cksumkey;
unsigned int cksum_usage;
dprintk("RPC: %s\n", __func__);
krb5_hdr = setup_token_v2(ctx, token);
/* Set up the sequence number. Now 64-bits in clear
* text and w/o direction indicator */
spin_lock(&krb5_seq_lock);
seq_send = ctx->seq_send64++;
spin_unlock(&krb5_seq_lock);
*((__be64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send);
if (ctx->initiate) {
cksumkey = ctx->initiator_sign;
cksum_usage = KG_USAGE_INITIATOR_SIGN;
} else {
cksumkey = ctx->acceptor_sign;
cksum_usage = KG_USAGE_ACCEPTOR_SIGN;
}
if (make_checksum_v2(ctx, krb5_hdr, GSS_KRB5_TOK_HDR_LEN,
text, 0, cksumkey, cksum_usage, &cksumobj))
return GSS_S_FAILURE;
memcpy(krb5_hdr + GSS_KRB5_TOK_HDR_LEN, cksumobj.data, cksumobj.len);
now = get_seconds();
return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
}
u32
gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
struct xdr_netobj *token)
{
struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
switch (ctx->enctype) {
default:
BUG();
case ENCTYPE_DES_CBC_RAW:
case ENCTYPE_DES3_CBC_RAW:
case ENCTYPE_ARCFOUR_HMAC:
return gss_get_mic_v1(ctx, text, token);
case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
return gss_get_mic_v2(ctx, text, token);
}
}

166
net/sunrpc/auth_gss/gss_krb5_seqnum.c Normal file
View file

@ -0,0 +1,166 @@
/*
* linux/net/sunrpc/gss_krb5_seqnum.c
*
* Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/util_seqnum.c
*
* Copyright (c) 2000 The Regents of the University of Michigan.
* All rights reserved.
*
* Andy Adamson <andros@umich.edu>
*/
/*
* Copyright 1993 by OpenVision Technologies, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appears in all copies and
* that both that copyright notice and this permission notice appear in
* supporting documentation, and that the name of OpenVision not be used
* in advertising or publicity pertaining to distribution of the software
* without specific, written prior permission. OpenVision makes no
* representations about the suitability of this software for any
* purpose. It is provided "as is" without express or implied warranty.
*
* OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
* USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
#include <linux/types.h>
#include <linux/sunrpc/gss_krb5.h>
#include <linux/crypto.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
static s32
krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
unsigned char *cksum, unsigned char *buf)
{
struct crypto_blkcipher *cipher;
unsigned char plain[8];
s32 code;
dprintk("RPC: %s:\n", __func__);
cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
CRYPTO_ALG_ASYNC);
if (IS_ERR(cipher))
return PTR_ERR(cipher);
plain[0] = (unsigned char) ((seqnum >> 24) & 0xff);
plain[1] = (unsigned char) ((seqnum >> 16) & 0xff);
plain[2] = (unsigned char) ((seqnum >> 8) & 0xff);
plain[3] = (unsigned char) ((seqnum >> 0) & 0xff);
plain[4] = direction;
plain[5] = direction;
plain[6] = direction;
plain[7] = direction;
code = krb5_rc4_setup_seq_key(kctx, cipher, cksum);
if (code)
goto out;
code = krb5_encrypt(cipher, cksum, plain, buf, 8);
out:
crypto_free_blkcipher(cipher);
return code;
}
s32
krb5_make_seq_num(struct krb5_ctx *kctx,
struct crypto_blkcipher *key,
int direction,
u32 seqnum,
unsigned char *cksum, unsigned char *buf)
{
unsigned char plain[8];
if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC)
return krb5_make_rc4_seq_num(kctx, direction, seqnum,
cksum, buf);
plain[0] = (unsigned char) (seqnum & 0xff);
plain[1] = (unsigned char) ((seqnum >> 8) & 0xff);
plain[2] = (unsigned char) ((seqnum >> 16) & 0xff);
plain[3] = (unsigned char) ((seqnum >> 24) & 0xff);
plain[4] = direction;
plain[5] = direction;
plain[6] = direction;
plain[7] = direction;
return krb5_encrypt(key, cksum, plain, buf, 8);
}
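For the non-RC4 enctypes, the eight plaintext bytes assembled above put the sequence number least-significant byte first and repeat the direction octet four times (0x00 when sent by the initiator, 0xff by the acceptor, matching the callers in gss_krb5_seal.c above). An illustrative layout, not part of the original file, for sequence number 0x11223344 sent by the initiator:

/* illustrative only: plaintext handed to krb5_encrypt() for seqnum 0x11223344 */
static const unsigned char seq_plain[8] = {
	0x44, 0x33, 0x22, 0x11,		/* sequence number, LSB first     */
	0x00, 0x00, 0x00, 0x00,		/* direction, repeated four times */
};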
static s32
krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
unsigned char *buf, int *direction, s32 *seqnum)
{
struct crypto_blkcipher *cipher;
unsigned char plain[8];
s32 code;
dprintk("RPC: %s:\n", __func__);
cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
CRYPTO_ALG_ASYNC);
if (IS_ERR(cipher))
return PTR_ERR(cipher);
code = krb5_rc4_setup_seq_key(kctx, cipher, cksum);
if (code)
goto out;
code = krb5_decrypt(cipher, cksum, buf, plain, 8);
if (code)
goto out;
if ((plain[4] != plain[5]) || (plain[4] != plain[6])
|| (plain[4] != plain[7])) {
code = (s32)KG_BAD_SEQ;
goto out;
}
*direction = plain[4];
*seqnum = ((plain[0] << 24) | (plain[1] << 16) |
(plain[2] << 8) | (plain[3]));
out:
crypto_free_blkcipher(cipher);
return code;
}
s32
krb5_get_seq_num(struct krb5_ctx *kctx,
unsigned char *cksum,
unsigned char *buf,
int *direction, u32 *seqnum)
{
s32 code;
unsigned char plain[8];
struct crypto_blkcipher *key = kctx->seq;
dprintk("RPC: krb5_get_seq_num:\n");
if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC)
return krb5_get_rc4_seq_num(kctx, cksum, buf,
direction, seqnum);
if ((code = krb5_decrypt(key, cksum, buf, plain, 8)))
return code;
if ((plain[4] != plain[5]) || (plain[4] != plain[6]) ||
(plain[4] != plain[7]))
return (s32)KG_BAD_SEQ;
*direction = plain[4];
*seqnum = ((plain[0]) |
(plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24));
return 0;
}

226
net/sunrpc/auth_gss/gss_krb5_unseal.c Normal file
View file

@ -0,0 +1,226 @@
/*
* linux/net/sunrpc/gss_krb5_unseal.c
*
* Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5unseal.c
*
* Copyright (c) 2000-2008 The Regents of the University of Michigan.
* All rights reserved.
*
* Andy Adamson <andros@umich.edu>
*/
/*
* Copyright 1993 by OpenVision Technologies, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appears in all copies and
* that both that copyright notice and this permission notice appear in
* supporting documentation, and that the name of OpenVision not be used
* in advertising or publicity pertaining to distribution of the software
* without specific, written prior permission. OpenVision makes no
* representations about the suitability of this software for any
* purpose. It is provided "as is" without express or implied warranty.
*
* OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
* USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
/*
* Copyright (C) 1998 by the FundsXpress, INC.
*
* All rights reserved.
*
* Export of this software from the United States of America may require
* a specific license from the United States Government. It is the
* responsibility of any person or organization contemplating export to
* obtain such a license before exporting.
*
* WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
* distribute this software and its documentation for any purpose and
* without fee is hereby granted, provided that the above copyright
* notice appear in all copies and that both that copyright notice and
* this permission notice appear in supporting documentation, and that
* the name of FundsXpress. not be used in advertising or publicity pertaining
* to distribution of the software without specific, written prior
* permission. FundsXpress makes no representations about the suitability of
* this software for any purpose. It is provided "as is" without express
* or implied warranty.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
#include <linux/types.h>
#include <linux/jiffies.h>
#include <linux/sunrpc/gss_krb5.h>
#include <linux/crypto.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
/* read_token is a mic token, and message_buffer is the data that the mic was
* supposedly taken over. */
static u32
gss_verify_mic_v1(struct krb5_ctx *ctx,
struct xdr_buf *message_buffer, struct xdr_netobj *read_token)
{
int signalg;
int sealalg;
char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
.data = cksumdata};
s32 now;
int direction;
u32 seqnum;
unsigned char *ptr = (unsigned char *)read_token->data;
int bodysize;
u8 *cksumkey;
dprintk("RPC: krb5_read_token\n");
if (g_verify_token_header(&ctx->mech_used, &bodysize, &ptr,
read_token->len))
return GSS_S_DEFECTIVE_TOKEN;
if ((ptr[0] != ((KG_TOK_MIC_MSG >> 8) & 0xff)) ||
(ptr[1] != (KG_TOK_MIC_MSG & 0xff)))
return GSS_S_DEFECTIVE_TOKEN;
/* XXX sanity-check bodysize?? */
signalg = ptr[2] + (ptr[3] << 8);
if (signalg != ctx->gk5e->signalg)
return GSS_S_DEFECTIVE_TOKEN;
sealalg = ptr[4] + (ptr[5] << 8);
if (sealalg != SEAL_ALG_NONE)
return GSS_S_DEFECTIVE_TOKEN;
if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
return GSS_S_DEFECTIVE_TOKEN;
if (ctx->gk5e->keyed_cksum)
cksumkey = ctx->cksum;
else
cksumkey = NULL;
if (make_checksum(ctx, ptr, 8, message_buffer, 0,
cksumkey, KG_USAGE_SIGN, &md5cksum))
return GSS_S_FAILURE;
if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
ctx->gk5e->cksumlength))
return GSS_S_BAD_SIG;
/* it got through unscathed. Make sure the context is unexpired */
now = get_seconds();
if (now > ctx->endtime)
return GSS_S_CONTEXT_EXPIRED;
/* do sequencing checks */
if (krb5_get_seq_num(ctx, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8,
&direction, &seqnum))
return GSS_S_FAILURE;
if ((ctx->initiate && direction != 0xff) ||
(!ctx->initiate && direction != 0))
return GSS_S_BAD_SIG;
return GSS_S_COMPLETE;
}
static u32
gss_verify_mic_v2(struct krb5_ctx *ctx,
struct xdr_buf *message_buffer, struct xdr_netobj *read_token)
{
char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
struct xdr_netobj cksumobj = {.len = sizeof(cksumdata),
.data = cksumdata};
s32 now;
u8 *ptr = read_token->data;
u8 *cksumkey;
u8 flags;
int i;
unsigned int cksum_usage;
dprintk("RPC: %s\n", __func__);
if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_MIC)
return GSS_S_DEFECTIVE_TOKEN;
flags = ptr[2];
if ((!ctx->initiate && (flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)) ||
(ctx->initiate && !(flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)))
return GSS_S_BAD_SIG;
if (flags & KG2_TOKEN_FLAG_SEALED) {
dprintk("%s: token has unexpected sealed flag\n", __func__);
return GSS_S_FAILURE;
}
for (i = 3; i < 8; i++)
if (ptr[i] != 0xff)
return GSS_S_DEFECTIVE_TOKEN;
if (ctx->initiate) {
cksumkey = ctx->acceptor_sign;
cksum_usage = KG_USAGE_ACCEPTOR_SIGN;
} else {
cksumkey = ctx->initiator_sign;
cksum_usage = KG_USAGE_INITIATOR_SIGN;
}
if (make_checksum_v2(ctx, ptr, GSS_KRB5_TOK_HDR_LEN, message_buffer, 0,
cksumkey, cksum_usage, &cksumobj))
return GSS_S_FAILURE;
if (memcmp(cksumobj.data, ptr + GSS_KRB5_TOK_HDR_LEN,
ctx->gk5e->cksumlength))
return GSS_S_BAD_SIG;
/* it got through unscathed. Make sure the context is unexpired */
now = get_seconds();
if (now > ctx->endtime)
return GSS_S_CONTEXT_EXPIRED;
/*
* NOTE: the sequence number at ptr + 8 is skipped, rpcsec_gss
* doesn't want it checked; see page 6 of rfc 2203.
*/
return GSS_S_COMPLETE;
}
u32
gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
struct xdr_buf *message_buffer,
struct xdr_netobj *read_token)
{
struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
switch (ctx->enctype) {
default:
BUG();
case ENCTYPE_DES_CBC_RAW:
case ENCTYPE_DES3_CBC_RAW:
case ENCTYPE_ARCFOUR_HMAC:
return gss_verify_mic_v1(ctx, message_buffer, read_token);
case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
return gss_verify_mic_v2(ctx, message_buffer, read_token);
}
}

View file

@ -0,0 +1,626 @@
/*
* COPYRIGHT (c) 2008
* The Regents of the University of Michigan
* ALL RIGHTS RESERVED
*
* Permission is granted to use, copy, create derivative works
* and redistribute this software and such derivative works
* for any purpose, so long as the name of The University of
* Michigan is not used in any advertising or publicity
* pertaining to the use of distribution of this software
* without specific, written prior authorization. If the
* above copyright notice or any other identification of the
* University of Michigan is included in any copy of any
* portion of this software, then the disclaimer below must
* also be included.
*
* THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION
* FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY
* PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF
* MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
* WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
* REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE
* FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR
* CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING
* OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
* IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGES.
*/
#include <linux/types.h>
#include <linux/jiffies.h>
#include <linux/sunrpc/gss_krb5.h>
#include <linux/random.h>
#include <linux/pagemap.h>
#include <linux/crypto.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
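/*
 * Kerberos v1 wrap tokens pad the plaintext out to the cipher
 * blocksize, with every pad byte holding the pad length (e.g. a
 * blocksize of 8 and 13 bytes of data gives three 0x03 bytes);
 * the helpers below add and strip that padding.
 */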
static inline int
gss_krb5_padding(int blocksize, int length)
{
return blocksize - (length % blocksize);
}
static inline void
gss_krb5_add_padding(struct xdr_buf *buf, int offset, int blocksize)
{
int padding = gss_krb5_padding(blocksize, buf->len - offset);
char *p;
struct kvec *iov;
if (buf->page_len || buf->tail[0].iov_len)
iov = &buf->tail[0];
else
iov = &buf->head[0];
p = iov->iov_base + iov->iov_len;
iov->iov_len += padding;
buf->len += padding;
memset(p, padding, padding);
}
static inline int
gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize)
{
u8 *ptr;
u8 pad;
size_t len = buf->len;
if (len <= buf->head[0].iov_len) {
pad = *(u8 *)(buf->head[0].iov_base + len - 1);
if (pad > buf->head[0].iov_len)
return -EINVAL;
buf->head[0].iov_len -= pad;
goto out;
} else
len -= buf->head[0].iov_len;
if (len <= buf->page_len) {
unsigned int last = (buf->page_base + len - 1)
>>PAGE_CACHE_SHIFT;
unsigned int offset = (buf->page_base + len - 1)
& (PAGE_CACHE_SIZE - 1);
ptr = kmap_atomic(buf->pages[last]);
pad = *(ptr + offset);
kunmap_atomic(ptr);
goto out;
} else
len -= buf->page_len;
BUG_ON(len > buf->tail[0].iov_len);
pad = *(u8 *)(buf->tail[0].iov_base + len - 1);
out:
/* XXX: NOTE: we do not adjust the page lengths--they represent
* a range of data in the real filesystem page cache, and we need
* to know that range so the xdr code can properly place read data.
* However adjusting the head length, as we do above, is harmless.
* In the case of a request that fits into a single page, the server
* also uses length and head length together to determine the original
* start of the request to copy the request for deferral; so it's
* easier on the server if we adjust head and tail length in tandem.
* It's not really a problem that we don't fool with the page and
* tail lengths, though--at worst badly formed xdr might lead the
* server to attempt to parse the padding.
* XXX: Document all these weird requirements for gss mechanism
* wrap/unwrap functions. */
if (pad > blocksize)
return -EINVAL;
if (buf->len > pad)
buf->len -= pad;
else
return -EINVAL;
return 0;
}
void
gss_krb5_make_confounder(char *p, u32 conflen)
{
static u64 i = 0;
u64 *q = (u64 *)p;
/* rfc1964 claims this should be "random". But all that's really
* necessary is that it be unique. And not even that is necessary in
* our case since our "gssapi" implementation exists only to support
* rpcsec_gss, so we know that the only buffers we will ever encrypt
* already begin with a unique sequence number. Just to hedge my bets
* I'll make a half-hearted attempt at something unique, but ensuring
* uniqueness would mean worrying about atomicity and rollover, and I
* don't care enough. */
/* initialize to random value */
if (i == 0) {
i = prandom_u32();
i = (i << 32) | prandom_u32();
}
switch (conflen) {
case 16:
*q++ = i++;
/* fall through */
case 8:
*q++ = i++;
break;
default:
BUG();
}
}
/* Assumptions: the head and tail of inbuf are ours to play with.
* The pages, however, may be real pages in the page cache and we replace
* them with scratch pages from **pages before writing to them. */
/* XXX: obviously the above should be documentation of wrap interface,
* and shouldn't be in this kerberos-specific file. */
/* XXX factor out common code with seal/unseal. */
static u32
gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
struct xdr_buf *buf, struct page **pages)
{
char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
.data = cksumdata};
int blocksize = 0, plainlen;
unsigned char *ptr, *msg_start;
s32 now;
int headlen;
struct page **tmp_pages;
u32 seq_send;
u8 *cksumkey;
u32 conflen = kctx->gk5e->conflen;
dprintk("RPC: %s\n", __func__);
now = get_seconds();
blocksize = crypto_blkcipher_blocksize(kctx->enc);
gss_krb5_add_padding(buf, offset, blocksize);
BUG_ON((buf->len - offset) % blocksize);
plainlen = conflen + buf->len - offset;
headlen = g_token_size(&kctx->mech_used,
GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength + plainlen) -
(buf->len - offset);
ptr = buf->head[0].iov_base + offset;
/* shift data to make room for header. */
xdr_extend_head(buf, offset, headlen);
/* XXX Would be cleverer to encrypt while copying. */
BUG_ON((buf->len - offset - headlen) % blocksize);
g_make_token_header(&kctx->mech_used,
GSS_KRB5_TOK_HDR_LEN +
kctx->gk5e->cksumlength + plainlen, &ptr);
/* ptr now at header described in rfc 1964, section 1.2.1: */
ptr[0] = (unsigned char) ((KG_TOK_WRAP_MSG >> 8) & 0xff);
ptr[1] = (unsigned char) (KG_TOK_WRAP_MSG & 0xff);
msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength;
/*
* signalg and sealalg are stored as if they were converted from LE
* to host endian, even though they're opaque pairs of bytes according
* to the RFC.
*/
*(__le16 *)(ptr + 2) = cpu_to_le16(kctx->gk5e->signalg);
*(__le16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg);
ptr[6] = 0xff;
ptr[7] = 0xff;
gss_krb5_make_confounder(msg_start, conflen);
if (kctx->gk5e->keyed_cksum)
cksumkey = kctx->cksum;
else
cksumkey = NULL;
/* XXXJBF: UGH!: */
tmp_pages = buf->pages;
buf->pages = pages;
if (make_checksum(kctx, ptr, 8, buf, offset + headlen - conflen,
cksumkey, KG_USAGE_SEAL, &md5cksum))
return GSS_S_FAILURE;
buf->pages = tmp_pages;
memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
spin_lock(&krb5_seq_lock);
seq_send = kctx->seq_send++;
spin_unlock(&krb5_seq_lock);
/* XXX would probably be more efficient to compute checksum
* and encrypt at the same time: */
if ((krb5_make_seq_num(kctx, kctx->seq, kctx->initiate ? 0 : 0xff,
seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8)))
return GSS_S_FAILURE;
if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
struct crypto_blkcipher *cipher;
int err;
cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
CRYPTO_ALG_ASYNC);
if (IS_ERR(cipher))
return GSS_S_FAILURE;
krb5_rc4_setup_enc_key(kctx, cipher, seq_send);
err = gss_encrypt_xdr_buf(cipher, buf,
offset + headlen - conflen, pages);
crypto_free_blkcipher(cipher);
if (err)
return GSS_S_FAILURE;
} else {
if (gss_encrypt_xdr_buf(kctx->enc, buf,
offset + headlen - conflen, pages))
return GSS_S_FAILURE;
}
return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
}
static u32
gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
{
int signalg;
int sealalg;
char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
.data = cksumdata};
s32 now;
int direction;
s32 seqnum;
unsigned char *ptr;
int bodysize;
void *data_start, *orig_start;
int data_len;
int blocksize;
u32 conflen = kctx->gk5e->conflen;
int crypt_offset;
u8 *cksumkey;
dprintk("RPC: gss_unwrap_kerberos\n");
ptr = (u8 *)buf->head[0].iov_base + offset;
if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr,
buf->len - offset))
return GSS_S_DEFECTIVE_TOKEN;
if ((ptr[0] != ((KG_TOK_WRAP_MSG >> 8) & 0xff)) ||
(ptr[1] != (KG_TOK_WRAP_MSG & 0xff)))
return GSS_S_DEFECTIVE_TOKEN;
/* XXX sanity-check bodysize?? */
/* get the sign and seal algorithms */
signalg = ptr[2] + (ptr[3] << 8);
if (signalg != kctx->gk5e->signalg)
return GSS_S_DEFECTIVE_TOKEN;
sealalg = ptr[4] + (ptr[5] << 8);
if (sealalg != kctx->gk5e->sealalg)
return GSS_S_DEFECTIVE_TOKEN;
if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
return GSS_S_DEFECTIVE_TOKEN;
/*
* Data starts after token header and checksum. ptr points
* to the beginning of the token header
*/
crypt_offset = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) -
(unsigned char *)buf->head[0].iov_base;
/*
* Need plaintext seqnum to derive encryption key for arcfour-hmac
*/
if (krb5_get_seq_num(kctx, ptr + GSS_KRB5_TOK_HDR_LEN,
ptr + 8, &direction, &seqnum))
return GSS_S_BAD_SIG;
if ((kctx->initiate && direction != 0xff) ||
(!kctx->initiate && direction != 0))
return GSS_S_BAD_SIG;
if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
struct crypto_blkcipher *cipher;
int err;
cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
CRYPTO_ALG_ASYNC);
if (IS_ERR(cipher))
return GSS_S_FAILURE;
krb5_rc4_setup_enc_key(kctx, cipher, seqnum);
err = gss_decrypt_xdr_buf(cipher, buf, crypt_offset);
crypto_free_blkcipher(cipher);
if (err)
return GSS_S_DEFECTIVE_TOKEN;
} else {
if (gss_decrypt_xdr_buf(kctx->enc, buf, crypt_offset))
return GSS_S_DEFECTIVE_TOKEN;
}
if (kctx->gk5e->keyed_cksum)
cksumkey = kctx->cksum;
else
cksumkey = NULL;
if (make_checksum(kctx, ptr, 8, buf, crypt_offset,
cksumkey, KG_USAGE_SEAL, &md5cksum))
return GSS_S_FAILURE;
if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
kctx->gk5e->cksumlength))
return GSS_S_BAD_SIG;
/* it got through unscathed. Make sure the context is unexpired */
now = get_seconds();
if (now > kctx->endtime)
return GSS_S_CONTEXT_EXPIRED;
/* do sequencing checks */
/* Copy the data back to the right position. XXX: Would probably be
* better to copy and encrypt at the same time. */
blocksize = crypto_blkcipher_blocksize(kctx->enc);
data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) +
conflen;
orig_start = buf->head[0].iov_base + offset;
data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
memmove(orig_start, data_start, data_len);
buf->head[0].iov_len -= (data_start - orig_start);
buf->len -= (data_start - orig_start);
if (gss_krb5_remove_padding(buf, blocksize))
return GSS_S_DEFECTIVE_TOKEN;
return GSS_S_COMPLETE;
}
/*
* We can shift data by up to LOCAL_BUF_LEN bytes in a pass. If we need
* to do more than that, we shift repeatedly. Kevin Coffman reports
* seeing 28 bytes as the value used by Microsoft clients and servers
* with AES, so this constant is chosen to allow handling 28 in one pass
* without using too much stack space.
*
* If that proves to be a problem, perhaps we could use a more clever
* algorithm.
*/
#define LOCAL_BUF_LEN 32u
static void rotate_buf_a_little(struct xdr_buf *buf, unsigned int shift)
{
char head[LOCAL_BUF_LEN];
char tmp[LOCAL_BUF_LEN];
unsigned int this_len, i;
BUG_ON(shift > LOCAL_BUF_LEN);
read_bytes_from_xdr_buf(buf, 0, head, shift);
for (i = 0; i + shift < buf->len; i += LOCAL_BUF_LEN) {
this_len = min(LOCAL_BUF_LEN, buf->len - (i + shift));
read_bytes_from_xdr_buf(buf, i+shift, tmp, this_len);
write_bytes_to_xdr_buf(buf, i, tmp, this_len);
}
write_bytes_to_xdr_buf(buf, buf->len - shift, head, shift);
}
static void _rotate_left(struct xdr_buf *buf, unsigned int shift)
{
int shifted = 0;
int this_shift;
shift %= buf->len;
while (shifted < shift) {
this_shift = min(shift - shifted, LOCAL_BUF_LEN);
rotate_buf_a_little(buf, this_shift);
shifted += this_shift;
}
}
static void rotate_left(u32 base, struct xdr_buf *buf, unsigned int shift)
{
struct xdr_buf subbuf;
xdr_buf_subsegment(buf, &subbuf, base, buf->len - base);
_rotate_left(&subbuf, shift);
}
static u32
gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
struct xdr_buf *buf, struct page **pages)
{
int blocksize;
u8 *ptr, *plainhdr;
s32 now;
u8 flags = 0x00;
__be16 *be16ptr;
__be64 *be64ptr;
u32 err;
dprintk("RPC: %s\n", __func__);
if (kctx->gk5e->encrypt_v2 == NULL)
return GSS_S_FAILURE;
/* make room for gss token header */
if (xdr_extend_head(buf, offset, GSS_KRB5_TOK_HDR_LEN))
return GSS_S_FAILURE;
/* construct gss token header */
ptr = plainhdr = buf->head[0].iov_base + offset;
*ptr++ = (unsigned char) ((KG2_TOK_WRAP>>8) & 0xff);
*ptr++ = (unsigned char) (KG2_TOK_WRAP & 0xff);
if ((kctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0)
flags |= KG2_TOKEN_FLAG_SENTBYACCEPTOR;
if ((kctx->flags & KRB5_CTX_FLAG_ACCEPTOR_SUBKEY) != 0)
flags |= KG2_TOKEN_FLAG_ACCEPTORSUBKEY;
/* We always do confidentiality in wrap tokens */
flags |= KG2_TOKEN_FLAG_SEALED;
*ptr++ = flags;
*ptr++ = 0xff;
be16ptr = (__be16 *)ptr;
blocksize = crypto_blkcipher_blocksize(kctx->acceptor_enc);
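/* EC ("extra count") field of the RFC 4121 wrap token; zero here */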
*be16ptr++ = 0;
/* "inner" token header always uses 0 for RRC */
*be16ptr++ = 0;
be64ptr = (__be64 *)be16ptr;
spin_lock(&krb5_seq_lock);
*be64ptr = cpu_to_be64(kctx->seq_send64++);
spin_unlock(&krb5_seq_lock);
err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, pages);
if (err)
return err;
now = get_seconds();
return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
}
static u32
gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
{
s32 now;
u8 *ptr;
u8 flags = 0x00;
u16 ec, rrc;
int err;
u32 headskip, tailskip;
u8 decrypted_hdr[GSS_KRB5_TOK_HDR_LEN];
unsigned int movelen;
dprintk("RPC: %s\n", __func__);
if (kctx->gk5e->decrypt_v2 == NULL)
return GSS_S_FAILURE;
ptr = buf->head[0].iov_base + offset;
if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_WRAP)
return GSS_S_DEFECTIVE_TOKEN;
flags = ptr[2];
if ((!kctx->initiate && (flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)) ||
(kctx->initiate && !(flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)))
return GSS_S_BAD_SIG;
if ((flags & KG2_TOKEN_FLAG_SEALED) == 0) {
dprintk("%s: token missing expected sealed flag\n", __func__);
return GSS_S_DEFECTIVE_TOKEN;
}
if (ptr[3] != 0xff)
return GSS_S_DEFECTIVE_TOKEN;
ec = be16_to_cpup((__be16 *)(ptr + 4));
rrc = be16_to_cpup((__be16 *)(ptr + 6));
/*
* NOTE: the sequence number at ptr + 8 is skipped, rpcsec_gss
* doesn't want it checked; see page 6 of rfc 2203.
*/
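/*
 * RFC 4121 allows the sender to right-rotate the encrypted part of
 * the token by "rrc" bytes; rotate left by the same amount so the
 * payload is back in its natural position before decrypting.
 */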
if (rrc != 0)
rotate_left(offset + 16, buf, rrc);
err = (*kctx->gk5e->decrypt_v2)(kctx, offset, buf,
&headskip, &tailskip);
if (err)
return GSS_S_FAILURE;
/*
* Retrieve the decrypted gss token header and verify
* it against the original
*/
err = read_bytes_from_xdr_buf(buf,
buf->len - GSS_KRB5_TOK_HDR_LEN - tailskip,
decrypted_hdr, GSS_KRB5_TOK_HDR_LEN);
if (err) {
dprintk("%s: error %u getting decrypted_hdr\n", __func__, err);
return GSS_S_FAILURE;
}
if (memcmp(ptr, decrypted_hdr, 6)
|| memcmp(ptr + 8, decrypted_hdr + 8, 8)) {
dprintk("%s: token hdr, plaintext hdr mismatch!\n", __func__);
return GSS_S_FAILURE;
}
/* do sequencing checks */
/* it got through unscathed. Make sure the context is unexpired */
now = get_seconds();
if (now > kctx->endtime)
return GSS_S_CONTEXT_EXPIRED;
/*
* Move the head data back to the right position in xdr_buf.
* We ignore any "ec" data since it might be in the head or
* the tail, and we really don't need to deal with it.
* Note that buf->head[0].iov_len may indicate the available
* head buffer space rather than that actually occupied.
*/
movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len);
movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip;
BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
buf->head[0].iov_len);
memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen);
buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip;
buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip;
/* Trim off the trailing "extra count" and checksum blob */
xdr_buf_trim(buf, ec + GSS_KRB5_TOK_HDR_LEN + tailskip);
return GSS_S_COMPLETE;
}
u32
gss_wrap_kerberos(struct gss_ctx *gctx, int offset,
struct xdr_buf *buf, struct page **pages)
{
struct krb5_ctx *kctx = gctx->internal_ctx_id;
switch (kctx->enctype) {
default:
BUG();
case ENCTYPE_DES_CBC_RAW:
case ENCTYPE_DES3_CBC_RAW:
case ENCTYPE_ARCFOUR_HMAC:
return gss_wrap_kerberos_v1(kctx, offset, buf, pages);
case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
return gss_wrap_kerberos_v2(kctx, offset, buf, pages);
}
}
u32
gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf)
{
struct krb5_ctx *kctx = gctx->internal_ctx_id;
switch (kctx->enctype) {
default:
BUG();
case ENCTYPE_DES_CBC_RAW:
case ENCTYPE_DES3_CBC_RAW:
case ENCTYPE_ARCFOUR_HMAC:
return gss_unwrap_kerberos_v1(kctx, offset, buf);
case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
return gss_unwrap_kerberos_v2(kctx, offset, buf);
}
}

View file

@ -0,0 +1,481 @@
/*
* linux/net/sunrpc/gss_mech_switch.c
*
* Copyright (c) 2001 The Regents of the University of Michigan.
* All rights reserved.
*
* J. Bruce Fields <bfields@umich.edu>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/oid_registry.h>
#include <linux/sunrpc/msg_prot.h>
#include <linux/sunrpc/gss_asn1.h>
#include <linux/sunrpc/auth_gss.h>
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/sunrpc/gss_err.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/gss_api.h>
#include <linux/sunrpc/clnt.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
static LIST_HEAD(registered_mechs);
static DEFINE_SPINLOCK(registered_mechs_lock);
static void
gss_mech_free(struct gss_api_mech *gm)
{
struct pf_desc *pf;
int i;
for (i = 0; i < gm->gm_pf_num; i++) {
pf = &gm->gm_pfs[i];
kfree(pf->auth_domain_name);
pf->auth_domain_name = NULL;
}
}
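/* Build the svcauth domain string for a pseudoflavor, e.g. "krb5" becomes "gss/krb5". */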
static inline char *
make_auth_domain_name(char *name)
{
static char *prefix = "gss/";
char *new;
new = kmalloc(strlen(name) + strlen(prefix) + 1, GFP_KERNEL);
if (new) {
strcpy(new, prefix);
strcat(new, name);
}
return new;
}
static int
gss_mech_svc_setup(struct gss_api_mech *gm)
{
struct pf_desc *pf;
int i, status;
for (i = 0; i < gm->gm_pf_num; i++) {
pf = &gm->gm_pfs[i];
pf->auth_domain_name = make_auth_domain_name(pf->name);
status = -ENOMEM;
if (pf->auth_domain_name == NULL)
goto out;
status = svcauth_gss_register_pseudoflavor(pf->pseudoflavor,
pf->auth_domain_name);
if (status)
goto out;
}
return 0;
out:
gss_mech_free(gm);
return status;
}
/**
* gss_mech_register - register a GSS mechanism
* @gm: GSS mechanism handle
*
* Returns zero if successful, or a negative errno.
*/
int gss_mech_register(struct gss_api_mech *gm)
{
int status;
status = gss_mech_svc_setup(gm);
if (status)
return status;
spin_lock(&registered_mechs_lock);
list_add(&gm->gm_list, &registered_mechs);
spin_unlock(&registered_mechs_lock);
dprintk("RPC: registered gss mechanism %s\n", gm->gm_name);
return 0;
}
EXPORT_SYMBOL_GPL(gss_mech_register);
/**
* gss_mech_unregister - release a GSS mechanism
* @gm: GSS mechanism handle
*
*/
void gss_mech_unregister(struct gss_api_mech *gm)
{
spin_lock(&registered_mechs_lock);
list_del(&gm->gm_list);
spin_unlock(&registered_mechs_lock);
dprintk("RPC: unregistered gss mechanism %s\n", gm->gm_name);
gss_mech_free(gm);
}
EXPORT_SYMBOL_GPL(gss_mech_unregister);
struct gss_api_mech *gss_mech_get(struct gss_api_mech *gm)
{
__module_get(gm->gm_owner);
return gm;
}
EXPORT_SYMBOL(gss_mech_get);
static struct gss_api_mech *
_gss_mech_get_by_name(const char *name)
{
struct gss_api_mech *pos, *gm = NULL;
spin_lock(&registered_mechs_lock);
list_for_each_entry(pos, &registered_mechs, gm_list) {
if (0 == strcmp(name, pos->gm_name)) {
if (try_module_get(pos->gm_owner))
gm = pos;
break;
}
}
spin_unlock(&registered_mechs_lock);
return gm;
}
struct gss_api_mech * gss_mech_get_by_name(const char *name)
{
struct gss_api_mech *gm = NULL;
gm = _gss_mech_get_by_name(name);
if (!gm) {
request_module("rpc-auth-gss-%s", name);
gm = _gss_mech_get_by_name(name);
}
return gm;
}
struct gss_api_mech *gss_mech_get_by_OID(struct rpcsec_gss_oid *obj)
{
struct gss_api_mech *pos, *gm = NULL;
char buf[32];
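/*
 * Print the OID in dotted-decimal form (the Kerberos v5 mechanism is
 * 1.2.840.113554.1.2.2) for the module-alias request and the debug
 * message; the list walk below still compares the raw OID bytes.
 */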
if (sprint_oid(obj->data, obj->len, buf, sizeof(buf)) < 0)
return NULL;
dprintk("RPC: %s(%s)\n", __func__, buf);
request_module("rpc-auth-gss-%s", buf);
spin_lock(&registered_mechs_lock);
list_for_each_entry(pos, &registered_mechs, gm_list) {
if (obj->len == pos->gm_oid.len) {
if (0 == memcmp(obj->data, pos->gm_oid.data, obj->len)) {
if (try_module_get(pos->gm_owner))
gm = pos;
break;
}
}
}
spin_unlock(&registered_mechs_lock);
return gm;
}
static inline int
mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor)
{
int i;
for (i = 0; i < gm->gm_pf_num; i++) {
if (gm->gm_pfs[i].pseudoflavor == pseudoflavor)
return 1;
}
return 0;
}
static struct gss_api_mech *_gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
{
struct gss_api_mech *gm = NULL, *pos;
spin_lock(&registered_mechs_lock);
list_for_each_entry(pos, &registered_mechs, gm_list) {
if (!mech_supports_pseudoflavor(pos, pseudoflavor))
continue;
if (try_module_get(pos->gm_owner))
gm = pos;
break;
}
spin_unlock(&registered_mechs_lock);
return gm;
}
struct gss_api_mech *
gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
{
struct gss_api_mech *gm;
gm = _gss_mech_get_by_pseudoflavor(pseudoflavor);
if (!gm) {
request_module("rpc-auth-gss-%u", pseudoflavor);
gm = _gss_mech_get_by_pseudoflavor(pseudoflavor);
}
return gm;
}
/**
* gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors
* @array: array to fill in
* @size: size of "array"
*
* Returns the number of array items filled in, or a negative errno.
*
* The returned array is not sorted by any policy. Callers should not
* rely on the order of the items in the returned array.
*/
int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr, int size)
{
struct gss_api_mech *pos = NULL;
int j, i = 0;
spin_lock(&registered_mechs_lock);
list_for_each_entry(pos, &registered_mechs, gm_list) {
for (j = 0; j < pos->gm_pf_num; j++) {
if (i >= size) {
spin_unlock(&registered_mechs_lock);
return -ENOMEM;
}
array_ptr[i++] = pos->gm_pfs[j].pseudoflavor;
}
}
spin_unlock(&registered_mechs_lock);
return i;
}
/**
* gss_svc_to_pseudoflavor - map a GSS service number to a pseudoflavor
* @gm: GSS mechanism handle
* @qop: GSS quality-of-protection value
* @service: GSS service value
*
* Returns a matching security flavor, or RPC_AUTH_MAXFLAVOR if none is found.
*/
rpc_authflavor_t gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 qop,
u32 service)
{
int i;
for (i = 0; i < gm->gm_pf_num; i++) {
if (gm->gm_pfs[i].qop == qop &&
gm->gm_pfs[i].service == service) {
return gm->gm_pfs[i].pseudoflavor;
}
}
return RPC_AUTH_MAXFLAVOR;
}
/**
* gss_mech_info2flavor - look up a pseudoflavor given a GSS tuple
* @info: a GSS mech OID, quality of protection, and service value
*
* Returns a matching pseudoflavor, or RPC_AUTH_MAXFLAVOR if the tuple is
* not supported.
*/
rpc_authflavor_t gss_mech_info2flavor(struct rpcsec_gss_info *info)
{
rpc_authflavor_t pseudoflavor;
struct gss_api_mech *gm;
gm = gss_mech_get_by_OID(&info->oid);
if (gm == NULL)
return RPC_AUTH_MAXFLAVOR;
pseudoflavor = gss_svc_to_pseudoflavor(gm, info->qop, info->service);
gss_mech_put(gm);
return pseudoflavor;
}
/**
* gss_mech_flavor2info - look up a GSS tuple for a given pseudoflavor
* @pseudoflavor: GSS pseudoflavor to match
* @info: rpcsec_gss_info structure to fill in
*
* Returns zero and fills in "info" if pseudoflavor matches a
* supported mechanism. Otherwise a negative errno is returned.
*/
int gss_mech_flavor2info(rpc_authflavor_t pseudoflavor,
struct rpcsec_gss_info *info)
{
struct gss_api_mech *gm;
int i;
gm = gss_mech_get_by_pseudoflavor(pseudoflavor);
if (gm == NULL)
return -ENOENT;
for (i = 0; i < gm->gm_pf_num; i++) {
if (gm->gm_pfs[i].pseudoflavor == pseudoflavor) {
memcpy(info->oid.data, gm->gm_oid.data, gm->gm_oid.len);
info->oid.len = gm->gm_oid.len;
info->qop = gm->gm_pfs[i].qop;
info->service = gm->gm_pfs[i].service;
gss_mech_put(gm);
return 0;
}
}
gss_mech_put(gm);
return -ENOENT;
}
u32
gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor)
{
int i;
for (i = 0; i < gm->gm_pf_num; i++) {
if (gm->gm_pfs[i].pseudoflavor == pseudoflavor)
return gm->gm_pfs[i].service;
}
return 0;
}
EXPORT_SYMBOL(gss_pseudoflavor_to_service);
char *
gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service)
{
int i;
for (i = 0; i < gm->gm_pf_num; i++) {
if (gm->gm_pfs[i].service == service)
return gm->gm_pfs[i].auth_domain_name;
}
return NULL;
}
void
gss_mech_put(struct gss_api_mech * gm)
{
if (gm)
module_put(gm->gm_owner);
}
EXPORT_SYMBOL(gss_mech_put);
/* The mech could probably be determined from the token instead, but it's just
* as easy for now to pass it in. */
int
gss_import_sec_context(const void *input_token, size_t bufsize,
struct gss_api_mech *mech,
struct gss_ctx **ctx_id,
time_t *endtime,
gfp_t gfp_mask)
{
if (!(*ctx_id = kzalloc(sizeof(**ctx_id), gfp_mask)))
return -ENOMEM;
(*ctx_id)->mech_type = gss_mech_get(mech);
return mech->gm_ops->gss_import_sec_context(input_token, bufsize,
*ctx_id, endtime, gfp_mask);
}
/* gss_get_mic: compute a mic over message and return mic_token. */
u32
gss_get_mic(struct gss_ctx *context_handle,
struct xdr_buf *message,
struct xdr_netobj *mic_token)
{
return context_handle->mech_type->gm_ops
->gss_get_mic(context_handle,
message,
mic_token);
}
/* gss_verify_mic: check whether the provided mic_token verifies message. */
u32
gss_verify_mic(struct gss_ctx *context_handle,
struct xdr_buf *message,
struct xdr_netobj *mic_token)
{
return context_handle->mech_type->gm_ops
->gss_verify_mic(context_handle,
message,
mic_token);
}
/*
* This function is called from both the client and server code.
* Each makes guarantees about how much "slack" space is available
* for the underlying function in "buf"'s head and tail while
* performing the wrap.
*
* The client and server code allocate RPC_MAX_AUTH_SIZE extra
* space in both the head and tail which is available for use by
* the wrap function.
*
* Underlying functions should verify they do not use more than
* RPC_MAX_AUTH_SIZE of extra space in either the head or tail
* when performing the wrap.
*/
u32
gss_wrap(struct gss_ctx *ctx_id,
int offset,
struct xdr_buf *buf,
struct page **inpages)
{
return ctx_id->mech_type->gm_ops
->gss_wrap(ctx_id, offset, buf, inpages);
}
u32
gss_unwrap(struct gss_ctx *ctx_id,
int offset,
struct xdr_buf *buf)
{
return ctx_id->mech_type->gm_ops
->gss_unwrap(ctx_id, offset, buf);
}
/* gss_delete_sec_context: free all resources associated with context_handle.
* Note this differs from the RFC 2744-specified prototype in that we don't
* bother returning an output token, since it would never be used anyway. */
u32
gss_delete_sec_context(struct gss_ctx **context_handle)
{
dprintk("RPC: gss_delete_sec_context deleting %p\n",
*context_handle);
if (!*context_handle)
return GSS_S_NO_CONTEXT;
if ((*context_handle)->internal_ctx_id)
(*context_handle)->mech_type->gm_ops
->gss_delete_sec_context((*context_handle)
->internal_ctx_id);
gss_mech_put((*context_handle)->mech_type);
kfree(*context_handle);
*context_handle=NULL;
return GSS_S_COMPLETE;
}

View file

@ -0,0 +1,382 @@
/*
* linux/net/sunrpc/gss_rpc_upcall.c
*
* Copyright (C) 2012 Simo Sorce <simo@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/types.h>
#include <linux/un.h>
#include <linux/sunrpc/svcauth.h>
#include "gss_rpc_upcall.h"
#define GSSPROXY_SOCK_PATHNAME "/var/run/gssproxy.sock"
#define GSSPROXY_PROGRAM (400112u)
#define GSSPROXY_VERS_1 (1u)
/*
* Encoding/Decoding functions
*/
enum {
GSSX_NULL = 0, /* Unused */
GSSX_INDICATE_MECHS = 1,
GSSX_GET_CALL_CONTEXT = 2,
GSSX_IMPORT_AND_CANON_NAME = 3,
GSSX_EXPORT_CRED = 4,
GSSX_IMPORT_CRED = 5,
GSSX_ACQUIRE_CRED = 6,
GSSX_STORE_CRED = 7,
GSSX_INIT_SEC_CONTEXT = 8,
GSSX_ACCEPT_SEC_CONTEXT = 9,
GSSX_RELEASE_HANDLE = 10,
GSSX_GET_MIC = 11,
GSSX_VERIFY = 12,
GSSX_WRAP = 13,
GSSX_UNWRAP = 14,
GSSX_WRAP_SIZE_LIMIT = 15,
};
#define PROC(proc, name) \
[GSSX_##proc] = { \
.p_proc = GSSX_##proc, \
.p_encode = (kxdreproc_t)gssx_enc_##name, \
.p_decode = (kxdrdproc_t)gssx_dec_##name, \
.p_arglen = GSSX_ARG_##name##_sz, \
.p_replen = GSSX_RES_##name##_sz, \
.p_statidx = GSSX_##proc, \
.p_name = #proc, \
}
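/*
 * Each PROC() entry is indexed by its GSSX_* procedure number; only
 * ACCEPT_SEC_CONTEXT has real XDR routines here, the others resolve
 * to the NULL stubs defined in gss_rpc_xdr.h.
 */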
static struct rpc_procinfo gssp_procedures[] = {
PROC(INDICATE_MECHS, indicate_mechs),
PROC(GET_CALL_CONTEXT, get_call_context),
PROC(IMPORT_AND_CANON_NAME, import_and_canon_name),
PROC(EXPORT_CRED, export_cred),
PROC(IMPORT_CRED, import_cred),
PROC(ACQUIRE_CRED, acquire_cred),
PROC(STORE_CRED, store_cred),
PROC(INIT_SEC_CONTEXT, init_sec_context),
PROC(ACCEPT_SEC_CONTEXT, accept_sec_context),
PROC(RELEASE_HANDLE, release_handle),
PROC(GET_MIC, get_mic),
PROC(VERIFY, verify),
PROC(WRAP, wrap),
PROC(UNWRAP, unwrap),
PROC(WRAP_SIZE_LIMIT, wrap_size_limit),
};
/*
* Common transport functions
*/
static const struct rpc_program gssp_program;
static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt)
{
static const struct sockaddr_un gssp_localaddr = {
.sun_family = AF_LOCAL,
.sun_path = GSSPROXY_SOCK_PATHNAME,
};
struct rpc_create_args args = {
.net = net,
.protocol = XPRT_TRANSPORT_LOCAL,
.address = (struct sockaddr *)&gssp_localaddr,
.addrsize = sizeof(gssp_localaddr),
.servername = "localhost",
.program = &gssp_program,
.version = GSSPROXY_VERS_1,
.authflavor = RPC_AUTH_NULL,
/*
* Note we want connection to be done in the caller's
* filesystem namespace. We therefore turn off the idle
* timeout, which would result in reconnections being
* done without the correct namespace:
*/
.flags = RPC_CLNT_CREATE_NOPING |
RPC_CLNT_CREATE_NO_IDLE_TIMEOUT
};
struct rpc_clnt *clnt;
int result = 0;
clnt = rpc_create(&args);
if (IS_ERR(clnt)) {
dprintk("RPC: failed to create AF_LOCAL gssproxy "
"client (errno %ld).\n", PTR_ERR(clnt));
result = PTR_ERR(clnt);
*_clnt = NULL;
goto out;
}
dprintk("RPC: created new gssp local client (gssp_local_clnt: "
"%p)\n", clnt);
*_clnt = clnt;
out:
return result;
}
void init_gssp_clnt(struct sunrpc_net *sn)
{
mutex_init(&sn->gssp_lock);
sn->gssp_clnt = NULL;
}
int set_gssp_clnt(struct net *net)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct rpc_clnt *clnt;
int ret;
mutex_lock(&sn->gssp_lock);
ret = gssp_rpc_create(net, &clnt);
if (!ret) {
if (sn->gssp_clnt)
rpc_shutdown_client(sn->gssp_clnt);
sn->gssp_clnt = clnt;
}
mutex_unlock(&sn->gssp_lock);
return ret;
}
void clear_gssp_clnt(struct sunrpc_net *sn)
{
mutex_lock(&sn->gssp_lock);
if (sn->gssp_clnt) {
rpc_shutdown_client(sn->gssp_clnt);
sn->gssp_clnt = NULL;
}
mutex_unlock(&sn->gssp_lock);
}
static struct rpc_clnt *get_gssp_clnt(struct sunrpc_net *sn)
{
struct rpc_clnt *clnt;
mutex_lock(&sn->gssp_lock);
clnt = sn->gssp_clnt;
if (clnt)
atomic_inc(&clnt->cl_count);
mutex_unlock(&sn->gssp_lock);
return clnt;
}
static int gssp_call(struct net *net, struct rpc_message *msg)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct rpc_clnt *clnt;
int status;
clnt = get_gssp_clnt(sn);
if (!clnt)
return -EIO;
status = rpc_call_sync(clnt, msg, 0);
if (status < 0) {
dprintk("gssp: rpc_call returned error %d\n", -status);
switch (status) {
case -EPROTONOSUPPORT:
status = -EINVAL;
break;
case -ECONNREFUSED:
case -ETIMEDOUT:
case -ENOTCONN:
status = -EAGAIN;
break;
case -ERESTARTSYS:
if (signalled())
status = -EINTR;
break;
default:
break;
}
}
rpc_release_client(clnt);
return status;
}
static void gssp_free_receive_pages(struct gssx_arg_accept_sec_context *arg)
{
int i;
for (i = 0; i < arg->npages && arg->pages[i]; i++)
__free_page(arg->pages[i]);
}
static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg)
{
arg->npages = DIV_ROUND_UP(NGROUPS_MAX * 4, PAGE_SIZE);
arg->pages = kzalloc(arg->npages * sizeof(struct page *), GFP_KERNEL);
/*
* XXX: actual pages are allocated by xdr layer in
* xdr_partial_copy_from_skb.
*/
if (!arg->pages)
return -ENOMEM;
return 0;
}
/*
* Public functions
*/
/* numbers somewhat arbitrary but large enough for current needs */
#define GSSX_MAX_OUT_HANDLE 128
#define GSSX_MAX_SRC_PRINC 256
#define GSSX_KMEMBUF (GSSX_max_output_handle_sz + \
GSSX_max_oid_sz + \
GSSX_max_princ_sz + \
sizeof(struct svc_cred))
int gssp_accept_sec_context_upcall(struct net *net,
struct gssp_upcall_data *data)
{
struct gssx_ctx ctxh = {
.state = data->in_handle
};
struct gssx_arg_accept_sec_context arg = {
.input_token = data->in_token,
};
struct gssx_ctx rctxh = {
/*
* pass in the max length we expect for each of these
* buffers but let the xdr code kmalloc them:
*/
.exported_context_token.len = GSSX_max_output_handle_sz,
.mech.len = GSS_OID_MAX_LEN,
.src_name.display_name.len = GSSX_max_princ_sz
};
struct gssx_res_accept_sec_context res = {
.context_handle = &rctxh,
.output_token = &data->out_token
};
struct rpc_message msg = {
.rpc_proc = &gssp_procedures[GSSX_ACCEPT_SEC_CONTEXT],
.rpc_argp = &arg,
.rpc_resp = &res,
.rpc_cred = NULL, /* FIXME ? */
};
struct xdr_netobj client_name = { 0 , NULL };
int ret;
if (data->in_handle.len != 0)
arg.context_handle = &ctxh;
res.output_token->len = GSSX_max_output_token_sz;
ret = gssp_alloc_receive_pages(&arg);
if (ret)
return ret;
/* use nfs/ for targ_name ? */
ret = gssp_call(net, &msg);
gssp_free_receive_pages(&arg);
/* we need to fetch all data even in case of error so
* that we can free special structures if they have been allocated */
data->major_status = res.status.major_status;
data->minor_status = res.status.minor_status;
if (res.context_handle) {
data->out_handle = rctxh.exported_context_token;
data->mech_oid.len = rctxh.mech.len;
if (rctxh.mech.data)
memcpy(data->mech_oid.data, rctxh.mech.data,
data->mech_oid.len);
client_name = rctxh.src_name.display_name;
}
if (res.options.count == 1) {
gssx_buffer *value = &res.options.data[0].value;
/* Currently we only decode CREDS_VALUE; if we add
* anything else we'll have to loop and match on the
* option name */
if (value->len == 1) {
/* steal group info from struct svc_cred */
data->creds = *(struct svc_cred *)value->data;
data->found_creds = 1;
}
/* whether we use it or not, free data */
kfree(value->data);
}
if (res.options.count != 0) {
kfree(res.options.data);
}
/* convert to GSS_NT_HOSTBASED_SERVICE form and set into creds */
if (data->found_creds && client_name.data != NULL) {
char *c;
data->creds.cr_principal = kstrndup(client_name.data,
client_name.len, GFP_KERNEL);
if (data->creds.cr_principal) {
/* terminate and remove realm part */
c = strchr(data->creds.cr_principal, '@');
if (c) {
*c = '\0';
/* change service-hostname delimiter */
c = strchr(data->creds.cr_principal, '/');
if (c) *c = '@';
}
if (!c) {
/* not a service principal */
kfree(data->creds.cr_principal);
data->creds.cr_principal = NULL;
}
}
}
kfree(client_name.data);
return ret;
}
void gssp_free_upcall_data(struct gssp_upcall_data *data)
{
kfree(data->in_handle.data);
kfree(data->out_handle.data);
kfree(data->out_token.data);
free_svc_cred(&data->creds);
}
/*
* Initialization stuff
*/
static const struct rpc_version gssp_version1 = {
.number = GSSPROXY_VERS_1,
.nrprocs = ARRAY_SIZE(gssp_procedures),
.procs = gssp_procedures,
};
static const struct rpc_version *gssp_version[] = {
NULL,
&gssp_version1,
};
static struct rpc_stat gssp_stats;
static const struct rpc_program gssp_program = {
.name = "gssproxy",
.number = GSSPROXY_PROGRAM,
.nrvers = ARRAY_SIZE(gssp_version),
.version = gssp_version,
.stats = &gssp_stats,
};

View file

@ -0,0 +1,48 @@
/*
* linux/net/sunrpc/gss_rpc_upcall.h
*
* Copyright (C) 2012 Simo Sorce <simo@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _GSS_RPC_UPCALL_H
#define _GSS_RPC_UPCALL_H
#include <linux/sunrpc/gss_api.h>
#include <linux/sunrpc/auth_gss.h>
#include "gss_rpc_xdr.h"
#include "../netns.h"
struct gssp_upcall_data {
struct xdr_netobj in_handle;
struct gssp_in_token in_token;
struct xdr_netobj out_handle;
struct xdr_netobj out_token;
struct rpcsec_gss_oid mech_oid;
struct svc_cred creds;
int found_creds;
int major_status;
int minor_status;
};
int gssp_accept_sec_context_upcall(struct net *net,
struct gssp_upcall_data *data);
void gssp_free_upcall_data(struct gssp_upcall_data *data);
void init_gssp_clnt(struct sunrpc_net *);
int set_gssp_clnt(struct net *);
void clear_gssp_clnt(struct sunrpc_net *);
#endif /* _GSS_RPC_UPCALL_H */

View file

@ -0,0 +1,839 @@
/*
* GSS Proxy upcall module
*
* Copyright (C) 2012 Simo Sorce <simo@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/sunrpc/svcauth.h>
#include "gss_rpc_xdr.h"
static int gssx_enc_bool(struct xdr_stream *xdr, int v)
{
__be32 *p;
p = xdr_reserve_space(xdr, 4);
if (unlikely(p == NULL))
return -ENOSPC;
*p = v ? xdr_one : xdr_zero;
return 0;
}
static int gssx_dec_bool(struct xdr_stream *xdr, u32 *v)
{
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -ENOSPC;
*v = be32_to_cpu(*p);
return 0;
}
static int gssx_enc_buffer(struct xdr_stream *xdr,
gssx_buffer *buf)
{
__be32 *p;
p = xdr_reserve_space(xdr, sizeof(u32) + buf->len);
if (!p)
return -ENOSPC;
xdr_encode_opaque(p, buf->data, buf->len);
return 0;
}
static int gssx_enc_in_token(struct xdr_stream *xdr,
struct gssp_in_token *in)
{
__be32 *p;
p = xdr_reserve_space(xdr, 4);
if (!p)
return -ENOSPC;
*p = cpu_to_be32(in->page_len);
/* all we need to do is to write pages */
xdr_write_pages(xdr, in->pages, in->page_base, in->page_len);
return 0;
}
static int gssx_dec_buffer(struct xdr_stream *xdr,
gssx_buffer *buf)
{
u32 length;
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -ENOSPC;
length = be32_to_cpup(p);
p = xdr_inline_decode(xdr, length);
if (unlikely(p == NULL))
return -ENOSPC;
if (buf->len == 0) {
/* we intentionally are not interested in this buffer */
return 0;
}
if (length > buf->len)
return -ENOSPC;
if (!buf->data) {
buf->data = kmemdup(p, length, GFP_KERNEL);
if (!buf->data)
return -ENOMEM;
} else {
memcpy(buf->data, p, length);
}
buf->len = length;
return 0;
}
static int gssx_enc_option(struct xdr_stream *xdr,
struct gssx_option *opt)
{
int err;
err = gssx_enc_buffer(xdr, &opt->option);
if (err)
return err;
err = gssx_enc_buffer(xdr, &opt->value);
return err;
}
static int gssx_dec_option(struct xdr_stream *xdr,
struct gssx_option *opt)
{
int err;
err = gssx_dec_buffer(xdr, &opt->option);
if (err)
return err;
err = gssx_dec_buffer(xdr, &opt->value);
return err;
}
static int dummy_enc_opt_array(struct xdr_stream *xdr,
struct gssx_option_array *oa)
{
__be32 *p;
if (oa->count != 0)
return -EINVAL;
p = xdr_reserve_space(xdr, 4);
if (!p)
return -ENOSPC;
*p = 0;
return 0;
}
static int dummy_dec_opt_array(struct xdr_stream *xdr,
struct gssx_option_array *oa)
{
struct gssx_option dummy;
u32 count, i;
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -ENOSPC;
count = be32_to_cpup(p++);
memset(&dummy, 0, sizeof(dummy));
for (i = 0; i < count; i++) {
gssx_dec_option(xdr, &dummy);
}
oa->count = 0;
oa->data = NULL;
return 0;
}
static int get_host_u32(struct xdr_stream *xdr, u32 *res)
{
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (!p)
return -EINVAL;
/* Contents of linux creds are all host-endian: */
memcpy(res, p, sizeof(u32));
return 0;
}
static int gssx_dec_linux_creds(struct xdr_stream *xdr,
struct svc_cred *creds)
{
u32 length;
__be32 *p;
u32 tmp;
u32 N;
int i, err;
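/*
 * The creds option value is an XDR opaque whose payload is a series
 * of host-endian u32s: uid, gid, supplementary gid count N, then N
 * gids, so the opaque length must equal (3 + N) * sizeof(u32).
 */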
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -ENOSPC;
length = be32_to_cpup(p);
if (length > (3 + NGROUPS_MAX) * sizeof(u32))
return -ENOSPC;
/* uid */
err = get_host_u32(xdr, &tmp);
if (err)
return err;
creds->cr_uid = make_kuid(&init_user_ns, tmp);
/* gid */
err = get_host_u32(xdr, &tmp);
if (err)
return err;
creds->cr_gid = make_kgid(&init_user_ns, tmp);
/* number of additional gid's */
err = get_host_u32(xdr, &tmp);
if (err)
return err;
N = tmp;
if ((3 + N) * sizeof(u32) != length)
return -EINVAL;
creds->cr_group_info = groups_alloc(N);
if (creds->cr_group_info == NULL)
return -ENOMEM;
/* gid's */
for (i = 0; i < N; i++) {
kgid_t kgid;
err = get_host_u32(xdr, &tmp);
if (err)
goto out_free_groups;
err = -EINVAL;
kgid = make_kgid(&init_user_ns, tmp);
if (!gid_valid(kgid))
goto out_free_groups;
GROUP_AT(creds->cr_group_info, i) = kgid;
}
return 0;
out_free_groups:
groups_free(creds->cr_group_info);
return err;
}
static int gssx_dec_option_array(struct xdr_stream *xdr,
struct gssx_option_array *oa)
{
struct svc_cred *creds;
u32 count, i;
__be32 *p;
int err;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -ENOSPC;
count = be32_to_cpup(p++);
if (!count)
return 0;
/* we recognize only 1 currently: CREDS_VALUE */
oa->count = 1;
oa->data = kmalloc(sizeof(struct gssx_option), GFP_KERNEL);
if (!oa->data)
return -ENOMEM;
creds = kmalloc(sizeof(struct svc_cred), GFP_KERNEL);
if (!creds) {
kfree(oa->data);
return -ENOMEM;
}
oa->data[0].option.data = CREDS_VALUE;
oa->data[0].option.len = sizeof(CREDS_VALUE);
oa->data[0].value.data = (void *)creds;
oa->data[0].value.len = 0;
for (i = 0; i < count; i++) {
gssx_buffer dummy = { 0, NULL };
u32 length;
/* option buffer */
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -ENOSPC;
length = be32_to_cpup(p);
p = xdr_inline_decode(xdr, length);
if (unlikely(p == NULL))
return -ENOSPC;
if (length == sizeof(CREDS_VALUE) &&
memcmp(p, CREDS_VALUE, sizeof(CREDS_VALUE)) == 0) {
/* We have creds here. parse them */
err = gssx_dec_linux_creds(xdr, creds);
if (err)
return err;
oa->data[0].value.len = 1; /* presence */
} else {
/* consume uninteresting buffer */
err = gssx_dec_buffer(xdr, &dummy);
if (err)
return err;
}
}
return 0;
}
static int gssx_dec_status(struct xdr_stream *xdr,
struct gssx_status *status)
{
__be32 *p;
int err;
/* status->major_status */
p = xdr_inline_decode(xdr, 8);
if (unlikely(p == NULL))
return -ENOSPC;
p = xdr_decode_hyper(p, &status->major_status);
/* status->mech */
err = gssx_dec_buffer(xdr, &status->mech);
if (err)
return err;
/* status->minor_status */
p = xdr_inline_decode(xdr, 8);
if (unlikely(p == NULL))
return -ENOSPC;
p = xdr_decode_hyper(p, &status->minor_status);
/* status->major_status_string */
err = gssx_dec_buffer(xdr, &status->major_status_string);
if (err)
return err;
/* status->minor_status_string */
err = gssx_dec_buffer(xdr, &status->minor_status_string);
if (err)
return err;
/* status->server_ctx */
err = gssx_dec_buffer(xdr, &status->server_ctx);
if (err)
return err;
/* we assume we have no options for now, so simply consume them */
/* status->options */
err = dummy_dec_opt_array(xdr, &status->options);
return err;
}
static int gssx_enc_call_ctx(struct xdr_stream *xdr,
struct gssx_call_ctx *ctx)
{
struct gssx_option opt;
__be32 *p;
int err;
/* ctx->locale */
err = gssx_enc_buffer(xdr, &ctx->locale);
if (err)
return err;
/* ctx->server_ctx */
err = gssx_enc_buffer(xdr, &ctx->server_ctx);
if (err)
return err;
/* we always want to ask for lucid contexts */
/* ctx->options */
p = xdr_reserve_space(xdr, 4);
*p = cpu_to_be32(2);
/* we want a lucid_v1 context */
opt.option.data = LUCID_OPTION;
opt.option.len = sizeof(LUCID_OPTION);
opt.value.data = LUCID_VALUE;
opt.value.len = sizeof(LUCID_VALUE);
err = gssx_enc_option(xdr, &opt);
/* ..and user creds */
opt.option.data = CREDS_OPTION;
opt.option.len = sizeof(CREDS_OPTION);
opt.value.data = CREDS_VALUE;
opt.value.len = sizeof(CREDS_VALUE);
err = gssx_enc_option(xdr, &opt);
return err;
}
static int gssx_dec_name_attr(struct xdr_stream *xdr,
struct gssx_name_attr *attr)
{
int err;
/* attr->attr */
err = gssx_dec_buffer(xdr, &attr->attr);
if (err)
return err;
/* attr->value */
err = gssx_dec_buffer(xdr, &attr->value);
if (err)
return err;
/* attr->extensions */
err = dummy_dec_opt_array(xdr, &attr->extensions);
return err;
}
static int dummy_enc_nameattr_array(struct xdr_stream *xdr,
struct gssx_name_attr_array *naa)
{
__be32 *p;
if (naa->count != 0)
return -EINVAL;
p = xdr_reserve_space(xdr, 4);
if (!p)
return -ENOSPC;
*p = 0;
return 0;
}
static int dummy_dec_nameattr_array(struct xdr_stream *xdr,
struct gssx_name_attr_array *naa)
{
struct gssx_name_attr dummy = { .attr = {.len = 0} };
u32 count, i;
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -ENOSPC;
count = be32_to_cpup(p++);
for (i = 0; i < count; i++) {
gssx_dec_name_attr(xdr, &dummy);
}
naa->count = 0;
naa->data = NULL;
return 0;
}
static struct xdr_netobj zero_netobj = {};
static struct gssx_name_attr_array zero_name_attr_array = {};
static struct gssx_option_array zero_option_array = {};
static int gssx_enc_name(struct xdr_stream *xdr,
struct gssx_name *name)
{
int err;
/* name->display_name */
err = gssx_enc_buffer(xdr, &name->display_name);
if (err)
return err;
/* name->name_type */
err = gssx_enc_buffer(xdr, &zero_netobj);
if (err)
return err;
/* name->exported_name */
err = gssx_enc_buffer(xdr, &zero_netobj);
if (err)
return err;
/* name->exported_composite_name */
err = gssx_enc_buffer(xdr, &zero_netobj);
if (err)
return err;
/* leave name_attributes empty for now, will add once we have any
* to pass up at all */
/* name->name_attributes */
err = dummy_enc_nameattr_array(xdr, &zero_name_attr_array);
if (err)
return err;
/* leave options empty for now, will add once we have any options
* to pass up at all */
/* name->extensions */
err = dummy_enc_opt_array(xdr, &zero_option_array);
return err;
}
static int gssx_dec_name(struct xdr_stream *xdr,
struct gssx_name *name)
{
struct xdr_netobj dummy_netobj = { .len = 0 };
struct gssx_name_attr_array dummy_name_attr_array = { .count = 0 };
struct gssx_option_array dummy_option_array = { .count = 0 };
int err;
/* name->display_name */
err = gssx_dec_buffer(xdr, &name->display_name);
if (err)
return err;
/* name->name_type */
err = gssx_dec_buffer(xdr, &dummy_netobj);
if (err)
return err;
/* name->exported_name */
err = gssx_dec_buffer(xdr, &dummy_netobj);
if (err)
return err;
/* name->exported_composite_name */
err = gssx_dec_buffer(xdr, &dummy_netobj);
if (err)
return err;
/* we assume we have no attributes for now, so simply consume them */
/* name->name_attributes */
err = dummy_dec_nameattr_array(xdr, &dummy_name_attr_array);
if (err)
return err;
/* we assume we have no options for now, so simply consume them */
/* name->extensions */
err = dummy_dec_opt_array(xdr, &dummy_option_array);
return err;
}
static int dummy_enc_credel_array(struct xdr_stream *xdr,
struct gssx_cred_element_array *cea)
{
__be32 *p;
if (cea->count != 0)
return -EINVAL;
p = xdr_reserve_space(xdr, 4);
if (!p)
return -ENOSPC;
*p = 0;
return 0;
}
static int gssx_enc_cred(struct xdr_stream *xdr,
struct gssx_cred *cred)
{
int err;
/* cred->desired_name */
err = gssx_enc_name(xdr, &cred->desired_name);
if (err)
return err;
/* cred->elements */
err = dummy_enc_credel_array(xdr, &cred->elements);
if (err)
return err;
/* cred->cred_handle_reference */
err = gssx_enc_buffer(xdr, &cred->cred_handle_reference);
if (err)
return err;
/* cred->needs_release */
err = gssx_enc_bool(xdr, cred->needs_release);
return err;
}
static int gssx_enc_ctx(struct xdr_stream *xdr,
struct gssx_ctx *ctx)
{
__be32 *p;
int err;
/* ctx->exported_context_token */
err = gssx_enc_buffer(xdr, &ctx->exported_context_token);
if (err)
return err;
/* ctx->state */
err = gssx_enc_buffer(xdr, &ctx->state);
if (err)
return err;
/* ctx->need_release */
err = gssx_enc_bool(xdr, ctx->need_release);
if (err)
return err;
/* ctx->mech */
err = gssx_enc_buffer(xdr, &ctx->mech);
if (err)
return err;
/* ctx->src_name */
err = gssx_enc_name(xdr, &ctx->src_name);
if (err)
return err;
/* ctx->targ_name */
err = gssx_enc_name(xdr, &ctx->targ_name);
if (err)
return err;
/* ctx->lifetime */
p = xdr_reserve_space(xdr, 8+8);
if (!p)
return -ENOSPC;
p = xdr_encode_hyper(p, ctx->lifetime);
/* ctx->ctx_flags */
p = xdr_encode_hyper(p, ctx->ctx_flags);
/* ctx->locally_initiated */
err = gssx_enc_bool(xdr, ctx->locally_initiated);
if (err)
return err;
/* ctx->open */
err = gssx_enc_bool(xdr, ctx->open);
if (err)
return err;
/* leave options empty for now, will add once we have any options
* to pass up at all */
/* ctx->options */
err = dummy_enc_opt_array(xdr, &ctx->options);
return err;
}
static int gssx_dec_ctx(struct xdr_stream *xdr,
struct gssx_ctx *ctx)
{
__be32 *p;
int err;
/* ctx->exported_context_token */
err = gssx_dec_buffer(xdr, &ctx->exported_context_token);
if (err)
return err;
/* ctx->state */
err = gssx_dec_buffer(xdr, &ctx->state);
if (err)
return err;
/* ctx->need_release */
err = gssx_dec_bool(xdr, &ctx->need_release);
if (err)
return err;
/* ctx->mech */
err = gssx_dec_buffer(xdr, &ctx->mech);
if (err)
return err;
/* ctx->src_name */
err = gssx_dec_name(xdr, &ctx->src_name);
if (err)
return err;
/* ctx->targ_name */
err = gssx_dec_name(xdr, &ctx->targ_name);
if (err)
return err;
/* ctx->lifetime */
p = xdr_inline_decode(xdr, 8+8);
if (unlikely(p == NULL))
return -ENOSPC;
p = xdr_decode_hyper(p, &ctx->lifetime);
/* ctx->ctx_flags */
p = xdr_decode_hyper(p, &ctx->ctx_flags);
/* ctx->locally_initiated */
err = gssx_dec_bool(xdr, &ctx->locally_initiated);
if (err)
return err;
/* ctx->open */
err = gssx_dec_bool(xdr, &ctx->open);
if (err)
return err;
/* we assume we have no options for now, so simply consume them */
/* ctx->options */
err = dummy_dec_opt_array(xdr, &ctx->options);
return err;
}
static int gssx_enc_cb(struct xdr_stream *xdr, struct gssx_cb *cb)
{
__be32 *p;
int err;
/* cb->initiator_addrtype */
p = xdr_reserve_space(xdr, 8);
if (!p)
return -ENOSPC;
p = xdr_encode_hyper(p, cb->initiator_addrtype);
/* cb->initiator_address */
err = gssx_enc_buffer(xdr, &cb->initiator_address);
if (err)
return err;
/* cb->acceptor_addrtype */
p = xdr_reserve_space(xdr, 8);
if (!p)
return -ENOSPC;
p = xdr_encode_hyper(p, cb->acceptor_addrtype);
/* cb->acceptor_address */
err = gssx_enc_buffer(xdr, &cb->acceptor_address);
if (err)
return err;
/* cb->application_data */
err = gssx_enc_buffer(xdr, &cb->application_data);
return err;
}
void gssx_enc_accept_sec_context(struct rpc_rqst *req,
struct xdr_stream *xdr,
struct gssx_arg_accept_sec_context *arg)
{
int err;
err = gssx_enc_call_ctx(xdr, &arg->call_ctx);
if (err)
goto done;
/* arg->context_handle */
if (arg->context_handle)
err = gssx_enc_ctx(xdr, arg->context_handle);
else
err = gssx_enc_bool(xdr, 0);
if (err)
goto done;
/* arg->cred_handle */
if (arg->cred_handle)
err = gssx_enc_cred(xdr, arg->cred_handle);
else
err = gssx_enc_bool(xdr, 0);
if (err)
goto done;
/* arg->input_token */
err = gssx_enc_in_token(xdr, &arg->input_token);
if (err)
goto done;
/* arg->input_cb */
if (arg->input_cb)
err = gssx_enc_cb(xdr, arg->input_cb);
else
err = gssx_enc_bool(xdr, 0);
if (err)
goto done;
err = gssx_enc_bool(xdr, arg->ret_deleg_cred);
if (err)
goto done;
/* leave options empty for now, will add once we have any options
* to pass up at all */
/* arg->options */
err = dummy_enc_opt_array(xdr, &arg->options);
xdr_inline_pages(&req->rq_rcv_buf,
PAGE_SIZE/2 /* pretty arbitrary */,
arg->pages, 0 /* page base */, arg->npages * PAGE_SIZE);
done:
if (err)
dprintk("RPC: gssx_enc_accept_sec_context: %d\n", err);
}
int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
struct xdr_stream *xdr,
struct gssx_res_accept_sec_context *res)
{
u32 value_follows;
int err;
/* res->status */
err = gssx_dec_status(xdr, &res->status);
if (err)
return err;
/* res->context_handle */
err = gssx_dec_bool(xdr, &value_follows);
if (err)
return err;
if (value_follows) {
err = gssx_dec_ctx(xdr, res->context_handle);
if (err)
return err;
} else {
res->context_handle = NULL;
}
/* res->output_token */
err = gssx_dec_bool(xdr, &value_follows);
if (err)
return err;
if (value_follows) {
err = gssx_dec_buffer(xdr, res->output_token);
if (err)
return err;
} else {
res->output_token = NULL;
}
/* res->delegated_cred_handle */
err = gssx_dec_bool(xdr, &value_follows);
if (err)
return err;
if (value_follows) {
/* we do not support upcall servers sending this data. */
return -EINVAL;
}
/* res->options */
err = gssx_dec_option_array(xdr, &res->options);
return err;
}

View file

@ -0,0 +1,267 @@
/*
* GSS Proxy upcall module
*
* Copyright (C) 2012 Simo Sorce <simo@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _LINUX_GSS_RPC_XDR_H
#define _LINUX_GSS_RPC_XDR_H
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xprtsock.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
#define LUCID_OPTION "exported_context_type"
#define LUCID_VALUE "linux_lucid_v1"
#define CREDS_OPTION "exported_creds_type"
#define CREDS_VALUE "linux_creds_v1"
typedef struct xdr_netobj gssx_buffer;
typedef struct xdr_netobj utf8string;
typedef struct xdr_netobj gssx_OID;
enum gssx_cred_usage {
GSSX_C_INITIATE = 1,
GSSX_C_ACCEPT = 2,
GSSX_C_BOTH = 3,
};
struct gssx_option {
gssx_buffer option;
gssx_buffer value;
};
struct gssx_option_array {
u32 count;
struct gssx_option *data;
};
struct gssx_status {
u64 major_status;
gssx_OID mech;
u64 minor_status;
utf8string major_status_string;
utf8string minor_status_string;
gssx_buffer server_ctx;
struct gssx_option_array options;
};
struct gssx_call_ctx {
utf8string locale;
gssx_buffer server_ctx;
struct gssx_option_array options;
};
struct gssx_name_attr {
gssx_buffer attr;
gssx_buffer value;
struct gssx_option_array extensions;
};
struct gssx_name_attr_array {
u32 count;
struct gssx_name_attr *data;
};
struct gssx_name {
gssx_buffer display_name;
};
typedef struct gssx_name gssx_name;
struct gssx_cred_element {
gssx_name MN;
gssx_OID mech;
u32 cred_usage;
u64 initiator_time_rec;
u64 acceptor_time_rec;
struct gssx_option_array options;
};
struct gssx_cred_element_array {
u32 count;
struct gssx_cred_element *data;
};
struct gssx_cred {
gssx_name desired_name;
struct gssx_cred_element_array elements;
gssx_buffer cred_handle_reference;
u32 needs_release;
};
struct gssx_ctx {
gssx_buffer exported_context_token;
gssx_buffer state;
u32 need_release;
gssx_OID mech;
gssx_name src_name;
gssx_name targ_name;
u64 lifetime;
u64 ctx_flags;
u32 locally_initiated;
u32 open;
struct gssx_option_array options;
};
struct gssx_cb {
u64 initiator_addrtype;
gssx_buffer initiator_address;
u64 acceptor_addrtype;
gssx_buffer acceptor_address;
gssx_buffer application_data;
};
/* This structure is not defined in the protocol.
* It is used in the kernel to carry around a big buffer
* as a set of pages */
struct gssp_in_token {
struct page **pages; /* Array of contiguous pages */
unsigned int page_base; /* Start of page data */
unsigned int page_len; /* Length of page data */
};
struct gssx_arg_accept_sec_context {
struct gssx_call_ctx call_ctx;
struct gssx_ctx *context_handle;
struct gssx_cred *cred_handle;
struct gssp_in_token input_token;
struct gssx_cb *input_cb;
u32 ret_deleg_cred;
struct gssx_option_array options;
struct page **pages;
unsigned int npages;
};
struct gssx_res_accept_sec_context {
struct gssx_status status;
struct gssx_ctx *context_handle;
gssx_buffer *output_token;
/* struct gssx_cred *delegated_cred_handle; not used in kernel */
struct gssx_option_array options;
};
#define gssx_enc_indicate_mechs NULL
#define gssx_dec_indicate_mechs NULL
#define gssx_enc_get_call_context NULL
#define gssx_dec_get_call_context NULL
#define gssx_enc_import_and_canon_name NULL
#define gssx_dec_import_and_canon_name NULL
#define gssx_enc_export_cred NULL
#define gssx_dec_export_cred NULL
#define gssx_enc_import_cred NULL
#define gssx_dec_import_cred NULL
#define gssx_enc_acquire_cred NULL
#define gssx_dec_acquire_cred NULL
#define gssx_enc_store_cred NULL
#define gssx_dec_store_cred NULL
#define gssx_enc_init_sec_context NULL
#define gssx_dec_init_sec_context NULL
void gssx_enc_accept_sec_context(struct rpc_rqst *req,
struct xdr_stream *xdr,
struct gssx_arg_accept_sec_context *args);
int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
struct xdr_stream *xdr,
struct gssx_res_accept_sec_context *res);
#define gssx_enc_release_handle NULL
#define gssx_dec_release_handle NULL
#define gssx_enc_get_mic NULL
#define gssx_dec_get_mic NULL
#define gssx_enc_verify NULL
#define gssx_dec_verify NULL
#define gssx_enc_wrap NULL
#define gssx_dec_wrap NULL
#define gssx_enc_unwrap NULL
#define gssx_dec_unwrap NULL
#define gssx_enc_wrap_size_limit NULL
#define gssx_dec_wrap_size_limit NULL
/* unimplemented calls are set to 0 size */
#define GSSX_ARG_indicate_mechs_sz 0
#define GSSX_RES_indicate_mechs_sz 0
#define GSSX_ARG_get_call_context_sz 0
#define GSSX_RES_get_call_context_sz 0
#define GSSX_ARG_import_and_canon_name_sz 0
#define GSSX_RES_import_and_canon_name_sz 0
#define GSSX_ARG_export_cred_sz 0
#define GSSX_RES_export_cred_sz 0
#define GSSX_ARG_import_cred_sz 0
#define GSSX_RES_import_cred_sz 0
#define GSSX_ARG_acquire_cred_sz 0
#define GSSX_RES_acquire_cred_sz 0
#define GSSX_ARG_store_cred_sz 0
#define GSSX_RES_store_cred_sz 0
#define GSSX_ARG_init_sec_context_sz 0
#define GSSX_RES_init_sec_context_sz 0
#define GSSX_default_in_call_ctx_sz (4 + 4 + 4 + \
8 + sizeof(LUCID_OPTION) + sizeof(LUCID_VALUE) + \
8 + sizeof(CREDS_OPTION) + sizeof(CREDS_VALUE))
#define GSSX_default_in_ctx_hndl_sz (4 + 4+8 + 4 + 4 + 6*4 + 6*4 + 8 + 8 + \
4 + 4 + 4)
#define GSSX_default_in_cred_sz 4 /* we send in no cred_handle */
#define GSSX_default_in_token_sz 4 /* does *not* include token data */
#define GSSX_default_in_cb_sz 4 /* we do not use channel bindings */
#define GSSX_ARG_accept_sec_context_sz (GSSX_default_in_call_ctx_sz + \
GSSX_default_in_ctx_hndl_sz + \
GSSX_default_in_cred_sz + \
GSSX_default_in_token_sz + \
GSSX_default_in_cb_sz + \
4 /* no deleg creds boolean */ + \
4) /* empty options */
/* somewhat arbitrary numbers but large enough (we ignore some of the data
* sent down, but it is part of the protocol so we need enough space to take
* it in) */
#define GSSX_default_status_sz 8 + 24 + 8 + 256 + 256 + 16 + 4
#define GSSX_max_output_handle_sz 128
#define GSSX_max_oid_sz 16
#define GSSX_max_princ_sz 256
#define GSSX_default_ctx_sz (GSSX_max_output_handle_sz + \
16 + 4 + GSSX_max_oid_sz + \
2 * GSSX_max_princ_sz + \
8 + 8 + 4 + 4 + 4)
#define GSSX_max_output_token_sz 1024
/* grouplist not included; we allocate separate pages for that: */
#define GSSX_max_creds_sz (4 + 4 + 4 /* + NGROUPS_MAX*4 */)
#define GSSX_RES_accept_sec_context_sz (GSSX_default_status_sz + \
GSSX_default_ctx_sz + \
GSSX_max_output_token_sz + \
4 + GSSX_max_creds_sz)
#define GSSX_ARG_release_handle_sz 0
#define GSSX_RES_release_handle_sz 0
#define GSSX_ARG_get_mic_sz 0
#define GSSX_RES_get_mic_sz 0
#define GSSX_ARG_verify_sz 0
#define GSSX_RES_verify_sz 0
#define GSSX_ARG_wrap_sz 0
#define GSSX_RES_wrap_sz 0
#define GSSX_ARG_unwrap_sz 0
#define GSSX_RES_unwrap_sz 0
#define GSSX_ARG_wrap_size_limit_sz 0
#define GSSX_RES_wrap_size_limit_sz 0
#endif /* _LINUX_GSS_RPC_XDR_H */

File diff suppressed because it is too large Load diff

144
net/sunrpc/auth_null.c Normal file
View file

@ -0,0 +1,144 @@
/*
* linux/net/sunrpc/auth_null.c
*
* AUTH_NULL authentication. Really :-)
*
* Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
*/
#include <linux/types.h>
#include <linux/module.h>
#include <linux/sunrpc/clnt.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
static struct rpc_auth null_auth;
static struct rpc_cred null_cred;
static struct rpc_auth *
nul_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
atomic_inc(&null_auth.au_count);
return &null_auth;
}
static void
nul_destroy(struct rpc_auth *auth)
{
}
/*
* Lookup NULL creds for current process
*/
static struct rpc_cred *
nul_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{
if (flags & RPCAUTH_LOOKUP_RCU)
return &null_cred;
return get_rpccred(&null_cred);
}
/*
* Destroy cred handle.
*/
static void
nul_destroy_cred(struct rpc_cred *cred)
{
}
/*
* Match cred handle against current process
*/
static int
nul_match(struct auth_cred *acred, struct rpc_cred *cred, int taskflags)
{
return 1;
}
/*
* Marshal credential.
*/
static __be32 *
nul_marshal(struct rpc_task *task, __be32 *p)
{
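/*
 * AUTH_NULL puts an empty credential and an empty verifier on the wire:
 * a flavor word followed by a zero body length for each.
 */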
*p++ = htonl(RPC_AUTH_NULL);
*p++ = 0;
*p++ = htonl(RPC_AUTH_NULL);
*p++ = 0;
return p;
}
/*
* Refresh credential. This is a no-op for AUTH_NULL
*/
static int
nul_refresh(struct rpc_task *task)
{
set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_rqstp->rq_cred->cr_flags);
return 0;
}
static __be32 *
nul_validate(struct rpc_task *task, __be32 *p)
{
rpc_authflavor_t flavor;
u32 size;
flavor = ntohl(*p++);
if (flavor != RPC_AUTH_NULL) {
printk("RPC: bad verf flavor: %u\n", flavor);
return ERR_PTR(-EIO);
}
size = ntohl(*p++);
if (size != 0) {
printk("RPC: bad verf size: %u\n", size);
return ERR_PTR(-EIO);
}
return p;
}
const struct rpc_authops authnull_ops = {
.owner = THIS_MODULE,
.au_flavor = RPC_AUTH_NULL,
.au_name = "NULL",
.create = nul_create,
.destroy = nul_destroy,
.lookup_cred = nul_lookup_cred,
};
static
struct rpc_auth null_auth = {
.au_cslack = 4,
.au_rslack = 2,
.au_ops = &authnull_ops,
.au_flavor = RPC_AUTH_NULL,
.au_count = ATOMIC_INIT(0),
};
static
const struct rpc_credops null_credops = {
.cr_name = "AUTH_NULL",
.crdestroy = nul_destroy_cred,
.crbind = rpcauth_generic_bind_cred,
.crmatch = nul_match,
.crmarshal = nul_marshal,
.crrefresh = nul_refresh,
.crvalidate = nul_validate,
};
static
struct rpc_cred null_cred = {
.cr_lru = LIST_HEAD_INIT(null_cred.cr_lru),
.cr_auth = &null_auth,
.cr_ops = &null_credops,
.cr_count = ATOMIC_INIT(1),
.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE,
#ifdef RPC_DEBUG
.cr_magic = RPCAUTH_CRED_MAGIC,
#endif
};

247
net/sunrpc/auth_unix.c Normal file
View file

@ -0,0 +1,247 @@
/*
* linux/net/sunrpc/auth_unix.c
*
* UNIX-style authentication; no AUTH_SHORT support
*
* Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
*/
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/auth.h>
#include <linux/user_namespace.h>
#define NFS_NGROUPS 16
struct unx_cred {
struct rpc_cred uc_base;
kgid_t uc_gid;
kgid_t uc_gids[NFS_NGROUPS];
};
#define uc_uid uc_base.cr_uid
#define UNX_WRITESLACK (21 + (UNX_MAXNODENAME >> 2))
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
static struct rpc_auth unix_auth;
static const struct rpc_credops unix_credops;
static struct rpc_auth *
unx_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
dprintk("RPC: creating UNIX authenticator for client %p\n",
clnt);
atomic_inc(&unix_auth.au_count);
return &unix_auth;
}
static void
unx_destroy(struct rpc_auth *auth)
{
dprintk("RPC: destroying UNIX authenticator %p\n", auth);
rpcauth_clear_credcache(auth->au_credcache);
}
/*
* Lookup AUTH_UNIX creds for current process
*/
static struct rpc_cred *
unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{
return rpcauth_lookup_credcache(auth, acred, flags);
}
static struct rpc_cred *
unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{
struct unx_cred *cred;
unsigned int groups = 0;
unsigned int i;
dprintk("RPC: allocating UNIX cred for uid %d gid %d\n",
from_kuid(&init_user_ns, acred->uid),
from_kgid(&init_user_ns, acred->gid));
if (!(cred = kmalloc(sizeof(*cred), GFP_NOFS)))
return ERR_PTR(-ENOMEM);
rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops);
cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
if (acred->group_info != NULL)
groups = acred->group_info->ngroups;
if (groups > NFS_NGROUPS)
groups = NFS_NGROUPS;
cred->uc_gid = acred->gid;
for (i = 0; i < groups; i++)
cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
if (i < NFS_NGROUPS)
cred->uc_gids[i] = INVALID_GID;
return &cred->uc_base;
}
static void
unx_free_cred(struct unx_cred *unx_cred)
{
dprintk("RPC: unx_free_cred %p\n", unx_cred);
kfree(unx_cred);
}
static void
unx_free_cred_callback(struct rcu_head *head)
{
struct unx_cred *unx_cred = container_of(head, struct unx_cred, uc_base.cr_rcu);
unx_free_cred(unx_cred);
}
static void
unx_destroy_cred(struct rpc_cred *cred)
{
call_rcu(&cred->cr_rcu, unx_free_cred_callback);
}
/*
* Match credentials against current process creds.
* The root_override argument takes care of cases where the caller may
* request root creds (e.g. for NFS swapping).
*/
static int
unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags)
{
struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base);
unsigned int groups = 0;
unsigned int i;
if (!uid_eq(cred->uc_uid, acred->uid) || !gid_eq(cred->uc_gid, acred->gid))
return 0;
if (acred->group_info != NULL)
groups = acred->group_info->ngroups;
if (groups > NFS_NGROUPS)
groups = NFS_NGROUPS;
for (i = 0; i < groups ; i++)
if (!gid_eq(cred->uc_gids[i], GROUP_AT(acred->group_info, i)))
return 0;
if (groups < NFS_NGROUPS && gid_valid(cred->uc_gids[groups]))
return 0;
return 1;
}
/*
* Marshal credentials.
* Maybe we should keep a cached credential for performance reasons.
*/
static __be32 *
unx_marshal(struct rpc_task *task, __be32 *p)
{
struct rpc_clnt *clnt = task->tk_client;
struct unx_cred *cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base);
__be32 *base, *hold;
int i;
*p++ = htonl(RPC_AUTH_UNIX);
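/*
 * Reserve a word for the credential body length; it is back-filled
 * below once the nodename, uid, gid and gid array have been written.
 */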
base = p++;
*p++ = htonl(jiffies/HZ);
/*
* Copy the UTS nodename captured when the client was created.
*/
p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
*p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid));
*p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid));
hold = p++;
for (i = 0; i < 16 && gid_valid(cred->uc_gids[i]); i++)
*p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gids[i]));
*hold = htonl(p - hold - 1); /* gid array length */
*base = htonl((p - base - 1) << 2); /* cred length */
*p++ = htonl(RPC_AUTH_NULL);
*p++ = htonl(0);
return p;
}
/*
* Refresh credentials. This is a no-op for AUTH_UNIX
*/
static int
unx_refresh(struct rpc_task *task)
{
set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_rqstp->rq_cred->cr_flags);
return 0;
}
static __be32 *
unx_validate(struct rpc_task *task, __be32 *p)
{
rpc_authflavor_t flavor;
u32 size;
flavor = ntohl(*p++);
if (flavor != RPC_AUTH_NULL &&
flavor != RPC_AUTH_UNIX &&
flavor != RPC_AUTH_SHORT) {
printk("RPC: bad verf flavor: %u\n", flavor);
return ERR_PTR(-EIO);
}
size = ntohl(*p++);
if (size > RPC_MAX_AUTH_SIZE) {
printk("RPC: giant verf size: %u\n", size);
return ERR_PTR(-EIO);
}
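/*
 * Record the verifier size (in 32-bit words, plus the two-word
 * flavor/length header) so reply buffer space estimates stay accurate.
 */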
task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2;
p += (size >> 2);
return p;
}
int __init rpc_init_authunix(void)
{
return rpcauth_init_credcache(&unix_auth);
}
void rpc_destroy_authunix(void)
{
rpcauth_destroy_credcache(&unix_auth);
}
const struct rpc_authops authunix_ops = {
.owner = THIS_MODULE,
.au_flavor = RPC_AUTH_UNIX,
.au_name = "UNIX",
.create = unx_create,
.destroy = unx_destroy,
.lookup_cred = unx_lookup_cred,
.crcreate = unx_create_cred,
};
static
struct rpc_auth unix_auth = {
.au_cslack = UNX_WRITESLACK,
.au_rslack = 2, /* assume AUTH_NULL verf */
.au_ops = &authunix_ops,
.au_flavor = RPC_AUTH_UNIX,
.au_count = ATOMIC_INIT(0),
};
static
const struct rpc_credops unix_credops = {
.cr_name = "AUTH_UNIX",
.crdestroy = unx_destroy_cred,
.crbind = rpcauth_generic_bind_cred,
.crmatch = unx_match,
.crmarshal = unx_marshal,
.crrefresh = unx_refresh,
.crvalidate = unx_validate,
};

325
net/sunrpc/backchannel_rqst.c Normal file
View file

@ -0,0 +1,325 @@
/******************************************************************************
(c) 2007 Network Appliance, Inc. All Rights Reserved.
(c) 2009 NetApp. All Rights Reserved.
NetApp provides this source code under the GPL v2 License.
The GPL v2 license is available at
http://opensource.org/licenses/gpl-license.php.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
#include <linux/tcp.h>
#include <linux/slab.h>
#include <linux/sunrpc/xprt.h>
#include <linux/export.h>
#include <linux/sunrpc/bc_xprt.h>
#ifdef RPC_DEBUG
#define RPCDBG_FACILITY RPCDBG_TRANS
#endif
/*
* Helper routines that track the number of preallocation elements
* on the transport.
*/
static inline int xprt_need_to_requeue(struct rpc_xprt *xprt)
{
return xprt->bc_alloc_count > 0;
}
static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n)
{
xprt->bc_alloc_count += n;
}
static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n)
{
return xprt->bc_alloc_count -= n;
}
/*
* Free the preallocated rpc_rqst structure and the memory
* buffers hanging off of it.
*/
static void xprt_free_allocation(struct rpc_rqst *req)
{
struct xdr_buf *xbufp;
dprintk("RPC: free allocations for req= %p\n", req);
WARN_ON_ONCE(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
xbufp = &req->rq_private_buf;
free_page((unsigned long)xbufp->head[0].iov_base);
xbufp = &req->rq_snd_buf;
free_page((unsigned long)xbufp->head[0].iov_base);
kfree(req);
}
/*
* Preallocate up to min_reqs structures and related buffers for use
* by the backchannel. This function can be called multiple times
* when creating new sessions that use the same rpc_xprt. The
* preallocated buffers are added to the pool of resources used by
* the rpc_xprt. Any one of these resources may be used by an
* incoming callback request. It's up to the higher levels in the
* stack to enforce that the maximum number of session slots is not
* being exceeded.
*
* Some callback arguments can be large. For example, a pNFS server
* using multiple deviceids. The list can be unbounded, but the client
* has the ability to tell the server the maximum size of the callback
* requests. Each deviceID is 16 bytes, so allocate one page
* for the arguments to have enough room to receive a number of these
* deviceIDs. The NFS client indicates to the pNFS server that its
* callback requests can be up to 4096 bytes in size.
*/
int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
{
struct page *page_rcv = NULL, *page_snd = NULL;
struct xdr_buf *xbufp = NULL;
struct rpc_rqst *req, *tmp;
struct list_head tmp_list;
int i;
dprintk("RPC: setup backchannel transport\n");
/*
* We use a temporary list to keep track of the preallocated
* buffers. Once we're done building the list we splice it
* into the backchannel preallocation list off of the rpc_xprt
* struct. This helps minimize the amount of time the list
* lock is held on the rpc_xprt struct. It also makes cleanup
* easier in case of memory allocation errors.
*/
INIT_LIST_HEAD(&tmp_list);
for (i = 0; i < min_reqs; i++) {
/* Pre-allocate one backchannel rpc_rqst */
req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL);
if (req == NULL) {
printk(KERN_ERR "Failed to create bc rpc_rqst\n");
goto out_free;
}
/* Add the allocated buffer to the tmp list */
dprintk("RPC: adding req= %p\n", req);
list_add(&req->rq_bc_pa_list, &tmp_list);
req->rq_xprt = xprt;
INIT_LIST_HEAD(&req->rq_list);
INIT_LIST_HEAD(&req->rq_bc_list);
/* Preallocate one XDR receive buffer */
page_rcv = alloc_page(GFP_KERNEL);
if (page_rcv == NULL) {
printk(KERN_ERR "Failed to create bc receive xbuf\n");
goto out_free;
}
xbufp = &req->rq_rcv_buf;
xbufp->head[0].iov_base = page_address(page_rcv);
xbufp->head[0].iov_len = PAGE_SIZE;
xbufp->tail[0].iov_base = NULL;
xbufp->tail[0].iov_len = 0;
xbufp->page_len = 0;
xbufp->len = PAGE_SIZE;
xbufp->buflen = PAGE_SIZE;
/* Preallocate one XDR send buffer */
page_snd = alloc_page(GFP_KERNEL);
if (page_snd == NULL) {
printk(KERN_ERR "Failed to create bc snd xbuf\n");
goto out_free;
}
xbufp = &req->rq_snd_buf;
xbufp->head[0].iov_base = page_address(page_snd);
xbufp->head[0].iov_len = 0;
xbufp->tail[0].iov_base = NULL;
xbufp->tail[0].iov_len = 0;
xbufp->page_len = 0;
xbufp->len = 0;
xbufp->buflen = PAGE_SIZE;
}
/*
* Add the temporary list to the backchannel preallocation list
*/
spin_lock_bh(&xprt->bc_pa_lock);
list_splice(&tmp_list, &xprt->bc_pa_list);
xprt_inc_alloc_count(xprt, min_reqs);
spin_unlock_bh(&xprt->bc_pa_lock);
dprintk("RPC: setup backchannel transport done\n");
return 0;
out_free:
/*
* Memory allocation failed, free the temporary list
*/
list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list) {
list_del(&req->rq_bc_pa_list);
xprt_free_allocation(req);
}
dprintk("RPC: setup backchannel transport failed\n");
return -ENOMEM;
}
EXPORT_SYMBOL_GPL(xprt_setup_backchannel);
/**
* xprt_destroy_backchannel - Destroys the backchannel preallocated structures.
* @xprt: the transport holding the preallocated structures
* @max_reqs: the maximum number of preallocated structures to destroy
*
* Since these structures may have been allocated by multiple calls
* to xprt_setup_backchannel, we only destroy up to the maximum number
* of reqs specified by the caller.
*/
void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs)
{
struct rpc_rqst *req = NULL, *tmp = NULL;
dprintk("RPC: destroy backchannel transport\n");
if (max_reqs == 0)
goto out;
spin_lock_bh(&xprt->bc_pa_lock);
xprt_dec_alloc_count(xprt, max_reqs);
list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) {
dprintk("RPC: req=%p\n", req);
list_del(&req->rq_bc_pa_list);
xprt_free_allocation(req);
if (--max_reqs == 0)
break;
}
spin_unlock_bh(&xprt->bc_pa_lock);
out:
dprintk("RPC: backchannel list empty= %s\n",
list_empty(&xprt->bc_pa_list) ? "true" : "false");
}
EXPORT_SYMBOL_GPL(xprt_destroy_backchannel);
static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid)
{
struct rpc_rqst *req = NULL;
dprintk("RPC: allocate a backchannel request\n");
if (list_empty(&xprt->bc_pa_list))
goto not_found;
req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst,
rq_bc_pa_list);
req->rq_reply_bytes_recvd = 0;
req->rq_bytes_sent = 0;
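/*
 * Reset rq_private_buf to the pristine preallocated receive buffer;
 * the transport writes the incoming call data into this copy.
 */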
memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
sizeof(req->rq_private_buf));
req->rq_xid = xid;
req->rq_connect_cookie = xprt->connect_cookie;
not_found:
dprintk("RPC: backchannel req=%p\n", req);
return req;
}
/*
* Return the preallocated rpc_rqst structure and XDR buffers
* associated with this rpc_task.
*/
void xprt_free_bc_request(struct rpc_rqst *req)
{
struct rpc_xprt *xprt = req->rq_xprt;
dprintk("RPC: free backchannel req=%p\n", req);
req->rq_connect_cookie = xprt->connect_cookie - 1;
smp_mb__before_atomic();
WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
smp_mb__after_atomic();
if (!xprt_need_to_requeue(xprt)) {
/*
* The last remaining session was destroyed while this
* entry was in use. Free the entry and don't attempt
* to add back to the list because there is no need to
* have anymore preallocated entries.
*/
dprintk("RPC: Last session removed req=%p\n", req);
xprt_free_allocation(req);
return;
}
/*
* Return it to the list of preallocations so that it
* may be reused by a new callback request.
*/
spin_lock_bh(&xprt->bc_pa_lock);
list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
spin_unlock_bh(&xprt->bc_pa_lock);
}
/*
 * One or more rpc_rqst structures have been preallocated during the
 * backchannel setup. Buffer space for the send and private XDR buffers
 * has been preallocated as well. Use xprt_alloc_bc_request to claim one
 * of these structures and xprt_free_bc_request to return it.
 *
 * We know that we are called in soft-interrupt context, so a plain
 * spin_lock is sufficient; there is no need for the bottom-half
 * (spin_lock_bh) variant.
 *
 * Return an available rpc_rqst, or NULL if none are available.
 */
struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid)
{
struct rpc_rqst *req;
spin_lock(&xprt->bc_pa_lock);
list_for_each_entry(req, &xprt->bc_pa_list, rq_bc_pa_list) {
if (req->rq_connect_cookie != xprt->connect_cookie)
continue;
if (req->rq_xid == xid)
goto found;
}
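/*
 * No entry on the current connection matches this XID; hand out a
 * free preallocated request instead.
 */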
req = xprt_alloc_bc_request(xprt, xid);
found:
spin_unlock(&xprt->bc_pa_lock);
return req;
}
/*
* Add callback request to callback list. The callback
* service sleeps on the sv_cb_waitq waiting for new
* requests. Wake it up after enqueuing the request.
*/
void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
{
struct rpc_xprt *xprt = req->rq_xprt;
struct svc_serv *bc_serv = xprt->bc_serv;
spin_lock(&xprt->bc_pa_lock);
list_del(&req->rq_bc_pa_list);
spin_unlock(&xprt->bc_pa_lock);
req->rq_private_buf.len = copied;
set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
dprintk("RPC: add callback request to list\n");
spin_lock(&bc_serv->sv_cb_lock);
list_add(&req->rq_bc_list, &bc_serv->sv_cb_list);
wake_up(&bc_serv->sv_cb_waitq);
spin_unlock(&bc_serv->sv_cb_lock);
}

63
net/sunrpc/bc_svc.c Normal file
View file

@ -0,0 +1,63 @@
/******************************************************************************
(c) 2007 Network Appliance, Inc. All Rights Reserved.
(c) 2009 NetApp. All Rights Reserved.
NetApp provides this source code under the GPL v2 License.
The GPL v2 license is available at
http://opensource.org/licenses/gpl-license.php.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
/*
* The NFSv4.1 callback service helper routines.
* They implement the transport level processing required to send the
* reply over an existing open connection previously established by the client.
*/
#include <linux/module.h>
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/bc_xprt.h>
#define RPCDBG_FACILITY RPCDBG_SVCDSP
/* Empty callback ops */
static const struct rpc_call_ops nfs41_callback_ops = {
};
/*
* Send the callback reply
*/
int bc_send(struct rpc_rqst *req)
{
struct rpc_task *task;
int ret;
dprintk("RPC: bc_send req= %p\n", req);
task = rpc_run_bc_task(req, &nfs41_callback_ops);
if (IS_ERR(task))
ret = PTR_ERR(task);
else {
WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
ret = task->tk_status;
rpc_put_task(task);
}
dprintk("RPC: bc_send ret= %d\n", ret);
return ret;
}

1838
net/sunrpc/cache.c Normal file

File diff suppressed because it is too large Load diff

2469
net/sunrpc/clnt.c Normal file

File diff suppressed because it is too large Load diff

42
net/sunrpc/netns.h Normal file
View file

@ -0,0 +1,42 @@
#ifndef __SUNRPC_NETNS_H__
#define __SUNRPC_NETNS_H__
#include <net/net_namespace.h>
#include <net/netns/generic.h>
struct cache_detail;
struct sunrpc_net {
struct proc_dir_entry *proc_net_rpc;
struct cache_detail *ip_map_cache;
struct cache_detail *unix_gid_cache;
struct cache_detail *rsc_cache;
struct cache_detail *rsi_cache;
struct super_block *pipefs_sb;
struct rpc_pipe *gssd_dummy;
struct mutex pipefs_sb_lock;
struct list_head all_clients;
spinlock_t rpc_client_lock;
struct rpc_clnt *rpcb_local_clnt;
struct rpc_clnt *rpcb_local_clnt4;
spinlock_t rpcb_clnt_lock;
unsigned int rpcb_users;
unsigned int rpcb_is_af_local : 1;
struct mutex gssp_lock;
struct rpc_clnt *gssp_clnt;
int use_gss_proxy;
int pipe_version;
atomic_t pipe_users;
struct proc_dir_entry *use_gssp_proc;
};
extern int sunrpc_net_id;
int ip_map_cache_create(struct net *);
void ip_map_cache_destroy(struct net *);
#endif

1527
net/sunrpc/rpc_pipe.c Normal file

File diff suppressed because it is too large Load diff

1151
net/sunrpc/rpcb_clnt.c Normal file

File diff suppressed because it is too large Load diff

1140
net/sunrpc/sched.c Normal file

File diff suppressed because it is too large Load diff

187
net/sunrpc/socklib.c Normal file
View file

@ -0,0 +1,187 @@
/*
* linux/net/sunrpc/socklib.c
*
* Common socket helper routines for RPC client and server
*
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*/
#include <linux/compiler.h>
#include <linux/netdevice.h>
#include <linux/gfp.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <linux/pagemap.h>
#include <linux/udp.h>
#include <linux/sunrpc/xdr.h>
#include <linux/export.h>
/**
* xdr_skb_read_bits - copy some data bits from skb to internal buffer
* @desc: sk_buff copy helper
* @to: copy destination
* @len: number of bytes to copy
*
* Possibly called several times to iterate over an sk_buff and copy
* data out of it.
*/
size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
{
if (len > desc->count)
len = desc->count;
if (unlikely(skb_copy_bits(desc->skb, desc->offset, to, len)))
return 0;
desc->count -= len;
desc->offset += len;
return len;
}
EXPORT_SYMBOL_GPL(xdr_skb_read_bits);
/**
* xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer
* @desc: sk_buff copy helper
* @to: copy destination
* @len: number of bytes to copy
*
* Same as xdr_skb_read_bits, but calculates a checksum at the same time.
*/
static size_t xdr_skb_read_and_csum_bits(struct xdr_skb_reader *desc, void *to, size_t len)
{
unsigned int pos;
__wsum csum2;
if (len > desc->count)
len = desc->count;
pos = desc->offset;
csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0);
desc->csum = csum_block_add(desc->csum, csum2, pos);
desc->count -= len;
desc->offset += len;
return len;
}
/**
* xdr_partial_copy_from_skb - copy data out of an skb
* @xdr: target XDR buffer
* @base: starting offset
* @desc: sk_buff copy helper
* @copy_actor: virtual method for copying data
*
*/
ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor)
{
struct page **ppage = xdr->pages;
unsigned int len, pglen = xdr->page_len;
ssize_t copied = 0;
size_t ret;
len = xdr->head[0].iov_len;
if (base < len) {
len -= base;
ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len);
copied += ret;
if (ret != len || !desc->count)
goto out;
base = 0;
} else
base -= len;
if (unlikely(pglen == 0))
goto copy_tail;
if (unlikely(base >= pglen)) {
base -= pglen;
goto copy_tail;
}
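/*
 * Skip the pages already consumed (plus the buffer's own page_base)
 * and compute the offset into the first page we copy into.
 */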
if (base || xdr->page_base) {
pglen -= base;
base += xdr->page_base;
ppage += base >> PAGE_CACHE_SHIFT;
base &= ~PAGE_CACHE_MASK;
}
do {
char *kaddr;
/* ACL likes to be lazy in allocating pages - ACLs
* are small by default but can get huge. */
if (unlikely(*ppage == NULL)) {
*ppage = alloc_page(GFP_ATOMIC);
if (unlikely(*ppage == NULL)) {
if (copied == 0)
copied = -ENOMEM;
goto out;
}
}
len = PAGE_CACHE_SIZE;
kaddr = kmap_atomic(*ppage);
if (base) {
len -= base;
if (pglen < len)
len = pglen;
ret = copy_actor(desc, kaddr + base, len);
base = 0;
} else {
if (pglen < len)
len = pglen;
ret = copy_actor(desc, kaddr, len);
}
flush_dcache_page(*ppage);
kunmap_atomic(kaddr);
copied += ret;
if (ret != len || !desc->count)
goto out;
ppage++;
} while ((pglen -= len) != 0);
copy_tail:
len = xdr->tail[0].iov_len;
if (base < len)
copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base);
out:
return copied;
}
EXPORT_SYMBOL_GPL(xdr_partial_copy_from_skb);
/**
* csum_partial_copy_to_xdr - checksum and copy data
* @xdr: target XDR buffer
* @skb: source skb
*
* We have set things up such that we perform the checksum of the UDP
* packet in parallel with the copies into the RPC client iovec. -DaveM
*/
int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
{
struct xdr_skb_reader desc;
desc.skb = skb;
desc.offset = sizeof(struct udphdr);
desc.count = skb->len - desc.offset;
if (skb_csum_unnecessary(skb))
goto no_checksum;
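/*
 * Seed the running checksum with the UDP header bytes, folding in any
 * partial checksum already supplied by the device.
 */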
desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_and_csum_bits) < 0)
return -1;
if (desc.offset != skb->len) {
__wsum csum2;
csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0);
desc.csum = csum_block_add(desc.csum, csum2, desc.offset);
}
if (desc.count)
return -1;
if (csum_fold(desc.csum))
return -1;
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
!skb->csum_complete_sw)
netdev_rx_csum_fault(skb->dev);
return 0;
no_checksum:
if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0)
return -1;
if (desc.count)
return -1;
return 0;
}
EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr);

282
net/sunrpc/stats.c Normal file
View file

@ -0,0 +1,282 @@
/*
* linux/net/sunrpc/stats.c
*
* procfs-based user access to generic RPC statistics. The stats files
* reside in /proc/net/rpc.
*
* The read routines assume that the buffer passed in is just big enough.
* If you implement an RPC service that has its own stats routine which
* appends the generic RPC stats, make sure you don't exceed the PAGE_SIZE
* limit.
*
* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
*/
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/metrics.h>
#include <linux/rcupdate.h>
#include "netns.h"
#define RPCDBG_FACILITY RPCDBG_MISC
/*
* Get RPC client stats
*/
static int rpc_proc_show(struct seq_file *seq, void *v) {
const struct rpc_stat *statp = seq->private;
const struct rpc_program *prog = statp->program;
unsigned int i, j;
seq_printf(seq,
"net %u %u %u %u\n",
statp->netcnt,
statp->netudpcnt,
statp->nettcpcnt,
statp->nettcpconn);
seq_printf(seq,
"rpc %u %u %u\n",
statp->rpccnt,
statp->rpcretrans,
statp->rpcauthrefresh);
for (i = 0; i < prog->nrvers; i++) {
const struct rpc_version *vers = prog->version[i];
if (!vers)
continue;
seq_printf(seq, "proc%u %u",
vers->number, vers->nrprocs);
for (j = 0; j < vers->nrprocs; j++)
seq_printf(seq, " %u",
vers->procs[j].p_count);
seq_putc(seq, '\n');
}
return 0;
}
static int rpc_proc_open(struct inode *inode, struct file *file)
{
return single_open(file, rpc_proc_show, PDE_DATA(inode));
}
static const struct file_operations rpc_proc_fops = {
.owner = THIS_MODULE,
.open = rpc_proc_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
/*
* Get RPC server stats
*/
void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
const struct svc_program *prog = statp->program;
const struct svc_procedure *proc;
const struct svc_version *vers;
unsigned int i, j;
seq_printf(seq,
"net %u %u %u %u\n",
statp->netcnt,
statp->netudpcnt,
statp->nettcpcnt,
statp->nettcpconn);
seq_printf(seq,
"rpc %u %u %u %u %u\n",
statp->rpccnt,
statp->rpcbadfmt+statp->rpcbadauth+statp->rpcbadclnt,
statp->rpcbadfmt,
statp->rpcbadauth,
statp->rpcbadclnt);
for (i = 0; i < prog->pg_nvers; i++) {
if (!(vers = prog->pg_vers[i]) || !(proc = vers->vs_proc))
continue;
seq_printf(seq, "proc%d %u", i, vers->vs_nproc);
for (j = 0; j < vers->vs_nproc; j++, proc++)
seq_printf(seq, " %u", proc->pc_count);
seq_putc(seq, '\n');
}
}
EXPORT_SYMBOL_GPL(svc_seq_show);
/**
* rpc_alloc_iostats - allocate an rpc_iostats structure
* @clnt: RPC program, version, and xprt
*
*/
struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt)
{
return kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(rpc_alloc_iostats);
/**
* rpc_free_iostats - release an rpc_iostats structure
* @stats: doomed rpc_iostats structure
*
*/
void rpc_free_iostats(struct rpc_iostats *stats)
{
kfree(stats);
}
EXPORT_SYMBOL_GPL(rpc_free_iostats);
/**
* rpc_count_iostats - tally up per-task stats
* @task: completed rpc_task
* @stats: array of stat structures
*
* Relies on the caller for serialization.
*/
void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_iostats *op_metrics;
ktime_t delta;
if (!stats || !req)
return;
op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx];
op_metrics->om_ops++;
op_metrics->om_ntrans += req->rq_ntrans;
op_metrics->om_timeouts += task->tk_timeouts;
op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent;
op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd;
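/*
 * om_queue: time from task creation to first transmission;
 * om_rtt: network round-trip time;
 * om_execute: total time from creation to completion.
 */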
delta = ktime_sub(req->rq_xtime, task->tk_start);
op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta);
op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt);
delta = ktime_sub(ktime_get(), task->tk_start);
op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta);
}
EXPORT_SYMBOL_GPL(rpc_count_iostats);
static void _print_name(struct seq_file *seq, unsigned int op,
struct rpc_procinfo *procs)
{
if (procs[op].p_name)
seq_printf(seq, "\t%12s: ", procs[op].p_name);
else if (op == 0)
seq_printf(seq, "\t NULL: ");
else
seq_printf(seq, "\t%12u: ", op);
}
void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
{
struct rpc_iostats *stats = clnt->cl_metrics;
struct rpc_xprt *xprt;
unsigned int op, maxproc = clnt->cl_maxproc;
if (!stats)
return;
seq_printf(seq, "\tRPC iostats version: %s ", RPC_IOSTATS_VERS);
seq_printf(seq, "p/v: %u/%u (%s)\n",
clnt->cl_prog, clnt->cl_vers, clnt->cl_program->name);
rcu_read_lock();
xprt = rcu_dereference(clnt->cl_xprt);
if (xprt)
xprt->ops->print_stats(xprt, seq);
rcu_read_unlock();
seq_printf(seq, "\tper-op statistics\n");
for (op = 0; op < maxproc; op++) {
struct rpc_iostats *metrics = &stats[op];
_print_name(seq, op, clnt->cl_procinfo);
seq_printf(seq, "%lu %lu %lu %Lu %Lu %Lu %Lu %Lu\n",
metrics->om_ops,
metrics->om_ntrans,
metrics->om_timeouts,
metrics->om_bytes_sent,
metrics->om_bytes_recv,
ktime_to_ms(metrics->om_queue),
ktime_to_ms(metrics->om_rtt),
ktime_to_ms(metrics->om_execute));
}
}
EXPORT_SYMBOL_GPL(rpc_print_iostats);
/*
* Register/unregister RPC proc files
*/
static inline struct proc_dir_entry *
do_register(struct net *net, const char *name, void *data,
const struct file_operations *fops)
{
struct sunrpc_net *sn;
dprintk("RPC: registering /proc/net/rpc/%s\n", name);
sn = net_generic(net, sunrpc_net_id);
return proc_create_data(name, 0, sn->proc_net_rpc, fops, data);
}
struct proc_dir_entry *
rpc_proc_register(struct net *net, struct rpc_stat *statp)
{
return do_register(net, statp->program->name, statp, &rpc_proc_fops);
}
EXPORT_SYMBOL_GPL(rpc_proc_register);
void
rpc_proc_unregister(struct net *net, const char *name)
{
struct sunrpc_net *sn;
sn = net_generic(net, sunrpc_net_id);
remove_proc_entry(name, sn->proc_net_rpc);
}
EXPORT_SYMBOL_GPL(rpc_proc_unregister);
struct proc_dir_entry *
svc_proc_register(struct net *net, struct svc_stat *statp, const struct file_operations *fops)
{
return do_register(net, statp->program->pg_name, statp, fops);
}
EXPORT_SYMBOL_GPL(svc_proc_register);
void
svc_proc_unregister(struct net *net, const char *name)
{
struct sunrpc_net *sn;
sn = net_generic(net, sunrpc_net_id);
remove_proc_entry(name, sn->proc_net_rpc);
}
EXPORT_SYMBOL_GPL(svc_proc_unregister);
int rpc_proc_init(struct net *net)
{
struct sunrpc_net *sn;
dprintk("RPC: registering /proc/net/rpc\n");
sn = net_generic(net, sunrpc_net_id);
sn->proc_net_rpc = proc_mkdir("rpc", net->proc_net);
if (sn->proc_net_rpc == NULL)
return -ENOMEM;
return 0;
}
void rpc_proc_exit(struct net *net)
{
dprintk("RPC: unregistering /proc/net/rpc\n");
remove_proc_entry("rpc", net->proc_net);
}

66
net/sunrpc/sunrpc.h Normal file
View file

@ -0,0 +1,66 @@
/******************************************************************************
(c) 2008 NetApp. All Rights Reserved.
NetApp provides this source code under the GPL v2 License.
The GPL v2 license is available at
http://opensource.org/licenses/gpl-license.php.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
/*
* Functions and macros used internally by RPC
*/
#ifndef _NET_SUNRPC_SUNRPC_H
#define _NET_SUNRPC_SUNRPC_H
#include <linux/net.h>
/*
* Header for dynamically allocated rpc buffers.
*/
struct rpc_buffer {
size_t len;
char data[];
};
static inline int rpc_reply_expected(struct rpc_task *task)
{
return (task->tk_msg.rpc_proc != NULL) &&
(task->tk_msg.rpc_proc->p_decode != NULL);
}
static inline int sock_is_loopback(struct sock *sk)
{
struct dst_entry *dst;
int loopback = 0;
rcu_read_lock();
dst = rcu_dereference(sk->sk_dst_cache);
if (dst && dst->dev &&
(dst->dev->features & NETIF_F_LOOPBACK))
loopback = 1;
rcu_read_unlock();
return loopback;
}
int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
struct page *headpage, unsigned long headoffset,
struct page *tailpage, unsigned long tailoffset);
int rpc_clients_notifier_register(void);
void rpc_clients_notifier_unregister(void);
#endif /* _NET_SUNRPC_SUNRPC_H */

133
net/sunrpc/sunrpc_syms.c Normal file
View file

@ -0,0 +1,133 @@
/*
* linux/net/sunrpc/sunrpc_syms.c
*
* Symbols exported by the sunrpc module.
*
* Copyright (C) 1997 Olaf Kirch <okir@monad.swb.de>
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/uio.h>
#include <linux/unistd.h>
#include <linux/init.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/auth.h>
#include <linux/workqueue.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/sunrpc/xprtsock.h>
#include "netns.h"
int sunrpc_net_id;
EXPORT_SYMBOL_GPL(sunrpc_net_id);
static __net_init int sunrpc_init_net(struct net *net)
{
int err;
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
err = rpc_proc_init(net);
if (err)
goto err_proc;
err = ip_map_cache_create(net);
if (err)
goto err_ipmap;
err = unix_gid_cache_create(net);
if (err)
goto err_unixgid;
err = rpc_pipefs_init_net(net);
if (err)
goto err_pipefs;
INIT_LIST_HEAD(&sn->all_clients);
spin_lock_init(&sn->rpc_client_lock);
spin_lock_init(&sn->rpcb_clnt_lock);
return 0;
err_pipefs:
unix_gid_cache_destroy(net);
err_unixgid:
ip_map_cache_destroy(net);
err_ipmap:
rpc_proc_exit(net);
err_proc:
return err;
}
static __net_exit void sunrpc_exit_net(struct net *net)
{
rpc_pipefs_exit_net(net);
unix_gid_cache_destroy(net);
ip_map_cache_destroy(net);
rpc_proc_exit(net);
}
static struct pernet_operations sunrpc_net_ops = {
.init = sunrpc_init_net,
.exit = sunrpc_exit_net,
.id = &sunrpc_net_id,
.size = sizeof(struct sunrpc_net),
};
static int __init
init_sunrpc(void)
{
int err = rpc_init_mempool();
if (err)
goto out;
err = rpcauth_init_module();
if (err)
goto out2;
cache_initialize();
err = register_pernet_subsys(&sunrpc_net_ops);
if (err)
goto out3;
err = register_rpc_pipefs();
if (err)
goto out4;
#ifdef RPC_DEBUG
rpc_register_sysctl();
#endif
svc_init_xprt_sock(); /* svc sock transport */
init_socket_xprt(); /* clnt sock transport */
return 0;
out4:
unregister_pernet_subsys(&sunrpc_net_ops);
out3:
rpcauth_remove_module();
out2:
rpc_destroy_mempool();
out:
return err;
}
static void __exit
cleanup_sunrpc(void)
{
rpcauth_remove_module();
cleanup_socket_xprt();
svc_cleanup_xprt_sock();
unregister_rpc_pipefs();
rpc_destroy_mempool();
unregister_pernet_subsys(&sunrpc_net_ops);
#ifdef RPC_DEBUG
rpc_unregister_sysctl();
#endif
rcu_barrier(); /* Wait for completion of call_rcu()'s */
}
MODULE_LICENSE("GPL");
fs_initcall(init_sunrpc); /* Ensure we're initialised before nfs */
module_exit(cleanup_sunrpc);

1400
net/sunrpc/svc.c Normal file

File diff suppressed because it is too large Load diff

1329
net/sunrpc/svc_xprt.c Normal file

File diff suppressed because it is too large Load diff

166
net/sunrpc/svcauth.c Normal file
View file

@ -0,0 +1,166 @@
/*
* linux/net/sunrpc/svcauth.c
*
* The generic interface for RPC authentication on the server side.
*
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*
* CHANGES
* 19-Apr-2000 Chris Evans - Security fix
*/
#include <linux/types.h>
#include <linux/module.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/svcauth.h>
#include <linux/err.h>
#include <linux/hash.h>
#define RPCDBG_FACILITY RPCDBG_AUTH
/*
* Table of authenticators
*/
extern struct auth_ops svcauth_null;
extern struct auth_ops svcauth_unix;
static DEFINE_SPINLOCK(authtab_lock);
static struct auth_ops *authtab[RPC_AUTH_MAXFLAVOR] = {
[0] = &svcauth_null,
[1] = &svcauth_unix,
};
int
svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
{
rpc_authflavor_t flavor;
struct auth_ops *aops;
*authp = rpc_auth_ok;
flavor = svc_getnl(&rqstp->rq_arg.head[0]);
dprintk("svc: svc_authenticate (%d)\n", flavor);
spin_lock(&authtab_lock);
if (flavor >= RPC_AUTH_MAXFLAVOR || !(aops = authtab[flavor]) ||
!try_module_get(aops->owner)) {
spin_unlock(&authtab_lock);
*authp = rpc_autherr_badcred;
return SVC_DENIED;
}
spin_unlock(&authtab_lock);
rqstp->rq_auth_slack = 0;
rqstp->rq_authop = aops;
return aops->accept(rqstp, authp);
}
EXPORT_SYMBOL_GPL(svc_authenticate);
int svc_set_client(struct svc_rqst *rqstp)
{
return rqstp->rq_authop->set_client(rqstp);
}
EXPORT_SYMBOL_GPL(svc_set_client);
/* A request, which was authenticated, has now executed.
* Time to finalise the credentials and verifier
* and release any resources
*/
int svc_authorise(struct svc_rqst *rqstp)
{
struct auth_ops *aops = rqstp->rq_authop;
int rv = 0;
rqstp->rq_authop = NULL;
if (aops) {
rv = aops->release(rqstp);
module_put(aops->owner);
}
return rv;
}
int
svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops)
{
int rv = -EINVAL;
spin_lock(&authtab_lock);
if (flavor < RPC_AUTH_MAXFLAVOR && authtab[flavor] == NULL) {
authtab[flavor] = aops;
rv = 0;
}
spin_unlock(&authtab_lock);
return rv;
}
EXPORT_SYMBOL_GPL(svc_auth_register);
void
svc_auth_unregister(rpc_authflavor_t flavor)
{
spin_lock(&authtab_lock);
if (flavor < RPC_AUTH_MAXFLAVOR)
authtab[flavor] = NULL;
spin_unlock(&authtab_lock);
}
EXPORT_SYMBOL_GPL(svc_auth_unregister);
/**************************************************
* 'auth_domains' are stored in a hash table indexed by name.
* When the last reference to an 'auth_domain' is dropped,
* the object is unhashed and freed.
* If auth_domain_lookup fails to find an entry, it will return
* its second argument 'new'. If this is non-null, it will
* have been atomically linked into the table.
*/
#define DN_HASHBITS 6
#define DN_HASHMAX (1<<DN_HASHBITS)
static struct hlist_head auth_domain_table[DN_HASHMAX];
static spinlock_t auth_domain_lock =
__SPIN_LOCK_UNLOCKED(auth_domain_lock);
void auth_domain_put(struct auth_domain *dom)
{
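/*
 * Drop a reference; only when the last one goes away do we take the
 * table lock, unhash the domain and let the flavour release it.
 */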
if (atomic_dec_and_lock(&dom->ref.refcount, &auth_domain_lock)) {
hlist_del(&dom->hash);
dom->flavour->domain_release(dom);
spin_unlock(&auth_domain_lock);
}
}
EXPORT_SYMBOL_GPL(auth_domain_put);
struct auth_domain *
auth_domain_lookup(char *name, struct auth_domain *new)
{
struct auth_domain *hp;
struct hlist_head *head;
head = &auth_domain_table[hash_str(name, DN_HASHBITS)];
spin_lock(&auth_domain_lock);
hlist_for_each_entry(hp, head, hash) {
if (strcmp(hp->name, name)==0) {
kref_get(&hp->ref);
spin_unlock(&auth_domain_lock);
return hp;
}
}
if (new)
hlist_add_head(&new->hash, head);
spin_unlock(&auth_domain_lock);
return new;
}
EXPORT_SYMBOL_GPL(auth_domain_lookup);
struct auth_domain *auth_domain_find(char *name)
{
return auth_domain_lookup(name, NULL);
}
EXPORT_SYMBOL_GPL(auth_domain_find);

914
net/sunrpc/svcauth_unix.c Normal file
View file

@ -0,0 +1,914 @@
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/svcauth.h>
#include <linux/sunrpc/gss_api.h>
#include <linux/sunrpc/addr.h>
#include <linux/err.h>
#include <linux/seq_file.h>
#include <linux/hash.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <net/ipv6.h>
#include <linux/kernel.h>
#include <linux/user_namespace.h>
#define RPCDBG_FACILITY RPCDBG_AUTH
#include "netns.h"
/*
* AUTHUNIX and AUTHNULL credentials are both handled here.
* AUTHNULL is treated just like AUTHUNIX except that the uid/gid
* are always nobody (-2); i.e. we do the same IP address checks for
* AUTHNULL as for AUTHUNIX, and that is done here.
*/
struct unix_domain {
struct auth_domain h;
/* other stuff later */
};
extern struct auth_ops svcauth_null;
extern struct auth_ops svcauth_unix;
static void svcauth_unix_domain_release(struct auth_domain *dom)
{
struct unix_domain *ud = container_of(dom, struct unix_domain, h);
kfree(dom->name);
kfree(ud);
}
struct auth_domain *unix_domain_find(char *name)
{
struct auth_domain *rv;
struct unix_domain *new = NULL;
rv = auth_domain_lookup(name, NULL);
while(1) {
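/*
 * Loop until the lookup returns a stable entry: a concurrent insert
 * may win the race, in which case the entry we allocated here is
 * discarded and the existing one is returned.
 */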
if (rv) {
if (new && rv != &new->h)
svcauth_unix_domain_release(&new->h);
if (rv->flavour != &svcauth_unix) {
auth_domain_put(rv);
return NULL;
}
return rv;
}
new = kmalloc(sizeof(*new), GFP_KERNEL);
if (new == NULL)
return NULL;
kref_init(&new->h.ref);
new->h.name = kstrdup(name, GFP_KERNEL);
if (new->h.name == NULL) {
kfree(new);
return NULL;
}
new->h.flavour = &svcauth_unix;
rv = auth_domain_lookup(name, &new->h);
}
}
EXPORT_SYMBOL_GPL(unix_domain_find);
/**************************************************
* cache for IP address to unix_domain
* as needed by AUTH_UNIX
*/
#define IP_HASHBITS 8
#define IP_HASHMAX (1<<IP_HASHBITS)
struct ip_map {
struct cache_head h;
char m_class[8]; /* e.g. "nfsd" */
struct in6_addr m_addr;
struct unix_domain *m_client;
};
static void ip_map_put(struct kref *kref)
{
struct cache_head *item = container_of(kref, struct cache_head, ref);
struct ip_map *im = container_of(item, struct ip_map,h);
if (test_bit(CACHE_VALID, &item->flags) &&
!test_bit(CACHE_NEGATIVE, &item->flags))
auth_domain_put(&im->m_client->h);
kfree(im);
}
static inline int hash_ip6(const struct in6_addr *ip)
{
return hash_32(ipv6_addr_hash(ip), IP_HASHBITS);
}
static int ip_map_match(struct cache_head *corig, struct cache_head *cnew)
{
struct ip_map *orig = container_of(corig, struct ip_map, h);
struct ip_map *new = container_of(cnew, struct ip_map, h);
return strcmp(orig->m_class, new->m_class) == 0 &&
ipv6_addr_equal(&orig->m_addr, &new->m_addr);
}
static void ip_map_init(struct cache_head *cnew, struct cache_head *citem)
{
struct ip_map *new = container_of(cnew, struct ip_map, h);
struct ip_map *item = container_of(citem, struct ip_map, h);
strcpy(new->m_class, item->m_class);
new->m_addr = item->m_addr;
}
static void update(struct cache_head *cnew, struct cache_head *citem)
{
struct ip_map *new = container_of(cnew, struct ip_map, h);
struct ip_map *item = container_of(citem, struct ip_map, h);
kref_get(&item->m_client->h.ref);
new->m_client = item->m_client;
}
static struct cache_head *ip_map_alloc(void)
{
struct ip_map *i = kmalloc(sizeof(*i), GFP_KERNEL);
if (i)
return &i->h;
else
return NULL;
}
static void ip_map_request(struct cache_detail *cd,
struct cache_head *h,
char **bpp, int *blen)
{
char text_addr[40];
struct ip_map *im = container_of(h, struct ip_map, h);
if (ipv6_addr_v4mapped(&(im->m_addr))) {
snprintf(text_addr, 20, "%pI4", &im->m_addr.s6_addr32[3]);
} else {
snprintf(text_addr, 40, "%pI6", &im->m_addr);
}
qword_add(bpp, blen, im->m_class);
qword_add(bpp, blen, text_addr);
(*bpp)[-1] = '\n';
}
static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr);
static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, struct unix_domain *udom, time_t expiry);
static int ip_map_parse(struct cache_detail *cd,
char *mesg, int mlen)
{
/* class ipaddress [domainname] */
/* should be safe just to use the start of the input buffer
* for scratch: */
char *buf = mesg;
int len;
char class[8];
union {
struct sockaddr sa;
struct sockaddr_in s4;
struct sockaddr_in6 s6;
} address;
struct sockaddr_in6 sin6;
int err;
struct ip_map *ipmp;
struct auth_domain *dom;
time_t expiry;
if (mesg[mlen-1] != '\n')
return -EINVAL;
mesg[mlen-1] = 0;
/* class */
len = qword_get(&mesg, class, sizeof(class));
if (len <= 0) return -EINVAL;
/* ip address */
len = qword_get(&mesg, buf, mlen);
if (len <= 0) return -EINVAL;
if (rpc_pton(cd->net, buf, len, &address.sa, sizeof(address)) == 0)
return -EINVAL;
switch (address.sa.sa_family) {
case AF_INET:
/* Form a mapped IPv4 address in sin6 */
sin6.sin6_family = AF_INET6;
ipv6_addr_set_v4mapped(address.s4.sin_addr.s_addr,
&sin6.sin6_addr);
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
memcpy(&sin6, &address.s6, sizeof(sin6));
break;
#endif
default:
return -EINVAL;
}
expiry = get_expiry(&mesg);
if (expiry == 0)
return -EINVAL;
/* domainname, or empty for NEGATIVE */
len = qword_get(&mesg, buf, mlen);
if (len < 0) return -EINVAL;
if (len) {
dom = unix_domain_find(buf);
if (dom == NULL)
return -ENOENT;
} else
dom = NULL;
/* IPv6 scope IDs are ignored for now */
ipmp = __ip_map_lookup(cd, class, &sin6.sin6_addr);
if (ipmp) {
err = __ip_map_update(cd, ipmp,
container_of(dom, struct unix_domain, h),
expiry);
} else
err = -ENOMEM;
if (dom)
auth_domain_put(dom);
cache_flush();
return err;
}
static int ip_map_show(struct seq_file *m,
struct cache_detail *cd,
struct cache_head *h)
{
struct ip_map *im;
struct in6_addr addr;
char *dom = "-no-domain-";
if (h == NULL) {
seq_puts(m, "#class IP domain\n");
return 0;
}
im = container_of(h, struct ip_map, h);
/* class addr domain */
addr = im->m_addr;
if (test_bit(CACHE_VALID, &h->flags) &&
!test_bit(CACHE_NEGATIVE, &h->flags))
dom = im->m_client->h.name;
if (ipv6_addr_v4mapped(&addr)) {
seq_printf(m, "%s %pI4 %s\n",
im->m_class, &addr.s6_addr32[3], dom);
} else {
seq_printf(m, "%s %pI6 %s\n", im->m_class, &addr, dom);
}
return 0;
}
static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class,
struct in6_addr *addr)
{
struct ip_map ip;
struct cache_head *ch;
strcpy(ip.m_class, class);
ip.m_addr = *addr;
ch = sunrpc_cache_lookup(cd, &ip.h,
hash_str(class, IP_HASHBITS) ^
hash_ip6(addr));
if (ch)
return container_of(ch, struct ip_map, h);
else
return NULL;
}
static inline struct ip_map *ip_map_lookup(struct net *net, char *class,
struct in6_addr *addr)
{
struct sunrpc_net *sn;
sn = net_generic(net, sunrpc_net_id);
return __ip_map_lookup(sn->ip_map_cache, class, addr);
}
static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm,
struct unix_domain *udom, time_t expiry)
{
struct ip_map ip;
struct cache_head *ch;
ip.m_client = udom;
ip.h.flags = 0;
if (!udom)
set_bit(CACHE_NEGATIVE, &ip.h.flags);
ip.h.expiry_time = expiry;
ch = sunrpc_cache_update(cd, &ip.h, &ipm->h,
hash_str(ipm->m_class, IP_HASHBITS) ^
hash_ip6(&ipm->m_addr));
if (!ch)
return -ENOMEM;
cache_put(ch, cd);
return 0;
}
static inline int ip_map_update(struct net *net, struct ip_map *ipm,
struct unix_domain *udom, time_t expiry)
{
struct sunrpc_net *sn;
sn = net_generic(net, sunrpc_net_id);
return __ip_map_update(sn->ip_map_cache, ipm, udom, expiry);
}
void svcauth_unix_purge(struct net *net)
{
struct sunrpc_net *sn;
sn = net_generic(net, sunrpc_net_id);
cache_purge(sn->ip_map_cache);
}
EXPORT_SYMBOL_GPL(svcauth_unix_purge);
static inline struct ip_map *
ip_map_cached_get(struct svc_xprt *xprt)
{
struct ip_map *ipm = NULL;
struct sunrpc_net *sn;
if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
spin_lock(&xprt->xpt_lock);
ipm = xprt->xpt_auth_cache;
if (ipm != NULL) {
sn = net_generic(xprt->xpt_net, sunrpc_net_id);
if (cache_is_expired(sn->ip_map_cache, &ipm->h)) {
/*
* The entry has been invalidated since it was
* remembered, e.g. by a second mount from the
* same IP address.
*/
xprt->xpt_auth_cache = NULL;
spin_unlock(&xprt->xpt_lock);
cache_put(&ipm->h, sn->ip_map_cache);
return NULL;
}
cache_get(&ipm->h);
}
spin_unlock(&xprt->xpt_lock);
}
return ipm;
}
static inline void
ip_map_cached_put(struct svc_xprt *xprt, struct ip_map *ipm)
{
if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
spin_lock(&xprt->xpt_lock);
if (xprt->xpt_auth_cache == NULL) {
/* newly cached, keep the reference */
xprt->xpt_auth_cache = ipm;
ipm = NULL;
}
spin_unlock(&xprt->xpt_lock);
}
if (ipm) {
struct sunrpc_net *sn;
sn = net_generic(xprt->xpt_net, sunrpc_net_id);
cache_put(&ipm->h, sn->ip_map_cache);
}
}
void
svcauth_unix_info_release(struct svc_xprt *xpt)
{
struct ip_map *ipm;
ipm = xpt->xpt_auth_cache;
if (ipm != NULL) {
struct sunrpc_net *sn;
sn = net_generic(xpt->xpt_net, sunrpc_net_id);
cache_put(&ipm->h, sn->ip_map_cache);
}
}
/****************************************************************************
* auth.unix.gid cache
* simple cache to map a UID to a list of GIDs
* because AUTH_UNIX aka AUTH_SYS can carry at most 16 supplementary GIDs
*/
#define GID_HASHBITS 8
#define GID_HASHMAX (1<<GID_HASHBITS)
struct unix_gid {
struct cache_head h;
kuid_t uid;
struct group_info *gi;
};
static int unix_gid_hash(kuid_t uid)
{
return hash_long(from_kuid(&init_user_ns, uid), GID_HASHBITS);
}
static void unix_gid_put(struct kref *kref)
{
struct cache_head *item = container_of(kref, struct cache_head, ref);
struct unix_gid *ug = container_of(item, struct unix_gid, h);
if (test_bit(CACHE_VALID, &item->flags) &&
!test_bit(CACHE_NEGATIVE, &item->flags))
put_group_info(ug->gi);
kfree(ug);
}
static int unix_gid_match(struct cache_head *corig, struct cache_head *cnew)
{
struct unix_gid *orig = container_of(corig, struct unix_gid, h);
struct unix_gid *new = container_of(cnew, struct unix_gid, h);
return uid_eq(orig->uid, new->uid);
}
static void unix_gid_init(struct cache_head *cnew, struct cache_head *citem)
{
struct unix_gid *new = container_of(cnew, struct unix_gid, h);
struct unix_gid *item = container_of(citem, struct unix_gid, h);
new->uid = item->uid;
}
static void unix_gid_update(struct cache_head *cnew, struct cache_head *citem)
{
struct unix_gid *new = container_of(cnew, struct unix_gid, h);
struct unix_gid *item = container_of(citem, struct unix_gid, h);
get_group_info(item->gi);
new->gi = item->gi;
}
static struct cache_head *unix_gid_alloc(void)
{
struct unix_gid *g = kmalloc(sizeof(*g), GFP_KERNEL);
if (g)
return &g->h;
else
return NULL;
}
static void unix_gid_request(struct cache_detail *cd,
struct cache_head *h,
char **bpp, int *blen)
{
char tuid[20];
struct unix_gid *ug = container_of(h, struct unix_gid, h);
snprintf(tuid, 20, "%u", from_kuid(&init_user_ns, ug->uid));
qword_add(bpp, blen, tuid);
(*bpp)[-1] = '\n';
}
static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, kuid_t uid);
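/*
 * Illustrative downcall (values are examples only): a user-space
 * helper such as rpc.mountd answers an upcall by writing a line of
 * the form
 *
 *	<uid> <expiry> <ngids> <gid0> ... <gidN-1>
 *
 * e.g. "1000 1500000000 3 100 1001 1002", into
 * /proc/net/rpc/auth.unix.gid/channel, which unix_gid_parse() below
 * turns into a cache entry mapping uid 1000 to those three gids.
 */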
static int unix_gid_parse(struct cache_detail *cd,
char *mesg, int mlen)
{
/* uid expiry Ngid gid0 gid1 ... gidN-1 */
int id;
kuid_t uid;
int gids;
int rv;
int i;
int err;
time_t expiry;
struct unix_gid ug, *ugp;
if (mesg[mlen - 1] != '\n')
return -EINVAL;
mesg[mlen-1] = 0;
rv = get_int(&mesg, &id);
if (rv)
return -EINVAL;
uid = make_kuid(&init_user_ns, id);
ug.uid = uid;
expiry = get_expiry(&mesg);
if (expiry == 0)
return -EINVAL;
rv = get_int(&mesg, &gids);
if (rv || gids < 0 || gids > 8192)
return -EINVAL;
ug.gi = groups_alloc(gids);
if (!ug.gi)
return -ENOMEM;
for (i = 0 ; i < gids ; i++) {
int gid;
kgid_t kgid;
rv = get_int(&mesg, &gid);
err = -EINVAL;
if (rv)
goto out;
kgid = make_kgid(&init_user_ns, gid);
if (!gid_valid(kgid))
goto out;
GROUP_AT(ug.gi, i) = kgid;
}
ugp = unix_gid_lookup(cd, uid);
if (ugp) {
struct cache_head *ch;
ug.h.flags = 0;
ug.h.expiry_time = expiry;
ch = sunrpc_cache_update(cd,
&ug.h, &ugp->h,
unix_gid_hash(uid));
if (!ch)
err = -ENOMEM;
else {
err = 0;
cache_put(ch, cd);
}
} else
err = -ENOMEM;
out:
if (ug.gi)
put_group_info(ug.gi);
return err;
}
static int unix_gid_show(struct seq_file *m,
struct cache_detail *cd,
struct cache_head *h)
{
struct user_namespace *user_ns = &init_user_ns;
struct unix_gid *ug;
int i;
int glen;
if (h == NULL) {
seq_puts(m, "#uid cnt: gids...\n");
return 0;
}
ug = container_of(h, struct unix_gid, h);
if (test_bit(CACHE_VALID, &h->flags) &&
!test_bit(CACHE_NEGATIVE, &h->flags))
glen = ug->gi->ngroups;
else
glen = 0;
seq_printf(m, "%u %d:", from_kuid_munged(user_ns, ug->uid), glen);
for (i = 0; i < glen; i++)
seq_printf(m, " %d", from_kgid_munged(user_ns, GROUP_AT(ug->gi, i)));
seq_printf(m, "\n");
return 0;
}
static struct cache_detail unix_gid_cache_template = {
.owner = THIS_MODULE,
.hash_size = GID_HASHMAX,
.name = "auth.unix.gid",
.cache_put = unix_gid_put,
.cache_request = unix_gid_request,
.cache_parse = unix_gid_parse,
.cache_show = unix_gid_show,
.match = unix_gid_match,
.init = unix_gid_init,
.update = unix_gid_update,
.alloc = unix_gid_alloc,
};
int unix_gid_cache_create(struct net *net)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct cache_detail *cd;
int err;
cd = cache_create_net(&unix_gid_cache_template, net);
if (IS_ERR(cd))
return PTR_ERR(cd);
err = cache_register_net(cd, net);
if (err) {
cache_destroy_net(cd, net);
return err;
}
sn->unix_gid_cache = cd;
return 0;
}
void unix_gid_cache_destroy(struct net *net)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct cache_detail *cd = sn->unix_gid_cache;
sn->unix_gid_cache = NULL;
cache_purge(cd);
cache_unregister_net(cd, net);
cache_destroy_net(cd, net);
}
static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, kuid_t uid)
{
struct unix_gid ug;
struct cache_head *ch;
ug.uid = uid;
ch = sunrpc_cache_lookup(cd, &ug.h, unix_gid_hash(uid));
if (ch)
return container_of(ch, struct unix_gid, h);
else
return NULL;
}
static struct group_info *unix_gid_find(kuid_t uid, struct svc_rqst *rqstp)
{
struct unix_gid *ug;
struct group_info *gi;
int ret;
struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net,
sunrpc_net_id);
ug = unix_gid_lookup(sn->unix_gid_cache, uid);
if (!ug)
return ERR_PTR(-EAGAIN);
ret = cache_check(sn->unix_gid_cache, &ug->h, &rqstp->rq_chandle);
switch (ret) {
case -ENOENT:
return ERR_PTR(-ENOENT);
case -ETIMEDOUT:
return ERR_PTR(-ESHUTDOWN);
case 0:
gi = get_group_info(ug->gi);
cache_put(&ug->h, sn->unix_gid_cache);
return gi;
default:
return ERR_PTR(-EAGAIN);
}
}
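/*
 * Resolve the client's identity for an AUTH_NULL/AUTH_UNIX request:
 * map the transport address to an auth_domain through the
 * auth.unix.ip cache, then (where available) replace the supplementary
 * group list with the auth.unix.gid mapping for cred->cr_uid.
 * Returns SVC_OK, SVC_DROP (an upcall is still pending), SVC_CLOSE
 * or SVC_DENIED.
 */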
int
svcauth_unix_set_client(struct svc_rqst *rqstp)
{
struct sockaddr_in *sin;
struct sockaddr_in6 *sin6, sin6_storage;
struct ip_map *ipm;
struct group_info *gi;
struct svc_cred *cred = &rqstp->rq_cred;
struct svc_xprt *xprt = rqstp->rq_xprt;
struct net *net = xprt->xpt_net;
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
switch (rqstp->rq_addr.ss_family) {
case AF_INET:
sin = svc_addr_in(rqstp);
sin6 = &sin6_storage;
ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &sin6->sin6_addr);
break;
case AF_INET6:
sin6 = svc_addr_in6(rqstp);
break;
default:
BUG();
}
rqstp->rq_client = NULL;
if (rqstp->rq_proc == 0)
return SVC_OK;
ipm = ip_map_cached_get(xprt);
if (ipm == NULL)
ipm = __ip_map_lookup(sn->ip_map_cache, rqstp->rq_server->sv_program->pg_class,
&sin6->sin6_addr);
if (ipm == NULL)
return SVC_DENIED;
switch (cache_check(sn->ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
default:
BUG();
case -ETIMEDOUT:
return SVC_CLOSE;
case -EAGAIN:
return SVC_DROP;
case -ENOENT:
return SVC_DENIED;
case 0:
rqstp->rq_client = &ipm->m_client->h;
kref_get(&rqstp->rq_client->ref);
ip_map_cached_put(xprt, ipm);
break;
}
gi = unix_gid_find(cred->cr_uid, rqstp);
switch (PTR_ERR(gi)) {
case -EAGAIN:
return SVC_DROP;
case -ESHUTDOWN:
return SVC_CLOSE;
case -ENOENT:
break;
default:
put_group_info(cred->cr_group_info);
cred->cr_group_info = gi;
}
return SVC_OK;
}
EXPORT_SYMBOL_GPL(svcauth_unix_set_client);
static int
svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
struct svc_cred *cred = &rqstp->rq_cred;
cred->cr_group_info = NULL;
cred->cr_principal = NULL;
rqstp->rq_client = NULL;
if (argv->iov_len < 3*4)
return SVC_GARBAGE;
if (svc_getu32(argv) != 0) {
dprintk("svc: bad null cred\n");
*authp = rpc_autherr_badcred;
return SVC_DENIED;
}
if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
dprintk("svc: bad null verf\n");
*authp = rpc_autherr_badverf;
return SVC_DENIED;
}
/* Signal that mapping to nobody uid/gid is required */
cred->cr_uid = INVALID_UID;
cred->cr_gid = INVALID_GID;
cred->cr_group_info = groups_alloc(0);
if (cred->cr_group_info == NULL)
return SVC_CLOSE; /* kmalloc failure - client must retry */
/* Put NULL verifier */
svc_putnl(resv, RPC_AUTH_NULL);
svc_putnl(resv, 0);
rqstp->rq_cred.cr_flavor = RPC_AUTH_NULL;
return SVC_OK;
}
static int
svcauth_null_release(struct svc_rqst *rqstp)
{
if (rqstp->rq_client)
auth_domain_put(rqstp->rq_client);
rqstp->rq_client = NULL;
if (rqstp->rq_cred.cr_group_info)
put_group_info(rqstp->rq_cred.cr_group_info);
rqstp->rq_cred.cr_group_info = NULL;
return 0; /* don't drop */
}
struct auth_ops svcauth_null = {
.name = "null",
.owner = THIS_MODULE,
.flavour = RPC_AUTH_NULL,
.accept = svcauth_null_accept,
.release = svcauth_null_release,
.set_client = svcauth_unix_set_client,
};
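/*
 * Parse an AUTH_UNIX (AUTH_SYS) credential: stamp, machine name,
 * uid, gid and at most 16 supplementary gids, followed by an
 * AUTH_NULL verifier (RFC 5531 authsys_parms).  The ids are converted
 * to kuid_t/kgid_t in the initial user namespace.
 */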
static int
svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
struct svc_cred *cred = &rqstp->rq_cred;
u32 slen, i;
int len = argv->iov_len;
cred->cr_group_info = NULL;
cred->cr_principal = NULL;
rqstp->rq_client = NULL;
if ((len -= 3*4) < 0)
return SVC_GARBAGE;
svc_getu32(argv); /* length */
svc_getu32(argv); /* time stamp */
slen = XDR_QUADLEN(svc_getnl(argv)); /* machname length */
if (slen > 64 || (len -= (slen + 3)*4) < 0)
goto badcred;
argv->iov_base = (void*)((__be32*)argv->iov_base + slen); /* skip machname */
argv->iov_len -= slen*4;
/*
* Note: we skip uid_valid()/gid_valid() checks here for
* backwards compatibility with clients that use -1 id's.
* Instead, -1 uid or gid is later mapped to the
* (export-specific) anonymous id by nfsd_setuser.
* Supplementary gid's will be left alone.
*/
cred->cr_uid = make_kuid(&init_user_ns, svc_getnl(argv)); /* uid */
cred->cr_gid = make_kgid(&init_user_ns, svc_getnl(argv)); /* gid */
slen = svc_getnl(argv); /* gids length */
if (slen > 16 || (len -= (slen + 2)*4) < 0)
goto badcred;
cred->cr_group_info = groups_alloc(slen);
if (cred->cr_group_info == NULL)
return SVC_CLOSE;
for (i = 0; i < slen; i++) {
kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv));
GROUP_AT(cred->cr_group_info, i) = kgid;
}
if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
*authp = rpc_autherr_badverf;
return SVC_DENIED;
}
/* Put NULL verifier */
svc_putnl(resv, RPC_AUTH_NULL);
svc_putnl(resv, 0);
rqstp->rq_cred.cr_flavor = RPC_AUTH_UNIX;
return SVC_OK;
badcred:
*authp = rpc_autherr_badcred;
return SVC_DENIED;
}
static int
svcauth_unix_release(struct svc_rqst *rqstp)
{
/* Verifier (such as it is) is already in place.
*/
if (rqstp->rq_client)
auth_domain_put(rqstp->rq_client);
rqstp->rq_client = NULL;
if (rqstp->rq_cred.cr_group_info)
put_group_info(rqstp->rq_cred.cr_group_info);
rqstp->rq_cred.cr_group_info = NULL;
return 0;
}
struct auth_ops svcauth_unix = {
.name = "unix",
.owner = THIS_MODULE,
.flavour = RPC_AUTH_UNIX,
.accept = svcauth_unix_accept,
.release = svcauth_unix_release,
.domain_release = svcauth_unix_domain_release,
.set_client = svcauth_unix_set_client,
};
static struct cache_detail ip_map_cache_template = {
.owner = THIS_MODULE,
.hash_size = IP_HASHMAX,
.name = "auth.unix.ip",
.cache_put = ip_map_put,
.cache_request = ip_map_request,
.cache_parse = ip_map_parse,
.cache_show = ip_map_show,
.match = ip_map_match,
.init = ip_map_init,
.update = update,
.alloc = ip_map_alloc,
};
int ip_map_cache_create(struct net *net)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct cache_detail *cd;
int err;
cd = cache_create_net(&ip_map_cache_template, net);
if (IS_ERR(cd))
return PTR_ERR(cd);
err = cache_register_net(cd, net);
if (err) {
cache_destroy_net(cd, net);
return err;
}
sn->ip_map_cache = cd;
return 0;
}
void ip_map_cache_destroy(struct net *net)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct cache_detail *cd = sn->ip_map_cache;
sn->ip_map_cache = NULL;
cache_purge(cd);
cache_unregister_net(cd, net);
cache_destroy_net(cd, net);
}

1669
net/sunrpc/svcsock.c Normal file

File diff suppressed because it is too large

185
net/sunrpc/sysctl.c Normal file

@ -0,0 +1,185 @@
/*
* linux/net/sunrpc/sysctl.c
*
* Sysctl interface to sunrpc module.
*
* I would prefer to register the sunrpc table below sys/net, but that's
* impossible at the moment.
*/
#include <linux/types.h>
#include <linux/linkage.h>
#include <linux/ctype.h>
#include <linux/fs.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svc_xprt.h>
#include "netns.h"
/*
* Declare the debug flags here
*/
unsigned int rpc_debug;
EXPORT_SYMBOL_GPL(rpc_debug);
unsigned int nfs_debug;
EXPORT_SYMBOL_GPL(nfs_debug);
unsigned int nfsd_debug;
EXPORT_SYMBOL_GPL(nfsd_debug);
unsigned int nlm_debug;
EXPORT_SYMBOL_GPL(nlm_debug);
#ifdef RPC_DEBUG
static struct ctl_table_header *sunrpc_table_header;
static struct ctl_table sunrpc_table[];
void
rpc_register_sysctl(void)
{
if (!sunrpc_table_header)
sunrpc_table_header = register_sysctl_table(sunrpc_table);
}
void
rpc_unregister_sysctl(void)
{
if (sunrpc_table_header) {
unregister_sysctl_table(sunrpc_table_header);
sunrpc_table_header = NULL;
}
}
static int proc_do_xprt(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
char tmpbuf[256];
size_t len;
if ((*ppos && !write) || !*lenp) {
*lenp = 0;
return 0;
}
len = svc_print_xprts(tmpbuf, sizeof(tmpbuf));
return simple_read_from_buffer(buffer, *lenp, ppos, tmpbuf, len);
}
static int
proc_dodebug(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
char tmpbuf[20], c, *s;
char __user *p;
unsigned int value;
size_t left, len;
if ((*ppos && !write) || !*lenp) {
*lenp = 0;
return 0;
}
left = *lenp;
if (write) {
if (!access_ok(VERIFY_READ, buffer, left))
return -EFAULT;
p = buffer;
while (left && __get_user(c, p) >= 0 && isspace(c))
left--, p++;
if (!left)
goto done;
if (left > sizeof(tmpbuf) - 1)
return -EINVAL;
if (copy_from_user(tmpbuf, p, left))
return -EFAULT;
tmpbuf[left] = '\0';
for (s = tmpbuf, value = 0; '0' <= *s && *s <= '9'; s++, left--)
value = 10 * value + (*s - '0');
if (*s && !isspace(*s))
return -EINVAL;
while (left && isspace(*s))
left--, s++;
*(unsigned int *) table->data = value;
/* Display the RPC tasks on writing to rpc_debug */
if (strcmp(table->procname, "rpc_debug") == 0)
rpc_show_tasks(&init_net);
} else {
if (!access_ok(VERIFY_WRITE, buffer, left))
return -EFAULT;
len = sprintf(tmpbuf, "%d", *(unsigned int *) table->data);
if (len > left)
len = left;
if (__copy_to_user(buffer, tmpbuf, len))
return -EFAULT;
if ((left -= len) > 0) {
if (put_user('\n', (char __user *)buffer + len))
return -EFAULT;
left--;
}
}
done:
*lenp -= left;
*ppos += *lenp;
return 0;
}
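/*
 * Usage note (illustrative): the debug flag words are exposed as
 * decimal sysctls under /proc/sys/sunrpc/, so besides the rpcdebug(8)
 * utility one can, for example,
 *
 *	echo 65535 > /proc/sys/sunrpc/rpc_debug		# enable all RPC debugging
 *	echo 0 > /proc/sys/sunrpc/nfs_debug		# silence NFS client debugging
 *
 * proc_dodebug() above parses only decimal digits, and a write to
 * rpc_debug additionally dumps the queued RPC tasks via
 * rpc_show_tasks().
 */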
static struct ctl_table debug_table[] = {
{
.procname = "rpc_debug",
.data = &rpc_debug,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dodebug
},
{
.procname = "nfs_debug",
.data = &nfs_debug,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dodebug
},
{
.procname = "nfsd_debug",
.data = &nfsd_debug,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dodebug
},
{
.procname = "nlm_debug",
.data = &nlm_debug,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dodebug
},
{
.procname = "transports",
.maxlen = 256,
.mode = 0444,
.proc_handler = proc_do_xprt,
},
{ }
};
static struct ctl_table sunrpc_table[] = {
{
.procname = "sunrpc",
.mode = 0555,
.child = debug_table
},
{ }
};
#endif

122
net/sunrpc/timer.c Normal file

@ -0,0 +1,122 @@
/*
* linux/net/sunrpc/timer.c
*
* Estimate RPC request round trip time.
*
* Based on packet round-trip and variance estimator algorithms described
* in appendix A of "Congestion Avoidance and Control" by Van Jacobson
* and Michael J. Karels (ACM Computer Communication Review; Proceedings
* of the Sigcomm '88 Symposium in Stanford, CA, August, 1988).
*
* This RTT estimator is used only for RPC over datagram protocols.
*
* Copyright (C) 2002 Trond Myklebust <trond.myklebust@fys.uio.no>
*/
#include <asm/param.h>
#include <linux/types.h>
#include <linux/unistd.h>
#include <linux/module.h>
#include <linux/sunrpc/clnt.h>
#define RPC_RTO_MAX (60*HZ)
#define RPC_RTO_INIT (HZ/5)
#define RPC_RTO_MIN (HZ/10)
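/*
 * A note on the fixed-point representation used below: srtt[] holds
 * the smoothed RTT scaled by 8 and sdrtt[] holds the mean deviation
 * scaled by 4, so the Van Jacobson/Karels update
 *
 *	SRTT <- SRTT + (sample - SRTT) / 8
 *	MDEV <- MDEV + (|sample - SRTT| - MDEV) / 4
 *
 * reduces to shifts and adds in rpc_update_rtt(), and the retransmit
 * estimate returned by rpc_calc_rto() is simply SRTT + 4 * MDEV,
 * clamped to RPC_RTO_MAX.
 */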
/**
* rpc_init_rtt - Initialize an RPC RTT estimator context
* @rt: context to initialize
* @timeo: initial timeout value, in jiffies
*
*/
void rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo)
{
unsigned long init = 0;
unsigned int i;
rt->timeo = timeo;
if (timeo > RPC_RTO_INIT)
init = (timeo - RPC_RTO_INIT) << 3;
for (i = 0; i < 5; i++) {
rt->srtt[i] = init;
rt->sdrtt[i] = RPC_RTO_INIT;
rt->ntimeouts[i] = 0;
}
}
EXPORT_SYMBOL_GPL(rpc_init_rtt);
/**
* rpc_update_rtt - Update an RPC RTT estimator context
* @rt: context to update
* @timer: timer array index (request type)
* @m: recent actual RTT, in jiffies
*
* NB: When computing the smoothed RTT and standard deviation,
* be careful not to produce negative intermediate results.
*/
void rpc_update_rtt(struct rpc_rtt *rt, unsigned int timer, long m)
{
long *srtt, *sdrtt;
if (timer-- == 0)
return;
/* jiffies wrapped; ignore this one */
if (m < 0)
return;
if (m == 0)
m = 1L;
srtt = (long *)&rt->srtt[timer];
m -= *srtt >> 3;
*srtt += m;
if (m < 0)
m = -m;
sdrtt = (long *)&rt->sdrtt[timer];
m -= *sdrtt >> 2;
*sdrtt += m;
/* Set lower bound on the variance */
if (*sdrtt < RPC_RTO_MIN)
*sdrtt = RPC_RTO_MIN;
}
EXPORT_SYMBOL_GPL(rpc_update_rtt);
/**
* rpc_calc_rto - Provide an estimated timeout value
* @rt: context to use for calculation
* @timer: timer array index (request type)
*
* Estimate RTO for an NFS RPC sent via an unreliable datagram. Use
* the mean and mean deviation of RTT for the appropriate type of RPC
* for frequently issued RPCs, and a fixed default for the others.
*
* The justification for doing "other" this way is that these RPCs
* happen so infrequently that timer estimation would probably be
* stale. Also, since many of these RPCs are non-idempotent, a
* conservative timeout is desired.
*
* getattr, lookup,
* read, write, commit - A+4D
* other - timeo
*/
unsigned long rpc_calc_rto(struct rpc_rtt *rt, unsigned int timer)
{
unsigned long res;
if (timer-- == 0)
return rt->timeo;
res = ((rt->srtt[timer] + 7) >> 3) + rt->sdrtt[timer];
if (res > RPC_RTO_MAX)
res = RPC_RTO_MAX;
return res;
}
EXPORT_SYMBOL_GPL(rpc_calc_rto);

1514
net/sunrpc/xdr.c Normal file

File diff suppressed because it is too large

1376
net/sunrpc/xprt.c Normal file

File diff suppressed because it is too large

8
net/sunrpc/xprtrdma/Makefile Normal file

@ -0,0 +1,8 @@
obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o
xprtrdma-y := transport.o rpc_rdma.o verbs.o
obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o
svcrdma-y := svc_rdma.o svc_rdma_transport.o \
svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o

875
net/sunrpc/xprtrdma/rpc_rdma.c Normal file

@ -0,0 +1,875 @@
/*
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* rpc_rdma.c
*
* This file contains the guts of the RPC RDMA protocol, and
* does marshaling/unmarshaling, etc. It is also where interfacing
* to the Linux RPC framework lives.
*/
#include "xprt_rdma.h"
#include <linux/highmem.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
#ifdef RPC_DEBUG
static const char transfertypes[][12] = {
"pure inline", /* no chunks */
" read chunk", /* some argument via rdma read */
"*read chunk", /* entire request via rdma read */
"write chunk", /* some result via rdma write */
"reply chunk" /* entire reply via rdma write */
};
#endif
/*
* Chunk assembly from upper layer xdr_buf.
*
* Convert the passed-in xdr_buf into its representation as RPC/RDMA chunk
* elements. Segments are then coalesced when registered, if possible
* within the selected memreg mode.
*
* Returns positive number of segments converted, or a negative errno.
*/
static int
rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs)
{
int len, n = 0, p;
int page_base;
struct page **ppages;
if (pos == 0 && xdrbuf->head[0].iov_len) {
seg[n].mr_page = NULL;
seg[n].mr_offset = xdrbuf->head[0].iov_base;
seg[n].mr_len = xdrbuf->head[0].iov_len;
++n;
}
len = xdrbuf->page_len;
ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
page_base = xdrbuf->page_base & ~PAGE_MASK;
p = 0;
while (len && n < nsegs) {
if (!ppages[p]) {
/* alloc the pagelist for receiving buffer */
ppages[p] = alloc_page(GFP_ATOMIC);
if (!ppages[p])
return -ENOMEM;
}
seg[n].mr_page = ppages[p];
seg[n].mr_offset = (void *)(unsigned long) page_base;
seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len);
if (seg[n].mr_len > PAGE_SIZE)
return -EIO;
len -= seg[n].mr_len;
++n;
++p;
page_base = 0; /* page offset only applies to first page */
}
/* Message overflows the seg array */
if (len && n == nsegs)
return -EIO;
if (xdrbuf->tail[0].iov_len) {
/* the rpcrdma protocol allows us to omit any trailing
* xdr pad bytes, saving the server an RDMA operation. */
if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
return n;
if (n == nsegs)
/* Tail remains, but we're out of segments */
return -EIO;
seg[n].mr_page = NULL;
seg[n].mr_offset = xdrbuf->tail[0].iov_base;
seg[n].mr_len = xdrbuf->tail[0].iov_len;
++n;
}
return n;
}
/*
* Create read/write chunk lists, and reply chunks, for RDMA
*
* Assume check against THRESHOLD has been done, and chunks are required.
* Assume only encoding one list entry for read|write chunks. The NFSv3
* protocol is simple enough to allow this as it only has a single "bulk
* result" in each procedure - complicated NFSv4 COMPOUNDs are not. (The
* RDMA/Sessions NFSv4 proposal addresses this for future v4 revs.)
*
* When used for a single reply chunk (which is a special write
* chunk used for the entire reply, rather than just the data), it
* is used primarily for READDIR and READLINK which would otherwise
* be severely size-limited by a small rdma inline read max. The server
* response will come back as an RDMA Write, followed by a message
* of type RDMA_NOMSG carrying the xid and length. As a result, reply
* chunks do not provide data alignment, however they do not require
* "fixup" (moving the response to the upper layer buffer) either.
*
* Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
*
* Read chunklist (a linked list):
* N elements, position P (same P for all chunks of same arg!):
* 1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0
*
* Write chunklist (a list of (one) counted array):
* N elements:
* 1 - N - HLOO - HLOO - ... - HLOO - 0
*
* Reply chunk (a counted array):
* N elements:
* 1 - N - HLOO - HLOO - ... - HLOO
*
* Returns positive RPC/RDMA header size, or negative errno.
*/
static ssize_t
rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type)
{
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
int n, nsegs, nchunks = 0;
unsigned int pos;
struct rpcrdma_mr_seg *seg = req->rl_segments;
struct rpcrdma_read_chunk *cur_rchunk = NULL;
struct rpcrdma_write_array *warray = NULL;
struct rpcrdma_write_chunk *cur_wchunk = NULL;
__be32 *iptr = headerp->rm_body.rm_chunks;
if (type == rpcrdma_readch || type == rpcrdma_areadch) {
/* a read chunk - server will RDMA Read our memory */
cur_rchunk = (struct rpcrdma_read_chunk *) iptr;
} else {
/* a write or reply chunk - server will RDMA Write our memory */
*iptr++ = xdr_zero; /* encode a NULL read chunk list */
if (type == rpcrdma_replych)
*iptr++ = xdr_zero; /* a NULL write chunk list */
warray = (struct rpcrdma_write_array *) iptr;
cur_wchunk = (struct rpcrdma_write_chunk *) (warray + 1);
}
if (type == rpcrdma_replych || type == rpcrdma_areadch)
pos = 0;
else
pos = target->head[0].iov_len;
nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS);
if (nsegs < 0)
return nsegs;
do {
n = rpcrdma_register_external(seg, nsegs,
cur_wchunk != NULL, r_xprt);
if (n <= 0)
goto out;
if (cur_rchunk) { /* read */
cur_rchunk->rc_discrim = xdr_one;
/* all read chunks have the same "position" */
cur_rchunk->rc_position = htonl(pos);
cur_rchunk->rc_target.rs_handle = htonl(seg->mr_rkey);
cur_rchunk->rc_target.rs_length = htonl(seg->mr_len);
xdr_encode_hyper(
(__be32 *)&cur_rchunk->rc_target.rs_offset,
seg->mr_base);
dprintk("RPC: %s: read chunk "
"elem %d@0x%llx:0x%x pos %u (%s)\n", __func__,
seg->mr_len, (unsigned long long)seg->mr_base,
seg->mr_rkey, pos, n < nsegs ? "more" : "last");
cur_rchunk++;
r_xprt->rx_stats.read_chunk_count++;
} else { /* write/reply */
cur_wchunk->wc_target.rs_handle = htonl(seg->mr_rkey);
cur_wchunk->wc_target.rs_length = htonl(seg->mr_len);
xdr_encode_hyper(
(__be32 *)&cur_wchunk->wc_target.rs_offset,
seg->mr_base);
dprintk("RPC: %s: %s chunk "
"elem %d@0x%llx:0x%x (%s)\n", __func__,
(type == rpcrdma_replych) ? "reply" : "write",
seg->mr_len, (unsigned long long)seg->mr_base,
seg->mr_rkey, n < nsegs ? "more" : "last");
cur_wchunk++;
if (type == rpcrdma_replych)
r_xprt->rx_stats.reply_chunk_count++;
else
r_xprt->rx_stats.write_chunk_count++;
r_xprt->rx_stats.total_rdma_request += seg->mr_len;
}
nchunks++;
seg += n;
nsegs -= n;
} while (nsegs);
/* success. all failures return above */
req->rl_nchunks = nchunks;
/*
* finish off header. If write, marshal discrim and nchunks.
*/
if (cur_rchunk) {
iptr = (__be32 *) cur_rchunk;
*iptr++ = xdr_zero; /* finish the read chunk list */
*iptr++ = xdr_zero; /* encode a NULL write chunk list */
*iptr++ = xdr_zero; /* encode a NULL reply chunk */
} else {
warray->wc_discrim = xdr_one;
warray->wc_nchunks = htonl(nchunks);
iptr = (__be32 *) cur_wchunk;
if (type == rpcrdma_writech) {
*iptr++ = xdr_zero; /* finish the write chunk list */
*iptr++ = xdr_zero; /* encode a NULL reply chunk */
}
}
/*
* Return header size.
*/
return (unsigned char *)iptr - (unsigned char *)headerp;
out:
if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) {
for (pos = 0; nchunks--;)
pos += rpcrdma_deregister_external(
&req->rl_segments[pos], r_xprt);
}
return n;
}
/*
* Marshal chunks. This routine returns the header length
* consumed by marshaling.
*
* Returns positive RPC/RDMA header size, or negative errno.
*/
ssize_t
rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result)
{
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)req->rl_base;
if (req->rl_rtype != rpcrdma_noch)
result = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf,
headerp, req->rl_rtype);
else if (req->rl_wtype != rpcrdma_noch)
result = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf,
headerp, req->rl_wtype);
return result;
}
/*
* Copy write data inline.
* This function is used for "small" requests. Data which is passed
* to RPC via iovecs (or page list) is copied directly into the
* pre-registered memory buffer for this request. For small amounts
* of data, this is efficient. The cutoff value is tunable.
*/
static int
rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
{
int i, npages, curlen;
int copy_len;
unsigned char *srcp, *destp;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
int page_base;
struct page **ppages;
destp = rqst->rq_svec[0].iov_base;
curlen = rqst->rq_svec[0].iov_len;
destp += curlen;
/*
* Do optional padding where it makes sense. Alignment of write
* payload can help the server, if our setting is accurate.
*/
pad -= (curlen + 36/*sizeof(struct rpcrdma_msg_padded)*/);
if (pad < 0 || rqst->rq_slen - curlen < RPCRDMA_INLINE_PAD_THRESH)
pad = 0; /* don't pad this request */
dprintk("RPC: %s: pad %d destp 0x%p len %d hdrlen %d\n",
__func__, pad, destp, rqst->rq_slen, curlen);
copy_len = rqst->rq_snd_buf.page_len;
if (rqst->rq_snd_buf.tail[0].iov_len) {
curlen = rqst->rq_snd_buf.tail[0].iov_len;
if (destp + copy_len != rqst->rq_snd_buf.tail[0].iov_base) {
memmove(destp + copy_len,
rqst->rq_snd_buf.tail[0].iov_base, curlen);
r_xprt->rx_stats.pullup_copy_count += curlen;
}
dprintk("RPC: %s: tail destp 0x%p len %d\n",
__func__, destp + copy_len, curlen);
rqst->rq_svec[0].iov_len += curlen;
}
r_xprt->rx_stats.pullup_copy_count += copy_len;
page_base = rqst->rq_snd_buf.page_base;
ppages = rqst->rq_snd_buf.pages + (page_base >> PAGE_SHIFT);
page_base &= ~PAGE_MASK;
npages = PAGE_ALIGN(page_base+copy_len) >> PAGE_SHIFT;
for (i = 0; copy_len && i < npages; i++) {
curlen = PAGE_SIZE - page_base;
if (curlen > copy_len)
curlen = copy_len;
dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n",
__func__, i, destp, copy_len, curlen);
srcp = kmap_atomic(ppages[i]);
memcpy(destp, srcp+page_base, curlen);
kunmap_atomic(srcp);
rqst->rq_svec[0].iov_len += curlen;
destp += curlen;
copy_len -= curlen;
page_base = 0;
}
/* header now contains entire send message */
return pad;
}
/*
* Marshal a request: the primary job of this routine is to choose
* the transfer modes. See comments below.
*
* Uses multiple RDMA IOVs for a request:
* [0] -- RPC RDMA header, which uses memory from the *start* of the
* preregistered buffer that already holds the RPC data in
* its middle.
* [1] -- the RPC header/data, marshaled by RPC and the NFS protocol.
* [2] -- optional padding.
* [3] -- if padded, header only in [1] and data here.
*
* Returns zero on success, otherwise a negative errno.
*/
int
rpcrdma_marshal_req(struct rpc_rqst *rqst)
{
struct rpc_xprt *xprt = rqst->rq_xprt;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
char *base;
size_t rpclen, padlen;
ssize_t hdrlen;
struct rpcrdma_msg *headerp;
/*
* rpclen gets amount of data in first buffer, which is the
* pre-registered buffer.
*/
base = rqst->rq_svec[0].iov_base;
rpclen = rqst->rq_svec[0].iov_len;
/* build RDMA header in private area at front */
headerp = (struct rpcrdma_msg *) req->rl_base;
/* don't htonl XID, it's already done in request */
headerp->rm_xid = rqst->rq_xid;
headerp->rm_vers = xdr_one;
headerp->rm_credit = htonl(r_xprt->rx_buf.rb_max_requests);
headerp->rm_type = htonl(RDMA_MSG);
/*
* Chunks needed for results?
*
* o If the expected result is under the inline threshold, all ops
* return as inline (but see later).
* o Large non-read ops return as a single reply chunk.
* o Large read ops return data as write chunk(s), header as inline.
*
* Note: the NFS code sending down multiple result segments implies
* the op is one of read, readdir[plus], readlink or NFSv4 getacl.
*/
/*
* This code can handle read chunks, write chunks OR reply
* chunks -- only one type. If the request is too big to fit
* inline, then we will choose read chunks. If the request is
* a READ, then use write chunks to separate the file data
* into pages; otherwise use reply chunks.
*/
if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
req->rl_wtype = rpcrdma_noch;
else if (rqst->rq_rcv_buf.page_len == 0)
req->rl_wtype = rpcrdma_replych;
else if (rqst->rq_rcv_buf.flags & XDRBUF_READ)
req->rl_wtype = rpcrdma_writech;
else
req->rl_wtype = rpcrdma_replych;
/*
* Chunks needed for arguments?
*
* o If the total request is under the inline threshold, all ops
* are sent as inline.
* o Large non-write ops are sent with the entire message as a
* single read chunk (protocol 0-position special case).
* o Large write ops transmit data as read chunk(s), header as
* inline.
*
* Note: the NFS code sending down multiple argument segments
* implies the op is a write.
* TBD check NFSv4 setacl
*/
if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
req->rl_rtype = rpcrdma_noch;
else if (rqst->rq_snd_buf.page_len == 0)
req->rl_rtype = rpcrdma_areadch;
else
req->rl_rtype = rpcrdma_readch;
/* The following simplification is not true forever */
if (req->rl_rtype != rpcrdma_noch && req->rl_wtype == rpcrdma_replych)
req->rl_wtype = rpcrdma_noch;
if (req->rl_rtype != rpcrdma_noch && req->rl_wtype != rpcrdma_noch) {
dprintk("RPC: %s: cannot marshal multiple chunk lists\n",
__func__);
return -EIO;
}
hdrlen = 28; /*sizeof *headerp;*/
padlen = 0;
/*
* Pull up any extra send data into the preregistered buffer.
* When padding is in use and applies to the transfer, insert
* it and change the message type.
*/
if (req->rl_rtype == rpcrdma_noch) {
padlen = rpcrdma_inline_pullup(rqst,
RPCRDMA_INLINE_PAD_VALUE(rqst));
if (padlen) {
headerp->rm_type = htonl(RDMA_MSGP);
headerp->rm_body.rm_padded.rm_align =
htonl(RPCRDMA_INLINE_PAD_VALUE(rqst));
headerp->rm_body.rm_padded.rm_thresh =
htonl(RPCRDMA_INLINE_PAD_THRESH);
headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero;
headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
hdrlen += 2 * sizeof(u32); /* extra words in padhdr */
if (req->rl_wtype != rpcrdma_noch) {
dprintk("RPC: %s: invalid chunk list\n",
__func__);
return -EIO;
}
} else {
headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero;
headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero;
headerp->rm_body.rm_nochunks.rm_empty[2] = xdr_zero;
/* new length after pullup */
rpclen = rqst->rq_svec[0].iov_len;
/*
* Currently we try to not actually use read inline.
* Reply chunks have the desirable property that
* they land, packed, directly in the target buffers
* without headers, so they require no fixup. The
* additional RDMA Write op sends the same amount
* of data, streams on-the-wire and adds no overhead
* on receive. Therefore, we request a reply chunk
* for non-writes wherever feasible and efficient.
*/
if (req->rl_wtype == rpcrdma_noch)
req->rl_wtype = rpcrdma_replych;
}
}
hdrlen = rpcrdma_marshal_chunks(rqst, hdrlen);
if (hdrlen < 0)
return hdrlen;
dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd"
" headerp 0x%p base 0x%p lkey 0x%x\n",
__func__, transfertypes[req->rl_wtype], hdrlen, rpclen, padlen,
headerp, base, req->rl_iov.lkey);
/*
* initialize send_iov's - normally only two: rdma chunk header and
* single preregistered RPC header buffer, but if padding is present,
* then use a preregistered (and zeroed) pad buffer between the RPC
* header and any write data. In all non-rdma cases, any following
* data has been copied into the RPC header buffer.
*/
req->rl_send_iov[0].addr = req->rl_iov.addr;
req->rl_send_iov[0].length = hdrlen;
req->rl_send_iov[0].lkey = req->rl_iov.lkey;
req->rl_send_iov[1].addr = req->rl_iov.addr + (base - req->rl_base);
req->rl_send_iov[1].length = rpclen;
req->rl_send_iov[1].lkey = req->rl_iov.lkey;
req->rl_niovs = 2;
if (padlen) {
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
req->rl_send_iov[2].addr = ep->rep_pad.addr;
req->rl_send_iov[2].length = padlen;
req->rl_send_iov[2].lkey = ep->rep_pad.lkey;
req->rl_send_iov[3].addr = req->rl_send_iov[1].addr + rpclen;
req->rl_send_iov[3].length = rqst->rq_slen - rpclen;
req->rl_send_iov[3].lkey = req->rl_iov.lkey;
req->rl_niovs = 4;
}
return 0;
}
/*
* Chase down a received write or reply chunklist to get length
* RDMA'd by server. See map at rpcrdma_create_chunks()! :-)
*/
static int
rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __be32 **iptrp)
{
unsigned int i, total_len;
struct rpcrdma_write_chunk *cur_wchunk;
i = ntohl(**iptrp); /* get array count */
if (i > max)
return -1;
cur_wchunk = (struct rpcrdma_write_chunk *) (*iptrp + 1);
total_len = 0;
while (i--) {
struct rpcrdma_segment *seg = &cur_wchunk->wc_target;
ifdebug(FACILITY) {
u64 off;
xdr_decode_hyper((__be32 *)&seg->rs_offset, &off);
dprintk("RPC: %s: chunk %d@0x%llx:0x%x\n",
__func__,
ntohl(seg->rs_length),
(unsigned long long)off,
ntohl(seg->rs_handle));
}
total_len += ntohl(seg->rs_length);
++cur_wchunk;
}
/* check and adjust for properly terminated write chunk */
if (wrchunk) {
__be32 *w = (__be32 *) cur_wchunk;
if (*w++ != xdr_zero)
return -1;
cur_wchunk = (struct rpcrdma_write_chunk *) w;
}
if ((char *) cur_wchunk > rep->rr_base + rep->rr_len)
return -1;
*iptrp = (__be32 *) cur_wchunk;
return total_len;
}
/*
* Scatter inline received data back into provided iov's.
*/
static void
rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
{
int i, npages, curlen, olen;
char *destp;
struct page **ppages;
int page_base;
curlen = rqst->rq_rcv_buf.head[0].iov_len;
if (curlen > copy_len) { /* write chunk header fixup */
curlen = copy_len;
rqst->rq_rcv_buf.head[0].iov_len = curlen;
}
dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n",
__func__, srcp, copy_len, curlen);
/* Shift pointer for first receive segment only */
rqst->rq_rcv_buf.head[0].iov_base = srcp;
srcp += curlen;
copy_len -= curlen;
olen = copy_len;
i = 0;
rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen;
page_base = rqst->rq_rcv_buf.page_base;
ppages = rqst->rq_rcv_buf.pages + (page_base >> PAGE_SHIFT);
page_base &= ~PAGE_MASK;
if (copy_len && rqst->rq_rcv_buf.page_len) {
npages = PAGE_ALIGN(page_base +
rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT;
for (; i < npages; i++) {
curlen = PAGE_SIZE - page_base;
if (curlen > copy_len)
curlen = copy_len;
dprintk("RPC: %s: page %d"
" srcp 0x%p len %d curlen %d\n",
__func__, i, srcp, copy_len, curlen);
destp = kmap_atomic(ppages[i]);
memcpy(destp + page_base, srcp, curlen);
flush_dcache_page(ppages[i]);
kunmap_atomic(destp);
srcp += curlen;
copy_len -= curlen;
if (copy_len == 0)
break;
page_base = 0;
}
}
if (copy_len && rqst->rq_rcv_buf.tail[0].iov_len) {
curlen = copy_len;
if (curlen > rqst->rq_rcv_buf.tail[0].iov_len)
curlen = rqst->rq_rcv_buf.tail[0].iov_len;
if (rqst->rq_rcv_buf.tail[0].iov_base != srcp)
memmove(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen);
dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n",
__func__, srcp, copy_len, curlen);
rqst->rq_rcv_buf.tail[0].iov_len = curlen;
copy_len -= curlen; ++i;
} else
rqst->rq_rcv_buf.tail[0].iov_len = 0;
if (pad) {
/* implicit padding on terminal chunk */
unsigned char *p = rqst->rq_rcv_buf.tail[0].iov_base;
while (pad--)
p[rqst->rq_rcv_buf.tail[0].iov_len++] = 0;
}
if (copy_len)
dprintk("RPC: %s: %d bytes in"
" %d extra segments (%d lost)\n",
__func__, olen, i, copy_len);
/* TBD avoid a warning from call_decode() */
rqst->rq_private_buf = rqst->rq_rcv_buf;
}
void
rpcrdma_connect_worker(struct work_struct *work)
{
struct rpcrdma_ep *ep =
container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
struct rpc_xprt *xprt = ep->rep_xprt;
spin_lock_bh(&xprt->transport_lock);
if (++xprt->connect_cookie == 0) /* maintain a reserved value */
++xprt->connect_cookie;
if (ep->rep_connected > 0) {
if (!xprt_test_and_set_connected(xprt))
xprt_wake_pending_tasks(xprt, 0);
} else {
if (xprt_test_and_clear_connected(xprt))
xprt_wake_pending_tasks(xprt, -ENOTCONN);
}
spin_unlock_bh(&xprt->transport_lock);
}
/*
* This function is called when an async event is posted to
* the connection which changes the connection state. All it
* does at this point is mark the connection up/down, the rpc
* timers do the rest.
*/
void
rpcrdma_conn_func(struct rpcrdma_ep *ep)
{
schedule_delayed_work(&ep->rep_connect_worker, 0);
}
/*
* Called as a tasklet to do req/reply match and complete a request
* Errors must result in the RPC task either being awakened, or
* allowed to timeout, to discover the errors at that time.
*/
void
rpcrdma_reply_handler(struct rpcrdma_rep *rep)
{
struct rpcrdma_msg *headerp;
struct rpcrdma_req *req;
struct rpc_rqst *rqst;
struct rpc_xprt *xprt = rep->rr_xprt;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
__be32 *iptr;
int rdmalen, status;
unsigned long cwnd;
/* Check status. If bad, signal disconnect and return rep to pool */
if (rep->rr_len == ~0U) {
rpcrdma_recv_buffer_put(rep);
if (r_xprt->rx_ep.rep_connected == 1) {
r_xprt->rx_ep.rep_connected = -EIO;
rpcrdma_conn_func(&r_xprt->rx_ep);
}
return;
}
if (rep->rr_len < 28) {
dprintk("RPC: %s: short/invalid reply\n", __func__);
goto repost;
}
headerp = (struct rpcrdma_msg *) rep->rr_base;
if (headerp->rm_vers != xdr_one) {
dprintk("RPC: %s: invalid version %d\n",
__func__, ntohl(headerp->rm_vers));
goto repost;
}
/* Get XID and try for a match. */
spin_lock(&xprt->transport_lock);
rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
if (rqst == NULL) {
spin_unlock(&xprt->transport_lock);
dprintk("RPC: %s: reply 0x%p failed "
"to match any request xid 0x%08x len %d\n",
__func__, rep, headerp->rm_xid, rep->rr_len);
repost:
r_xprt->rx_stats.bad_reply_count++;
rep->rr_func = rpcrdma_reply_handler;
if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
rpcrdma_recv_buffer_put(rep);
return;
}
/* get request object */
req = rpcr_to_rdmar(rqst);
if (req->rl_reply) {
spin_unlock(&xprt->transport_lock);
dprintk("RPC: %s: duplicate reply 0x%p to RPC "
"request 0x%p: xid 0x%08x\n", __func__, rep, req,
headerp->rm_xid);
goto repost;
}
dprintk("RPC: %s: reply 0x%p completes request 0x%p\n"
" RPC request 0x%p xid 0x%08x\n",
__func__, rep, req, rqst, headerp->rm_xid);
/* from here on, the reply is no longer an orphan */
req->rl_reply = rep;
xprt->reestablish_timeout = 0;
/* check for expected message types */
/* The order of some of these tests is important. */
switch (headerp->rm_type) {
case htonl(RDMA_MSG):
/* never expect read chunks */
/* never expect reply chunks (two ways to check) */
/* never expect write chunks without having offered RDMA */
if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
(headerp->rm_body.rm_chunks[1] == xdr_zero &&
headerp->rm_body.rm_chunks[2] != xdr_zero) ||
(headerp->rm_body.rm_chunks[1] != xdr_zero &&
req->rl_nchunks == 0))
goto badheader;
if (headerp->rm_body.rm_chunks[1] != xdr_zero) {
/* count any expected write chunks in read reply */
/* start at write chunk array count */
iptr = &headerp->rm_body.rm_chunks[2];
rdmalen = rpcrdma_count_chunks(rep,
req->rl_nchunks, 1, &iptr);
/* check for validity, and no reply chunk after */
if (rdmalen < 0 || *iptr++ != xdr_zero)
goto badheader;
rep->rr_len -=
((unsigned char *)iptr - (unsigned char *)headerp);
status = rep->rr_len + rdmalen;
r_xprt->rx_stats.total_rdma_reply += rdmalen;
/* special case - last chunk may omit padding */
if (rdmalen &= 3) {
rdmalen = 4 - rdmalen;
status += rdmalen;
}
} else {
/* else ordinary inline */
rdmalen = 0;
iptr = (__be32 *)((unsigned char *)headerp + 28);
rep->rr_len -= 28; /*sizeof *headerp;*/
status = rep->rr_len;
}
/* Fix up the rpc results for upper layer */
rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen);
break;
case htonl(RDMA_NOMSG):
/* never expect read or write chunks, always reply chunks */
if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
headerp->rm_body.rm_chunks[1] != xdr_zero ||
headerp->rm_body.rm_chunks[2] != xdr_one ||
req->rl_nchunks == 0)
goto badheader;
iptr = (__be32 *)((unsigned char *)headerp + 28);
rdmalen = rpcrdma_count_chunks(rep, req->rl_nchunks, 0, &iptr);
if (rdmalen < 0)
goto badheader;
r_xprt->rx_stats.total_rdma_reply += rdmalen;
/* Reply chunk buffer already is the reply vector - no fixup. */
status = rdmalen;
break;
badheader:
default:
dprintk("%s: invalid rpcrdma reply header (type %d):"
" chunks[012] == %d %d %d"
" expected chunks <= %d\n",
__func__, ntohl(headerp->rm_type),
headerp->rm_body.rm_chunks[0],
headerp->rm_body.rm_chunks[1],
headerp->rm_body.rm_chunks[2],
req->rl_nchunks);
status = -EIO;
r_xprt->rx_stats.bad_reply_count++;
break;
}
cwnd = xprt->cwnd;
xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
if (xprt->cwnd > cwnd)
xprt_release_rqst_cong(rqst->rq_task);
dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
__func__, xprt, rqst, status);
xprt_complete_rqst(rqst->rq_task, status);
spin_unlock(&xprt->transport_lock);
}

302
net/sunrpc/xprtrdma/svc_rdma.c Normal file

@ -0,0 +1,302 @@
/*
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/sysctl.h>
#include <linux/workqueue.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
/* RPC/RDMA parameters */
unsigned int svcrdma_ord = RPCRDMA_ORD;
static unsigned int min_ord = 1;
static unsigned int max_ord = 4096;
unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
static unsigned int min_max_requests = 4;
static unsigned int max_max_requests = 16384;
unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE;
static unsigned int min_max_inline = 4096;
static unsigned int max_max_inline = 65536;
atomic_t rdma_stat_recv;
atomic_t rdma_stat_read;
atomic_t rdma_stat_write;
atomic_t rdma_stat_sq_starve;
atomic_t rdma_stat_rq_starve;
atomic_t rdma_stat_rq_poll;
atomic_t rdma_stat_rq_prod;
atomic_t rdma_stat_sq_poll;
atomic_t rdma_stat_sq_prod;
/* Temporary NFS request map and context caches */
struct kmem_cache *svc_rdma_map_cachep;
struct kmem_cache *svc_rdma_ctxt_cachep;
struct workqueue_struct *svc_rdma_wq;
/*
* This function implements reading and resetting an atomic_t stat
* variable through read/write to a proc file. Any write to the file
* resets the associated statistic to zero. Any read returns its
* current value.
*/
static int read_reset_stat(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
atomic_t *stat = (atomic_t *)table->data;
if (!stat)
return -EINVAL;
if (write)
atomic_set(stat, 0);
else {
char str_buf[32];
char *data;
int len = snprintf(str_buf, 32, "%d\n", atomic_read(stat));
if (len >= 32)
return -EFAULT;
len = strlen(str_buf);
if (*ppos > len) {
*lenp = 0;
return 0;
}
data = &str_buf[*ppos];
len -= *ppos;
if (len > *lenp)
len = *lenp;
if (len && copy_to_user(buffer, str_buf, len))
return -EFAULT;
*lenp = len;
*ppos += len;
}
return 0;
}
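/*
 * Usage note (illustrative): each counter below appears under
 * /proc/sys/sunrpc/svc_rdma/, e.g.
 *
 *	cat /proc/sys/sunrpc/svc_rdma/rdma_stat_recv	# read the counter
 *	echo 0 > /proc/sys/sunrpc/svc_rdma/rdma_stat_recv	# reset it
 *
 * Any write resets the counter to zero, as read_reset_stat() above
 * implements.
 */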
static struct ctl_table_header *svcrdma_table_header;
static struct ctl_table svcrdma_parm_table[] = {
{
.procname = "max_requests",
.data = &svcrdma_max_requests,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_max_requests,
.extra2 = &max_max_requests
},
{
.procname = "max_req_size",
.data = &svcrdma_max_req_size,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_max_inline,
.extra2 = &max_max_inline
},
{
.procname = "max_outbound_read_requests",
.data = &svcrdma_ord,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_ord,
.extra2 = &max_ord,
},
{
.procname = "rdma_stat_read",
.data = &rdma_stat_read,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = read_reset_stat,
},
{
.procname = "rdma_stat_recv",
.data = &rdma_stat_recv,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = read_reset_stat,
},
{
.procname = "rdma_stat_write",
.data = &rdma_stat_write,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = read_reset_stat,
},
{
.procname = "rdma_stat_sq_starve",
.data = &rdma_stat_sq_starve,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = read_reset_stat,
},
{
.procname = "rdma_stat_rq_starve",
.data = &rdma_stat_rq_starve,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = read_reset_stat,
},
{
.procname = "rdma_stat_rq_poll",
.data = &rdma_stat_rq_poll,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = read_reset_stat,
},
{
.procname = "rdma_stat_rq_prod",
.data = &rdma_stat_rq_prod,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = read_reset_stat,
},
{
.procname = "rdma_stat_sq_poll",
.data = &rdma_stat_sq_poll,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = read_reset_stat,
},
{
.procname = "rdma_stat_sq_prod",
.data = &rdma_stat_sq_prod,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = read_reset_stat,
},
{ },
};
static struct ctl_table svcrdma_table[] = {
{
.procname = "svc_rdma",
.mode = 0555,
.child = svcrdma_parm_table
},
{ },
};
static struct ctl_table svcrdma_root_table[] = {
{
.procname = "sunrpc",
.mode = 0555,
.child = svcrdma_table
},
{ },
};
void svc_rdma_cleanup(void)
{
dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
destroy_workqueue(svc_rdma_wq);
if (svcrdma_table_header) {
unregister_sysctl_table(svcrdma_table_header);
svcrdma_table_header = NULL;
}
svc_unreg_xprt_class(&svc_rdma_class);
kmem_cache_destroy(svc_rdma_map_cachep);
kmem_cache_destroy(svc_rdma_ctxt_cachep);
}
int svc_rdma_init(void)
{
dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord);
dprintk("\tmax_requests : %d\n", svcrdma_max_requests);
dprintk("\tsq_depth : %d\n",
svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
if (!svc_rdma_wq)
return -ENOMEM;
if (!svcrdma_table_header)
svcrdma_table_header =
register_sysctl_table(svcrdma_root_table);
/* Create the temporary map cache */
svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache",
sizeof(struct svc_rdma_req_map),
0,
SLAB_HWCACHE_ALIGN,
NULL);
if (!svc_rdma_map_cachep) {
printk(KERN_INFO "Could not allocate map cache.\n");
goto err0;
}
/* Create the temporary context cache */
svc_rdma_ctxt_cachep =
kmem_cache_create("svc_rdma_ctxt_cache",
sizeof(struct svc_rdma_op_ctxt),
0,
SLAB_HWCACHE_ALIGN,
NULL);
if (!svc_rdma_ctxt_cachep) {
printk(KERN_INFO "Could not allocate WR ctxt cache.\n");
goto err1;
}
/* Register RDMA with the SVC transport switch */
svc_reg_xprt_class(&svc_rdma_class);
return 0;
err1:
kmem_cache_destroy(svc_rdma_map_cachep);
err0:
unregister_sysctl_table(svcrdma_table_header);
destroy_workqueue(svc_rdma_wq);
return -ENOMEM;
}
MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
MODULE_DESCRIPTION("SVC RDMA Transport");
MODULE_LICENSE("Dual BSD/GPL");
module_init(svc_rdma_init);
module_exit(svc_rdma_cleanup);

386
net/sunrpc/xprtrdma/svc_rdma_marshal.c Normal file

@ -0,0 +1,386 @@
/*
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/debug.h>
#include <asm/unaligned.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
/*
* Decodes a read chunk list. The expected format is as follows:
* descrim : xdr_one
* position : u32 offset into XDR stream
* handle : u32 RKEY
* . . .
* end-of-list: xdr_zero
*/
static u32 *decode_read_list(u32 *va, u32 *vaend)
{
struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;
while (ch->rc_discrim != xdr_zero) {
if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) >
(unsigned long)vaend) {
dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
return NULL;
}
ch++;
}
return (u32 *)&ch->rc_position;
}
/*
* Determine number of chunks and total bytes in chunk list. The chunk
* list has already been verified to fit within the RPCRDMA header.
*/
void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch,
int *ch_count, int *byte_count)
{
/* compute the number of bytes represented by read chunks */
*byte_count = 0;
*ch_count = 0;
for (; ch->rc_discrim != 0; ch++) {
*byte_count = *byte_count + ntohl(ch->rc_target.rs_length);
*ch_count = *ch_count + 1;
}
}
/*
* Decodes a write chunk list. The expected format is as follows:
* descrim : xdr_one
* nchunks : <count>
* handle : u32 RKEY ---+
* length : u32 <len of segment> |
* offset : remote va + <count>
* . . . |
* ---+
*/
static u32 *decode_write_list(u32 *va, u32 *vaend)
{
unsigned long start, end;
int nchunks;
struct rpcrdma_write_array *ary =
(struct rpcrdma_write_array *)va;
/* Check for not write-array */
if (ary->wc_discrim == xdr_zero)
return (u32 *)&ary->wc_nchunks;
if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
return NULL;
}
nchunks = ntohl(ary->wc_nchunks);
start = (unsigned long)&ary->wc_array[0];
end = (unsigned long)vaend;
if (nchunks < 0 ||
nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
(start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
ary, nchunks, vaend);
return NULL;
}
/*
* rs_length is the 2nd 4B field in wc_target and taking its
* address skips the list terminator
*/
return (u32 *)&ary->wc_array[nchunks].wc_target.rs_length;
}
static u32 *decode_reply_array(u32 *va, u32 *vaend)
{
unsigned long start, end;
int nchunks;
struct rpcrdma_write_array *ary =
(struct rpcrdma_write_array *)va;
/* Check for no reply-array */
if (ary->wc_discrim == xdr_zero)
return (u32 *)&ary->wc_nchunks;
if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
return NULL;
}
nchunks = ntohl(ary->wc_nchunks);
start = (unsigned long)&ary->wc_array[0];
end = (unsigned long)vaend;
if (nchunks < 0 ||
nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
(start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
ary, nchunks, vaend);
return NULL;
}
return (u32 *)&ary->wc_array[nchunks];
}
int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
struct svc_rqst *rqstp)
{
struct rpcrdma_msg *rmsgp = NULL;
u32 *va;
u32 *vaend;
u32 hdr_len;
rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
/* Verify that there are enough bytes for header + something */
if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) {
dprintk("svcrdma: header too short = %d\n",
rqstp->rq_arg.len);
return -EINVAL;
}
/* Decode the header */
rmsgp->rm_xid = ntohl(rmsgp->rm_xid);
rmsgp->rm_vers = ntohl(rmsgp->rm_vers);
rmsgp->rm_credit = ntohl(rmsgp->rm_credit);
rmsgp->rm_type = ntohl(rmsgp->rm_type);
if (rmsgp->rm_vers != RPCRDMA_VERSION)
return -ENOSYS;
/* Pull in the extra for the padded case and bump our pointer */
if (rmsgp->rm_type == RDMA_MSGP) {
int hdrlen;
rmsgp->rm_body.rm_padded.rm_align =
ntohl(rmsgp->rm_body.rm_padded.rm_align);
rmsgp->rm_body.rm_padded.rm_thresh =
ntohl(rmsgp->rm_body.rm_padded.rm_thresh);
va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
rqstp->rq_arg.head[0].iov_base = va;
hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
rqstp->rq_arg.head[0].iov_len -= hdrlen;
if (hdrlen > rqstp->rq_arg.len)
return -EINVAL;
return hdrlen;
}
/* The chunk list may contain either a read chunk list or a write
* chunk list and a reply chunk list.
*/
va = &rmsgp->rm_body.rm_chunks[0];
vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
va = decode_read_list(va, vaend);
if (!va)
return -EINVAL;
va = decode_write_list(va, vaend);
if (!va)
return -EINVAL;
va = decode_reply_array(va, vaend);
if (!va)
return -EINVAL;
rqstp->rq_arg.head[0].iov_base = va;
hdr_len = (unsigned long)va - (unsigned long)rmsgp;
rqstp->rq_arg.head[0].iov_len -= hdr_len;
*rdma_req = rmsgp;
return hdr_len;
}
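The final bookkeeping above is just "consume hdr_len bytes from the front of the head iovec": the base pointer advances past the RPC/RDMA header and the length shrinks by the same amount. A stand-alone sketch of that pointer arithmetic (plain struct iovec and a made-up header length, not the kernel structures):

#include <stdio.h>
#include <string.h>
#include <sys/uio.h>

/* Advance an iovec past 'consumed' bytes of already-parsed header. */
static size_t consume_header(struct iovec *iov, size_t consumed)
{
	iov->iov_base = (char *)iov->iov_base + consumed;
	iov->iov_len -= consumed;
	return consumed;
}

int main(void)
{
	char buf[] = "HEADERpayload";
	struct iovec head = { .iov_base = buf, .iov_len = strlen(buf) };
	size_t hdr_len = consume_header(&head, 6);	/* skip "HEADER" */

	printf("hdr_len=%zu remaining=\"%.*s\"\n",
	       hdr_len, (int)head.iov_len, (char *)head.iov_base);
	return 0;
}
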
int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp)
{
struct rpcrdma_msg *rmsgp = NULL;
struct rpcrdma_read_chunk *ch;
struct rpcrdma_write_array *ary;
u32 *va;
u32 hdrlen;
dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n",
rqstp);
rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
/* Pull in the extra for the padded case and bump our pointer */
if (rmsgp->rm_type == RDMA_MSGP) {
va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
rqstp->rq_arg.head[0].iov_base = va;
hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
rqstp->rq_arg.head[0].iov_len -= hdrlen;
return hdrlen;
}
/*
* Skip all chunks to find RPC msg. These were previously processed
*/
va = &rmsgp->rm_body.rm_chunks[0];
/* Skip read-list */
for (ch = (struct rpcrdma_read_chunk *)va;
ch->rc_discrim != xdr_zero; ch++);
va = (u32 *)&ch->rc_position;
/* Skip write-list */
ary = (struct rpcrdma_write_array *)va;
if (ary->wc_discrim == xdr_zero)
va = (u32 *)&ary->wc_nchunks;
else
/*
* rs_length is the 2nd 4B field in wc_target and taking its
* address skips the list terminator
*/
va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length;
/* Skip reply-array */
ary = (struct rpcrdma_write_array *)va;
if (ary->wc_discrim == xdr_zero)
va = (u32 *)&ary->wc_nchunks;
else
va = (u32 *)&ary->wc_array[ary->wc_nchunks];
rqstp->rq_arg.head[0].iov_base = va;
hdrlen = (unsigned long)va - (unsigned long)rmsgp;
rqstp->rq_arg.head[0].iov_len -= hdrlen;
return hdrlen;
}
int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rmsgp,
enum rpcrdma_errcode err, u32 *va)
{
u32 *startp = va;
*va++ = htonl(rmsgp->rm_xid);
*va++ = htonl(rmsgp->rm_vers);
*va++ = htonl(xprt->sc_max_requests);
*va++ = htonl(RDMA_ERROR);
*va++ = htonl(err);
if (err == ERR_VERS) {
*va++ = htonl(RPCRDMA_VERSION);
*va++ = htonl(RPCRDMA_VERSION);
}
return (int)((unsigned long)va - (unsigned long)startp);
}
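The return value is simply the size of the error header that was written: five 32-bit XDR words (xid, version, credits, RDMA_ERROR, the error code), plus two more carrying the supported version range when the error is a version mismatch. A tiny sketch of that size computation (the DEMO_* constants are illustrative stand-ins, not the kernel's enum values):

#include <stdio.h>
#include <stdint.h>

enum { DEMO_ERR_VERS = 1, DEMO_ERR_CHUNK = 2 };	/* illustrative codes */

static size_t error_hdr_bytes(int err)
{
	size_t words = 5;	/* xid, vers, credits, RDMA_ERROR, err */

	if (err == DEMO_ERR_VERS)
		words += 2;	/* low and high supported protocol versions */
	return words * sizeof(uint32_t);
}

int main(void)
{
	printf("version-mismatch reply header: %zu bytes\n",
	       error_hdr_bytes(DEMO_ERR_VERS));
	printf("other error reply header:      %zu bytes\n",
	       error_hdr_bytes(DEMO_ERR_CHUNK));
	return 0;
}
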
int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
{
struct rpcrdma_write_array *wr_ary;
/* There is no read-list in a reply */
/* skip write list */
wr_ary = (struct rpcrdma_write_array *)
&rmsgp->rm_body.rm_chunks[1];
if (wr_ary->wc_discrim)
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)].
wc_target.rs_length;
else
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_nchunks;
/* skip reply array */
if (wr_ary->wc_discrim)
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)];
else
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_nchunks;
return (unsigned long) wr_ary - (unsigned long) rmsgp;
}
void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
{
struct rpcrdma_write_array *ary;
/* no read-list */
rmsgp->rm_body.rm_chunks[0] = xdr_zero;
/* write-array discrim */
ary = (struct rpcrdma_write_array *)
&rmsgp->rm_body.rm_chunks[1];
ary->wc_discrim = xdr_one;
ary->wc_nchunks = htonl(chunks);
/* write-list terminator */
ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
/* reply-array discriminator */
ary->wc_array[chunks].wc_target.rs_length = xdr_zero;
}
void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
int chunks)
{
ary->wc_discrim = xdr_one;
ary->wc_nchunks = htonl(chunks);
}
void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
int chunk_no,
__be32 rs_handle,
__be64 rs_offset,
u32 write_len)
{
struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target;
seg->rs_handle = rs_handle;
seg->rs_offset = rs_offset;
seg->rs_length = htonl(write_len);
}
void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rdma_argp,
struct rpcrdma_msg *rdma_resp,
enum rpcrdma_proc rdma_type)
{
rdma_resp->rm_xid = htonl(rdma_argp->rm_xid);
rdma_resp->rm_vers = htonl(rdma_argp->rm_vers);
rdma_resp->rm_credit = htonl(xprt->sc_max_requests);
rdma_resp->rm_type = htonl(rdma_type);
/* Encode <nul> chunks lists */
rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
rdma_resp->rm_body.rm_chunks[1] = xdr_zero;
rdma_resp->rm_body.rm_chunks[2] = xdr_zero;
}

View file

@ -0,0 +1,614 @@
/*
* Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/spinlock.h>
#include <linux/highmem.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <linux/sunrpc/svc_rdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
/*
* Replace the pages in the rq_argpages array with the pages from the SGE in
* the RDMA_RECV completion. The SGL should contain full pages up until the
* last one.
*/
static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *ctxt,
u32 byte_count)
{
struct page *page;
u32 bc;
int sge_no;
/* Swap the page in the SGE with the page in argpages */
page = ctxt->pages[0];
put_page(rqstp->rq_pages[0]);
rqstp->rq_pages[0] = page;
/* Set up the XDR head */
rqstp->rq_arg.head[0].iov_base = page_address(page);
rqstp->rq_arg.head[0].iov_len =
min_t(size_t, byte_count, ctxt->sge[0].length);
rqstp->rq_arg.len = byte_count;
rqstp->rq_arg.buflen = byte_count;
/* Compute bytes past head in the SGL */
bc = byte_count - rqstp->rq_arg.head[0].iov_len;
/* If data remains, store it in the pagelist */
rqstp->rq_arg.page_len = bc;
rqstp->rq_arg.page_base = 0;
rqstp->rq_arg.pages = &rqstp->rq_pages[1];
sge_no = 1;
while (bc && sge_no < ctxt->count) {
page = ctxt->pages[sge_no];
put_page(rqstp->rq_pages[sge_no]);
rqstp->rq_pages[sge_no] = page;
bc -= min_t(u32, bc, ctxt->sge[sge_no].length);
rqstp->rq_arg.buflen += ctxt->sge[sge_no].length;
sge_no++;
}
rqstp->rq_respages = &rqstp->rq_pages[sge_no];
rqstp->rq_next_page = rqstp->rq_respages + 1;
/* We should never run out of SGE because the limit is defined to
* support the max allowed RPC data length
*/
BUG_ON(bc && (sge_no == ctxt->count));
BUG_ON((rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len)
!= byte_count);
BUG_ON(rqstp->rq_arg.len != byte_count);
/* If not all pages were used from the SGL, free the remaining ones */
bc = sge_no;
while (sge_no < ctxt->count) {
page = ctxt->pages[sge_no++];
put_page(page);
}
ctxt->count = bc;
/* Set up tail */
rqstp->rq_arg.tail[0].iov_base = NULL;
rqstp->rq_arg.tail[0].iov_len = 0;
}
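Put differently, the received byte count is split into whatever fits in the first (head) SGE and a remainder that is described by the page list. A hedged sketch of just that split, with a made-up first-SGE size:

#include <stdio.h>
#include <stdint.h>

struct demo_split {
	uint32_t head_len;	/* bytes described by the head iovec */
	uint32_t page_len;	/* bytes carried in the page list */
};

static struct demo_split split_arg(uint32_t byte_count, uint32_t first_sge_len)
{
	struct demo_split s;

	s.head_len = byte_count < first_sge_len ? byte_count : first_sge_len;
	s.page_len = byte_count - s.head_len;
	return s;
}

int main(void)
{
	struct demo_split s = split_arg(10000, 4096);	/* e.g. a 4 KiB first SGE */

	printf("head=%u page_list=%u\n", s.head_len, s.page_len);
	return 0;
}
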
static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
{
if (rdma_node_get_transport(xprt->sc_cm_id->device->node_type) ==
RDMA_TRANSPORT_IWARP)
return 1;
else
return min_t(int, sge_count, xprt->sc_max_sge);
}
typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head,
int *page_no,
u32 *page_offset,
u32 rs_handle,
u32 rs_length,
u64 rs_offset,
int last);
/* Issue an RDMA_READ using the local lkey to map the data sink */
static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head,
int *page_no,
u32 *page_offset,
u32 rs_handle,
u32 rs_length,
u64 rs_offset,
int last)
{
struct ib_send_wr read_wr;
int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
int ret, read, pno;
u32 pg_off = *page_offset;
u32 pg_no = *page_no;
ctxt->direction = DMA_FROM_DEVICE;
ctxt->read_hdr = head;
pages_needed =
min_t(int, pages_needed, rdma_read_max_sge(xprt, pages_needed));
read = min_t(int, pages_needed << PAGE_SHIFT, rs_length);
for (pno = 0; pno < pages_needed; pno++) {
int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
head->arg.page_len += len;
head->arg.len += len;
if (!pg_off)
head->count++;
rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
rqstp->rq_next_page = rqstp->rq_respages + 1;
ctxt->sge[pno].addr =
ib_dma_map_page(xprt->sc_cm_id->device,
head->arg.pages[pg_no], pg_off,
PAGE_SIZE - pg_off,
DMA_FROM_DEVICE);
ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
ctxt->sge[pno].addr);
if (ret)
goto err;
atomic_inc(&xprt->sc_dma_used);
/* The lkey here is either a local dma lkey or a dma_mr lkey */
ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
ctxt->sge[pno].length = len;
ctxt->count++;
/* adjust offset and wrap to next page if needed */
pg_off += len;
if (pg_off == PAGE_SIZE) {
pg_off = 0;
pg_no++;
}
rs_length -= len;
}
if (last && rs_length == 0)
set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
else
clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
memset(&read_wr, 0, sizeof(read_wr));
read_wr.wr_id = (unsigned long)ctxt;
read_wr.opcode = IB_WR_RDMA_READ;
ctxt->wr_op = read_wr.opcode;
read_wr.send_flags = IB_SEND_SIGNALED;
read_wr.wr.rdma.rkey = rs_handle;
read_wr.wr.rdma.remote_addr = rs_offset;
read_wr.sg_list = ctxt->sge;
read_wr.num_sge = pages_needed;
ret = svc_rdma_send(xprt, &read_wr);
if (ret) {
pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
goto err;
}
/* return current location in page array */
*page_no = pg_no;
*page_offset = pg_off;
ret = read;
atomic_inc(&rdma_stat_read);
return ret;
err:
svc_rdma_unmap_dma(ctxt);
svc_rdma_put_context(ctxt, 0);
return ret;
}
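The pages_needed computation above rounds the segment (starting page offset plus length) up to whole pages, and the loop then slices the segment page by page, wrapping the offset back to zero at each page boundary. A user-space sketch of both steps, assuming 4 KiB pages for the example:

#include <stdio.h>

#define DEMO_PAGE_SIZE 4096u

static unsigned int pages_needed(unsigned int page_off, unsigned int length)
{
	/* round (offset + length) up to a whole number of pages */
	return (page_off + length + DEMO_PAGE_SIZE - 1) / DEMO_PAGE_SIZE;
}

int main(void)
{
	unsigned int off = 100, len = 9000, n = pages_needed(off, len);

	printf("%u pages needed\n", n);	/* 100 + 9000 = 9100 -> 3 pages */

	/* slice the segment exactly as the read loop does */
	while (len) {
		unsigned int chunk = DEMO_PAGE_SIZE - off;

		if (chunk > len)
			chunk = len;
		printf("map %u bytes at page offset %u\n", chunk, off);
		len -= chunk;
		off = 0;	/* subsequent pages start at offset 0 */
	}
	return 0;
}
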
/* Issue an RDMA_READ using an FRMR to map the data sink */
static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head,
int *page_no,
u32 *page_offset,
u32 rs_handle,
u32 rs_length,
u64 rs_offset,
int last)
{
struct ib_send_wr read_wr;
struct ib_send_wr inv_wr;
struct ib_send_wr fastreg_wr;
u8 key;
int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
int ret, read, pno;
u32 pg_off = *page_offset;
u32 pg_no = *page_no;
if (IS_ERR(frmr))
return -ENOMEM;
ctxt->direction = DMA_FROM_DEVICE;
ctxt->frmr = frmr;
pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len);
read = min_t(int, pages_needed << PAGE_SHIFT, rs_length);
frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]);
frmr->direction = DMA_FROM_DEVICE;
frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
frmr->map_len = pages_needed << PAGE_SHIFT;
frmr->page_list_len = pages_needed;
for (pno = 0; pno < pages_needed; pno++) {
int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
head->arg.page_len += len;
head->arg.len += len;
if (!pg_off)
head->count++;
rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
rqstp->rq_next_page = rqstp->rq_respages + 1;
frmr->page_list->page_list[pno] =
ib_dma_map_page(xprt->sc_cm_id->device,
head->arg.pages[pg_no], 0,
PAGE_SIZE, DMA_FROM_DEVICE);
ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
frmr->page_list->page_list[pno]);
if (ret)
goto err;
atomic_inc(&xprt->sc_dma_used);
/* adjust offset and wrap to next page if needed */
pg_off += len;
if (pg_off == PAGE_SIZE) {
pg_off = 0;
pg_no++;
}
rs_length -= len;
}
if (last && rs_length == 0)
set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
else
clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
/* Bump the key */
key = (u8)(frmr->mr->lkey & 0x000000FF);
ib_update_fast_reg_key(frmr->mr, ++key);
ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset;
ctxt->sge[0].lkey = frmr->mr->lkey;
ctxt->sge[0].length = read;
ctxt->count = 1;
ctxt->read_hdr = head;
/* Prepare FASTREG WR */
memset(&fastreg_wr, 0, sizeof(fastreg_wr));
fastreg_wr.opcode = IB_WR_FAST_REG_MR;
fastreg_wr.send_flags = IB_SEND_SIGNALED;
fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
fastreg_wr.wr.fast_reg.length = frmr->map_len;
fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
fastreg_wr.next = &read_wr;
/* Prepare RDMA_READ */
memset(&read_wr, 0, sizeof(read_wr));
read_wr.send_flags = IB_SEND_SIGNALED;
read_wr.wr.rdma.rkey = rs_handle;
read_wr.wr.rdma.remote_addr = rs_offset;
read_wr.sg_list = ctxt->sge;
read_wr.num_sge = 1;
if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
read_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
read_wr.wr_id = (unsigned long)ctxt;
read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
} else {
read_wr.opcode = IB_WR_RDMA_READ;
read_wr.next = &inv_wr;
/* Prepare invalidate */
memset(&inv_wr, 0, sizeof(inv_wr));
inv_wr.wr_id = (unsigned long)ctxt;
inv_wr.opcode = IB_WR_LOCAL_INV;
inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
}
ctxt->wr_op = read_wr.opcode;
/* Post the chain */
ret = svc_rdma_send(xprt, &fastreg_wr);
if (ret) {
pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
goto err;
}
/* return current location in page array */
*page_no = pg_no;
*page_offset = pg_off;
ret = read;
atomic_inc(&rdma_stat_read);
return ret;
err:
svc_rdma_unmap_dma(ctxt);
svc_rdma_put_context(ctxt, 0);
svc_rdma_put_frmr(xprt, frmr);
return ret;
}
static int rdma_read_chunks(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rmsgp,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head)
{
int page_no, ch_count, ret;
struct rpcrdma_read_chunk *ch;
u32 page_offset, byte_count;
u64 rs_offset;
rdma_reader_fn reader;
/* If no read list is present, return 0 */
ch = svc_rdma_get_read_chunk(rmsgp);
if (!ch)
return 0;
svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
if (ch_count > RPCSVC_MAXPAGES)
return -EINVAL;
/* The request is completed when the RDMA_READs complete. The
* head context keeps all the pages that comprise the
* request.
*/
head->arg.head[0] = rqstp->rq_arg.head[0];
head->arg.tail[0] = rqstp->rq_arg.tail[0];
head->arg.pages = &head->pages[head->count];
head->hdr_count = head->count;
head->arg.page_base = 0;
head->arg.page_len = 0;
head->arg.len = rqstp->rq_arg.len;
head->arg.buflen = rqstp->rq_arg.buflen;
/* Use FRMR if supported */
if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)
reader = rdma_read_chunk_frmr;
else
reader = rdma_read_chunk_lcl;
page_no = 0; page_offset = 0;
for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
ch->rc_discrim != 0; ch++) {
xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset,
&rs_offset);
byte_count = ntohl(ch->rc_target.rs_length);
while (byte_count > 0) {
ret = reader(xprt, rqstp, head,
&page_no, &page_offset,
ntohl(ch->rc_target.rs_handle),
byte_count, rs_offset,
((ch+1)->rc_discrim == 0) /* last */
);
if (ret < 0)
goto err;
byte_count -= ret;
rs_offset += ret;
head->arg.buflen += ret;
}
}
ret = 1;
err:
/* Detach arg pages. svc_recv will replenish them */
for (page_no = 0;
&rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++)
rqstp->rq_pages[page_no] = NULL;
return ret;
}
/*
* To avoid a separate RDMA READ just for a handful of zero bytes,
* RFC 5666 section 3.7 allows the client to omit the XDR zero pad
* in chunk lists.
*/
static void
rdma_fix_xdr_pad(struct xdr_buf *buf)
{
unsigned int page_len = buf->page_len;
unsigned int size = (XDR_QUADLEN(page_len) << 2) - page_len;
unsigned int offset, pg_no;
char *p;
if (size == 0)
return;
pg_no = page_len >> PAGE_SHIFT;
offset = page_len & ~PAGE_MASK;
p = page_address(buf->pages[pg_no]);
memset(p + offset, 0, size);
buf->page_len += size;
buf->buflen += size;
buf->len += size;
}
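XDR_QUADLEN rounds a byte count up to whole 4-byte XDR quads, so the "size" computed above is just the 0-3 bytes of zero pad needed to reach the next quad boundary. A minimal sketch of that arithmetic in plain C:

#include <stdio.h>

/* bytes of zero pad needed to bring 'len' up to a 4-byte XDR boundary */
static unsigned int xdr_pad_bytes(unsigned int len)
{
	return (((len + 3) >> 2) << 2) - len;	/* always 0, 1, 2 or 3 */
}

int main(void)
{
	unsigned int len;

	for (len = 0; len < 8; len++)
		printf("len=%u pad=%u\n", len, xdr_pad_bytes(len));
	return 0;
}
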
static int rdma_read_complete(struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head)
{
int page_no;
int ret;
BUG_ON(!head);
/* Copy RPC pages */
for (page_no = 0; page_no < head->count; page_no++) {
put_page(rqstp->rq_pages[page_no]);
rqstp->rq_pages[page_no] = head->pages[page_no];
}
/* Point rq_arg.pages past header */
rdma_fix_xdr_pad(&head->arg);
rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count];
rqstp->rq_arg.page_len = head->arg.page_len;
rqstp->rq_arg.page_base = head->arg.page_base;
/* rq_respages starts after the last arg page */
rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
rqstp->rq_next_page = rqstp->rq_respages + 1;
/* Rebuild rq_arg head and tail. */
rqstp->rq_arg.head[0] = head->arg.head[0];
rqstp->rq_arg.tail[0] = head->arg.tail[0];
rqstp->rq_arg.len = head->arg.len;
rqstp->rq_arg.buflen = head->arg.buflen;
/* Free the context */
svc_rdma_put_context(head, 0);
/* XXX: What should this be? */
rqstp->rq_prot = IPPROTO_MAX;
svc_xprt_copy_addrs(rqstp, rqstp->rq_xprt);
ret = rqstp->rq_arg.head[0].iov_len
+ rqstp->rq_arg.page_len
+ rqstp->rq_arg.tail[0].iov_len;
dprintk("svcrdma: deferred read ret=%d, rq_arg.len =%d, "
"rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n",
ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base,
rqstp->rq_arg.head[0].iov_len);
return ret;
}
/*
* Set up the rqstp thread context to point to the RQ buffer. If
* necessary, pull additional data from the client with an RDMA_READ
* request.
*/
int svc_rdma_recvfrom(struct svc_rqst *rqstp)
{
struct svc_xprt *xprt = rqstp->rq_xprt;
struct svcxprt_rdma *rdma_xprt =
container_of(xprt, struct svcxprt_rdma, sc_xprt);
struct svc_rdma_op_ctxt *ctxt = NULL;
struct rpcrdma_msg *rmsgp;
int ret = 0;
int len;
dprintk("svcrdma: rqstp=%p\n", rqstp);
spin_lock_bh(&rdma_xprt->sc_rq_dto_lock);
if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
ctxt = list_entry(rdma_xprt->sc_read_complete_q.next,
struct svc_rdma_op_ctxt,
dto_q);
list_del_init(&ctxt->dto_q);
spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
return rdma_read_complete(rqstp, ctxt);
} else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next,
struct svc_rdma_op_ctxt,
dto_q);
list_del_init(&ctxt->dto_q);
} else {
atomic_inc(&rdma_stat_rq_starve);
clear_bit(XPT_DATA, &xprt->xpt_flags);
ctxt = NULL;
}
spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
if (!ctxt) {
/* This is the EAGAIN path. The svc_recv routine will
* return -EAGAIN, the nfsd thread will call into
* svc_recv again, and we shouldn't be on the active
* transport list
*/
if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
goto close_out;
goto out;
}
dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
ctxt, rdma_xprt, rqstp, ctxt->wc_status);
BUG_ON(ctxt->wc_status != IB_WC_SUCCESS);
atomic_inc(&rdma_stat_recv);
/* Build up the XDR from the receive buffers. */
rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len);
/* Decode the RDMA header. */
len = svc_rdma_xdr_decode_req(&rmsgp, rqstp);
rqstp->rq_xprt_hlen = len;
/* If the request is invalid, reply with an error */
if (len < 0) {
if (len == -ENOSYS)
svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
goto close_out;
}
/* Read read-list data. */
ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt);
if (ret > 0) {
/* read-list posted, defer until data received from client. */
goto defer;
} else if (ret < 0) {
/* Post of read-list failed, free context. */
svc_rdma_put_context(ctxt, 1);
return 0;
}
ret = rqstp->rq_arg.head[0].iov_len
+ rqstp->rq_arg.page_len
+ rqstp->rq_arg.tail[0].iov_len;
svc_rdma_put_context(ctxt, 0);
out:
dprintk("svcrdma: ret = %d, rq_arg.len =%d, "
"rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n",
ret, rqstp->rq_arg.len,
rqstp->rq_arg.head[0].iov_base,
rqstp->rq_arg.head[0].iov_len);
rqstp->rq_prot = IPPROTO_MAX;
svc_xprt_copy_addrs(rqstp, xprt);
return ret;
close_out:
if (ctxt)
svc_rdma_put_context(ctxt, 1);
dprintk("svcrdma: transport %p is closing\n", xprt);
/*
* Set the close bit and enqueue it. svc_recv will see the
* close bit and call svc_xprt_delete
*/
set_bit(XPT_CLOSE, &xprt->xpt_flags);
defer:
return 0;
}

View file

@ -0,0 +1,554 @@
/*
* Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/spinlock.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <linux/sunrpc/svc_rdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
static int map_xdr(struct svcxprt_rdma *xprt,
struct xdr_buf *xdr,
struct svc_rdma_req_map *vec)
{
int sge_no;
u32 sge_bytes;
u32 page_bytes;
u32 page_off;
int page_no;
BUG_ON(xdr->len !=
(xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
/* Skip the first sge, this is for the RPCRDMA header */
sge_no = 1;
/* Head SGE */
vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
sge_no++;
/* pages SGE */
page_no = 0;
page_bytes = xdr->page_len;
page_off = xdr->page_base;
while (page_bytes) {
vec->sge[sge_no].iov_base =
page_address(xdr->pages[page_no]) + page_off;
sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
page_bytes -= sge_bytes;
vec->sge[sge_no].iov_len = sge_bytes;
sge_no++;
page_no++;
page_off = 0; /* reset for next time through loop */
}
/* Tail SGE */
if (xdr->tail[0].iov_len) {
vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
sge_no++;
}
dprintk("svcrdma: map_xdr: sge_no %d page_no %d "
"page_base %u page_len %u head_len %zu tail_len %zu\n",
sge_no, page_no, xdr->page_base, xdr->page_len,
xdr->head[0].iov_len, xdr->tail[0].iov_len);
vec->count = sge_no;
return 0;
}
static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
struct xdr_buf *xdr,
u32 xdr_off, size_t len, int dir)
{
struct page *page;
dma_addr_t dma_addr;
if (xdr_off < xdr->head[0].iov_len) {
/* This offset is in the head */
xdr_off += (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
page = virt_to_page(xdr->head[0].iov_base);
} else {
xdr_off -= xdr->head[0].iov_len;
if (xdr_off < xdr->page_len) {
/* This offset is in the page list */
xdr_off += xdr->page_base;
page = xdr->pages[xdr_off >> PAGE_SHIFT];
xdr_off &= ~PAGE_MASK;
} else {
/* This offset is in the tail */
xdr_off -= xdr->page_len;
xdr_off += (unsigned long)
xdr->tail[0].iov_base & ~PAGE_MASK;
page = virt_to_page(xdr->tail[0].iov_base);
}
}
dma_addr = ib_dma_map_page(xprt->sc_cm_id->device, page, xdr_off,
min_t(size_t, PAGE_SIZE, len), dir);
return dma_addr;
}
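dma_map_xdr() resolves a flat offset into whichever of the three xdr_buf regions (head, page list, tail) contains it, converting it to a region-relative offset along the way. The selection is plain interval arithmetic; a stand-alone sketch with hypothetical region lengths:

#include <stdio.h>

enum demo_region { DEMO_HEAD, DEMO_PAGES, DEMO_TAIL };

/* Given region lengths, report which region a flat offset falls in and
 * the offset relative to that region's start. */
static enum demo_region resolve(unsigned int off,
				unsigned int head_len, unsigned int page_len,
				unsigned int *rel_off)
{
	if (off < head_len) {
		*rel_off = off;
		return DEMO_HEAD;
	}
	off -= head_len;
	if (off < page_len) {
		*rel_off = off;
		return DEMO_PAGES;
	}
	*rel_off = off - page_len;
	return DEMO_TAIL;
}

int main(void)
{
	static const char *name[] = { "head", "pages", "tail" };
	unsigned int rel, offsets[] = { 10, 200, 5000 };
	int i;

	for (i = 0; i < 3; i++) {
		enum demo_region r = resolve(offsets[i], 128, 4096, &rel);

		printf("offset %u -> %s+%u\n", offsets[i], name[r], rel);
	}
	return 0;
}
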
/* Assumptions:
* - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
*/
static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
u32 rmr, u64 to,
u32 xdr_off, int write_len,
struct svc_rdma_req_map *vec)
{
struct ib_send_wr write_wr;
struct ib_sge *sge;
int xdr_sge_no;
int sge_no;
int sge_bytes;
int sge_off;
int bc;
struct svc_rdma_op_ctxt *ctxt;
BUG_ON(vec->count > RPCSVC_MAXPAGES);
dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
"write_len=%d, vec->sge=%p, vec->count=%lu\n",
rmr, (unsigned long long)to, xdr_off,
write_len, vec->sge, vec->count);
ctxt = svc_rdma_get_context(xprt);
ctxt->direction = DMA_TO_DEVICE;
sge = ctxt->sge;
/* Find the SGE associated with xdr_off */
for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < vec->count;
xdr_sge_no++) {
if (vec->sge[xdr_sge_no].iov_len > bc)
break;
bc -= vec->sge[xdr_sge_no].iov_len;
}
sge_off = bc;
bc = write_len;
sge_no = 0;
/* Copy the remaining SGE */
while (bc != 0) {
sge_bytes = min_t(size_t,
bc, vec->sge[xdr_sge_no].iov_len-sge_off);
sge[sge_no].length = sge_bytes;
sge[sge_no].addr =
dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
sge_bytes, DMA_TO_DEVICE);
xdr_off += sge_bytes;
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
sge[sge_no].addr))
goto err;
atomic_inc(&xprt->sc_dma_used);
sge[sge_no].lkey = xprt->sc_dma_lkey;
ctxt->count++;
sge_off = 0;
sge_no++;
xdr_sge_no++;
BUG_ON(xdr_sge_no > vec->count);
bc -= sge_bytes;
if (sge_no == xprt->sc_max_sge)
break;
}
/* Prepare WRITE WR */
memset(&write_wr, 0, sizeof write_wr);
ctxt->wr_op = IB_WR_RDMA_WRITE;
write_wr.wr_id = (unsigned long)ctxt;
write_wr.sg_list = &sge[0];
write_wr.num_sge = sge_no;
write_wr.opcode = IB_WR_RDMA_WRITE;
write_wr.send_flags = IB_SEND_SIGNALED;
write_wr.wr.rdma.rkey = rmr;
write_wr.wr.rdma.remote_addr = to;
/* Post It */
atomic_inc(&rdma_stat_write);
if (svc_rdma_send(xprt, &write_wr))
goto err;
return write_len - bc;
err:
svc_rdma_unmap_dma(ctxt);
svc_rdma_put_context(ctxt, 0);
/* Fatal error, close transport */
return -EIO;
}
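The first loop in send_write() locates which vec entry a given XDR offset falls in; the SGE-building loop then carves off min(remaining, space-left-in-this-entry) sized pieces. A compact sketch of the offset-to-segment lookup (the segment lengths are hypothetical):

#include <stdio.h>

/* Find which segment a flat offset falls in, and the offset within it. */
static int find_segment(const unsigned int *seg_len, int nsegs,
			unsigned int off, unsigned int *seg_off)
{
	int i;

	for (i = 0; i < nsegs; i++) {
		if (off < seg_len[i]) {
			*seg_off = off;
			return i;
		}
		off -= seg_len[i];
	}
	return -1;	/* offset lies past the end of the buffer */
}

int main(void)
{
	unsigned int seg_len[] = { 120, 4096, 4096, 60 };
	unsigned int seg_off;
	int i = find_segment(seg_len, 4, 4300, &seg_off);

	printf("offset 4300 -> segment %d, offset %u\n", i, seg_off);
	return 0;
}
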
static int send_write_chunks(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rdma_argp,
struct rpcrdma_msg *rdma_resp,
struct svc_rqst *rqstp,
struct svc_rdma_req_map *vec)
{
u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
int write_len;
u32 xdr_off;
int chunk_off;
int chunk_no;
struct rpcrdma_write_array *arg_ary;
struct rpcrdma_write_array *res_ary;
int ret;
arg_ary = svc_rdma_get_write_array(rdma_argp);
if (!arg_ary)
return 0;
res_ary = (struct rpcrdma_write_array *)
&rdma_resp->rm_body.rm_chunks[1];
/* Write chunks start at the pagelist */
for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
xfer_len && chunk_no < arg_ary->wc_nchunks;
chunk_no++) {
struct rpcrdma_segment *arg_ch;
u64 rs_offset;
arg_ch = &arg_ary->wc_array[chunk_no].wc_target;
write_len = min(xfer_len, ntohl(arg_ch->rs_length));
/* Prepare the response chunk given the length actually
* written */
xdr_decode_hyper((__be32 *)&arg_ch->rs_offset, &rs_offset);
svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no,
arg_ch->rs_handle,
arg_ch->rs_offset,
write_len);
chunk_off = 0;
while (write_len) {
ret = send_write(xprt, rqstp,
ntohl(arg_ch->rs_handle),
rs_offset + chunk_off,
xdr_off,
write_len,
vec);
if (ret <= 0) {
dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
ret);
return -EIO;
}
chunk_off += ret;
xdr_off += ret;
xfer_len -= ret;
write_len -= ret;
}
}
/* Update the req with the number of chunks actually used */
svc_rdma_xdr_encode_write_list(rdma_resp, chunk_no);
return rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
}
static int send_reply_chunks(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rdma_argp,
struct rpcrdma_msg *rdma_resp,
struct svc_rqst *rqstp,
struct svc_rdma_req_map *vec)
{
u32 xfer_len = rqstp->rq_res.len;
int write_len;
u32 xdr_off;
int chunk_no;
int chunk_off;
int nchunks;
struct rpcrdma_segment *ch;
struct rpcrdma_write_array *arg_ary;
struct rpcrdma_write_array *res_ary;
int ret;
arg_ary = svc_rdma_get_reply_array(rdma_argp);
if (!arg_ary)
return 0;
/* XXX: need to fix when reply lists occur with read-list and/or
* write-list */
res_ary = (struct rpcrdma_write_array *)
&rdma_resp->rm_body.rm_chunks[2];
/* xdr offset starts at RPC message */
nchunks = ntohl(arg_ary->wc_nchunks);
for (xdr_off = 0, chunk_no = 0;
xfer_len && chunk_no < nchunks;
chunk_no++) {
u64 rs_offset;
ch = &arg_ary->wc_array[chunk_no].wc_target;
write_len = min(xfer_len, ntohl(ch->rs_length));
/* Prepare the reply chunk given the length actually
* written */
xdr_decode_hyper((__be32 *)&ch->rs_offset, &rs_offset);
svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no,
ch->rs_handle, ch->rs_offset,
write_len);
chunk_off = 0;
while (write_len) {
ret = send_write(xprt, rqstp,
ntohl(ch->rs_handle),
rs_offset + chunk_off,
xdr_off,
write_len,
vec);
if (ret <= 0) {
dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
ret);
return -EIO;
}
chunk_off += ret;
xdr_off += ret;
xfer_len -= ret;
write_len -= ret;
}
}
/* Update the req with the number of chunks actually used */
svc_rdma_xdr_encode_reply_array(res_ary, chunk_no);
return rqstp->rq_res.len;
}
/* This function prepares the portion of the RPCRDMA message to be
* sent in the RDMA_SEND. This function is called after data sent via
* RDMA has already been transmitted. There are three cases:
* - The RPCRDMA header, RPC header, and payload are all sent in a
* single RDMA_SEND. This is the "inline" case.
* - The RPCRDMA header and some portion of the RPC header and data
* are sent via this RDMA_SEND and another portion of the data is
* sent via RDMA.
* - The RPCRDMA header [NOMSG] is sent in this RDMA_SEND and the RPC
* header and data are all transmitted via RDMA.
* In all three cases, this function prepares the RPCRDMA header in
* sge[0], the 'type' parameter indicates the type to place in the
* RPCRDMA header, and the 'byte_count' field indicates how much of
* the XDR to include in this RDMA_SEND. NB: The offset of the payload
* to send is zero in the XDR.
*/
static int send_reply(struct svcxprt_rdma *rdma,
struct svc_rqst *rqstp,
struct page *page,
struct rpcrdma_msg *rdma_resp,
struct svc_rdma_op_ctxt *ctxt,
struct svc_rdma_req_map *vec,
int byte_count)
{
struct ib_send_wr send_wr;
int sge_no;
int sge_bytes;
int page_no;
int pages;
int ret;
/* Post a recv buffer to handle another request. */
ret = svc_rdma_post_recv(rdma);
if (ret) {
printk(KERN_INFO
"svcrdma: could not post a receive buffer, err=%d."
"Closing transport %p.\n", ret, rdma);
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
svc_rdma_put_context(ctxt, 0);
return -ENOTCONN;
}
/* Prepare the context */
ctxt->pages[0] = page;
ctxt->count = 1;
/* Prepare the SGE for the RPCRDMA Header */
ctxt->sge[0].lkey = rdma->sc_dma_lkey;
ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
ctxt->sge[0].addr =
ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
ctxt->sge[0].length, DMA_TO_DEVICE);
if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
goto err;
atomic_inc(&rdma->sc_dma_used);
ctxt->direction = DMA_TO_DEVICE;
/* Map the payload indicated by 'byte_count' */
for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
int xdr_off = 0;
sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
byte_count -= sge_bytes;
ctxt->sge[sge_no].addr =
dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
sge_bytes, DMA_TO_DEVICE);
xdr_off += sge_bytes;
if (ib_dma_mapping_error(rdma->sc_cm_id->device,
ctxt->sge[sge_no].addr))
goto err;
atomic_inc(&rdma->sc_dma_used);
ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
ctxt->sge[sge_no].length = sge_bytes;
}
BUG_ON(byte_count != 0);
/* Save all respages in the ctxt and remove them from the
* respages array. They are our pages until the I/O
* completes.
*/
pages = rqstp->rq_next_page - rqstp->rq_respages;
for (page_no = 0; page_no < pages; page_no++) {
ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
ctxt->count++;
rqstp->rq_respages[page_no] = NULL;
/*
* If there are more pages than SGE, terminate SGE
* list so that svc_rdma_unmap_dma doesn't attempt to
* unmap garbage.
*/
if (page_no+1 >= sge_no)
ctxt->sge[page_no+1].length = 0;
}
rqstp->rq_next_page = rqstp->rq_respages + 1;
BUG_ON(sge_no > rdma->sc_max_sge);
memset(&send_wr, 0, sizeof send_wr);
ctxt->wr_op = IB_WR_SEND;
send_wr.wr_id = (unsigned long)ctxt;
send_wr.sg_list = ctxt->sge;
send_wr.num_sge = sge_no;
send_wr.opcode = IB_WR_SEND;
send_wr.send_flags = IB_SEND_SIGNALED;
ret = svc_rdma_send(rdma, &send_wr);
if (ret)
goto err;
return 0;
err:
svc_rdma_unmap_dma(ctxt);
svc_rdma_put_context(ctxt, 1);
return -EIO;
}
void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
{
}
/*
* Return the start of an xdr buffer.
*/
static void *xdr_start(struct xdr_buf *xdr)
{
return xdr->head[0].iov_base -
(xdr->len -
xdr->page_len -
xdr->tail[0].iov_len -
xdr->head[0].iov_len);
}
int svc_rdma_sendto(struct svc_rqst *rqstp)
{
struct svc_xprt *xprt = rqstp->rq_xprt;
struct svcxprt_rdma *rdma =
container_of(xprt, struct svcxprt_rdma, sc_xprt);
struct rpcrdma_msg *rdma_argp;
struct rpcrdma_msg *rdma_resp;
struct rpcrdma_write_array *reply_ary;
enum rpcrdma_proc reply_type;
int ret;
int inline_bytes;
struct page *res_page;
struct svc_rdma_op_ctxt *ctxt;
struct svc_rdma_req_map *vec;
dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
/* Get the RDMA request header. */
rdma_argp = xdr_start(&rqstp->rq_arg);
/* Build an req vec for the XDR */
ctxt = svc_rdma_get_context(rdma);
ctxt->direction = DMA_TO_DEVICE;
vec = svc_rdma_get_req_map();
ret = map_xdr(rdma, &rqstp->rq_res, vec);
if (ret)
goto err0;
inline_bytes = rqstp->rq_res.len;
/* Create the RDMA response header */
res_page = svc_rdma_get_page();
rdma_resp = page_address(res_page);
reply_ary = svc_rdma_get_reply_array(rdma_argp);
if (reply_ary)
reply_type = RDMA_NOMSG;
else
reply_type = RDMA_MSG;
svc_rdma_xdr_encode_reply_header(rdma, rdma_argp,
rdma_resp, reply_type);
/* Send any write-chunk data and build resp write-list */
ret = send_write_chunks(rdma, rdma_argp, rdma_resp,
rqstp, vec);
if (ret < 0) {
printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
ret);
goto err1;
}
inline_bytes -= ret;
/* Send any reply-list data and update resp reply-list */
ret = send_reply_chunks(rdma, rdma_argp, rdma_resp,
rqstp, vec);
if (ret < 0) {
printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
ret);
goto err1;
}
inline_bytes -= ret;
ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
inline_bytes);
svc_rdma_put_req_map(vec);
dprintk("svcrdma: send_reply returns %d\n", ret);
return ret;
err1:
put_page(res_page);
err0:
svc_rdma_put_req_map(vec);
svc_rdma_put_context(ctxt, 0);
return ret;
}

File diff suppressed because it is too large

View file

@ -0,0 +1,747 @@
/*
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* transport.c
*
* This file contains the top-level implementation of an RPC RDMA
* transport.
*
* Naming convention: functions beginning with xprt_ are part of the
* transport switch. All others are RPC RDMA internal.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/sunrpc/addr.h>
#include "xprt_rdma.h"
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("RPC/RDMA Transport for Linux kernel NFS");
MODULE_AUTHOR("Network Appliance, Inc.");
/*
* tunables
*/
static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_inline_write_padding;
static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
int xprt_rdma_pad_optimize = 0;
#ifdef RPC_DEBUG
static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE;
static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE;
static unsigned int zero;
static unsigned int max_padding = PAGE_SIZE;
static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
static unsigned int max_memreg = RPCRDMA_LAST - 1;
static struct ctl_table_header *sunrpc_table_header;
static struct ctl_table xr_tunables_table[] = {
{
.procname = "rdma_slot_table_entries",
.data = &xprt_rdma_slot_table_entries,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_slot_table_size,
.extra2 = &max_slot_table_size
},
{
.procname = "rdma_max_inline_read",
.data = &xprt_rdma_max_inline_read,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "rdma_max_inline_write",
.data = &xprt_rdma_max_inline_write,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "rdma_inline_write_padding",
.data = &xprt_rdma_inline_write_padding,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &max_padding,
},
{
.procname = "rdma_memreg_strategy",
.data = &xprt_rdma_memreg_strategy,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_memreg,
.extra2 = &max_memreg,
},
{
.procname = "rdma_pad_optimize",
.data = &xprt_rdma_pad_optimize,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{ },
};
static struct ctl_table sunrpc_table[] = {
{
.procname = "sunrpc",
.mode = 0555,
.child = xr_tunables_table
},
{ },
};
#endif
#define RPCRDMA_BIND_TO (60U * HZ)
#define RPCRDMA_INIT_REEST_TO (5U * HZ)
#define RPCRDMA_MAX_REEST_TO (30U * HZ)
#define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ)
static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */
static void
xprt_rdma_format_addresses(struct rpc_xprt *xprt)
{
struct sockaddr *sap = (struct sockaddr *)
&rpcx_to_rdmad(xprt).addr;
struct sockaddr_in *sin = (struct sockaddr_in *)sap;
char buf[64];
(void)rpc_ntop(sap, buf, sizeof(buf));
xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL);
snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
/* netid */
xprt->address_strings[RPC_DISPLAY_NETID] = "rdma";
}
static void
xprt_rdma_free_addresses(struct rpc_xprt *xprt)
{
unsigned int i;
for (i = 0; i < RPC_DISPLAY_MAX; i++)
switch (i) {
case RPC_DISPLAY_PROTO:
case RPC_DISPLAY_NETID:
continue;
default:
kfree(xprt->address_strings[i]);
}
}
static void
xprt_rdma_connect_worker(struct work_struct *work)
{
struct rpcrdma_xprt *r_xprt =
container_of(work, struct rpcrdma_xprt, rdma_connect.work);
struct rpc_xprt *xprt = &r_xprt->xprt;
int rc = 0;
xprt_clear_connected(xprt);
dprintk("RPC: %s: %sconnect\n", __func__,
r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
if (rc)
xprt_wake_pending_tasks(xprt, rc);
dprintk("RPC: %s: exit\n", __func__);
xprt_clear_connecting(xprt);
}
/*
* xprt_rdma_destroy
*
* Destroy the xprt.
* Free all memory associated with the object, including its own.
* NOTE: none of the *destroy methods free memory for their top-level
* objects, even though they may have allocated it (they do free
* private memory). It's up to the caller to handle it. In this
* case (RDMA transport), all structure memory is inlined with the
* struct rpcrdma_xprt.
*/
static void
xprt_rdma_destroy(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
dprintk("RPC: %s: called\n", __func__);
cancel_delayed_work_sync(&r_xprt->rdma_connect);
xprt_clear_connected(xprt);
rpcrdma_buffer_destroy(&r_xprt->rx_buf);
rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
rpcrdma_ia_close(&r_xprt->rx_ia);
xprt_rdma_free_addresses(xprt);
xprt_free(xprt);
dprintk("RPC: %s: returning\n", __func__);
module_put(THIS_MODULE);
}
static const struct rpc_timeout xprt_rdma_default_timeout = {
.to_initval = 60 * HZ,
.to_maxval = 60 * HZ,
};
/**
* xprt_setup_rdma - Set up transport to use RDMA
*
* @args: rpc transport arguments
*/
static struct rpc_xprt *
xprt_setup_rdma(struct xprt_create *args)
{
struct rpcrdma_create_data_internal cdata;
struct rpc_xprt *xprt;
struct rpcrdma_xprt *new_xprt;
struct rpcrdma_ep *new_ep;
struct sockaddr_in *sin;
int rc;
if (args->addrlen > sizeof(xprt->addr)) {
dprintk("RPC: %s: address too large\n", __func__);
return ERR_PTR(-EBADF);
}
xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
xprt_rdma_slot_table_entries,
xprt_rdma_slot_table_entries);
if (xprt == NULL) {
dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
__func__);
return ERR_PTR(-ENOMEM);
}
/* 60 second timeout, no retries */
xprt->timeout = &xprt_rdma_default_timeout;
xprt->bind_timeout = RPCRDMA_BIND_TO;
xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
xprt->resvport = 0; /* privileged port not needed */
xprt->tsh_size = 0; /* RPC-RDMA handles framing */
xprt->ops = &xprt_rdma_procs;
/*
* Set up RDMA-specific connect data.
*/
/* Put server RDMA address in local cdata */
memcpy(&cdata.addr, args->dstaddr, args->addrlen);
/* Ensure xprt->addr holds valid server TCP (not RDMA)
* address, for any side protocols which peek at it */
xprt->prot = IPPROTO_TCP;
xprt->addrlen = args->addrlen;
memcpy(&xprt->addr, &cdata.addr, xprt->addrlen);
sin = (struct sockaddr_in *)&cdata.addr;
if (ntohs(sin->sin_port) != 0)
xprt_set_bound(xprt);
dprintk("RPC: %s: %pI4:%u\n",
__func__, &sin->sin_addr.s_addr, ntohs(sin->sin_port));
/* Set max requests */
cdata.max_requests = xprt->max_reqs;
/* Set some length limits */
cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */
cdata.inline_wsize = xprt_rdma_max_inline_write;
if (cdata.inline_wsize > cdata.wsize)
cdata.inline_wsize = cdata.wsize;
cdata.inline_rsize = xprt_rdma_max_inline_read;
if (cdata.inline_rsize > cdata.rsize)
cdata.inline_rsize = cdata.rsize;
cdata.padding = xprt_rdma_inline_write_padding;
/*
* Create new transport instance, which includes initialized
* o ia
* o endpoint
* o buffers
*/
new_xprt = rpcx_to_rdmax(xprt);
rc = rpcrdma_ia_open(new_xprt, (struct sockaddr *) &cdata.addr,
xprt_rdma_memreg_strategy);
if (rc)
goto out1;
/*
* initialize and create ep
*/
new_xprt->rx_data = cdata;
new_ep = &new_xprt->rx_ep;
new_ep->rep_remote_addr = cdata.addr;
rc = rpcrdma_ep_create(&new_xprt->rx_ep,
&new_xprt->rx_ia, &new_xprt->rx_data);
if (rc)
goto out2;
/*
* Allocate pre-registered send and receive buffers for headers and
* any inline data. Also specify any padding which will be provided
* from a preregistered zero buffer.
*/
rc = rpcrdma_buffer_create(&new_xprt->rx_buf, new_ep, &new_xprt->rx_ia,
&new_xprt->rx_data);
if (rc)
goto out3;
/*
* Register a callback for connection events. This is necessary because
* connection loss notification is async. We also catch connection loss
* when reaping receives.
*/
INIT_DELAYED_WORK(&new_xprt->rdma_connect, xprt_rdma_connect_worker);
new_ep->rep_func = rpcrdma_conn_func;
new_ep->rep_xprt = xprt;
xprt_rdma_format_addresses(xprt);
xprt->max_payload = rpcrdma_max_payload(new_xprt);
dprintk("RPC: %s: transport data payload maximum: %zu bytes\n",
__func__, xprt->max_payload);
if (!try_module_get(THIS_MODULE))
goto out4;
return xprt;
out4:
xprt_rdma_free_addresses(xprt);
rc = -EINVAL;
out3:
rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
out2:
rpcrdma_ia_close(&new_xprt->rx_ia);
out1:
xprt_free(xprt);
return ERR_PTR(rc);
}
/*
* Close a connection, during shutdown or timeout/reconnect
*/
static void
xprt_rdma_close(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
dprintk("RPC: %s: closing\n", __func__);
if (r_xprt->rx_ep.rep_connected > 0)
xprt->reestablish_timeout = 0;
xprt_disconnect_done(xprt);
rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
}
static void
xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
{
struct sockaddr_in *sap;
sap = (struct sockaddr_in *)&xprt->addr;
sap->sin_port = htons(port);
sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr;
sap->sin_port = htons(port);
dprintk("RPC: %s: %u\n", __func__, port);
}
static void
xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
if (r_xprt->rx_ep.rep_connected != 0) {
/* Reconnect */
schedule_delayed_work(&r_xprt->rdma_connect,
xprt->reestablish_timeout);
xprt->reestablish_timeout <<= 1;
if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO)
xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO;
else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO)
xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
} else {
schedule_delayed_work(&r_xprt->rdma_connect, 0);
if (!RPC_IS_ASYNC(task))
flush_delayed_work(&r_xprt->rdma_connect);
}
}
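The reconnect path above doubles reestablish_timeout on each attempt and clamps it between the initial and maximum values, a classic bounded exponential backoff. A tiny sketch of the resulting schedule (seconds used in place of jiffies; the 5 s and 30 s bounds mirror RPCRDMA_INIT_REEST_TO and RPCRDMA_MAX_REEST_TO):

#include <stdio.h>

#define DEMO_INIT_REEST_TO	5	/* seconds */
#define DEMO_MAX_REEST_TO	30	/* seconds */

int main(void)
{
	unsigned int to = DEMO_INIT_REEST_TO;
	int attempt;

	for (attempt = 1; attempt <= 6; attempt++) {
		printf("attempt %d: wait %u s before reconnect\n", attempt, to);
		to <<= 1;
		if (to > DEMO_MAX_REEST_TO)
			to = DEMO_MAX_REEST_TO;
		else if (to < DEMO_INIT_REEST_TO)
			to = DEMO_INIT_REEST_TO;
	}
	return 0;
}
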
/*
* The RDMA allocate/free functions need the task structure as a place
* to hide the struct rpcrdma_req, which is necessary for the actual send/recv
* sequence. For this reason, the recv buffers are attached to send
* buffers for portions of the RPC. Note that the RPC layer allocates
* both send and receive buffers in the same call. We may register
* the receive buffer portion when using reply chunks.
*/
static void *
xprt_rdma_allocate(struct rpc_task *task, size_t size)
{
struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
struct rpcrdma_req *req, *nreq;
req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf);
if (req == NULL)
return NULL;
if (size > req->rl_size) {
dprintk("RPC: %s: size %zd too large for buffer[%zd]: "
"prog %d vers %d proc %d\n",
__func__, size, req->rl_size,
task->tk_client->cl_prog, task->tk_client->cl_vers,
task->tk_msg.rpc_proc->p_proc);
/*
* Outgoing length shortage. Our inline write max must have
* been configured to perform direct i/o.
*
* This is therefore a large metadata operation, and the
* allocate call was made on the maximum possible message,
* e.g. containing long filename(s) or symlink data. In
* fact, while these metadata operations *might* carry
* large outgoing payloads, they rarely *do*. However, we
* have to commit to the request here, so reallocate and
* register it now. The data path will never require this
* reallocation.
*
* If the allocation or registration fails, the RPC framework
* will (doggedly) retry.
*/
if (task->tk_flags & RPC_TASK_SWAPPER)
nreq = kmalloc(sizeof *req + size, GFP_ATOMIC);
else
nreq = kmalloc(sizeof *req + size, GFP_NOFS);
if (nreq == NULL)
goto outfail;
if (rpcrdma_register_internal(&rpcx_to_rdmax(xprt)->rx_ia,
nreq->rl_base, size + sizeof(struct rpcrdma_req)
- offsetof(struct rpcrdma_req, rl_base),
&nreq->rl_handle, &nreq->rl_iov)) {
kfree(nreq);
goto outfail;
}
rpcx_to_rdmax(xprt)->rx_stats.hardway_register_count += size;
nreq->rl_size = size;
nreq->rl_niovs = 0;
nreq->rl_nchunks = 0;
nreq->rl_buffer = (struct rpcrdma_buffer *)req;
nreq->rl_reply = req->rl_reply;
memcpy(nreq->rl_segments,
req->rl_segments, sizeof nreq->rl_segments);
/* flag the swap with an unused field */
nreq->rl_iov.length = 0;
req->rl_reply = NULL;
req = nreq;
}
dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
req->rl_connect_cookie = 0; /* our reserved value */
return req->rl_xdr_buf;
outfail:
rpcrdma_buffer_put(req);
rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++;
return NULL;
}
/*
* This function returns all RDMA resources to the pool.
*/
static void
xprt_rdma_free(void *buffer)
{
struct rpcrdma_req *req;
struct rpcrdma_xprt *r_xprt;
struct rpcrdma_rep *rep;
int i;
if (buffer == NULL)
return;
req = container_of(buffer, struct rpcrdma_req, rl_xdr_buf[0]);
if (req->rl_iov.length == 0) { /* see allocate above */
r_xprt = container_of(((struct rpcrdma_req *) req->rl_buffer)->rl_buffer,
struct rpcrdma_xprt, rx_buf);
} else
r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
rep = req->rl_reply;
dprintk("RPC: %s: called on 0x%p%s\n",
__func__, rep, (rep && rep->rr_func) ? " (with waiter)" : "");
/*
* Finish the deregistration. The process is considered
* complete when the rr_func vector becomes NULL - this
* was put in place during rpcrdma_reply_handler() - the wait
* call below will not block if the dereg is "done". If
* interrupted, our framework will clean up.
*/
for (i = 0; req->rl_nchunks;) {
--req->rl_nchunks;
i += rpcrdma_deregister_external(
&req->rl_segments[i], r_xprt);
}
if (req->rl_iov.length == 0) { /* see allocate above */
struct rpcrdma_req *oreq = (struct rpcrdma_req *)req->rl_buffer;
oreq->rl_reply = req->rl_reply;
(void) rpcrdma_deregister_internal(&r_xprt->rx_ia,
req->rl_handle,
&req->rl_iov);
kfree(req);
req = oreq;
}
/* Put back request+reply buffers */
rpcrdma_buffer_put(req);
}
/*
* send_request invokes the meat of RPC RDMA. It must do the following:
* 1. Marshal the RPC request into an RPC RDMA request, which means
* putting a header in front of data, and creating IOVs for RDMA
* from those in the request.
* 2. In marshaling, detect opportunities for RDMA, and use them.
* 3. Post a recv message to set up asynch completion, then send
* the request (rpcrdma_ep_post).
* 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP).
*/
static int
xprt_rdma_send_request(struct rpc_task *task)
{
struct rpc_rqst *rqst = task->tk_rqstp;
struct rpc_xprt *xprt = rqst->rq_xprt;
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
int rc = 0;
if (req->rl_niovs == 0)
rc = rpcrdma_marshal_req(rqst);
else if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
rc = rpcrdma_marshal_chunks(rqst, 0);
if (rc < 0)
goto failed_marshal;
if (req->rl_reply == NULL) /* e.g. reconnection */
rpcrdma_recv_buffer_get(req);
if (req->rl_reply) {
req->rl_reply->rr_func = rpcrdma_reply_handler;
/* this need only be done once, but... */
req->rl_reply->rr_xprt = xprt;
}
/* Must suppress retransmit to maintain credits */
if (req->rl_connect_cookie == xprt->connect_cookie)
goto drop_connection;
req->rl_connect_cookie = xprt->connect_cookie;
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection;
rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
rqst->rq_bytes_sent = 0;
return 0;
failed_marshal:
r_xprt->rx_stats.failed_marshal_count++;
dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n",
__func__, rc);
if (rc == -EIO)
return -EIO;
drop_connection:
xprt_disconnect_done(xprt);
return -ENOTCONN; /* implies disconnect */
}
static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
long idle_time = 0;
if (xprt_connected(xprt))
idle_time = (long)(jiffies - xprt->last_used) / HZ;
seq_printf(seq,
"\txprt:\trdma %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu "
"%lu %lu %lu %Lu %Lu %Lu %Lu %lu %lu %lu\n",
0, /* need a local port? */
xprt->stat.bind_count,
xprt->stat.connect_count,
xprt->stat.connect_time,
idle_time,
xprt->stat.sends,
xprt->stat.recvs,
xprt->stat.bad_xids,
xprt->stat.req_u,
xprt->stat.bklog_u,
r_xprt->rx_stats.read_chunk_count,
r_xprt->rx_stats.write_chunk_count,
r_xprt->rx_stats.reply_chunk_count,
r_xprt->rx_stats.total_rdma_request,
r_xprt->rx_stats.total_rdma_reply,
r_xprt->rx_stats.pullup_copy_count,
r_xprt->rx_stats.fixup_copy_count,
r_xprt->rx_stats.hardway_register_count,
r_xprt->rx_stats.failed_marshal_count,
r_xprt->rx_stats.bad_reply_count);
}
/*
* Plumbing for rpc transport switch and kernel module
*/
static struct rpc_xprt_ops xprt_rdma_procs = {
.reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */
.alloc_slot = xprt_alloc_slot,
.release_request = xprt_release_rqst_cong, /* ditto */
.set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */
.rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */
.set_port = xprt_rdma_set_port,
.connect = xprt_rdma_connect,
.buf_alloc = xprt_rdma_allocate,
.buf_free = xprt_rdma_free,
.send_request = xprt_rdma_send_request,
.close = xprt_rdma_close,
.destroy = xprt_rdma_destroy,
.print_stats = xprt_rdma_print_stats
};
static struct xprt_class xprt_rdma = {
.list = LIST_HEAD_INIT(xprt_rdma.list),
.name = "rdma",
.owner = THIS_MODULE,
.ident = XPRT_TRANSPORT_RDMA,
.setup = xprt_setup_rdma,
};
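/*
 * Illustrative sketch (an assumption about the generic transport switch in
 * net/sunrpc/xprt.c, which is not part of this hunk): registering the
 * xprt_class above is what lets a mount with proto=rdma reach this code.
 * xprt_create_transport() walks the registered classes, matches on the
 * XPRT_TRANSPORT_RDMA ident, and calls xprt_setup_rdma(). The function and
 * list names below are a simplified paraphrase, not the real implementation.
 */
#if 0
struct rpc_xprt *example_create_transport(struct xprt_create *args)
{
	struct xprt_class *t;

	list_for_each_entry(t, &xprt_list, list) {
		if (t->ident == args->ident)
			return t->setup(args);	/* xprt_setup_rdma() for "rdma" */
	}
	return ERR_PTR(-EIO);
}
#endif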
static void __exit xprt_rdma_cleanup(void)
{
int rc;
dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n");
#ifdef RPC_DEBUG
if (sunrpc_table_header) {
unregister_sysctl_table(sunrpc_table_header);
sunrpc_table_header = NULL;
}
#endif
rc = xprt_unregister_transport(&xprt_rdma);
if (rc)
dprintk("RPC: %s: xprt_unregister returned %i\n",
__func__, rc);
}
static int __init xprt_rdma_init(void)
{
int rc;
rc = xprt_register_transport(&xprt_rdma);
if (rc)
return rc;
dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
dprintk("Defaults:\n");
dprintk("\tSlots %d\n"
"\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
xprt_rdma_slot_table_entries,
xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
dprintk("\tPadding %d\n\tMemreg %d\n",
xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy);
#ifdef RPC_DEBUG
if (!sunrpc_table_header)
sunrpc_table_header = register_sysctl_table(sunrpc_table);
#endif
return 0;
}
module_init(xprt_rdma_init);
module_exit(xprt_rdma_cleanup);

2076
net/sunrpc/xprtrdma/verbs.c Normal file

File diff suppressed because it is too large

402
net/sunrpc/xprtrdma/xprt_rdma.h Normal file

View file

@ -0,0 +1,402 @@
/*
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _LINUX_SUNRPC_XPRT_RDMA_H
#define _LINUX_SUNRPC_XPRT_RDMA_H
#include <linux/wait.h> /* wait_queue_head_t, etc */
#include <linux/spinlock.h> /* spinlock_t, etc */
#include <linux/atomic.h> /* atomic_t, etc */
#include <linux/workqueue.h> /* struct work_struct */
#include <rdma/rdma_cm.h> /* RDMA connection api */
#include <rdma/ib_verbs.h> /* RDMA verbs api */
#include <linux/sunrpc/clnt.h> /* rpc_xprt */
#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
#include <linux/sunrpc/svc.h> /* RPCSVC_MAXPAYLOAD */
#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
/*
* Interface Adapter -- one per transport instance
*/
struct rpcrdma_ia {
rwlock_t ri_qplock;
struct rdma_cm_id *ri_id;
struct ib_pd *ri_pd;
struct ib_mr *ri_bind_mem;
u32 ri_dma_lkey;
int ri_have_dma_lkey;
struct completion ri_done;
int ri_async_rc;
enum rpcrdma_memreg ri_memreg_strategy;
unsigned int ri_max_frmr_depth;
};
/*
* RDMA Endpoint -- one per transport instance
*/
#define RPCRDMA_WC_BUDGET (128)
#define RPCRDMA_POLLSIZE (16)
struct rpcrdma_ep {
atomic_t rep_cqcount;
int rep_cqinit;
int rep_connected;
struct rpcrdma_ia *rep_ia;
struct ib_qp_init_attr rep_attr;
wait_queue_head_t rep_connect_wait;
struct ib_sge rep_pad; /* holds zeroed pad */
struct ib_mr *rep_pad_mr; /* holds zeroed pad */
void (*rep_func)(struct rpcrdma_ep *);
struct rpc_xprt *rep_xprt; /* for rep_func */
struct rdma_conn_param rep_remote_cma;
struct sockaddr_storage rep_remote_addr;
struct delayed_work rep_connect_worker;
struct ib_wc rep_send_wcs[RPCRDMA_POLLSIZE];
struct ib_wc rep_recv_wcs[RPCRDMA_POLLSIZE];
};
#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
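/*
 * Illustrative sketch (an assumption; the real logic lives in the suppressed
 * verbs.c): INIT_CQCOUNT/DECR_CQCOUNT implement a signaling budget so that
 * only every rep_cqinit-th send work request asks for a completion. Only the
 * two macros and IB_SEND_SIGNALED come from real definitions; the helper
 * name is hypothetical.
 */
#if 0
static void example_set_signal_flag(struct rpcrdma_ep *ep,
				    struct ib_send_wr *send_wr)
{
	if (DECR_CQCOUNT(ep) > 0) {
		/* still within budget: post unsignaled */
		send_wr->send_flags = 0;
	} else {
		/* budget exhausted: request a completion and reset the count */
		INIT_CQCOUNT(ep);
		send_wr->send_flags = IB_SEND_SIGNALED;
	}
}
#endif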
enum rpcrdma_chunktype {
rpcrdma_noch = 0,
rpcrdma_readch,
rpcrdma_areadch,
rpcrdma_writech,
rpcrdma_replych
};
/*
* struct rpcrdma_rep -- this structure encapsulates state required to recv
* and complete a reply, asynchronously. It needs several pieces of
* state:
* o recv buffer (posted to provider)
* o ib_sge (also donated to provider)
* o status of reply (length, success or not)
* o bookkeeping state to get run by tasklet (list, etc)
*
* These are allocated during initialization, per-transport instance;
* however, the tasklet execution list itself is global, as it should
* always be pretty short.
*
* N of these are associated with a transport instance, and stored in
* struct rpcrdma_buffer. N is the max number of outstanding requests.
*/
/* temporary static scatter/gather max */
#define RPCRDMA_MAX_DATA_SEGS (64) /* max scatter/gather */
#define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */
#define MAX_RPCRDMAHDR (\
/* max supported RPC/RDMA header */ \
sizeof(struct rpcrdma_msg) + (2 * sizeof(u32)) + \
(sizeof(struct rpcrdma_read_chunk) * RPCRDMA_MAX_SEGS) + sizeof(u32))
struct rpcrdma_buffer;
struct rpcrdma_rep {
unsigned int rr_len; /* actual received reply length */
struct rpcrdma_buffer *rr_buffer; /* home base for this structure */
struct rpc_xprt *rr_xprt; /* needed for request/reply matching */
void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */
struct list_head rr_list; /* tasklet list */
struct ib_sge rr_iov; /* for posting */
struct ib_mr *rr_handle; /* handle for mem in rr_iov */
char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */
};
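/*
 * Illustrative sketch (an assumption; the actual reply tasklet is in the
 * suppressed verbs.c): how a tasklet might drain the global list mentioned
 * in the comment above and invoke each reply's rr_func callback. The list
 * head and lock names are hypothetical; only the rpcrdma_rep fields come
 * from this header.
 */
#if 0
static LIST_HEAD(example_replies_g);			/* hypothetical */
static DEFINE_SPINLOCK(example_replies_lock);		/* hypothetical */

static void example_run_reply_tasklet(unsigned long data)
{
	struct rpcrdma_rep *rep;
	unsigned long flags;

	spin_lock_irqsave(&example_replies_lock, flags);
	while (!list_empty(&example_replies_g)) {
		rep = list_entry(example_replies_g.next,
				 struct rpcrdma_rep, rr_list);
		list_del(&rep->rr_list);
		spin_unlock_irqrestore(&example_replies_lock, flags);

		if (rep->rr_func)
			rep->rr_func(rep);	/* e.g. rpcrdma_reply_handler */

		spin_lock_irqsave(&example_replies_lock, flags);
	}
	spin_unlock_irqrestore(&example_replies_lock, flags);
}
#endif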
/*
* struct rpcrdma_mw - external memory region metadata
*
* An external memory region is any buffer or page that is registered
* on the fly (i.e., not pre-registered).
*
* Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During
* call_allocate, rpcrdma_buffer_get() assigns one to each segment in
* an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep
* track of registration metadata while each RPC is pending.
* rpcrdma_deregister_external() uses this metadata to unmap and
* release these resources when an RPC is complete.
*/
enum rpcrdma_frmr_state {
FRMR_IS_INVALID, /* ready to be used */
FRMR_IS_VALID, /* in use */
FRMR_IS_STALE, /* failed completion */
};
struct rpcrdma_frmr {
struct ib_fast_reg_page_list *fr_pgl;
struct ib_mr *fr_mr;
enum rpcrdma_frmr_state fr_state;
};
struct rpcrdma_mw {
union {
struct ib_fmr *fmr;
struct rpcrdma_frmr frmr;
} r;
struct list_head mw_list;
struct list_head mw_all;
};
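/*
 * Illustrative sketch (an assumption; the real free-list handling is in the
 * suppressed verbs.c): how rpcrdma_buffer_get() could hand out one
 * rpcrdma_mw per chunk segment from the rb_mws list described above. The
 * helper name is hypothetical, and the use of rb_lock to cover rb_mws is
 * assumed here for simplicity.
 */
#if 0
static struct rpcrdma_mw *example_get_mw(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *mw = NULL;

	spin_lock(&buf->rb_lock);
	if (!list_empty(&buf->rb_mws)) {
		mw = list_first_entry(&buf->rb_mws,
				      struct rpcrdma_mw, mw_list);
		/* now owned by an rpcrdma_req segment until deregistration */
		list_del(&mw->mw_list);
	}
	spin_unlock(&buf->rb_lock);
	return mw;
}
#endif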
/*
* struct rpcrdma_req -- structure central to the request/reply sequence.
*
* N of these are associated with a transport instance, and stored in
* struct rpcrdma_buffer. N is the max number of outstanding requests.
*
* It includes pre-registered buffer memory for send AND recv.
* The recv buffer, however, is not owned by this structure, and
* is "donated" to the hardware when a recv is posted. When a
* reply is handled, the recv buffer used is given back to the
* struct rpcrdma_req associated with the request.
*
* In addition to the basic memory, this structure includes an array
* of iovs for send operations. The reason is that the iovs passed to
* ib_post_{send,recv} must not be modified until the work request
* completes.
*
* NOTES:
* o RPCRDMA_MAX_SEGS is the max number of addressable chunk elements we
* marshal. The number needed varies depending on the iov lists that
* are passed to us, the memory registration mode we are in, and if
* physical addressing is used, the layout.
*/
struct rpcrdma_mr_seg { /* chunk descriptors */
union { /* chunk memory handles */
struct ib_mr *rl_mr; /* if registered directly */
struct rpcrdma_mw *rl_mw; /* if registered from region */
} mr_chunk;
u64 mr_base; /* registration result */
u32 mr_rkey; /* registration result */
u32 mr_len; /* length of chunk or segment */
int mr_nsegs; /* number of segments in chunk or 0 */
enum dma_data_direction mr_dir; /* segment mapping direction */
dma_addr_t mr_dma; /* segment mapping address */
size_t mr_dmalen; /* segment mapping length */
struct page *mr_page; /* owning page, if any */
char *mr_offset; /* kva if no page, else offset */
};
struct rpcrdma_req {
size_t rl_size; /* actual length of buffer */
unsigned int rl_niovs; /* 0, 2 or 4 */
unsigned int rl_nchunks; /* non-zero if chunks */
unsigned int rl_connect_cookie; /* retry detection */
enum rpcrdma_chunktype rl_rtype, rl_wtype;
struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
struct rpcrdma_rep *rl_reply;/* holder for reply buffer */
struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */
struct ib_sge rl_send_iov[4]; /* for active requests */
struct ib_sge rl_iov; /* for posting */
struct ib_mr *rl_handle; /* handle for mem in rl_iov */
char rl_base[MAX_RPCRDMAHDR]; /* start of actual buffer */
__u32 rl_xdr_buf[0]; /* start of returned rpc rq_buffer */
};
#define rpcr_to_rdmar(r) \
container_of((r)->rq_buffer, struct rpcrdma_req, rl_xdr_buf[0])
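/*
 * Illustrative sketch (hypothetical helper): xprt_rdma_allocate() hands
 * rl_xdr_buf back to the RPC layer, which stores it in rq_buffer, so
 * rpcr_to_rdmar() is the inverse mapping used by xprt_rdma_send_request()
 * and (via rl_buffer) xprt_rdma_free() earlier in this hunk.
 */
#if 0
static void example_show_buffer_mapping(struct rpc_rqst *rqst)
{
	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);

	/* rq_buffer points at the start of the req's flexible array member */
	WARN_ON(rqst->rq_buffer != (void *)req->rl_xdr_buf);
}
#endif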
/*
* struct rpcrdma_buffer -- holds list/queue of pre-registered memory for
* inline requests/replies, and client/server credits.
*
* One of these is associated with a transport instance
*/
struct rpcrdma_buffer {
spinlock_t rb_lock; /* protects indexes */
atomic_t rb_credits; /* most recent server credits */
int rb_max_requests;/* client max requests */
struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */
struct list_head rb_all;
int rb_send_index;
struct rpcrdma_req **rb_send_bufs;
int rb_recv_index;
struct rpcrdma_rep **rb_recv_bufs;
char *rb_pool;
};
#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
/*
* Internal structure for transport instance creation. This
* exists primarily for modularity.
*
* This data should be set with mount options
*/
struct rpcrdma_create_data_internal {
struct sockaddr_storage addr; /* RDMA server address */
unsigned int max_requests; /* max requests (slots) in flight */
unsigned int rsize; /* mount rsize - max read hdr+data */
unsigned int wsize; /* mount wsize - max write hdr+data */
unsigned int inline_rsize; /* max non-rdma read data payload */
unsigned int inline_wsize; /* max non-rdma write data payload */
unsigned int padding; /* non-rdma write header padding */
};
#define RPCRDMA_INLINE_READ_THRESHOLD(rq) \
(rpcx_to_rdmad(rq->rq_xprt).inline_rsize)
#define RPCRDMA_INLINE_WRITE_THRESHOLD(rq)\
(rpcx_to_rdmad(rq->rq_xprt).inline_wsize)
#define RPCRDMA_INLINE_PAD_VALUE(rq)\
rpcx_to_rdmad(rq->rq_xprt).padding
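/*
 * Illustrative sketch (an assumption; the real decision is made by
 * rpcrdma_marshal_req() in rpc_rdma.c, which also weighs page lists,
 * padding and the registration mode): how the inline thresholds above can
 * be used to pick a chunk type for the send buffer. The helper name is
 * hypothetical.
 */
#if 0
static enum rpcrdma_chunktype
example_choose_rtype(struct rpc_rqst *rqst)
{
	if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
		return rpcrdma_noch;	/* argument data fits inline */
	return rpcrdma_readch;		/* expose arguments as read chunks */
}
#endif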
/*
* Statistics for RPCRDMA
*/
struct rpcrdma_stats {
unsigned long read_chunk_count;
unsigned long write_chunk_count;
unsigned long reply_chunk_count;
unsigned long long total_rdma_request;
unsigned long long total_rdma_reply;
unsigned long long pullup_copy_count;
unsigned long long fixup_copy_count;
unsigned long hardway_register_count;
unsigned long failed_marshal_count;
unsigned long bad_reply_count;
};
/*
* RPCRDMA transport -- encapsulates the structures above for
* integration with RPC.
*
* The contained structures are embedded, not pointers,
* for convenience. This structure need not be visible externally.
*
* It is allocated and initialized during mount, and released
* during unmount.
*/
struct rpcrdma_xprt {
struct rpc_xprt xprt;
struct rpcrdma_ia rx_ia;
struct rpcrdma_ep rx_ep;
struct rpcrdma_buffer rx_buf;
struct rpcrdma_create_data_internal rx_data;
struct delayed_work rdma_connect;
struct rpcrdma_stats rx_stats;
};
#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt)
#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
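/*
 * Illustrative sketch (hypothetical helper): because the generic rpc_xprt
 * is embedded as the first member of rpcrdma_xprt, any transport method
 * that receives a struct rpc_xprt * can recover the private state with
 * rpcx_to_rdmax(), exactly as xprt_rdma_send_request() and
 * xprt_rdma_print_stats() do earlier in this hunk.
 */
#if 0
static void example_count_bad_reply(struct rpc_xprt *xprt)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);

	r_xprt->rx_stats.bad_reply_count++;
	dprintk("RPC:       inline rsize for this transport: %u\n",
		rpcx_to_rdmad(xprt).inline_rsize);
}
#endif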
/* Setting this to 0 ensures interoperability with early servers.
 * Setting this to 1 can improve performance for certain unaligned
 * read/write workloads.
 * Default is 0; see the sysctl entry and rpcrdma_convert_iovs() in
 * rpc_rdma.c. */
extern int xprt_rdma_pad_optimize;
/*
* Interface Adapter calls - xprtrdma/verbs.c
*/
int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int);
void rpcrdma_ia_close(struct rpcrdma_ia *);
/*
* Endpoint calls - xprtrdma/verbs.c
*/
int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
struct rpcrdma_create_data_internal *);
void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
struct rpcrdma_req *);
int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *,
struct rpcrdma_rep *);
/*
* Buffer calls - xprtrdma/verbs.c
*/
int rpcrdma_buffer_create(struct rpcrdma_buffer *, struct rpcrdma_ep *,
struct rpcrdma_ia *,
struct rpcrdma_create_data_internal *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_req *);
void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
int rpcrdma_register_internal(struct rpcrdma_ia *, void *, int,
struct ib_mr **, struct ib_sge *);
int rpcrdma_deregister_internal(struct rpcrdma_ia *,
struct ib_mr *, struct ib_sge *);
int rpcrdma_register_external(struct rpcrdma_mr_seg *,
int, int, struct rpcrdma_xprt *);
int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
struct rpcrdma_xprt *);
/*
* RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
*/
void rpcrdma_connect_worker(struct work_struct *);
void rpcrdma_conn_func(struct rpcrdma_ep *);
void rpcrdma_reply_handler(struct rpcrdma_rep *);
/*
* RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
*/
ssize_t rpcrdma_marshal_chunks(struct rpc_rqst *, ssize_t);
int rpcrdma_marshal_req(struct rpc_rqst *);
size_t rpcrdma_max_payload(struct rpcrdma_xprt *);
/* Temporary NFS request map cache. Created in svc_rdma.c */
extern struct kmem_cache *svc_rdma_map_cachep;
/* WR context cache. Created in svc_rdma.c */
extern struct kmem_cache *svc_rdma_ctxt_cachep;
/* Workqueue created in svc_rdma.c */
extern struct workqueue_struct *svc_rdma_wq;
#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
#else
#define RPCSVC_MAXPAYLOAD_RDMA (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
#endif
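/*
 * Worked example (assuming 4 KiB pages, i.e. PAGE_SHIFT == 12):
 * RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT = 64 << 12 = 262144 bytes (256 KiB),
 * so RPCSVC_MAXPAYLOAD_RDMA is the smaller of RPCSVC_MAXPAYLOAD and 256 KiB.
 */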
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */

3149
net/sunrpc/xprtsock.c Normal file

File diff suppressed because it is too large