Fixed MTP to work with TWRP

commit f6dfaef42e
awab228 2018-06-19 23:16:04 +02:00
50820 changed files with 20846062 additions and 0 deletions

net/sunrpc/Kconfig (new file)
@@ -0,0 +1,72 @@
config SUNRPC
tristate
config SUNRPC_GSS
tristate
select OID_REGISTRY
config SUNRPC_BACKCHANNEL
bool
depends on SUNRPC
config SUNRPC_SWAP
bool
depends on SUNRPC
config RPCSEC_GSS_KRB5
tristate "Secure RPC: Kerberos V mechanism"
depends on SUNRPC && CRYPTO
depends on CRYPTO_MD5 && CRYPTO_DES && CRYPTO_CBC && CRYPTO_CTS
depends on CRYPTO_ECB && CRYPTO_HMAC && CRYPTO_SHA1 && CRYPTO_AES
depends on CRYPTO_ARC4
default y
select SUNRPC_GSS
help
Choose Y here to enable Secure RPC using the Kerberos version 5
GSS-API mechanism (RFC 1964).
Secure RPC calls with Kerberos require an auxiliary user-space
daemon which may be found in the Linux nfs-utils package
available from http://linux-nfs.org/. In addition, user-space
Kerberos support should be installed.
If unsure, say Y.
config SUNRPC_DEBUG
bool "RPC: Enable dprintk debugging"
depends on SUNRPC && SYSCTL
help
This option enables a sysctl-based debugging interface
that is used by the 'rpcdebug' utility to turn on or off
logging of different aspects of the kernel RPC activity.
Disabling this option will make your kernel slightly smaller,
but makes troubleshooting NFS issues significantly harder.
If unsure, say Y.
config SUNRPC_XPRT_RDMA_CLIENT
tristate "RPC over RDMA Client Support"
depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
default SUNRPC && INFINIBAND
help
This option allows the NFS client to support an RDMA-enabled
transport.
To compile RPC client RDMA transport support as a module,
choose M here: the module will be called xprtrdma.
If unsure, say N.
config SUNRPC_XPRT_RDMA_SERVER
tristate "RPC over RDMA Server Support"
depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
default SUNRPC && INFINIBAND
help
This option allows the NFS server to support an RDMA-enabled
transport.
To compile RPC server RDMA transport support as a module,
choose M here: the module will be called svcrdma.
If unsure, say N.

net/sunrpc/Makefile (new file)
@@ -0,0 +1,19 @@
#
# Makefile for Linux kernel SUN RPC
#
obj-$(CONFIG_SUNRPC) += sunrpc.o
obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
obj-y += xprtrdma/
sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
auth.o auth_null.o auth_unix.o auth_generic.o \
svc.o svcsock.o svcauth.o svcauth_unix.o \
addr.o rpcb_clnt.o timer.o xdr.o \
sunrpc_syms.o cache.o rpc_pipe.o \
svc_xprt.o
sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o
sunrpc-$(CONFIG_PROC_FS) += stats.o
sunrpc-$(CONFIG_SYSCTL) += sysctl.o

net/sunrpc/addr.c (new file)
@@ -0,0 +1,357 @@
/*
* Copyright 2009, Oracle. All rights reserved.
*
* Convert socket addresses to presentation addresses and universal
* addresses, and vice versa.
*
* Universal addresses are introduced by RFC 1833 and further refined by
* recent RFCs describing NFSv4. The universal address format is part
* of the external (network) interface provided by rpcbind version 3
* and 4, and by NFSv4. Such an address is a string containing a
* presentation format IP address followed by a port number in
* "hibyte.lobyte" format.
*
* IPv6 addresses can also include a scope ID, typically denoted by
* a '%' followed by a device name or a non-negative integer. Refer to
* RFC 4291, Section 2.2 for details on IPv6 presentation formats.
*/
#include <net/ipv6.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/msg_prot.h>
#include <linux/slab.h>
#include <linux/export.h>
#if IS_ENABLED(CONFIG_IPV6)
static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap,
char *buf, const int buflen)
{
const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
const struct in6_addr *addr = &sin6->sin6_addr;
/*
* RFC 4291, Section 2.2.2
*
* Shorthanded ANY address
*/
if (ipv6_addr_any(addr))
return snprintf(buf, buflen, "::");
/*
* RFC 4291, Section 2.2.2
*
* Shorthanded loopback address
*/
if (ipv6_addr_loopback(addr))
return snprintf(buf, buflen, "::1");
/*
* RFC 4291, Section 2.2.3
*
* Special presentation address format for mapped v4
* addresses.
*/
if (ipv6_addr_v4mapped(addr))
return snprintf(buf, buflen, "::ffff:%pI4",
&addr->s6_addr32[3]);
/*
* RFC 4291, Section 2.2.1
*/
return snprintf(buf, buflen, "%pI6c", addr);
}
static size_t rpc_ntop6(const struct sockaddr *sap,
char *buf, const size_t buflen)
{
const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
char scopebuf[IPV6_SCOPE_ID_LEN];
size_t len;
int rc;
len = rpc_ntop6_noscopeid(sap, buf, buflen);
if (unlikely(len == 0))
return len;
if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL))
return len;
if (sin6->sin6_scope_id == 0)
return len;
rc = snprintf(scopebuf, sizeof(scopebuf), "%c%u",
IPV6_SCOPE_DELIMITER, sin6->sin6_scope_id);
if (unlikely((size_t)rc > sizeof(scopebuf)))
return 0;
len += rc;
if (unlikely(len > buflen))
return 0;
strcat(buf, scopebuf);
return len;
}
#else /* !IS_ENABLED(CONFIG_IPV6) */
static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap,
char *buf, const int buflen)
{
return 0;
}
static size_t rpc_ntop6(const struct sockaddr *sap,
char *buf, const size_t buflen)
{
return 0;
}
#endif /* !IS_ENABLED(CONFIG_IPV6) */
static int rpc_ntop4(const struct sockaddr *sap,
char *buf, const size_t buflen)
{
const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
return snprintf(buf, buflen, "%pI4", &sin->sin_addr);
}
/**
* rpc_ntop - construct a presentation address in @buf
* @sap: socket address
* @buf: construction area
* @buflen: size of @buf, in bytes
*
* Plants a %NUL-terminated string in @buf and returns the length
* of the string, excluding the %NUL. Otherwise zero is returned.
*/
size_t rpc_ntop(const struct sockaddr *sap, char *buf, const size_t buflen)
{
switch (sap->sa_family) {
case AF_INET:
return rpc_ntop4(sap, buf, buflen);
case AF_INET6:
return rpc_ntop6(sap, buf, buflen);
}
return 0;
}
EXPORT_SYMBOL_GPL(rpc_ntop);
static size_t rpc_pton4(const char *buf, const size_t buflen,
struct sockaddr *sap, const size_t salen)
{
struct sockaddr_in *sin = (struct sockaddr_in *)sap;
u8 *addr = (u8 *)&sin->sin_addr.s_addr;
if (buflen > INET_ADDRSTRLEN || salen < sizeof(struct sockaddr_in))
return 0;
memset(sap, 0, sizeof(struct sockaddr_in));
if (in4_pton(buf, buflen, addr, '\0', NULL) == 0)
return 0;
sin->sin_family = AF_INET;
return sizeof(struct sockaddr_in);
}
#if IS_ENABLED(CONFIG_IPV6)
static int rpc_parse_scope_id(struct net *net, const char *buf,
const size_t buflen, const char *delim,
struct sockaddr_in6 *sin6)
{
char *p;
size_t len;
if ((buf + buflen) == delim)
return 1;
if (*delim != IPV6_SCOPE_DELIMITER)
return 0;
if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL))
return 0;
len = (buf + buflen) - delim - 1;
p = kstrndup(delim + 1, len, GFP_KERNEL);
if (p) {
u32 scope_id = 0;
struct net_device *dev;
dev = dev_get_by_name(net, p);
if (dev != NULL) {
scope_id = dev->ifindex;
dev_put(dev);
} else {
if (kstrtou32(p, 10, &scope_id) == 0) {
kfree(p);
return 0;
}
}
kfree(p);
sin6->sin6_scope_id = scope_id;
return 1;
}
return 0;
}
static size_t rpc_pton6(struct net *net, const char *buf, const size_t buflen,
struct sockaddr *sap, const size_t salen)
{
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
u8 *addr = (u8 *)&sin6->sin6_addr.in6_u;
const char *delim;
if (buflen > (INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN) ||
salen < sizeof(struct sockaddr_in6))
return 0;
memset(sap, 0, sizeof(struct sockaddr_in6));
if (in6_pton(buf, buflen, addr, IPV6_SCOPE_DELIMITER, &delim) == 0)
return 0;
if (!rpc_parse_scope_id(net, buf, buflen, delim, sin6))
return 0;
sin6->sin6_family = AF_INET6;
return sizeof(struct sockaddr_in6);
}
#else
static size_t rpc_pton6(struct net *net, const char *buf, const size_t buflen,
struct sockaddr *sap, const size_t salen)
{
return 0;
}
#endif
/**
* rpc_pton - Construct a sockaddr in @sap
* @net: applicable network namespace
* @buf: C string containing presentation format IP address
* @buflen: length of presentation address in bytes
* @sap: buffer into which to plant socket address
* @salen: size of buffer in bytes
*
* Returns the size of the socket address if successful; otherwise
* zero is returned.
*
* Plants a socket address in @sap and returns the size of the
* socket address, if successful. Returns zero if an error
* occurred.
*/
size_t rpc_pton(struct net *net, const char *buf, const size_t buflen,
struct sockaddr *sap, const size_t salen)
{
unsigned int i;
for (i = 0; i < buflen; i++)
if (buf[i] == ':')
return rpc_pton6(net, buf, buflen, sap, salen);
return rpc_pton4(buf, buflen, sap, salen);
}
EXPORT_SYMBOL_GPL(rpc_pton);
/**
* rpc_sockaddr2uaddr - Construct a universal address string from @sap.
* @sap: socket address
* @gfp_flags: allocation mode
*
* Returns a %NUL-terminated string in dynamically allocated memory;
* otherwise NULL is returned if an error occurred. Caller must
* free the returned string.
*/
char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags)
{
char portbuf[RPCBIND_MAXUADDRPLEN];
char addrbuf[RPCBIND_MAXUADDRLEN];
unsigned short port;
switch (sap->sa_family) {
case AF_INET:
if (rpc_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0)
return NULL;
port = ntohs(((struct sockaddr_in *)sap)->sin_port);
break;
case AF_INET6:
if (rpc_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0)
return NULL;
port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port);
break;
default:
return NULL;
}
if (snprintf(portbuf, sizeof(portbuf),
".%u.%u", port >> 8, port & 0xff) > (int)sizeof(portbuf))
return NULL;
if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) > sizeof(addrbuf))
return NULL;
return kstrdup(addrbuf, gfp_flags);
}
/**
* rpc_uaddr2sockaddr - convert a universal address to a socket address.
* @net: applicable network namespace
* @uaddr: C string containing universal address to convert
* @uaddr_len: length of universal address string
* @sap: buffer into which to plant socket address
* @salen: size of buffer
*
* @uaddr does not have to be '\0'-terminated, but kstrtou8() and
* rpc_pton() require proper string termination to be successful.
*
* Returns the size of the socket address if successful; otherwise
* zero is returned.
*/
size_t rpc_uaddr2sockaddr(struct net *net, const char *uaddr,
const size_t uaddr_len, struct sockaddr *sap,
const size_t salen)
{
char *c, buf[RPCBIND_MAXUADDRLEN + sizeof('\0')];
u8 portlo, porthi;
unsigned short port;
if (uaddr_len > RPCBIND_MAXUADDRLEN)
return 0;
memcpy(buf, uaddr, uaddr_len);
buf[uaddr_len] = '\0';
c = strrchr(buf, '.');
if (unlikely(c == NULL))
return 0;
if (unlikely(kstrtou8(c + 1, 10, &portlo) != 0))
return 0;
*c = '\0';
c = strrchr(buf, '.');
if (unlikely(c == NULL))
return 0;
if (unlikely(kstrtou8(c + 1, 10, &porthi) != 0))
return 0;
port = (unsigned short)((porthi << 8) | portlo);
*c = '\0';
if (rpc_pton(net, buf, strlen(buf), sap, salen) == 0)
return 0;
switch (sap->sa_family) {
case AF_INET:
((struct sockaddr_in *)sap)->sin_port = htons(port);
return sizeof(struct sockaddr_in);
case AF_INET6:
((struct sockaddr_in6 *)sap)->sin6_port = htons(port);
return sizeof(struct sockaddr_in6);
}
return 0;
}
EXPORT_SYMBOL_GPL(rpc_uaddr2sockaddr);
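The file above implements the universal address format described in its header comment: a presentation-format IP address with the port appended as two decimal octets, "hibyte.lobyte". The short standalone userspace sketch below illustrates only that encoding; it is not kernel code, and the helper names in it are invented for the demonstration.

/*
 * Userspace illustration of the universal-address port encoding that
 * rpc_sockaddr2uaddr() appends and rpc_uaddr2sockaddr() strips off.
 * Assumes well-formed input; all names here are demo-only.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void make_uaddr(const char *ip, unsigned short port,
		       char *buf, size_t buflen)
{
	/* same ".%u.%u" idea as rpc_sockaddr2uaddr() above */
	snprintf(buf, buflen, "%s.%u.%u", ip,
		 (unsigned int)(port >> 8), (unsigned int)(port & 0xff));
}

static unsigned short parse_uaddr_port(char *uaddr)
{
	/* same walk as rpc_uaddr2sockaddr(): strip the last two ".N" fields */
	char *c = strrchr(uaddr, '.');
	unsigned int lo = (unsigned int)strtoul(c + 1, NULL, 10);
	unsigned int hi;

	*c = '\0';
	c = strrchr(uaddr, '.');
	hi = (unsigned int)strtoul(c + 1, NULL, 10);
	*c = '\0';			/* uaddr now holds only the IP part */
	return (unsigned short)((hi << 8) | lo);
}

int main(void)
{
	char buf[64];

	make_uaddr("192.0.2.7", 2049, buf, sizeof(buf));
	printf("uaddr: %s\n", buf);			/* 192.0.2.7.8.1 */
	printf("port:  %u, host: %s\n", parse_uaddr_port(buf), buf);
	return 0;
}

Port 2049 (0x0801) splits into the trailing ".8.1", which is exactly the pair of %u fields rpc_sockaddr2uaddr() writes and the two kstrtou8() calls in rpc_uaddr2sockaddr() read back.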

net/sunrpc/auth.c (new file)
@@ -0,0 +1,891 @@
/*
* linux/net/sunrpc/auth.c
*
* Generic RPC client authentication API.
*
* Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
*/
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/hash.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/gss_api.h>
#include <linux/spinlock.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
#define RPC_CREDCACHE_DEFAULT_HASHBITS (4)
struct rpc_cred_cache {
struct hlist_head *hashtable;
unsigned int hashbits;
spinlock_t lock;
};
static unsigned int auth_hashbits = RPC_CREDCACHE_DEFAULT_HASHBITS;
static DEFINE_SPINLOCK(rpc_authflavor_lock);
static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
&authnull_ops, /* AUTH_NULL */
&authunix_ops, /* AUTH_UNIX */
NULL, /* others can be loadable modules */
};
static LIST_HEAD(cred_unused);
static unsigned long number_cred_unused;
#define MAX_HASHTABLE_BITS (14)
static int param_set_hashtbl_sz(const char *val, const struct kernel_param *kp)
{
unsigned long num;
unsigned int nbits;
int ret;
if (!val)
goto out_inval;
ret = kstrtoul(val, 0, &num);
if (ret == -EINVAL)
goto out_inval;
nbits = fls(num);
if (num > (1U << nbits))
nbits++;
if (nbits > MAX_HASHTABLE_BITS || nbits < 2)
goto out_inval;
*(unsigned int *)kp->arg = nbits;
return 0;
out_inval:
return -EINVAL;
}
static int param_get_hashtbl_sz(char *buffer, const struct kernel_param *kp)
{
unsigned int nbits;
nbits = *(unsigned int *)kp->arg;
return sprintf(buffer, "%u", 1U << nbits);
}
#define param_check_hashtbl_sz(name, p) __param_check(name, p, unsigned int);
static struct kernel_param_ops param_ops_hashtbl_sz = {
.set = param_set_hashtbl_sz,
.get = param_get_hashtbl_sz,
};
module_param_named(auth_hashtable_size, auth_hashbits, hashtbl_sz, 0644);
MODULE_PARM_DESC(auth_hashtable_size, "RPC credential cache hashtable size");
static unsigned long auth_max_cred_cachesize = ULONG_MAX;
module_param(auth_max_cred_cachesize, ulong, 0644);
MODULE_PARM_DESC(auth_max_cred_cachesize, "RPC credential maximum total cache size");
static u32
pseudoflavor_to_flavor(u32 flavor) {
if (flavor > RPC_AUTH_MAXFLAVOR)
return RPC_AUTH_GSS;
return flavor;
}
int
rpcauth_register(const struct rpc_authops *ops)
{
rpc_authflavor_t flavor;
int ret = -EPERM;
if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
return -EINVAL;
spin_lock(&rpc_authflavor_lock);
if (auth_flavors[flavor] == NULL) {
auth_flavors[flavor] = ops;
ret = 0;
}
spin_unlock(&rpc_authflavor_lock);
return ret;
}
EXPORT_SYMBOL_GPL(rpcauth_register);
int
rpcauth_unregister(const struct rpc_authops *ops)
{
rpc_authflavor_t flavor;
int ret = -EPERM;
if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
return -EINVAL;
spin_lock(&rpc_authflavor_lock);
if (auth_flavors[flavor] == ops) {
auth_flavors[flavor] = NULL;
ret = 0;
}
spin_unlock(&rpc_authflavor_lock);
return ret;
}
EXPORT_SYMBOL_GPL(rpcauth_unregister);
/**
* rpcauth_get_pseudoflavor - check if security flavor is supported
* @flavor: a security flavor
* @info: a GSS mech OID, quality of protection, and service value
*
* Verifies that an appropriate kernel module is available or already loaded.
* Returns an equivalent pseudoflavor, or RPC_AUTH_MAXFLAVOR if "flavor" is
* not supported locally.
*/
rpc_authflavor_t
rpcauth_get_pseudoflavor(rpc_authflavor_t flavor, struct rpcsec_gss_info *info)
{
const struct rpc_authops *ops;
rpc_authflavor_t pseudoflavor;
ops = auth_flavors[flavor];
if (ops == NULL)
request_module("rpc-auth-%u", flavor);
spin_lock(&rpc_authflavor_lock);
ops = auth_flavors[flavor];
if (ops == NULL || !try_module_get(ops->owner)) {
spin_unlock(&rpc_authflavor_lock);
return RPC_AUTH_MAXFLAVOR;
}
spin_unlock(&rpc_authflavor_lock);
pseudoflavor = flavor;
if (ops->info2flavor != NULL)
pseudoflavor = ops->info2flavor(info);
module_put(ops->owner);
return pseudoflavor;
}
EXPORT_SYMBOL_GPL(rpcauth_get_pseudoflavor);
/**
* rpcauth_get_gssinfo - find GSS tuple matching a GSS pseudoflavor
* @pseudoflavor: GSS pseudoflavor to match
* @info: rpcsec_gss_info structure to fill in
*
* Returns zero and fills in "info" if pseudoflavor matches a
* supported mechanism.
*/
int
rpcauth_get_gssinfo(rpc_authflavor_t pseudoflavor, struct rpcsec_gss_info *info)
{
rpc_authflavor_t flavor = pseudoflavor_to_flavor(pseudoflavor);
const struct rpc_authops *ops;
int result;
if (flavor >= RPC_AUTH_MAXFLAVOR)
return -EINVAL;
ops = auth_flavors[flavor];
if (ops == NULL)
request_module("rpc-auth-%u", flavor);
spin_lock(&rpc_authflavor_lock);
ops = auth_flavors[flavor];
if (ops == NULL || !try_module_get(ops->owner)) {
spin_unlock(&rpc_authflavor_lock);
return -ENOENT;
}
spin_unlock(&rpc_authflavor_lock);
result = -ENOENT;
if (ops->flavor2info != NULL)
result = ops->flavor2info(pseudoflavor, info);
module_put(ops->owner);
return result;
}
EXPORT_SYMBOL_GPL(rpcauth_get_gssinfo);
/**
* rpcauth_list_flavors - discover registered flavors and pseudoflavors
* @array: array to fill in
* @size: size of "array"
*
* Returns the number of array items filled in, or a negative errno.
*
* The returned array is not sorted by any policy. Callers should not
* rely on the order of the items in the returned array.
*/
int
rpcauth_list_flavors(rpc_authflavor_t *array, int size)
{
rpc_authflavor_t flavor;
int result = 0;
spin_lock(&rpc_authflavor_lock);
for (flavor = 0; flavor < RPC_AUTH_MAXFLAVOR; flavor++) {
const struct rpc_authops *ops = auth_flavors[flavor];
rpc_authflavor_t pseudos[4];
int i, len;
if (result >= size) {
result = -ENOMEM;
break;
}
if (ops == NULL)
continue;
if (ops->list_pseudoflavors == NULL) {
array[result++] = ops->au_flavor;
continue;
}
len = ops->list_pseudoflavors(pseudos, ARRAY_SIZE(pseudos));
if (len < 0) {
result = len;
break;
}
for (i = 0; i < len; i++) {
if (result >= size) {
result = -ENOMEM;
break;
}
array[result++] = pseudos[i];
}
}
spin_unlock(&rpc_authflavor_lock);
dprintk("RPC: %s returns %d\n", __func__, result);
return result;
}
EXPORT_SYMBOL_GPL(rpcauth_list_flavors);
struct rpc_auth *
rpcauth_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
struct rpc_auth *auth;
const struct rpc_authops *ops;
u32 flavor = pseudoflavor_to_flavor(args->pseudoflavor);
auth = ERR_PTR(-EINVAL);
if (flavor >= RPC_AUTH_MAXFLAVOR)
goto out;
if ((ops = auth_flavors[flavor]) == NULL)
request_module("rpc-auth-%u", flavor);
spin_lock(&rpc_authflavor_lock);
ops = auth_flavors[flavor];
if (ops == NULL || !try_module_get(ops->owner)) {
spin_unlock(&rpc_authflavor_lock);
goto out;
}
spin_unlock(&rpc_authflavor_lock);
auth = ops->create(args, clnt);
module_put(ops->owner);
if (IS_ERR(auth))
return auth;
if (clnt->cl_auth)
rpcauth_release(clnt->cl_auth);
clnt->cl_auth = auth;
out:
return auth;
}
EXPORT_SYMBOL_GPL(rpcauth_create);
void
rpcauth_release(struct rpc_auth *auth)
{
if (!atomic_dec_and_test(&auth->au_count))
return;
auth->au_ops->destroy(auth);
}
static DEFINE_SPINLOCK(rpc_credcache_lock);
static void
rpcauth_unhash_cred_locked(struct rpc_cred *cred)
{
hlist_del_rcu(&cred->cr_hash);
smp_mb__before_atomic();
clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
}
static int
rpcauth_unhash_cred(struct rpc_cred *cred)
{
spinlock_t *cache_lock;
int ret;
cache_lock = &cred->cr_auth->au_credcache->lock;
spin_lock(cache_lock);
ret = atomic_read(&cred->cr_count) == 0;
if (ret)
rpcauth_unhash_cred_locked(cred);
spin_unlock(cache_lock);
return ret;
}
/*
* Initialize RPC credential cache
*/
int
rpcauth_init_credcache(struct rpc_auth *auth)
{
struct rpc_cred_cache *new;
unsigned int hashsize;
new = kmalloc(sizeof(*new), GFP_KERNEL);
if (!new)
goto out_nocache;
new->hashbits = auth_hashbits;
hashsize = 1U << new->hashbits;
new->hashtable = kcalloc(hashsize, sizeof(new->hashtable[0]), GFP_KERNEL);
if (!new->hashtable)
goto out_nohashtbl;
spin_lock_init(&new->lock);
auth->au_credcache = new;
return 0;
out_nohashtbl:
kfree(new);
out_nocache:
return -ENOMEM;
}
EXPORT_SYMBOL_GPL(rpcauth_init_credcache);
/*
* Setup a credential key lifetime timeout notification
*/
int
rpcauth_key_timeout_notify(struct rpc_auth *auth, struct rpc_cred *cred)
{
if (!cred->cr_auth->au_ops->key_timeout)
return 0;
return cred->cr_auth->au_ops->key_timeout(auth, cred);
}
EXPORT_SYMBOL_GPL(rpcauth_key_timeout_notify);
bool
rpcauth_cred_key_to_expire(struct rpc_cred *cred)
{
if (!cred->cr_ops->crkey_to_expire)
return false;
return cred->cr_ops->crkey_to_expire(cred);
}
EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire);
char *
rpcauth_stringify_acceptor(struct rpc_cred *cred)
{
if (!cred->cr_ops->crstringify_acceptor)
return NULL;
return cred->cr_ops->crstringify_acceptor(cred);
}
EXPORT_SYMBOL_GPL(rpcauth_stringify_acceptor);
/*
* Destroy a list of credentials
*/
static inline
void rpcauth_destroy_credlist(struct list_head *head)
{
struct rpc_cred *cred;
while (!list_empty(head)) {
cred = list_entry(head->next, struct rpc_cred, cr_lru);
list_del_init(&cred->cr_lru);
put_rpccred(cred);
}
}
/*
* Clear the RPC credential cache, and delete those credentials
* that are not referenced.
*/
void
rpcauth_clear_credcache(struct rpc_cred_cache *cache)
{
LIST_HEAD(free);
struct hlist_head *head;
struct rpc_cred *cred;
unsigned int hashsize = 1U << cache->hashbits;
int i;
spin_lock(&rpc_credcache_lock);
spin_lock(&cache->lock);
for (i = 0; i < hashsize; i++) {
head = &cache->hashtable[i];
while (!hlist_empty(head)) {
cred = hlist_entry(head->first, struct rpc_cred, cr_hash);
get_rpccred(cred);
if (!list_empty(&cred->cr_lru)) {
list_del(&cred->cr_lru);
number_cred_unused--;
}
list_add_tail(&cred->cr_lru, &free);
rpcauth_unhash_cred_locked(cred);
}
}
spin_unlock(&cache->lock);
spin_unlock(&rpc_credcache_lock);
rpcauth_destroy_credlist(&free);
}
/*
* Destroy the RPC credential cache
*/
void
rpcauth_destroy_credcache(struct rpc_auth *auth)
{
struct rpc_cred_cache *cache = auth->au_credcache;
if (cache) {
auth->au_credcache = NULL;
rpcauth_clear_credcache(cache);
kfree(cache->hashtable);
kfree(cache);
}
}
EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache);
#define RPC_AUTH_EXPIRY_MORATORIUM (60 * HZ)
/*
* Remove stale credentials. Avoid sleeping inside the loop.
*/
static long
rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
{
spinlock_t *cache_lock;
struct rpc_cred *cred, *next;
unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM;
long freed = 0;
list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) {
if (nr_to_scan-- == 0)
break;
/*
* Enforce a 60 second garbage collection moratorium
* Note that the cred_unused list must be time-ordered.
*/
if (time_in_range(cred->cr_expire, expired, jiffies) &&
test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
break;
list_del_init(&cred->cr_lru);
number_cred_unused--;
freed++;
if (atomic_read(&cred->cr_count) != 0)
continue;
cache_lock = &cred->cr_auth->au_credcache->lock;
spin_lock(cache_lock);
if (atomic_read(&cred->cr_count) == 0) {
get_rpccred(cred);
list_add_tail(&cred->cr_lru, free);
rpcauth_unhash_cred_locked(cred);
}
spin_unlock(cache_lock);
}
return freed;
}
static unsigned long
rpcauth_cache_do_shrink(int nr_to_scan)
{
LIST_HEAD(free);
unsigned long freed;
spin_lock(&rpc_credcache_lock);
freed = rpcauth_prune_expired(&free, nr_to_scan);
spin_unlock(&rpc_credcache_lock);
rpcauth_destroy_credlist(&free);
return freed;
}
/*
* Run memory cache shrinker.
*/
static unsigned long
rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
{
if ((sc->gfp_mask & GFP_KERNEL) != GFP_KERNEL)
return SHRINK_STOP;
/* nothing left, don't come back */
if (list_empty(&cred_unused))
return SHRINK_STOP;
return rpcauth_cache_do_shrink(sc->nr_to_scan);
}
static unsigned long
rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
}
static void
rpcauth_cache_enforce_limit(void)
{
unsigned long diff;
unsigned int nr_to_scan;
if (number_cred_unused <= auth_max_cred_cachesize)
return;
diff = number_cred_unused - auth_max_cred_cachesize;
nr_to_scan = 100;
if (diff < nr_to_scan)
nr_to_scan = diff;
rpcauth_cache_do_shrink(nr_to_scan);
}
/*
* Look up a process' credentials in the authentication cache
*/
struct rpc_cred *
rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
int flags)
{
LIST_HEAD(free);
struct rpc_cred_cache *cache = auth->au_credcache;
struct rpc_cred *cred = NULL,
*entry, *new;
unsigned int nr;
nr = hash_long(from_kuid(&init_user_ns, acred->uid), cache->hashbits);
rcu_read_lock();
hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) {
if (!entry->cr_ops->crmatch(acred, entry, flags))
continue;
if (flags & RPCAUTH_LOOKUP_RCU) {
if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) &&
!test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags))
cred = entry;
break;
}
spin_lock(&cache->lock);
if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) {
spin_unlock(&cache->lock);
continue;
}
cred = get_rpccred(entry);
spin_unlock(&cache->lock);
break;
}
rcu_read_unlock();
if (cred != NULL)
goto found;
if (flags & RPCAUTH_LOOKUP_RCU)
return ERR_PTR(-ECHILD);
new = auth->au_ops->crcreate(auth, acred, flags);
if (IS_ERR(new)) {
cred = new;
goto out;
}
spin_lock(&cache->lock);
hlist_for_each_entry(entry, &cache->hashtable[nr], cr_hash) {
if (!entry->cr_ops->crmatch(acred, entry, flags))
continue;
cred = get_rpccred(entry);
break;
}
if (cred == NULL) {
cred = new;
set_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
hlist_add_head_rcu(&cred->cr_hash, &cache->hashtable[nr]);
} else
list_add_tail(&new->cr_lru, &free);
spin_unlock(&cache->lock);
rpcauth_cache_enforce_limit();
found:
if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) &&
cred->cr_ops->cr_init != NULL &&
!(flags & RPCAUTH_LOOKUP_NEW)) {
int res = cred->cr_ops->cr_init(auth, cred);
if (res < 0) {
put_rpccred(cred);
cred = ERR_PTR(res);
}
}
rpcauth_destroy_credlist(&free);
out:
return cred;
}
EXPORT_SYMBOL_GPL(rpcauth_lookup_credcache);
struct rpc_cred *
rpcauth_lookupcred(struct rpc_auth *auth, int flags)
{
struct auth_cred acred;
struct rpc_cred *ret;
const struct cred *cred = current_cred();
dprintk("RPC: looking up %s cred\n",
auth->au_ops->au_name);
memset(&acred, 0, sizeof(acred));
acred.uid = cred->fsuid;
acred.gid = cred->fsgid;
acred.group_info = cred->group_info;
ret = auth->au_ops->lookup_cred(auth, &acred, flags);
return ret;
}
EXPORT_SYMBOL_GPL(rpcauth_lookupcred);
void
rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
struct rpc_auth *auth, const struct rpc_credops *ops)
{
INIT_HLIST_NODE(&cred->cr_hash);
INIT_LIST_HEAD(&cred->cr_lru);
atomic_set(&cred->cr_count, 1);
cred->cr_auth = auth;
cred->cr_ops = ops;
cred->cr_expire = jiffies;
#ifdef RPC_DEBUG
cred->cr_magic = RPCAUTH_CRED_MAGIC;
#endif
cred->cr_uid = acred->uid;
}
EXPORT_SYMBOL_GPL(rpcauth_init_cred);
struct rpc_cred *
rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags)
{
dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid,
cred->cr_auth->au_ops->au_name, cred);
return get_rpccred(cred);
}
EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred);
static struct rpc_cred *
rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags)
{
struct rpc_auth *auth = task->tk_client->cl_auth;
struct auth_cred acred = {
.uid = GLOBAL_ROOT_UID,
.gid = GLOBAL_ROOT_GID,
};
dprintk("RPC: %5u looking up %s cred\n",
task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
return auth->au_ops->lookup_cred(auth, &acred, lookupflags);
}
static struct rpc_cred *
rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags)
{
struct rpc_auth *auth = task->tk_client->cl_auth;
dprintk("RPC: %5u looking up %s cred\n",
task->tk_pid, auth->au_ops->au_name);
return rpcauth_lookupcred(auth, lookupflags);
}
static int
rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_cred *new;
int lookupflags = 0;
if (flags & RPC_TASK_ASYNC)
lookupflags |= RPCAUTH_LOOKUP_NEW;
if (cred != NULL)
new = cred->cr_ops->crbind(task, cred, lookupflags);
else if (flags & RPC_TASK_ROOTCREDS)
new = rpcauth_bind_root_cred(task, lookupflags);
else
new = rpcauth_bind_new_cred(task, lookupflags);
if (IS_ERR(new))
return PTR_ERR(new);
if (req->rq_cred != NULL)
put_rpccred(req->rq_cred);
req->rq_cred = new;
return 0;
}
void
put_rpccred(struct rpc_cred *cred)
{
/* Fast path for unhashed credentials */
if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) == 0) {
if (atomic_dec_and_test(&cred->cr_count))
cred->cr_ops->crdestroy(cred);
return;
}
if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock))
return;
if (!list_empty(&cred->cr_lru)) {
number_cred_unused--;
list_del_init(&cred->cr_lru);
}
if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) {
cred->cr_expire = jiffies;
list_add_tail(&cred->cr_lru, &cred_unused);
number_cred_unused++;
goto out_nodestroy;
}
if (!rpcauth_unhash_cred(cred)) {
/* We were hashed and someone looked us up... */
goto out_nodestroy;
}
}
spin_unlock(&rpc_credcache_lock);
cred->cr_ops->crdestroy(cred);
return;
out_nodestroy:
spin_unlock(&rpc_credcache_lock);
}
EXPORT_SYMBOL_GPL(put_rpccred);
__be32 *
rpcauth_marshcred(struct rpc_task *task, __be32 *p)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
dprintk("RPC: %5u marshaling %s cred %p\n",
task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
return cred->cr_ops->crmarshal(task, p);
}
__be32 *
rpcauth_checkverf(struct rpc_task *task, __be32 *p)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
dprintk("RPC: %5u validating %s cred %p\n",
task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
return cred->cr_ops->crvalidate(task, p);
}
static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
__be32 *data, void *obj)
{
struct xdr_stream xdr;
xdr_init_encode(&xdr, &rqstp->rq_snd_buf, data);
encode(rqstp, &xdr, obj);
}
int
rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp,
__be32 *data, void *obj)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
dprintk("RPC: %5u using %s cred %p to wrap rpc data\n",
task->tk_pid, cred->cr_ops->cr_name, cred);
if (cred->cr_ops->crwrap_req)
return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj);
/* By default, we encode the arguments normally. */
rpcauth_wrap_req_encode(encode, rqstp, data, obj);
return 0;
}
static int
rpcauth_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
__be32 *data, void *obj)
{
struct xdr_stream xdr;
xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, data);
return decode(rqstp, &xdr, obj);
}
int
rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp,
__be32 *data, void *obj)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
dprintk("RPC: %5u using %s cred %p to unwrap rpc data\n",
task->tk_pid, cred->cr_ops->cr_name, cred);
if (cred->cr_ops->crunwrap_resp)
return cred->cr_ops->crunwrap_resp(task, decode, rqstp,
data, obj);
/* By default, we decode the arguments normally. */
return rpcauth_unwrap_req_decode(decode, rqstp, data, obj);
}
int
rpcauth_refreshcred(struct rpc_task *task)
{
struct rpc_cred *cred;
int err;
cred = task->tk_rqstp->rq_cred;
if (cred == NULL) {
err = rpcauth_bindcred(task, task->tk_msg.rpc_cred, task->tk_flags);
if (err < 0)
goto out;
cred = task->tk_rqstp->rq_cred;
}
dprintk("RPC: %5u refreshing %s cred %p\n",
task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
err = cred->cr_ops->crrefresh(task);
out:
if (err < 0)
task->tk_status = err;
return err;
}
void
rpcauth_invalcred(struct rpc_task *task)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
dprintk("RPC: %5u invalidating %s cred %p\n",
task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
if (cred)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
}
int
rpcauth_uptodatecred(struct rpc_task *task)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
return cred == NULL ||
test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0;
}
static struct shrinker rpc_cred_shrinker = {
.count_objects = rpcauth_cache_shrink_count,
.scan_objects = rpcauth_cache_shrink_scan,
.seeks = DEFAULT_SEEKS,
};
int __init rpcauth_init_module(void)
{
int err;
err = rpc_init_authunix();
if (err < 0)
goto out1;
err = rpc_init_generic_auth();
if (err < 0)
goto out2;
register_shrinker(&rpc_cred_shrinker);
return 0;
out2:
rpc_destroy_authunix();
out1:
return err;
}
void rpcauth_remove_module(void)
{
rpc_destroy_authunix();
rpc_destroy_generic_auth();
unregister_shrinker(&rpc_cred_shrinker);
}
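param_set_hashtbl_sz() above stores the auth_hashtable_size module parameter as a number of hash bits, so the credential cache always ends up with a power-of-two bucket count (the default of 4 bits gives 16 buckets, and anything outside 2..14 bits is rejected with -EINVAL). The standalone userspace sketch below mirrors that conversion; my_fls() is a local stand-in for the kernel's fls() (highest set bit, 1-based), and everything else is invented for the demonstration.

/*
 * Userspace sketch of the auth_hashtable_size -> hash-bits conversion done
 * by param_set_hashtbl_sz() above.  Not kernel code; my_fls() replaces the
 * kernel's fls() for the demo.
 */
#include <stdio.h>

#define MAX_HASHTABLE_BITS 14	/* same limit as in auth.c above */

static unsigned int my_fls(unsigned long num)
{
	unsigned int bit = 0;

	while (num) {
		num >>= 1;
		bit++;
	}
	return bit;
}

static int hashtbl_bits(unsigned long requested, unsigned int *nbits)
{
	unsigned int bits = my_fls(requested);

	if (requested > (1UL << bits))
		bits++;
	if (bits > MAX_HASHTABLE_BITS || bits < 2)
		return -1;		/* auth.c rejects this with -EINVAL */
	*nbits = bits;
	return 0;
}

int main(void)
{
	unsigned long req[] = { 10, 100, 1000 };
	unsigned int nbits;
	int i;

	for (i = 0; i < 3; i++)
		if (hashtbl_bits(req[i], &nbits) == 0)
			printf("auth_hashtable_size=%lu -> %u bits (%u buckets)\n",
			       req[i], nbits, 1U << nbits);
	return 0;
}

For example, requesting 100 entries yields a 7-bit, 128-bucket table, which is also the rounded value param_get_hashtbl_sz() reports back when the parameter is read.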

net/sunrpc/auth_generic.c (new file)
@@ -0,0 +1,290 @@
/*
* Generic RPC credential
*
* Copyright (C) 2008, Trond Myklebust <Trond.Myklebust@netapp.com>
*/
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/sunrpc/auth.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/sched.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
#define RPC_MACHINE_CRED_USERID GLOBAL_ROOT_UID
#define RPC_MACHINE_CRED_GROUPID GLOBAL_ROOT_GID
struct generic_cred {
struct rpc_cred gc_base;
struct auth_cred acred;
};
static struct rpc_auth generic_auth;
static const struct rpc_credops generic_credops;
/*
* Public call interface
*/
struct rpc_cred *rpc_lookup_cred(void)
{
return rpcauth_lookupcred(&generic_auth, 0);
}
EXPORT_SYMBOL_GPL(rpc_lookup_cred);
struct rpc_cred *rpc_lookup_cred_nonblock(void)
{
return rpcauth_lookupcred(&generic_auth, RPCAUTH_LOOKUP_RCU);
}
EXPORT_SYMBOL_GPL(rpc_lookup_cred_nonblock);
/*
* Public call interface for looking up machine creds.
*/
struct rpc_cred *rpc_lookup_machine_cred(const char *service_name)
{
struct auth_cred acred = {
.uid = RPC_MACHINE_CRED_USERID,
.gid = RPC_MACHINE_CRED_GROUPID,
.principal = service_name,
.machine_cred = 1,
};
dprintk("RPC: looking up machine cred for service %s\n",
service_name);
return generic_auth.au_ops->lookup_cred(&generic_auth, &acred, 0);
}
EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred);
static struct rpc_cred *generic_bind_cred(struct rpc_task *task,
struct rpc_cred *cred, int lookupflags)
{
struct rpc_auth *auth = task->tk_client->cl_auth;
struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred;
return auth->au_ops->lookup_cred(auth, acred, lookupflags);
}
/*
* Lookup generic creds for current process
*/
static struct rpc_cred *
generic_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{
return rpcauth_lookup_credcache(&generic_auth, acred, flags);
}
static struct rpc_cred *
generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{
struct generic_cred *gcred;
gcred = kmalloc(sizeof(*gcred), GFP_KERNEL);
if (gcred == NULL)
return ERR_PTR(-ENOMEM);
rpcauth_init_cred(&gcred->gc_base, acred, &generic_auth, &generic_credops);
gcred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
gcred->acred.uid = acred->uid;
gcred->acred.gid = acred->gid;
gcred->acred.group_info = acred->group_info;
gcred->acred.ac_flags = 0;
if (gcred->acred.group_info != NULL)
get_group_info(gcred->acred.group_info);
gcred->acred.machine_cred = acred->machine_cred;
gcred->acred.principal = acred->principal;
dprintk("RPC: allocated %s cred %p for uid %d gid %d\n",
gcred->acred.machine_cred ? "machine" : "generic",
gcred,
from_kuid(&init_user_ns, acred->uid),
from_kgid(&init_user_ns, acred->gid));
return &gcred->gc_base;
}
static void
generic_free_cred(struct rpc_cred *cred)
{
struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base);
dprintk("RPC: generic_free_cred %p\n", gcred);
if (gcred->acred.group_info != NULL)
put_group_info(gcred->acred.group_info);
kfree(gcred);
}
static void
generic_free_cred_callback(struct rcu_head *head)
{
struct rpc_cred *cred = container_of(head, struct rpc_cred, cr_rcu);
generic_free_cred(cred);
}
static void
generic_destroy_cred(struct rpc_cred *cred)
{
call_rcu(&cred->cr_rcu, generic_free_cred_callback);
}
static int
machine_cred_match(struct auth_cred *acred, struct generic_cred *gcred, int flags)
{
if (!gcred->acred.machine_cred ||
gcred->acred.principal != acred->principal ||
!uid_eq(gcred->acred.uid, acred->uid) ||
!gid_eq(gcred->acred.gid, acred->gid))
return 0;
return 1;
}
/*
* Match credentials against current process creds.
*/
static int
generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
{
struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base);
int i;
if (acred->machine_cred)
return machine_cred_match(acred, gcred, flags);
if (!uid_eq(gcred->acred.uid, acred->uid) ||
!gid_eq(gcred->acred.gid, acred->gid) ||
gcred->acred.machine_cred != 0)
goto out_nomatch;
/* Optimisation in the case where pointers are identical... */
if (gcred->acred.group_info == acred->group_info)
goto out_match;
/* Slow path... */
if (gcred->acred.group_info->ngroups != acred->group_info->ngroups)
goto out_nomatch;
for (i = 0; i < gcred->acred.group_info->ngroups; i++) {
if (!gid_eq(GROUP_AT(gcred->acred.group_info, i),
GROUP_AT(acred->group_info, i)))
goto out_nomatch;
}
out_match:
return 1;
out_nomatch:
return 0;
}
int __init rpc_init_generic_auth(void)
{
return rpcauth_init_credcache(&generic_auth);
}
void rpc_destroy_generic_auth(void)
{
rpcauth_destroy_credcache(&generic_auth);
}
/*
* Test the current time (now) against the underlying credential key expiry
* minus a timeout and set up notification.
*
* The normal case:
* If 'now' is before the key expiry minus RPC_KEY_EXPIRE_TIMEO, set
* the RPC_CRED_NOTIFY_TIMEOUT flag to setup the underlying credential
* rpc_credops crmatch routine to notify this generic cred when its key
* expiration is within RPC_KEY_EXPIRE_TIMEO, and return 0.
*
* The error case:
* If the underlying cred lookup fails, return -EACCES.
*
* The 'almost' error case:
* If 'now' is within key expiry minus RPC_KEY_EXPIRE_TIMEO, but not within
* key expiry minus RPC_KEY_EXPIRE_FAIL, set the RPC_CRED_EXPIRE_SOON bit
* on the acred ac_flags and return 0.
*/
static int
generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred)
{
struct auth_cred *acred = &container_of(cred, struct generic_cred,
gc_base)->acred;
struct rpc_cred *tcred;
int ret = 0;
/* Fast track for non crkey_timeout (no key) underlying credentials */
if (test_bit(RPC_CRED_NO_CRKEY_TIMEOUT, &acred->ac_flags))
return 0;
/* Fast track for the normal case */
if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags))
return 0;
/* lookup_cred either returns a valid referenced rpc_cred, or PTR_ERR */
tcred = auth->au_ops->lookup_cred(auth, acred, 0);
if (IS_ERR(tcred))
return -EACCES;
if (!tcred->cr_ops->crkey_timeout) {
set_bit(RPC_CRED_NO_CRKEY_TIMEOUT, &acred->ac_flags);
ret = 0;
goto out_put;
}
/* Test for the almost error case */
ret = tcred->cr_ops->crkey_timeout(tcred);
if (ret != 0) {
set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
ret = 0;
} else {
/* In case underlying cred key has been reset */
if (test_and_clear_bit(RPC_CRED_KEY_EXPIRE_SOON,
&acred->ac_flags))
dprintk("RPC: UID %d Credential key reset\n",
from_kuid(&init_user_ns, tcred->cr_uid));
/* set up fasttrack for the normal case */
set_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags);
}
out_put:
put_rpccred(tcred);
return ret;
}
static const struct rpc_authops generic_auth_ops = {
.owner = THIS_MODULE,
.au_name = "Generic",
.lookup_cred = generic_lookup_cred,
.crcreate = generic_create_cred,
.key_timeout = generic_key_timeout,
};
static struct rpc_auth generic_auth = {
.au_ops = &generic_auth_ops,
.au_count = ATOMIC_INIT(0),
};
static bool generic_key_to_expire(struct rpc_cred *cred)
{
struct auth_cred *acred = &container_of(cred, struct generic_cred,
gc_base)->acred;
bool ret;
get_rpccred(cred);
ret = test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
put_rpccred(cred);
return ret;
}
static const struct rpc_credops generic_credops = {
.cr_name = "Generic cred",
.crdestroy = generic_destroy_cred,
.crbind = generic_bind_cred,
.crmatch = generic_match,
.crkey_to_expire = generic_key_to_expire,
};
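The block comment above generic_key_timeout() distinguishes a normal case, an error case, and an "almost" error case. The standalone userspace sketch below models that decision flow (omitting the two fast-path flag checks) so the three outcomes are easy to see side by side; the flag names in the comments refer to the ones used in auth_generic.c, and everything else is invented for the illustration.

/*
 * Userspace model of the three generic_key_timeout() outcomes documented
 * above.  Not kernel code; struct and enum names are demo-only.
 */
#include <stdio.h>
#include <stdbool.h>

enum outcome { KEY_OK, KEY_EXPIRE_SOON, KEY_LOOKUP_FAILED };

struct demo_cred {
	bool lookup_fails;	/* underlying lookup_cred() returns an error */
	bool has_key_timeout;	/* underlying cred implements crkey_timeout() */
	bool expires_soon;	/* crkey_timeout() reports expiry inside the window */
};

static enum outcome key_timeout(const struct demo_cred *c)
{
	if (c->lookup_fails)
		return KEY_LOOKUP_FAILED;	/* -EACCES in the real code */
	if (!c->has_key_timeout)
		return KEY_OK;			/* RPC_CRED_NO_CRKEY_TIMEOUT set */
	if (c->expires_soon)
		return KEY_EXPIRE_SOON;		/* RPC_CRED_KEY_EXPIRE_SOON set */
	return KEY_OK;				/* RPC_CRED_NOTIFY_TIMEOUT set */
}

int main(void)
{
	struct demo_cred normal = { false, true, false };
	struct demo_cred almost = { false, true, true };

	printf("normal: %d, almost-error: %d\n",
	       key_timeout(&normal), key_timeout(&almost));
	return 0;
}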

net/sunrpc/auth_gss/Makefile (new file)
@@ -0,0 +1,14 @@
#
# Makefile for Linux kernel rpcsec_gss implementation
#
obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
auth_rpcgss-y := auth_gss.o gss_generic_token.o \
gss_mech_switch.o svcauth_gss.o \
gss_rpc_upcall.o gss_rpc_xdr.o
obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
rpcsec_gss_krb5-y := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o

File diff suppressed because it is too large.

net/sunrpc/auth_gss/gss_generic_token.c (new file)
@@ -0,0 +1,234 @@
/*
* linux/net/sunrpc/gss_generic_token.c
*
* Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/generic/util_token.c
*
* Copyright (c) 2000 The Regents of the University of Michigan.
* All rights reserved.
*
* Andy Adamson <andros@umich.edu>
*/
/*
* Copyright 1993 by OpenVision Technologies, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appears in all copies and
* that both that copyright notice and this permission notice appear in
* supporting documentation, and that the name of OpenVision not be used
* in advertising or publicity pertaining to distribution of the software
* without specific, written prior permission. OpenVision makes no
* representations about the suitability of this software for any
* purpose. It is provided "as is" without express or implied warranty.
*
* OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
* USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
#include <linux/types.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/gss_asn1.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
/* TWRITE_STR from gssapiP_generic.h */
#define TWRITE_STR(ptr, str, len) \
memcpy((ptr), (char *) (str), (len)); \
(ptr) += (len);
/* XXXX this code currently makes the assumption that a mech oid will
never be longer than 127 bytes. This assumption is not inherent in
the interfaces, so the code can be fixed if the OSI namespace
balloons unexpectedly. */
/* Each token looks like this:
0x60 tag for APPLICATION 0, SEQUENCE
(constructed, definite-length)
<length> possible multiple bytes, need to parse/generate
0x06 tag for OBJECT IDENTIFIER
<moid_length> compile-time constant string (assume 1 byte)
<moid_bytes> compile-time constant string
<inner_bytes> the ANY containing the application token
bytes 0,1 are the token type
bytes 2,n are the token data
For the purposes of this abstraction, the token "header" consists of
the sequence tag and length octets, the mech OID DER encoding, and the
first two inner bytes, which indicate the token type. The token
"body" consists of everything else.
*/
static int
der_length_size( int length)
{
if (length < (1<<7))
return 1;
else if (length < (1<<8))
return 2;
#if (SIZEOF_INT == 2)
else
return 3;
#else
else if (length < (1<<16))
return 3;
else if (length < (1<<24))
return 4;
else
return 5;
#endif
}
static void
der_write_length(unsigned char **buf, int length)
{
if (length < (1<<7)) {
*(*buf)++ = (unsigned char) length;
} else {
*(*buf)++ = (unsigned char) (der_length_size(length)+127);
#if (SIZEOF_INT > 2)
if (length >= (1<<24))
*(*buf)++ = (unsigned char) (length>>24);
if (length >= (1<<16))
*(*buf)++ = (unsigned char) ((length>>16)&0xff);
#endif
if (length >= (1<<8))
*(*buf)++ = (unsigned char) ((length>>8)&0xff);
*(*buf)++ = (unsigned char) (length&0xff);
}
}
/* returns decoded length, or < 0 on failure. Advances buf and
decrements bufsize */
static int
der_read_length(unsigned char **buf, int *bufsize)
{
unsigned char sf;
int ret;
if (*bufsize < 1)
return -1;
sf = *(*buf)++;
(*bufsize)--;
if (sf & 0x80) {
if ((sf &= 0x7f) > ((*bufsize)-1))
return -1;
if (sf > SIZEOF_INT)
return -1;
ret = 0;
for (; sf; sf--) {
ret = (ret<<8) + (*(*buf)++);
(*bufsize)--;
}
} else {
ret = sf;
}
return ret;
}
/* returns the length of a token, given the mech oid and the body size */
int
g_token_size(struct xdr_netobj *mech, unsigned int body_size)
{
/* set body_size to sequence contents size */
body_size += 2 + (int) mech->len; /* NEED overflow check */
return 1 + der_length_size(body_size) + body_size;
}
EXPORT_SYMBOL_GPL(g_token_size);
/* fills in a buffer with the token header. The buffer is assumed to
be the right size. buf is advanced past the token header */
void
g_make_token_header(struct xdr_netobj *mech, int body_size, unsigned char **buf)
{
*(*buf)++ = 0x60;
der_write_length(buf, 2 + mech->len + body_size);
*(*buf)++ = 0x06;
*(*buf)++ = (unsigned char) mech->len;
TWRITE_STR(*buf, mech->data, ((int) mech->len));
}
EXPORT_SYMBOL_GPL(g_make_token_header);
/*
* Given a buffer containing a token, reads and verifies the token,
* leaving buf advanced past the token header, and setting body_size
* to the number of remaining bytes. Returns 0 on success,
* G_BAD_TOK_HEADER for a variety of errors, and G_WRONG_MECH if the
* mechanism in the token does not match the mech argument. buf and
* *body_size are left unmodified on error.
*/
u32
g_verify_token_header(struct xdr_netobj *mech, int *body_size,
unsigned char **buf_in, int toksize)
{
unsigned char *buf = *buf_in;
int seqsize;
struct xdr_netobj toid;
int ret = 0;
if ((toksize-=1) < 0)
return G_BAD_TOK_HEADER;
if (*buf++ != 0x60)
return G_BAD_TOK_HEADER;
if ((seqsize = der_read_length(&buf, &toksize)) < 0)
return G_BAD_TOK_HEADER;
if (seqsize != toksize)
return G_BAD_TOK_HEADER;
if ((toksize-=1) < 0)
return G_BAD_TOK_HEADER;
if (*buf++ != 0x06)
return G_BAD_TOK_HEADER;
if ((toksize-=1) < 0)
return G_BAD_TOK_HEADER;
toid.len = *buf++;
if ((toksize-=toid.len) < 0)
return G_BAD_TOK_HEADER;
toid.data = buf;
buf+=toid.len;
if (! g_OID_equal(&toid, mech))
ret = G_WRONG_MECH;
/* G_WRONG_MECH is not returned immediately because it's more important
to return G_BAD_TOK_HEADER if the token header is in fact bad */
if ((toksize-=2) < 0)
return G_BAD_TOK_HEADER;
if (ret)
return ret;
if (!ret) {
*buf_in = buf;
*body_size = toksize;
}
return ret;
}
EXPORT_SYMBOL_GPL(g_verify_token_header);
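The token framing above is plain DER: a 0x60 APPLICATION-0 tag, a definite length, then the 0x06 OID tag, the mech OID, and a two-byte token type before the body. As a quick illustration of just the definite-length rule implemented by der_write_length()/der_read_length(), here is a self-contained userspace sketch; it assumes a well-formed buffer, skips the bounds checks the kernel code performs, and uses invented names.

/*
 * Userspace round-trip of the DER definite-length encoding used by
 * g_make_token_header()/g_verify_token_header() above: lengths < 128 are a
 * single byte, longer lengths are (0x80 + n) followed by n big-endian bytes.
 * Demo-only; no bounds checking.
 */
#include <stdio.h>

static void write_length(unsigned char **buf, int length)
{
	if (length < (1 << 7)) {
		*(*buf)++ = (unsigned char)length;
	} else {
		int n = (length < (1 << 8)) ? 1 : (length < (1 << 16)) ? 2 :
			(length < (1 << 24)) ? 3 : 4;

		*(*buf)++ = (unsigned char)(0x80 + n);
		while (n--)
			*(*buf)++ = (unsigned char)((length >> (8 * n)) & 0xff);
	}
}

static int read_length(unsigned char **buf, int *bufsize)
{
	unsigned char sf = *(*buf)++;
	int ret = 0;

	(*bufsize)--;
	if (!(sf & 0x80))
		return sf;
	for (sf &= 0x7f; sf; sf--) {
		ret = (ret << 8) + *(*buf)++;
		(*bufsize)--;
	}
	return ret;
}

int main(void)
{
	unsigned char tok[8], *p = tok;
	int size, i;

	write_length(&p, 1500);		/* encodes as 82 05 dc */
	size = (int)(p - tok);
	for (i = 0; i < size; i++)
		printf("%02x ", tok[i]);
	p = tok;
	printf("-> %d\n", read_length(&p, &size));
	return 0;
}

A length of 1500 encodes as the three bytes 82 05 dc and decodes back to 1500, matching what g_verify_token_header() expects to find right after the 0x60 tag.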

net/sunrpc/auth_gss/gss_krb5_crypto.c (new file)
@@ -0,0 +1,988 @@
/*
* linux/net/sunrpc/gss_krb5_crypto.c
*
* Copyright (c) 2000-2008 The Regents of the University of Michigan.
* All rights reserved.
*
* Andy Adamson <andros@umich.edu>
* Bruce Fields <bfields@umich.edu>
*/
/*
* Copyright (C) 1998 by the FundsXpress, INC.
*
* All rights reserved.
*
* Export of this software from the United States of America may require
* a specific license from the United States Government. It is the
* responsibility of any person or organization contemplating export to
* obtain such a license before exporting.
*
* WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
* distribute this software and its documentation for any purpose and
* without fee is hereby granted, provided that the above copyright
* notice appear in all copies and that both that copyright notice and
* this permission notice appear in supporting documentation, and that
* the name of FundsXpress. not be used in advertising or publicity pertaining
* to distribution of the software without specific, written prior
* permission. FundsXpress makes no representations about the suitability of
* this software for any purpose. It is provided "as is" without express
* or implied warranty.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
#include <linux/err.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/crypto.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/random.h>
#include <linux/sunrpc/gss_krb5.h>
#include <linux/sunrpc/xdr.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
u32
krb5_encrypt(
struct crypto_blkcipher *tfm,
void * iv,
void * in,
void * out,
int length)
{
u32 ret = -EINVAL;
struct scatterlist sg[1];
u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
if (length % crypto_blkcipher_blocksize(tfm) != 0)
goto out;
if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
dprintk("RPC: gss_k5encrypt: tfm iv size too large %d\n",
crypto_blkcipher_ivsize(tfm));
goto out;
}
if (iv)
memcpy(local_iv, iv, crypto_blkcipher_ivsize(tfm));
memcpy(out, in, length);
sg_init_one(sg, out, length);
ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, length);
out:
dprintk("RPC: krb5_encrypt returns %d\n", ret);
return ret;
}
u32
krb5_decrypt(
struct crypto_blkcipher *tfm,
void * iv,
void * in,
void * out,
int length)
{
u32 ret = -EINVAL;
struct scatterlist sg[1];
u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
if (length % crypto_blkcipher_blocksize(tfm) != 0)
goto out;
if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
dprintk("RPC: gss_k5decrypt: tfm iv size too large %d\n",
crypto_blkcipher_ivsize(tfm));
goto out;
}
if (iv)
memcpy(local_iv,iv, crypto_blkcipher_ivsize(tfm));
memcpy(out, in, length);
sg_init_one(sg, out, length);
ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, length);
out:
dprintk("RPC: gss_k5decrypt returns %d\n",ret);
return ret;
}
static int
checksummer(struct scatterlist *sg, void *data)
{
struct hash_desc *desc = data;
return crypto_hash_update(desc, sg, sg->length);
}
static int
arcfour_hmac_md5_usage_to_salt(unsigned int usage, u8 salt[4])
{
unsigned int ms_usage;
switch (usage) {
case KG_USAGE_SIGN:
ms_usage = 15;
break;
case KG_USAGE_SEAL:
ms_usage = 13;
break;
default:
return -EINVAL;
}
salt[0] = (ms_usage >> 0) & 0xff;
salt[1] = (ms_usage >> 8) & 0xff;
salt[2] = (ms_usage >> 16) & 0xff;
salt[3] = (ms_usage >> 24) & 0xff;
return 0;
}
static u32
make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
struct xdr_buf *body, int body_offset, u8 *cksumkey,
unsigned int usage, struct xdr_netobj *cksumout)
{
struct hash_desc desc;
struct scatterlist sg[1];
int err;
u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
u8 rc4salt[4];
struct crypto_hash *md5;
struct crypto_hash *hmac_md5;
if (cksumkey == NULL)
return GSS_S_FAILURE;
if (cksumout->len < kctx->gk5e->cksumlength) {
dprintk("%s: checksum buffer length, %u, too small for %s\n",
__func__, cksumout->len, kctx->gk5e->name);
return GSS_S_FAILURE;
}
if (arcfour_hmac_md5_usage_to_salt(usage, rc4salt)) {
dprintk("%s: invalid usage value %u\n", __func__, usage);
return GSS_S_FAILURE;
}
md5 = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(md5))
return GSS_S_FAILURE;
hmac_md5 = crypto_alloc_hash(kctx->gk5e->cksum_name, 0,
CRYPTO_ALG_ASYNC);
if (IS_ERR(hmac_md5)) {
crypto_free_hash(md5);
return GSS_S_FAILURE;
}
desc.tfm = md5;
desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
err = crypto_hash_init(&desc);
if (err)
goto out;
sg_init_one(sg, rc4salt, 4);
err = crypto_hash_update(&desc, sg, 4);
if (err)
goto out;
sg_init_one(sg, header, hdrlen);
err = crypto_hash_update(&desc, sg, hdrlen);
if (err)
goto out;
err = xdr_process_buf(body, body_offset, body->len - body_offset,
checksummer, &desc);
if (err)
goto out;
err = crypto_hash_final(&desc, checksumdata);
if (err)
goto out;
desc.tfm = hmac_md5;
desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
err = crypto_hash_init(&desc);
if (err)
goto out;
err = crypto_hash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength);
if (err)
goto out;
sg_init_one(sg, checksumdata, crypto_hash_digestsize(md5));
err = crypto_hash_digest(&desc, sg, crypto_hash_digestsize(md5),
checksumdata);
if (err)
goto out;
memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
cksumout->len = kctx->gk5e->cksumlength;
out:
crypto_free_hash(md5);
crypto_free_hash(hmac_md5);
return err ? GSS_S_FAILURE : 0;
}
/*
* checksum the plaintext data and hdrlen bytes of the token header
* The checksum is performed over the first 8 bytes of the
* gss token header and then over the data body
*/
u32
make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
struct xdr_buf *body, int body_offset, u8 *cksumkey,
unsigned int usage, struct xdr_netobj *cksumout)
{
struct hash_desc desc;
struct scatterlist sg[1];
int err;
u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
unsigned int checksumlen;
if (kctx->gk5e->ctype == CKSUMTYPE_HMAC_MD5_ARCFOUR)
return make_checksum_hmac_md5(kctx, header, hdrlen,
body, body_offset,
cksumkey, usage, cksumout);
if (cksumout->len < kctx->gk5e->cksumlength) {
dprintk("%s: checksum buffer length, %u, too small for %s\n",
__func__, cksumout->len, kctx->gk5e->name);
return GSS_S_FAILURE;
}
desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(desc.tfm))
return GSS_S_FAILURE;
desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
checksumlen = crypto_hash_digestsize(desc.tfm);
if (cksumkey != NULL) {
err = crypto_hash_setkey(desc.tfm, cksumkey,
kctx->gk5e->keylength);
if (err)
goto out;
}
err = crypto_hash_init(&desc);
if (err)
goto out;
sg_init_one(sg, header, hdrlen);
err = crypto_hash_update(&desc, sg, hdrlen);
if (err)
goto out;
err = xdr_process_buf(body, body_offset, body->len - body_offset,
checksummer, &desc);
if (err)
goto out;
err = crypto_hash_final(&desc, checksumdata);
if (err)
goto out;
switch (kctx->gk5e->ctype) {
case CKSUMTYPE_RSA_MD5:
err = kctx->gk5e->encrypt(kctx->seq, NULL, checksumdata,
checksumdata, checksumlen);
if (err)
goto out;
memcpy(cksumout->data,
checksumdata + checksumlen - kctx->gk5e->cksumlength,
kctx->gk5e->cksumlength);
break;
case CKSUMTYPE_HMAC_SHA1_DES3:
memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
break;
default:
BUG();
break;
}
cksumout->len = kctx->gk5e->cksumlength;
out:
crypto_free_hash(desc.tfm);
return err ? GSS_S_FAILURE : 0;
}
/*
* checksum the plaintext data and hdrlen bytes of the token header
* Per rfc4121, sec. 4.2.4, the checksum is performed over the data
* body then over the first 16 octets of the MIC token
* Inclusion of the header data in the calculation of the
* checksum is optional.
*/
u32
make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
struct xdr_buf *body, int body_offset, u8 *cksumkey,
unsigned int usage, struct xdr_netobj *cksumout)
{
struct hash_desc desc;
struct scatterlist sg[1];
int err;
u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
unsigned int checksumlen;
if (kctx->gk5e->keyed_cksum == 0) {
dprintk("%s: expected keyed hash for %s\n",
__func__, kctx->gk5e->name);
return GSS_S_FAILURE;
}
if (cksumkey == NULL) {
dprintk("%s: no key supplied for %s\n",
__func__, kctx->gk5e->name);
return GSS_S_FAILURE;
}
desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0,
CRYPTO_ALG_ASYNC);
if (IS_ERR(desc.tfm))
return GSS_S_FAILURE;
checksumlen = crypto_hash_digestsize(desc.tfm);
desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
err = crypto_hash_setkey(desc.tfm, cksumkey, kctx->gk5e->keylength);
if (err)
goto out;
err = crypto_hash_init(&desc);
if (err)
goto out;
err = xdr_process_buf(body, body_offset, body->len - body_offset,
checksummer, &desc);
if (err)
goto out;
if (header != NULL) {
sg_init_one(sg, header, hdrlen);
err = crypto_hash_update(&desc, sg, hdrlen);
if (err)
goto out;
}
err = crypto_hash_final(&desc, checksumdata);
if (err)
goto out;
cksumout->len = kctx->gk5e->cksumlength;
switch (kctx->gk5e->ctype) {
case CKSUMTYPE_HMAC_SHA1_96_AES128:
case CKSUMTYPE_HMAC_SHA1_96_AES256:
/* note that this truncates the hash */
memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
break;
default:
BUG();
break;
}
out:
crypto_free_hash(desc.tfm);
return err ? GSS_S_FAILURE : 0;
}
struct encryptor_desc {
u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
struct blkcipher_desc desc;
int pos;
struct xdr_buf *outbuf;
struct page **pages;
struct scatterlist infrags[4];
struct scatterlist outfrags[4];
int fragno;
int fraglen;
};
static int
encryptor(struct scatterlist *sg, void *data)
{
struct encryptor_desc *desc = data;
struct xdr_buf *outbuf = desc->outbuf;
struct page *in_page;
int thislen = desc->fraglen + sg->length;
int fraglen, ret;
int page_pos;
/* Worst case is 4 fragments: head, end of page 1, start
* of page 2, tail. Anything more is a bug. */
BUG_ON(desc->fragno > 3);
page_pos = desc->pos - outbuf->head[0].iov_len;
if (page_pos >= 0 && page_pos < outbuf->page_len) {
/* pages are not in place: */
int i = (page_pos + outbuf->page_base) >> PAGE_CACHE_SHIFT;
in_page = desc->pages[i];
} else {
in_page = sg_page(sg);
}
sg_set_page(&desc->infrags[desc->fragno], in_page, sg->length,
sg->offset);
sg_set_page(&desc->outfrags[desc->fragno], sg_page(sg), sg->length,
sg->offset);
desc->fragno++;
desc->fraglen += sg->length;
desc->pos += sg->length;
fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1);
thislen -= fraglen;
if (thislen == 0)
return 0;
sg_mark_end(&desc->infrags[desc->fragno - 1]);
sg_mark_end(&desc->outfrags[desc->fragno - 1]);
ret = crypto_blkcipher_encrypt_iv(&desc->desc, desc->outfrags,
desc->infrags, thislen);
if (ret)
return ret;
sg_init_table(desc->infrags, 4);
sg_init_table(desc->outfrags, 4);
if (fraglen) {
sg_set_page(&desc->outfrags[0], sg_page(sg), fraglen,
sg->offset + sg->length - fraglen);
desc->infrags[0] = desc->outfrags[0];
sg_assign_page(&desc->infrags[0], in_page);
desc->fragno = 1;
desc->fraglen = fraglen;
} else {
desc->fragno = 0;
desc->fraglen = 0;
}
return 0;
}
int
gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
int offset, struct page **pages)
{
int ret;
struct encryptor_desc desc;
BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0);
memset(desc.iv, 0, sizeof(desc.iv));
desc.desc.tfm = tfm;
desc.desc.info = desc.iv;
desc.desc.flags = 0;
desc.pos = offset;
desc.outbuf = buf;
desc.pages = pages;
desc.fragno = 0;
desc.fraglen = 0;
sg_init_table(desc.infrags, 4);
sg_init_table(desc.outfrags, 4);
ret = xdr_process_buf(buf, offset, buf->len - offset, encryptor, &desc);
return ret;
}
struct decryptor_desc {
u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
struct blkcipher_desc desc;
struct scatterlist frags[4];
int fragno;
int fraglen;
};
static int
decryptor(struct scatterlist *sg, void *data)
{
struct decryptor_desc *desc = data;
int thislen = desc->fraglen + sg->length;
int fraglen, ret;
/* Worst case is 4 fragments: head, end of page 1, start
* of page 2, tail. Anything more is a bug. */
BUG_ON(desc->fragno > 3);
sg_set_page(&desc->frags[desc->fragno], sg_page(sg), sg->length,
sg->offset);
desc->fragno++;
desc->fraglen += sg->length;
fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1);
thislen -= fraglen;
if (thislen == 0)
return 0;
sg_mark_end(&desc->frags[desc->fragno - 1]);
ret = crypto_blkcipher_decrypt_iv(&desc->desc, desc->frags,
desc->frags, thislen);
if (ret)
return ret;
sg_init_table(desc->frags, 4);
if (fraglen) {
sg_set_page(&desc->frags[0], sg_page(sg), fraglen,
sg->offset + sg->length - fraglen);
desc->fragno = 1;
desc->fraglen = fraglen;
} else {
desc->fragno = 0;
desc->fraglen = 0;
}
return 0;
}
int
gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
int offset)
{
struct decryptor_desc desc;
/* XXXJBF: */
BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0);
memset(desc.iv, 0, sizeof(desc.iv));
desc.desc.tfm = tfm;
desc.desc.info = desc.iv;
desc.desc.flags = 0;
desc.fragno = 0;
desc.fraglen = 0;
sg_init_table(desc.frags, 4);
return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc);
}
/*
* This function makes the assumption that it was ultimately called
* from gss_wrap().
*
* The client auth_gss code moves any existing tail data into a
* separate page before calling gss_wrap.
* The server svcauth_gss code ensures that both the head and the
* tail have slack space of RPC_MAX_AUTH_SIZE before calling gss_wrap.
*
* Even with that guarantee, this function may be called more than
* once in the processing of gss_wrap(). The best we can do is
* verify at compile-time (see GSS_KRB5_SLACK_CHECK) that the
* largest expected shift will fit within RPC_MAX_AUTH_SIZE.
* At run-time we can verify that a single invocation of this
 * function doesn't attempt to use more than RPC_MAX_AUTH_SIZE.
*/
int
xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen)
{
u8 *p;
if (shiftlen == 0)
return 0;
BUILD_BUG_ON(GSS_KRB5_MAX_SLACK_NEEDED > RPC_MAX_AUTH_SIZE);
BUG_ON(shiftlen > RPC_MAX_AUTH_SIZE);
p = buf->head[0].iov_base + base;
memmove(p + shiftlen, p, buf->head[0].iov_len - base);
buf->head[0].iov_len += shiftlen;
buf->len += shiftlen;
return 0;
}
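As a rough user-space illustration of the shift that xdr_extend_head() performs (not part of the original file, and using made-up buffer contents and sizes), the sketch below opens a shiftlen-byte gap at base exactly the way the memmove above does, so a confounder can later be dropped into that gap:

#include <assert.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* pretend head buffer with slack at the end, as gss_wrap() guarantees */
	char head[32] = "RPCHDRpayload";
	size_t base = 6;	/* shift everything from here onward */
	size_t used = 13;	/* bytes currently valid in head     */
	size_t shiftlen = 8;	/* e.g. an 8-byte confounder         */

	/* same move as xdr_extend_head(): open a shiftlen-byte gap at base */
	memmove(head + base + shiftlen, head + base, used - base);
	memset(head + base, '?', shiftlen);	/* confounder would land here */
	used += shiftlen;

	assert(memcmp(head + base + shiftlen, "payload", 7) == 0);
	printf("%.*s\n", (int)used, head);
	return 0;
}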
static u32
gss_krb5_cts_crypt(struct crypto_blkcipher *cipher, struct xdr_buf *buf,
u32 offset, u8 *iv, struct page **pages, int encrypt)
{
u32 ret;
struct scatterlist sg[1];
struct blkcipher_desc desc = { .tfm = cipher, .info = iv };
u8 data[GSS_KRB5_MAX_BLOCKSIZE * 2];
struct page **save_pages;
u32 len = buf->len - offset;
if (len > ARRAY_SIZE(data)) {
WARN_ON(1);	/* len exceeds the two-block on-stack buffer */
return -ENOMEM;
}
/*
* For encryption, we want to read from the cleartext
* page cache pages, and write the encrypted data to
* the supplied xdr_buf pages.
*/
save_pages = buf->pages;
if (encrypt)
buf->pages = pages;
ret = read_bytes_from_xdr_buf(buf, offset, data, len);
buf->pages = save_pages;
if (ret)
goto out;
sg_init_one(sg, data, len);
if (encrypt)
ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, len);
else
ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, len);
if (ret)
goto out;
ret = write_bytes_to_xdr_buf(buf, offset, data, len);
out:
return ret;
}
u32
gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
struct xdr_buf *buf, struct page **pages)
{
u32 err;
struct xdr_netobj hmac;
u8 *cksumkey;
u8 *ecptr;
struct crypto_blkcipher *cipher, *aux_cipher;
int blocksize;
struct page **save_pages;
int nblocks, nbytes;
struct encryptor_desc desc;
u32 cbcbytes;
unsigned int usage;
if (kctx->initiate) {
cipher = kctx->initiator_enc;
aux_cipher = kctx->initiator_enc_aux;
cksumkey = kctx->initiator_integ;
usage = KG_USAGE_INITIATOR_SEAL;
} else {
cipher = kctx->acceptor_enc;
aux_cipher = kctx->acceptor_enc_aux;
cksumkey = kctx->acceptor_integ;
usage = KG_USAGE_ACCEPTOR_SEAL;
}
blocksize = crypto_blkcipher_blocksize(cipher);
/* hide the gss token header and insert the confounder */
offset += GSS_KRB5_TOK_HDR_LEN;
if (xdr_extend_head(buf, offset, kctx->gk5e->conflen))
return GSS_S_FAILURE;
gss_krb5_make_confounder(buf->head[0].iov_base + offset, kctx->gk5e->conflen);
offset -= GSS_KRB5_TOK_HDR_LEN;
if (buf->tail[0].iov_base != NULL) {
ecptr = buf->tail[0].iov_base + buf->tail[0].iov_len;
} else {
buf->tail[0].iov_base = buf->head[0].iov_base
+ buf->head[0].iov_len;
buf->tail[0].iov_len = 0;
ecptr = buf->tail[0].iov_base;
}
/* copy plaintext gss token header after filler (if any) */
memcpy(ecptr, buf->head[0].iov_base + offset, GSS_KRB5_TOK_HDR_LEN);
buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN;
buf->len += GSS_KRB5_TOK_HDR_LEN;
/* Do the HMAC */
hmac.len = GSS_KRB5_MAX_CKSUM_LEN;
hmac.data = buf->tail[0].iov_base + buf->tail[0].iov_len;
/*
* When we are called, pages points to the real page cache
* data -- which we can't go and encrypt! buf->pages points
* to scratch pages which we are going to send off to the
* client/server. Swap in the plaintext pages to calculate
* the hmac.
*/
save_pages = buf->pages;
buf->pages = pages;
err = make_checksum_v2(kctx, NULL, 0, buf,
offset + GSS_KRB5_TOK_HDR_LEN,
cksumkey, usage, &hmac);
buf->pages = save_pages;
if (err)
return GSS_S_FAILURE;
nbytes = buf->len - offset - GSS_KRB5_TOK_HDR_LEN;
nblocks = (nbytes + blocksize - 1) / blocksize;
cbcbytes = 0;
if (nblocks > 2)
cbcbytes = (nblocks - 2) * blocksize;
memset(desc.iv, 0, sizeof(desc.iv));
if (cbcbytes) {
desc.pos = offset + GSS_KRB5_TOK_HDR_LEN;
desc.fragno = 0;
desc.fraglen = 0;
desc.pages = pages;
desc.outbuf = buf;
desc.desc.info = desc.iv;
desc.desc.flags = 0;
desc.desc.tfm = aux_cipher;
sg_init_table(desc.infrags, 4);
sg_init_table(desc.outfrags, 4);
err = xdr_process_buf(buf, offset + GSS_KRB5_TOK_HDR_LEN,
cbcbytes, encryptor, &desc);
if (err)
goto out_err;
}
/* Make sure IV carries forward from any CBC results. */
err = gss_krb5_cts_crypt(cipher, buf,
offset + GSS_KRB5_TOK_HDR_LEN + cbcbytes,
desc.iv, pages, 1);
if (err) {
err = GSS_S_FAILURE;
goto out_err;
}
/* Now update buf to account for HMAC */
buf->tail[0].iov_len += kctx->gk5e->cksumlength;
buf->len += kctx->gk5e->cksumlength;
out_err:
if (err)
err = GSS_S_FAILURE;
return err;
}
u32
gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
u32 *headskip, u32 *tailskip)
{
struct xdr_buf subbuf;
u32 ret = 0;
u8 *cksum_key;
struct crypto_blkcipher *cipher, *aux_cipher;
struct xdr_netobj our_hmac_obj;
u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN];
u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN];
int nblocks, blocksize, cbcbytes;
struct decryptor_desc desc;
unsigned int usage;
if (kctx->initiate) {
cipher = kctx->acceptor_enc;
aux_cipher = kctx->acceptor_enc_aux;
cksum_key = kctx->acceptor_integ;
usage = KG_USAGE_ACCEPTOR_SEAL;
} else {
cipher = kctx->initiator_enc;
aux_cipher = kctx->initiator_enc_aux;
cksum_key = kctx->initiator_integ;
usage = KG_USAGE_INITIATOR_SEAL;
}
blocksize = crypto_blkcipher_blocksize(cipher);
/* create a segment skipping the header and leaving out the checksum */
xdr_buf_subsegment(buf, &subbuf, offset + GSS_KRB5_TOK_HDR_LEN,
(buf->len - offset - GSS_KRB5_TOK_HDR_LEN -
kctx->gk5e->cksumlength));
nblocks = (subbuf.len + blocksize - 1) / blocksize;
cbcbytes = 0;
if (nblocks > 2)
cbcbytes = (nblocks - 2) * blocksize;
memset(desc.iv, 0, sizeof(desc.iv));
if (cbcbytes) {
desc.fragno = 0;
desc.fraglen = 0;
desc.desc.info = desc.iv;
desc.desc.flags = 0;
desc.desc.tfm = aux_cipher;
sg_init_table(desc.frags, 4);
ret = xdr_process_buf(&subbuf, 0, cbcbytes, decryptor, &desc);
if (ret)
goto out_err;
}
/* Make sure IV carries forward from any CBC results. */
ret = gss_krb5_cts_crypt(cipher, &subbuf, cbcbytes, desc.iv, NULL, 0);
if (ret)
goto out_err;
/* Calculate our hmac over the plaintext data */
our_hmac_obj.len = sizeof(our_hmac);
our_hmac_obj.data = our_hmac;
ret = make_checksum_v2(kctx, NULL, 0, &subbuf, 0,
cksum_key, usage, &our_hmac_obj);
if (ret)
goto out_err;
/* Get the packet's hmac value */
ret = read_bytes_from_xdr_buf(buf, buf->len - kctx->gk5e->cksumlength,
pkt_hmac, kctx->gk5e->cksumlength);
if (ret)
goto out_err;
if (memcmp(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) {
ret = GSS_S_BAD_SIG;
goto out_err;
}
*headskip = kctx->gk5e->conflen;
*tailskip = kctx->gk5e->cksumlength;
out_err:
if (ret && ret != GSS_S_BAD_SIG)
ret = GSS_S_FAILURE;
return ret;
}
/*
* Compute Kseq given the initial session key and the checksum.
* Set the key of the given cipher.
*/
int
krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
unsigned char *cksum)
{
struct crypto_hash *hmac;
struct hash_desc desc;
struct scatterlist sg[1];
u8 Kseq[GSS_KRB5_MAX_KEYLEN];
u32 zeroconstant = 0;
int err;
dprintk("%s: entered\n", __func__);
hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(hmac)) {
dprintk("%s: error %ld, allocating hash '%s'\n",
__func__, PTR_ERR(hmac), kctx->gk5e->cksum_name);
return PTR_ERR(hmac);
}
desc.tfm = hmac;
desc.flags = 0;
err = crypto_hash_init(&desc);
if (err)
goto out_err;
/* Compute intermediate Kseq from session key */
err = crypto_hash_setkey(hmac, kctx->Ksess, kctx->gk5e->keylength);
if (err)
goto out_err;
sg_init_table(sg, 1);
sg_set_buf(sg, &zeroconstant, 4);
err = crypto_hash_digest(&desc, sg, 4, Kseq);
if (err)
goto out_err;
/* Compute final Kseq from the checksum and intermediate Kseq */
err = crypto_hash_setkey(hmac, Kseq, kctx->gk5e->keylength);
if (err)
goto out_err;
sg_set_buf(sg, cksum, 8);
err = crypto_hash_digest(&desc, sg, 8, Kseq);
if (err)
goto out_err;
err = crypto_blkcipher_setkey(cipher, Kseq, kctx->gk5e->keylength);
if (err)
goto out_err;
err = 0;
out_err:
crypto_free_hash(hmac);
dprintk("%s: returning %d\n", __func__, err);
return err;
}
/*
* Compute Kcrypt given the initial session key and the plaintext seqnum.
* Set the key of cipher kctx->enc.
*/
int
krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
s32 seqnum)
{
struct crypto_hash *hmac;
struct hash_desc desc;
struct scatterlist sg[1];
u8 Kcrypt[GSS_KRB5_MAX_KEYLEN];
u8 zeroconstant[4] = {0};
u8 seqnumarray[4];
int err, i;
dprintk("%s: entered, seqnum %u\n", __func__, seqnum);
hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(hmac)) {
dprintk("%s: error %ld, allocating hash '%s'\n",
__func__, PTR_ERR(hmac), kctx->gk5e->cksum_name);
return PTR_ERR(hmac);
}
desc.tfm = hmac;
desc.flags = 0;
err = crypto_hash_init(&desc);
if (err)
goto out_err;
/* Compute intermediate Kcrypt from session key */
for (i = 0; i < kctx->gk5e->keylength; i++)
Kcrypt[i] = kctx->Ksess[i] ^ 0xf0;
err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength);
if (err)
goto out_err;
sg_init_table(sg, 1);
sg_set_buf(sg, zeroconstant, 4);
err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
if (err)
goto out_err;
/* Compute final Kcrypt from the seqnum and intermediate Kcrypt */
err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength);
if (err)
goto out_err;
seqnumarray[0] = (unsigned char) ((seqnum >> 24) & 0xff);
seqnumarray[1] = (unsigned char) ((seqnum >> 16) & 0xff);
seqnumarray[2] = (unsigned char) ((seqnum >> 8) & 0xff);
seqnumarray[3] = (unsigned char) ((seqnum >> 0) & 0xff);
sg_set_buf(sg, seqnumarray, 4);
err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
if (err)
goto out_err;
err = crypto_blkcipher_setkey(cipher, Kcrypt, kctx->gk5e->keylength);
if (err)
goto out_err;
err = 0;
out_err:
crypto_free_hash(hmac);
dprintk("%s: returning %d\n", __func__, err);
return err;
}

327
net/sunrpc/auth_gss/gss_krb5_keys.c Normal file
View file

@ -0,0 +1,327 @@
/*
* COPYRIGHT (c) 2008
* The Regents of the University of Michigan
* ALL RIGHTS RESERVED
*
* Permission is granted to use, copy, create derivative works
* and redistribute this software and such derivative works
* for any purpose, so long as the name of The University of
* Michigan is not used in any advertising or publicity
* pertaining to the use of distribution of this software
* without specific, written prior authorization. If the
* above copyright notice or any other identification of the
* University of Michigan is included in any copy of any
* portion of this software, then the disclaimer below must
* also be included.
*
* THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION
* FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY
* PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF
* MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
* WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
* REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE
* FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR
* CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING
* OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
* IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGES.
*/
/*
* Copyright (C) 1998 by the FundsXpress, INC.
*
* All rights reserved.
*
* Export of this software from the United States of America may require
* a specific license from the United States Government. It is the
* responsibility of any person or organization contemplating export to
* obtain such a license before exporting.
*
* WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
* distribute this software and its documentation for any purpose and
* without fee is hereby granted, provided that the above copyright
* notice appear in all copies and that both that copyright notice and
* this permission notice appear in supporting documentation, and that
* the name of FundsXpress. not be used in advertising or publicity pertaining
* to distribution of the software without specific, written prior
* permission. FundsXpress makes no representations about the suitability of
* this software for any purpose. It is provided "as is" without express
* or implied warranty.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
#include <linux/err.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/sunrpc/gss_krb5.h>
#include <linux/sunrpc/xdr.h>
#include <linux/lcm.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
/*
* This is the n-fold function as described in rfc3961, sec 5.1
* Taken from MIT Kerberos and modified.
*/
static void krb5_nfold(u32 inbits, const u8 *in,
u32 outbits, u8 *out)
{
unsigned long ulcm;
int byte, i, msbit;
/* the code below is more readable if I make these bytes
instead of bits */
inbits >>= 3;
outbits >>= 3;
/* first compute lcm(n,k) */
ulcm = lcm(inbits, outbits);
/* now do the real work */
memset(out, 0, outbits);
byte = 0;
/* this will end up cycling through k lcm(k,n)/k times, which
is correct */
for (i = ulcm-1; i >= 0; i--) {
/* compute the msbit in k which gets added into this byte */
msbit = (
/* first, start with the msbit in the first,
* unrotated byte */
((inbits << 3) - 1)
/* then, for each byte, shift to the right
* for each repetition */
+ (((inbits << 3) + 13) * (i/inbits))
/* last, pick out the correct byte within
* that shifted repetition */
+ ((inbits - (i % inbits)) << 3)
) % (inbits << 3);
/* pull out the byte value itself */
byte += (((in[((inbits - 1) - (msbit >> 3)) % inbits] << 8)|
(in[((inbits) - (msbit >> 3)) % inbits]))
>> ((msbit & 7) + 1)) & 0xff;
/* do the addition */
byte += out[i % outbits];
out[i % outbits] = byte & 0xff;
/* keep around the carry bit, if any */
byte >>= 8;
}
/* if there's a carry bit left over, add it back in */
if (byte) {
for (i = outbits - 1; i >= 0; i--) {
/* do the addition */
byte += out[i];
out[i] = byte & 0xff;
/* keep around the carry bit, if any */
byte >>= 8;
}
}
}
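A quick way to exercise the routine above is against the RFC 3961 appendix A.1 test vectors, e.g. 64-fold("kerberos") = 6b 65 72 62 65 72 6f 73. The user-space sketch below is not part of the original file; it assumes krb5_nfold() has been copied into the same translation unit with its static qualifier dropped and a trivial lcm() supplied in place of <linux/lcm.h>:

#include <assert.h>
#include <string.h>

typedef unsigned int u32;
typedef unsigned char u8;

/* the routine above, assumed compiled into this unit without 'static' */
void krb5_nfold(u32 inbits, const u8 *in, u32 outbits, u8 *out);

int main(void)
{
	/* RFC 3961 A.1: 64-fold("kerberos") reproduces the input bytes */
	static const u8 expect[8] =
		{ 0x6b, 0x65, 0x72, 0x62, 0x65, 0x72, 0x6f, 0x73 };
	u8 out[8];

	krb5_nfold(64, (const u8 *)"kerberos", 64, out);
	assert(memcmp(out, expect, sizeof(out)) == 0);
	return 0;
}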
/*
* This is the DK (derive_key) function as described in rfc3961, sec 5.1
* Taken from MIT Kerberos and modified.
*/
u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
const struct xdr_netobj *inkey,
struct xdr_netobj *outkey,
const struct xdr_netobj *in_constant,
gfp_t gfp_mask)
{
size_t blocksize, keybytes, keylength, n;
unsigned char *inblockdata, *outblockdata, *rawkey;
struct xdr_netobj inblock, outblock;
struct crypto_blkcipher *cipher;
u32 ret = EINVAL;
blocksize = gk5e->blocksize;
keybytes = gk5e->keybytes;
keylength = gk5e->keylength;
if ((inkey->len != keylength) || (outkey->len != keylength))
goto err_return;
cipher = crypto_alloc_blkcipher(gk5e->encrypt_name, 0,
CRYPTO_ALG_ASYNC);
if (IS_ERR(cipher))
goto err_return;
if (crypto_blkcipher_setkey(cipher, inkey->data, inkey->len))
goto err_return;
/* allocate and set up buffers */
ret = ENOMEM;
inblockdata = kmalloc(blocksize, gfp_mask);
if (inblockdata == NULL)
goto err_free_cipher;
outblockdata = kmalloc(blocksize, gfp_mask);
if (outblockdata == NULL)
goto err_free_in;
rawkey = kmalloc(keybytes, gfp_mask);
if (rawkey == NULL)
goto err_free_out;
inblock.data = (char *) inblockdata;
inblock.len = blocksize;
outblock.data = (char *) outblockdata;
outblock.len = blocksize;
/* initialize the input block */
if (in_constant->len == inblock.len) {
memcpy(inblock.data, in_constant->data, inblock.len);
} else {
krb5_nfold(in_constant->len * 8, in_constant->data,
inblock.len * 8, inblock.data);
}
/* loop encrypting the blocks until enough key bytes are generated */
n = 0;
while (n < keybytes) {
(*(gk5e->encrypt))(cipher, NULL, inblock.data,
outblock.data, inblock.len);
if ((keybytes - n) <= outblock.len) {
memcpy(rawkey + n, outblock.data, (keybytes - n));
break;
}
memcpy(rawkey + n, outblock.data, outblock.len);
memcpy(inblock.data, outblock.data, outblock.len);
n += outblock.len;
}
/* postprocess the key */
inblock.data = (char *) rawkey;
inblock.len = keybytes;
BUG_ON(gk5e->mk_key == NULL);
ret = (*(gk5e->mk_key))(gk5e, &inblock, outkey);
if (ret) {
dprintk("%s: got %d from mk_key function for '%s'\n",
__func__, ret, gk5e->encrypt_name);
goto err_free_raw;
}
/* clean memory, free resources and exit */
ret = 0;
err_free_raw:
memset(rawkey, 0, keybytes);
kfree(rawkey);
err_free_out:
memset(outblockdata, 0, blocksize);
kfree(outblockdata);
err_free_in:
memset(inblockdata, 0, blocksize);
kfree(inblockdata);
err_free_cipher:
crypto_free_blkcipher(cipher);
err_return:
return ret;
}
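To see how often the block-filling loop above runs the cipher for the enctypes defined later in this commit: des3 needs 21 key bytes from 8-byte blocks (three encryptions, 8 + 8 + 5), aes128 needs a single 16-byte block, and aes256 needs two. A throwaway sketch of that arithmetic (not part of the original file):

#include <stdio.h>

static unsigned int dk_rounds(unsigned int keybytes, unsigned int blocksize)
{
	/* number of times the while-loop in krb5_derive_key() runs the cipher */
	return (keybytes + blocksize - 1) / blocksize;
}

int main(void)
{
	printf("des3:   %u\n", dk_rounds(21, 8));	/* 3 */
	printf("aes128: %u\n", dk_rounds(16, 16));	/* 1 */
	printf("aes256: %u\n", dk_rounds(32, 16));	/* 2 */
	return 0;
}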
#define smask(step) ((1<<step)-1)
#define pstep(x, step) (((x)&smask(step))^(((x)>>step)&smask(step)))
#define parity_char(x) pstep(pstep(pstep((x), 4), 2), 1)
static void mit_des_fixup_key_parity(u8 key[8])
{
int i;
for (i = 0; i < 8; i++) {
key[i] &= 0xfe;
key[i] |= 1^parity_char(key[i]);
}
}
/*
* This is the des3 key derivation postprocess function
*/
u32 gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e,
struct xdr_netobj *randombits,
struct xdr_netobj *key)
{
int i;
u32 ret = EINVAL;
if (key->len != 24) {
dprintk("%s: key->len is %d\n", __func__, key->len);
goto err_out;
}
if (randombits->len != 21) {
dprintk("%s: randombits->len is %d\n",
__func__, randombits->len);
goto err_out;
}
/* take the seven bytes, move them around into the top 7 bits of the
8 key bytes, then compute the parity bits. Do this three times. */
for (i = 0; i < 3; i++) {
memcpy(key->data + i*8, randombits->data + i*7, 7);
key->data[i*8+7] = (((key->data[i*8]&1)<<1) |
((key->data[i*8+1]&1)<<2) |
((key->data[i*8+2]&1)<<3) |
((key->data[i*8+3]&1)<<4) |
((key->data[i*8+4]&1)<<5) |
((key->data[i*8+5]&1)<<6) |
((key->data[i*8+6]&1)<<7));
mit_des_fixup_key_parity(key->data + i*8);
}
ret = 0;
err_out:
return ret;
}
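To make the 7-to-8 byte expansion above concrete: the low bit of each of the seven random bytes is collected into bits 1..7 of the eighth byte, and then every byte has its own low bit rewritten so the whole byte has odd parity, as DES key schedules expect. The user-space sketch below is not part of the original file and re-implements the parity fix-up rather than reusing the static helper above:

#include <stdio.h>
#include <string.h>

static unsigned char fix_parity(unsigned char b)
{
	/* force bit 0 so the byte ends up with an odd number of set bits */
	unsigned char p = b >> 1, parity = 0;

	for (; p; p >>= 1)
		parity ^= p & 1;
	return (unsigned char)((b & 0xfe) | (parity ^ 1));
}

int main(void)
{
	const unsigned char rnd[7] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40 };
	unsigned char key[8];
	int i;

	memcpy(key, rnd, 7);
	key[7] = (unsigned char)(((rnd[0] & 1) << 1) | ((rnd[1] & 1) << 2) |
				 ((rnd[2] & 1) << 3) | ((rnd[3] & 1) << 4) |
				 ((rnd[4] & 1) << 5) | ((rnd[5] & 1) << 6) |
				 ((rnd[6] & 1) << 7));
	for (i = 0; i < 8; i++)
		printf("%02x ", fix_parity(key[i]));
	printf("\n");	/* prints: 01 02 04 08 10 20 40 02 */
	return 0;
}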
/*
* This is the aes key derivation postprocess function
*/
u32 gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e,
struct xdr_netobj *randombits,
struct xdr_netobj *key)
{
u32 ret = EINVAL;
if (key->len != 16 && key->len != 32) {
dprintk("%s: key->len is %d\n", __func__, key->len);
goto err_out;
}
if (randombits->len != 16 && randombits->len != 32) {
dprintk("%s: randombits->len is %d\n",
__func__, randombits->len);
goto err_out;
}
if (randombits->len != key->len) {
dprintk("%s: randombits->len is %d, key->len is %d\n",
__func__, randombits->len, key->len);
goto err_out;
}
memcpy(key->data, randombits->data, key->len);
ret = 0;
err_out:
return ret;
}

788
net/sunrpc/auth_gss/gss_krb5_mech.c Normal file
View file

@ -0,0 +1,788 @@
/*
* linux/net/sunrpc/gss_krb5_mech.c
*
* Copyright (c) 2001-2008 The Regents of the University of Michigan.
* All rights reserved.
*
* Andy Adamson <andros@umich.edu>
* J. Bruce Fields <bfields@umich.edu>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/err.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/sunrpc/auth.h>
#include <linux/sunrpc/gss_krb5.h>
#include <linux/sunrpc/xdr.h>
#include <linux/crypto.h>
#include <linux/sunrpc/gss_krb5_enctypes.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
static struct gss_api_mech gss_kerberos_mech; /* forward declaration */
static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
/*
* DES (All DES enctypes are mapped to the same gss functionality)
*/
{
.etype = ENCTYPE_DES_CBC_RAW,
.ctype = CKSUMTYPE_RSA_MD5,
.name = "des-cbc-crc",
.encrypt_name = "cbc(des)",
.cksum_name = "md5",
.encrypt = krb5_encrypt,
.decrypt = krb5_decrypt,
.mk_key = NULL,
.signalg = SGN_ALG_DES_MAC_MD5,
.sealalg = SEAL_ALG_DES,
.keybytes = 7,
.keylength = 8,
.blocksize = 8,
.conflen = 8,
.cksumlength = 8,
.keyed_cksum = 0,
},
/*
* RC4-HMAC
*/
{
.etype = ENCTYPE_ARCFOUR_HMAC,
.ctype = CKSUMTYPE_HMAC_MD5_ARCFOUR,
.name = "rc4-hmac",
.encrypt_name = "ecb(arc4)",
.cksum_name = "hmac(md5)",
.encrypt = krb5_encrypt,
.decrypt = krb5_decrypt,
.mk_key = NULL,
.signalg = SGN_ALG_HMAC_MD5,
.sealalg = SEAL_ALG_MICROSOFT_RC4,
.keybytes = 16,
.keylength = 16,
.blocksize = 1,
.conflen = 8,
.cksumlength = 8,
.keyed_cksum = 1,
},
/*
* 3DES
*/
{
.etype = ENCTYPE_DES3_CBC_RAW,
.ctype = CKSUMTYPE_HMAC_SHA1_DES3,
.name = "des3-hmac-sha1",
.encrypt_name = "cbc(des3_ede)",
.cksum_name = "hmac(sha1)",
.encrypt = krb5_encrypt,
.decrypt = krb5_decrypt,
.mk_key = gss_krb5_des3_make_key,
.signalg = SGN_ALG_HMAC_SHA1_DES3_KD,
.sealalg = SEAL_ALG_DES3KD,
.keybytes = 21,
.keylength = 24,
.blocksize = 8,
.conflen = 8,
.cksumlength = 20,
.keyed_cksum = 1,
},
/*
* AES128
*/
{
.etype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
.ctype = CKSUMTYPE_HMAC_SHA1_96_AES128,
.name = "aes128-cts",
.encrypt_name = "cts(cbc(aes))",
.cksum_name = "hmac(sha1)",
.encrypt = krb5_encrypt,
.decrypt = krb5_decrypt,
.mk_key = gss_krb5_aes_make_key,
.encrypt_v2 = gss_krb5_aes_encrypt,
.decrypt_v2 = gss_krb5_aes_decrypt,
.signalg = -1,
.sealalg = -1,
.keybytes = 16,
.keylength = 16,
.blocksize = 16,
.conflen = 16,
.cksumlength = 12,
.keyed_cksum = 1,
},
/*
* AES256
*/
{
.etype = ENCTYPE_AES256_CTS_HMAC_SHA1_96,
.ctype = CKSUMTYPE_HMAC_SHA1_96_AES256,
.name = "aes256-cts",
.encrypt_name = "cts(cbc(aes))",
.cksum_name = "hmac(sha1)",
.encrypt = krb5_encrypt,
.decrypt = krb5_decrypt,
.mk_key = gss_krb5_aes_make_key,
.encrypt_v2 = gss_krb5_aes_encrypt,
.decrypt_v2 = gss_krb5_aes_decrypt,
.signalg = -1,
.sealalg = -1,
.keybytes = 32,
.keylength = 32,
.blocksize = 16,
.conflen = 16,
.cksumlength = 12,
.keyed_cksum = 1,
},
};
static const int num_supported_enctypes =
ARRAY_SIZE(supported_gss_krb5_enctypes);
static int
supported_gss_krb5_enctype(int etype)
{
int i;
for (i = 0; i < num_supported_enctypes; i++)
if (supported_gss_krb5_enctypes[i].etype == etype)
return 1;
return 0;
}
static const struct gss_krb5_enctype *
get_gss_krb5_enctype(int etype)
{
int i;
for (i = 0; i < num_supported_enctypes; i++)
if (supported_gss_krb5_enctypes[i].etype == etype)
return &supported_gss_krb5_enctypes[i];
return NULL;
}
static const void *
simple_get_bytes(const void *p, const void *end, void *res, int len)
{
const void *q = (const void *)((const char *)p + len);
if (unlikely(q > end || q < p))
return ERR_PTR(-EFAULT);
memcpy(res, p, len);
return q;
}
static const void *
simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
{
const void *q;
unsigned int len;
p = simple_get_bytes(p, end, &len, sizeof(len));
if (IS_ERR(p))
return p;
q = (const void *)((const char *)p + len);
if (unlikely(q > end || q < p))
return ERR_PTR(-EFAULT);
res->data = kmemdup(p, len, GFP_NOFS);
if (unlikely(res->data == NULL))
return ERR_PTR(-ENOMEM);
res->len = len;
return q;
}
static inline const void *
get_key(const void *p, const void *end,
struct krb5_ctx *ctx, struct crypto_blkcipher **res)
{
struct xdr_netobj key;
int alg;
p = simple_get_bytes(p, end, &alg, sizeof(alg));
if (IS_ERR(p))
goto out_err;
switch (alg) {
case ENCTYPE_DES_CBC_CRC:
case ENCTYPE_DES_CBC_MD4:
case ENCTYPE_DES_CBC_MD5:
/* Map all these key types to ENCTYPE_DES_CBC_RAW */
alg = ENCTYPE_DES_CBC_RAW;
break;
}
if (!supported_gss_krb5_enctype(alg)) {
printk(KERN_WARNING "gss_kerberos_mech: unsupported "
"encryption key algorithm %d\n", alg);
p = ERR_PTR(-EINVAL);
goto out_err;
}
p = simple_get_netobj(p, end, &key);
if (IS_ERR(p))
goto out_err;
*res = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
CRYPTO_ALG_ASYNC);
if (IS_ERR(*res)) {
printk(KERN_WARNING "gss_kerberos_mech: unable to initialize "
"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
*res = NULL;
goto out_err_free_key;
}
if (crypto_blkcipher_setkey(*res, key.data, key.len)) {
printk(KERN_WARNING "gss_kerberos_mech: error setting key for "
"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
goto out_err_free_tfm;
}
kfree(key.data);
return p;
out_err_free_tfm:
crypto_free_blkcipher(*res);
out_err_free_key:
kfree(key.data);
p = ERR_PTR(-EINVAL);
out_err:
return p;
}
static int
gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
{
int tmp;
p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
if (IS_ERR(p))
goto out_err;
/* Old format supports only DES! Any other enctype uses new format */
ctx->enctype = ENCTYPE_DES_CBC_RAW;
ctx->gk5e = get_gss_krb5_enctype(ctx->enctype);
if (ctx->gk5e == NULL) {
p = ERR_PTR(-EINVAL);
goto out_err;
}
/* The downcall format was designed before we completely understood
 * the uses of the context fields; so it includes some fields that
 * we give only minimal sanity-checking, and some that we ignore
 * completely (like the next twenty bytes): */
if (unlikely(p + 20 > end || p + 20 < p)) {
p = ERR_PTR(-EFAULT);
goto out_err;
}
p += 20;
p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
if (IS_ERR(p))
goto out_err;
if (tmp != SGN_ALG_DES_MAC_MD5) {
p = ERR_PTR(-ENOSYS);
goto out_err;
}
p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
if (IS_ERR(p))
goto out_err;
if (tmp != SEAL_ALG_DES) {
p = ERR_PTR(-ENOSYS);
goto out_err;
}
p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
if (IS_ERR(p))
goto out_err;
p = simple_get_bytes(p, end, &ctx->seq_send, sizeof(ctx->seq_send));
if (IS_ERR(p))
goto out_err;
p = simple_get_netobj(p, end, &ctx->mech_used);
if (IS_ERR(p))
goto out_err;
p = get_key(p, end, ctx, &ctx->enc);
if (IS_ERR(p))
goto out_err_free_mech;
p = get_key(p, end, ctx, &ctx->seq);
if (IS_ERR(p))
goto out_err_free_key1;
if (p != end) {
p = ERR_PTR(-EFAULT);
goto out_err_free_key2;
}
return 0;
out_err_free_key2:
crypto_free_blkcipher(ctx->seq);
out_err_free_key1:
crypto_free_blkcipher(ctx->enc);
out_err_free_mech:
kfree(ctx->mech_used.data);
out_err:
return PTR_ERR(p);
}
static struct crypto_blkcipher *
context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key)
{
struct crypto_blkcipher *cp;
cp = crypto_alloc_blkcipher(cname, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(cp)) {
dprintk("gss_kerberos_mech: unable to initialize "
"crypto algorithm %s\n", cname);
return NULL;
}
if (crypto_blkcipher_setkey(cp, key, ctx->gk5e->keylength)) {
dprintk("gss_kerberos_mech: error setting key for "
"crypto algorithm %s\n", cname);
crypto_free_blkcipher(cp);
return NULL;
}
return cp;
}
static inline void
set_cdata(u8 cdata[GSS_KRB5_K5CLENGTH], u32 usage, u8 seed)
{
cdata[0] = (usage>>24)&0xff;
cdata[1] = (usage>>16)&0xff;
cdata[2] = (usage>>8)&0xff;
cdata[3] = usage&0xff;
cdata[4] = seed;
}
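For example, the five-byte constant handed to krb5_derive_key() for the initiator seal encryption key (see context_derive_keys_new() below) would be 00 00 00 18 aa, assuming the usual RFC 4121 key-usage number (initiator seal = 24) and the RFC 3961 "encryption" seed byte 0xAA. A throwaway user-space check, not part of the original file:

#include <assert.h>

typedef unsigned char u8;
typedef unsigned int u32;

static void set_cdata_copy(u8 cdata[5], u32 usage, u8 seed)
{
	/* same encoding as set_cdata() above: big-endian usage, then seed byte */
	cdata[0] = (usage >> 24) & 0xff;
	cdata[1] = (usage >> 16) & 0xff;
	cdata[2] = (usage >> 8) & 0xff;
	cdata[3] = usage & 0xff;
	cdata[4] = seed;
}

int main(void)
{
	u8 c[5];

	set_cdata_copy(c, 24 /* initiator seal, per RFC 4121 */, 0xaa /* Ke */);
	assert(c[0] == 0 && c[1] == 0 && c[2] == 0 && c[3] == 0x18 && c[4] == 0xaa);
	return 0;
}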
static int
context_derive_keys_des3(struct krb5_ctx *ctx, gfp_t gfp_mask)
{
struct xdr_netobj c, keyin, keyout;
u8 cdata[GSS_KRB5_K5CLENGTH];
u32 err;
c.len = GSS_KRB5_K5CLENGTH;
c.data = cdata;
keyin.data = ctx->Ksess;
keyin.len = ctx->gk5e->keylength;
keyout.len = ctx->gk5e->keylength;
/* seq uses the raw key */
ctx->seq = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name,
ctx->Ksess);
if (ctx->seq == NULL)
goto out_err;
ctx->enc = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name,
ctx->Ksess);
if (ctx->enc == NULL)
goto out_free_seq;
/* derive cksum */
set_cdata(cdata, KG_USAGE_SIGN, KEY_USAGE_SEED_CHECKSUM);
keyout.data = ctx->cksum;
err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
if (err) {
dprintk("%s: Error %d deriving cksum key\n",
__func__, err);
goto out_free_enc;
}
return 0;
out_free_enc:
crypto_free_blkcipher(ctx->enc);
out_free_seq:
crypto_free_blkcipher(ctx->seq);
out_err:
return -EINVAL;
}
/*
* Note that RC4 depends on deriving keys using the sequence
* number or the checksum of a token. Therefore, the final keys
* cannot be calculated until the token is being constructed!
*/
static int
context_derive_keys_rc4(struct krb5_ctx *ctx)
{
struct crypto_hash *hmac;
char sigkeyconstant[] = "signaturekey";
int slen = strlen(sigkeyconstant) + 1; /* include null terminator */
struct hash_desc desc;
struct scatterlist sg[1];
int err;
dprintk("RPC: %s: entered\n", __func__);
/*
* derive cksum (aka Ksign) key
*/
hmac = crypto_alloc_hash(ctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(hmac)) {
dprintk("%s: error %ld allocating hash '%s'\n",
__func__, PTR_ERR(hmac), ctx->gk5e->cksum_name);
err = PTR_ERR(hmac);
goto out_err;
}
err = crypto_hash_setkey(hmac, ctx->Ksess, ctx->gk5e->keylength);
if (err)
goto out_err_free_hmac;
sg_init_table(sg, 1);
sg_set_buf(sg, sigkeyconstant, slen);
desc.tfm = hmac;
desc.flags = 0;
err = crypto_hash_init(&desc);
if (err)
goto out_err_free_hmac;
err = crypto_hash_digest(&desc, sg, slen, ctx->cksum);
if (err)
goto out_err_free_hmac;
/*
* allocate hash, and blkciphers for data and seqnum encryption
*/
ctx->enc = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
CRYPTO_ALG_ASYNC);
if (IS_ERR(ctx->enc)) {
err = PTR_ERR(ctx->enc);
goto out_err_free_hmac;
}
ctx->seq = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
CRYPTO_ALG_ASYNC);
if (IS_ERR(ctx->seq)) {
crypto_free_blkcipher(ctx->enc);
err = PTR_ERR(ctx->seq);
goto out_err_free_hmac;
}
dprintk("RPC: %s: returning success\n", __func__);
err = 0;
out_err_free_hmac:
crypto_free_hash(hmac);
out_err:
dprintk("RPC: %s: returning %d\n", __func__, err);
return err;
}
static int
context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask)
{
struct xdr_netobj c, keyin, keyout;
u8 cdata[GSS_KRB5_K5CLENGTH];
u32 err;
c.len = GSS_KRB5_K5CLENGTH;
c.data = cdata;
keyin.data = ctx->Ksess;
keyin.len = ctx->gk5e->keylength;
keyout.len = ctx->gk5e->keylength;
/* initiator seal encryption */
set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
keyout.data = ctx->initiator_seal;
err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
if (err) {
dprintk("%s: Error %d deriving initiator_seal key\n",
__func__, err);
goto out_err;
}
ctx->initiator_enc = context_v2_alloc_cipher(ctx,
ctx->gk5e->encrypt_name,
ctx->initiator_seal);
if (ctx->initiator_enc == NULL)
goto out_err;
/* acceptor seal encryption */
set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
keyout.data = ctx->acceptor_seal;
err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
if (err) {
dprintk("%s: Error %d deriving acceptor_seal key\n",
__func__, err);
goto out_free_initiator_enc;
}
ctx->acceptor_enc = context_v2_alloc_cipher(ctx,
ctx->gk5e->encrypt_name,
ctx->acceptor_seal);
if (ctx->acceptor_enc == NULL)
goto out_free_initiator_enc;
/* initiator sign checksum */
set_cdata(cdata, KG_USAGE_INITIATOR_SIGN, KEY_USAGE_SEED_CHECKSUM);
keyout.data = ctx->initiator_sign;
err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
if (err) {
dprintk("%s: Error %d deriving initiator_sign key\n",
__func__, err);
goto out_free_acceptor_enc;
}
/* acceptor sign checksum */
set_cdata(cdata, KG_USAGE_ACCEPTOR_SIGN, KEY_USAGE_SEED_CHECKSUM);
keyout.data = ctx->acceptor_sign;
err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
if (err) {
dprintk("%s: Error %d deriving acceptor_sign key\n",
__func__, err);
goto out_free_acceptor_enc;
}
/* initiator seal integrity */
set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_INTEGRITY);
keyout.data = ctx->initiator_integ;
err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
if (err) {
dprintk("%s: Error %d deriving initiator_integ key\n",
__func__, err);
goto out_free_acceptor_enc;
}
/* acceptor seal integrity */
set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_INTEGRITY);
keyout.data = ctx->acceptor_integ;
err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
if (err) {
dprintk("%s: Error %d deriving acceptor_integ key\n",
__func__, err);
goto out_free_acceptor_enc;
}
switch (ctx->enctype) {
case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
ctx->initiator_enc_aux =
context_v2_alloc_cipher(ctx, "cbc(aes)",
ctx->initiator_seal);
if (ctx->initiator_enc_aux == NULL)
goto out_free_acceptor_enc;
ctx->acceptor_enc_aux =
context_v2_alloc_cipher(ctx, "cbc(aes)",
ctx->acceptor_seal);
if (ctx->acceptor_enc_aux == NULL) {
crypto_free_blkcipher(ctx->initiator_enc_aux);
goto out_free_acceptor_enc;
}
}
return 0;
out_free_acceptor_enc:
crypto_free_blkcipher(ctx->acceptor_enc);
out_free_initiator_enc:
crypto_free_blkcipher(ctx->initiator_enc);
out_err:
return -EINVAL;
}
static int
gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
gfp_t gfp_mask)
{
int keylen;
p = simple_get_bytes(p, end, &ctx->flags, sizeof(ctx->flags));
if (IS_ERR(p))
goto out_err;
ctx->initiate = ctx->flags & KRB5_CTX_FLAG_INITIATOR;
p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
if (IS_ERR(p))
goto out_err;
p = simple_get_bytes(p, end, &ctx->seq_send64, sizeof(ctx->seq_send64));
if (IS_ERR(p))
goto out_err;
/* set seq_send for use by "older" enctypes */
ctx->seq_send = ctx->seq_send64;
if (ctx->seq_send64 != ctx->seq_send) {
dprintk("%s: seq_send64 %lx, seq_send %x overflow?\n", __func__,
(unsigned long)ctx->seq_send64, ctx->seq_send);
p = ERR_PTR(-EINVAL);
goto out_err;
}
p = simple_get_bytes(p, end, &ctx->enctype, sizeof(ctx->enctype));
if (IS_ERR(p))
goto out_err;
/* Map ENCTYPE_DES3_CBC_SHA1 to ENCTYPE_DES3_CBC_RAW */
if (ctx->enctype == ENCTYPE_DES3_CBC_SHA1)
ctx->enctype = ENCTYPE_DES3_CBC_RAW;
ctx->gk5e = get_gss_krb5_enctype(ctx->enctype);
if (ctx->gk5e == NULL) {
dprintk("gss_kerberos_mech: unsupported krb5 enctype %u\n",
ctx->enctype);
p = ERR_PTR(-EINVAL);
goto out_err;
}
keylen = ctx->gk5e->keylength;
p = simple_get_bytes(p, end, ctx->Ksess, keylen);
if (IS_ERR(p))
goto out_err;
if (p != end) {
p = ERR_PTR(-EINVAL);
goto out_err;
}
ctx->mech_used.data = kmemdup(gss_kerberos_mech.gm_oid.data,
gss_kerberos_mech.gm_oid.len, gfp_mask);
if (unlikely(ctx->mech_used.data == NULL)) {
p = ERR_PTR(-ENOMEM);
goto out_err;
}
ctx->mech_used.len = gss_kerberos_mech.gm_oid.len;
switch (ctx->enctype) {
case ENCTYPE_DES3_CBC_RAW:
return context_derive_keys_des3(ctx, gfp_mask);
case ENCTYPE_ARCFOUR_HMAC:
return context_derive_keys_rc4(ctx);
case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
return context_derive_keys_new(ctx, gfp_mask);
default:
return -EINVAL;
}
out_err:
return PTR_ERR(p);
}
static int
gss_import_sec_context_kerberos(const void *p, size_t len,
struct gss_ctx *ctx_id,
time_t *endtime,
gfp_t gfp_mask)
{
const void *end = (const void *)((const char *)p + len);
struct krb5_ctx *ctx;
int ret;
ctx = kzalloc(sizeof(*ctx), gfp_mask);
if (ctx == NULL)
return -ENOMEM;
if (len == 85)
ret = gss_import_v1_context(p, end, ctx);
else
ret = gss_import_v2_context(p, end, ctx, gfp_mask);
if (ret == 0) {
ctx_id->internal_ctx_id = ctx;
if (endtime)
*endtime = ctx->endtime;
} else
kfree(ctx);
dprintk("RPC: %s: returning %d\n", __func__, ret);
return ret;
}
static void
gss_delete_sec_context_kerberos(void *internal_ctx) {
struct krb5_ctx *kctx = internal_ctx;
crypto_free_blkcipher(kctx->seq);
crypto_free_blkcipher(kctx->enc);
crypto_free_blkcipher(kctx->acceptor_enc);
crypto_free_blkcipher(kctx->initiator_enc);
crypto_free_blkcipher(kctx->acceptor_enc_aux);
crypto_free_blkcipher(kctx->initiator_enc_aux);
kfree(kctx->mech_used.data);
kfree(kctx);
}
static const struct gss_api_ops gss_kerberos_ops = {
.gss_import_sec_context = gss_import_sec_context_kerberos,
.gss_get_mic = gss_get_mic_kerberos,
.gss_verify_mic = gss_verify_mic_kerberos,
.gss_wrap = gss_wrap_kerberos,
.gss_unwrap = gss_unwrap_kerberos,
.gss_delete_sec_context = gss_delete_sec_context_kerberos,
};
static struct pf_desc gss_kerberos_pfs[] = {
[0] = {
.pseudoflavor = RPC_AUTH_GSS_KRB5,
.qop = GSS_C_QOP_DEFAULT,
.service = RPC_GSS_SVC_NONE,
.name = "krb5",
},
[1] = {
.pseudoflavor = RPC_AUTH_GSS_KRB5I,
.qop = GSS_C_QOP_DEFAULT,
.service = RPC_GSS_SVC_INTEGRITY,
.name = "krb5i",
},
[2] = {
.pseudoflavor = RPC_AUTH_GSS_KRB5P,
.qop = GSS_C_QOP_DEFAULT,
.service = RPC_GSS_SVC_PRIVACY,
.name = "krb5p",
},
};
MODULE_ALIAS("rpc-auth-gss-krb5");
MODULE_ALIAS("rpc-auth-gss-krb5i");
MODULE_ALIAS("rpc-auth-gss-krb5p");
MODULE_ALIAS("rpc-auth-gss-390003");
MODULE_ALIAS("rpc-auth-gss-390004");
MODULE_ALIAS("rpc-auth-gss-390005");
MODULE_ALIAS("rpc-auth-gss-1.2.840.113554.1.2.2");
static struct gss_api_mech gss_kerberos_mech = {
.gm_name = "krb5",
.gm_owner = THIS_MODULE,
.gm_oid = { 9, "\x2a\x86\x48\x86\xf7\x12\x01\x02\x02" },
.gm_ops = &gss_kerberos_ops,
.gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs),
.gm_pfs = gss_kerberos_pfs,
.gm_upcall_enctypes = KRB5_SUPPORTED_ENCTYPES,
};
static int __init init_kerberos_module(void)
{
int status;
status = gss_mech_register(&gss_kerberos_mech);
if (status)
printk("Failed to register kerberos gss mechanism!\n");
return status;
}
static void __exit cleanup_kerberos_module(void)
{
gss_mech_unregister(&gss_kerberos_mech);
}
MODULE_LICENSE("GPL");
module_init(init_kerberos_module);
module_exit(cleanup_kerberos_module);

229
net/sunrpc/auth_gss/gss_krb5_seal.c Normal file
View file

@ -0,0 +1,229 @@
/*
* linux/net/sunrpc/gss_krb5_seal.c
*
* Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5seal.c
*
* Copyright (c) 2000-2008 The Regents of the University of Michigan.
* All rights reserved.
*
* Andy Adamson <andros@umich.edu>
* J. Bruce Fields <bfields@umich.edu>
*/
/*
* Copyright 1993 by OpenVision Technologies, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appears in all copies and
* that both that copyright notice and this permission notice appear in
* supporting documentation, and that the name of OpenVision not be used
* in advertising or publicity pertaining to distribution of the software
* without specific, written prior permission. OpenVision makes no
* representations about the suitability of this software for any
* purpose. It is provided "as is" without express or implied warranty.
*
* OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
* USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
/*
* Copyright (C) 1998 by the FundsXpress, INC.
*
* All rights reserved.
*
* Export of this software from the United States of America may require
* a specific license from the United States Government. It is the
* responsibility of any person or organization contemplating export to
* obtain such a license before exporting.
*
* WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
* distribute this software and its documentation for any purpose and
* without fee is hereby granted, provided that the above copyright
* notice appear in all copies and that both that copyright notice and
* this permission notice appear in supporting documentation, and that
* the name of FundsXpress. not be used in advertising or publicity pertaining
* to distribution of the software without specific, written prior
* permission. FundsXpress makes no representations about the suitability of
* this software for any purpose. It is provided "as is" without express
* or implied warranty.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
#include <linux/types.h>
#include <linux/jiffies.h>
#include <linux/sunrpc/gss_krb5.h>
#include <linux/random.h>
#include <linux/crypto.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
DEFINE_SPINLOCK(krb5_seq_lock);
static void *
setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token)
{
u16 *ptr;
void *krb5_hdr;
int body_size = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
token->len = g_token_size(&ctx->mech_used, body_size);
ptr = (u16 *)token->data;
g_make_token_header(&ctx->mech_used, body_size, (unsigned char **)&ptr);
/* ptr now at start of header described in rfc 1964, section 1.2.1: */
krb5_hdr = ptr;
*ptr++ = KG_TOK_MIC_MSG;
/*
* signalg is stored as if it were converted from LE to host endian, even
* though it's an opaque pair of bytes according to the RFC.
*/
*ptr++ = (__force u16)cpu_to_le16(ctx->gk5e->signalg);
*ptr++ = SEAL_ALG_NONE;
*ptr = 0xffff;
return krb5_hdr;
}
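For the des-cbc-crc enctype defined in gss_krb5_mech.c above, the eight bytes laid down here would come out as shown in the illustrative array below (not part of the original file, and assuming the usual constant values KG_TOK_MIC_MSG = 0x0101, SGN_ALG_DES_MAC_MD5 = 0 and SEAL_ALG_NONE = 0xffff); the 8-byte sequence number and the checksum follow it in the token:

/* illustrative only: RFC 1964 MIC token header for the des-cbc-crc enctype */
static const unsigned char mic_v1_hdr[8] = {
	0x01, 0x01,	/* TOK_ID: KG_TOK_MIC_MSG            */
	0x00, 0x00,	/* SGN_ALG: DES MAC MD5, stored "LE" */
	0xff, 0xff,	/* SEAL_ALG_NONE                     */
	0xff, 0xff,	/* filler                            */
};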
static void *
setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
{
u16 *ptr;
void *krb5_hdr;
u8 *p, flags = 0x00;
if ((ctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0)
flags |= 0x01;
if (ctx->flags & KRB5_CTX_FLAG_ACCEPTOR_SUBKEY)
flags |= 0x04;
/* Per rfc 4121, sec 4.2.6.1, there is no header,
* just start the token */
krb5_hdr = ptr = (u16 *)token->data;
*ptr++ = KG2_TOK_MIC;
p = (u8 *)ptr;
*p++ = flags;
*p++ = 0xff;
ptr = (u16 *)p;
*ptr++ = 0xffff;
*ptr = 0xffff;
token->len = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
return krb5_hdr;
}
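The resulting 16-byte header (RFC 4121, sec. 4.2.6.1) for a token sent by the initiator without an acceptor subkey would look like the illustrative array below, which is not part of the original file and assumes KG2_TOK_MIC = 0x0404; gss_get_mic_v2() fills in the big-endian sequence number at offset 8 before appending the checksum:

/* illustrative only: RFC 4121 MIC token header as built above */
static const unsigned char mic_v2_hdr[16] = {
	0x04, 0x04,			/* TOK_ID: KG2_TOK_MIC            */
	0x00,				/* flags: initiator, no subkey    */
	0xff, 0xff, 0xff, 0xff, 0xff,	/* filler                         */
	0x00, 0x00, 0x00, 0x00,		/* 64-bit big-endian SND_SEQ,     */
	0x00, 0x00, 0x00, 0x00,		/*   written later by the caller  */
};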
static u32
gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
struct xdr_netobj *token)
{
char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
.data = cksumdata};
void *ptr;
s32 now;
u32 seq_send;
u8 *cksumkey;
dprintk("RPC: %s\n", __func__);
BUG_ON(ctx == NULL);
now = get_seconds();
ptr = setup_token(ctx, token);
if (ctx->gk5e->keyed_cksum)
cksumkey = ctx->cksum;
else
cksumkey = NULL;
if (make_checksum(ctx, ptr, 8, text, 0, cksumkey,
KG_USAGE_SIGN, &md5cksum))
return GSS_S_FAILURE;
memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
spin_lock(&krb5_seq_lock);
seq_send = ctx->seq_send++;
spin_unlock(&krb5_seq_lock);
if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff,
seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))
return GSS_S_FAILURE;
return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
}
static u32
gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
struct xdr_netobj *token)
{
char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
struct xdr_netobj cksumobj = { .len = sizeof(cksumdata),
.data = cksumdata};
void *krb5_hdr;
s32 now;
u64 seq_send;
u8 *cksumkey;
unsigned int cksum_usage;
dprintk("RPC: %s\n", __func__);
krb5_hdr = setup_token_v2(ctx, token);
/* Set up the sequence number. Now 64-bits in clear
* text and w/o direction indicator */
spin_lock(&krb5_seq_lock);
seq_send = ctx->seq_send64++;
spin_unlock(&krb5_seq_lock);
*((__be64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send);
if (ctx->initiate) {
cksumkey = ctx->initiator_sign;
cksum_usage = KG_USAGE_INITIATOR_SIGN;
} else {
cksumkey = ctx->acceptor_sign;
cksum_usage = KG_USAGE_ACCEPTOR_SIGN;
}
if (make_checksum_v2(ctx, krb5_hdr, GSS_KRB5_TOK_HDR_LEN,
text, 0, cksumkey, cksum_usage, &cksumobj))
return GSS_S_FAILURE;
memcpy(krb5_hdr + GSS_KRB5_TOK_HDR_LEN, cksumobj.data, cksumobj.len);
now = get_seconds();
return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
}
u32
gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
struct xdr_netobj *token)
{
struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
switch (ctx->enctype) {
default:
BUG();
case ENCTYPE_DES_CBC_RAW:
case ENCTYPE_DES3_CBC_RAW:
case ENCTYPE_ARCFOUR_HMAC:
return gss_get_mic_v1(ctx, text, token);
case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
return gss_get_mic_v2(ctx, text, token);
}
}

166
net/sunrpc/auth_gss/gss_krb5_seqnum.c Normal file
View file

@ -0,0 +1,166 @@
/*
* linux/net/sunrpc/gss_krb5_seqnum.c
*
* Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/util_seqnum.c
*
* Copyright (c) 2000 The Regents of the University of Michigan.
* All rights reserved.
*
* Andy Adamson <andros@umich.edu>
*/
/*
* Copyright 1993 by OpenVision Technologies, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appears in all copies and
* that both that copyright notice and this permission notice appear in
* supporting documentation, and that the name of OpenVision not be used
* in advertising or publicity pertaining to distribution of the software
* without specific, written prior permission. OpenVision makes no
* representations about the suitability of this software for any
* purpose. It is provided "as is" without express or implied warranty.
*
* OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
* USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
#include <linux/types.h>
#include <linux/sunrpc/gss_krb5.h>
#include <linux/crypto.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
static s32
krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
unsigned char *cksum, unsigned char *buf)
{
struct crypto_blkcipher *cipher;
unsigned char plain[8];
s32 code;
dprintk("RPC: %s:\n", __func__);
cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
CRYPTO_ALG_ASYNC);
if (IS_ERR(cipher))
return PTR_ERR(cipher);
plain[0] = (unsigned char) ((seqnum >> 24) & 0xff);
plain[1] = (unsigned char) ((seqnum >> 16) & 0xff);
plain[2] = (unsigned char) ((seqnum >> 8) & 0xff);
plain[3] = (unsigned char) ((seqnum >> 0) & 0xff);
plain[4] = direction;
plain[5] = direction;
plain[6] = direction;
plain[7] = direction;
code = krb5_rc4_setup_seq_key(kctx, cipher, cksum);
if (code)
goto out;
code = krb5_encrypt(cipher, cksum, plain, buf, 8);
out:
crypto_free_blkcipher(cipher);
return code;
}
s32
krb5_make_seq_num(struct krb5_ctx *kctx,
struct crypto_blkcipher *key,
int direction,
u32 seqnum,
unsigned char *cksum, unsigned char *buf)
{
unsigned char plain[8];
if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC)
return krb5_make_rc4_seq_num(kctx, direction, seqnum,
cksum, buf);
plain[0] = (unsigned char) (seqnum & 0xff);
plain[1] = (unsigned char) ((seqnum >> 8) & 0xff);
plain[2] = (unsigned char) ((seqnum >> 16) & 0xff);
plain[3] = (unsigned char) ((seqnum >> 24) & 0xff);
plain[4] = direction;
plain[5] = direction;
plain[6] = direction;
plain[7] = direction;
return krb5_encrypt(key, cksum, plain, buf, 8);
}
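For the non-RC4 enctypes, the eight plaintext bytes assembled above put the sequence number least-significant byte first and repeat the direction octet four times (0x00 when sent by the initiator, 0xff by the acceptor, matching the callers in gss_krb5_seal.c above). An illustrative layout, not part of the original file, for sequence number 0x11223344 sent by the initiator:

/* illustrative only: plaintext handed to krb5_encrypt() for seqnum 0x11223344 */
static const unsigned char seq_plain[8] = {
	0x44, 0x33, 0x22, 0x11,		/* sequence number, LSB first     */
	0x00, 0x00, 0x00, 0x00,		/* direction, repeated four times */
};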
static s32
krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
unsigned char *buf, int *direction, s32 *seqnum)
{
struct crypto_blkcipher *cipher;
unsigned char plain[8];
s32 code;
dprintk("RPC: %s:\n", __func__);
cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
CRYPTO_ALG_ASYNC);
if (IS_ERR(cipher))
return PTR_ERR(cipher);
code = krb5_rc4_setup_seq_key(kctx, cipher, cksum);
if (code)
goto out;
code = krb5_decrypt(cipher, cksum, buf, plain, 8);
if (code)
goto out;
if ((plain[4] != plain[5]) || (plain[4] != plain[6])
|| (plain[4] != plain[7])) {
code = (s32)KG_BAD_SEQ;
goto out;
}
*direction = plain[4];
*seqnum = ((plain[0] << 24) | (plain[1] << 16) |
(plain[2] << 8) | (plain[3]));
out:
crypto_free_blkcipher(cipher);
return code;
}
s32
krb5_get_seq_num(struct krb5_ctx *kctx,
unsigned char *cksum,
unsigned char *buf,
int *direction, u32 *seqnum)
{
s32 code;
unsigned char plain[8];
struct crypto_blkcipher *key = kctx->seq;
dprintk("RPC: krb5_get_seq_num:\n");
if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC)
return krb5_get_rc4_seq_num(kctx, cksum, buf,
direction, seqnum);
if ((code = krb5_decrypt(key, cksum, buf, plain, 8)))
return code;
if ((plain[4] != plain[5]) || (plain[4] != plain[6]) ||
(plain[4] != plain[7]))
return (s32)KG_BAD_SEQ;
*direction = plain[4];
*seqnum = ((plain[0]) |
(plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24));
return 0;
}

226
net/sunrpc/auth_gss/gss_krb5_unseal.c Normal file
View file

@ -0,0 +1,226 @@
/*
* linux/net/sunrpc/gss_krb5_unseal.c
*
* Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5unseal.c
*
* Copyright (c) 2000-2008 The Regents of the University of Michigan.
* All rights reserved.
*
* Andy Adamson <andros@umich.edu>
*/
/*
* Copyright 1993 by OpenVision Technologies, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appears in all copies and
* that both that copyright notice and this permission notice appear in
* supporting documentation, and that the name of OpenVision not be used
* in advertising or publicity pertaining to distribution of the software
* without specific, written prior permission. OpenVision makes no
* representations about the suitability of this software for any
* purpose. It is provided "as is" without express or implied warranty.
*
* OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
* USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
/*
* Copyright (C) 1998 by the FundsXpress, INC.
*
* All rights reserved.
*
* Export of this software from the United States of America may require
* a specific license from the United States Government. It is the
* responsibility of any person or organization contemplating export to
* obtain such a license before exporting.
*
* WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
* distribute this software and its documentation for any purpose and
* without fee is hereby granted, provided that the above copyright
* notice appear in all copies and that both that copyright notice and
* this permission notice appear in supporting documentation, and that
* the name of FundsXpress. not be used in advertising or publicity pertaining
* to distribution of the software without specific, written prior
* permission. FundsXpress makes no representations about the suitability of
* this software for any purpose. It is provided "as is" without express
* or implied warranty.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
#include <linux/types.h>
#include <linux/jiffies.h>
#include <linux/sunrpc/gss_krb5.h>
#include <linux/crypto.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
/* read_token is a mic token, and message_buffer is the data that the mic was
* supposedly taken over. */
static u32
gss_verify_mic_v1(struct krb5_ctx *ctx,
struct xdr_buf *message_buffer, struct xdr_netobj *read_token)
{
int signalg;
int sealalg;
char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
.data = cksumdata};
s32 now;
int direction;
u32 seqnum;
unsigned char *ptr = (unsigned char *)read_token->data;
int bodysize;
u8 *cksumkey;
dprintk("RPC: krb5_read_token\n");
if (g_verify_token_header(&ctx->mech_used, &bodysize, &ptr,
read_token->len))
return GSS_S_DEFECTIVE_TOKEN;
if ((ptr[0] != ((KG_TOK_MIC_MSG >> 8) & 0xff)) ||
(ptr[1] != (KG_TOK_MIC_MSG & 0xff)))
return GSS_S_DEFECTIVE_TOKEN;
/* XXX sanity-check bodysize?? */
signalg = ptr[2] + (ptr[3] << 8);
if (signalg != ctx->gk5e->signalg)
return GSS_S_DEFECTIVE_TOKEN;
sealalg = ptr[4] + (ptr[5] << 8);
if (sealalg != SEAL_ALG_NONE)
return GSS_S_DEFECTIVE_TOKEN;
if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
return GSS_S_DEFECTIVE_TOKEN;
if (ctx->gk5e->keyed_cksum)
cksumkey = ctx->cksum;
else
cksumkey = NULL;
if (make_checksum(ctx, ptr, 8, message_buffer, 0,
cksumkey, KG_USAGE_SIGN, &md5cksum))
return GSS_S_FAILURE;
if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
ctx->gk5e->cksumlength))
return GSS_S_BAD_SIG;
/* it got through unscathed. Make sure the context is unexpired */
now = get_seconds();
if (now > ctx->endtime)
return GSS_S_CONTEXT_EXPIRED;
/* do sequencing checks */
if (krb5_get_seq_num(ctx, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8,
&direction, &seqnum))
return GSS_S_FAILURE;
if ((ctx->initiate && direction != 0xff) ||
(!ctx->initiate && direction != 0))
return GSS_S_BAD_SIG;
return GSS_S_COMPLETE;
}
static u32
gss_verify_mic_v2(struct krb5_ctx *ctx,
struct xdr_buf *message_buffer, struct xdr_netobj *read_token)
{
char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
struct xdr_netobj cksumobj = {.len = sizeof(cksumdata),
.data = cksumdata};
s32 now;
u8 *ptr = read_token->data;
u8 *cksumkey;
u8 flags;
int i;
unsigned int cksum_usage;
dprintk("RPC: %s\n", __func__);
if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_MIC)
return GSS_S_DEFECTIVE_TOKEN;
flags = ptr[2];
if ((!ctx->initiate && (flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)) ||
(ctx->initiate && !(flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)))
return GSS_S_BAD_SIG;
if (flags & KG2_TOKEN_FLAG_SEALED) {
dprintk("%s: token has unexpected sealed flag\n", __func__);
return GSS_S_FAILURE;
}
for (i = 3; i < 8; i++)
if (ptr[i] != 0xff)
return GSS_S_DEFECTIVE_TOKEN;
if (ctx->initiate) {
cksumkey = ctx->acceptor_sign;
cksum_usage = KG_USAGE_ACCEPTOR_SIGN;
} else {
cksumkey = ctx->initiator_sign;
cksum_usage = KG_USAGE_INITIATOR_SIGN;
}
if (make_checksum_v2(ctx, ptr, GSS_KRB5_TOK_HDR_LEN, message_buffer, 0,
cksumkey, cksum_usage, &cksumobj))
return GSS_S_FAILURE;
if (memcmp(cksumobj.data, ptr + GSS_KRB5_TOK_HDR_LEN,
ctx->gk5e->cksumlength))
return GSS_S_BAD_SIG;
/* it got through unscathed. Make sure the context is unexpired */
now = get_seconds();
if (now > ctx->endtime)
return GSS_S_CONTEXT_EXPIRED;
/*
* NOTE: the sequence number at ptr + 8 is skipped, rpcsec_gss
* doesn't want it checked; see page 6 of rfc 2203.
*/
return GSS_S_COMPLETE;
}
u32
gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
struct xdr_buf *message_buffer,
struct xdr_netobj *read_token)
{
struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
switch (ctx->enctype) {
default:
BUG();
case ENCTYPE_DES_CBC_RAW:
case ENCTYPE_DES3_CBC_RAW:
case ENCTYPE_ARCFOUR_HMAC:
return gss_verify_mic_v1(ctx, message_buffer, read_token);
case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
return gss_verify_mic_v2(ctx, message_buffer, read_token);
}
}

View file

@ -0,0 +1,626 @@
/*
* COPYRIGHT (c) 2008
* The Regents of the University of Michigan
* ALL RIGHTS RESERVED
*
* Permission is granted to use, copy, create derivative works
* and redistribute this software and such derivative works
* for any purpose, so long as the name of The University of
* Michigan is not used in any advertising or publicity
* pertaining to the use of distribution of this software
* without specific, written prior authorization. If the
* above copyright notice or any other identification of the
* University of Michigan is included in any copy of any
* portion of this software, then the disclaimer below must
* also be included.
*
* THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION
* FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY
* PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF
* MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
* WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
* REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE
* FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR
* CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING
* OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
* IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGES.
*/
#include <linux/types.h>
#include <linux/jiffies.h>
#include <linux/sunrpc/gss_krb5.h>
#include <linux/random.h>
#include <linux/pagemap.h>
#include <linux/crypto.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
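/*
 * Kerberos v1 wrap tokens pad the plaintext out to the cipher
 * blocksize, with every pad byte holding the pad length (e.g. a
 * blocksize of 8 and 13 bytes of data gives three 0x03 bytes);
 * the helpers below add and strip that padding.
 */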
static inline int
gss_krb5_padding(int blocksize, int length)
{
return blocksize - (length % blocksize);
}
static inline void
gss_krb5_add_padding(struct xdr_buf *buf, int offset, int blocksize)
{
int padding = gss_krb5_padding(blocksize, buf->len - offset);
char *p;
struct kvec *iov;
if (buf->page_len || buf->tail[0].iov_len)
iov = &buf->tail[0];
else
iov = &buf->head[0];
p = iov->iov_base + iov->iov_len;
iov->iov_len += padding;
buf->len += padding;
memset(p, padding, padding);
}
static inline int
gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize)
{
u8 *ptr;
u8 pad;
size_t len = buf->len;
if (len <= buf->head[0].iov_len) {
pad = *(u8 *)(buf->head[0].iov_base + len - 1);
if (pad > buf->head[0].iov_len)
return -EINVAL;
buf->head[0].iov_len -= pad;
goto out;
} else
len -= buf->head[0].iov_len;
if (len <= buf->page_len) {
unsigned int last = (buf->page_base + len - 1)
>>PAGE_CACHE_SHIFT;
unsigned int offset = (buf->page_base + len - 1)
& (PAGE_CACHE_SIZE - 1);
ptr = kmap_atomic(buf->pages[last]);
pad = *(ptr + offset);
kunmap_atomic(ptr);
goto out;
} else
len -= buf->page_len;
BUG_ON(len > buf->tail[0].iov_len);
pad = *(u8 *)(buf->tail[0].iov_base + len - 1);
out:
/* XXX: NOTE: we do not adjust the page lengths--they represent
* a range of data in the real filesystem page cache, and we need
* to know that range so the xdr code can properly place read data.
* However adjusting the head length, as we do above, is harmless.
* In the case of a request that fits into a single page, the server
* also uses length and head length together to determine the original
* start of the request to copy the request for deferral; so it's
* easier on the server if we adjust head and tail length in tandem.
* It's not really a problem that we don't fool with the page and
* tail lengths, though--at worst badly formed xdr might lead the
* server to attempt to parse the padding.
* XXX: Document all these weird requirements for gss mechanism
* wrap/unwrap functions. */
if (pad > blocksize)
return -EINVAL;
if (buf->len > pad)
buf->len -= pad;
else
return -EINVAL;
return 0;
}
void
gss_krb5_make_confounder(char *p, u32 conflen)
{
static u64 i = 0;
u64 *q = (u64 *)p;
/* rfc1964 claims this should be "random". But all that's really
* necessary is that it be unique. And not even that is necessary in
* our case since our "gssapi" implementation exists only to support
* rpcsec_gss, so we know that the only buffers we will ever encrypt
* already begin with a unique sequence number. Just to hedge my bets
* I'll make a half-hearted attempt at something unique, but ensuring
* uniqueness would mean worrying about atomicity and rollover, and I
* don't care enough. */
/* initialize to random value */
if (i == 0) {
i = prandom_u32();
i = (i << 32) | prandom_u32();
}
switch (conflen) {
case 16:
*q++ = i++;
/* fall through */
case 8:
*q++ = i++;
break;
default:
BUG();
}
}
/* Assumptions: the head and tail of inbuf are ours to play with.
* The pages, however, may be real pages in the page cache and we replace
* them with scratch pages from **pages before writing to them. */
/* XXX: obviously the above should be documentation of wrap interface,
* and shouldn't be in this kerberos-specific file. */
/* XXX factor out common code with seal/unseal. */
static u32
gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
struct xdr_buf *buf, struct page **pages)
{
char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
.data = cksumdata};
int blocksize = 0, plainlen;
unsigned char *ptr, *msg_start;
s32 now;
int headlen;
struct page **tmp_pages;
u32 seq_send;
u8 *cksumkey;
u32 conflen = kctx->gk5e->conflen;
dprintk("RPC: %s\n", __func__);
now = get_seconds();
blocksize = crypto_blkcipher_blocksize(kctx->enc);
gss_krb5_add_padding(buf, offset, blocksize);
BUG_ON((buf->len - offset) % blocksize);
plainlen = conflen + buf->len - offset;
headlen = g_token_size(&kctx->mech_used,
GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength + plainlen) -
(buf->len - offset);
ptr = buf->head[0].iov_base + offset;
/* shift data to make room for header. */
xdr_extend_head(buf, offset, headlen);
/* XXX Would be cleverer to encrypt while copying. */
BUG_ON((buf->len - offset - headlen) % blocksize);
g_make_token_header(&kctx->mech_used,
GSS_KRB5_TOK_HDR_LEN +
kctx->gk5e->cksumlength + plainlen, &ptr);
/* ptr now at header described in rfc 1964, section 1.2.1: */
ptr[0] = (unsigned char) ((KG_TOK_WRAP_MSG >> 8) & 0xff);
ptr[1] = (unsigned char) (KG_TOK_WRAP_MSG & 0xff);
msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength;
/*
* signalg and sealalg are stored as if they were converted from LE
* to host endian, even though they're opaque pairs of bytes according
* to the RFC.
*/
*(__le16 *)(ptr + 2) = cpu_to_le16(kctx->gk5e->signalg);
*(__le16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg);
ptr[6] = 0xff;
ptr[7] = 0xff;
gss_krb5_make_confounder(msg_start, conflen);
if (kctx->gk5e->keyed_cksum)
cksumkey = kctx->cksum;
else
cksumkey = NULL;
/* XXXJBF: UGH!: */
tmp_pages = buf->pages;
buf->pages = pages;
if (make_checksum(kctx, ptr, 8, buf, offset + headlen - conflen,
cksumkey, KG_USAGE_SEAL, &md5cksum))
return GSS_S_FAILURE;
buf->pages = tmp_pages;
memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
spin_lock(&krb5_seq_lock);
seq_send = kctx->seq_send++;
spin_unlock(&krb5_seq_lock);
/* XXX would probably be more efficient to compute checksum
* and encrypt at the same time: */
if ((krb5_make_seq_num(kctx, kctx->seq, kctx->initiate ? 0 : 0xff,
seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8)))
return GSS_S_FAILURE;
if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
struct crypto_blkcipher *cipher;
int err;
cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
CRYPTO_ALG_ASYNC);
if (IS_ERR(cipher))
return GSS_S_FAILURE;
krb5_rc4_setup_enc_key(kctx, cipher, seq_send);
err = gss_encrypt_xdr_buf(cipher, buf,
offset + headlen - conflen, pages);
crypto_free_blkcipher(cipher);
if (err)
return GSS_S_FAILURE;
} else {
if (gss_encrypt_xdr_buf(kctx->enc, buf,
offset + headlen - conflen, pages))
return GSS_S_FAILURE;
}
return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
}
static u32
gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
{
int signalg;
int sealalg;
char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
.data = cksumdata};
s32 now;
int direction;
s32 seqnum;
unsigned char *ptr;
int bodysize;
void *data_start, *orig_start;
int data_len;
int blocksize;
u32 conflen = kctx->gk5e->conflen;
int crypt_offset;
u8 *cksumkey;
dprintk("RPC: gss_unwrap_kerberos\n");
ptr = (u8 *)buf->head[0].iov_base + offset;
if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr,
buf->len - offset))
return GSS_S_DEFECTIVE_TOKEN;
if ((ptr[0] != ((KG_TOK_WRAP_MSG >> 8) & 0xff)) ||
(ptr[1] != (KG_TOK_WRAP_MSG & 0xff)))
return GSS_S_DEFECTIVE_TOKEN;
/* XXX sanity-check bodysize?? */
/* get the sign and seal algorithms */
signalg = ptr[2] + (ptr[3] << 8);
if (signalg != kctx->gk5e->signalg)
return GSS_S_DEFECTIVE_TOKEN;
sealalg = ptr[4] + (ptr[5] << 8);
if (sealalg != kctx->gk5e->sealalg)
return GSS_S_DEFECTIVE_TOKEN;
if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
return GSS_S_DEFECTIVE_TOKEN;
/*
* Data starts after token header and checksum. ptr points
* to the beginning of the token header
*/
crypt_offset = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) -
(unsigned char *)buf->head[0].iov_base;
/*
* Need plaintext seqnum to derive encryption key for arcfour-hmac
*/
if (krb5_get_seq_num(kctx, ptr + GSS_KRB5_TOK_HDR_LEN,
ptr + 8, &direction, &seqnum))
return GSS_S_BAD_SIG;
if ((kctx->initiate && direction != 0xff) ||
(!kctx->initiate && direction != 0))
return GSS_S_BAD_SIG;
if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
struct crypto_blkcipher *cipher;
int err;
cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
CRYPTO_ALG_ASYNC);
if (IS_ERR(cipher))
return GSS_S_FAILURE;
krb5_rc4_setup_enc_key(kctx, cipher, seqnum);
err = gss_decrypt_xdr_buf(cipher, buf, crypt_offset);
crypto_free_blkcipher(cipher);
if (err)
return GSS_S_DEFECTIVE_TOKEN;
} else {
if (gss_decrypt_xdr_buf(kctx->enc, buf, crypt_offset))
return GSS_S_DEFECTIVE_TOKEN;
}
if (kctx->gk5e->keyed_cksum)
cksumkey = kctx->cksum;
else
cksumkey = NULL;
if (make_checksum(kctx, ptr, 8, buf, crypt_offset,
cksumkey, KG_USAGE_SEAL, &md5cksum))
return GSS_S_FAILURE;
if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
kctx->gk5e->cksumlength))
return GSS_S_BAD_SIG;
/* it got through unscathed. Make sure the context is unexpired */
now = get_seconds();
if (now > kctx->endtime)
return GSS_S_CONTEXT_EXPIRED;
/* do sequencing checks */
/* Copy the data back to the right position. XXX: Would probably be
* better to copy and encrypt at the same time. */
blocksize = crypto_blkcipher_blocksize(kctx->enc);
data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) +
conflen;
orig_start = buf->head[0].iov_base + offset;
data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
memmove(orig_start, data_start, data_len);
buf->head[0].iov_len -= (data_start - orig_start);
buf->len -= (data_start - orig_start);
if (gss_krb5_remove_padding(buf, blocksize))
return GSS_S_DEFECTIVE_TOKEN;
return GSS_S_COMPLETE;
}
/*
* We can shift data by up to LOCAL_BUF_LEN bytes in a pass. If we need
* to do more than that, we shift repeatedly. Kevin Coffman reports
* seeing 28 bytes as the value used by Microsoft clients and servers
* with AES, so this constant is chosen to allow handling 28 in one pass
* without using too much stack space.
*
* If that proves to be a problem, perhaps we could use a more clever
* algorithm.
*/
#define LOCAL_BUF_LEN 32u
static void rotate_buf_a_little(struct xdr_buf *buf, unsigned int shift)
{
char head[LOCAL_BUF_LEN];
char tmp[LOCAL_BUF_LEN];
unsigned int this_len, i;
BUG_ON(shift > LOCAL_BUF_LEN);
read_bytes_from_xdr_buf(buf, 0, head, shift);
for (i = 0; i + shift < buf->len; i += LOCAL_BUF_LEN) {
this_len = min(LOCAL_BUF_LEN, buf->len - (i + shift));
read_bytes_from_xdr_buf(buf, i+shift, tmp, this_len);
write_bytes_to_xdr_buf(buf, i, tmp, this_len);
}
write_bytes_to_xdr_buf(buf, buf->len - shift, head, shift);
}
static void _rotate_left(struct xdr_buf *buf, unsigned int shift)
{
int shifted = 0;
int this_shift;
shift %= buf->len;
while (shifted < shift) {
this_shift = min(shift - shifted, LOCAL_BUF_LEN);
rotate_buf_a_little(buf, this_shift);
shifted += this_shift;
}
}
static void rotate_left(u32 base, struct xdr_buf *buf, unsigned int shift)
{
struct xdr_buf subbuf;
xdr_buf_subsegment(buf, &subbuf, base, buf->len - base);
_rotate_left(&subbuf, shift);
}
static u32
gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
struct xdr_buf *buf, struct page **pages)
{
int blocksize;
u8 *ptr, *plainhdr;
s32 now;
u8 flags = 0x00;
__be16 *be16ptr;
__be64 *be64ptr;
u32 err;
dprintk("RPC: %s\n", __func__);
if (kctx->gk5e->encrypt_v2 == NULL)
return GSS_S_FAILURE;
/* make room for gss token header */
if (xdr_extend_head(buf, offset, GSS_KRB5_TOK_HDR_LEN))
return GSS_S_FAILURE;
/* construct gss token header */
ptr = plainhdr = buf->head[0].iov_base + offset;
*ptr++ = (unsigned char) ((KG2_TOK_WRAP>>8) & 0xff);
*ptr++ = (unsigned char) (KG2_TOK_WRAP & 0xff);
if ((kctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0)
flags |= KG2_TOKEN_FLAG_SENTBYACCEPTOR;
if ((kctx->flags & KRB5_CTX_FLAG_ACCEPTOR_SUBKEY) != 0)
flags |= KG2_TOKEN_FLAG_ACCEPTORSUBKEY;
/* We always do confidentiality in wrap tokens */
flags |= KG2_TOKEN_FLAG_SEALED;
*ptr++ = flags;
*ptr++ = 0xff;
be16ptr = (__be16 *)ptr;
blocksize = crypto_blkcipher_blocksize(kctx->acceptor_enc);
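/* EC ("extra count") field of the RFC 4121 wrap token; zero here */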
*be16ptr++ = 0;
/* "inner" token header always uses 0 for RRC */
*be16ptr++ = 0;
be64ptr = (__be64 *)be16ptr;
spin_lock(&krb5_seq_lock);
*be64ptr = cpu_to_be64(kctx->seq_send64++);
spin_unlock(&krb5_seq_lock);
err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, pages);
if (err)
return err;
now = get_seconds();
return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
}
static u32
gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
{
s32 now;
u8 *ptr;
u8 flags = 0x00;
u16 ec, rrc;
int err;
u32 headskip, tailskip;
u8 decrypted_hdr[GSS_KRB5_TOK_HDR_LEN];
unsigned int movelen;
dprintk("RPC: %s\n", __func__);
if (kctx->gk5e->decrypt_v2 == NULL)
return GSS_S_FAILURE;
ptr = buf->head[0].iov_base + offset;
if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_WRAP)
return GSS_S_DEFECTIVE_TOKEN;
flags = ptr[2];
if ((!kctx->initiate && (flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)) ||
(kctx->initiate && !(flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)))
return GSS_S_BAD_SIG;
if ((flags & KG2_TOKEN_FLAG_SEALED) == 0) {
dprintk("%s: token missing expected sealed flag\n", __func__);
return GSS_S_DEFECTIVE_TOKEN;
}
if (ptr[3] != 0xff)
return GSS_S_DEFECTIVE_TOKEN;
ec = be16_to_cpup((__be16 *)(ptr + 4));
rrc = be16_to_cpup((__be16 *)(ptr + 6));
/*
* NOTE: the sequence number at ptr + 8 is skipped, rpcsec_gss
* doesn't want it checked; see page 6 of rfc 2203.
*/
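/*
 * RFC 4121 allows the sender to right-rotate the encrypted part of
 * the token by "rrc" bytes; rotate left by the same amount so the
 * payload is back in its natural position before decrypting.
 */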
if (rrc != 0)
rotate_left(offset + 16, buf, rrc);
err = (*kctx->gk5e->decrypt_v2)(kctx, offset, buf,
&headskip, &tailskip);
if (err)
return GSS_S_FAILURE;
/*
* Retrieve the decrypted gss token header and verify
* it against the original
*/
err = read_bytes_from_xdr_buf(buf,
buf->len - GSS_KRB5_TOK_HDR_LEN - tailskip,
decrypted_hdr, GSS_KRB5_TOK_HDR_LEN);
if (err) {
dprintk("%s: error %u getting decrypted_hdr\n", __func__, err);
return GSS_S_FAILURE;
}
if (memcmp(ptr, decrypted_hdr, 6)
|| memcmp(ptr + 8, decrypted_hdr + 8, 8)) {
dprintk("%s: token hdr, plaintext hdr mismatch!\n", __func__);
return GSS_S_FAILURE;
}
/* do sequencing checks */
/* it got through unscathed. Make sure the context is unexpired */
now = get_seconds();
if (now > kctx->endtime)
return GSS_S_CONTEXT_EXPIRED;
/*
* Move the head data back to the right position in xdr_buf.
* We ignore any "ec" data since it might be in the head or
* the tail, and we really don't need to deal with it.
* Note that buf->head[0].iov_len may indicate the available
* head buffer space rather than that actually occupied.
*/
movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len);
movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip;
BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
buf->head[0].iov_len);
memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen);
buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip;
buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip;
/* Trim off the trailing "extra count" and checksum blob */
xdr_buf_trim(buf, ec + GSS_KRB5_TOK_HDR_LEN + tailskip);
return GSS_S_COMPLETE;
}
u32
gss_wrap_kerberos(struct gss_ctx *gctx, int offset,
struct xdr_buf *buf, struct page **pages)
{
struct krb5_ctx *kctx = gctx->internal_ctx_id;
switch (kctx->enctype) {
default:
BUG();
case ENCTYPE_DES_CBC_RAW:
case ENCTYPE_DES3_CBC_RAW:
case ENCTYPE_ARCFOUR_HMAC:
return gss_wrap_kerberos_v1(kctx, offset, buf, pages);
case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
return gss_wrap_kerberos_v2(kctx, offset, buf, pages);
}
}
u32
gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf)
{
struct krb5_ctx *kctx = gctx->internal_ctx_id;
switch (kctx->enctype) {
default:
BUG();
case ENCTYPE_DES_CBC_RAW:
case ENCTYPE_DES3_CBC_RAW:
case ENCTYPE_ARCFOUR_HMAC:
return gss_unwrap_kerberos_v1(kctx, offset, buf);
case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
return gss_unwrap_kerberos_v2(kctx, offset, buf);
}
}

View file

@ -0,0 +1,481 @@
/*
* linux/net/sunrpc/gss_mech_switch.c
*
* Copyright (c) 2001 The Regents of the University of Michigan.
* All rights reserved.
*
* J. Bruce Fields <bfields@umich.edu>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/oid_registry.h>
#include <linux/sunrpc/msg_prot.h>
#include <linux/sunrpc/gss_asn1.h>
#include <linux/sunrpc/auth_gss.h>
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/sunrpc/gss_err.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/gss_api.h>
#include <linux/sunrpc/clnt.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
static LIST_HEAD(registered_mechs);
static DEFINE_SPINLOCK(registered_mechs_lock);
static void
gss_mech_free(struct gss_api_mech *gm)
{
struct pf_desc *pf;
int i;
for (i = 0; i < gm->gm_pf_num; i++) {
pf = &gm->gm_pfs[i];
kfree(pf->auth_domain_name);
pf->auth_domain_name = NULL;
}
}
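/* Build the svcauth domain string for a pseudoflavor, e.g. "krb5" becomes "gss/krb5". */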
static inline char *
make_auth_domain_name(char *name)
{
static char *prefix = "gss/";
char *new;
new = kmalloc(strlen(name) + strlen(prefix) + 1, GFP_KERNEL);
if (new) {
strcpy(new, prefix);
strcat(new, name);
}
return new;
}
static int
gss_mech_svc_setup(struct gss_api_mech *gm)
{
struct pf_desc *pf;
int i, status;
for (i = 0; i < gm->gm_pf_num; i++) {
pf = &gm->gm_pfs[i];
pf->auth_domain_name = make_auth_domain_name(pf->name);
status = -ENOMEM;
if (pf->auth_domain_name == NULL)
goto out;
status = svcauth_gss_register_pseudoflavor(pf->pseudoflavor,
pf->auth_domain_name);
if (status)
goto out;
}
return 0;
out:
gss_mech_free(gm);
return status;
}
/**
* gss_mech_register - register a GSS mechanism
* @gm: GSS mechanism handle
*
* Returns zero if successful, or a negative errno.
*/
int gss_mech_register(struct gss_api_mech *gm)
{
int status;
status = gss_mech_svc_setup(gm);
if (status)
return status;
spin_lock(&registered_mechs_lock);
list_add(&gm->gm_list, &registered_mechs);
spin_unlock(&registered_mechs_lock);
dprintk("RPC: registered gss mechanism %s\n", gm->gm_name);
return 0;
}
EXPORT_SYMBOL_GPL(gss_mech_register);
/**
* gss_mech_unregister - release a GSS mechanism
* @gm: GSS mechanism handle
*
*/
void gss_mech_unregister(struct gss_api_mech *gm)
{
spin_lock(&registered_mechs_lock);
list_del(&gm->gm_list);
spin_unlock(&registered_mechs_lock);
dprintk("RPC: unregistered gss mechanism %s\n", gm->gm_name);
gss_mech_free(gm);
}
EXPORT_SYMBOL_GPL(gss_mech_unregister);
struct gss_api_mech *gss_mech_get(struct gss_api_mech *gm)
{
__module_get(gm->gm_owner);
return gm;
}
EXPORT_SYMBOL(gss_mech_get);
static struct gss_api_mech *
_gss_mech_get_by_name(const char *name)
{
struct gss_api_mech *pos, *gm = NULL;
spin_lock(&registered_mechs_lock);
list_for_each_entry(pos, &registered_mechs, gm_list) {
if (0 == strcmp(name, pos->gm_name)) {
if (try_module_get(pos->gm_owner))
gm = pos;
break;
}
}
spin_unlock(&registered_mechs_lock);
return gm;
}
struct gss_api_mech * gss_mech_get_by_name(const char *name)
{
struct gss_api_mech *gm = NULL;
gm = _gss_mech_get_by_name(name);
if (!gm) {
request_module("rpc-auth-gss-%s", name);
gm = _gss_mech_get_by_name(name);
}
return gm;
}
struct gss_api_mech *gss_mech_get_by_OID(struct rpcsec_gss_oid *obj)
{
struct gss_api_mech *pos, *gm = NULL;
char buf[32];
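/*
 * Print the OID in dotted-decimal form (the Kerberos v5 mechanism is
 * 1.2.840.113554.1.2.2) for the module-alias request and the debug
 * message; the list walk below still compares the raw OID bytes.
 */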
if (sprint_oid(obj->data, obj->len, buf, sizeof(buf)) < 0)
return NULL;
dprintk("RPC: %s(%s)\n", __func__, buf);
request_module("rpc-auth-gss-%s", buf);
spin_lock(&registered_mechs_lock);
list_for_each_entry(pos, &registered_mechs, gm_list) {
if (obj->len == pos->gm_oid.len) {
if (0 == memcmp(obj->data, pos->gm_oid.data, obj->len)) {
if (try_module_get(pos->gm_owner))
gm = pos;
break;
}
}
}
spin_unlock(&registered_mechs_lock);
return gm;
}
static inline int
mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor)
{
int i;
for (i = 0; i < gm->gm_pf_num; i++) {
if (gm->gm_pfs[i].pseudoflavor == pseudoflavor)
return 1;
}
return 0;
}
static struct gss_api_mech *_gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
{
struct gss_api_mech *gm = NULL, *pos;
spin_lock(&registered_mechs_lock);
list_for_each_entry(pos, &registered_mechs, gm_list) {
if (!mech_supports_pseudoflavor(pos, pseudoflavor))
continue;
if (try_module_get(pos->gm_owner))
gm = pos;
break;
}
spin_unlock(&registered_mechs_lock);
return gm;
}
struct gss_api_mech *
gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
{
struct gss_api_mech *gm;
gm = _gss_mech_get_by_pseudoflavor(pseudoflavor);
if (!gm) {
request_module("rpc-auth-gss-%u", pseudoflavor);
gm = _gss_mech_get_by_pseudoflavor(pseudoflavor);
}
return gm;
}
/**
* gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors
* @array: array to fill in
* @size: size of "array"
*
* Returns the number of array items filled in, or a negative errno.
*
* The returned array is not sorted by any policy. Callers should not
* rely on the order of the items in the returned array.
*/
int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr, int size)
{
struct gss_api_mech *pos = NULL;
int j, i = 0;
spin_lock(&registered_mechs_lock);
list_for_each_entry(pos, &registered_mechs, gm_list) {
for (j = 0; j < pos->gm_pf_num; j++) {
if (i >= size) {
spin_unlock(&registered_mechs_lock);
return -ENOMEM;
}
array_ptr[i++] = pos->gm_pfs[j].pseudoflavor;
}
}
spin_unlock(&registered_mechs_lock);
return i;
}
/**
* gss_svc_to_pseudoflavor - map a GSS service number to a pseudoflavor
* @gm: GSS mechanism handle
* @qop: GSS quality-of-protection value
* @service: GSS service value
*
* Returns a matching security flavor, or RPC_AUTH_MAXFLAVOR if none is found.
*/
rpc_authflavor_t gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 qop,
u32 service)
{
int i;
for (i = 0; i < gm->gm_pf_num; i++) {
if (gm->gm_pfs[i].qop == qop &&
gm->gm_pfs[i].service == service) {
return gm->gm_pfs[i].pseudoflavor;
}
}
return RPC_AUTH_MAXFLAVOR;
}
/**
* gss_mech_info2flavor - look up a pseudoflavor given a GSS tuple
* @info: a GSS mech OID, quality of protection, and service value
*
* Returns a matching pseudoflavor, or RPC_AUTH_MAXFLAVOR if the tuple is
* not supported.
*/
rpc_authflavor_t gss_mech_info2flavor(struct rpcsec_gss_info *info)
{
rpc_authflavor_t pseudoflavor;
struct gss_api_mech *gm;
gm = gss_mech_get_by_OID(&info->oid);
if (gm == NULL)
return RPC_AUTH_MAXFLAVOR;
pseudoflavor = gss_svc_to_pseudoflavor(gm, info->qop, info->service);
gss_mech_put(gm);
return pseudoflavor;
}
/**
* gss_mech_flavor2info - look up a GSS tuple for a given pseudoflavor
* @pseudoflavor: GSS pseudoflavor to match
* @info: rpcsec_gss_info structure to fill in
*
* Returns zero and fills in "info" if pseudoflavor matches a
* supported mechanism. Otherwise a negative errno is returned.
*/
int gss_mech_flavor2info(rpc_authflavor_t pseudoflavor,
struct rpcsec_gss_info *info)
{
struct gss_api_mech *gm;
int i;
gm = gss_mech_get_by_pseudoflavor(pseudoflavor);
if (gm == NULL)
return -ENOENT;
for (i = 0; i < gm->gm_pf_num; i++) {
if (gm->gm_pfs[i].pseudoflavor == pseudoflavor) {
memcpy(info->oid.data, gm->gm_oid.data, gm->gm_oid.len);
info->oid.len = gm->gm_oid.len;
info->qop = gm->gm_pfs[i].qop;
info->service = gm->gm_pfs[i].service;
gss_mech_put(gm);
return 0;
}
}
gss_mech_put(gm);
return -ENOENT;
}
u32
gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor)
{
int i;
for (i = 0; i < gm->gm_pf_num; i++) {
if (gm->gm_pfs[i].pseudoflavor == pseudoflavor)
return gm->gm_pfs[i].service;
}
return 0;
}
EXPORT_SYMBOL(gss_pseudoflavor_to_service);
char *
gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service)
{
int i;
for (i = 0; i < gm->gm_pf_num; i++) {
if (gm->gm_pfs[i].service == service)
return gm->gm_pfs[i].auth_domain_name;
}
return NULL;
}
void
gss_mech_put(struct gss_api_mech * gm)
{
if (gm)
module_put(gm->gm_owner);
}
EXPORT_SYMBOL(gss_mech_put);
/* The mech could probably be determined from the token instead, but it's just
* as easy for now to pass it in. */
int
gss_import_sec_context(const void *input_token, size_t bufsize,
struct gss_api_mech *mech,
struct gss_ctx **ctx_id,
time_t *endtime,
gfp_t gfp_mask)
{
if (!(*ctx_id = kzalloc(sizeof(**ctx_id), gfp_mask)))
return -ENOMEM;
(*ctx_id)->mech_type = gss_mech_get(mech);
return mech->gm_ops->gss_import_sec_context(input_token, bufsize,
*ctx_id, endtime, gfp_mask);
}
/* gss_get_mic: compute a mic over message and return mic_token. */
u32
gss_get_mic(struct gss_ctx *context_handle,
struct xdr_buf *message,
struct xdr_netobj *mic_token)
{
return context_handle->mech_type->gm_ops
->gss_get_mic(context_handle,
message,
mic_token);
}
/* gss_verify_mic: check whether the provided mic_token verifies message. */
u32
gss_verify_mic(struct gss_ctx *context_handle,
struct xdr_buf *message,
struct xdr_netobj *mic_token)
{
return context_handle->mech_type->gm_ops
->gss_verify_mic(context_handle,
message,
mic_token);
}
/*
* This function is called from both the client and server code.
* Each makes guarantees about how much "slack" space is available
* for the underlying function in "buf"'s head and tail while
* performing the wrap.
*
* The client and server code allocate RPC_MAX_AUTH_SIZE extra
* space in both the head and tail which is available for use by
* the wrap function.
*
* Underlying functions should verify they do not use more than
* RPC_MAX_AUTH_SIZE of extra space in either the head or tail
* when performing the wrap.
*/
u32
gss_wrap(struct gss_ctx *ctx_id,
int offset,
struct xdr_buf *buf,
struct page **inpages)
{
return ctx_id->mech_type->gm_ops
->gss_wrap(ctx_id, offset, buf, inpages);
}
u32
gss_unwrap(struct gss_ctx *ctx_id,
int offset,
struct xdr_buf *buf)
{
return ctx_id->mech_type->gm_ops
->gss_unwrap(ctx_id, offset, buf);
}
/* gss_delete_sec_context: free all resources associated with context_handle.
* Note this differs from the RFC 2744-specified prototype in that we don't
* bother returning an output token, since it would never be used anyway. */
u32
gss_delete_sec_context(struct gss_ctx **context_handle)
{
dprintk("RPC: gss_delete_sec_context deleting %p\n",
*context_handle);
if (!*context_handle)
return GSS_S_NO_CONTEXT;
if ((*context_handle)->internal_ctx_id)
(*context_handle)->mech_type->gm_ops
->gss_delete_sec_context((*context_handle)
->internal_ctx_id);
gss_mech_put((*context_handle)->mech_type);
kfree(*context_handle);
*context_handle=NULL;
return GSS_S_COMPLETE;
}

View file

@ -0,0 +1,382 @@
/*
* linux/net/sunrpc/gss_rpc_upcall.c
*
* Copyright (C) 2012 Simo Sorce <simo@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/types.h>
#include <linux/un.h>
#include <linux/sunrpc/svcauth.h>
#include "gss_rpc_upcall.h"
#define GSSPROXY_SOCK_PATHNAME "/var/run/gssproxy.sock"
#define GSSPROXY_PROGRAM (400112u)
#define GSSPROXY_VERS_1 (1u)
/*
* Encoding/Decoding functions
*/
enum {
GSSX_NULL = 0, /* Unused */
GSSX_INDICATE_MECHS = 1,
GSSX_GET_CALL_CONTEXT = 2,
GSSX_IMPORT_AND_CANON_NAME = 3,
GSSX_EXPORT_CRED = 4,
GSSX_IMPORT_CRED = 5,
GSSX_ACQUIRE_CRED = 6,
GSSX_STORE_CRED = 7,
GSSX_INIT_SEC_CONTEXT = 8,
GSSX_ACCEPT_SEC_CONTEXT = 9,
GSSX_RELEASE_HANDLE = 10,
GSSX_GET_MIC = 11,
GSSX_VERIFY = 12,
GSSX_WRAP = 13,
GSSX_UNWRAP = 14,
GSSX_WRAP_SIZE_LIMIT = 15,
};
#define PROC(proc, name) \
[GSSX_##proc] = { \
.p_proc = GSSX_##proc, \
.p_encode = (kxdreproc_t)gssx_enc_##name, \
.p_decode = (kxdrdproc_t)gssx_dec_##name, \
.p_arglen = GSSX_ARG_##name##_sz, \
.p_replen = GSSX_RES_##name##_sz, \
.p_statidx = GSSX_##proc, \
.p_name = #proc, \
}
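/*
 * Each PROC() entry is indexed by its GSSX_* procedure number; only
 * ACCEPT_SEC_CONTEXT has real XDR routines here, the others resolve
 * to the NULL stubs defined in gss_rpc_xdr.h.
 */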
static struct rpc_procinfo gssp_procedures[] = {
PROC(INDICATE_MECHS, indicate_mechs),
PROC(GET_CALL_CONTEXT, get_call_context),
PROC(IMPORT_AND_CANON_NAME, import_and_canon_name),
PROC(EXPORT_CRED, export_cred),
PROC(IMPORT_CRED, import_cred),
PROC(ACQUIRE_CRED, acquire_cred),
PROC(STORE_CRED, store_cred),
PROC(INIT_SEC_CONTEXT, init_sec_context),
PROC(ACCEPT_SEC_CONTEXT, accept_sec_context),
PROC(RELEASE_HANDLE, release_handle),
PROC(GET_MIC, get_mic),
PROC(VERIFY, verify),
PROC(WRAP, wrap),
PROC(UNWRAP, unwrap),
PROC(WRAP_SIZE_LIMIT, wrap_size_limit),
};
/*
* Common transport functions
*/
static const struct rpc_program gssp_program;
static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt)
{
static const struct sockaddr_un gssp_localaddr = {
.sun_family = AF_LOCAL,
.sun_path = GSSPROXY_SOCK_PATHNAME,
};
struct rpc_create_args args = {
.net = net,
.protocol = XPRT_TRANSPORT_LOCAL,
.address = (struct sockaddr *)&gssp_localaddr,
.addrsize = sizeof(gssp_localaddr),
.servername = "localhost",
.program = &gssp_program,
.version = GSSPROXY_VERS_1,
.authflavor = RPC_AUTH_NULL,
/*
* Note we want connection to be done in the caller's
* filesystem namespace. We therefore turn off the idle
* timeout, which would result in reconnections being
* done without the correct namespace:
*/
.flags = RPC_CLNT_CREATE_NOPING |
RPC_CLNT_CREATE_NO_IDLE_TIMEOUT
};
struct rpc_clnt *clnt;
int result = 0;
clnt = rpc_create(&args);
if (IS_ERR(clnt)) {
dprintk("RPC: failed to create AF_LOCAL gssproxy "
"client (errno %ld).\n", PTR_ERR(clnt));
result = PTR_ERR(clnt);
*_clnt = NULL;
goto out;
}
dprintk("RPC: created new gssp local client (gssp_local_clnt: "
"%p)\n", clnt);
*_clnt = clnt;
out:
return result;
}
void init_gssp_clnt(struct sunrpc_net *sn)
{
mutex_init(&sn->gssp_lock);
sn->gssp_clnt = NULL;
}
int set_gssp_clnt(struct net *net)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct rpc_clnt *clnt;
int ret;
mutex_lock(&sn->gssp_lock);
ret = gssp_rpc_create(net, &clnt);
if (!ret) {
if (sn->gssp_clnt)
rpc_shutdown_client(sn->gssp_clnt);
sn->gssp_clnt = clnt;
}
mutex_unlock(&sn->gssp_lock);
return ret;
}
void clear_gssp_clnt(struct sunrpc_net *sn)
{
mutex_lock(&sn->gssp_lock);
if (sn->gssp_clnt) {
rpc_shutdown_client(sn->gssp_clnt);
sn->gssp_clnt = NULL;
}
mutex_unlock(&sn->gssp_lock);
}
static struct rpc_clnt *get_gssp_clnt(struct sunrpc_net *sn)
{
struct rpc_clnt *clnt;
mutex_lock(&sn->gssp_lock);
clnt = sn->gssp_clnt;
if (clnt)
atomic_inc(&clnt->cl_count);
mutex_unlock(&sn->gssp_lock);
return clnt;
}
static int gssp_call(struct net *net, struct rpc_message *msg)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct rpc_clnt *clnt;
int status;
clnt = get_gssp_clnt(sn);
if (!clnt)
return -EIO;
status = rpc_call_sync(clnt, msg, 0);
if (status < 0) {
dprintk("gssp: rpc_call returned error %d\n", -status);
switch (status) {
case -EPROTONOSUPPORT:
status = -EINVAL;
break;
case -ECONNREFUSED:
case -ETIMEDOUT:
case -ENOTCONN:
status = -EAGAIN;
break;
case -ERESTARTSYS:
if (signalled())
status = -EINTR;
break;
default:
break;
}
}
rpc_release_client(clnt);
return status;
}
static void gssp_free_receive_pages(struct gssx_arg_accept_sec_context *arg)
{
int i;
for (i = 0; i < arg->npages && arg->pages[i]; i++)
__free_page(arg->pages[i]);
}
static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg)
{
arg->npages = DIV_ROUND_UP(NGROUPS_MAX * 4, PAGE_SIZE);
arg->pages = kzalloc(arg->npages * sizeof(struct page *), GFP_KERNEL);
/*
* XXX: actual pages are allocated by xdr layer in
* xdr_partial_copy_from_skb.
*/
if (!arg->pages)
return -ENOMEM;
return 0;
}
/*
* Public functions
*/
/* numbers somewhat arbitrary but large enough for current needs */
#define GSSX_MAX_OUT_HANDLE 128
#define GSSX_MAX_SRC_PRINC 256
#define GSSX_KMEMBUF (GSSX_max_output_handle_sz + \
GSSX_max_oid_sz + \
GSSX_max_princ_sz + \
sizeof(struct svc_cred))
int gssp_accept_sec_context_upcall(struct net *net,
struct gssp_upcall_data *data)
{
struct gssx_ctx ctxh = {
.state = data->in_handle
};
struct gssx_arg_accept_sec_context arg = {
.input_token = data->in_token,
};
struct gssx_ctx rctxh = {
/*
* pass in the max length we expect for each of these
* buffers but let the xdr code kmalloc them:
*/
.exported_context_token.len = GSSX_max_output_handle_sz,
.mech.len = GSS_OID_MAX_LEN,
.src_name.display_name.len = GSSX_max_princ_sz
};
struct gssx_res_accept_sec_context res = {
.context_handle = &rctxh,
.output_token = &data->out_token
};
struct rpc_message msg = {
.rpc_proc = &gssp_procedures[GSSX_ACCEPT_SEC_CONTEXT],
.rpc_argp = &arg,
.rpc_resp = &res,
.rpc_cred = NULL, /* FIXME ? */
};
struct xdr_netobj client_name = { 0 , NULL };
int ret;
if (data->in_handle.len != 0)
arg.context_handle = &ctxh;
res.output_token->len = GSSX_max_output_token_sz;
ret = gssp_alloc_receive_pages(&arg);
if (ret)
return ret;
/* use nfs/ for targ_name ? */
ret = gssp_call(net, &msg);
gssp_free_receive_pages(&arg);
/* we need to fetch all data even in case of error so
* that we can free special structures if they have been allocated */
data->major_status = res.status.major_status;
data->minor_status = res.status.minor_status;
if (res.context_handle) {
data->out_handle = rctxh.exported_context_token;
data->mech_oid.len = rctxh.mech.len;
if (rctxh.mech.data)
memcpy(data->mech_oid.data, rctxh.mech.data,
data->mech_oid.len);
client_name = rctxh.src_name.display_name;
}
if (res.options.count == 1) {
gssx_buffer *value = &res.options.data[0].value;
/* Currently we only decode CREDS_VALUE; if we add
* anything else we'll have to loop and match on the
* option name */
if (value->len == 1) {
/* steal group info from struct svc_cred */
data->creds = *(struct svc_cred *)value->data;
data->found_creds = 1;
}
/* whether we use it or not, free data */
kfree(value->data);
}
if (res.options.count != 0) {
kfree(res.options.data);
}
/* convert to GSS_NT_HOSTBASED_SERVICE form and set into creds */
if (data->found_creds && client_name.data != NULL) {
char *c;
data->creds.cr_principal = kstrndup(client_name.data,
client_name.len, GFP_KERNEL);
if (data->creds.cr_principal) {
/* terminate and remove realm part */
c = strchr(data->creds.cr_principal, '@');
if (c) {
*c = '\0';
/* change service-hostname delimiter */
c = strchr(data->creds.cr_principal, '/');
if (c) *c = '@';
}
if (!c) {
/* not a service principal */
kfree(data->creds.cr_principal);
data->creds.cr_principal = NULL;
}
}
}
kfree(client_name.data);
return ret;
}
void gssp_free_upcall_data(struct gssp_upcall_data *data)
{
kfree(data->in_handle.data);
kfree(data->out_handle.data);
kfree(data->out_token.data);
free_svc_cred(&data->creds);
}
/*
* Initialization stuff
*/
static const struct rpc_version gssp_version1 = {
.number = GSSPROXY_VERS_1,
.nrprocs = ARRAY_SIZE(gssp_procedures),
.procs = gssp_procedures,
};
static const struct rpc_version *gssp_version[] = {
NULL,
&gssp_version1,
};
static struct rpc_stat gssp_stats;
static const struct rpc_program gssp_program = {
.name = "gssproxy",
.number = GSSPROXY_PROGRAM,
.nrvers = ARRAY_SIZE(gssp_version),
.version = gssp_version,
.stats = &gssp_stats,
};

View file

@ -0,0 +1,48 @@
/*
* linux/net/sunrpc/gss_rpc_upcall.h
*
* Copyright (C) 2012 Simo Sorce <simo@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _GSS_RPC_UPCALL_H
#define _GSS_RPC_UPCALL_H
#include <linux/sunrpc/gss_api.h>
#include <linux/sunrpc/auth_gss.h>
#include "gss_rpc_xdr.h"
#include "../netns.h"
struct gssp_upcall_data {
struct xdr_netobj in_handle;
struct gssp_in_token in_token;
struct xdr_netobj out_handle;
struct xdr_netobj out_token;
struct rpcsec_gss_oid mech_oid;
struct svc_cred creds;
int found_creds;
int major_status;
int minor_status;
};
int gssp_accept_sec_context_upcall(struct net *net,
struct gssp_upcall_data *data);
void gssp_free_upcall_data(struct gssp_upcall_data *data);
void init_gssp_clnt(struct sunrpc_net *);
int set_gssp_clnt(struct net *);
void clear_gssp_clnt(struct sunrpc_net *);
#endif /* _GSS_RPC_UPCALL_H */

View file

@ -0,0 +1,839 @@
/*
* GSS Proxy upcall module
*
* Copyright (C) 2012 Simo Sorce <simo@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/sunrpc/svcauth.h>
#include "gss_rpc_xdr.h"
static int gssx_enc_bool(struct xdr_stream *xdr, int v)
{
__be32 *p;
p = xdr_reserve_space(xdr, 4);
if (unlikely(p == NULL))
return -ENOSPC;
*p = v ? xdr_one : xdr_zero;
return 0;
}
static int gssx_dec_bool(struct xdr_stream *xdr, u32 *v)
{
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -ENOSPC;
*v = be32_to_cpu(*p);
return 0;
}
static int gssx_enc_buffer(struct xdr_stream *xdr,
gssx_buffer *buf)
{
__be32 *p;
p = xdr_reserve_space(xdr, sizeof(u32) + buf->len);
if (!p)
return -ENOSPC;
xdr_encode_opaque(p, buf->data, buf->len);
return 0;
}
static int gssx_enc_in_token(struct xdr_stream *xdr,
struct gssp_in_token *in)
{
__be32 *p;
p = xdr_reserve_space(xdr, 4);
if (!p)
return -ENOSPC;
*p = cpu_to_be32(in->page_len);
/* all we need to do is to write pages */
xdr_write_pages(xdr, in->pages, in->page_base, in->page_len);
return 0;
}
static int gssx_dec_buffer(struct xdr_stream *xdr,
gssx_buffer *buf)
{
u32 length;
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -ENOSPC;
length = be32_to_cpup(p);
p = xdr_inline_decode(xdr, length);
if (unlikely(p == NULL))
return -ENOSPC;
if (buf->len == 0) {
/* we intentionally are not interested in this buffer */
return 0;
}
if (length > buf->len)
return -ENOSPC;
if (!buf->data) {
buf->data = kmemdup(p, length, GFP_KERNEL);
if (!buf->data)
return -ENOMEM;
} else {
memcpy(buf->data, p, length);
}
buf->len = length;
return 0;
}
static int gssx_enc_option(struct xdr_stream *xdr,
struct gssx_option *opt)
{
int err;
err = gssx_enc_buffer(xdr, &opt->option);
if (err)
return err;
err = gssx_enc_buffer(xdr, &opt->value);
return err;
}
static int gssx_dec_option(struct xdr_stream *xdr,
struct gssx_option *opt)
{
int err;
err = gssx_dec_buffer(xdr, &opt->option);
if (err)
return err;
err = gssx_dec_buffer(xdr, &opt->value);
return err;
}
static int dummy_enc_opt_array(struct xdr_stream *xdr,
struct gssx_option_array *oa)
{
__be32 *p;
if (oa->count != 0)
return -EINVAL;
p = xdr_reserve_space(xdr, 4);
if (!p)
return -ENOSPC;
*p = 0;
return 0;
}
static int dummy_dec_opt_array(struct xdr_stream *xdr,
struct gssx_option_array *oa)
{
struct gssx_option dummy;
u32 count, i;
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -ENOSPC;
count = be32_to_cpup(p++);
memset(&dummy, 0, sizeof(dummy));
for (i = 0; i < count; i++) {
gssx_dec_option(xdr, &dummy);
}
oa->count = 0;
oa->data = NULL;
return 0;
}
static int get_host_u32(struct xdr_stream *xdr, u32 *res)
{
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (!p)
return -EINVAL;
/* Contents of linux creds are all host-endian: */
memcpy(res, p, sizeof(u32));
return 0;
}
static int gssx_dec_linux_creds(struct xdr_stream *xdr,
struct svc_cred *creds)
{
u32 length;
__be32 *p;
u32 tmp;
u32 N;
int i, err;
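/*
 * The creds option value is an XDR opaque whose payload is a series
 * of host-endian u32s: uid, gid, supplementary gid count N, then N
 * gids, so the opaque length must equal (3 + N) * sizeof(u32).
 */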
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -ENOSPC;
length = be32_to_cpup(p);
if (length > (3 + NGROUPS_MAX) * sizeof(u32))
return -ENOSPC;
/* uid */
err = get_host_u32(xdr, &tmp);
if (err)
return err;
creds->cr_uid = make_kuid(&init_user_ns, tmp);
/* gid */
err = get_host_u32(xdr, &tmp);
if (err)
return err;
creds->cr_gid = make_kgid(&init_user_ns, tmp);
/* number of additional gid's */
err = get_host_u32(xdr, &tmp);
if (err)
return err;
N = tmp;
if ((3 + N) * sizeof(u32) != length)
return -EINVAL;
creds->cr_group_info = groups_alloc(N);
if (creds->cr_group_info == NULL)
return -ENOMEM;
/* gid's */
for (i = 0; i < N; i++) {
kgid_t kgid;
err = get_host_u32(xdr, &tmp);
if (err)
goto out_free_groups;
err = -EINVAL;
kgid = make_kgid(&init_user_ns, tmp);
if (!gid_valid(kgid))
goto out_free_groups;
GROUP_AT(creds->cr_group_info, i) = kgid;
}
return 0;
out_free_groups:
groups_free(creds->cr_group_info);
return err;
}
static int gssx_dec_option_array(struct xdr_stream *xdr,
struct gssx_option_array *oa)
{
struct svc_cred *creds;
u32 count, i;
__be32 *p;
int err;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -ENOSPC;
count = be32_to_cpup(p++);
if (!count)
return 0;
/* we recognize only 1 currently: CREDS_VALUE */
oa->count = 1;
oa->data = kmalloc(sizeof(struct gssx_option), GFP_KERNEL);
if (!oa->data)
return -ENOMEM;
creds = kmalloc(sizeof(struct svc_cred), GFP_KERNEL);
if (!creds) {
kfree(oa->data);
return -ENOMEM;
}
oa->data[0].option.data = CREDS_VALUE;
oa->data[0].option.len = sizeof(CREDS_VALUE);
oa->data[0].value.data = (void *)creds;
oa->data[0].value.len = 0;
for (i = 0; i < count; i++) {
gssx_buffer dummy = { 0, NULL };
u32 length;
/* option buffer */
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -ENOSPC;
length = be32_to_cpup(p);
p = xdr_inline_decode(xdr, length);
if (unlikely(p == NULL))
return -ENOSPC;
if (length == sizeof(CREDS_VALUE) &&
memcmp(p, CREDS_VALUE, sizeof(CREDS_VALUE)) == 0) {
/* We have creds here. parse them */
err = gssx_dec_linux_creds(xdr, creds);
if (err)
return err;
oa->data[0].value.len = 1; /* presence */
} else {
/* consume uninteresting buffer */
err = gssx_dec_buffer(xdr, &dummy);
if (err)
return err;
}
}
return 0;
}
static int gssx_dec_status(struct xdr_stream *xdr,
struct gssx_status *status)
{
__be32 *p;
int err;
/* status->major_status */
p = xdr_inline_decode(xdr, 8);
if (unlikely(p == NULL))
return -ENOSPC;
p = xdr_decode_hyper(p, &status->major_status);
/* status->mech */
err = gssx_dec_buffer(xdr, &status->mech);
if (err)
return err;
/* status->minor_status */
p = xdr_inline_decode(xdr, 8);
if (unlikely(p == NULL))
return -ENOSPC;
p = xdr_decode_hyper(p, &status->minor_status);
/* status->major_status_string */
err = gssx_dec_buffer(xdr, &status->major_status_string);
if (err)
return err;
/* status->minor_status_string */
err = gssx_dec_buffer(xdr, &status->minor_status_string);
if (err)
return err;
/* status->server_ctx */
err = gssx_dec_buffer(xdr, &status->server_ctx);
if (err)
return err;
/* we assume we have no options for now, so simply consume them */
/* status->options */
err = dummy_dec_opt_array(xdr, &status->options);
return err;
}
static int gssx_enc_call_ctx(struct xdr_stream *xdr,
struct gssx_call_ctx *ctx)
{
struct gssx_option opt;
__be32 *p;
int err;
/* ctx->locale */
err = gssx_enc_buffer(xdr, &ctx->locale);
if (err)
return err;
/* ctx->server_ctx */
err = gssx_enc_buffer(xdr, &ctx->server_ctx);
if (err)
return err;
/* we always want to ask for lucid contexts */
/* ctx->options */
p = xdr_reserve_space(xdr, 4);
*p = cpu_to_be32(2);
/* we want a lucid_v1 context */
opt.option.data = LUCID_OPTION;
opt.option.len = sizeof(LUCID_OPTION);
opt.value.data = LUCID_VALUE;
opt.value.len = sizeof(LUCID_VALUE);
err = gssx_enc_option(xdr, &opt);
/* ..and user creds */
opt.option.data = CREDS_OPTION;
opt.option.len = sizeof(CREDS_OPTION);
opt.value.data = CREDS_VALUE;
opt.value.len = sizeof(CREDS_VALUE);
err = gssx_enc_option(xdr, &opt);
return err;
}
static int gssx_dec_name_attr(struct xdr_stream *xdr,
struct gssx_name_attr *attr)
{
int err;
/* attr->attr */
err = gssx_dec_buffer(xdr, &attr->attr);
if (err)
return err;
/* attr->value */
err = gssx_dec_buffer(xdr, &attr->value);
if (err)
return err;
/* attr->extensions */
err = dummy_dec_opt_array(xdr, &attr->extensions);
return err;
}
static int dummy_enc_nameattr_array(struct xdr_stream *xdr,
struct gssx_name_attr_array *naa)
{
__be32 *p;
if (naa->count != 0)
return -EINVAL;
p = xdr_reserve_space(xdr, 4);
if (!p)
return -ENOSPC;
*p = 0;
return 0;
}
static int dummy_dec_nameattr_array(struct xdr_stream *xdr,
struct gssx_name_attr_array *naa)
{
struct gssx_name_attr dummy = { .attr = {.len = 0} };
u32 count, i;
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return -ENOSPC;
count = be32_to_cpup(p++);
for (i = 0; i < count; i++) {
gssx_dec_name_attr(xdr, &dummy);
}
naa->count = 0;
naa->data = NULL;
return 0;
}
static struct xdr_netobj zero_netobj = {};
static struct gssx_name_attr_array zero_name_attr_array = {};
static struct gssx_option_array zero_option_array = {};
static int gssx_enc_name(struct xdr_stream *xdr,
struct gssx_name *name)
{
int err;
/* name->display_name */
err = gssx_enc_buffer(xdr, &name->display_name);
if (err)
return err;
/* name->name_type */
err = gssx_enc_buffer(xdr, &zero_netobj);
if (err)
return err;
/* name->exported_name */
err = gssx_enc_buffer(xdr, &zero_netobj);
if (err)
return err;
/* name->exported_composite_name */
err = gssx_enc_buffer(xdr, &zero_netobj);
if (err)
return err;
/* leave name_attributes empty for now, will add once we have any
* to pass up at all */
/* name->name_attributes */
err = dummy_enc_nameattr_array(xdr, &zero_name_attr_array);
if (err)
return err;
/* leave options empty for now, will add once we have any options
* to pass up at all */
/* name->extensions */
err = dummy_enc_opt_array(xdr, &zero_option_array);
return err;
}
static int gssx_dec_name(struct xdr_stream *xdr,
struct gssx_name *name)
{
struct xdr_netobj dummy_netobj = { .len = 0 };
struct gssx_name_attr_array dummy_name_attr_array = { .count = 0 };
struct gssx_option_array dummy_option_array = { .count = 0 };
int err;
/* name->display_name */
err = gssx_dec_buffer(xdr, &name->display_name);
if (err)
return err;
/* name->name_type */
err = gssx_dec_buffer(xdr, &dummy_netobj);
if (err)
return err;
/* name->exported_name */
err = gssx_dec_buffer(xdr, &dummy_netobj);
if (err)
return err;
/* name->exported_composite_name */
err = gssx_dec_buffer(xdr, &dummy_netobj);
if (err)
return err;
/* we assume we have no attributes for now, so simply consume them */
/* name->name_attributes */
err = dummy_dec_nameattr_array(xdr, &dummy_name_attr_array);
if (err)
return err;
/* we assume we have no options for now, so simply consume them */
/* name->extensions */
err = dummy_dec_opt_array(xdr, &dummy_option_array);
return err;
}
static int dummy_enc_credel_array(struct xdr_stream *xdr,
struct gssx_cred_element_array *cea)
{
__be32 *p;
if (cea->count != 0)
return -EINVAL;
p = xdr_reserve_space(xdr, 4);
if (!p)
return -ENOSPC;
*p = 0;
return 0;
}
static int gssx_enc_cred(struct xdr_stream *xdr,
struct gssx_cred *cred)
{
int err;
/* cred->desired_name */
err = gssx_enc_name(xdr, &cred->desired_name);
if (err)
return err;
/* cred->elements */
err = dummy_enc_credel_array(xdr, &cred->elements);
if (err)
return err;
/* cred->cred_handle_reference */
err = gssx_enc_buffer(xdr, &cred->cred_handle_reference);
if (err)
return err;
/* cred->needs_release */
err = gssx_enc_bool(xdr, cred->needs_release);
return err;
}
static int gssx_enc_ctx(struct xdr_stream *xdr,
struct gssx_ctx *ctx)
{
__be32 *p;
int err;
/* ctx->exported_context_token */
err = gssx_enc_buffer(xdr, &ctx->exported_context_token);
if (err)
return err;
/* ctx->state */
err = gssx_enc_buffer(xdr, &ctx->state);
if (err)
return err;
/* ctx->need_release */
err = gssx_enc_bool(xdr, ctx->need_release);
if (err)
return err;
/* ctx->mech */
err = gssx_enc_buffer(xdr, &ctx->mech);
if (err)
return err;
/* ctx->src_name */
err = gssx_enc_name(xdr, &ctx->src_name);
if (err)
return err;
/* ctx->targ_name */
err = gssx_enc_name(xdr, &ctx->targ_name);
if (err)
return err;
/* ctx->lifetime */
p = xdr_reserve_space(xdr, 8+8);
if (!p)
return -ENOSPC;
p = xdr_encode_hyper(p, ctx->lifetime);
/* ctx->ctx_flags */
p = xdr_encode_hyper(p, ctx->ctx_flags);
/* ctx->locally_initiated */
err = gssx_enc_bool(xdr, ctx->locally_initiated);
if (err)
return err;
/* ctx->open */
err = gssx_enc_bool(xdr, ctx->open);
if (err)
return err;
/* leave options empty for now, will add once we have any options
* to pass up at all */
/* ctx->options */
err = dummy_enc_opt_array(xdr, &ctx->options);
return err;
}
static int gssx_dec_ctx(struct xdr_stream *xdr,
struct gssx_ctx *ctx)
{
__be32 *p;
int err;
/* ctx->exported_context_token */
err = gssx_dec_buffer(xdr, &ctx->exported_context_token);
if (err)
return err;
/* ctx->state */
err = gssx_dec_buffer(xdr, &ctx->state);
if (err)
return err;
/* ctx->need_release */
err = gssx_dec_bool(xdr, &ctx->need_release);
if (err)
return err;
/* ctx->mech */
err = gssx_dec_buffer(xdr, &ctx->mech);
if (err)
return err;
/* ctx->src_name */
err = gssx_dec_name(xdr, &ctx->src_name);
if (err)
return err;
/* ctx->targ_name */
err = gssx_dec_name(xdr, &ctx->targ_name);
if (err)
return err;
/* ctx->lifetime */
p = xdr_inline_decode(xdr, 8+8);
if (unlikely(p == NULL))
return -ENOSPC;
p = xdr_decode_hyper(p, &ctx->lifetime);
/* ctx->ctx_flags */
p = xdr_decode_hyper(p, &ctx->ctx_flags);
/* ctx->locally_initiated */
err = gssx_dec_bool(xdr, &ctx->locally_initiated);
if (err)
return err;
/* ctx->open */
err = gssx_dec_bool(xdr, &ctx->open);
if (err)
return err;
/* we assume we have no options for now, so simply consume them */
/* ctx->options */
err = dummy_dec_opt_array(xdr, &ctx->options);
return err;
}
static int gssx_enc_cb(struct xdr_stream *xdr, struct gssx_cb *cb)
{
__be32 *p;
int err;
/* cb->initiator_addrtype */
p = xdr_reserve_space(xdr, 8);
if (!p)
return -ENOSPC;
p = xdr_encode_hyper(p, cb->initiator_addrtype);
/* cb->initiator_address */
err = gssx_enc_buffer(xdr, &cb->initiator_address);
if (err)
return err;
/* cb->acceptor_addrtype */
p = xdr_reserve_space(xdr, 8);
if (!p)
return -ENOSPC;
p = xdr_encode_hyper(p, cb->acceptor_addrtype);
/* cb->acceptor_address */
err = gssx_enc_buffer(xdr, &cb->acceptor_address);
if (err)
return err;
/* cb->application_data */
err = gssx_enc_buffer(xdr, &cb->application_data);
return err;
}
void gssx_enc_accept_sec_context(struct rpc_rqst *req,
struct xdr_stream *xdr,
struct gssx_arg_accept_sec_context *arg)
{
int err;
err = gssx_enc_call_ctx(xdr, &arg->call_ctx);
if (err)
goto done;
/* arg->context_handle */
if (arg->context_handle)
err = gssx_enc_ctx(xdr, arg->context_handle);
else
err = gssx_enc_bool(xdr, 0);
if (err)
goto done;
/* arg->cred_handle */
if (arg->cred_handle)
err = gssx_enc_cred(xdr, arg->cred_handle);
else
err = gssx_enc_bool(xdr, 0);
if (err)
goto done;
/* arg->input_token */
err = gssx_enc_in_token(xdr, &arg->input_token);
if (err)
goto done;
/* arg->input_cb */
if (arg->input_cb)
err = gssx_enc_cb(xdr, arg->input_cb);
else
err = gssx_enc_bool(xdr, 0);
if (err)
goto done;
err = gssx_enc_bool(xdr, arg->ret_deleg_cred);
if (err)
goto done;
/* leave options empty for now, will add once we have any options
* to pass up at all */
/* arg->options */
err = dummy_enc_opt_array(xdr, &arg->options);
xdr_inline_pages(&req->rq_rcv_buf,
PAGE_SIZE/2 /* pretty arbitrary */,
arg->pages, 0 /* page base */, arg->npages * PAGE_SIZE);
done:
if (err)
dprintk("RPC: gssx_enc_accept_sec_context: %d\n", err);
}
int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
struct xdr_stream *xdr,
struct gssx_res_accept_sec_context *res)
{
u32 value_follows;
int err;
/* res->status */
err = gssx_dec_status(xdr, &res->status);
if (err)
return err;
/* res->context_handle */
err = gssx_dec_bool(xdr, &value_follows);
if (err)
return err;
if (value_follows) {
err = gssx_dec_ctx(xdr, res->context_handle);
if (err)
return err;
} else {
res->context_handle = NULL;
}
/* res->output_token */
err = gssx_dec_bool(xdr, &value_follows);
if (err)
return err;
if (value_follows) {
err = gssx_dec_buffer(xdr, res->output_token);
if (err)
return err;
} else {
res->output_token = NULL;
}
/* res->delegated_cred_handle */
err = gssx_dec_bool(xdr, &value_follows);
if (err)
return err;
if (value_follows) {
/* we do not support upcall servers sending this data. */
return -EINVAL;
}
/* res->options */
err = gssx_dec_option_array(xdr, &res->options);
return err;
}

View file

@ -0,0 +1,267 @@
/*
* GSS Proxy upcall module
*
* Copyright (C) 2012 Simo Sorce <simo@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _LINUX_GSS_RPC_XDR_H
#define _LINUX_GSS_RPC_XDR_H
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xprtsock.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
#define LUCID_OPTION "exported_context_type"
#define LUCID_VALUE "linux_lucid_v1"
#define CREDS_OPTION "exported_creds_type"
#define CREDS_VALUE "linux_creds_v1"
typedef struct xdr_netobj gssx_buffer;
typedef struct xdr_netobj utf8string;
typedef struct xdr_netobj gssx_OID;
enum gssx_cred_usage {
GSSX_C_INITIATE = 1,
GSSX_C_ACCEPT = 2,
GSSX_C_BOTH = 3,
};
struct gssx_option {
gssx_buffer option;
gssx_buffer value;
};
struct gssx_option_array {
u32 count;
struct gssx_option *data;
};
struct gssx_status {
u64 major_status;
gssx_OID mech;
u64 minor_status;
utf8string major_status_string;
utf8string minor_status_string;
gssx_buffer server_ctx;
struct gssx_option_array options;
};
struct gssx_call_ctx {
utf8string locale;
gssx_buffer server_ctx;
struct gssx_option_array options;
};
struct gssx_name_attr {
gssx_buffer attr;
gssx_buffer value;
struct gssx_option_array extensions;
};
struct gssx_name_attr_array {
u32 count;
struct gssx_name_attr *data;
};
struct gssx_name {
gssx_buffer display_name;
};
typedef struct gssx_name gssx_name;
struct gssx_cred_element {
gssx_name MN;
gssx_OID mech;
u32 cred_usage;
u64 initiator_time_rec;
u64 acceptor_time_rec;
struct gssx_option_array options;
};
struct gssx_cred_element_array {
u32 count;
struct gssx_cred_element *data;
};
struct gssx_cred {
gssx_name desired_name;
struct gssx_cred_element_array elements;
gssx_buffer cred_handle_reference;
u32 needs_release;
};
struct gssx_ctx {
gssx_buffer exported_context_token;
gssx_buffer state;
u32 need_release;
gssx_OID mech;
gssx_name src_name;
gssx_name targ_name;
u64 lifetime;
u64 ctx_flags;
u32 locally_initiated;
u32 open;
struct gssx_option_array options;
};
struct gssx_cb {
u64 initiator_addrtype;
gssx_buffer initiator_address;
u64 acceptor_addrtype;
gssx_buffer acceptor_address;
gssx_buffer application_data;
};
/* This structure is not defined in the protocol.
* It is used in the kernel to carry around a big buffer
* as a set of pages */
struct gssp_in_token {
struct page **pages; /* Array of contiguous pages */
unsigned int page_base; /* Start of page data */
unsigned int page_len; /* Length of page data */
};
struct gssx_arg_accept_sec_context {
struct gssx_call_ctx call_ctx;
struct gssx_ctx *context_handle;
struct gssx_cred *cred_handle;
struct gssp_in_token input_token;
struct gssx_cb *input_cb;
u32 ret_deleg_cred;
struct gssx_option_array options;
struct page **pages;
unsigned int npages;
};
struct gssx_res_accept_sec_context {
struct gssx_status status;
struct gssx_ctx *context_handle;
gssx_buffer *output_token;
/* struct gssx_cred *delegated_cred_handle; not used in kernel */
struct gssx_option_array options;
};
#define gssx_enc_indicate_mechs NULL
#define gssx_dec_indicate_mechs NULL
#define gssx_enc_get_call_context NULL
#define gssx_dec_get_call_context NULL
#define gssx_enc_import_and_canon_name NULL
#define gssx_dec_import_and_canon_name NULL
#define gssx_enc_export_cred NULL
#define gssx_dec_export_cred NULL
#define gssx_enc_import_cred NULL
#define gssx_dec_import_cred NULL
#define gssx_enc_acquire_cred NULL
#define gssx_dec_acquire_cred NULL
#define gssx_enc_store_cred NULL
#define gssx_dec_store_cred NULL
#define gssx_enc_init_sec_context NULL
#define gssx_dec_init_sec_context NULL
void gssx_enc_accept_sec_context(struct rpc_rqst *req,
struct xdr_stream *xdr,
struct gssx_arg_accept_sec_context *args);
int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
struct xdr_stream *xdr,
struct gssx_res_accept_sec_context *res);
#define gssx_enc_release_handle NULL
#define gssx_dec_release_handle NULL
#define gssx_enc_get_mic NULL
#define gssx_dec_get_mic NULL
#define gssx_enc_verify NULL
#define gssx_dec_verify NULL
#define gssx_enc_wrap NULL
#define gssx_dec_wrap NULL
#define gssx_enc_unwrap NULL
#define gssx_dec_unwrap NULL
#define gssx_enc_wrap_size_limit NULL
#define gssx_dec_wrap_size_limit NULL
/* unimplemented calls are set to 0 size */
#define GSSX_ARG_indicate_mechs_sz 0
#define GSSX_RES_indicate_mechs_sz 0
#define GSSX_ARG_get_call_context_sz 0
#define GSSX_RES_get_call_context_sz 0
#define GSSX_ARG_import_and_canon_name_sz 0
#define GSSX_RES_import_and_canon_name_sz 0
#define GSSX_ARG_export_cred_sz 0
#define GSSX_RES_export_cred_sz 0
#define GSSX_ARG_import_cred_sz 0
#define GSSX_RES_import_cred_sz 0
#define GSSX_ARG_acquire_cred_sz 0
#define GSSX_RES_acquire_cred_sz 0
#define GSSX_ARG_store_cred_sz 0
#define GSSX_RES_store_cred_sz 0
#define GSSX_ARG_init_sec_context_sz 0
#define GSSX_RES_init_sec_context_sz 0
#define GSSX_default_in_call_ctx_sz (4 + 4 + 4 + \
8 + sizeof(LUCID_OPTION) + sizeof(LUCID_VALUE) + \
8 + sizeof(CREDS_OPTION) + sizeof(CREDS_VALUE))
#define GSSX_default_in_ctx_hndl_sz (4 + 4+8 + 4 + 4 + 6*4 + 6*4 + 8 + 8 + \
4 + 4 + 4)
#define GSSX_default_in_cred_sz 4 /* we send in no cred_handle */
#define GSSX_default_in_token_sz 4 /* does *not* include token data */
#define GSSX_default_in_cb_sz 4 /* we do not use channel bindings */
#define GSSX_ARG_accept_sec_context_sz (GSSX_default_in_call_ctx_sz + \
GSSX_default_in_ctx_hndl_sz + \
GSSX_default_in_cred_sz + \
GSSX_default_in_token_sz + \
GSSX_default_in_cb_sz + \
4 /* no deleg creds boolean */ + \
4) /* empty options */
/* somewhat arbitrary numbers but large enough (we ignore some of the data
* sent down, but it is part of the protocol so we need enough space to take
* it in) */
#define GSSX_default_status_sz 8 + 24 + 8 + 256 + 256 + 16 + 4
#define GSSX_max_output_handle_sz 128
#define GSSX_max_oid_sz 16
#define GSSX_max_princ_sz 256
#define GSSX_default_ctx_sz (GSSX_max_output_handle_sz + \
16 + 4 + GSSX_max_oid_sz + \
2 * GSSX_max_princ_sz + \
8 + 8 + 4 + 4 + 4)
#define GSSX_max_output_token_sz 1024
/* grouplist not included; we allocate separate pages for that: */
#define GSSX_max_creds_sz (4 + 4 + 4 /* + NGROUPS_MAX*4 */)
#define GSSX_RES_accept_sec_context_sz (GSSX_default_status_sz + \
GSSX_default_ctx_sz + \
GSSX_max_output_token_sz + \
4 + GSSX_max_creds_sz)
#define GSSX_ARG_release_handle_sz 0
#define GSSX_RES_release_handle_sz 0
#define GSSX_ARG_get_mic_sz 0
#define GSSX_RES_get_mic_sz 0
#define GSSX_ARG_verify_sz 0
#define GSSX_RES_verify_sz 0
#define GSSX_ARG_wrap_sz 0
#define GSSX_RES_wrap_sz 0
#define GSSX_ARG_unwrap_sz 0
#define GSSX_RES_unwrap_sz 0
#define GSSX_ARG_wrap_size_limit_sz 0
#define GSSX_RES_wrap_size_limit_sz 0
#endif /* _LINUX_GSS_RPC_XDR_H */

File diff suppressed because it is too large Load diff

144
net/sunrpc/auth_null.c Normal file
View file

@ -0,0 +1,144 @@
/*
* linux/net/sunrpc/auth_null.c
*
* AUTH_NULL authentication. Really :-)
*
* Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
*/
#include <linux/types.h>
#include <linux/module.h>
#include <linux/sunrpc/clnt.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
static struct rpc_auth null_auth;
static struct rpc_cred null_cred;
static struct rpc_auth *
nul_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
atomic_inc(&null_auth.au_count);
return &null_auth;
}
static void
nul_destroy(struct rpc_auth *auth)
{
}
/*
* Lookup NULL creds for current process
*/
static struct rpc_cred *
nul_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{
if (flags & RPCAUTH_LOOKUP_RCU)
return &null_cred;
return get_rpccred(&null_cred);
}
/*
* Destroy cred handle.
*/
static void
nul_destroy_cred(struct rpc_cred *cred)
{
}
/*
* Match cred handle against current process
*/
static int
nul_match(struct auth_cred *acred, struct rpc_cred *cred, int taskflags)
{
return 1;
}
/*
* Marshal credential.
*/
static __be32 *
nul_marshal(struct rpc_task *task, __be32 *p)
{
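/*
 * AUTH_NULL puts an empty credential and an empty verifier on the wire:
 * a flavor word followed by a zero body length for each.
 */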
*p++ = htonl(RPC_AUTH_NULL);
*p++ = 0;
*p++ = htonl(RPC_AUTH_NULL);
*p++ = 0;
return p;
}
/*
* Refresh credential. This is a no-op for AUTH_NULL
*/
static int
nul_refresh(struct rpc_task *task)
{
set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_rqstp->rq_cred->cr_flags);
return 0;
}
static __be32 *
nul_validate(struct rpc_task *task, __be32 *p)
{
rpc_authflavor_t flavor;
u32 size;
flavor = ntohl(*p++);
if (flavor != RPC_AUTH_NULL) {
printk("RPC: bad verf flavor: %u\n", flavor);
return ERR_PTR(-EIO);
}
size = ntohl(*p++);
if (size != 0) {
printk("RPC: bad verf size: %u\n", size);
return ERR_PTR(-EIO);
}
return p;
}
const struct rpc_authops authnull_ops = {
.owner = THIS_MODULE,
.au_flavor = RPC_AUTH_NULL,
.au_name = "NULL",
.create = nul_create,
.destroy = nul_destroy,
.lookup_cred = nul_lookup_cred,
};
static
struct rpc_auth null_auth = {
.au_cslack = 4,
.au_rslack = 2,
.au_ops = &authnull_ops,
.au_flavor = RPC_AUTH_NULL,
.au_count = ATOMIC_INIT(0),
};
static
const struct rpc_credops null_credops = {
.cr_name = "AUTH_NULL",
.crdestroy = nul_destroy_cred,
.crbind = rpcauth_generic_bind_cred,
.crmatch = nul_match,
.crmarshal = nul_marshal,
.crrefresh = nul_refresh,
.crvalidate = nul_validate,
};
static
struct rpc_cred null_cred = {
.cr_lru = LIST_HEAD_INIT(null_cred.cr_lru),
.cr_auth = &null_auth,
.cr_ops = &null_credops,
.cr_count = ATOMIC_INIT(1),
.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE,
#ifdef RPC_DEBUG
.cr_magic = RPCAUTH_CRED_MAGIC,
#endif
};

247
net/sunrpc/auth_unix.c Normal file
View file

@ -0,0 +1,247 @@
/*
* linux/net/sunrpc/auth_unix.c
*
* UNIX-style authentication; no AUTH_SHORT support
*
* Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
*/
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/auth.h>
#include <linux/user_namespace.h>
#define NFS_NGROUPS 16
struct unx_cred {
struct rpc_cred uc_base;
kgid_t uc_gid;
kgid_t uc_gids[NFS_NGROUPS];
};
#define uc_uid uc_base.cr_uid
#define UNX_WRITESLACK (21 + (UNX_MAXNODENAME >> 2))
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
static struct rpc_auth unix_auth;
static const struct rpc_credops unix_credops;
static struct rpc_auth *
unx_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
dprintk("RPC: creating UNIX authenticator for client %p\n",
clnt);
atomic_inc(&unix_auth.au_count);
return &unix_auth;
}
static void
unx_destroy(struct rpc_auth *auth)
{
dprintk("RPC: destroying UNIX authenticator %p\n", auth);
rpcauth_clear_credcache(auth->au_credcache);
}
/*
* Lookup AUTH_UNIX creds for current process
*/
static struct rpc_cred *
unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{
return rpcauth_lookup_credcache(auth, acred, flags);
}
static struct rpc_cred *
unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{
struct unx_cred *cred;
unsigned int groups = 0;
unsigned int i;
dprintk("RPC: allocating UNIX cred for uid %d gid %d\n",
from_kuid(&init_user_ns, acred->uid),
from_kgid(&init_user_ns, acred->gid));
if (!(cred = kmalloc(sizeof(*cred), GFP_NOFS)))
return ERR_PTR(-ENOMEM);
rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops);
cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
if (acred->group_info != NULL)
groups = acred->group_info->ngroups;
if (groups > NFS_NGROUPS)
groups = NFS_NGROUPS;
cred->uc_gid = acred->gid;
for (i = 0; i < groups; i++)
cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
if (i < NFS_NGROUPS)
cred->uc_gids[i] = INVALID_GID;
return &cred->uc_base;
}
static void
unx_free_cred(struct unx_cred *unx_cred)
{
dprintk("RPC: unx_free_cred %p\n", unx_cred);
kfree(unx_cred);
}
static void
unx_free_cred_callback(struct rcu_head *head)
{
struct unx_cred *unx_cred = container_of(head, struct unx_cred, uc_base.cr_rcu);
unx_free_cred(unx_cred);
}
static void
unx_destroy_cred(struct rpc_cred *cred)
{
call_rcu(&cred->cr_rcu, unx_free_cred_callback);
}
/*
* Match credentials against current process creds.
* The root_override argument takes care of cases where the caller may
* request root creds (e.g. for NFS swapping).
*/
static int
unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags)
{
struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base);
unsigned int groups = 0;
unsigned int i;
if (!uid_eq(cred->uc_uid, acred->uid) || !gid_eq(cred->uc_gid, acred->gid))
return 0;
if (acred->group_info != NULL)
groups = acred->group_info->ngroups;
if (groups > NFS_NGROUPS)
groups = NFS_NGROUPS;
for (i = 0; i < groups ; i++)
if (!gid_eq(cred->uc_gids[i], GROUP_AT(acred->group_info, i)))
return 0;
if (groups < NFS_NGROUPS && gid_valid(cred->uc_gids[groups]))
return 0;
return 1;
}
/*
* Marshal credentials.
* Maybe we should keep a cached credential for performance reasons.
*/
static __be32 *
unx_marshal(struct rpc_task *task, __be32 *p)
{
struct rpc_clnt *clnt = task->tk_client;
struct unx_cred *cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base);
__be32 *base, *hold;
int i;
*p++ = htonl(RPC_AUTH_UNIX);
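/*
 * Reserve a word for the credential body length; it is back-filled
 * below once the nodename, uid, gid and gid array have been written.
 */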
base = p++;
*p++ = htonl(jiffies/HZ);
/*
* Copy the UTS nodename captured when the client was created.
*/
p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
*p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid));
*p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid));
hold = p++;
for (i = 0; i < 16 && gid_valid(cred->uc_gids[i]); i++)
*p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gids[i]));
*hold = htonl(p - hold - 1); /* gid array length */
*base = htonl((p - base - 1) << 2); /* cred length */
*p++ = htonl(RPC_AUTH_NULL);
*p++ = htonl(0);
return p;
}
/*
* Refresh credentials. This is a no-op for AUTH_UNIX
*/
static int
unx_refresh(struct rpc_task *task)
{
set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_rqstp->rq_cred->cr_flags);
return 0;
}
static __be32 *
unx_validate(struct rpc_task *task, __be32 *p)
{
rpc_authflavor_t flavor;
u32 size;
flavor = ntohl(*p++);
if (flavor != RPC_AUTH_NULL &&
flavor != RPC_AUTH_UNIX &&
flavor != RPC_AUTH_SHORT) {
printk("RPC: bad verf flavor: %u\n", flavor);
return ERR_PTR(-EIO);
}
size = ntohl(*p++);
if (size > RPC_MAX_AUTH_SIZE) {
printk("RPC: giant verf size: %u\n", size);
return ERR_PTR(-EIO);
}
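/*
 * Record the verifier size (in 32-bit words, plus the two-word
 * flavor/length header) so reply buffer space estimates stay accurate.
 */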
task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2;
p += (size >> 2);
return p;
}
int __init rpc_init_authunix(void)
{
return rpcauth_init_credcache(&unix_auth);
}
void rpc_destroy_authunix(void)
{
rpcauth_destroy_credcache(&unix_auth);
}
const struct rpc_authops authunix_ops = {
.owner = THIS_MODULE,
.au_flavor = RPC_AUTH_UNIX,
.au_name = "UNIX",
.create = unx_create,
.destroy = unx_destroy,
.lookup_cred = unx_lookup_cred,
.crcreate = unx_create_cred,
};
static
struct rpc_auth unix_auth = {
.au_cslack = UNX_WRITESLACK,
.au_rslack = 2, /* assume AUTH_NULL verf */
.au_ops = &authunix_ops,
.au_flavor = RPC_AUTH_UNIX,
.au_count = ATOMIC_INIT(0),
};
static
const struct rpc_credops unix_credops = {
.cr_name = "AUTH_UNIX",
.crdestroy = unx_destroy_cred,
.crbind = rpcauth_generic_bind_cred,
.crmatch = unx_match,
.crmarshal = unx_marshal,
.crrefresh = unx_refresh,
.crvalidate = unx_validate,
};

325
net/sunrpc/backchannel_rqst.c Normal file
View file

@ -0,0 +1,325 @@
/******************************************************************************
(c) 2007 Network Appliance, Inc. All Rights Reserved.
(c) 2009 NetApp. All Rights Reserved.
NetApp provides this source code under the GPL v2 License.
The GPL v2 license is available at
http://opensource.org/licenses/gpl-license.php.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
#include <linux/tcp.h>
#include <linux/slab.h>
#include <linux/sunrpc/xprt.h>
#include <linux/export.h>
#include <linux/sunrpc/bc_xprt.h>
#ifdef RPC_DEBUG
#define RPCDBG_FACILITY RPCDBG_TRANS
#endif
/*
* Helper routines that track the number of preallocation elements
* on the transport.
*/
static inline int xprt_need_to_requeue(struct rpc_xprt *xprt)
{
return xprt->bc_alloc_count > 0;
}
static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n)
{
xprt->bc_alloc_count += n;
}
static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n)
{
return xprt->bc_alloc_count -= n;
}
/*
* Free the preallocated rpc_rqst structure and the memory
* buffers hanging off of it.
*/
static void xprt_free_allocation(struct rpc_rqst *req)
{
struct xdr_buf *xbufp;
dprintk("RPC: free allocations for req= %p\n", req);
WARN_ON_ONCE(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
xbufp = &req->rq_private_buf;
free_page((unsigned long)xbufp->head[0].iov_base);
xbufp = &req->rq_snd_buf;
free_page((unsigned long)xbufp->head[0].iov_base);
kfree(req);
}
/*
* Preallocate up to min_reqs structures and related buffers for use
* by the backchannel. This function can be called multiple times
* when creating new sessions that use the same rpc_xprt. The
* preallocated buffers are added to the pool of resources used by
* the rpc_xprt. Any one of these resources may be used by an
* incoming callback request. It's up to the higher levels in the
* stack to enforce that the maximum number of session slots is not
* being exceeded.
*
* Some callback arguments can be large. For example, a pNFS server
* using multiple deviceids. The list can be unbounded, but the client
* has the ability to tell the server the maximum size of the callback
* requests. Each deviceID is 16 bytes, so allocate one page
* for the arguments to have enough room to receive a number of these
* deviceIDs. The NFS client indicates to the pNFS server that its
* callback requests can be up to 4096 bytes in size.
*/
int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
{
struct page *page_rcv = NULL, *page_snd = NULL;
struct xdr_buf *xbufp = NULL;
struct rpc_rqst *req, *tmp;
struct list_head tmp_list;
int i;
dprintk("RPC: setup backchannel transport\n");
/*
* We use a temporary list to keep track of the preallocated
* buffers. Once we're done building the list we splice it
* into the backchannel preallocation list off of the rpc_xprt
* struct. This helps minimize the amount of time the list
* lock is held on the rpc_xprt struct. It also makes cleanup
* easier in case of memory allocation errors.
*/
INIT_LIST_HEAD(&tmp_list);
for (i = 0; i < min_reqs; i++) {
/* Pre-allocate one backchannel rpc_rqst */
req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL);
if (req == NULL) {
printk(KERN_ERR "Failed to create bc rpc_rqst\n");
goto out_free;
}
/* Add the allocated buffer to the tmp list */
dprintk("RPC: adding req= %p\n", req);
list_add(&req->rq_bc_pa_list, &tmp_list);
req->rq_xprt = xprt;
INIT_LIST_HEAD(&req->rq_list);
INIT_LIST_HEAD(&req->rq_bc_list);
/* Preallocate one XDR receive buffer */
page_rcv = alloc_page(GFP_KERNEL);
if (page_rcv == NULL) {
printk(KERN_ERR "Failed to create bc receive xbuf\n");
goto out_free;
}
xbufp = &req->rq_rcv_buf;
xbufp->head[0].iov_base = page_address(page_rcv);
xbufp->head[0].iov_len = PAGE_SIZE;
xbufp->tail[0].iov_base = NULL;
xbufp->tail[0].iov_len = 0;
xbufp->page_len = 0;
xbufp->len = PAGE_SIZE;
xbufp->buflen = PAGE_SIZE;
/* Preallocate one XDR send buffer */
page_snd = alloc_page(GFP_KERNEL);
if (page_snd == NULL) {
printk(KERN_ERR "Failed to create bc snd xbuf\n");
goto out_free;
}
xbufp = &req->rq_snd_buf;
xbufp->head[0].iov_base = page_address(page_snd);
xbufp->head[0].iov_len = 0;
xbufp->tail[0].iov_base = NULL;
xbufp->tail[0].iov_len = 0;
xbufp->page_len = 0;
xbufp->len = 0;
xbufp->buflen = PAGE_SIZE;
}
/*
* Add the temporary list to the backchannel preallocation list
*/
spin_lock_bh(&xprt->bc_pa_lock);
list_splice(&tmp_list, &xprt->bc_pa_list);
xprt_inc_alloc_count(xprt, min_reqs);
spin_unlock_bh(&xprt->bc_pa_lock);
dprintk("RPC: setup backchannel transport done\n");
return 0;
out_free:
/*
* Memory allocation failed, free the temporary list
*/
list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list) {
list_del(&req->rq_bc_pa_list);
xprt_free_allocation(req);
}
dprintk("RPC: setup backchannel transport failed\n");
return -ENOMEM;
}
EXPORT_SYMBOL_GPL(xprt_setup_backchannel);
/**
* xprt_destroy_backchannel - Destroys the backchannel preallocated structures.
* @xprt: the transport holding the preallocated structures
* @max_reqs: the maximum number of preallocated structures to destroy
*
* Since these structures may have been allocated by multiple calls
* to xprt_setup_backchannel, we only destroy up to the maximum number
* of reqs specified by the caller.
*/
void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs)
{
struct rpc_rqst *req = NULL, *tmp = NULL;
dprintk("RPC: destroy backchannel transport\n");
if (max_reqs == 0)
goto out;
spin_lock_bh(&xprt->bc_pa_lock);
xprt_dec_alloc_count(xprt, max_reqs);
list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) {
dprintk("RPC: req=%p\n", req);
list_del(&req->rq_bc_pa_list);
xprt_free_allocation(req);
if (--max_reqs == 0)
break;
}
spin_unlock_bh(&xprt->bc_pa_lock);
out:
dprintk("RPC: backchannel list empty= %s\n",
list_empty(&xprt->bc_pa_list) ? "true" : "false");
}
EXPORT_SYMBOL_GPL(xprt_destroy_backchannel);
static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid)
{
struct rpc_rqst *req = NULL;
dprintk("RPC: allocate a backchannel request\n");
if (list_empty(&xprt->bc_pa_list))
goto not_found;
req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst,
rq_bc_pa_list);
req->rq_reply_bytes_recvd = 0;
req->rq_bytes_sent = 0;
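/*
 * Reset rq_private_buf to the pristine preallocated receive buffer;
 * the transport writes the incoming call data into this copy.
 */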
memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
sizeof(req->rq_private_buf));
req->rq_xid = xid;
req->rq_connect_cookie = xprt->connect_cookie;
not_found:
dprintk("RPC: backchannel req=%p\n", req);
return req;
}
/*
* Return the preallocated rpc_rqst structure and XDR buffers
* associated with this rpc_task.
*/
void xprt_free_bc_request(struct rpc_rqst *req)
{
struct rpc_xprt *xprt = req->rq_xprt;
dprintk("RPC: free backchannel req=%p\n", req);
req->rq_connect_cookie = xprt->connect_cookie - 1;
smp_mb__before_atomic();
WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
smp_mb__after_atomic();
if (!xprt_need_to_requeue(xprt)) {
/*
* The last remaining session was destroyed while this
* entry was in use. Free the entry and don't attempt
* to add back to the list because there is no need to
* have anymore preallocated entries.
*/
dprintk("RPC: Last session removed req=%p\n", req);
xprt_free_allocation(req);
return;
}
/*
* Return it to the list of preallocations so that it
* may be reused by a new callback request.
*/
spin_lock_bh(&xprt->bc_pa_lock);
list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
spin_unlock_bh(&xprt->bc_pa_lock);
}
/*
 * One or more rpc_rqst structures have been preallocated during the
 * backchannel setup. Buffer space for the send and private XDR buffers
 * has been preallocated as well. Use xprt_alloc_bc_request to claim one
 * of these structures and xprt_free_bc_request to return it.
 *
 * We know that we are called in soft-interrupt context, so a plain
 * spin_lock is sufficient; there is no need for the bottom-half
 * (spin_lock_bh) variant.
 *
 * Return an available rpc_rqst, or NULL if none are available.
 */
struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid)
{
struct rpc_rqst *req;
spin_lock(&xprt->bc_pa_lock);
list_for_each_entry(req, &xprt->bc_pa_list, rq_bc_pa_list) {
if (req->rq_connect_cookie != xprt->connect_cookie)
continue;
if (req->rq_xid == xid)
goto found;
}
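/*
 * No entry on the current connection matches this XID; hand out a
 * free preallocated request instead.
 */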
req = xprt_alloc_bc_request(xprt, xid);
found:
spin_unlock(&xprt->bc_pa_lock);
return req;
}
/*
* Add callback request to callback list. The callback
* service sleeps on the sv_cb_waitq waiting for new
* requests. Wake it up after enqueuing the request.
*/
void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
{
struct rpc_xprt *xprt = req->rq_xprt;
struct svc_serv *bc_serv = xprt->bc_serv;
spin_lock(&xprt->bc_pa_lock);
list_del(&req->rq_bc_pa_list);
spin_unlock(&xprt->bc_pa_lock);
req->rq_private_buf.len = copied;
set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
dprintk("RPC: add callback request to list\n");
spin_lock(&bc_serv->sv_cb_lock);
list_add(&req->rq_bc_list, &bc_serv->sv_cb_list);
wake_up(&bc_serv->sv_cb_waitq);
spin_unlock(&bc_serv->sv_cb_lock);
}

63
net/sunrpc/bc_svc.c Normal file
View file

@ -0,0 +1,63 @@
/******************************************************************************
(c) 2007 Network Appliance, Inc. All Rights Reserved.
(c) 2009 NetApp. All Rights Reserved.
NetApp provides this source code under the GPL v2 License.
The GPL v2 license is available at
http://opensource.org/licenses/gpl-license.php.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
/*
* The NFSv4.1 callback service helper routines.
* They implement the transport level processing required to send the
* reply over an existing open connection previously established by the client.
*/
#include <linux/module.h>
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/bc_xprt.h>
#define RPCDBG_FACILITY RPCDBG_SVCDSP
/* Empty callback ops */
static const struct rpc_call_ops nfs41_callback_ops = {
};
/*
* Send the callback reply
*/
int bc_send(struct rpc_rqst *req)
{
struct rpc_task *task;
int ret;
dprintk("RPC: bc_send req= %p\n", req);
task = rpc_run_bc_task(req, &nfs41_callback_ops);
if (IS_ERR(task))
ret = PTR_ERR(task);
else {
WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
ret = task->tk_status;
rpc_put_task(task);
}
dprintk("RPC: bc_send ret= %d\n", ret);
return ret;
}

1838
net/sunrpc/cache.c Normal file

File diff suppressed because it is too large Load diff

2469
net/sunrpc/clnt.c Normal file

File diff suppressed because it is too large Load diff

42
net/sunrpc/netns.h Normal file
View file

@ -0,0 +1,42 @@
#ifndef __SUNRPC_NETNS_H__
#define __SUNRPC_NETNS_H__
#include <net/net_namespace.h>
#include <net/netns/generic.h>
struct cache_detail;
struct sunrpc_net {
struct proc_dir_entry *proc_net_rpc;
struct cache_detail *ip_map_cache;
struct cache_detail *unix_gid_cache;
struct cache_detail *rsc_cache;
struct cache_detail *rsi_cache;
struct super_block *pipefs_sb;
struct rpc_pipe *gssd_dummy;
struct mutex pipefs_sb_lock;
struct list_head all_clients;
spinlock_t rpc_client_lock;
struct rpc_clnt *rpcb_local_clnt;
struct rpc_clnt *rpcb_local_clnt4;
spinlock_t rpcb_clnt_lock;
unsigned int rpcb_users;
unsigned int rpcb_is_af_local : 1;
struct mutex gssp_lock;
struct rpc_clnt *gssp_clnt;
int use_gss_proxy;
int pipe_version;
atomic_t pipe_users;
struct proc_dir_entry *use_gssp_proc;
};
extern int sunrpc_net_id;
int ip_map_cache_create(struct net *);
void ip_map_cache_destroy(struct net *);
#endif

1527
net/sunrpc/rpc_pipe.c Normal file

File diff suppressed because it is too large Load diff

1151
net/sunrpc/rpcb_clnt.c Normal file

File diff suppressed because it is too large Load diff

1140
net/sunrpc/sched.c Normal file

File diff suppressed because it is too large Load diff

187
net/sunrpc/socklib.c Normal file
View file

@ -0,0 +1,187 @@
/*
* linux/net/sunrpc/socklib.c
*
* Common socket helper routines for RPC client and server
*
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*/
#include <linux/compiler.h>
#include <linux/netdevice.h>
#include <linux/gfp.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <linux/pagemap.h>
#include <linux/udp.h>
#include <linux/sunrpc/xdr.h>
#include <linux/export.h>
/**
* xdr_skb_read_bits - copy some data bits from skb to internal buffer
* @desc: sk_buff copy helper
* @to: copy destination
* @len: number of bytes to copy
*
* Possibly called several times to iterate over an sk_buff and copy
* data out of it.
*/
size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
{
if (len > desc->count)
len = desc->count;
if (unlikely(skb_copy_bits(desc->skb, desc->offset, to, len)))
return 0;
desc->count -= len;
desc->offset += len;
return len;
}
EXPORT_SYMBOL_GPL(xdr_skb_read_bits);
/**
* xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer
* @desc: sk_buff copy helper
* @to: copy destination
* @len: number of bytes to copy
*
* Same as xdr_skb_read_bits, but calculates a checksum at the same time.
*/
static size_t xdr_skb_read_and_csum_bits(struct xdr_skb_reader *desc, void *to, size_t len)
{
unsigned int pos;
__wsum csum2;
if (len > desc->count)
len = desc->count;
pos = desc->offset;
csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0);
desc->csum = csum_block_add(desc->csum, csum2, pos);
desc->count -= len;
desc->offset += len;
return len;
}
/**
* xdr_partial_copy_from_skb - copy data out of an skb
* @xdr: target XDR buffer
* @base: starting offset
* @desc: sk_buff copy helper
* @copy_actor: virtual method for copying data
*
*/
ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor)
{
struct page **ppage = xdr->pages;
unsigned int len, pglen = xdr->page_len;
ssize_t copied = 0;
size_t ret;
len = xdr->head[0].iov_len;
if (base < len) {
len -= base;
ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len);
copied += ret;
if (ret != len || !desc->count)
goto out;
base = 0;
} else
base -= len;
if (unlikely(pglen == 0))
goto copy_tail;
if (unlikely(base >= pglen)) {
base -= pglen;
goto copy_tail;
}
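/*
 * Skip the pages already consumed (plus the buffer's own page_base)
 * and compute the offset into the first page we copy into.
 */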
if (base || xdr->page_base) {
pglen -= base;
base += xdr->page_base;
ppage += base >> PAGE_CACHE_SHIFT;
base &= ~PAGE_CACHE_MASK;
}
do {
char *kaddr;
/* ACL likes to be lazy in allocating pages - ACLs
* are small by default but can get huge. */
if (unlikely(*ppage == NULL)) {
*ppage = alloc_page(GFP_ATOMIC);
if (unlikely(*ppage == NULL)) {
if (copied == 0)
copied = -ENOMEM;
goto out;
}
}
len = PAGE_CACHE_SIZE;
kaddr = kmap_atomic(*ppage);
if (base) {
len -= base;
if (pglen < len)
len = pglen;
ret = copy_actor(desc, kaddr + base, len);
base = 0;
} else {
if (pglen < len)
len = pglen;
ret = copy_actor(desc, kaddr, len);
}
flush_dcache_page(*ppage);
kunmap_atomic(kaddr);
copied += ret;
if (ret != len || !desc->count)
goto out;
ppage++;
} while ((pglen -= len) != 0);
copy_tail:
len = xdr->tail[0].iov_len;
if (base < len)
copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base);
out:
return copied;
}
EXPORT_SYMBOL_GPL(xdr_partial_copy_from_skb);
/**
* csum_partial_copy_to_xdr - checksum and copy data
* @xdr: target XDR buffer
* @skb: source skb
*
* We have set things up such that we perform the checksum of the UDP
* packet in parallel with the copies into the RPC client iovec. -DaveM
*/
int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
{
struct xdr_skb_reader desc;
desc.skb = skb;
desc.offset = sizeof(struct udphdr);
desc.count = skb->len - desc.offset;
if (skb_csum_unnecessary(skb))
goto no_checksum;
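/*
 * Seed the running checksum with the UDP header bytes, folding in any
 * partial checksum already supplied by the device.
 */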
desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_and_csum_bits) < 0)
return -1;
if (desc.offset != skb->len) {
__wsum csum2;
csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0);
desc.csum = csum_block_add(desc.csum, csum2, desc.offset);
}
if (desc.count)
return -1;
if (csum_fold(desc.csum))
return -1;
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
!skb->csum_complete_sw)
netdev_rx_csum_fault(skb->dev);
return 0;
no_checksum:
if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0)
return -1;
if (desc.count)
return -1;
return 0;
}
EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr);

282
net/sunrpc/stats.c Normal file
View file

@ -0,0 +1,282 @@
/*
* linux/net/sunrpc/stats.c
*
* procfs-based user access to generic RPC statistics. The stats files
* reside in /proc/net/rpc.
*
* The read routines assume that the buffer passed in is just big enough.
* If you implement an RPC service that has its own stats routine which
* appends the generic RPC stats, make sure you don't exceed the PAGE_SIZE
* limit.
*
* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
*/
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/metrics.h>
#include <linux/rcupdate.h>
#include "netns.h"
#define RPCDBG_FACILITY RPCDBG_MISC
/*
* Get RPC client stats
*/
static int rpc_proc_show(struct seq_file *seq, void *v) {
const struct rpc_stat *statp = seq->private;
const struct rpc_program *prog = statp->program;
unsigned int i, j;
seq_printf(seq,
"net %u %u %u %u\n",
statp->netcnt,
statp->netudpcnt,
statp->nettcpcnt,
statp->nettcpconn);
seq_printf(seq,
"rpc %u %u %u\n",
statp->rpccnt,
statp->rpcretrans,
statp->rpcauthrefresh);
for (i = 0; i < prog->nrvers; i++) {
const struct rpc_version *vers = prog->version[i];
if (!vers)
continue;
seq_printf(seq, "proc%u %u",
vers->number, vers->nrprocs);
for (j = 0; j < vers->nrprocs; j++)
seq_printf(seq, " %u",
vers->procs[j].p_count);
seq_putc(seq, '\n');
}
return 0;
}
static int rpc_proc_open(struct inode *inode, struct file *file)
{
return single_open(file, rpc_proc_show, PDE_DATA(inode));
}
static const struct file_operations rpc_proc_fops = {
.owner = THIS_MODULE,
.open = rpc_proc_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
/*
* Get RPC server stats
*/
void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
const struct svc_program *prog = statp->program;
const struct svc_procedure *proc;
const struct svc_version *vers;
unsigned int i, j;
seq_printf(seq,
"net %u %u %u %u\n",
statp->netcnt,
statp->netudpcnt,
statp->nettcpcnt,
statp->nettcpconn);
seq_printf(seq,
"rpc %u %u %u %u %u\n",
statp->rpccnt,
statp->rpcbadfmt+statp->rpcbadauth+statp->rpcbadclnt,
statp->rpcbadfmt,
statp->rpcbadauth,
statp->rpcbadclnt);
for (i = 0; i < prog->pg_nvers; i++) {
if (!(vers = prog->pg_vers[i]) || !(proc = vers->vs_proc))
continue;
seq_printf(seq, "proc%d %u", i, vers->vs_nproc);
for (j = 0; j < vers->vs_nproc; j++, proc++)
seq_printf(seq, " %u", proc->pc_count);
seq_putc(seq, '\n');
}
}
EXPORT_SYMBOL_GPL(svc_seq_show);
/**
* rpc_alloc_iostats - allocate an rpc_iostats structure
* @clnt: RPC program, version, and xprt
*
*/
struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt)
{
return kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(rpc_alloc_iostats);
/**
* rpc_free_iostats - release an rpc_iostats structure
* @stats: doomed rpc_iostats structure
*
*/
void rpc_free_iostats(struct rpc_iostats *stats)
{
kfree(stats);
}
EXPORT_SYMBOL_GPL(rpc_free_iostats);
/**
* rpc_count_iostats - tally up per-task stats
* @task: completed rpc_task
* @stats: array of stat structures
*
* Relies on the caller for serialization.
*/
void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_iostats *op_metrics;
ktime_t delta;
if (!stats || !req)
return;
op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx];
op_metrics->om_ops++;
op_metrics->om_ntrans += req->rq_ntrans;
op_metrics->om_timeouts += task->tk_timeouts;
op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent;
op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd;
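/*
 * om_queue: time from task creation to first transmission;
 * om_rtt: network round-trip time;
 * om_execute: total time from creation to completion.
 */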
delta = ktime_sub(req->rq_xtime, task->tk_start);
op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta);
op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt);
delta = ktime_sub(ktime_get(), task->tk_start);
op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta);
}
EXPORT_SYMBOL_GPL(rpc_count_iostats);
static void _print_name(struct seq_file *seq, unsigned int op,
struct rpc_procinfo *procs)
{
if (procs[op].p_name)
seq_printf(seq, "\t%12s: ", procs[op].p_name);
else if (op == 0)
seq_printf(seq, "\t NULL: ");
else
seq_printf(seq, "\t%12u: ", op);
}
void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
{
struct rpc_iostats *stats = clnt->cl_metrics;
struct rpc_xprt *xprt;
unsigned int op, maxproc = clnt->cl_maxproc;
if (!stats)
return;
seq_printf(seq, "\tRPC iostats version: %s ", RPC_IOSTATS_VERS);
seq_printf(seq, "p/v: %u/%u (%s)\n",
clnt->cl_prog, clnt->cl_vers, clnt->cl_program->name);
rcu_read_lock();
xprt = rcu_dereference(clnt->cl_xprt);
if (xprt)
xprt->ops->print_stats(xprt, seq);
rcu_read_unlock();
seq_printf(seq, "\tper-op statistics\n");
for (op = 0; op < maxproc; op++) {
struct rpc_iostats *metrics = &stats[op];
_print_name(seq, op, clnt->cl_procinfo);
seq_printf(seq, "%lu %lu %lu %Lu %Lu %Lu %Lu %Lu\n",
metrics->om_ops,
metrics->om_ntrans,
metrics->om_timeouts,
metrics->om_bytes_sent,
metrics->om_bytes_recv,
ktime_to_ms(metrics->om_queue),
ktime_to_ms(metrics->om_rtt),
ktime_to_ms(metrics->om_execute));
}
}
EXPORT_SYMBOL_GPL(rpc_print_iostats);
/*
* Register/unregister RPC proc files
*/
static inline struct proc_dir_entry *
do_register(struct net *net, const char *name, void *data,
const struct file_operations *fops)
{
struct sunrpc_net *sn;
dprintk("RPC: registering /proc/net/rpc/%s\n", name);
sn = net_generic(net, sunrpc_net_id);
return proc_create_data(name, 0, sn->proc_net_rpc, fops, data);
}
struct proc_dir_entry *
rpc_proc_register(struct net *net, struct rpc_stat *statp)
{
return do_register(net, statp->program->name, statp, &rpc_proc_fops);
}
EXPORT_SYMBOL_GPL(rpc_proc_register);
void
rpc_proc_unregister(struct net *net, const char *name)
{
struct sunrpc_net *sn;
sn = net_generic(net, sunrpc_net_id);
remove_proc_entry(name, sn->proc_net_rpc);
}
EXPORT_SYMBOL_GPL(rpc_proc_unregister);
struct proc_dir_entry *
svc_proc_register(struct net *net, struct svc_stat *statp, const struct file_operations *fops)
{
return do_register(net, statp->program->pg_name, statp, fops);
}
EXPORT_SYMBOL_GPL(svc_proc_register);
void
svc_proc_unregister(struct net *net, const char *name)
{
struct sunrpc_net *sn;
sn = net_generic(net, sunrpc_net_id);
remove_proc_entry(name, sn->proc_net_rpc);
}
EXPORT_SYMBOL_GPL(svc_proc_unregister);
int rpc_proc_init(struct net *net)
{
struct sunrpc_net *sn;
dprintk("RPC: registering /proc/net/rpc\n");
sn = net_generic(net, sunrpc_net_id);
sn->proc_net_rpc = proc_mkdir("rpc", net->proc_net);
if (sn->proc_net_rpc == NULL)
return -ENOMEM;
return 0;
}
void rpc_proc_exit(struct net *net)
{
dprintk("RPC: unregistering /proc/net/rpc\n");
remove_proc_entry("rpc", net->proc_net);
}

66
net/sunrpc/sunrpc.h Normal file
View file

@ -0,0 +1,66 @@
/******************************************************************************
(c) 2008 NetApp. All Rights Reserved.
NetApp provides this source code under the GPL v2 License.
The GPL v2 license is available at
http://opensource.org/licenses/gpl-license.php.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
/*
* Functions and macros used internally by RPC
*/
#ifndef _NET_SUNRPC_SUNRPC_H
#define _NET_SUNRPC_SUNRPC_H
#include <linux/net.h>
/*
* Header for dynamically allocated rpc buffers.
*/
struct rpc_buffer {
size_t len;
char data[];
};
static inline int rpc_reply_expected(struct rpc_task *task)
{
return (task->tk_msg.rpc_proc != NULL) &&
(task->tk_msg.rpc_proc->p_decode != NULL);
}
static inline int sock_is_loopback(struct sock *sk)
{
struct dst_entry *dst;
int loopback = 0;
rcu_read_lock();
dst = rcu_dereference(sk->sk_dst_cache);
if (dst && dst->dev &&
(dst->dev->features & NETIF_F_LOOPBACK))
loopback = 1;
rcu_read_unlock();
return loopback;
}
int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
struct page *headpage, unsigned long headoffset,
struct page *tailpage, unsigned long tailoffset);
int rpc_clients_notifier_register(void);
void rpc_clients_notifier_unregister(void);
#endif /* _NET_SUNRPC_SUNRPC_H */

133
net/sunrpc/sunrpc_syms.c Normal file
View file

@ -0,0 +1,133 @@
/*
* linux/net/sunrpc/sunrpc_syms.c
*
* Symbols exported by the sunrpc module.
*
* Copyright (C) 1997 Olaf Kirch <okir@monad.swb.de>
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/uio.h>
#include <linux/unistd.h>
#include <linux/init.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/auth.h>
#include <linux/workqueue.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/sunrpc/xprtsock.h>
#include "netns.h"
int sunrpc_net_id;
EXPORT_SYMBOL_GPL(sunrpc_net_id);
static __net_init int sunrpc_init_net(struct net *net)
{
int err;
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
err = rpc_proc_init(net);
if (err)
goto err_proc;
err = ip_map_cache_create(net);
if (err)
goto err_ipmap;
err = unix_gid_cache_create(net);
if (err)
goto err_unixgid;
err = rpc_pipefs_init_net(net);
if (err)
goto err_pipefs;
INIT_LIST_HEAD(&sn->all_clients);
spin_lock_init(&sn->rpc_client_lock);
spin_lock_init(&sn->rpcb_clnt_lock);
return 0;
err_pipefs:
unix_gid_cache_destroy(net);
err_unixgid:
ip_map_cache_destroy(net);
err_ipmap:
rpc_proc_exit(net);
err_proc:
return err;
}
static __net_exit void sunrpc_exit_net(struct net *net)
{
rpc_pipefs_exit_net(net);
unix_gid_cache_destroy(net);
ip_map_cache_destroy(net);
rpc_proc_exit(net);
}
static struct pernet_operations sunrpc_net_ops = {
.init = sunrpc_init_net,
.exit = sunrpc_exit_net,
.id = &sunrpc_net_id,
.size = sizeof(struct sunrpc_net),
};
static int __init
init_sunrpc(void)
{
int err = rpc_init_mempool();
if (err)
goto out;
err = rpcauth_init_module();
if (err)
goto out2;
cache_initialize();
err = register_pernet_subsys(&sunrpc_net_ops);
if (err)
goto out3;
err = register_rpc_pipefs();
if (err)
goto out4;
#ifdef RPC_DEBUG
rpc_register_sysctl();
#endif
svc_init_xprt_sock(); /* svc sock transport */
init_socket_xprt(); /* clnt sock transport */
return 0;
out4:
unregister_pernet_subsys(&sunrpc_net_ops);
out3:
rpcauth_remove_module();
out2:
rpc_destroy_mempool();
out:
return err;
}
static void __exit
cleanup_sunrpc(void)
{
rpcauth_remove_module();
cleanup_socket_xprt();
svc_cleanup_xprt_sock();
unregister_rpc_pipefs();
rpc_destroy_mempool();
unregister_pernet_subsys(&sunrpc_net_ops);
#ifdef RPC_DEBUG
rpc_unregister_sysctl();
#endif
rcu_barrier(); /* Wait for completion of call_rcu()'s */
}
MODULE_LICENSE("GPL");
fs_initcall(init_sunrpc); /* Ensure we're initialised before nfs */
module_exit(cleanup_sunrpc);

1400
net/sunrpc/svc.c Normal file

File diff suppressed because it is too large Load diff

1329
net/sunrpc/svc_xprt.c Normal file

File diff suppressed because it is too large Load diff

166
net/sunrpc/svcauth.c Normal file
View file

@ -0,0 +1,166 @@
/*
* linux/net/sunrpc/svcauth.c
*
* The generic interface for RPC authentication on the server side.
*
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*
* CHANGES
* 19-Apr-2000 Chris Evans - Security fix
*/
#include <linux/types.h>
#include <linux/module.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/svcauth.h>
#include <linux/err.h>
#include <linux/hash.h>
#define RPCDBG_FACILITY RPCDBG_AUTH
/*
* Table of authenticators
*/
extern struct auth_ops svcauth_null;
extern struct auth_ops svcauth_unix;
static DEFINE_SPINLOCK(authtab_lock);
static struct auth_ops *authtab[RPC_AUTH_MAXFLAVOR] = {
[0] = &svcauth_null,
[1] = &svcauth_unix,
};
int
svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
{
rpc_authflavor_t flavor;
struct auth_ops *aops;
*authp = rpc_auth_ok;
flavor = svc_getnl(&rqstp->rq_arg.head[0]);
dprintk("svc: svc_authenticate (%d)\n", flavor);
spin_lock(&authtab_lock);
if (flavor >= RPC_AUTH_MAXFLAVOR || !(aops = authtab[flavor]) ||
!try_module_get(aops->owner)) {
spin_unlock(&authtab_lock);
*authp = rpc_autherr_badcred;
return SVC_DENIED;
}
spin_unlock(&authtab_lock);
rqstp->rq_auth_slack = 0;
rqstp->rq_authop = aops;
return aops->accept(rqstp, authp);
}
EXPORT_SYMBOL_GPL(svc_authenticate);
int svc_set_client(struct svc_rqst *rqstp)
{
return rqstp->rq_authop->set_client(rqstp);
}
EXPORT_SYMBOL_GPL(svc_set_client);
/* A request, which was authenticated, has now executed.
* Time to finalise the credentials and verifier
* and release any resources
*/
int svc_authorise(struct svc_rqst *rqstp)
{
struct auth_ops *aops = rqstp->rq_authop;
int rv = 0;
rqstp->rq_authop = NULL;
if (aops) {
rv = aops->release(rqstp);
module_put(aops->owner);
}
return rv;
}
int
svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops)
{
int rv = -EINVAL;
spin_lock(&authtab_lock);
if (flavor < RPC_AUTH_MAXFLAVOR && authtab[flavor] == NULL) {
authtab[flavor] = aops;
rv = 0;
}
spin_unlock(&authtab_lock);
return rv;
}
EXPORT_SYMBOL_GPL(svc_auth_register);
void
svc_auth_unregister(rpc_authflavor_t flavor)
{
spin_lock(&authtab_lock);
if (flavor < RPC_AUTH_MAXFLAVOR)
authtab[flavor] = NULL;
spin_unlock(&authtab_lock);
}
EXPORT_SYMBOL_GPL(svc_auth_unregister);
/**************************************************
* 'auth_domains' are stored in a hash table indexed by name.
* When the last reference to an 'auth_domain' is dropped,
* the object is unhashed and freed.
* If auth_domain_lookup fails to find an entry, it will return
* its second argument 'new'. If this is non-null, it will
* have been atomically linked into the table.
*/
#define DN_HASHBITS 6
#define DN_HASHMAX (1<<DN_HASHBITS)
static struct hlist_head auth_domain_table[DN_HASHMAX];
static spinlock_t auth_domain_lock =
__SPIN_LOCK_UNLOCKED(auth_domain_lock);
void auth_domain_put(struct auth_domain *dom)
{
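/*
 * Drop a reference; only when the last one goes away do we take the
 * table lock, unhash the domain and let the flavour release it.
 */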
if (atomic_dec_and_lock(&dom->ref.refcount, &auth_domain_lock)) {
hlist_del(&dom->hash);
dom->flavour->domain_release(dom);
spin_unlock(&auth_domain_lock);
}
}
EXPORT_SYMBOL_GPL(auth_domain_put);
struct auth_domain *
auth_domain_lookup(char *name, struct auth_domain *new)
{
struct auth_domain *hp;
struct hlist_head *head;
head = &auth_domain_table[hash_str(name, DN_HASHBITS)];
spin_lock(&auth_domain_lock);
hlist_for_each_entry(hp, head, hash) {
if (strcmp(hp->name, name)==0) {
kref_get(&hp->ref);
spin_unlock(&auth_domain_lock);
return hp;
}
}
if (new)
hlist_add_head(&new->hash, head);
spin_unlock(&auth_domain_lock);
return new;
}
EXPORT_SYMBOL_GPL(auth_domain_lookup);
struct auth_domain *auth_domain_find(char *name)
{
return auth_domain_lookup(name, NULL);
}
EXPORT_SYMBOL_GPL(auth_domain_find);

914
net/sunrpc/svcauth_unix.c Normal file
View file

@ -0,0 +1,914 @@
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/svcauth.h>
#include <linux/sunrpc/gss_api.h>
#include <linux/sunrpc/addr.h>
#include <linux/err.h>
#include <linux/seq_file.h>
#include <linux/hash.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <net/ipv6.h>
#include <linux/kernel.h>
#include <linux/user_namespace.h>
#define RPCDBG_FACILITY RPCDBG_AUTH
#include "netns.h"
/*
* AUTHUNIX and AUTHNULL credentials are both handled here.
* AUTHNULL is treated just like AUTHUNIX except that the uid/gid
* are always nobody (-2); i.e. we do the same IP address checks for
* AUTHNULL as for AUTHUNIX, and that is done here.
*/
struct unix_domain {
struct auth_domain h;
/* other stuff later */
};
extern struct auth_ops svcauth_null;
extern struct auth_ops svcauth_unix;
static void svcauth_unix_domain_release(struct auth_domain *dom)
{
struct unix_domain *ud = container_of(dom, struct unix_domain, h);
kfree(dom->name);
kfree(ud);
}
struct auth_domain *unix_domain_find(char *name)
{
struct auth_domain *rv;
struct unix_domain *new = NULL;
rv = auth_domain_lookup(name, NULL);
while(1) {
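/*
 * Loop until the lookup returns a stable entry: a concurrent insert
 * may win the race, in which case the entry we allocated here is
 * discarded and the existing one is returned.
 */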
if (rv) {
if (new && rv != &new->h)
svcauth_unix_domain_release(&new->h);
if (rv->flavour != &svcauth_unix) {
auth_domain_put(rv);
return NULL;
}
return rv;
}
new = kmalloc(sizeof(*new), GFP_KERNEL);
if (new == NULL)
return NULL;
kref_init(&new->h.ref);
new->h.name = kstrdup(name, GFP_KERNEL);
if (new->h.name == NULL) {
kfree(new);
return NULL;
}
new->h.flavour = &svcauth_unix;
rv = auth_domain_lookup(name, &new->h);
}
}
EXPORT_SYMBOL_GPL(unix_domain_find);
/**************************************************
* cache for IP address to unix_domain
* as needed by AUTH_UNIX
*/
#define IP_HASHBITS 8
#define IP_HASHMAX (1<<IP_HASHBITS)
struct ip_map {
struct cache_head h;
char m_class[8]; /* e.g. "nfsd" */
struct in6_addr m_addr;
struct unix_domain *m_client;
};
static void ip_map_put(struct kref *kref)
{
struct cache_head *item = container_of(kref, struct cache_head, ref);
struct ip_map *im = container_of(item, struct ip_map,h);
if (test_bit(CACHE_VALID, &item->flags) &&
!test_bit(CACHE_NEGATIVE, &item->flags))
auth_domain_put(&im->m_client->h);
kfree(im);
}
static inline int hash_ip6(const struct in6_addr *ip)
{
return hash_32(ipv6_addr_hash(ip), IP_HASHBITS);
}
static int ip_map_match(struct cache_head *corig, struct cache_head *cnew)
{
struct ip_map *orig = container_of(corig, struct ip_map, h);
struct ip_map *new = container_of(cnew, struct ip_map, h);
return strcmp(orig->m_class, new->m_class) == 0 &&
ipv6_addr_equal(&orig->m_addr, &new->m_addr);
}
static void ip_map_init(struct cache_head *cnew, struct cache_head *citem)
{
struct ip_map *new = container_of(cnew, struct ip_map, h);
struct ip_map *item = container_of(citem, struct ip_map, h);
strcpy(new->m_class, item->m_class);
new->m_addr = item->m_addr;
}
static void update(struct cache_head *cnew, struct cache_head *citem)
{
struct ip_map *new = container_of(cnew, struct ip_map, h);
struct ip_map *item = container_of(citem, struct ip_map, h);
kref_get(&item->m_client->h.ref);
new->m_client = item->m_client;
}
static struct cache_head *ip_map_alloc(void)
{
struct ip_map *i = kmalloc(sizeof(*i), GFP_KERNEL);
if (i)
return &i->h;
else
return NULL;
}
static void ip_map_request(struct cache_detail *cd,
struct cache_head *h,
char **bpp, int *blen)
{
char text_addr[40];
struct ip_map *im = container_of(h, struct ip_map, h);
if (ipv6_addr_v4mapped(&(im->m_addr))) {
snprintf(text_addr, 20, "%pI4", &im->m_addr.s6_addr32[3]);
} else {
snprintf(text_addr, 40, "%pI6", &im->m_addr);
}
qword_add(bpp, blen, im->m_class);
qword_add(bpp, blen, text_addr);
(*bpp)[-1] = '\n';
}
static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr);
static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, struct unix_domain *udom, time_t expiry);
static int ip_map_parse(struct cache_detail *cd,
char *mesg, int mlen)
{
/* class ipaddress [domainname] */
/* should be safe just to use the start of the input buffer
* for scratch: */
char *buf = mesg;
int len;
char class[8];
union {
struct sockaddr sa;
struct sockaddr_in s4;
struct sockaddr_in6 s6;
} address;
struct sockaddr_in6 sin6;
int err;
struct ip_map *ipmp;
struct auth_domain *dom;
time_t expiry;
if (mesg[mlen-1] != '\n')
return -EINVAL;
mesg[mlen-1] = 0;
/* class */
len = qword_get(&mesg, class, sizeof(class));
if (len <= 0) return -EINVAL;
/* ip address */
len = qword_get(&mesg, buf, mlen);
if (len <= 0) return -EINVAL;
if (rpc_pton(cd->net, buf, len, &address.sa, sizeof(address)) == 0)
return -EINVAL;
switch (address.sa.sa_family) {
case AF_INET:
/* Form a mapped IPv4 address in sin6 */
sin6.sin6_family = AF_INET6;
ipv6_addr_set_v4mapped(address.s4.sin_addr.s_addr,
&sin6.sin6_addr);
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
memcpy(&sin6, &address.s6, sizeof(sin6));
break;
#endif
default:
return -EINVAL;
}
expiry = get_expiry(&mesg);
if (expiry == 0)
return -EINVAL;
/* domainname, or empty for NEGATIVE */
len = qword_get(&mesg, buf, mlen);
if (len < 0) return -EINVAL;
if (len) {
dom = unix_domain_find(buf);
if (dom == NULL)
return -ENOENT;
} else
dom = NULL;
/* IPv6 scope IDs are ignored for now */
ipmp = __ip_map_lookup(cd, class, &sin6.sin6_addr);
if (ipmp) {
err = __ip_map_update(cd, ipmp,
container_of(dom, struct unix_domain, h),
expiry);
} else
err = -ENOMEM;
if (dom)
auth_domain_put(dom);
cache_flush();
return err;
}
static int ip_map_show(struct seq_file *m,
struct cache_detail *cd,
struct cache_head *h)
{
struct ip_map *im;
struct in6_addr addr;
char *dom = "-no-domain-";
if (h == NULL) {
seq_puts(m, "#class IP domain\n");
return 0;
}
im = container_of(h, struct ip_map, h);
/* class addr domain */
addr = im->m_addr;
if (test_bit(CACHE_VALID, &h->flags) &&
!test_bit(CACHE_NEGATIVE, &h->flags))
dom = im->m_client->h.name;
if (ipv6_addr_v4mapped(&addr)) {
seq_printf(m, "%s %pI4 %s\n",
im->m_class, &addr.s6_addr32[3], dom);
} else {
seq_printf(m, "%s %pI6 %s\n", im->m_class, &addr, dom);
}
return 0;
}
static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class,
struct in6_addr *addr)
{
struct ip_map ip;
struct cache_head *ch;
strcpy(ip.m_class, class);
ip.m_addr = *addr;
ch = sunrpc_cache_lookup(cd, &ip.h,
hash_str(class, IP_HASHBITS) ^
hash_ip6(addr));
if (ch)
return container_of(ch, struct ip_map, h);
else
return NULL;
}
static inline struct ip_map *ip_map_lookup(struct net *net, char *class,
struct in6_addr *addr)
{
struct sunrpc_net *sn;
sn = net_generic(net, sunrpc_net_id);
return __ip_map_lookup(sn->ip_map_cache, class, addr);
}
static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm,
struct unix_domain *udom, time_t expiry)
{
struct ip_map ip;
struct cache_head *ch;
ip.m_client = udom;
ip.h.flags = 0;
if (!udom)
set_bit(CACHE_NEGATIVE, &ip.h.flags);
ip.h.expiry_time = expiry;
ch = sunrpc_cache_update(cd, &ip.h, &ipm->h,
hash_str(ipm->m_class, IP_HASHBITS) ^
hash_ip6(&ipm->m_addr));
if (!ch)
return -ENOMEM;
cache_put(ch, cd);
return 0;
}
static inline int ip_map_update(struct net *net, struct ip_map *ipm,
struct unix_domain *udom, time_t expiry)
{
struct sunrpc_net *sn;
sn = net_generic(net, sunrpc_net_id);
return __ip_map_update(sn->ip_map_cache, ipm, udom, expiry);
}
void svcauth_unix_purge(struct net *net)
{
struct sunrpc_net *sn;
sn = net_generic(net, sunrpc_net_id);
cache_purge(sn->ip_map_cache);
}
EXPORT_SYMBOL_GPL(svcauth_unix_purge);
static inline struct ip_map *
ip_map_cached_get(struct svc_xprt *xprt)
{
struct ip_map *ipm = NULL;
struct sunrpc_net *sn;
if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
spin_lock(&xprt->xpt_lock);
ipm = xprt->xpt_auth_cache;
if (ipm != NULL) {
sn = net_generic(xprt->xpt_net, sunrpc_net_id);
if (cache_is_expired(sn->ip_map_cache, &ipm->h)) {
/*
* The entry has been invalidated since it was
* remembered, e.g. by a second mount from the
* same IP address.
*/
xprt->xpt_auth_cache = NULL;
spin_unlock(&xprt->xpt_lock);
cache_put(&ipm->h, sn->ip_map_cache);
return NULL;
}
cache_get(&ipm->h);
}
spin_unlock(&xprt->xpt_lock);
}
return ipm;
}
static inline void
ip_map_cached_put(struct svc_xprt *xprt, struct ip_map *ipm)
{
if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
spin_lock(&xprt->xpt_lock);
if (xprt->xpt_auth_cache == NULL) {
/* newly cached, keep the reference */
xprt->xpt_auth_cache = ipm;
ipm = NULL;
}
spin_unlock(&xprt->xpt_lock);
}
if (ipm) {
struct sunrpc_net *sn;
sn = net_generic(xprt->xpt_net, sunrpc_net_id);
cache_put(&ipm->h, sn->ip_map_cache);
}
}
void
svcauth_unix_info_release(struct svc_xprt *xpt)
{
struct ip_map *ipm;
ipm = xpt->xpt_auth_cache;
if (ipm != NULL) {
struct sunrpc_net *sn;
sn = net_generic(xpt->xpt_net, sunrpc_net_id);
cache_put(&ipm->h, sn->ip_map_cache);
}
}
/****************************************************************************
* auth.unix.gid cache
* simple cache to map a UID to a list of GIDs
* because AUTH_UNIX aka AUTH_SYS can carry at most 16 supplementary GIDs
*/
#define GID_HASHBITS 8
#define GID_HASHMAX (1<<GID_HASHBITS)
struct unix_gid {
struct cache_head h;
kuid_t uid;
struct group_info *gi;
};
static int unix_gid_hash(kuid_t uid)
{
return hash_long(from_kuid(&init_user_ns, uid), GID_HASHBITS);
}
static void unix_gid_put(struct kref *kref)
{
struct cache_head *item = container_of(kref, struct cache_head, ref);
struct unix_gid *ug = container_of(item, struct unix_gid, h);
if (test_bit(CACHE_VALID, &item->flags) &&
!test_bit(CACHE_NEGATIVE, &item->flags))
put_group_info(ug->gi);
kfree(ug);
}
static int unix_gid_match(struct cache_head *corig, struct cache_head *cnew)
{
struct unix_gid *orig = container_of(corig, struct unix_gid, h);
struct unix_gid *new = container_of(cnew, struct unix_gid, h);
return uid_eq(orig->uid, new->uid);
}
static void unix_gid_init(struct cache_head *cnew, struct cache_head *citem)
{
struct unix_gid *new = container_of(cnew, struct unix_gid, h);
struct unix_gid *item = container_of(citem, struct unix_gid, h);
new->uid = item->uid;
}
static void unix_gid_update(struct cache_head *cnew, struct cache_head *citem)
{
struct unix_gid *new = container_of(cnew, struct unix_gid, h);
struct unix_gid *item = container_of(citem, struct unix_gid, h);
get_group_info(item->gi);
new->gi = item->gi;
}
static struct cache_head *unix_gid_alloc(void)
{
struct unix_gid *g = kmalloc(sizeof(*g), GFP_KERNEL);
if (g)
return &g->h;
else
return NULL;
}
static void unix_gid_request(struct cache_detail *cd,
struct cache_head *h,
char **bpp, int *blen)
{
char tuid[20];
struct unix_gid *ug = container_of(h, struct unix_gid, h);
snprintf(tuid, 20, "%u", from_kuid(&init_user_ns, ug->uid));
qword_add(bpp, blen, tuid);
(*bpp)[-1] = '\n';
}
static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, kuid_t uid);
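/*
 * Illustrative downcall (values are examples only): a user-space
 * helper such as rpc.mountd answers an upcall by writing a line of
 * the form
 *
 *	<uid> <expiry> <ngids> <gid0> ... <gidN-1>
 *
 * e.g. "1000 1500000000 3 100 1001 1002", into
 * /proc/net/rpc/auth.unix.gid/channel, which unix_gid_parse() below
 * turns into a cache entry mapping uid 1000 to those three gids.
 */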
static int unix_gid_parse(struct cache_detail *cd,
char *mesg, int mlen)
{
/* uid expiry Ngid gid0 gid1 ... gidN-1 */
int id;
kuid_t uid;
int gids;
int rv;
int i;
int err;
time_t expiry;
struct unix_gid ug, *ugp;
if (mesg[mlen - 1] != '\n')
return -EINVAL;
mesg[mlen-1] = 0;
rv = get_int(&mesg, &id);
if (rv)
return -EINVAL;
uid = make_kuid(&init_user_ns, id);
ug.uid = uid;
expiry = get_expiry(&mesg);
if (expiry == 0)
return -EINVAL;
rv = get_int(&mesg, &gids);
if (rv || gids < 0 || gids > 8192)
return -EINVAL;
ug.gi = groups_alloc(gids);
if (!ug.gi)
return -ENOMEM;
for (i = 0 ; i < gids ; i++) {
int gid;
kgid_t kgid;
rv = get_int(&mesg, &gid);
err = -EINVAL;
if (rv)
goto out;
kgid = make_kgid(&init_user_ns, gid);
if (!gid_valid(kgid))
goto out;
GROUP_AT(ug.gi, i) = kgid;
}
ugp = unix_gid_lookup(cd, uid);
if (ugp) {
struct cache_head *ch;
ug.h.flags = 0;
ug.h.expiry_time = expiry;
ch = sunrpc_cache_update(cd,
&ug.h, &ugp->h,
unix_gid_hash(uid));
if (!ch)
err = -ENOMEM;
else {
err = 0;
cache_put(ch, cd);
}
} else
err = -ENOMEM;
out:
if (ug.gi)
put_group_info(ug.gi);
return err;
}
static int unix_gid_show(struct seq_file *m,
struct cache_detail *cd,
struct cache_head *h)
{
struct user_namespace *user_ns = &init_user_ns;
struct unix_gid *ug;
int i;
int glen;
if (h == NULL) {
seq_puts(m, "#uid cnt: gids...\n");
return 0;
}
ug = container_of(h, struct unix_gid, h);
if (test_bit(CACHE_VALID, &h->flags) &&
!test_bit(CACHE_NEGATIVE, &h->flags))
glen = ug->gi->ngroups;
else
glen = 0;
seq_printf(m, "%u %d:", from_kuid_munged(user_ns, ug->uid), glen);
for (i = 0; i < glen; i++)
seq_printf(m, " %d", from_kgid_munged(user_ns, GROUP_AT(ug->gi, i)));
seq_printf(m, "\n");
return 0;
}
static struct cache_detail unix_gid_cache_template = {
.owner = THIS_MODULE,
.hash_size = GID_HASHMAX,
.name = "auth.unix.gid",
.cache_put = unix_gid_put,
.cache_request = unix_gid_request,
.cache_parse = unix_gid_parse,
.cache_show = unix_gid_show,
.match = unix_gid_match,
.init = unix_gid_init,
.update = unix_gid_update,
.alloc = unix_gid_alloc,
};
int unix_gid_cache_create(struct net *net)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct cache_detail *cd;
int err;
cd = cache_create_net(&unix_gid_cache_template, net);
if (IS_ERR(cd))
return PTR_ERR(cd);
err = cache_register_net(cd, net);
if (err) {
cache_destroy_net(cd, net);
return err;
}
sn->unix_gid_cache = cd;
return 0;
}
void unix_gid_cache_destroy(struct net *net)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct cache_detail *cd = sn->unix_gid_cache;
sn->unix_gid_cache = NULL;
cache_purge(cd);
cache_unregister_net(cd, net);
cache_destroy_net(cd, net);
}
static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, kuid_t uid)
{
struct unix_gid ug;
struct cache_head *ch;
ug.uid = uid;
ch = sunrpc_cache_lookup(cd, &ug.h, unix_gid_hash(uid));
if (ch)
return container_of(ch, struct unix_gid, h);
else
return NULL;
}
static struct group_info *unix_gid_find(kuid_t uid, struct svc_rqst *rqstp)
{
struct unix_gid *ug;
struct group_info *gi;
int ret;
struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net,
sunrpc_net_id);
ug = unix_gid_lookup(sn->unix_gid_cache, uid);
if (!ug)
return ERR_PTR(-EAGAIN);
ret = cache_check(sn->unix_gid_cache, &ug->h, &rqstp->rq_chandle);
switch (ret) {
case -ENOENT:
return ERR_PTR(-ENOENT);
case -ETIMEDOUT:
return ERR_PTR(-ESHUTDOWN);
case 0:
gi = get_group_info(ug->gi);
cache_put(&ug->h, sn->unix_gid_cache);
return gi;
default:
return ERR_PTR(-EAGAIN);
}
}
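/*
 * Resolve the client's identity for an AUTH_NULL/AUTH_UNIX request:
 * map the transport address to an auth_domain through the
 * auth.unix.ip cache, then (where available) replace the supplementary
 * group list with the auth.unix.gid mapping for cred->cr_uid.
 * Returns SVC_OK, SVC_DROP (an upcall is still pending), SVC_CLOSE
 * or SVC_DENIED.
 */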
int
svcauth_unix_set_client(struct svc_rqst *rqstp)
{
struct sockaddr_in *sin;
struct sockaddr_in6 *sin6, sin6_storage;
struct ip_map *ipm;
struct group_info *gi;
struct svc_cred *cred = &rqstp->rq_cred;
struct svc_xprt *xprt = rqstp->rq_xprt;
struct net *net = xprt->xpt_net;
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
switch (rqstp->rq_addr.ss_family) {
case AF_INET:
sin = svc_addr_in(rqstp);
sin6 = &sin6_storage;
ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &sin6->sin6_addr);
break;
case AF_INET6:
sin6 = svc_addr_in6(rqstp);
break;
default:
BUG();
}
rqstp->rq_client = NULL;
if (rqstp->rq_proc == 0)
return SVC_OK;
ipm = ip_map_cached_get(xprt);
if (ipm == NULL)
ipm = __ip_map_lookup(sn->ip_map_cache, rqstp->rq_server->sv_program->pg_class,
&sin6->sin6_addr);
if (ipm == NULL)
return SVC_DENIED;
switch (cache_check(sn->ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
default:
BUG();
case -ETIMEDOUT:
return SVC_CLOSE;
case -EAGAIN:
return SVC_DROP;
case -ENOENT:
return SVC_DENIED;
case 0:
rqstp->rq_client = &ipm->m_client->h;
kref_get(&rqstp->rq_client->ref);
ip_map_cached_put(xprt, ipm);
break;
}
gi = unix_gid_find(cred->cr_uid, rqstp);
switch (PTR_ERR(gi)) {
case -EAGAIN:
return SVC_DROP;
case -ESHUTDOWN:
return SVC_CLOSE;
case -ENOENT:
break;
default:
put_group_info(cred->cr_group_info);
cred->cr_group_info = gi;
}
return SVC_OK;
}
EXPORT_SYMBOL_GPL(svcauth_unix_set_client);
static int
svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
struct svc_cred *cred = &rqstp->rq_cred;
cred->cr_group_info = NULL;
cred->cr_principal = NULL;
rqstp->rq_client = NULL;
if (argv->iov_len < 3*4)
return SVC_GARBAGE;
if (svc_getu32(argv) != 0) {
dprintk("svc: bad null cred\n");
*authp = rpc_autherr_badcred;
return SVC_DENIED;
}
if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
dprintk("svc: bad null verf\n");
*authp = rpc_autherr_badverf;
return SVC_DENIED;
}
/* Signal that mapping to nobody uid/gid is required */
cred->cr_uid = INVALID_UID;
cred->cr_gid = INVALID_GID;
cred->cr_group_info = groups_alloc(0);
if (cred->cr_group_info == NULL)
return SVC_CLOSE; /* kmalloc failure - client must retry */
/* Put NULL verifier */
svc_putnl(resv, RPC_AUTH_NULL);
svc_putnl(resv, 0);
rqstp->rq_cred.cr_flavor = RPC_AUTH_NULL;
return SVC_OK;
}
static int
svcauth_null_release(struct svc_rqst *rqstp)
{
if (rqstp->rq_client)
auth_domain_put(rqstp->rq_client);
rqstp->rq_client = NULL;
if (rqstp->rq_cred.cr_group_info)
put_group_info(rqstp->rq_cred.cr_group_info);
rqstp->rq_cred.cr_group_info = NULL;
return 0; /* don't drop */
}
struct auth_ops svcauth_null = {
.name = "null",
.owner = THIS_MODULE,
.flavour = RPC_AUTH_NULL,
.accept = svcauth_null_accept,
.release = svcauth_null_release,
.set_client = svcauth_unix_set_client,
};
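/*
 * Parse an AUTH_UNIX (AUTH_SYS) credential: stamp, machine name,
 * uid, gid and at most 16 supplementary gids, followed by an
 * AUTH_NULL verifier (RFC 5531 authsys_parms).  The ids are converted
 * to kuid_t/kgid_t in the initial user namespace.
 */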
static int
svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
struct svc_cred *cred = &rqstp->rq_cred;
u32 slen, i;
int len = argv->iov_len;
cred->cr_group_info = NULL;
cred->cr_principal = NULL;
rqstp->rq_client = NULL;
if ((len -= 3*4) < 0)
return SVC_GARBAGE;
svc_getu32(argv); /* length */
svc_getu32(argv); /* time stamp */
slen = XDR_QUADLEN(svc_getnl(argv)); /* machname length */
if (slen > 64 || (len -= (slen + 3)*4) < 0)
goto badcred;
argv->iov_base = (void*)((__be32*)argv->iov_base + slen); /* skip machname */
argv->iov_len -= slen*4;
/*
* Note: we skip uid_valid()/gid_valid() checks here for
* backwards compatibility with clients that use -1 id's.
* Instead, -1 uid or gid is later mapped to the
* (export-specific) anonymous id by nfsd_setuser.
* Supplementary gid's will be left alone.
*/
cred->cr_uid = make_kuid(&init_user_ns, svc_getnl(argv)); /* uid */
cred->cr_gid = make_kgid(&init_user_ns, svc_getnl(argv)); /* gid */
slen = svc_getnl(argv); /* gids length */
if (slen > 16 || (len -= (slen + 2)*4) < 0)
goto badcred;
cred->cr_group_info = groups_alloc(slen);
if (cred->cr_group_info == NULL)
return SVC_CLOSE;
for (i = 0; i < slen; i++) {
kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv));
GROUP_AT(cred->cr_group_info, i) = kgid;
}
if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
*authp = rpc_autherr_badverf;
return SVC_DENIED;
}
/* Put NULL verifier */
svc_putnl(resv, RPC_AUTH_NULL);
svc_putnl(resv, 0);
rqstp->rq_cred.cr_flavor = RPC_AUTH_UNIX;
return SVC_OK;
badcred:
*authp = rpc_autherr_badcred;
return SVC_DENIED;
}
static int
svcauth_unix_release(struct svc_rqst *rqstp)
{
/* Verifier (such as it is) is already in place.
*/
if (rqstp->rq_client)
auth_domain_put(rqstp->rq_client);
rqstp->rq_client = NULL;
if (rqstp->rq_cred.cr_group_info)
put_group_info(rqstp->rq_cred.cr_group_info);
rqstp->rq_cred.cr_group_info = NULL;
return 0;
}
struct auth_ops svcauth_unix = {
.name = "unix",
.owner = THIS_MODULE,
.flavour = RPC_AUTH_UNIX,
.accept = svcauth_unix_accept,
.release = svcauth_unix_release,
.domain_release = svcauth_unix_domain_release,
.set_client = svcauth_unix_set_client,
};
static struct cache_detail ip_map_cache_template = {
.owner = THIS_MODULE,
.hash_size = IP_HASHMAX,
.name = "auth.unix.ip",
.cache_put = ip_map_put,
.cache_request = ip_map_request,
.cache_parse = ip_map_parse,
.cache_show = ip_map_show,
.match = ip_map_match,
.init = ip_map_init,
.update = update,
.alloc = ip_map_alloc,
};
int ip_map_cache_create(struct net *net)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct cache_detail *cd;
int err;
cd = cache_create_net(&ip_map_cache_template, net);
if (IS_ERR(cd))
return PTR_ERR(cd);
err = cache_register_net(cd, net);
if (err) {
cache_destroy_net(cd, net);
return err;
}
sn->ip_map_cache = cd;
return 0;
}
void ip_map_cache_destroy(struct net *net)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct cache_detail *cd = sn->ip_map_cache;
sn->ip_map_cache = NULL;
cache_purge(cd);
cache_unregister_net(cd, net);
cache_destroy_net(cd, net);
}

1669
net/sunrpc/svcsock.c Normal file

File diff suppressed because it is too large

185
net/sunrpc/sysctl.c Normal file

@ -0,0 +1,185 @@
/*
* linux/net/sunrpc/sysctl.c
*
* Sysctl interface to sunrpc module.
*
* I would prefer to register the sunrpc table below sys/net, but that's
* impossible at the moment.
*/
#include <linux/types.h>
#include <linux/linkage.h>
#include <linux/ctype.h>
#include <linux/fs.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svc_xprt.h>
#include "netns.h"
/*
* Declare the debug flags here
*/
unsigned int rpc_debug;
EXPORT_SYMBOL_GPL(rpc_debug);
unsigned int nfs_debug;
EXPORT_SYMBOL_GPL(nfs_debug);
unsigned int nfsd_debug;
EXPORT_SYMBOL_GPL(nfsd_debug);
unsigned int nlm_debug;
EXPORT_SYMBOL_GPL(nlm_debug);
#ifdef RPC_DEBUG
static struct ctl_table_header *sunrpc_table_header;
static struct ctl_table sunrpc_table[];
void
rpc_register_sysctl(void)
{
if (!sunrpc_table_header)
sunrpc_table_header = register_sysctl_table(sunrpc_table);
}
void
rpc_unregister_sysctl(void)
{
if (sunrpc_table_header) {
unregister_sysctl_table(sunrpc_table_header);
sunrpc_table_header = NULL;
}
}
static int proc_do_xprt(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
char tmpbuf[256];
size_t len;
if ((*ppos && !write) || !*lenp) {
*lenp = 0;
return 0;
}
len = svc_print_xprts(tmpbuf, sizeof(tmpbuf));
return simple_read_from_buffer(buffer, *lenp, ppos, tmpbuf, len);
}
static int
proc_dodebug(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
char tmpbuf[20], c, *s;
char __user *p;
unsigned int value;
size_t left, len;
if ((*ppos && !write) || !*lenp) {
*lenp = 0;
return 0;
}
left = *lenp;
if (write) {
if (!access_ok(VERIFY_READ, buffer, left))
return -EFAULT;
p = buffer;
while (left && __get_user(c, p) >= 0 && isspace(c))
left--, p++;
if (!left)
goto done;
if (left > sizeof(tmpbuf) - 1)
return -EINVAL;
if (copy_from_user(tmpbuf, p, left))
return -EFAULT;
tmpbuf[left] = '\0';
for (s = tmpbuf, value = 0; '0' <= *s && *s <= '9'; s++, left--)
value = 10 * value + (*s - '0');
if (*s && !isspace(*s))
return -EINVAL;
while (left && isspace(*s))
left--, s++;
*(unsigned int *) table->data = value;
/* Display the RPC tasks on writing to rpc_debug */
if (strcmp(table->procname, "rpc_debug") == 0)
rpc_show_tasks(&init_net);
} else {
if (!access_ok(VERIFY_WRITE, buffer, left))
return -EFAULT;
len = sprintf(tmpbuf, "%d", *(unsigned int *) table->data);
if (len > left)
len = left;
if (__copy_to_user(buffer, tmpbuf, len))
return -EFAULT;
if ((left -= len) > 0) {
if (put_user('\n', (char __user *)buffer + len))
return -EFAULT;
left--;
}
}
done:
*lenp -= left;
*ppos += *lenp;
return 0;
}
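/*
 * Usage note (illustrative): the debug flag words are exposed as
 * decimal sysctls under /proc/sys/sunrpc/, so besides the rpcdebug(8)
 * utility one can, for example,
 *
 *	echo 65535 > /proc/sys/sunrpc/rpc_debug		# enable all RPC debugging
 *	echo 0 > /proc/sys/sunrpc/nfs_debug		# silence NFS client debugging
 *
 * proc_dodebug() above parses only decimal digits, and a write to
 * rpc_debug additionally dumps the queued RPC tasks via
 * rpc_show_tasks().
 */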
static struct ctl_table debug_table[] = {
{
.procname = "rpc_debug",
.data = &rpc_debug,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dodebug
},
{
.procname = "nfs_debug",
.data = &nfs_debug,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dodebug
},
{
.procname = "nfsd_debug",
.data = &nfsd_debug,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dodebug
},
{
.procname = "nlm_debug",
.data = &nlm_debug,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dodebug
},
{
.procname = "transports",
.maxlen = 256,
.mode = 0444,
.proc_handler = proc_do_xprt,
},
{ }
};
static struct ctl_table sunrpc_table[] = {
{
.procname = "sunrpc",
.mode = 0555,
.child = debug_table
},
{ }
};
#endif

122
net/sunrpc/timer.c Normal file

@ -0,0 +1,122 @@
/*
* linux/net/sunrpc/timer.c
*
* Estimate RPC request round trip time.
*
* Based on packet round-trip and variance estimator algorithms described
* in appendix A of "Congestion Avoidance and Control" by Van Jacobson
* and Michael J. Karels (ACM Computer Communication Review; Proceedings
* of the Sigcomm '88 Symposium in Stanford, CA, August, 1988).
*
* This RTT estimator is used only for RPC over datagram protocols.
*
* Copyright (C) 2002 Trond Myklebust <trond.myklebust@fys.uio.no>
*/
#include <asm/param.h>
#include <linux/types.h>
#include <linux/unistd.h>
#include <linux/module.h>
#include <linux/sunrpc/clnt.h>
#define RPC_RTO_MAX (60*HZ)
#define RPC_RTO_INIT (HZ/5)
#define RPC_RTO_MIN (HZ/10)
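/*
 * A note on the fixed-point representation used below: srtt[] holds
 * the smoothed RTT scaled by 8 and sdrtt[] holds the mean deviation
 * scaled by 4, so the Van Jacobson/Karels update
 *
 *	SRTT <- SRTT + (sample - SRTT) / 8
 *	MDEV <- MDEV + (|sample - SRTT| - MDEV) / 4
 *
 * reduces to shifts and adds in rpc_update_rtt(), and the retransmit
 * estimate returned by rpc_calc_rto() is simply SRTT + 4 * MDEV,
 * clamped to RPC_RTO_MAX.
 */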
/**
* rpc_init_rtt - Initialize an RPC RTT estimator context
* @rt: context to initialize
* @timeo: initial timeout value, in jiffies
*
*/
void rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo)
{
unsigned long init = 0;
unsigned int i;
rt->timeo = timeo;
if (timeo > RPC_RTO_INIT)
init = (timeo - RPC_RTO_INIT) << 3;
for (i = 0; i < 5; i++) {
rt->srtt[i] = init;
rt->sdrtt[i] = RPC_RTO_INIT;
rt->ntimeouts[i] = 0;
}
}
EXPORT_SYMBOL_GPL(rpc_init_rtt);
/**
* rpc_update_rtt - Update an RPC RTT estimator context
* @rt: context to update
* @timer: timer array index (request type)
* @m: recent actual RTT, in jiffies
*
* NB: When computing the smoothed RTT and standard deviation,
* be careful not to produce negative intermediate results.
*/
void rpc_update_rtt(struct rpc_rtt *rt, unsigned int timer, long m)
{
long *srtt, *sdrtt;
if (timer-- == 0)
return;
/* jiffies wrapped; ignore this one */
if (m < 0)
return;
if (m == 0)
m = 1L;
srtt = (long *)&rt->srtt[timer];
m -= *srtt >> 3;
*srtt += m;
if (m < 0)
m = -m;
sdrtt = (long *)&rt->sdrtt[timer];
m -= *sdrtt >> 2;
*sdrtt += m;
/* Set lower bound on the variance */
if (*sdrtt < RPC_RTO_MIN)
*sdrtt = RPC_RTO_MIN;
}
EXPORT_SYMBOL_GPL(rpc_update_rtt);
/**
* rpc_calc_rto - Provide an estimated timeout value
* @rt: context to use for calculation
* @timer: timer array index (request type)
*
* Estimate RTO for an NFS RPC sent via an unreliable datagram. Use
* the mean and mean deviation of RTT for the appropriate type of RPC
* for frequently issued RPCs, and a fixed default for the others.
*
* The justification for doing "other" this way is that these RPCs
* happen so infrequently that timer estimation would probably be
* stale. Also, since many of these RPCs are non-idempotent, a
* conservative timeout is desired.
*
* getattr, lookup,
* read, write, commit - A+4D
* other - timeo
*/
unsigned long rpc_calc_rto(struct rpc_rtt *rt, unsigned int timer)
{
unsigned long res;
if (timer-- == 0)
return rt->timeo;
res = ((rt->srtt[timer] + 7) >> 3) + rt->sdrtt[timer];
if (res > RPC_RTO_MAX)
res = RPC_RTO_MAX;
return res;
}
EXPORT_SYMBOL_GPL(rpc_calc_rto);

1514
net/sunrpc/xdr.c Normal file

File diff suppressed because it is too large

1376
net/sunrpc/xprt.c Normal file

File diff suppressed because it is too large

8
net/sunrpc/xprtrdma/Makefile Normal file

@ -0,0 +1,8 @@
obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o
xprtrdma-y := transport.o rpc_rdma.o verbs.o
obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o
svcrdma-y := svc_rdma.o svc_rdma_transport.o \
svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o

875
net/sunrpc/xprtrdma/rpc_rdma.c Normal file

@ -0,0 +1,875 @@
/*
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* rpc_rdma.c
*
* This file contains the guts of the RPC RDMA protocol, and
* does marshaling/unmarshaling, etc. It is also where interfacing
* to the Linux RPC framework lives.
*/
#include "xprt_rdma.h"
#include <linux/highmem.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
#ifdef RPC_DEBUG
static const char transfertypes[][12] = {
"pure inline", /* no chunks */
" read chunk", /* some argument via rdma read */
"*read chunk", /* entire request via rdma read */
"write chunk", /* some result via rdma write */
"reply chunk" /* entire reply via rdma write */
};
#endif
/*
* Chunk assembly from upper layer xdr_buf.
*
* Convert the passed-in xdr_buf into its representation as RPC/RDMA chunk
* elements. Segments are then coalesced when registered, if possible
* within the selected memreg mode.
*
* Returns positive number of segments converted, or a negative errno.
*/
static int
rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs)
{
int len, n = 0, p;
int page_base;
struct page **ppages;
if (pos == 0 && xdrbuf->head[0].iov_len) {
seg[n].mr_page = NULL;
seg[n].mr_offset = xdrbuf->head[0].iov_base;
seg[n].mr_len = xdrbuf->head[0].iov_len;
++n;
}
len = xdrbuf->page_len;
ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
page_base = xdrbuf->page_base & ~PAGE_MASK;
p = 0;
while (len && n < nsegs) {
if (!ppages[p]) {
/* alloc the pagelist for receiving buffer */
ppages[p] = alloc_page(GFP_ATOMIC);
if (!ppages[p])
return -ENOMEM;
}
seg[n].mr_page = ppages[p];
seg[n].mr_offset = (void *)(unsigned long) page_base;
seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len);
if (seg[n].mr_len > PAGE_SIZE)
return -EIO;
len -= seg[n].mr_len;
++n;
++p;
page_base = 0; /* page offset only applies to first page */
}
/* Message overflows the seg array */
if (len && n == nsegs)
return -EIO;
if (xdrbuf->tail[0].iov_len) {
/* the rpcrdma protocol allows us to omit any trailing
* xdr pad bytes, saving the server an RDMA operation. */
if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
return n;
if (n == nsegs)
/* Tail remains, but we're out of segments */
return -EIO;
seg[n].mr_page = NULL;
seg[n].mr_offset = xdrbuf->tail[0].iov_base;
seg[n].mr_len = xdrbuf->tail[0].iov_len;
++n;
}
return n;
}
/*
* Create read/write chunk lists, and reply chunks, for RDMA
*
* Assume check against THRESHOLD has been done, and chunks are required.
* Assume only encoding one list entry for read|write chunks. The NFSv3
* protocol is simple enough to allow this as it only has a single "bulk
* result" in each procedure - complicated NFSv4 COMPOUNDs are not. (The
* RDMA/Sessions NFSv4 proposal addresses this for future v4 revs.)
*
* When used for a single reply chunk (which is a special write
* chunk used for the entire reply, rather than just the data), it
* is used primarily for READDIR and READLINK which would otherwise
* be severely size-limited by a small rdma inline read max. The server
* response will come back as an RDMA Write, followed by a message
* of type RDMA_NOMSG carrying the xid and length. As a result, reply
* chunks do not provide data alignment, however they do not require
* "fixup" (moving the response to the upper layer buffer) either.
*
* Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
*
* Read chunklist (a linked list):
* N elements, position P (same P for all chunks of same arg!):
* 1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0
*
* Write chunklist (a list of (one) counted array):
* N elements:
* 1 - N - HLOO - HLOO - ... - HLOO - 0
*
* Reply chunk (a counted array):
* N elements:
* 1 - N - HLOO - HLOO - ... - HLOO
*
* Returns positive RPC/RDMA header size, or negative errno.
*/
static ssize_t
rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type)
{
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
int n, nsegs, nchunks = 0;
unsigned int pos;
struct rpcrdma_mr_seg *seg = req->rl_segments;
struct rpcrdma_read_chunk *cur_rchunk = NULL;
struct rpcrdma_write_array *warray = NULL;
struct rpcrdma_write_chunk *cur_wchunk = NULL;
__be32 *iptr = headerp->rm_body.rm_chunks;
if (type == rpcrdma_readch || type == rpcrdma_areadch) {
/* a read chunk - server will RDMA Read our memory */
cur_rchunk = (struct rpcrdma_read_chunk *) iptr;
} else {
/* a write or reply chunk - server will RDMA Write our memory */
*iptr++ = xdr_zero; /* encode a NULL read chunk list */
if (type == rpcrdma_replych)
*iptr++ = xdr_zero; /* a NULL write chunk list */
warray = (struct rpcrdma_write_array *) iptr;
cur_wchunk = (struct rpcrdma_write_chunk *) (warray + 1);
}
if (type == rpcrdma_replych || type == rpcrdma_areadch)
pos = 0;
else
pos = target->head[0].iov_len;
nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS);
if (nsegs < 0)
return nsegs;
do {
n = rpcrdma_register_external(seg, nsegs,
cur_wchunk != NULL, r_xprt);
if (n <= 0)
goto out;
if (cur_rchunk) { /* read */
cur_rchunk->rc_discrim = xdr_one;
/* all read chunks have the same "position" */
cur_rchunk->rc_position = htonl(pos);
cur_rchunk->rc_target.rs_handle = htonl(seg->mr_rkey);
cur_rchunk->rc_target.rs_length = htonl(seg->mr_len);
xdr_encode_hyper(
(__be32 *)&cur_rchunk->rc_target.rs_offset,
seg->mr_base);
dprintk("RPC: %s: read chunk "
"elem %d@0x%llx:0x%x pos %u (%s)\n", __func__,
seg->mr_len, (unsigned long long)seg->mr_base,
seg->mr_rkey, pos, n < nsegs ? "more" : "last");
cur_rchunk++;
r_xprt->rx_stats.read_chunk_count++;
} else { /* write/reply */
cur_wchunk->wc_target.rs_handle = htonl(seg->mr_rkey);
cur_wchunk->wc_target.rs_length = htonl(seg->mr_len);
xdr_encode_hyper(
(__be32 *)&cur_wchunk->wc_target.rs_offset,
seg->mr_base);
dprintk("RPC: %s: %s chunk "
"elem %d@0x%llx:0x%x (%s)\n", __func__,
(type == rpcrdma_replych) ? "reply" : "write",
seg->mr_len, (unsigned long long)seg->mr_base,
seg->mr_rkey, n < nsegs ? "more" : "last");
cur_wchunk++;
if (type == rpcrdma_replych)
r_xprt->rx_stats.reply_chunk_count++;
else
r_xprt->rx_stats.write_chunk_count++;
r_xprt->rx_stats.total_rdma_request += seg->mr_len;
}
nchunks++;
seg += n;
nsegs -= n;
} while (nsegs);
/* success. all failures return above */
req->rl_nchunks = nchunks;
/*
* finish off header. If write, marshal discrim and nchunks.
*/
if (cur_rchunk) {
iptr = (__be32 *) cur_rchunk;
*iptr++ = xdr_zero; /* finish the read chunk list */
*iptr++ = xdr_zero; /* encode a NULL write chunk list */
*iptr++ = xdr_zero; /* encode a NULL reply chunk */
} else {
warray->wc_discrim = xdr_one;
warray->wc_nchunks = htonl(nchunks);
iptr = (__be32 *) cur_wchunk;
if (type == rpcrdma_writech) {
*iptr++ = xdr_zero; /* finish the write chunk list */
*iptr++ = xdr_zero; /* encode a NULL reply chunk */
}
}
/*
* Return header size.
*/
return (unsigned char *)iptr - (unsigned char *)headerp;
out:
if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) {
for (pos = 0; nchunks--;)
pos += rpcrdma_deregister_external(
&req->rl_segments[pos], r_xprt);
}
return n;
}
/*
* Marshal chunks. This routine returns the header length
* consumed by marshaling.
*
* Returns positive RPC/RDMA header size, or negative errno.
*/
ssize_t
rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result)
{
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)req->rl_base;
if (req->rl_rtype != rpcrdma_noch)
result = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf,
headerp, req->rl_rtype);
else if (req->rl_wtype != rpcrdma_noch)
result = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf,
headerp, req->rl_wtype);
return result;
}
/*
* Copy write data inline.
* This function is used for "small" requests. Data which is passed
* to RPC via iovecs (or page list) is copied directly into the
* pre-registered memory buffer for this request. For small amounts
* of data, this is efficient. The cutoff value is tunable.
*/
static int
rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
{
int i, npages, curlen;
int copy_len;
unsigned char *srcp, *destp;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
int page_base;
struct page **ppages;
destp = rqst->rq_svec[0].iov_base;
curlen = rqst->rq_svec[0].iov_len;
destp += curlen;
/*
* Do optional padding where it makes sense. Alignment of write
* payload can help the server, if our setting is accurate.
*/
pad -= (curlen + 36/*sizeof(struct rpcrdma_msg_padded)*/);
if (pad < 0 || rqst->rq_slen - curlen < RPCRDMA_INLINE_PAD_THRESH)
pad = 0; /* don't pad this request */
dprintk("RPC: %s: pad %d destp 0x%p len %d hdrlen %d\n",
__func__, pad, destp, rqst->rq_slen, curlen);
copy_len = rqst->rq_snd_buf.page_len;
if (rqst->rq_snd_buf.tail[0].iov_len) {
curlen = rqst->rq_snd_buf.tail[0].iov_len;
if (destp + copy_len != rqst->rq_snd_buf.tail[0].iov_base) {
memmove(destp + copy_len,
rqst->rq_snd_buf.tail[0].iov_base, curlen);
r_xprt->rx_stats.pullup_copy_count += curlen;
}
dprintk("RPC: %s: tail destp 0x%p len %d\n",
__func__, destp + copy_len, curlen);
rqst->rq_svec[0].iov_len += curlen;
}
r_xprt->rx_stats.pullup_copy_count += copy_len;
page_base = rqst->rq_snd_buf.page_base;
ppages = rqst->rq_snd_buf.pages + (page_base >> PAGE_SHIFT);
page_base &= ~PAGE_MASK;
npages = PAGE_ALIGN(page_base+copy_len) >> PAGE_SHIFT;
for (i = 0; copy_len && i < npages; i++) {
curlen = PAGE_SIZE - page_base;
if (curlen > copy_len)
curlen = copy_len;
dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n",
__func__, i, destp, copy_len, curlen);
srcp = kmap_atomic(ppages[i]);
memcpy(destp, srcp+page_base, curlen);
kunmap_atomic(srcp);
rqst->rq_svec[0].iov_len += curlen;
destp += curlen;
copy_len -= curlen;
page_base = 0;
}
/* header now contains entire send message */
return pad;
}
/*
* Marshal a request: the primary job of this routine is to choose
* the transfer modes. See comments below.
*
* Uses multiple RDMA IOVs for a request:
* [0] -- RPC RDMA header, which uses memory from the *start* of the
* preregistered buffer that already holds the RPC data in
* its middle.
* [1] -- the RPC header/data, marshaled by RPC and the NFS protocol.
* [2] -- optional padding.
* [3] -- if padded, header only in [1] and data here.
*
* Returns zero on success, otherwise a negative errno.
*/
int
rpcrdma_marshal_req(struct rpc_rqst *rqst)
{
struct rpc_xprt *xprt = rqst->rq_xprt;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
char *base;
size_t rpclen, padlen;
ssize_t hdrlen;
struct rpcrdma_msg *headerp;
/*
* rpclen gets amount of data in first buffer, which is the
* pre-registered buffer.
*/
base = rqst->rq_svec[0].iov_base;
rpclen = rqst->rq_svec[0].iov_len;
/* build RDMA header in private area at front */
headerp = (struct rpcrdma_msg *) req->rl_base;
/* don't htonl XID, it's already done in request */
headerp->rm_xid = rqst->rq_xid;
headerp->rm_vers = xdr_one;
headerp->rm_credit = htonl(r_xprt->rx_buf.rb_max_requests);
headerp->rm_type = htonl(RDMA_MSG);
/*
* Chunks needed for results?
*
* o If the expected result is under the inline threshold, all ops
* return as inline (but see later).
* o Large non-read ops return as a single reply chunk.
* o Large read ops return data as write chunk(s), header as inline.
*
* Note: the NFS code sending down multiple result segments implies
* the op is one of read, readdir[plus], readlink or NFSv4 getacl.
*/
/*
* This code can handle read chunks, write chunks OR reply
* chunks -- only one type. If the request is too big to fit
* inline, then we will choose read chunks. If the request is
* a READ, then use write chunks to separate the file data
* into pages; otherwise use reply chunks.
*/
if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
req->rl_wtype = rpcrdma_noch;
else if (rqst->rq_rcv_buf.page_len == 0)
req->rl_wtype = rpcrdma_replych;
else if (rqst->rq_rcv_buf.flags & XDRBUF_READ)
req->rl_wtype = rpcrdma_writech;
else
req->rl_wtype = rpcrdma_replych;
/*
* Chunks needed for arguments?
*
* o If the total request is under the inline threshold, all ops
* are sent as inline.
* o Large non-write ops are sent with the entire message as a
* single read chunk (protocol 0-position special case).
* o Large write ops transmit data as read chunk(s), header as
* inline.
*
* Note: the NFS code sending down multiple argument segments
* implies the op is a write.
* TBD check NFSv4 setacl
*/
if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
req->rl_rtype = rpcrdma_noch;
else if (rqst->rq_snd_buf.page_len == 0)
req->rl_rtype = rpcrdma_areadch;
else
req->rl_rtype = rpcrdma_readch;
/* The following simplification is not true forever */
if (req->rl_rtype != rpcrdma_noch && req->rl_wtype == rpcrdma_replych)
req->rl_wtype = rpcrdma_noch;
if (req->rl_rtype != rpcrdma_noch && req->rl_wtype != rpcrdma_noch) {
dprintk("RPC: %s: cannot marshal multiple chunk lists\n",
__func__);
return -EIO;
}
hdrlen = 28; /*sizeof *headerp;*/
padlen = 0;
/*
* Pull up any extra send data into the preregistered buffer.
* When padding is in use and applies to the transfer, insert
* it and change the message type.
*/
if (req->rl_rtype == rpcrdma_noch) {
padlen = rpcrdma_inline_pullup(rqst,
RPCRDMA_INLINE_PAD_VALUE(rqst));
if (padlen) {
headerp->rm_type = htonl(RDMA_MSGP);
headerp->rm_body.rm_padded.rm_align =
htonl(RPCRDMA_INLINE_PAD_VALUE(rqst));
headerp->rm_body.rm_padded.rm_thresh =
htonl(RPCRDMA_INLINE_PAD_THRESH);
headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero;
headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
hdrlen += 2 * sizeof(u32); /* extra words in padhdr */
if (req->rl_wtype != rpcrdma_noch) {
dprintk("RPC: %s: invalid chunk list\n",
__func__);
return -EIO;
}
} else {
headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero;
headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero;
headerp->rm_body.rm_nochunks.rm_empty[2] = xdr_zero;
/* new length after pullup */
rpclen = rqst->rq_svec[0].iov_len;
/*
* Currently we try to not actually use read inline.
* Reply chunks have the desirable property that
* they land, packed, directly in the target buffers
* without headers, so they require no fixup. The
* additional RDMA Write op sends the same amount
* of data, streams on-the-wire and adds no overhead
* on receive. Therefore, we request a reply chunk
* for non-writes wherever feasible and efficient.
*/
if (req->rl_wtype == rpcrdma_noch)
req->rl_wtype = rpcrdma_replych;
}
}
hdrlen = rpcrdma_marshal_chunks(rqst, hdrlen);
if (hdrlen < 0)
return hdrlen;
dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd"
" headerp 0x%p base 0x%p lkey 0x%x\n",
__func__, transfertypes[req->rl_wtype], hdrlen, rpclen, padlen,
headerp, base, req->rl_iov.lkey);
/*
* initialize send_iov's - normally only two: rdma chunk header and
* single preregistered RPC header buffer, but if padding is present,
* then use a preregistered (and zeroed) pad buffer between the RPC
* header and any write data. In all non-rdma cases, any following
* data has been copied into the RPC header buffer.
*/
req->rl_send_iov[0].addr = req->rl_iov.addr;
req->rl_send_iov[0].length = hdrlen;
req->rl_send_iov[0].lkey = req->rl_iov.lkey;
req->rl_send_iov[1].addr = req->rl_iov.addr + (base - req->rl_base);
req->rl_send_iov[1].length = rpclen;
req->rl_send_iov[1].lkey = req->rl_iov.lkey;
req->rl_niovs = 2;
if (padlen) {
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
req->rl_send_iov[2].addr = ep->rep_pad.addr;
req->rl_send_iov[2].length = padlen;
req->rl_send_iov[2].lkey = ep->rep_pad.lkey;
req->rl_send_iov[3].addr = req->rl_send_iov[1].addr + rpclen;
req->rl_send_iov[3].length = rqst->rq_slen - rpclen;
req->rl_send_iov[3].lkey = req->rl_iov.lkey;
req->rl_niovs = 4;
}
return 0;
}
/*
* Chase down a received write or reply chunklist to get length
* RDMA'd by server. See map at rpcrdma_create_chunks()! :-)
*/
static int
rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __be32 **iptrp)
{
unsigned int i, total_len;
struct rpcrdma_write_chunk *cur_wchunk;
i = ntohl(**iptrp); /* get array count */
if (i > max)
return -1;
cur_wchunk = (struct rpcrdma_write_chunk *) (*iptrp + 1);
total_len = 0;
while (i--) {
struct rpcrdma_segment *seg = &cur_wchunk->wc_target;
ifdebug(FACILITY) {
u64 off;
xdr_decode_hyper((__be32 *)&seg->rs_offset, &off);
dprintk("RPC: %s: chunk %d@0x%llx:0x%x\n",
__func__,
ntohl(seg->rs_length),
(unsigned long long)off,
ntohl(seg->rs_handle));
}
total_len += ntohl(seg->rs_length);
++cur_wchunk;
}
/* check and adjust for properly terminated write chunk */
if (wrchunk) {
__be32 *w = (__be32 *) cur_wchunk;
if (*w++ != xdr_zero)
return -1;
cur_wchunk = (struct rpcrdma_write_chunk *) w;
}
if ((char *) cur_wchunk > rep->rr_base + rep->rr_len)
return -1;
*iptrp = (__be32 *) cur_wchunk;
return total_len;
}
/*
* Scatter inline received data back into provided iov's.
*/
static void
rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
{
int i, npages, curlen, olen;
char *destp;
struct page **ppages;
int page_base;
curlen = rqst->rq_rcv_buf.head[0].iov_len;
if (curlen > copy_len) { /* write chunk header fixup */
curlen = copy_len;
rqst->rq_rcv_buf.head[0].iov_len = curlen;
}
dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n",
__func__, srcp, copy_len, curlen);
/* Shift pointer for first receive segment only */
rqst->rq_rcv_buf.head[0].iov_base = srcp;
srcp += curlen;
copy_len -= curlen;
olen = copy_len;
i = 0;
rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen;
page_base = rqst->rq_rcv_buf.page_base;
ppages = rqst->rq_rcv_buf.pages + (page_base >> PAGE_SHIFT);
page_base &= ~PAGE_MASK;
if (copy_len && rqst->rq_rcv_buf.page_len) {
npages = PAGE_ALIGN(page_base +
rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT;
for (; i < npages; i++) {
curlen = PAGE_SIZE - page_base;
if (curlen > copy_len)
curlen = copy_len;
dprintk("RPC: %s: page %d"
" srcp 0x%p len %d curlen %d\n",
__func__, i, srcp, copy_len, curlen);
destp = kmap_atomic(ppages[i]);
memcpy(destp + page_base, srcp, curlen);
flush_dcache_page(ppages[i]);
kunmap_atomic(destp);
srcp += curlen;
copy_len -= curlen;
if (copy_len == 0)
break;
page_base = 0;
}
}
if (copy_len && rqst->rq_rcv_buf.tail[0].iov_len) {
curlen = copy_len;
if (curlen > rqst->rq_rcv_buf.tail[0].iov_len)
curlen = rqst->rq_rcv_buf.tail[0].iov_len;
if (rqst->rq_rcv_buf.tail[0].iov_base != srcp)
memmove(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen);
dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n",
__func__, srcp, copy_len, curlen);
rqst->rq_rcv_buf.tail[0].iov_len = curlen;
copy_len -= curlen; ++i;
} else
rqst->rq_rcv_buf.tail[0].iov_len = 0;
if (pad) {
/* implicit padding on terminal chunk */
unsigned char *p = rqst->rq_rcv_buf.tail[0].iov_base;
while (pad--)
p[rqst->rq_rcv_buf.tail[0].iov_len++] = 0;
}
if (copy_len)
dprintk("RPC: %s: %d bytes in"
" %d extra segments (%d lost)\n",
__func__, olen, i, copy_len);
/* TBD avoid a warning from call_decode() */
rqst->rq_private_buf = rqst->rq_rcv_buf;
}
void
rpcrdma_connect_worker(struct work_struct *work)
{
struct rpcrdma_ep *ep =
container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
struct rpc_xprt *xprt = ep->rep_xprt;
spin_lock_bh(&xprt->transport_lock);
if (++xprt->connect_cookie == 0) /* maintain a reserved value */
++xprt->connect_cookie;
if (ep->rep_connected > 0) {
if (!xprt_test_and_set_connected(xprt))
xprt_wake_pending_tasks(xprt, 0);
} else {
if (xprt_test_and_clear_connected(xprt))
xprt_wake_pending_tasks(xprt, -ENOTCONN);
}
spin_unlock_bh(&xprt->transport_lock);
}
/*
* This function is called when an async event is posted to
* the connection which changes the connection state. All it
* does at this point is mark the connection up/down, the rpc
* timers do the rest.
*/
void
rpcrdma_conn_func(struct rpcrdma_ep *ep)
{
schedule_delayed_work(&ep->rep_connect_worker, 0);
}
/*
* Called as a tasklet to do req/reply match and complete a request
* Errors must result in the RPC task either being awakened, or
* allowed to timeout, to discover the errors at that time.
*/
void
rpcrdma_reply_handler(struct rpcrdma_rep *rep)
{
struct rpcrdma_msg *headerp;
struct rpcrdma_req *req;
struct rpc_rqst *rqst;
struct rpc_xprt *xprt = rep->rr_xprt;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
__be32 *iptr;
int rdmalen, status;
unsigned long cwnd;
/* Check status. If bad, signal disconnect and return rep to pool */
if (rep->rr_len == ~0U) {
rpcrdma_recv_buffer_put(rep);
if (r_xprt->rx_ep.rep_connected == 1) {
r_xprt->rx_ep.rep_connected = -EIO;
rpcrdma_conn_func(&r_xprt->rx_ep);
}
return;
}
if (rep->rr_len < 28) {
dprintk("RPC: %s: short/invalid reply\n", __func__);
goto repost;
}
headerp = (struct rpcrdma_msg *) rep->rr_base;
if (headerp->rm_vers != xdr_one) {
dprintk("RPC: %s: invalid version %d\n",
__func__, ntohl(headerp->rm_vers));
goto repost;
}
/* Get XID and try for a match. */
spin_lock(&xprt->transport_lock);
rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
if (rqst == NULL) {
spin_unlock(&xprt->transport_lock);
dprintk("RPC: %s: reply 0x%p failed "
"to match any request xid 0x%08x len %d\n",
__func__, rep, headerp->rm_xid, rep->rr_len);
repost:
r_xprt->rx_stats.bad_reply_count++;
rep->rr_func = rpcrdma_reply_handler;
if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
rpcrdma_recv_buffer_put(rep);
return;
}
/* get request object */
req = rpcr_to_rdmar(rqst);
if (req->rl_reply) {
spin_unlock(&xprt->transport_lock);
dprintk("RPC: %s: duplicate reply 0x%p to RPC "
"request 0x%p: xid 0x%08x\n", __func__, rep, req,
headerp->rm_xid);
goto repost;
}
dprintk("RPC: %s: reply 0x%p completes request 0x%p\n"
" RPC request 0x%p xid 0x%08x\n",
__func__, rep, req, rqst, headerp->rm_xid);
/* from here on, the reply is no longer an orphan */
req->rl_reply = rep;
xprt->reestablish_timeout = 0;
/* check for expected message types */
/* The order of some of these tests is important. */
switch (headerp->rm_type) {
case htonl(RDMA_MSG):
/* never expect read chunks */
/* never expect reply chunks (two ways to check) */
/* never expect write chunks without having offered RDMA */
if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
(headerp->rm_body.rm_chunks[1] == xdr_zero &&
headerp->rm_body.rm_chunks[2] != xdr_zero) ||
(headerp->rm_body.rm_chunks[1] != xdr_zero &&
req->rl_nchunks == 0))
goto badheader;
if (headerp->rm_body.rm_chunks[1] != xdr_zero) {
/* count any expected write chunks in read reply */
/* start at write chunk array count */
iptr = &headerp->rm_body.rm_chunks[2];
rdmalen = rpcrdma_count_chunks(rep,
req->rl_nchunks, 1, &iptr);
/* check for validity, and no reply chunk after */
if (rdmalen < 0 || *iptr++ != xdr_zero)
goto badheader;
rep->rr_len -=
((unsigned char *)iptr - (unsigned char *)headerp);
status = rep->rr_len + rdmalen;
r_xprt->rx_stats.total_rdma_reply += rdmalen;
/* special case - last chunk may omit padding */
if (rdmalen &= 3) {
rdmalen = 4 - rdmalen;
status += rdmalen;
}
} else {
/* else ordinary inline */
rdmalen = 0;
iptr = (__be32 *)((unsigned char *)headerp + 28);
rep->rr_len -= 28; /*sizeof *headerp;*/
status = rep->rr_len;
}
/* Fix up the rpc results for upper layer */
rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen);
break;
case htonl(RDMA_NOMSG):
/* never expect read or write chunks, always reply chunks */
if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
headerp->rm_body.rm_chunks[1] != xdr_zero ||
headerp->rm_body.rm_chunks[2] != xdr_one ||
req->rl_nchunks == 0)
goto badheader;
iptr = (__be32 *)((unsigned char *)headerp + 28);
rdmalen = rpcrdma_count_chunks(rep, req->rl_nchunks, 0, &iptr);
if (rdmalen < 0)
goto badheader;
r_xprt->rx_stats.total_rdma_reply += rdmalen;
/* Reply chunk buffer already is the reply vector - no fixup. */
status = rdmalen;
break;
badheader:
default:
dprintk("%s: invalid rpcrdma reply header (type %d):"
" chunks[012] == %d %d %d"
" expected chunks <= %d\n",
__func__, ntohl(headerp->rm_type),
headerp->rm_body.rm_chunks[0],
headerp->rm_body.rm_chunks[1],
headerp->rm_body.rm_chunks[2],
req->rl_nchunks);
status = -EIO;
r_xprt->rx_stats.bad_reply_count++;
break;
}
cwnd = xprt->cwnd;
xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
if (xprt->cwnd > cwnd)
xprt_release_rqst_cong(rqst->rq_task);
dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
__func__, xprt, rqst, status);
xprt_complete_rqst(rqst->rq_task, status);
spin_unlock(&xprt->transport_lock);
}

302
net/sunrpc/xprtrdma/svc_rdma.c Normal file

@ -0,0 +1,302 @@
/*
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/sysctl.h>
#include <linux/workqueue.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
/* RPC/RDMA parameters */
unsigned int svcrdma_ord = RPCRDMA_ORD;
static unsigned int min_ord = 1;
static unsigned int max_ord = 4096;
unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
static unsigned int min_max_requests = 4;
static unsigned int max_max_requests = 16384;
unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE;
static unsigned int min_max_inline = 4096;
static unsigned int max_max_inline = 65536;
atomic_t rdma_stat_recv;
atomic_t rdma_stat_read;
atomic_t rdma_stat_write;
atomic_t rdma_stat_sq_starve;
atomic_t rdma_stat_rq_starve;
atomic_t rdma_stat_rq_poll;
atomic_t rdma_stat_rq_prod;
atomic_t rdma_stat_sq_poll;
atomic_t rdma_stat_sq_prod;
/* Temporary NFS request map and context caches */
struct kmem_cache *svc_rdma_map_cachep;
struct kmem_cache *svc_rdma_ctxt_cachep;
struct workqueue_struct *svc_rdma_wq;
/*
* This function implements reading and resetting an atomic_t stat
* variable through read/write to a proc file. Any write to the file
* resets the associated statistic to zero. Any read returns its
* current value.
*/
static int read_reset_stat(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
atomic_t *stat = (atomic_t *)table->data;
if (!stat)
return -EINVAL;
if (write)
atomic_set(stat, 0);
else {
char str_buf[32];
char *data;
int len = snprintf(str_buf, 32, "%d\n", atomic_read(stat));
if (len >= 32)
return -EFAULT;
len = strlen(str_buf);
if (*ppos > len) {
*lenp = 0;
return 0;
}
data = &str_buf[*ppos];
len -= *ppos;
if (len > *lenp)
len = *lenp;
if (len && copy_to_user(buffer, str_buf, len))
return -EFAULT;
*lenp = len;
*ppos += len;
}
return 0;
}
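/*
 * Usage note (illustrative): each counter below appears under
 * /proc/sys/sunrpc/svc_rdma/, e.g.
 *
 *	cat /proc/sys/sunrpc/svc_rdma/rdma_stat_recv	# read the counter
 *	echo 0 > /proc/sys/sunrpc/svc_rdma/rdma_stat_recv	# reset it
 *
 * Any write resets the counter to zero, as read_reset_stat() above
 * implements.
 */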
static struct ctl_table_header *svcrdma_table_header;
static struct ctl_table svcrdma_parm_table[] = {
{
.procname = "max_requests",
.data = &svcrdma_max_requests,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_max_requests,
.extra2 = &max_max_requests
},
{
.procname = "max_req_size",
.data = &svcrdma_max_req_size,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_max_inline,
.extra2 = &max_max_inline
},
{
.procname = "max_outbound_read_requests",
.data = &svcrdma_ord,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_ord,
.extra2 = &max_ord,
},
{
.procname = "rdma_stat_read",
.data = &rdma_stat_read,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = read_reset_stat,
},
{
.procname = "rdma_stat_recv",
.data = &rdma_stat_recv,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = read_reset_stat,
},
{
.procname = "rdma_stat_write",
.data = &rdma_stat_write,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = read_reset_stat,
},
{
.procname = "rdma_stat_sq_starve",
.data = &rdma_stat_sq_starve,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = read_reset_stat,
},
{
.procname = "rdma_stat_rq_starve",
.data = &rdma_stat_rq_starve,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = read_reset_stat,
},
{
.procname = "rdma_stat_rq_poll",
.data = &rdma_stat_rq_poll,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = read_reset_stat,
},
{
.procname = "rdma_stat_rq_prod",
.data = &rdma_stat_rq_prod,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = read_reset_stat,
},
{
.procname = "rdma_stat_sq_poll",
.data = &rdma_stat_sq_poll,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = read_reset_stat,
},
{
.procname = "rdma_stat_sq_prod",
.data = &rdma_stat_sq_prod,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = read_reset_stat,
},
{ },
};
static struct ctl_table svcrdma_table[] = {
{
.procname = "svc_rdma",
.mode = 0555,
.child = svcrdma_parm_table
},
{ },
};
static struct ctl_table svcrdma_root_table[] = {
{
.procname = "sunrpc",
.mode = 0555,
.child = svcrdma_table
},
{ },
};
void svc_rdma_cleanup(void)
{
dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
destroy_workqueue(svc_rdma_wq);
if (svcrdma_table_header) {
unregister_sysctl_table(svcrdma_table_header);
svcrdma_table_header = NULL;
}
svc_unreg_xprt_class(&svc_rdma_class);
kmem_cache_destroy(svc_rdma_map_cachep);
kmem_cache_destroy(svc_rdma_ctxt_cachep);
}
int svc_rdma_init(void)
{
dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord);
dprintk("\tmax_requests : %d\n", svcrdma_max_requests);
dprintk("\tsq_depth : %d\n",
svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
if (!svc_rdma_wq)
return -ENOMEM;
if (!svcrdma_table_header)
svcrdma_table_header =
register_sysctl_table(svcrdma_root_table);
/* Create the temporary map cache */
svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache",
sizeof(struct svc_rdma_req_map),
0,
SLAB_HWCACHE_ALIGN,
NULL);
if (!svc_rdma_map_cachep) {
printk(KERN_INFO "Could not allocate map cache.\n");
goto err0;
}
/* Create the temporary context cache */
svc_rdma_ctxt_cachep =
kmem_cache_create("svc_rdma_ctxt_cache",
sizeof(struct svc_rdma_op_ctxt),
0,
SLAB_HWCACHE_ALIGN,
NULL);
if (!svc_rdma_ctxt_cachep) {
printk(KERN_INFO "Could not allocate WR ctxt cache.\n");
goto err1;
}
/* Register RDMA with the SVC transport switch */
svc_reg_xprt_class(&svc_rdma_class);
return 0;
err1:
kmem_cache_destroy(svc_rdma_map_cachep);
err0:
unregister_sysctl_table(svcrdma_table_header);
destroy_workqueue(svc_rdma_wq);
return -ENOMEM;
}
MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
MODULE_DESCRIPTION("SVC RDMA Transport");
MODULE_LICENSE("Dual BSD/GPL");
module_init(svc_rdma_init);
module_exit(svc_rdma_cleanup);

386
net/sunrpc/xprtrdma/svc_rdma_marshal.c Normal file

@ -0,0 +1,386 @@
/*
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/debug.h>
#include <asm/unaligned.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
/*
* Decodes a read chunk list. The expected format is as follows:
* descrim : xdr_one
* position : u32 offset into XDR stream
* handle : u32 RKEY
* . . .
* end-of-list: xdr_zero
*/
static u32 *decode_read_list(u32 *va, u32 *vaend)
{
struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;
while (ch->rc_discrim != xdr_zero) {
if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) >
(unsigned long)vaend) {
dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
return NULL;
}
ch++;
}
return (u32 *)&ch->rc_position;
}
/*
* Determine number of chunks and total bytes in chunk list. The chunk
* list has already been verified to fit within the RPCRDMA header.
*/
void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch,
int *ch_count, int *byte_count)
{
/* compute the number of bytes represented by read chunks */
*byte_count = 0;
*ch_count = 0;
for (; ch->rc_discrim != 0; ch++) {
*byte_count = *byte_count + ntohl(ch->rc_target.rs_length);
*ch_count = *ch_count + 1;
}
}
/*
* Decodes a write chunk list. The expected format is as follows:
* descrim : xdr_one
* nchunks : <count>
* handle : u32 RKEY ---+
* length : u32 <len of segment> |
* offset : remote va + <count>
* . . . |
* ---+
*/
static u32 *decode_write_list(u32 *va, u32 *vaend)
{
unsigned long start, end;
int nchunks;
struct rpcrdma_write_array *ary =
(struct rpcrdma_write_array *)va;
/* Check for not write-array */
if (ary->wc_discrim == xdr_zero)
return (u32 *)&ary->wc_nchunks;
if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
return NULL;
}
nchunks = ntohl(ary->wc_nchunks);
start = (unsigned long)&ary->wc_array[0];
end = (unsigned long)vaend;
if (nchunks < 0 ||
nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
(start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
ary, nchunks, vaend);
return NULL;
}
/*
* rs_length is the 2nd 4B field in wc_target and taking its
* address skips the list terminator
*/
return (u32 *)&ary->wc_array[nchunks].wc_target.rs_length;
}
static u32 *decode_reply_array(u32 *va, u32 *vaend)
{
unsigned long start, end;
int nchunks;
struct rpcrdma_write_array *ary =
(struct rpcrdma_write_array *)va;
/* Check for no reply-array */
if (ary->wc_discrim == xdr_zero)
return (u32 *)&ary->wc_nchunks;
if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
return NULL;
}
nchunks = ntohl(ary->wc_nchunks);
start = (unsigned long)&ary->wc_array[0];
end = (unsigned long)vaend;
if (nchunks < 0 ||
nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
(start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
ary, nchunks, vaend);
return NULL;
}
return (u32 *)&ary->wc_array[nchunks];
}
int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
struct svc_rqst *rqstp)
{
struct rpcrdma_msg *rmsgp = NULL;
u32 *va;
u32 *vaend;
u32 hdr_len;
rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
/* Verify that there are enough bytes for header + something */
if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) {
dprintk("svcrdma: header too short = %d\n",
rqstp->rq_arg.len);
return -EINVAL;
}
/* Decode the header */
rmsgp->rm_xid = ntohl(rmsgp->rm_xid);
rmsgp->rm_vers = ntohl(rmsgp->rm_vers);
rmsgp->rm_credit = ntohl(rmsgp->rm_credit);
rmsgp->rm_type = ntohl(rmsgp->rm_type);
if (rmsgp->rm_vers != RPCRDMA_VERSION)
return -ENOSYS;
/* Pull in the extra for the padded case and bump our pointer */
if (rmsgp->rm_type == RDMA_MSGP) {
int hdrlen;
rmsgp->rm_body.rm_padded.rm_align =
ntohl(rmsgp->rm_body.rm_padded.rm_align);
rmsgp->rm_body.rm_padded.rm_thresh =
ntohl(rmsgp->rm_body.rm_padded.rm_thresh);
va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
rqstp->rq_arg.head[0].iov_base = va;
hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
rqstp->rq_arg.head[0].iov_len -= hdrlen;
if (hdrlen > rqstp->rq_arg.len)
return -EINVAL;
return hdrlen;
}
/* The chunk list may contain either a read chunk list or a write
* chunk list and a reply chunk list.
*/
va = &rmsgp->rm_body.rm_chunks[0];
vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
va = decode_read_list(va, vaend);
if (!va)
return -EINVAL;
va = decode_write_list(va, vaend);
if (!va)
return -EINVAL;
va = decode_reply_array(va, vaend);
if (!va)
return -EINVAL;
rqstp->rq_arg.head[0].iov_base = va;
hdr_len = (unsigned long)va - (unsigned long)rmsgp;
rqstp->rq_arg.head[0].iov_len -= hdr_len;
*rdma_req = rmsgp;
return hdr_len;
}
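The final bookkeeping above is just "consume hdr_len bytes from the front of the head iovec": the base pointer advances past the RPC/RDMA header and the length shrinks by the same amount. A stand-alone sketch of that pointer arithmetic (plain struct iovec and a made-up header length, not the kernel structures):

#include <stdio.h>
#include <string.h>
#include <sys/uio.h>

/* Advance an iovec past 'consumed' bytes of already-parsed header. */
static size_t consume_header(struct iovec *iov, size_t consumed)
{
	iov->iov_base = (char *)iov->iov_base + consumed;
	iov->iov_len -= consumed;
	return consumed;
}

int main(void)
{
	char buf[] = "HEADERpayload";
	struct iovec head = { .iov_base = buf, .iov_len = strlen(buf) };
	size_t hdr_len = consume_header(&head, 6);	/* skip "HEADER" */

	printf("hdr_len=%zu remaining=\"%.*s\"\n",
	       hdr_len, (int)head.iov_len, (char *)head.iov_base);
	return 0;
}
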
int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp)
{
struct rpcrdma_msg *rmsgp = NULL;
struct rpcrdma_read_chunk *ch;
struct rpcrdma_write_array *ary;
u32 *va;
u32 hdrlen;
dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n",
rqstp);
rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
/* Pull in the extra for the padded case and bump our pointer */
if (rmsgp->rm_type == RDMA_MSGP) {
va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
rqstp->rq_arg.head[0].iov_base = va;
hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
rqstp->rq_arg.head[0].iov_len -= hdrlen;
return hdrlen;
}
/*
* Skip all chunks to find RPC msg. These were previously processed
*/
va = &rmsgp->rm_body.rm_chunks[0];
/* Skip read-list */
for (ch = (struct rpcrdma_read_chunk *)va;
ch->rc_discrim != xdr_zero; ch++);
va = (u32 *)&ch->rc_position;
/* Skip write-list */
ary = (struct rpcrdma_write_array *)va;
if (ary->wc_discrim == xdr_zero)
va = (u32 *)&ary->wc_nchunks;
else
/*
* rs_length is the 2nd 4B field in wc_target and taking its
* address skips the list terminator
*/
va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length;
/* Skip reply-array */
ary = (struct rpcrdma_write_array *)va;
if (ary->wc_discrim == xdr_zero)
va = (u32 *)&ary->wc_nchunks;
else
va = (u32 *)&ary->wc_array[ary->wc_nchunks];
rqstp->rq_arg.head[0].iov_base = va;
hdrlen = (unsigned long)va - (unsigned long)rmsgp;
rqstp->rq_arg.head[0].iov_len -= hdrlen;
return hdrlen;
}
int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rmsgp,
enum rpcrdma_errcode err, u32 *va)
{
u32 *startp = va;
*va++ = htonl(rmsgp->rm_xid);
*va++ = htonl(rmsgp->rm_vers);
*va++ = htonl(xprt->sc_max_requests);
*va++ = htonl(RDMA_ERROR);
*va++ = htonl(err);
if (err == ERR_VERS) {
*va++ = htonl(RPCRDMA_VERSION);
*va++ = htonl(RPCRDMA_VERSION);
}
return (int)((unsigned long)va - (unsigned long)startp);
}
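The return value is simply the size of the error header that was written: five 32-bit XDR words (xid, version, credits, RDMA_ERROR, the error code), plus two more carrying the supported version range when the error is a version mismatch. A tiny sketch of that size computation (the DEMO_* constants are illustrative stand-ins, not the kernel's enum values):

#include <stdio.h>
#include <stdint.h>

enum { DEMO_ERR_VERS = 1, DEMO_ERR_CHUNK = 2 };	/* illustrative codes */

static size_t error_hdr_bytes(int err)
{
	size_t words = 5;	/* xid, vers, credits, RDMA_ERROR, err */

	if (err == DEMO_ERR_VERS)
		words += 2;	/* low and high supported protocol versions */
	return words * sizeof(uint32_t);
}

int main(void)
{
	printf("version-mismatch reply header: %zu bytes\n",
	       error_hdr_bytes(DEMO_ERR_VERS));
	printf("other error reply header:      %zu bytes\n",
	       error_hdr_bytes(DEMO_ERR_CHUNK));
	return 0;
}
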
int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
{
struct rpcrdma_write_array *wr_ary;
/* There is no read-list in a reply */
/* skip write list */
wr_ary = (struct rpcrdma_write_array *)
&rmsgp->rm_body.rm_chunks[1];
if (wr_ary->wc_discrim)
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)].
wc_target.rs_length;
else
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_nchunks;
/* skip reply array */
if (wr_ary->wc_discrim)
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)];
else
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_nchunks;
return (unsigned long) wr_ary - (unsigned long) rmsgp;
}
void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
{
struct rpcrdma_write_array *ary;
/* no read-list */
rmsgp->rm_body.rm_chunks[0] = xdr_zero;
/* write-array discrim */
ary = (struct rpcrdma_write_array *)
&rmsgp->rm_body.rm_chunks[1];
ary->wc_discrim = xdr_one;
ary->wc_nchunks = htonl(chunks);
/* write-list terminator */
ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
/* reply-array discriminator */
ary->wc_array[chunks].wc_target.rs_length = xdr_zero;
}
void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
int chunks)
{
ary->wc_discrim = xdr_one;
ary->wc_nchunks = htonl(chunks);
}
void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
int chunk_no,
__be32 rs_handle,
__be64 rs_offset,
u32 write_len)
{
struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target;
seg->rs_handle = rs_handle;
seg->rs_offset = rs_offset;
seg->rs_length = htonl(write_len);
}
void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rdma_argp,
struct rpcrdma_msg *rdma_resp,
enum rpcrdma_proc rdma_type)
{
rdma_resp->rm_xid = htonl(rdma_argp->rm_xid);
rdma_resp->rm_vers = htonl(rdma_argp->rm_vers);
rdma_resp->rm_credit = htonl(xprt->sc_max_requests);
rdma_resp->rm_type = htonl(rdma_type);
/* Encode <nul> chunks lists */
rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
rdma_resp->rm_body.rm_chunks[1] = xdr_zero;
rdma_resp->rm_body.rm_chunks[2] = xdr_zero;
}

View file

@ -0,0 +1,614 @@
/*
* Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/spinlock.h>
#include <linux/highmem.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <linux/sunrpc/svc_rdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
/*
* Replace the pages in the rq_argpages array with the pages from the SGE in
* the RDMA_RECV completion. The SGL should contain full pages up until the
* last one.
*/
static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *ctxt,
u32 byte_count)
{
struct page *page;
u32 bc;
int sge_no;
/* Swap the page in the SGE with the page in argpages */
page = ctxt->pages[0];
put_page(rqstp->rq_pages[0]);
rqstp->rq_pages[0] = page;
/* Set up the XDR head */
rqstp->rq_arg.head[0].iov_base = page_address(page);
rqstp->rq_arg.head[0].iov_len =
min_t(size_t, byte_count, ctxt->sge[0].length);
rqstp->rq_arg.len = byte_count;
rqstp->rq_arg.buflen = byte_count;
/* Compute bytes past head in the SGL */
bc = byte_count - rqstp->rq_arg.head[0].iov_len;
/* If data remains, store it in the pagelist */
rqstp->rq_arg.page_len = bc;
rqstp->rq_arg.page_base = 0;
rqstp->rq_arg.pages = &rqstp->rq_pages[1];
sge_no = 1;
while (bc && sge_no < ctxt->count) {
page = ctxt->pages[sge_no];
put_page(rqstp->rq_pages[sge_no]);
rqstp->rq_pages[sge_no] = page;
bc -= min_t(u32, bc, ctxt->sge[sge_no].length);
rqstp->rq_arg.buflen += ctxt->sge[sge_no].length;
sge_no++;
}
rqstp->rq_respages = &rqstp->rq_pages[sge_no];
rqstp->rq_next_page = rqstp->rq_respages + 1;
/* We should never run out of SGE because the limit is defined to
* support the max allowed RPC data length
*/
BUG_ON(bc && (sge_no == ctxt->count));
BUG_ON((rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len)
!= byte_count);
BUG_ON(rqstp->rq_arg.len != byte_count);
/* If not all pages were used from the SGL, free the remaining ones */
bc = sge_no;
while (sge_no < ctxt->count) {
page = ctxt->pages[sge_no++];
put_page(page);
}
ctxt->count = bc;
/* Set up tail */
rqstp->rq_arg.tail[0].iov_base = NULL;
rqstp->rq_arg.tail[0].iov_len = 0;
}
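Put differently, the received byte count is split into whatever fits in the first (head) SGE and a remainder that is described by the page list. A hedged sketch of just that split, with a made-up first-SGE size:

#include <stdio.h>
#include <stdint.h>

struct demo_split {
	uint32_t head_len;	/* bytes described by the head iovec */
	uint32_t page_len;	/* bytes carried in the page list */
};

static struct demo_split split_arg(uint32_t byte_count, uint32_t first_sge_len)
{
	struct demo_split s;

	s.head_len = byte_count < first_sge_len ? byte_count : first_sge_len;
	s.page_len = byte_count - s.head_len;
	return s;
}

int main(void)
{
	struct demo_split s = split_arg(10000, 4096);	/* e.g. a 4 KiB first SGE */

	printf("head=%u page_list=%u\n", s.head_len, s.page_len);
	return 0;
}
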
static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
{
if (rdma_node_get_transport(xprt->sc_cm_id->device->node_type) ==
RDMA_TRANSPORT_IWARP)
return 1;
else
return min_t(int, sge_count, xprt->sc_max_sge);
}
typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head,
int *page_no,
u32 *page_offset,
u32 rs_handle,
u32 rs_length,
u64 rs_offset,
int last);
/* Issue an RDMA_READ using the local lkey to map the data sink */
static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head,
int *page_no,
u32 *page_offset,
u32 rs_handle,
u32 rs_length,
u64 rs_offset,
int last)
{
struct ib_send_wr read_wr;
int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
int ret, read, pno;
u32 pg_off = *page_offset;
u32 pg_no = *page_no;
ctxt->direction = DMA_FROM_DEVICE;
ctxt->read_hdr = head;
pages_needed =
min_t(int, pages_needed, rdma_read_max_sge(xprt, pages_needed));
read = min_t(int, pages_needed << PAGE_SHIFT, rs_length);
for (pno = 0; pno < pages_needed; pno++) {
int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
head->arg.page_len += len;
head->arg.len += len;
if (!pg_off)
head->count++;
rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
rqstp->rq_next_page = rqstp->rq_respages + 1;
ctxt->sge[pno].addr =
ib_dma_map_page(xprt->sc_cm_id->device,
head->arg.pages[pg_no], pg_off,
PAGE_SIZE - pg_off,
DMA_FROM_DEVICE);
ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
ctxt->sge[pno].addr);
if (ret)
goto err;
atomic_inc(&xprt->sc_dma_used);
/* The lkey here is either a local dma lkey or a dma_mr lkey */
ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
ctxt->sge[pno].length = len;
ctxt->count++;
/* adjust offset and wrap to next page if needed */
pg_off += len;
if (pg_off == PAGE_SIZE) {
pg_off = 0;
pg_no++;
}
rs_length -= len;
}
if (last && rs_length == 0)
set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
else
clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
memset(&read_wr, 0, sizeof(read_wr));
read_wr.wr_id = (unsigned long)ctxt;
read_wr.opcode = IB_WR_RDMA_READ;
ctxt->wr_op = read_wr.opcode;
read_wr.send_flags = IB_SEND_SIGNALED;
read_wr.wr.rdma.rkey = rs_handle;
read_wr.wr.rdma.remote_addr = rs_offset;
read_wr.sg_list = ctxt->sge;
read_wr.num_sge = pages_needed;
ret = svc_rdma_send(xprt, &read_wr);
if (ret) {
pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
goto err;
}
/* return current location in page array */
*page_no = pg_no;
*page_offset = pg_off;
ret = read;
atomic_inc(&rdma_stat_read);
return ret;
err:
svc_rdma_unmap_dma(ctxt);
svc_rdma_put_context(ctxt, 0);
return ret;
}
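The pages_needed computation above rounds the segment (starting page offset plus length) up to whole pages, and the loop then slices the segment page by page, wrapping the offset back to zero at each page boundary. A user-space sketch of both steps, assuming 4 KiB pages for the example:

#include <stdio.h>

#define DEMO_PAGE_SIZE 4096u

static unsigned int pages_needed(unsigned int page_off, unsigned int length)
{
	/* round (offset + length) up to a whole number of pages */
	return (page_off + length + DEMO_PAGE_SIZE - 1) / DEMO_PAGE_SIZE;
}

int main(void)
{
	unsigned int off = 100, len = 9000, n = pages_needed(off, len);

	printf("%u pages needed\n", n);	/* 100 + 9000 = 9100 -> 3 pages */

	/* slice the segment exactly as the read loop does */
	while (len) {
		unsigned int chunk = DEMO_PAGE_SIZE - off;

		if (chunk > len)
			chunk = len;
		printf("map %u bytes at page offset %u\n", chunk, off);
		len -= chunk;
		off = 0;	/* subsequent pages start at offset 0 */
	}
	return 0;
}
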
/* Issue an RDMA_READ using an FRMR to map the data sink */
static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head,
int *page_no,
u32 *page_offset,
u32 rs_handle,
u32 rs_length,
u64 rs_offset,
int last)
{
struct ib_send_wr read_wr;
struct ib_send_wr inv_wr;
struct ib_send_wr fastreg_wr;
u8 key;
int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
int ret, read, pno;
u32 pg_off = *page_offset;
u32 pg_no = *page_no;
if (IS_ERR(frmr))
return -ENOMEM;
ctxt->direction = DMA_FROM_DEVICE;
ctxt->frmr = frmr;
pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len);
read = min_t(int, pages_needed << PAGE_SHIFT, rs_length);
frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]);
frmr->direction = DMA_FROM_DEVICE;
frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
frmr->map_len = pages_needed << PAGE_SHIFT;
frmr->page_list_len = pages_needed;
for (pno = 0; pno < pages_needed; pno++) {
int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
head->arg.page_len += len;
head->arg.len += len;
if (!pg_off)
head->count++;
rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
rqstp->rq_next_page = rqstp->rq_respages + 1;
frmr->page_list->page_list[pno] =
ib_dma_map_page(xprt->sc_cm_id->device,
head->arg.pages[pg_no], 0,
PAGE_SIZE, DMA_FROM_DEVICE);
ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
frmr->page_list->page_list[pno]);
if (ret)
goto err;
atomic_inc(&xprt->sc_dma_used);
/* adjust offset and wrap to next page if needed */
pg_off += len;
if (pg_off == PAGE_SIZE) {
pg_off = 0;
pg_no++;
}
rs_length -= len;
}
if (last && rs_length == 0)
set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
else
clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
/* Bump the key */
key = (u8)(frmr->mr->lkey & 0x000000FF);
ib_update_fast_reg_key(frmr->mr, ++key);
ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset;
ctxt->sge[0].lkey = frmr->mr->lkey;
ctxt->sge[0].length = read;
ctxt->count = 1;
ctxt->read_hdr = head;
/* Prepare FASTREG WR */
memset(&fastreg_wr, 0, sizeof(fastreg_wr));
fastreg_wr.opcode = IB_WR_FAST_REG_MR;
fastreg_wr.send_flags = IB_SEND_SIGNALED;
fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
fastreg_wr.wr.fast_reg.length = frmr->map_len;
fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
fastreg_wr.next = &read_wr;
/* Prepare RDMA_READ */
memset(&read_wr, 0, sizeof(read_wr));
read_wr.send_flags = IB_SEND_SIGNALED;
read_wr.wr.rdma.rkey = rs_handle;
read_wr.wr.rdma.remote_addr = rs_offset;
read_wr.sg_list = ctxt->sge;
read_wr.num_sge = 1;
if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
read_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
read_wr.wr_id = (unsigned long)ctxt;
read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
} else {
read_wr.opcode = IB_WR_RDMA_READ;
read_wr.next = &inv_wr;
/* Prepare invalidate */
memset(&inv_wr, 0, sizeof(inv_wr));
inv_wr.wr_id = (unsigned long)ctxt;
inv_wr.opcode = IB_WR_LOCAL_INV;
inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
}
ctxt->wr_op = read_wr.opcode;
/* Post the chain */
ret = svc_rdma_send(xprt, &fastreg_wr);
if (ret) {
pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
goto err;
}
/* return current location in page array */
*page_no = pg_no;
*page_offset = pg_off;
ret = read;
atomic_inc(&rdma_stat_read);
return ret;
err:
svc_rdma_unmap_dma(ctxt);
svc_rdma_put_context(ctxt, 0);
svc_rdma_put_frmr(xprt, frmr);
return ret;
}
static int rdma_read_chunks(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rmsgp,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head)
{
int page_no, ch_count, ret;
struct rpcrdma_read_chunk *ch;
u32 page_offset, byte_count;
u64 rs_offset;
rdma_reader_fn reader;
/* If no read list is present, return 0 */
ch = svc_rdma_get_read_chunk(rmsgp);
if (!ch)
return 0;
svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
if (ch_count > RPCSVC_MAXPAGES)
return -EINVAL;
/* The request is completed when the RDMA_READs complete. The
* head context keeps all the pages that comprise the
* request.
*/
head->arg.head[0] = rqstp->rq_arg.head[0];
head->arg.tail[0] = rqstp->rq_arg.tail[0];
head->arg.pages = &head->pages[head->count];
head->hdr_count = head->count;
head->arg.page_base = 0;
head->arg.page_len = 0;
head->arg.len = rqstp->rq_arg.len;
head->arg.buflen = rqstp->rq_arg.buflen;
/* Use FRMR if supported */
if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)
reader = rdma_read_chunk_frmr;
else
reader = rdma_read_chunk_lcl;
page_no = 0; page_offset = 0;
for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
ch->rc_discrim != 0; ch++) {
xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset,
&rs_offset);
byte_count = ntohl(ch->rc_target.rs_length);
while (byte_count > 0) {
ret = reader(xprt, rqstp, head,
&page_no, &page_offset,
ntohl(ch->rc_target.rs_handle),
byte_count, rs_offset,
((ch+1)->rc_discrim == 0) /* last */
);
if (ret < 0)
goto err;
byte_count -= ret;
rs_offset += ret;
head->arg.buflen += ret;
}
}
ret = 1;
err:
/* Detach arg pages. svc_recv will replenish them */
for (page_no = 0;
&rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++)
rqstp->rq_pages[page_no] = NULL;
return ret;
}
/*
* To avoid a separate RDMA READ just for a handful of zero bytes,
* RFC 5666 section 3.7 allows the client to omit the XDR zero pad
* in chunk lists.
*/
static void
rdma_fix_xdr_pad(struct xdr_buf *buf)
{
unsigned int page_len = buf->page_len;
unsigned int size = (XDR_QUADLEN(page_len) << 2) - page_len;
unsigned int offset, pg_no;
char *p;
if (size == 0)
return;
pg_no = page_len >> PAGE_SHIFT;
offset = page_len & ~PAGE_MASK;
p = page_address(buf->pages[pg_no]);
memset(p + offset, 0, size);
buf->page_len += size;
buf->buflen += size;
buf->len += size;
}
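XDR_QUADLEN rounds a byte count up to whole 4-byte XDR quads, so the "size" computed above is just the 0-3 bytes of zero pad needed to reach the next quad boundary. A minimal sketch of that arithmetic in plain C:

#include <stdio.h>

/* bytes of zero pad needed to bring 'len' up to a 4-byte XDR boundary */
static unsigned int xdr_pad_bytes(unsigned int len)
{
	return (((len + 3) >> 2) << 2) - len;	/* always 0, 1, 2 or 3 */
}

int main(void)
{
	unsigned int len;

	for (len = 0; len < 8; len++)
		printf("len=%u pad=%u\n", len, xdr_pad_bytes(len));
	return 0;
}
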
static int rdma_read_complete(struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head)
{
int page_no;
int ret;
BUG_ON(!head);
/* Copy RPC pages */
for (page_no = 0; page_no < head->count; page_no++) {
put_page(rqstp->rq_pages[page_no]);
rqstp->rq_pages[page_no] = head->pages[page_no];
}
/* Point rq_arg.pages past header */
rdma_fix_xdr_pad(&head->arg);
rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count];
rqstp->rq_arg.page_len = head->arg.page_len;
rqstp->rq_arg.page_base = head->arg.page_base;
/* rq_respages starts after the last arg page */
rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
rqstp->rq_next_page = rqstp->rq_respages + 1;
/* Rebuild rq_arg head and tail. */
rqstp->rq_arg.head[0] = head->arg.head[0];
rqstp->rq_arg.tail[0] = head->arg.tail[0];
rqstp->rq_arg.len = head->arg.len;
rqstp->rq_arg.buflen = head->arg.buflen;
/* Free the context */
svc_rdma_put_context(head, 0);
/* XXX: What should this be? */
rqstp->rq_prot = IPPROTO_MAX;
svc_xprt_copy_addrs(rqstp, rqstp->rq_xprt);
ret = rqstp->rq_arg.head[0].iov_len
+ rqstp->rq_arg.page_len
+ rqstp->rq_arg.tail[0].iov_len;
dprintk("svcrdma: deferred read ret=%d, rq_arg.len =%d, "
"rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n",
ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base,
rqstp->rq_arg.head[0].iov_len);
return ret;
}
/*
* Set up the rqstp thread context to point to the RQ buffer. If
* necessary, pull additional data from the client with an RDMA_READ
* request.
*/
int svc_rdma_recvfrom(struct svc_rqst *rqstp)
{
struct svc_xprt *xprt = rqstp->rq_xprt;
struct svcxprt_rdma *rdma_xprt =
container_of(xprt, struct svcxprt_rdma, sc_xprt);
struct svc_rdma_op_ctxt *ctxt = NULL;
struct rpcrdma_msg *rmsgp;
int ret = 0;
int len;
dprintk("svcrdma: rqstp=%p\n", rqstp);
spin_lock_bh(&rdma_xprt->sc_rq_dto_lock);
if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
ctxt = list_entry(rdma_xprt->sc_read_complete_q.next,
struct svc_rdma_op_ctxt,
dto_q);
list_del_init(&ctxt->dto_q);
spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
return rdma_read_complete(rqstp, ctxt);
} else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next,
struct svc_rdma_op_ctxt,
dto_q);
list_del_init(&ctxt->dto_q);
} else {
atomic_inc(&rdma_stat_rq_starve);
clear_bit(XPT_DATA, &xprt->xpt_flags);
ctxt = NULL;
}
spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
if (!ctxt) {
/* This is the EAGAIN path. The svc_recv routine will
* return -EAGAIN, the nfsd thread will call into
* svc_recv again, and we shouldn't be on the active
* transport list
*/
if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
goto close_out;
goto out;
}
dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
ctxt, rdma_xprt, rqstp, ctxt->wc_status);
BUG_ON(ctxt->wc_status != IB_WC_SUCCESS);
atomic_inc(&rdma_stat_recv);
/* Build up the XDR from the receive buffers. */
rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len);
/* Decode the RDMA header. */
len = svc_rdma_xdr_decode_req(&rmsgp, rqstp);
rqstp->rq_xprt_hlen = len;
/* If the request is invalid, reply with an error */
if (len < 0) {
if (len == -ENOSYS)
svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
goto close_out;
}
/* Read read-list data. */
ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt);
if (ret > 0) {
/* read-list posted, defer until data received from client. */
goto defer;
} else if (ret < 0) {
/* Post of read-list failed, free context. */
svc_rdma_put_context(ctxt, 1);
return 0;
}
ret = rqstp->rq_arg.head[0].iov_len
+ rqstp->rq_arg.page_len
+ rqstp->rq_arg.tail[0].iov_len;
svc_rdma_put_context(ctxt, 0);
out:
dprintk("svcrdma: ret = %d, rq_arg.len =%d, "
"rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n",
ret, rqstp->rq_arg.len,
rqstp->rq_arg.head[0].iov_base,
rqstp->rq_arg.head[0].iov_len);
rqstp->rq_prot = IPPROTO_MAX;
svc_xprt_copy_addrs(rqstp, xprt);
return ret;
close_out:
if (ctxt)
svc_rdma_put_context(ctxt, 1);
dprintk("svcrdma: transport %p is closing\n", xprt);
/*
* Set the close bit and enqueue it. svc_recv will see the
* close bit and call svc_xprt_delete
*/
set_bit(XPT_CLOSE, &xprt->xpt_flags);
defer:
return 0;
}

View file

@ -0,0 +1,554 @@
/*
* Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/spinlock.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <linux/sunrpc/svc_rdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
static int map_xdr(struct svcxprt_rdma *xprt,
struct xdr_buf *xdr,
struct svc_rdma_req_map *vec)
{
int sge_no;
u32 sge_bytes;
u32 page_bytes;
u32 page_off;
int page_no;
BUG_ON(xdr->len !=
(xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
/* Skip the first sge, this is for the RPCRDMA header */
sge_no = 1;
/* Head SGE */
vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
sge_no++;
/* pages SGE */
page_no = 0;
page_bytes = xdr->page_len;
page_off = xdr->page_base;
while (page_bytes) {
vec->sge[sge_no].iov_base =
page_address(xdr->pages[page_no]) + page_off;
sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
page_bytes -= sge_bytes;
vec->sge[sge_no].iov_len = sge_bytes;
sge_no++;
page_no++;
page_off = 0; /* reset for next time through loop */
}
/* Tail SGE */
if (xdr->tail[0].iov_len) {
vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
sge_no++;
}
dprintk("svcrdma: map_xdr: sge_no %d page_no %d "
"page_base %u page_len %u head_len %zu tail_len %zu\n",
sge_no, page_no, xdr->page_base, xdr->page_len,
xdr->head[0].iov_len, xdr->tail[0].iov_len);
vec->count = sge_no;
return 0;
}
static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
struct xdr_buf *xdr,
u32 xdr_off, size_t len, int dir)
{
struct page *page;
dma_addr_t dma_addr;
if (xdr_off < xdr->head[0].iov_len) {
/* This offset is in the head */
xdr_off += (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
page = virt_to_page(xdr->head[0].iov_base);
} else {
xdr_off -= xdr->head[0].iov_len;
if (xdr_off < xdr->page_len) {
/* This offset is in the page list */
xdr_off += xdr->page_base;
page = xdr->pages[xdr_off >> PAGE_SHIFT];
xdr_off &= ~PAGE_MASK;
} else {
/* This offset is in the tail */
xdr_off -= xdr->page_len;
xdr_off += (unsigned long)
xdr->tail[0].iov_base & ~PAGE_MASK;
page = virt_to_page(xdr->tail[0].iov_base);
}
}
dma_addr = ib_dma_map_page(xprt->sc_cm_id->device, page, xdr_off,
min_t(size_t, PAGE_SIZE, len), dir);
return dma_addr;
}
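dma_map_xdr() resolves a flat offset into whichever of the three xdr_buf regions (head, page list, tail) contains it, converting it to a region-relative offset along the way. The selection is plain interval arithmetic; a stand-alone sketch with hypothetical region lengths:

#include <stdio.h>

enum demo_region { DEMO_HEAD, DEMO_PAGES, DEMO_TAIL };

/* Given region lengths, report which region a flat offset falls in and
 * the offset relative to that region's start. */
static enum demo_region resolve(unsigned int off,
				unsigned int head_len, unsigned int page_len,
				unsigned int *rel_off)
{
	if (off < head_len) {
		*rel_off = off;
		return DEMO_HEAD;
	}
	off -= head_len;
	if (off < page_len) {
		*rel_off = off;
		return DEMO_PAGES;
	}
	*rel_off = off - page_len;
	return DEMO_TAIL;
}

int main(void)
{
	static const char *name[] = { "head", "pages", "tail" };
	unsigned int rel, offsets[] = { 10, 200, 5000 };
	int i;

	for (i = 0; i < 3; i++) {
		enum demo_region r = resolve(offsets[i], 128, 4096, &rel);

		printf("offset %u -> %s+%u\n", offsets[i], name[r], rel);
	}
	return 0;
}
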
/* Assumptions:
* - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
*/
static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
u32 rmr, u64 to,
u32 xdr_off, int write_len,
struct svc_rdma_req_map *vec)
{
struct ib_send_wr write_wr;
struct ib_sge *sge;
int xdr_sge_no;
int sge_no;
int sge_bytes;
int sge_off;
int bc;
struct svc_rdma_op_ctxt *ctxt;
BUG_ON(vec->count > RPCSVC_MAXPAGES);
dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
"write_len=%d, vec->sge=%p, vec->count=%lu\n",
rmr, (unsigned long long)to, xdr_off,
write_len, vec->sge, vec->count);
ctxt = svc_rdma_get_context(xprt);
ctxt->direction = DMA_TO_DEVICE;
sge = ctxt->sge;
/* Find the SGE associated with xdr_off */
for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < vec->count;
xdr_sge_no++) {
if (vec->sge[xdr_sge_no].iov_len > bc)
break;
bc -= vec->sge[xdr_sge_no].iov_len;
}
sge_off = bc;
bc = write_len;
sge_no = 0;
/* Copy the remaining SGE */
while (bc != 0) {
sge_bytes = min_t(size_t,
bc, vec->sge[xdr_sge_no].iov_len-sge_off);
sge[sge_no].length = sge_bytes;
sge[sge_no].addr =
dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
sge_bytes, DMA_TO_DEVICE);
xdr_off += sge_bytes;
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
sge[sge_no].addr))
goto err;
atomic_inc(&xprt->sc_dma_used);
sge[sge_no].lkey = xprt->sc_dma_lkey;
ctxt->count++;
sge_off = 0;
sge_no++;
xdr_sge_no++;
BUG_ON(xdr_sge_no > vec->count);
bc -= sge_bytes;
if (sge_no == xprt->sc_max_sge)
break;
}
/* Prepare WRITE WR */
memset(&write_wr, 0, sizeof write_wr);
ctxt->wr_op = IB_WR_RDMA_WRITE;
write_wr.wr_id = (unsigned long)ctxt;
write_wr.sg_list = &sge[0];
write_wr.num_sge = sge_no;
write_wr.opcode = IB_WR_RDMA_WRITE;
write_wr.send_flags = IB_SEND_SIGNALED;
write_wr.wr.rdma.rkey = rmr;
write_wr.wr.rdma.remote_addr = to;
/* Post It */
atomic_inc(&rdma_stat_write);
if (svc_rdma_send(xprt, &write_wr))
goto err;
return write_len - bc;
err:
svc_rdma_unmap_dma(ctxt);
svc_rdma_put_context(ctxt, 0);
/* Fatal error, close transport */
return -EIO;
}
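The first loop in send_write() locates which vec entry a given XDR offset falls in; the SGE-building loop then carves off min(remaining, space-left-in-this-entry) sized pieces. A compact sketch of the offset-to-segment lookup (the segment lengths are hypothetical):

#include <stdio.h>

/* Find which segment a flat offset falls in, and the offset within it. */
static int find_segment(const unsigned int *seg_len, int nsegs,
			unsigned int off, unsigned int *seg_off)
{
	int i;

	for (i = 0; i < nsegs; i++) {
		if (off < seg_len[i]) {
			*seg_off = off;
			return i;
		}
		off -= seg_len[i];
	}
	return -1;	/* offset lies past the end of the buffer */
}

int main(void)
{
	unsigned int seg_len[] = { 120, 4096, 4096, 60 };
	unsigned int seg_off;
	int i = find_segment(seg_len, 4, 4300, &seg_off);

	printf("offset 4300 -> segment %d, offset %u\n", i, seg_off);
	return 0;
}
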
static int send_write_chunks(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rdma_argp,
struct rpcrdma_msg *rdma_resp,
struct svc_rqst *rqstp,
struct svc_rdma_req_map *vec)
{
u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
int write_len;
u32 xdr_off;
int chunk_off;
int chunk_no;
struct rpcrdma_write_array *arg_ary;
struct rpcrdma_write_array *res_ary;
int ret;
arg_ary = svc_rdma_get_write_array(rdma_argp);
if (!arg_ary)
return 0;
res_ary = (struct rpcrdma_write_array *)
&rdma_resp->rm_body.rm_chunks[1];
/* Write chunks start at the pagelist */
for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
xfer_len && chunk_no < arg_ary->wc_nchunks;
chunk_no++) {
struct rpcrdma_segment *arg_ch;
u64 rs_offset;
arg_ch = &arg_ary->wc_array[chunk_no].wc_target;
write_len = min(xfer_len, ntohl(arg_ch->rs_length));
/* Prepare the response chunk given the length actually
* written */
xdr_decode_hyper((__be32 *)&arg_ch->rs_offset, &rs_offset);
svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no,
arg_ch->rs_handle,
arg_ch->rs_offset,
write_len);
chunk_off = 0;
while (write_len) {
ret = send_write(xprt, rqstp,
ntohl(arg_ch->rs_handle),
rs_offset + chunk_off,
xdr_off,
write_len,
vec);
if (ret <= 0) {
dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
ret);
return -EIO;
}
chunk_off += ret;
xdr_off += ret;
xfer_len -= ret;
write_len -= ret;
}
}
/* Update the req with the number of chunks actually used */
svc_rdma_xdr_encode_write_list(rdma_resp, chunk_no);
return rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
}
static int send_reply_chunks(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rdma_argp,
struct rpcrdma_msg *rdma_resp,
struct svc_rqst *rqstp,
struct svc_rdma_req_map *vec)
{
u32 xfer_len = rqstp->rq_res.len;
int write_len;
u32 xdr_off;
int chunk_no;
int chunk_off;
int nchunks;
struct rpcrdma_segment *ch;
struct rpcrdma_write_array *arg_ary;
struct rpcrdma_write_array *res_ary;
int ret;
arg_ary = svc_rdma_get_reply_array(rdma_argp);
if (!arg_ary)
return 0;
/* XXX: need to fix when reply lists occur with read-list and/or
* write-list */
res_ary = (struct rpcrdma_write_array *)
&rdma_resp->rm_body.rm_chunks[2];
/* xdr offset starts at RPC message */
nchunks = ntohl(arg_ary->wc_nchunks);
for (xdr_off = 0, chunk_no = 0;
xfer_len && chunk_no < nchunks;
chunk_no++) {
u64 rs_offset;
ch = &arg_ary->wc_array[chunk_no].wc_target;
write_len = min(xfer_len, ntohl(ch->rs_length));
/* Prepare the reply chunk given the length actually
* written */
xdr_decode_hyper((__be32 *)&ch->rs_offset, &rs_offset);
svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no,
ch->rs_handle, ch->rs_offset,
write_len);
chunk_off = 0;
while (write_len) {
ret = send_write(xprt, rqstp,
ntohl(ch->rs_handle),
rs_offset + chunk_off,
xdr_off,
write_len,
vec);
if (ret <= 0) {
dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
ret);
return -EIO;
}
chunk_off += ret;
xdr_off += ret;
xfer_len -= ret;
write_len -= ret;
}
}
/* Update the req with the number of chunks actually used */
svc_rdma_xdr_encode_reply_array(res_ary, chunk_no);
return rqstp->rq_res.len;
}
/* This function prepares the portion of the RPCRDMA message to be
* sent in the RDMA_SEND. This function is called after data sent via
* RDMA has already been transmitted. There are three cases:
* - The RPCRDMA header, RPC header, and payload are all sent in a
* single RDMA_SEND. This is the "inline" case.
* - The RPCRDMA header and some portion of the RPC header and data
* are sent via this RDMA_SEND and another portion of the data is
* sent via RDMA.
* - The RPCRDMA header [NOMSG] is sent in this RDMA_SEND and the RPC
* header and data are all transmitted via RDMA.
* In all three cases, this function prepares the RPCRDMA header in
* sge[0], the 'type' parameter indicates the type to place in the
* RPCRDMA header, and the 'byte_count' field indicates how much of
* the XDR to include in this RDMA_SEND. NB: The offset of the payload
* to send is zero in the XDR.
*/
static int send_reply(struct svcxprt_rdma *rdma,
struct svc_rqst *rqstp,
struct page *page,
struct rpcrdma_msg *rdma_resp,
struct svc_rdma_op_ctxt *ctxt,
struct svc_rdma_req_map *vec,
int byte_count)
{
struct ib_send_wr send_wr;
int sge_no;
int sge_bytes;
int page_no;
int pages;
int ret;
/* Post a recv buffer to handle another request. */
ret = svc_rdma_post_recv(rdma);
if (ret) {
printk(KERN_INFO
"svcrdma: could not post a receive buffer, err=%d."
"Closing transport %p.\n", ret, rdma);
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
svc_rdma_put_context(ctxt, 0);
return -ENOTCONN;
}
/* Prepare the context */
ctxt->pages[0] = page;
ctxt->count = 1;
/* Prepare the SGE for the RPCRDMA Header */
ctxt->sge[0].lkey = rdma->sc_dma_lkey;
ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
ctxt->sge[0].addr =
ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
ctxt->sge[0].length, DMA_TO_DEVICE);
if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
goto err;
atomic_inc(&rdma->sc_dma_used);
ctxt->direction = DMA_TO_DEVICE;
/* Map the payload indicated by 'byte_count' */
for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
int xdr_off = 0;
sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
byte_count -= sge_bytes;
ctxt->sge[sge_no].addr =
dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
sge_bytes, DMA_TO_DEVICE);
xdr_off += sge_bytes;
if (ib_dma_mapping_error(rdma->sc_cm_id->device,
ctxt->sge[sge_no].addr))
goto err;
atomic_inc(&rdma->sc_dma_used);
ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
ctxt->sge[sge_no].length = sge_bytes;
}
BUG_ON(byte_count != 0);
/* Save all respages in the ctxt and remove them from the
* respages array. They are our pages until the I/O
* completes.
*/
pages = rqstp->rq_next_page - rqstp->rq_respages;
for (page_no = 0; page_no < pages; page_no++) {
ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
ctxt->count++;
rqstp->rq_respages[page_no] = NULL;
/*
* If there are more pages than SGE, terminate SGE
* list so that svc_rdma_unmap_dma doesn't attempt to
* unmap garbage.
*/
if (page_no+1 >= sge_no)
ctxt->sge[page_no+1].length = 0;
}
rqstp->rq_next_page = rqstp->rq_respages + 1;
BUG_ON(sge_no > rdma->sc_max_sge);
memset(&send_wr, 0, sizeof send_wr);
ctxt->wr_op = IB_WR_SEND;
send_wr.wr_id = (unsigned long)ctxt;
send_wr.sg_list = ctxt->sge;
send_wr.num_sge = sge_no;
send_wr.opcode = IB_WR_SEND;
send_wr.send_flags = IB_SEND_SIGNALED;
ret = svc_rdma_send(rdma, &send_wr);
if (ret)
goto err;
return 0;
err:
svc_rdma_unmap_dma(ctxt);
svc_rdma_put_context(ctxt, 1);
return -EIO;
}
void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
{
}
/*
* Return the start of an xdr buffer.
*/
static void *xdr_start(struct xdr_buf *xdr)
{
return xdr->head[0].iov_base -
(xdr->len -
xdr->page_len -
xdr->tail[0].iov_len -
xdr->head[0].iov_len);
}
int svc_rdma_sendto(struct svc_rqst *rqstp)
{
struct svc_xprt *xprt = rqstp->rq_xprt;
struct svcxprt_rdma *rdma =
container_of(xprt, struct svcxprt_rdma, sc_xprt);
struct rpcrdma_msg *rdma_argp;
struct rpcrdma_msg *rdma_resp;
struct rpcrdma_write_array *reply_ary;
enum rpcrdma_proc reply_type;
int ret;
int inline_bytes;
struct page *res_page;
struct svc_rdma_op_ctxt *ctxt;
struct svc_rdma_req_map *vec;
dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
/* Get the RDMA request header. */
rdma_argp = xdr_start(&rqstp->rq_arg);
/* Build an req vec for the XDR */
ctxt = svc_rdma_get_context(rdma);
ctxt->direction = DMA_TO_DEVICE;
vec = svc_rdma_get_req_map();
ret = map_xdr(rdma, &rqstp->rq_res, vec);
if (ret)
goto err0;
inline_bytes = rqstp->rq_res.len;
/* Create the RDMA response header */
res_page = svc_rdma_get_page();
rdma_resp = page_address(res_page);
reply_ary = svc_rdma_get_reply_array(rdma_argp);
if (reply_ary)
reply_type = RDMA_NOMSG;
else
reply_type = RDMA_MSG;
svc_rdma_xdr_encode_reply_header(rdma, rdma_argp,
rdma_resp, reply_type);
/* Send any write-chunk data and build resp write-list */
ret = send_write_chunks(rdma, rdma_argp, rdma_resp,
rqstp, vec);
if (ret < 0) {
printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
ret);
goto err1;
}
inline_bytes -= ret;
/* Send any reply-list data and update resp reply-list */
ret = send_reply_chunks(rdma, rdma_argp, rdma_resp,
rqstp, vec);
if (ret < 0) {
printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
ret);
goto err1;
}
inline_bytes -= ret;
ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
inline_bytes);
svc_rdma_put_req_map(vec);
dprintk("svcrdma: send_reply returns %d\n", ret);
return ret;
err1:
put_page(res_page);
err0:
svc_rdma_put_req_map(vec);
svc_rdma_put_context(ctxt, 0);
return ret;
}

File diff suppressed because it is too large

View file

@ -0,0 +1,747 @@
/*
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* transport.c
*
* This file contains the top-level implementation of an RPC RDMA
* transport.
*
* Naming convention: functions beginning with xprt_ are part of the
* transport switch. All others are RPC RDMA internal.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/sunrpc/addr.h>
#include "xprt_rdma.h"
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("RPC/RDMA Transport for Linux kernel NFS");
MODULE_AUTHOR("Network Appliance, Inc.");
/*
* tunables
*/
static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_inline_write_padding;
static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
int xprt_rdma_pad_optimize = 0;
#ifdef RPC_DEBUG
static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE;
static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE;
static unsigned int zero;
static unsigned int max_padding = PAGE_SIZE;
static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
static unsigned int max_memreg = RPCRDMA_LAST - 1;
static struct ctl_table_header *sunrpc_table_header;
static struct ctl_table xr_tunables_table[] = {
{
.procname = "rdma_slot_table_entries",
.data = &xprt_rdma_slot_table_entries,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_slot_table_size,
.extra2 = &max_slot_table_size
},
{
.procname = "rdma_max_inline_read",
.data = &xprt_rdma_max_inline_read,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "rdma_max_inline_write",
.data = &xprt_rdma_max_inline_write,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "rdma_inline_write_padding",
.data = &xprt_rdma_inline_write_padding,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &max_padding,
},
{
.procname = "rdma_memreg_strategy",
.data = &xprt_rdma_memreg_strategy,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_memreg,
.extra2 = &max_memreg,
},
{
.procname = "rdma_pad_optimize",
.data = &xprt_rdma_pad_optimize,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{ },
};
static struct ctl_table sunrpc_table[] = {
{
.procname = "sunrpc",
.mode = 0555,
.child = xr_tunables_table
},
{ },
};
#endif
#define RPCRDMA_BIND_TO (60U * HZ)
#define RPCRDMA_INIT_REEST_TO (5U * HZ)
#define RPCRDMA_MAX_REEST_TO (30U * HZ)
#define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ)
static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */
static void
xprt_rdma_format_addresses(struct rpc_xprt *xprt)
{
struct sockaddr *sap = (struct sockaddr *)
&rpcx_to_rdmad(xprt).addr;
struct sockaddr_in *sin = (struct sockaddr_in *)sap;
char buf[64];
(void)rpc_ntop(sap, buf, sizeof(buf));
xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL);
snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
/* netid */
xprt->address_strings[RPC_DISPLAY_NETID] = "rdma";
}
static void
xprt_rdma_free_addresses(struct rpc_xprt *xprt)
{
unsigned int i;
for (i = 0; i < RPC_DISPLAY_MAX; i++)
switch (i) {
case RPC_DISPLAY_PROTO:
case RPC_DISPLAY_NETID:
continue;
default:
kfree(xprt->address_strings[i]);
}
}
static void
xprt_rdma_connect_worker(struct work_struct *work)
{
struct rpcrdma_xprt *r_xprt =
container_of(work, struct rpcrdma_xprt, rdma_connect.work);
struct rpc_xprt *xprt = &r_xprt->xprt;
int rc = 0;
xprt_clear_connected(xprt);
dprintk("RPC: %s: %sconnect\n", __func__,
r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
if (rc)
xprt_wake_pending_tasks(xprt, rc);
dprintk("RPC: %s: exit\n", __func__);
xprt_clear_connecting(xprt);
}
/*
* xprt_rdma_destroy
*
* Destroy the xprt.
* Free all memory associated with the object, including its own.
* NOTE: none of the *destroy methods free memory for their top-level
* objects, even though they may have allocated it (they do free
* private memory). It's up to the caller to handle it. In this
* case (RDMA transport), all structure memory is inlined with the
* struct rpcrdma_xprt.
*/
static void
xprt_rdma_destroy(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
dprintk("RPC: %s: called\n", __func__);
cancel_delayed_work_sync(&r_xprt->rdma_connect);
xprt_clear_connected(xprt);
rpcrdma_buffer_destroy(&r_xprt->rx_buf);
rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
rpcrdma_ia_close(&r_xprt->rx_ia);
xprt_rdma_free_addresses(xprt);
xprt_free(xprt);
dprintk("RPC: %s: returning\n", __func__);
module_put(THIS_MODULE);
}
static const struct rpc_timeout xprt_rdma_default_timeout = {
.to_initval = 60 * HZ,
.to_maxval = 60 * HZ,
};
/**
* xprt_setup_rdma - Set up transport to use RDMA
*
* @args: rpc transport arguments
*/
static struct rpc_xprt *
xprt_setup_rdma(struct xprt_create *args)
{
struct rpcrdma_create_data_internal cdata;
struct rpc_xprt *xprt;
struct rpcrdma_xprt *new_xprt;
struct rpcrdma_ep *new_ep;
struct sockaddr_in *sin;
int rc;
if (args->addrlen > sizeof(xprt->addr)) {
dprintk("RPC: %s: address too large\n", __func__);
return ERR_PTR(-EBADF);
}
xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
xprt_rdma_slot_table_entries,
xprt_rdma_slot_table_entries);
if (xprt == NULL) {
dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
__func__);
return ERR_PTR(-ENOMEM);
}
/* 60 second timeout, no retries */
xprt->timeout = &xprt_rdma_default_timeout;
xprt->bind_timeout = RPCRDMA_BIND_TO;
xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
xprt->resvport = 0; /* privileged port not needed */
xprt->tsh_size = 0; /* RPC-RDMA handles framing */
xprt->ops = &xprt_rdma_procs;
/*
* Set up RDMA-specific connect data.
*/
/* Put server RDMA address in local cdata */
memcpy(&cdata.addr, args->dstaddr, args->addrlen);
/* Ensure xprt->addr holds valid server TCP (not RDMA)
* address, for any side protocols which peek at it */
xprt->prot = IPPROTO_TCP;
xprt->addrlen = args->addrlen;
memcpy(&xprt->addr, &cdata.addr, xprt->addrlen);
sin = (struct sockaddr_in *)&cdata.addr;
if (ntohs(sin->sin_port) != 0)
xprt_set_bound(xprt);
dprintk("RPC: %s: %pI4:%u\n",
__func__, &sin->sin_addr.s_addr, ntohs(sin->sin_port));
/* Set max requests */
cdata.max_requests = xprt->max_reqs;
/* Set some length limits */
cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */
cdata.inline_wsize = xprt_rdma_max_inline_write;
if (cdata.inline_wsize > cdata.wsize)
cdata.inline_wsize = cdata.wsize;
cdata.inline_rsize = xprt_rdma_max_inline_read;
if (cdata.inline_rsize > cdata.rsize)
cdata.inline_rsize = cdata.rsize;
cdata.padding = xprt_rdma_inline_write_padding;
/*
* Create new transport instance, which includes initialized
* o ia
* o endpoint
* o buffers
*/
new_xprt = rpcx_to_rdmax(xprt);
rc = rpcrdma_ia_open(new_xprt, (struct sockaddr *) &cdata.addr,
xprt_rdma_memreg_strategy);
if (rc)
goto out1;
/*
* initialize and create ep
*/
new_xprt->rx_data = cdata;
new_ep = &new_xprt->rx_ep;
new_ep->rep_remote_addr = cdata.addr;
rc = rpcrdma_ep_create(&new_xprt->rx_ep,
&new_xprt->rx_ia, &new_xprt->rx_data);
if (rc)
goto out2;
/*
* Allocate pre-registered send and receive buffers for headers and
* any inline data. Also specify any padding which will be provided
* from a preregistered zero buffer.
*/
rc = rpcrdma_buffer_create(&new_xprt->rx_buf, new_ep, &new_xprt->rx_ia,
&new_xprt->rx_data);
if (rc)
goto out3;
/*
* Register a callback for connection events. This is necessary because
* connection loss notification is async. We also catch connection loss
* when reaping receives.
*/
INIT_DELAYED_WORK(&new_xprt->rdma_connect, xprt_rdma_connect_worker);
new_ep->rep_func = rpcrdma_conn_func;
new_ep->rep_xprt = xprt;
xprt_rdma_format_addresses(xprt);
xprt->max_payload = rpcrdma_max_payload(new_xprt);
dprintk("RPC: %s: transport data payload maximum: %zu bytes\n",
__func__, xprt->max_payload);
if (!try_module_get(THIS_MODULE))
goto out4;
return xprt;
out4:
xprt_rdma_free_addresses(xprt);
rc = -EINVAL;
out3:
rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
out2:
rpcrdma_ia_close(&new_xprt->rx_ia);
out1:
xprt_free(xprt);
return ERR_PTR(rc);
}
/*
* Close a connection, during shutdown or timeout/reconnect
*/
static void
xprt_rdma_close(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
dprintk("RPC: %s: closing\n", __func__);
if (r_xprt->rx_ep.rep_connected > 0)
xprt->reestablish_timeout = 0;
xprt_disconnect_done(xprt);
rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
}
static void
xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
{
struct sockaddr_in *sap;
sap = (struct sockaddr_in *)&xprt->addr;
sap->sin_port = htons(port);
sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr;
sap->sin_port = htons(port);
dprintk("RPC: %s: %u\n", __func__, port);
}
static void
xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
if (r_xprt->rx_ep.rep_connected != 0) {
/* Reconnect */
schedule_delayed_work(&r_xprt->rdma_connect,
xprt->reestablish_timeout);
xprt->reestablish_timeout <<= 1;
if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO)
xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO;
else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO)
xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
} else {
schedule_delayed_work(&r_xprt->rdma_connect, 0);
if (!RPC_IS_ASYNC(task))
flush_delayed_work(&r_xprt->rdma_connect);
}
}
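The reconnect path above doubles reestablish_timeout on each attempt and clamps it between the initial and maximum values, a classic bounded exponential backoff. A tiny sketch of the resulting schedule (seconds used in place of jiffies; the 5 s and 30 s bounds mirror RPCRDMA_INIT_REEST_TO and RPCRDMA_MAX_REEST_TO):

#include <stdio.h>

#define DEMO_INIT_REEST_TO	5	/* seconds */
#define DEMO_MAX_REEST_TO	30	/* seconds */

int main(void)
{
	unsigned int to = DEMO_INIT_REEST_TO;
	int attempt;

	for (attempt = 1; attempt <= 6; attempt++) {
		printf("attempt %d: wait %u s before reconnect\n", attempt, to);
		to <<= 1;
		if (to > DEMO_MAX_REEST_TO)
			to = DEMO_MAX_REEST_TO;
		else if (to < DEMO_INIT_REEST_TO)
			to = DEMO_INIT_REEST_TO;
	}
	return 0;
}
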
/*
* The RDMA allocate/free functions need the task structure as a place
* to hide the struct rpcrdma_req, which is necessary for the actual send/recv
* sequence. For this reason, the recv buffers are attached to send
* buffers for portions of the RPC. Note that the RPC layer allocates
* both send and receive buffers in the same call. We may register
* the receive buffer portion when using reply chunks.
*/
static void *
xprt_rdma_allocate(struct rpc_task *task, size_t size)
{
struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
struct rpcrdma_req *req, *nreq;
req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf);
if (req == NULL)
return NULL;
if (size > req->rl_size) {
dprintk("RPC: %s: size %zd too large for buffer[%zd]: "
"prog %d vers %d proc %d\n",
__func__, size, req->rl_size,
task->tk_client->cl_prog, task->tk_client->cl_vers,
task->tk_msg.rpc_proc->p_proc);
/*
* Outgoing length shortage. Our inline write max must have
* been configured to perform direct i/o.
*
* This is therefore a large metadata operation, and the
* allocate call was made on the maximum possible message,
* e.g. containing long filename(s) or symlink data. In
* fact, while these metadata operations *might* carry
* large outgoing payloads, they rarely *do*. However, we
* have to commit to the request here, so reallocate and
* register it now. The data path will never require this
* reallocation.
*
* If the allocation or registration fails, the RPC framework
* will (doggedly) retry.
*/
if (task->tk_flags & RPC_TASK_SWAPPER)
nreq = kmalloc(sizeof *req + size, GFP_ATOMIC);
else
nreq = kmalloc(sizeof *req + size, GFP_NOFS);
if (nreq == NULL)
goto outfail;
if (rpcrdma_register_internal(&rpcx_to_rdmax(xprt)->rx_ia,
nreq->rl_base, size + sizeof(struct rpcrdma_req)
- offsetof(struct rpcrdma_req, rl_base),
&nreq->rl_handle, &nreq->rl_iov)) {
kfree(nreq);
goto outfail;
}
rpcx_to_rdmax(xprt)->rx_stats.hardway_register_count += size;
nreq->rl_size = size;
nreq->rl_niovs = 0;
nreq->rl_nchunks = 0;
nreq->rl_buffer = (struct rpcrdma_buffer *)req;
nreq->rl_reply = req->rl_reply;
memcpy(nreq->rl_segments,
req->rl_segments, sizeof nreq->rl_segments);
/* flag the swap with an unused field */
nreq->rl_iov.length = 0;
req->rl_reply = NULL;
req = nreq;
}
dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
req->rl_connect_cookie = 0; /* our reserved value */
return req->rl_xdr_buf;
outfail:
rpcrdma_buffer_put(req);
rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++;
return NULL;
}
/*
* This function returns all RDMA resources to the pool.
*/
static void
xprt_rdma_free(void *buffer)
{
struct rpcrdma_req *req;
struct rpcrdma_xprt *r_xprt;
struct rpcrdma_rep *rep;
int i;
if (buffer == NULL)
return;
req = container_of(buffer, struct rpcrdma_req, rl_xdr_buf[0]);
if (req->rl_iov.length == 0) { /* see allocate above */
r_xprt = container_of(((struct rpcrdma_req *) req->rl_buffer)->rl_buffer,
struct rpcrdma_xprt, rx_buf);
} else
r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
rep = req->rl_reply;
dprintk("RPC: %s: called on 0x%p%s\n",
__func__, rep, (rep && rep->rr_func) ? " (with waiter)" : "");
/*
* Finish the deregistration. The process is considered
* complete when the rr_func vector becomes NULL - this
* was put in place during rpcrdma_reply_handler() - the wait
* call below will not block if the dereg is "done". If
* interrupted, our framework will clean up.
*/
for (i = 0; req->rl_nchunks;) {
--req->rl_nchunks;
i += rpcrdma_deregister_external(
&req->rl_segments[i], r_xprt);
}
if (req->rl_iov.length == 0) { /* see allocate above */
struct rpcrdma_req *oreq = (struct rpcrdma_req *)req->rl_buffer;
oreq->rl_reply = req->rl_reply;
(void) rpcrdma_deregister_internal(&r_xprt->rx_ia,
req->rl_handle,
&req->rl_iov);
kfree(req);
req = oreq;
}
/* Put back request+reply buffers */
rpcrdma_buffer_put(req);
}
/*
* send_request invokes the meat of RPC RDMA. It must do the following:
* 1. Marshal the RPC request into an RPC RDMA request, which means
* putting a header in front of data, and creating IOVs for RDMA
* from those in the request.
* 2. In marshaling, detect opportunities for RDMA, and use them.
* 3. Post a recv message to set up asynch completion, then send
* the request (rpcrdma_ep_post).
* 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP).
*/
static int
xprt_rdma_send_request(struct rpc_task *task)
{
struct rpc_rqst *rqst = task->tk_rqstp;
struct rpc_xprt *xprt = rqst->rq_xprt;
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
int rc = 0;
if (req->rl_niovs == 0)
rc = rpcrdma_marshal_req(rqst);
else if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
rc = rpcrdma_marshal_chunks(rqst, 0);
if (rc < 0)
goto failed_marshal;
if (req->rl_reply == NULL) /* e.g. reconnection */
rpcrdma_recv_buffer_get(req);
if (req->rl_reply) {
req->rl_reply->rr_func = rpcrdma_reply_handler;
/* this need only be done once, but... */
req->rl_reply->rr_xprt = xprt;
}
/* Must suppress retransmit to maintain credits */
if (req->rl_connect_cookie == xprt->connect_cookie)
goto drop_connection;
req->rl_connect_cookie = xprt->connect_cookie;
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection;
rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
rqst->rq_bytes_sent = 0;
return 0;
failed_marshal:
r_xprt->rx_stats.failed_marshal_count++;
dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n",
__func__, rc);
if (rc == -EIO)
return -EIO;
drop_connection:
xprt_disconnect_done(xprt);
return -ENOTCONN; /* implies disconnect */
}
static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
long idle_time = 0;
if (xprt_connected(xprt))
idle_time = (long)(jiffies - xprt->last_used) / HZ;
seq_printf(seq,
"\txprt:\trdma %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu "
"%lu %lu %lu %Lu %Lu %Lu %Lu %lu %lu %lu\n",
0, /* need a local port? */
xprt->stat.bind_count,
xprt->stat.connect_count,
xprt->stat.connect_time,
idle_time,
xprt->stat.sends,
xprt->stat.recvs,
xprt->stat.bad_xids,
xprt->stat.req_u,
xprt->stat.bklog_u,
r_xprt->rx_stats.read_chunk_count,
r_xprt->rx_stats.write_chunk_count,
r_xprt->rx_stats.reply_chunk_count,
r_xprt->rx_stats.total_rdma_request,
r_xprt->rx_stats.total_rdma_reply,
r_xprt->rx_stats.pullup_copy_count,
r_xprt->rx_stats.fixup_copy_count,
r_xprt->rx_stats.hardway_register_count,
r_xprt->rx_stats.failed_marshal_count,
r_xprt->rx_stats.bad_reply_count);
}
/*
* Plumbing for rpc transport switch and kernel module
*/
static struct rpc_xprt_ops xprt_rdma_procs = {
.reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */
.alloc_slot = xprt_alloc_slot,
.release_request = xprt_release_rqst_cong, /* ditto */
.set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */
.rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */
.set_port = xprt_rdma_set_port,
.connect = xprt_rdma_connect,
.buf_alloc = xprt_rdma_allocate,
.buf_free = xprt_rdma_free,
.send_request = xprt_rdma_send_request,
.close = xprt_rdma_close,
.destroy = xprt_rdma_destroy,
.print_stats = xprt_rdma_print_stats
};
static struct xprt_class xprt_rdma = {
.list = LIST_HEAD_INIT(xprt_rdma.list),
.name = "rdma",
.owner = THIS_MODULE,
.ident = XPRT_TRANSPORT_RDMA,
.setup = xprt_setup_rdma,
};
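/*
 * Illustrative sketch (an assumption about the generic transport switch in
 * net/sunrpc/xprt.c, which is not part of this hunk): registering the
 * xprt_class above is what lets a mount with proto=rdma reach this code.
 * xprt_create_transport() walks the registered classes, matches on the
 * XPRT_TRANSPORT_RDMA ident, and calls xprt_setup_rdma(). The function and
 * list names below are a simplified paraphrase, not the real implementation.
 */
#if 0
struct rpc_xprt *example_create_transport(struct xprt_create *args)
{
	struct xprt_class *t;

	list_for_each_entry(t, &xprt_list, list) {
		if (t->ident == args->ident)
			return t->setup(args);	/* xprt_setup_rdma() for "rdma" */
	}
	return ERR_PTR(-EIO);
}
#endif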
static void __exit xprt_rdma_cleanup(void)
{
int rc;
dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n");
#ifdef RPC_DEBUG
if (sunrpc_table_header) {
unregister_sysctl_table(sunrpc_table_header);
sunrpc_table_header = NULL;
}
#endif
rc = xprt_unregister_transport(&xprt_rdma);
if (rc)
dprintk("RPC: %s: xprt_unregister returned %i\n",
__func__, rc);
}
static int __init xprt_rdma_init(void)
{
int rc;
rc = xprt_register_transport(&xprt_rdma);
if (rc)
return rc;
dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
dprintk("Defaults:\n");
dprintk("\tSlots %d\n"
"\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
xprt_rdma_slot_table_entries,
xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
dprintk("\tPadding %d\n\tMemreg %d\n",
xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy);
#ifdef RPC_DEBUG
if (!sunrpc_table_header)
sunrpc_table_header = register_sysctl_table(sunrpc_table);
#endif
return 0;
}
module_init(xprt_rdma_init);
module_exit(xprt_rdma_cleanup);

2076
net/sunrpc/xprtrdma/verbs.c Normal file

File diff suppressed because it is too large

402
net/sunrpc/xprtrdma/xprt_rdma.h Normal file

View file

@ -0,0 +1,402 @@
/*
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _LINUX_SUNRPC_XPRT_RDMA_H
#define _LINUX_SUNRPC_XPRT_RDMA_H
#include <linux/wait.h> /* wait_queue_head_t, etc */
#include <linux/spinlock.h> /* spinlock_t, etc */
#include <linux/atomic.h> /* atomic_t, etc */
#include <linux/workqueue.h> /* struct work_struct */
#include <rdma/rdma_cm.h> /* RDMA connection api */
#include <rdma/ib_verbs.h> /* RDMA verbs api */
#include <linux/sunrpc/clnt.h> /* rpc_xprt */
#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
#include <linux/sunrpc/svc.h> /* RPCSVC_MAXPAYLOAD */
#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
/*
* Interface Adapter -- one per transport instance
*/
struct rpcrdma_ia {
rwlock_t ri_qplock;
struct rdma_cm_id *ri_id;
struct ib_pd *ri_pd;
struct ib_mr *ri_bind_mem;
u32 ri_dma_lkey;
int ri_have_dma_lkey;
struct completion ri_done;
int ri_async_rc;
enum rpcrdma_memreg ri_memreg_strategy;
unsigned int ri_max_frmr_depth;
};
/*
* RDMA Endpoint -- one per transport instance
*/
#define RPCRDMA_WC_BUDGET (128)
#define RPCRDMA_POLLSIZE (16)
struct rpcrdma_ep {
atomic_t rep_cqcount;
int rep_cqinit;
int rep_connected;
struct rpcrdma_ia *rep_ia;
struct ib_qp_init_attr rep_attr;
wait_queue_head_t rep_connect_wait;
struct ib_sge rep_pad; /* holds zeroed pad */
struct ib_mr *rep_pad_mr; /* holds zeroed pad */
void (*rep_func)(struct rpcrdma_ep *);
struct rpc_xprt *rep_xprt; /* for rep_func */
struct rdma_conn_param rep_remote_cma;
struct sockaddr_storage rep_remote_addr;
struct delayed_work rep_connect_worker;
struct ib_wc rep_send_wcs[RPCRDMA_POLLSIZE];
struct ib_wc rep_recv_wcs[RPCRDMA_POLLSIZE];
};
#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
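/*
 * Illustrative sketch (an assumption; the real logic lives in the suppressed
 * verbs.c): INIT_CQCOUNT/DECR_CQCOUNT implement a signaling budget so that
 * only every rep_cqinit-th send work request asks for a completion. Only the
 * two macros and IB_SEND_SIGNALED come from real definitions; the helper
 * name is hypothetical.
 */
#if 0
static void example_set_signal_flag(struct rpcrdma_ep *ep,
				    struct ib_send_wr *send_wr)
{
	if (DECR_CQCOUNT(ep) > 0) {
		/* still within budget: post unsignaled */
		send_wr->send_flags = 0;
	} else {
		/* budget exhausted: request a completion and reset the count */
		INIT_CQCOUNT(ep);
		send_wr->send_flags = IB_SEND_SIGNALED;
	}
}
#endif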
enum rpcrdma_chunktype {
rpcrdma_noch = 0,
rpcrdma_readch,
rpcrdma_areadch,
rpcrdma_writech,
rpcrdma_replych
};
/*
* struct rpcrdma_rep -- this structure encapsulates state required to recv
* and complete a reply, asynchronously. It needs several pieces of
* state:
* o recv buffer (posted to provider)
* o ib_sge (also donated to provider)
* o status of reply (length, success or not)
* o bookkeeping state to get run by tasklet (list, etc)
*
* These are allocated during initialization, per-transport instance;
* however, the tasklet execution list itself is global, as it should
* always be pretty short.
*
* N of these are associated with a transport instance, and stored in
* struct rpcrdma_buffer. N is the max number of outstanding requests.
*/
/* temporary static scatter/gather max */
#define RPCRDMA_MAX_DATA_SEGS (64) /* max scatter/gather */
#define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */
#define MAX_RPCRDMAHDR (\
/* max supported RPC/RDMA header */ \
sizeof(struct rpcrdma_msg) + (2 * sizeof(u32)) + \
(sizeof(struct rpcrdma_read_chunk) * RPCRDMA_MAX_SEGS) + sizeof(u32))
struct rpcrdma_buffer;
struct rpcrdma_rep {
unsigned int rr_len; /* actual received reply length */
struct rpcrdma_buffer *rr_buffer; /* home base for this structure */
struct rpc_xprt *rr_xprt; /* needed for request/reply matching */
void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */
struct list_head rr_list; /* tasklet list */
struct ib_sge rr_iov; /* for posting */
struct ib_mr *rr_handle; /* handle for mem in rr_iov */
char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */
};
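/*
 * Illustrative sketch (an assumption; the actual reply tasklet is in the
 * suppressed verbs.c): how a tasklet might drain the global list mentioned
 * in the comment above and invoke each reply's rr_func callback. The list
 * head and lock names are hypothetical; only the rpcrdma_rep fields come
 * from this header.
 */
#if 0
static LIST_HEAD(example_replies_g);			/* hypothetical */
static DEFINE_SPINLOCK(example_replies_lock);		/* hypothetical */

static void example_run_reply_tasklet(unsigned long data)
{
	struct rpcrdma_rep *rep;
	unsigned long flags;

	spin_lock_irqsave(&example_replies_lock, flags);
	while (!list_empty(&example_replies_g)) {
		rep = list_entry(example_replies_g.next,
				 struct rpcrdma_rep, rr_list);
		list_del(&rep->rr_list);
		spin_unlock_irqrestore(&example_replies_lock, flags);

		if (rep->rr_func)
			rep->rr_func(rep);	/* e.g. rpcrdma_reply_handler */

		spin_lock_irqsave(&example_replies_lock, flags);
	}
	spin_unlock_irqrestore(&example_replies_lock, flags);
}
#endif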
/*
* struct rpcrdma_mw - external memory region metadata
*
* An external memory region is any buffer or page that is registered
* on the fly (i.e., not pre-registered).
*
* Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During
* call_allocate, rpcrdma_buffer_get() assigns one to each segment in
* an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep
* track of registration metadata while each RPC is pending.
* rpcrdma_deregister_external() uses this metadata to unmap and
* release these resources when an RPC is complete.
*/
enum rpcrdma_frmr_state {
FRMR_IS_INVALID, /* ready to be used */
FRMR_IS_VALID, /* in use */
FRMR_IS_STALE, /* failed completion */
};
struct rpcrdma_frmr {
struct ib_fast_reg_page_list *fr_pgl;
struct ib_mr *fr_mr;
enum rpcrdma_frmr_state fr_state;
};
struct rpcrdma_mw {
union {
struct ib_fmr *fmr;
struct rpcrdma_frmr frmr;
} r;
struct list_head mw_list;
struct list_head mw_all;
};
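/*
 * Illustrative sketch (an assumption; the real free-list handling is in the
 * suppressed verbs.c): how rpcrdma_buffer_get() could hand out one
 * rpcrdma_mw per chunk segment from the rb_mws list described above. The
 * helper name is hypothetical, and the use of rb_lock to cover rb_mws is
 * assumed here for simplicity.
 */
#if 0
static struct rpcrdma_mw *example_get_mw(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *mw = NULL;

	spin_lock(&buf->rb_lock);
	if (!list_empty(&buf->rb_mws)) {
		mw = list_first_entry(&buf->rb_mws,
				      struct rpcrdma_mw, mw_list);
		/* now owned by an rpcrdma_req segment until deregistration */
		list_del(&mw->mw_list);
	}
	spin_unlock(&buf->rb_lock);
	return mw;
}
#endif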
/*
* struct rpcrdma_req -- structure central to the request/reply sequence.
*
* N of these are associated with a transport instance, and stored in
* struct rpcrdma_buffer. N is the max number of outstanding requests.
*
* It includes pre-registered buffer memory for send AND recv.
* The recv buffer, however, is not owned by this structure, and
* is "donated" to the hardware when a recv is posted. When a
* reply is handled, the recv buffer used is given back to the
* struct rpcrdma_req associated with the request.
*
* In addition to the basic memory, this structure includes an array
* of iovs for send operations. The reason is that the iovs passed to
* ib_post_{send,recv} must not be modified until the work request
* completes.
*
* NOTES:
* o RPCRDMA_MAX_SEGS is the max number of addressable chunk elements we
* marshal. The number needed varies depending on the iov lists that
* are passed to us, the memory registration mode we are in, and if
* physical addressing is used, the layout.
*/
struct rpcrdma_mr_seg { /* chunk descriptors */
union { /* chunk memory handles */
struct ib_mr *rl_mr; /* if registered directly */
struct rpcrdma_mw *rl_mw; /* if registered from region */
} mr_chunk;
u64 mr_base; /* registration result */
u32 mr_rkey; /* registration result */
u32 mr_len; /* length of chunk or segment */
int mr_nsegs; /* number of segments in chunk or 0 */
enum dma_data_direction mr_dir; /* segment mapping direction */
dma_addr_t mr_dma; /* segment mapping address */
size_t mr_dmalen; /* segment mapping length */
struct page *mr_page; /* owning page, if any */
char *mr_offset; /* kva if no page, else offset */
};
struct rpcrdma_req {
size_t rl_size; /* actual length of buffer */
unsigned int rl_niovs; /* 0, 2 or 4 */
unsigned int rl_nchunks; /* non-zero if chunks */
unsigned int rl_connect_cookie; /* retry detection */
enum rpcrdma_chunktype rl_rtype, rl_wtype;
struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
struct rpcrdma_rep *rl_reply;/* holder for reply buffer */
struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */
struct ib_sge rl_send_iov[4]; /* for active requests */
struct ib_sge rl_iov; /* for posting */
struct ib_mr *rl_handle; /* handle for mem in rl_iov */
char rl_base[MAX_RPCRDMAHDR]; /* start of actual buffer */
__u32 rl_xdr_buf[0]; /* start of returned rpc rq_buffer */
};
#define rpcr_to_rdmar(r) \
container_of((r)->rq_buffer, struct rpcrdma_req, rl_xdr_buf[0])
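/*
 * Illustrative sketch (hypothetical helper): xprt_rdma_allocate() hands
 * rl_xdr_buf back to the RPC layer, which stores it in rq_buffer, so
 * rpcr_to_rdmar() is the inverse mapping used by xprt_rdma_send_request()
 * and (via rl_buffer) xprt_rdma_free() earlier in this hunk.
 */
#if 0
static void example_show_buffer_mapping(struct rpc_rqst *rqst)
{
	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);

	/* rq_buffer points at the start of the req's flexible array member */
	WARN_ON(rqst->rq_buffer != (void *)req->rl_xdr_buf);
}
#endif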
/*
* struct rpcrdma_buffer -- holds list/queue of pre-registered memory for
* inline requests/replies, and client/server credits.
*
* One of these is associated with a transport instance
*/
struct rpcrdma_buffer {
spinlock_t rb_lock; /* protects indexes */
atomic_t rb_credits; /* most recent server credits */
int rb_max_requests;/* client max requests */
struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */
struct list_head rb_all;
int rb_send_index;
struct rpcrdma_req **rb_send_bufs;
int rb_recv_index;
struct rpcrdma_rep **rb_recv_bufs;
char *rb_pool;
};
#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
/*
* Internal structure for transport instance creation. This
* exists primarily for modularity.
*
* This data should be set with mount options
*/
struct rpcrdma_create_data_internal {
struct sockaddr_storage addr; /* RDMA server address */
unsigned int max_requests; /* max requests (slots) in flight */
unsigned int rsize; /* mount rsize - max read hdr+data */
unsigned int wsize; /* mount wsize - max write hdr+data */
unsigned int inline_rsize; /* max non-rdma read data payload */
unsigned int inline_wsize; /* max non-rdma write data payload */
unsigned int padding; /* non-rdma write header padding */
};
#define RPCRDMA_INLINE_READ_THRESHOLD(rq) \
(rpcx_to_rdmad(rq->rq_xprt).inline_rsize)
#define RPCRDMA_INLINE_WRITE_THRESHOLD(rq)\
(rpcx_to_rdmad(rq->rq_xprt).inline_wsize)
#define RPCRDMA_INLINE_PAD_VALUE(rq)\
rpcx_to_rdmad(rq->rq_xprt).padding
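/*
 * Illustrative sketch (an assumption; the real decision is made by
 * rpcrdma_marshal_req() in rpc_rdma.c, which also weighs page lists,
 * padding and the registration mode): how the inline thresholds above can
 * be used to pick a chunk type for the send buffer. The helper name is
 * hypothetical.
 */
#if 0
static enum rpcrdma_chunktype
example_choose_rtype(struct rpc_rqst *rqst)
{
	if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
		return rpcrdma_noch;	/* argument data fits inline */
	return rpcrdma_readch;		/* expose arguments as read chunks */
}
#endif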
/*
* Statistics for RPCRDMA
*/
struct rpcrdma_stats {
unsigned long read_chunk_count;
unsigned long write_chunk_count;
unsigned long reply_chunk_count;
unsigned long long total_rdma_request;
unsigned long long total_rdma_reply;
unsigned long long pullup_copy_count;
unsigned long long fixup_copy_count;
unsigned long hardway_register_count;
unsigned long failed_marshal_count;
unsigned long bad_reply_count;
};
/*
* RPCRDMA transport -- encapsulates the structures above for
* integration with RPC.
*
* The contained structures are embedded, not pointers,
* for convenience. This structure need not be visible externally.
*
* It is allocated and initialized during mount, and released
* during unmount.
*/
struct rpcrdma_xprt {
struct rpc_xprt xprt;
struct rpcrdma_ia rx_ia;
struct rpcrdma_ep rx_ep;
struct rpcrdma_buffer rx_buf;
struct rpcrdma_create_data_internal rx_data;
struct delayed_work rdma_connect;
struct rpcrdma_stats rx_stats;
};
#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt)
#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
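/*
 * Illustrative sketch (hypothetical helper): because the generic rpc_xprt
 * is embedded as the first member of rpcrdma_xprt, any transport method
 * that receives a struct rpc_xprt * can recover the private state with
 * rpcx_to_rdmax(), exactly as xprt_rdma_send_request() and
 * xprt_rdma_print_stats() do earlier in this hunk.
 */
#if 0
static void example_count_bad_reply(struct rpc_xprt *xprt)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);

	r_xprt->rx_stats.bad_reply_count++;
	dprintk("RPC:       inline rsize for this transport: %u\n",
		rpcx_to_rdmad(xprt).inline_rsize);
}
#endif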
/* Setting this to 0 ensures interoperability with early servers.
 * Setting this to 1 can improve performance for certain unaligned
 * read/write workloads.
 * Default is 0; see the sysctl entry and rpcrdma_convert_iovs() in
 * rpc_rdma.c. */
extern int xprt_rdma_pad_optimize;
/*
* Interface Adapter calls - xprtrdma/verbs.c
*/
int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int);
void rpcrdma_ia_close(struct rpcrdma_ia *);
/*
* Endpoint calls - xprtrdma/verbs.c
*/
int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
struct rpcrdma_create_data_internal *);
void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
struct rpcrdma_req *);
int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *,
struct rpcrdma_rep *);
/*
* Buffer calls - xprtrdma/verbs.c
*/
int rpcrdma_buffer_create(struct rpcrdma_buffer *, struct rpcrdma_ep *,
struct rpcrdma_ia *,
struct rpcrdma_create_data_internal *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_req *);
void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
int rpcrdma_register_internal(struct rpcrdma_ia *, void *, int,
struct ib_mr **, struct ib_sge *);
int rpcrdma_deregister_internal(struct rpcrdma_ia *,
struct ib_mr *, struct ib_sge *);
int rpcrdma_register_external(struct rpcrdma_mr_seg *,
int, int, struct rpcrdma_xprt *);
int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
struct rpcrdma_xprt *);
/*
* RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
*/
void rpcrdma_connect_worker(struct work_struct *);
void rpcrdma_conn_func(struct rpcrdma_ep *);
void rpcrdma_reply_handler(struct rpcrdma_rep *);
/*
* RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
*/
ssize_t rpcrdma_marshal_chunks(struct rpc_rqst *, ssize_t);
int rpcrdma_marshal_req(struct rpc_rqst *);
size_t rpcrdma_max_payload(struct rpcrdma_xprt *);
/* Temporary NFS request map cache. Created in svc_rdma.c */
extern struct kmem_cache *svc_rdma_map_cachep;
/* WR context cache. Created in svc_rdma.c */
extern struct kmem_cache *svc_rdma_ctxt_cachep;
/* Workqueue created in svc_rdma.c */
extern struct workqueue_struct *svc_rdma_wq;
#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
#else
#define RPCSVC_MAXPAYLOAD_RDMA (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
#endif
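/*
 * Worked example (assuming 4 KiB pages, i.e. PAGE_SHIFT == 12):
 * RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT = 64 << 12 = 262144 bytes (256 KiB),
 * so RPCSVC_MAXPAYLOAD_RDMA is the smaller of RPCSVC_MAXPAYLOAD and 256 KiB.
 */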
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */

3149
net/sunrpc/xprtsock.c Normal file

File diff suppressed because it is too large