Fixed MTP to work with TWRP

Author: awab228, 2018-06-19 23:16:04 +02:00
Commit: f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions

net/9p/Kconfig (new file, 36 lines)
@@ -0,0 +1,36 @@
#
# 9P protocol configuration
#

menuconfig NET_9P
	depends on NET
	tristate "Plan 9 Resource Sharing Support (9P2000)"
	help
	  If you say Y here, you will get experimental support for
	  Plan 9 resource sharing via the 9P2000 protocol.

	  See <http://v9fs.sf.net> for more information.

	  If unsure, say N.

if NET_9P

config NET_9P_VIRTIO
	depends on VIRTIO
	tristate "9P Virtio Transport"
	help
	  This builds support for a transport between
	  guest partitions and a host partition.

config NET_9P_RDMA
	depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS
	tristate "9P RDMA Transport (Experimental)"
	help
	  This builds support for an RDMA transport.

config NET_9P_DEBUG
	bool "Debug information"
	help
	  Say Y if you want the 9P subsystem to log debug information.

endif
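A usage sketch, not part of the commit: on a typical virtualized guest these options combine as CONFIG_NET_9P=m plus CONFIG_NET_9P_VIRTIO=m, while CONFIG_NET_9P_RDMA is only selectable when the InfiniBand stack (INFINIBAND and INFINIBAND_ADDR_TRANS) is enabled, per the depends lines above.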

net/9p/Makefile (new file, 18 lines)
@@ -0,0 +1,18 @@
obj-$(CONFIG_NET_9P) := 9pnet.o
obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o

9pnet-objs := \
	mod.o \
	client.o \
	error.o \
	util.o \
	protocol.o \
	trans_fd.o \
	trans_common.o

9pnet_virtio-objs := \
	trans_virtio.o

9pnet_rdma-objs := \
	trans_rdma.o

net/9p/client.c (new file, 2277 lines)
File diff suppressed because it is too large.

net/9p/error.c (new file, 247 lines)
@@ -0,0 +1,247 @@
/*
* net/9p/error.c
*
* Error string handling
*
* Plan 9 uses error strings, Unix uses error numbers. These functions
* try to help manage that and provide for dynamically adding error
* mappings.
*
* Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
* Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to:
* Free Software Foundation
* 51 Franklin Street, Fifth Floor
* Boston, MA 02111-1301 USA
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/errno.h>
#include <net/9p/9p.h>
/**
* struct errormap - map string errors from Plan 9 to Linux numeric ids
* @name: string sent over 9P
* @val: numeric id most closely representing @name
* @namelen: length of string
* @list: hash-table list for string lookup
*/
struct errormap {
char *name;
int val;
int namelen;
struct hlist_node list;
};
#define ERRHASHSZ 32
static struct hlist_head hash_errmap[ERRHASHSZ];
/* FixMe - reduce to a reasonable size */
static struct errormap errmap[] = {
{"Operation not permitted", EPERM},
{"wstat prohibited", EPERM},
{"No such file or directory", ENOENT},
{"directory entry not found", ENOENT},
{"file not found", ENOENT},
{"Interrupted system call", EINTR},
{"Input/output error", EIO},
{"No such device or address", ENXIO},
{"Argument list too long", E2BIG},
{"Bad file descriptor", EBADF},
{"Resource temporarily unavailable", EAGAIN},
{"Cannot allocate memory", ENOMEM},
{"Permission denied", EACCES},
{"Bad address", EFAULT},
{"Block device required", ENOTBLK},
{"Device or resource busy", EBUSY},
{"File exists", EEXIST},
{"Invalid cross-device link", EXDEV},
{"No such device", ENODEV},
{"Not a directory", ENOTDIR},
{"Is a directory", EISDIR},
{"Invalid argument", EINVAL},
{"Too many open files in system", ENFILE},
{"Too many open files", EMFILE},
{"Text file busy", ETXTBSY},
{"File too large", EFBIG},
{"No space left on device", ENOSPC},
{"Illegal seek", ESPIPE},
{"Read-only file system", EROFS},
{"Too many links", EMLINK},
{"Broken pipe", EPIPE},
{"Numerical argument out of domain", EDOM},
{"Numerical result out of range", ERANGE},
{"Resource deadlock avoided", EDEADLK},
{"File name too long", ENAMETOOLONG},
{"No locks available", ENOLCK},
{"Function not implemented", ENOSYS},
{"Directory not empty", ENOTEMPTY},
{"Too many levels of symbolic links", ELOOP},
{"No message of desired type", ENOMSG},
{"Identifier removed", EIDRM},
{"No data available", ENODATA},
{"Machine is not on the network", ENONET},
{"Package not installed", ENOPKG},
{"Object is remote", EREMOTE},
{"Link has been severed", ENOLINK},
{"Communication error on send", ECOMM},
{"Protocol error", EPROTO},
{"Bad message", EBADMSG},
{"File descriptor in bad state", EBADFD},
{"Streams pipe error", ESTRPIPE},
{"Too many users", EUSERS},
{"Socket operation on non-socket", ENOTSOCK},
{"Message too long", EMSGSIZE},
{"Protocol not available", ENOPROTOOPT},
{"Protocol not supported", EPROTONOSUPPORT},
{"Socket type not supported", ESOCKTNOSUPPORT},
{"Operation not supported", EOPNOTSUPP},
{"Protocol family not supported", EPFNOSUPPORT},
{"Network is down", ENETDOWN},
{"Network is unreachable", ENETUNREACH},
{"Network dropped connection on reset", ENETRESET},
{"Software caused connection abort", ECONNABORTED},
{"Connection reset by peer", ECONNRESET},
{"No buffer space available", ENOBUFS},
{"Transport endpoint is already connected", EISCONN},
{"Transport endpoint is not connected", ENOTCONN},
{"Cannot send after transport endpoint shutdown", ESHUTDOWN},
{"Connection timed out", ETIMEDOUT},
{"Connection refused", ECONNREFUSED},
{"Host is down", EHOSTDOWN},
{"No route to host", EHOSTUNREACH},
{"Operation already in progress", EALREADY},
{"Operation now in progress", EINPROGRESS},
{"Is a named type file", EISNAM},
{"Remote I/O error", EREMOTEIO},
{"Disk quota exceeded", EDQUOT},
/* errors from fossil, vacfs, and u9fs */
{"fid unknown or out of range", EBADF},
{"permission denied", EACCES},
{"file does not exist", ENOENT},
{"authentication failed", ECONNREFUSED},
{"bad offset in directory read", ESPIPE},
{"bad use of fid", EBADF},
{"wstat can't convert between files and directories", EPERM},
{"directory is not empty", ENOTEMPTY},
{"file exists", EEXIST},
{"file already exists", EEXIST},
{"file or directory already exists", EEXIST},
{"fid already in use", EBADF},
{"file in use", ETXTBSY},
{"i/o error", EIO},
{"file already open for I/O", ETXTBSY},
{"illegal mode", EINVAL},
{"illegal name", ENAMETOOLONG},
{"not a directory", ENOTDIR},
{"not a member of proposed group", EPERM},
{"not owner", EACCES},
{"only owner can change group in wstat", EACCES},
{"read only file system", EROFS},
{"no access to special file", EPERM},
{"i/o count too large", EIO},
{"unknown group", EINVAL},
{"unknown user", EINVAL},
{"bogus wstat buffer", EPROTO},
{"exclusive use file already open", EAGAIN},
{"corrupted directory entry", EIO},
{"corrupted file entry", EIO},
{"corrupted block label", EIO},
{"corrupted meta data", EIO},
{"illegal offset", EINVAL},
{"illegal path element", ENOENT},
{"root of file system is corrupted", EIO},
{"corrupted super block", EIO},
{"protocol botch", EPROTO},
{"file system is full", ENOSPC},
{"file is in use", EAGAIN},
{"directory entry is not allocated", ENOENT},
{"file is read only", EROFS},
{"file has been removed", EIDRM},
{"only support truncation to zero length", EPERM},
{"cannot remove root", EPERM},
{"file too big", EFBIG},
{"venti i/o error", EIO},
/* these are not errors */
{"u9fs rhostsauth: no authentication required", 0},
{"u9fs authnone: no authentication required", 0},
{NULL, -1}
};
/**
* p9_error_init - preload mappings into hash list
*
*/
int p9_error_init(void)
{
struct errormap *c;
int bucket;
/* initialize hash table */
for (bucket = 0; bucket < ERRHASHSZ; bucket++)
INIT_HLIST_HEAD(&hash_errmap[bucket]);
/* load initial error map into hash table */
for (c = errmap; c->name != NULL; c++) {
c->namelen = strlen(c->name);
bucket = jhash(c->name, c->namelen, 0) % ERRHASHSZ;
INIT_HLIST_NODE(&c->list);
hlist_add_head(&c->list, &hash_errmap[bucket]);
}
return 1;
}
EXPORT_SYMBOL(p9_error_init);
/**
* p9_errstr2errno - convert error string to error number
* @errstr: error string
* @len: length of error string
*
*/
int p9_errstr2errno(char *errstr, int len)
{
int errno;
struct errormap *c;
int bucket;
errno = 0;
c = NULL;
bucket = jhash(errstr, len, 0) % ERRHASHSZ;
hlist_for_each_entry(c, &hash_errmap[bucket], list) {
if (c->namelen == len && !memcmp(c->name, errstr, len)) {
errno = c->val;
break;
}
}
if (errno == 0) {
/* TODO: if error isn't found, add it dynamically */
errstr[len] = 0;
pr_err("%s: server reported unknown error %s\n",
__func__, errstr);
errno = ESERVERFAULT;
}
return -errno;
}
EXPORT_SYMBOL(p9_errstr2errno);
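A minimal usage sketch (editor's illustration; the literal string is just one example of an Rerror payload a server might send):

	char ename[] = "No such file or directory";
	int err = p9_errstr2errno(ename, strlen(ename));	/* yields -ENOENT */

Note the buffer must be writable: on an unknown string the function NUL-terminates it in place before logging.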

net/9p/mod.c (new file, 201 lines)
@@ -0,0 +1,201 @@
/*
* net/9p/mod.c
*
* 9P entry point
*
* Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net>
* Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
* Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to:
* Free Software Foundation
* 51 Franklin Street, Fifth Floor
* Boston, MA 02111-1301 USA
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/moduleparam.h>
#include <net/9p/9p.h>
#include <linux/fs.h>
#include <linux/parser.h>
#include <net/9p/client.h>
#include <net/9p/transport.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#ifdef CONFIG_NET_9P_DEBUG
unsigned int p9_debug_level = 0; /* feature-rific global debug level */
EXPORT_SYMBOL(p9_debug_level);
module_param_named(debug, p9_debug_level, uint, 0);
MODULE_PARM_DESC(debug, "9P debugging level");
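/* Editor's note (illustrative): p9_debug_level is a bitmask of
 * enum p9_debug_flags, so with CONFIG_NET_9P_DEBUG enabled, loading
 * with "modprobe 9pnet debug=0xffff" turns on verbose logging, while
 * the default of 0 keeps it silent.
 */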
void _p9_debug(enum p9_debug_flags level, const char *func,
const char *fmt, ...)
{
struct va_format vaf;
va_list args;
if ((p9_debug_level & level) != level)
return;
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
if (level == P9_DEBUG_9P)
pr_notice("(%8.8d) %pV", task_pid_nr(current), &vaf);
else
pr_notice("-- %s (%d): %pV", func, task_pid_nr(current), &vaf);
va_end(args);
}
EXPORT_SYMBOL(_p9_debug);
#endif
/*
* Dynamic Transport Registration Routines
*
*/
static DEFINE_SPINLOCK(v9fs_trans_lock);
static LIST_HEAD(v9fs_trans_list);
/**
* v9fs_register_trans - register a new transport with 9p
* @m: structure describing the transport module and entry points
*
*/
void v9fs_register_trans(struct p9_trans_module *m)
{
spin_lock(&v9fs_trans_lock);
list_add_tail(&m->list, &v9fs_trans_list);
spin_unlock(&v9fs_trans_lock);
}
EXPORT_SYMBOL(v9fs_register_trans);
/**
* v9fs_unregister_trans - unregister a 9p transport
* @m: the transport to remove
*
*/
void v9fs_unregister_trans(struct p9_trans_module *m)
{
spin_lock(&v9fs_trans_lock);
list_del_init(&m->list);
spin_unlock(&v9fs_trans_lock);
}
EXPORT_SYMBOL(v9fs_unregister_trans);
/**
* v9fs_get_trans_by_name - get transport with the matching name
* @s: string identifying transport
*
*/
struct p9_trans_module *v9fs_get_trans_by_name(char *s)
{
struct p9_trans_module *t, *found = NULL;
spin_lock(&v9fs_trans_lock);
list_for_each_entry(t, &v9fs_trans_list, list)
if (strcmp(t->name, s) == 0 &&
try_module_get(t->owner)) {
found = t;
break;
}
spin_unlock(&v9fs_trans_lock);
return found;
}
EXPORT_SYMBOL(v9fs_get_trans_by_name);
/**
* v9fs_get_default_trans - get the default transport
*
*/
struct p9_trans_module *v9fs_get_default_trans(void)
{
struct p9_trans_module *t, *found = NULL;
spin_lock(&v9fs_trans_lock);
list_for_each_entry(t, &v9fs_trans_list, list)
if (t->def && try_module_get(t->owner)) {
found = t;
break;
}
if (!found)
list_for_each_entry(t, &v9fs_trans_list, list)
if (try_module_get(t->owner)) {
found = t;
break;
}
spin_unlock(&v9fs_trans_lock);
return found;
}
EXPORT_SYMBOL(v9fs_get_default_trans);
/**
* v9fs_put_trans - drop a reference to a transport
* @m: transport to put
*
*/
void v9fs_put_trans(struct p9_trans_module *m)
{
if (m)
module_put(m->owner);
}
/**
* init_p9 - Initialize module
*
*/
static int __init init_p9(void)
{
int ret = 0;
p9_error_init();
pr_info("Installing 9P2000 support\n");
p9_trans_fd_init();
return ret;
}
/**
* exit_p9 - shutdown module
*
*/
static void __exit exit_p9(void)
{
pr_info("Unloading 9P2000 support\n");
p9_trans_fd_exit();
}
module_init(init_p9)
module_exit(exit_p9)
MODULE_AUTHOR("Latchesar Ionkov <lucho@ionkov.net>");
MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>");
MODULE_LICENSE("GPL");

net/9p/protocol.c (new file, 636 lines)
@@ -0,0 +1,636 @@
/*
* net/9p/protocol.c
*
* 9P Protocol Support Code
*
* Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com>
*
* Based on code from Anthony Liguori <aliguori@us.ibm.com>
* Copyright (C) 2008 by IBM, Corp.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to:
* Free Software Foundation
* 51 Franklin Street, Fifth Floor
* Boston, MA 02111-1301 USA
*
*/
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
#include "protocol.h"
#include <trace/events/9p.h>
static int
p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
void p9stat_free(struct p9_wstat *stbuf)
{
kfree(stbuf->name);
kfree(stbuf->uid);
kfree(stbuf->gid);
kfree(stbuf->muid);
kfree(stbuf->extension);
}
EXPORT_SYMBOL(p9stat_free);
size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)
{
size_t len = min(pdu->size - pdu->offset, size);
memcpy(data, &pdu->sdata[pdu->offset], len);
pdu->offset += len;
return size - len;
}
static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size)
{
size_t len = min(pdu->capacity - pdu->size, size);
memcpy(&pdu->sdata[pdu->size], data, len);
pdu->size += len;
return size - len;
}
static size_t
pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
{
size_t len = min(pdu->capacity - pdu->size, size);
if (copy_from_user(&pdu->sdata[pdu->size], udata, len))
len = 0;
pdu->size += len;
return size - len;
}
/*
b - int8_t
w - int16_t
d - int32_t
q - int64_t
s - string
u - numeric uid
g - numeric gid
S - stat
Q - qid
D - data blob (int32_t size followed by void *, results are not freed)
T - array of strings (int16_t count, followed by strings)
R - array of qids (int16_t count, followed by qids)
A - stat for 9p2000.L (p9_stat_dotl)
? - if optional = 1, continue parsing
*/
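/* Editor's illustration of the table above (assuming a struct p9_qid qid
 * is in scope): a count-prefixed name array is parsed with
 *	uint16_t nwname; char **wnames;
 *	err = p9pdu_readf(pdu, proto_version, "T", &nwname, &wnames);
 * and a qid is serialized with
 *	err = p9pdu_writef(pdu, proto_version, "Q", &qid);
 */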
static int
p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
va_list ap)
{
const char *ptr;
int errcode = 0;
for (ptr = fmt; *ptr; ptr++) {
switch (*ptr) {
case 'b':{
int8_t *val = va_arg(ap, int8_t *);
if (pdu_read(pdu, val, sizeof(*val))) {
errcode = -EFAULT;
break;
}
}
break;
case 'w':{
int16_t *val = va_arg(ap, int16_t *);
__le16 le_val;
if (pdu_read(pdu, &le_val, sizeof(le_val))) {
errcode = -EFAULT;
break;
}
*val = le16_to_cpu(le_val);
}
break;
case 'd':{
int32_t *val = va_arg(ap, int32_t *);
__le32 le_val;
if (pdu_read(pdu, &le_val, sizeof(le_val))) {
errcode = -EFAULT;
break;
}
*val = le32_to_cpu(le_val);
}
break;
case 'q':{
int64_t *val = va_arg(ap, int64_t *);
__le64 le_val;
if (pdu_read(pdu, &le_val, sizeof(le_val))) {
errcode = -EFAULT;
break;
}
*val = le64_to_cpu(le_val);
}
break;
case 's':{
char **sptr = va_arg(ap, char **);
uint16_t len;
errcode = p9pdu_readf(pdu, proto_version,
"w", &len);
if (errcode)
break;
*sptr = kmalloc(len + 1, GFP_NOFS);
if (*sptr == NULL) {
errcode = -EFAULT;
break;
}
if (pdu_read(pdu, *sptr, len)) {
errcode = -EFAULT;
kfree(*sptr);
*sptr = NULL;
} else
(*sptr)[len] = 0;
}
break;
case 'u': {
kuid_t *uid = va_arg(ap, kuid_t *);
__le32 le_val;
if (pdu_read(pdu, &le_val, sizeof(le_val))) {
errcode = -EFAULT;
break;
}
*uid = make_kuid(&init_user_ns,
le32_to_cpu(le_val));
} break;
case 'g': {
kgid_t *gid = va_arg(ap, kgid_t *);
__le32 le_val;
if (pdu_read(pdu, &le_val, sizeof(le_val))) {
errcode = -EFAULT;
break;
}
*gid = make_kgid(&init_user_ns,
le32_to_cpu(le_val));
} break;
case 'Q':{
struct p9_qid *qid =
va_arg(ap, struct p9_qid *);
errcode = p9pdu_readf(pdu, proto_version, "bdq",
&qid->type, &qid->version,
&qid->path);
}
break;
case 'S':{
struct p9_wstat *stbuf =
va_arg(ap, struct p9_wstat *);
memset(stbuf, 0, sizeof(struct p9_wstat));
stbuf->n_uid = stbuf->n_muid = INVALID_UID;
stbuf->n_gid = INVALID_GID;
errcode =
p9pdu_readf(pdu, proto_version,
"wwdQdddqssss?sugu",
&stbuf->size, &stbuf->type,
&stbuf->dev, &stbuf->qid,
&stbuf->mode, &stbuf->atime,
&stbuf->mtime, &stbuf->length,
&stbuf->name, &stbuf->uid,
&stbuf->gid, &stbuf->muid,
&stbuf->extension,
&stbuf->n_uid, &stbuf->n_gid,
&stbuf->n_muid);
if (errcode)
p9stat_free(stbuf);
}
break;
case 'D':{
uint32_t *count = va_arg(ap, uint32_t *);
void **data = va_arg(ap, void **);
errcode =
p9pdu_readf(pdu, proto_version, "d", count);
if (!errcode) {
*count =
min_t(uint32_t, *count,
pdu->size - pdu->offset);
*data = &pdu->sdata[pdu->offset];
}
}
break;
case 'T':{
uint16_t *nwname = va_arg(ap, uint16_t *);
char ***wnames = va_arg(ap, char ***);
errcode = p9pdu_readf(pdu, proto_version,
"w", nwname);
if (!errcode) {
*wnames =
kmalloc(sizeof(char *) * *nwname,
GFP_NOFS);
if (!*wnames)
errcode = -ENOMEM;
}
if (!errcode) {
int i;
for (i = 0; i < *nwname; i++) {
errcode =
p9pdu_readf(pdu,
proto_version,
"s",
&(*wnames)[i]);
if (errcode)
break;
}
}
if (errcode) {
if (*wnames) {
int i;
for (i = 0; i < *nwname; i++)
kfree((*wnames)[i]);
}
kfree(*wnames);
*wnames = NULL;
}
}
break;
case 'R':{
int16_t *nwqid = va_arg(ap, int16_t *);
struct p9_qid **wqids =
va_arg(ap, struct p9_qid **);
*wqids = NULL;
errcode =
p9pdu_readf(pdu, proto_version, "w", nwqid);
if (!errcode) {
*wqids =
kmalloc(*nwqid *
sizeof(struct p9_qid),
GFP_NOFS);
if (*wqids == NULL)
errcode = -ENOMEM;
}
if (!errcode) {
int i;
for (i = 0; i < *nwqid; i++) {
errcode =
p9pdu_readf(pdu,
proto_version,
"Q",
&(*wqids)[i]);
if (errcode)
break;
}
}
if (errcode) {
kfree(*wqids);
*wqids = NULL;
}
}
break;
case 'A': {
struct p9_stat_dotl *stbuf =
va_arg(ap, struct p9_stat_dotl *);
memset(stbuf, 0, sizeof(struct p9_stat_dotl));
errcode =
p9pdu_readf(pdu, proto_version,
"qQdugqqqqqqqqqqqqqqq",
&stbuf->st_result_mask,
&stbuf->qid,
&stbuf->st_mode,
&stbuf->st_uid, &stbuf->st_gid,
&stbuf->st_nlink,
&stbuf->st_rdev, &stbuf->st_size,
&stbuf->st_blksize, &stbuf->st_blocks,
&stbuf->st_atime_sec,
&stbuf->st_atime_nsec,
&stbuf->st_mtime_sec,
&stbuf->st_mtime_nsec,
&stbuf->st_ctime_sec,
&stbuf->st_ctime_nsec,
&stbuf->st_btime_sec,
&stbuf->st_btime_nsec,
&stbuf->st_gen,
&stbuf->st_data_version);
}
break;
case '?':
if ((proto_version != p9_proto_2000u) &&
(proto_version != p9_proto_2000L))
return 0;
break;
default:
BUG();
break;
}
if (errcode)
break;
}
return errcode;
}
int
p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
va_list ap)
{
const char *ptr;
int errcode = 0;
for (ptr = fmt; *ptr; ptr++) {
switch (*ptr) {
case 'b':{
int8_t val = va_arg(ap, int);
if (pdu_write(pdu, &val, sizeof(val)))
errcode = -EFAULT;
}
break;
case 'w':{
__le16 val = cpu_to_le16(va_arg(ap, int));
if (pdu_write(pdu, &val, sizeof(val)))
errcode = -EFAULT;
}
break;
case 'd':{
__le32 val = cpu_to_le32(va_arg(ap, int32_t));
if (pdu_write(pdu, &val, sizeof(val)))
errcode = -EFAULT;
}
break;
case 'q':{
__le64 val = cpu_to_le64(va_arg(ap, int64_t));
if (pdu_write(pdu, &val, sizeof(val)))
errcode = -EFAULT;
}
break;
case 's':{
const char *sptr = va_arg(ap, const char *);
uint16_t len = 0;
if (sptr)
len = min_t(size_t, strlen(sptr),
USHRT_MAX);
errcode = p9pdu_writef(pdu, proto_version,
"w", len);
if (!errcode && pdu_write(pdu, sptr, len))
errcode = -EFAULT;
}
break;
case 'u': {
kuid_t uid = va_arg(ap, kuid_t);
__le32 val = cpu_to_le32(
from_kuid(&init_user_ns, uid));
if (pdu_write(pdu, &val, sizeof(val)))
errcode = -EFAULT;
} break;
case 'g': {
kgid_t gid = va_arg(ap, kgid_t);
__le32 val = cpu_to_le32(
from_kgid(&init_user_ns, gid));
if (pdu_write(pdu, &val, sizeof(val)))
errcode = -EFAULT;
} break;
case 'Q':{
const struct p9_qid *qid =
va_arg(ap, const struct p9_qid *);
errcode =
p9pdu_writef(pdu, proto_version, "bdq",
qid->type, qid->version,
qid->path);
} break;
case 'S':{
const struct p9_wstat *stbuf =
va_arg(ap, const struct p9_wstat *);
errcode =
p9pdu_writef(pdu, proto_version,
"wwdQdddqssss?sugu",
stbuf->size, stbuf->type,
stbuf->dev, &stbuf->qid,
stbuf->mode, stbuf->atime,
stbuf->mtime, stbuf->length,
stbuf->name, stbuf->uid,
stbuf->gid, stbuf->muid,
stbuf->extension, stbuf->n_uid,
stbuf->n_gid, stbuf->n_muid);
} break;
case 'D':{
uint32_t count = va_arg(ap, uint32_t);
const void *data = va_arg(ap, const void *);
errcode = p9pdu_writef(pdu, proto_version, "d",
count);
if (!errcode && pdu_write(pdu, data, count))
errcode = -EFAULT;
}
break;
case 'U':{
int32_t count = va_arg(ap, int32_t);
const char __user *udata =
va_arg(ap, const void __user *);
errcode = p9pdu_writef(pdu, proto_version, "d",
count);
if (!errcode && pdu_write_u(pdu, udata, count))
errcode = -EFAULT;
}
break;
case 'T':{
uint16_t nwname = va_arg(ap, int);
const char **wnames = va_arg(ap, const char **);
errcode = p9pdu_writef(pdu, proto_version, "w",
nwname);
if (!errcode) {
int i;
for (i = 0; i < nwname; i++) {
errcode =
p9pdu_writef(pdu,
proto_version,
"s",
wnames[i]);
if (errcode)
break;
}
}
}
break;
case 'R':{
int16_t nwqid = va_arg(ap, int);
struct p9_qid *wqids =
va_arg(ap, struct p9_qid *);
errcode = p9pdu_writef(pdu, proto_version, "w",
nwqid);
if (!errcode) {
int i;
for (i = 0; i < nwqid; i++) {
errcode =
p9pdu_writef(pdu,
proto_version,
"Q",
&wqids[i]);
if (errcode)
break;
}
}
}
break;
case 'I':{
struct p9_iattr_dotl *p9attr = va_arg(ap,
struct p9_iattr_dotl *);
errcode = p9pdu_writef(pdu, proto_version,
"ddugqqqqq",
p9attr->valid,
p9attr->mode,
p9attr->uid,
p9attr->gid,
p9attr->size,
p9attr->atime_sec,
p9attr->atime_nsec,
p9attr->mtime_sec,
p9attr->mtime_nsec);
}
break;
case '?':
if ((proto_version != p9_proto_2000u) &&
(proto_version != p9_proto_2000L))
return 0;
break;
default:
BUG();
break;
}
if (errcode)
break;
}
return errcode;
}
int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...)
{
va_list ap;
int ret;
va_start(ap, fmt);
ret = p9pdu_vreadf(pdu, proto_version, fmt, ap);
va_end(ap);
return ret;
}
static int
p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...)
{
va_list ap;
int ret;
va_start(ap, fmt);
ret = p9pdu_vwritef(pdu, proto_version, fmt, ap);
va_end(ap);
return ret;
}
int p9stat_read(struct p9_client *clnt, char *buf, int len, struct p9_wstat *st)
{
struct p9_fcall fake_pdu;
int ret;
fake_pdu.size = len;
fake_pdu.capacity = len;
fake_pdu.sdata = buf;
fake_pdu.offset = 0;
ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "S", st);
if (ret) {
p9_debug(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret);
trace_9p_protocol_dump(clnt, &fake_pdu);
}
return ret;
}
EXPORT_SYMBOL(p9stat_read);
int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type)
{
pdu->id = type;
return p9pdu_writef(pdu, 0, "dbw", 0, type, tag);
}
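/* Editor's note: the "dbw" above emits the standard 7-byte 9P header,
 * size[4] type[1] tag[2], with size written as 0 here and patched in
 * by p9pdu_finalize() below once the full PDU length is known.
 */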
int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu)
{
int size = pdu->size;
int err;
pdu->size = 0;
err = p9pdu_writef(pdu, 0, "d", size);
pdu->size = size;
trace_9p_protocol_dump(clnt, pdu);
p9_debug(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n",
pdu->size, pdu->id, pdu->tag);
return err;
}
void p9pdu_reset(struct p9_fcall *pdu)
{
pdu->offset = 0;
pdu->size = 0;
}
int p9dirent_read(struct p9_client *clnt, char *buf, int len,
struct p9_dirent *dirent)
{
struct p9_fcall fake_pdu;
int ret;
char *nameptr;
fake_pdu.size = len;
fake_pdu.capacity = len;
fake_pdu.sdata = buf;
fake_pdu.offset = 0;
ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "Qqbs", &dirent->qid,
&dirent->d_off, &dirent->d_type, &nameptr);
if (ret) {
p9_debug(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret);
trace_9p_protocol_dump(clnt, &fake_pdu);
goto out;
}
strcpy(dirent->d_name, nameptr);
kfree(nameptr);
out:
return fake_pdu.offset;
}
EXPORT_SYMBOL(p9dirent_read);

net/9p/protocol.h (new file, 34 lines)
@@ -0,0 +1,34 @@
/*
* net/9p/protocol.h
*
* 9P Protocol Support Code
*
* Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com>
*
* Based on code from Anthony Liguori <aliguori@us.ibm.com>
* Copyright (C) 2008 by IBM, Corp.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to:
* Free Software Foundation
* 51 Franklin Street, Fifth Floor
* Boston, MA 02111-1301 USA
*
*/
int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
va_list ap);
int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type);
int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu);
void p9pdu_reset(struct p9_fcall *pdu);
size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size);

net/9p/trans_common.c (new file, 69 lines)
@@ -0,0 +1,69 @@
/*
* Copyright IBM Corporation, 2010
* Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2.1 of the GNU Lesser General Public License
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
*/
#include <linux/slab.h>
#include <linux/module.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
#include <linux/scatterlist.h>
#include "trans_common.h"
/**
* p9_release_pages - release (unpin) pages after the transaction
* @pages: array of pages to release
* @nr_pages: number of entries in @pages
*/
void p9_release_pages(struct page **pages, int nr_pages)
{
int i;
for (i = 0; i < nr_pages; i++)
if (pages[i])
put_page(pages[i]);
}
EXPORT_SYMBOL(p9_release_pages);
/**
* p9_nr_pages - return the number of pages needed to accommodate the payload
* @data: start of the payload buffer
* @len: payload length in bytes
*/
int p9_nr_pages(char *data, int len)
{
unsigned long start_page, end_page;
start_page = (unsigned long)data >> PAGE_SHIFT;
end_page = ((unsigned long)data + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
return end_page - start_page;
}
EXPORT_SYMBOL(p9_nr_pages);
/**
* p9_payload_gup - translate a user buffer into kernel pages and pin
* them for read or write through get_user_pages_fast().
* @data: start of the user buffer
* @nr_pages: in: pages needed for the payload; out: pages actually pinned
* (gup may return fewer pages than requested)
* @pages: page array to fill in
* @write: indicates whether the pages will be written to
*/
int p9_payload_gup(char *data, int *nr_pages, struct page **pages, int write)
{
int nr_mapped_pages;
nr_mapped_pages = get_user_pages_fast((unsigned long)data,
*nr_pages, write, pages);
if (nr_mapped_pages <= 0)
return nr_mapped_pages;
*nr_pages = nr_mapped_pages;
return 0;
}
EXPORT_SYMBOL(p9_payload_gup);
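A sketch of how a transport combines these helpers for zero-copy I/O (hypothetical wrapper, mirroring what p9_get_mapped_pages() in trans_virtio.c does for user buffers):

static int pin_user_buffer(char *udata, int len,
			   struct page ***pagesp, int *nr_pagesp)
{
	int err;
	int nr = p9_nr_pages(udata, len);	/* pages spanned by the buffer */
	struct page **pages = kmalloc(sizeof(*pages) * nr, GFP_NOFS);

	if (!pages)
		return -ENOMEM;
	err = p9_payload_gup(udata, &nr, pages, 1);	/* 1: pages will be written */
	if (err < 0) {
		kfree(pages);
		return err;
	}
	*pagesp = pages;
	*nr_pagesp = nr;	/* gup may have pinned fewer pages than asked */
	return 0;	/* caller later does p9_release_pages(pages, nr) */
}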

net/9p/trans_common.h (new file, 17 lines)
@@ -0,0 +1,17 @@
/*
* Copyright IBM Corporation, 2010
* Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2.1 of the GNU Lesser General Public License
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
*/
void p9_release_pages(struct page **, int);
int p9_payload_gup(char *, int *, struct page **, int);
int p9_nr_pages(char *, int);

net/9p/trans_fd.c (new file, 1118 lines)
File diff suppressed because it is too large.

net/9p/trans_rdma.c (new file, 765 lines)
@@ -0,0 +1,765 @@
/*
* net/9p/trans_rdma.c
*
* RDMA transport layer based on the trans_fd.c implementation.
*
* Copyright (C) 2008 by Tom Tucker <tom@opengridcomputing.com>
* Copyright (C) 2006 by Russ Cox <rsc@swtch.com>
* Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
* Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com>
* Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to:
* Free Software Foundation
* 51 Franklin Street, Fifth Floor
* Boston, MA 02111-1301 USA
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/in.h>
#include <linux/module.h>
#include <linux/net.h>
#include <linux/ipv6.h>
#include <linux/kthread.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/un.h>
#include <linux/uaccess.h>
#include <linux/inet.h>
#include <linux/idr.h>
#include <linux/file.h>
#include <linux/parser.h>
#include <linux/semaphore.h>
#include <linux/slab.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
#include <net/9p/transport.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#define P9_PORT 5640
#define P9_RDMA_SQ_DEPTH 32
#define P9_RDMA_RQ_DEPTH 32
#define P9_RDMA_SEND_SGE 4
#define P9_RDMA_RECV_SGE 4
#define P9_RDMA_IRD 0
#define P9_RDMA_ORD 0
#define P9_RDMA_TIMEOUT 30000 /* 30 seconds */
#define P9_RDMA_MAXSIZE (1024*1024) /* 1MB */
/**
* struct p9_trans_rdma - RDMA transport instance
*
* @state: tracks the transport state machine for connection setup and tear down
* @cm_id: The RDMA CM ID
* @pd: Protection Domain pointer
* @qp: Queue Pair pointer
* @cq: Completion Queue pointer
* @dm_mr: DMA Memory Region pointer
* @lkey: The local access only memory region key
* @timeout: Number of uSecs to wait for connection management events
* @sq_depth: The depth of the Send Queue
* @sq_sem: Semaphore for the SQ
* @rq_depth: The depth of the Receive Queue.
* @rq_sem: Semaphore for the RQ
* @excess_rc : Amount of posted Receive Contexts without a pending request.
* See rdma_request()
* @addr: The remote peer's address
* @req_lock: Protects the active request list
* @cm_done: Completion event for connection management tracking
*/
struct p9_trans_rdma {
enum {
P9_RDMA_INIT,
P9_RDMA_ADDR_RESOLVED,
P9_RDMA_ROUTE_RESOLVED,
P9_RDMA_CONNECTED,
P9_RDMA_FLUSHING,
P9_RDMA_CLOSING,
P9_RDMA_CLOSED,
} state;
struct rdma_cm_id *cm_id;
struct ib_pd *pd;
struct ib_qp *qp;
struct ib_cq *cq;
struct ib_mr *dma_mr;
u32 lkey;
long timeout;
int sq_depth;
struct semaphore sq_sem;
int rq_depth;
struct semaphore rq_sem;
atomic_t excess_rc;
struct sockaddr_in addr;
spinlock_t req_lock;
struct completion cm_done;
};
/**
* p9_rdma_context - Keeps track of in-process WR
*
* @wc_op: The original WR op for when the CQE completes in error.
* @busa: Bus address to unmap when the WR completes
* @req: Keeps track of requests (send)
* @rc: Keeps track of replies (receive)
*/
struct p9_rdma_req;
struct p9_rdma_context {
enum ib_wc_opcode wc_op;
dma_addr_t busa;
union {
struct p9_req_t *req;
struct p9_fcall *rc;
};
};
/**
* p9_rdma_opts - Collection of mount options
* @port: port of connection
* @sq_depth: The requested depth of the SQ. This really doesn't need
* to be any deeper than the number of threads used in the client
* @rq_depth: The depth of the RQ. Should be greater than or equal to SQ depth
* @timeout: Time to wait in msecs for CM events
*/
struct p9_rdma_opts {
short port;
int sq_depth;
int rq_depth;
long timeout;
};
/*
* Option Parsing (code inspired by NFS code)
*/
enum {
/* Options that take integer arguments */
Opt_port, Opt_rq_depth, Opt_sq_depth, Opt_timeout, Opt_err,
};
static match_table_t tokens = {
{Opt_port, "port=%u"},
{Opt_sq_depth, "sq=%u"},
{Opt_rq_depth, "rq=%u"},
{Opt_timeout, "timeout=%u"},
{Opt_err, NULL},
};
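/* Editor's note (illustrative): these tokens are parsed out of the
 * 9P mount option string, e.g. "trans=rdma,port=5640,sq=16,rq=32";
 * the trans= option itself is consumed earlier by the client core.
 */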
/**
* parse_opts - parse mount options into rdma options structure
* @params: options string passed from mount
* @opts: rdma transport-specific structure to parse options into
*
* Returns 0 upon success, -ERRNO upon failure
*/
static int parse_opts(char *params, struct p9_rdma_opts *opts)
{
char *p;
substring_t args[MAX_OPT_ARGS];
int option;
char *options, *tmp_options;
opts->port = P9_PORT;
opts->sq_depth = P9_RDMA_SQ_DEPTH;
opts->rq_depth = P9_RDMA_RQ_DEPTH;
opts->timeout = P9_RDMA_TIMEOUT;
if (!params)
return 0;
tmp_options = kstrdup(params, GFP_KERNEL);
if (!tmp_options) {
p9_debug(P9_DEBUG_ERROR,
"failed to allocate copy of option string\n");
return -ENOMEM;
}
options = tmp_options;
while ((p = strsep(&options, ",")) != NULL) {
int token;
int r;
if (!*p)
continue;
token = match_token(p, tokens, args);
if (token == Opt_err)
continue;
r = match_int(&args[0], &option);
if (r < 0) {
p9_debug(P9_DEBUG_ERROR,
"integer field, but no integer?\n");
continue;
}
switch (token) {
case Opt_port:
opts->port = option;
break;
case Opt_sq_depth:
opts->sq_depth = option;
break;
case Opt_rq_depth:
opts->rq_depth = option;
break;
case Opt_timeout:
opts->timeout = option;
break;
default:
continue;
}
}
/* RQ must be at least as large as the SQ */
opts->rq_depth = max(opts->rq_depth, opts->sq_depth);
kfree(tmp_options);
return 0;
}
static int
p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
struct p9_client *c = id->context;
struct p9_trans_rdma *rdma = c->trans;
switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED:
BUG_ON(rdma->state != P9_RDMA_INIT);
rdma->state = P9_RDMA_ADDR_RESOLVED;
break;
case RDMA_CM_EVENT_ROUTE_RESOLVED:
BUG_ON(rdma->state != P9_RDMA_ADDR_RESOLVED);
rdma->state = P9_RDMA_ROUTE_RESOLVED;
break;
case RDMA_CM_EVENT_ESTABLISHED:
BUG_ON(rdma->state != P9_RDMA_ROUTE_RESOLVED);
rdma->state = P9_RDMA_CONNECTED;
break;
case RDMA_CM_EVENT_DISCONNECTED:
if (rdma)
rdma->state = P9_RDMA_CLOSED;
if (c)
c->status = Disconnected;
break;
case RDMA_CM_EVENT_TIMEWAIT_EXIT:
break;
case RDMA_CM_EVENT_ADDR_CHANGE:
case RDMA_CM_EVENT_ROUTE_ERROR:
case RDMA_CM_EVENT_DEVICE_REMOVAL:
case RDMA_CM_EVENT_MULTICAST_JOIN:
case RDMA_CM_EVENT_MULTICAST_ERROR:
case RDMA_CM_EVENT_REJECTED:
case RDMA_CM_EVENT_CONNECT_REQUEST:
case RDMA_CM_EVENT_CONNECT_RESPONSE:
case RDMA_CM_EVENT_CONNECT_ERROR:
case RDMA_CM_EVENT_ADDR_ERROR:
case RDMA_CM_EVENT_UNREACHABLE:
c->status = Disconnected;
rdma_disconnect(rdma->cm_id);
break;
default:
BUG();
}
complete(&rdma->cm_done);
return 0;
}
static void
handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len)
{
struct p9_req_t *req;
int err = 0;
int16_t tag;
req = NULL;
ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize,
DMA_FROM_DEVICE);
if (status != IB_WC_SUCCESS)
goto err_out;
err = p9_parse_header(c->rc, NULL, NULL, &tag, 1);
if (err)
goto err_out;
req = p9_tag_lookup(client, tag);
if (!req)
goto err_out;
/* Check that we have not yet received a reply for this request.
*/
if (unlikely(req->rc)) {
pr_err("Duplicate reply for request %d", tag);
goto err_out;
}
req->rc = c->rc;
p9_client_cb(client, req, REQ_STATUS_RCVD);
return;
err_out:
p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", req, err, status);
rdma->state = P9_RDMA_FLUSHING;
client->status = Disconnected;
}
static void
handle_send(struct p9_client *client, struct p9_trans_rdma *rdma,
struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len)
{
ib_dma_unmap_single(rdma->cm_id->device,
c->busa, c->req->tc->size,
DMA_TO_DEVICE);
}
static void qp_event_handler(struct ib_event *event, void *context)
{
p9_debug(P9_DEBUG_ERROR, "QP event %d context %p\n",
event->event, context);
}
static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
{
struct p9_client *client = cq_context;
struct p9_trans_rdma *rdma = client->trans;
int ret;
struct ib_wc wc;
ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);
while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
struct p9_rdma_context *c = (void *) (unsigned long) wc.wr_id;
switch (c->wc_op) {
case IB_WC_RECV:
handle_recv(client, rdma, c, wc.status, wc.byte_len);
up(&rdma->rq_sem);
break;
case IB_WC_SEND:
handle_send(client, rdma, c, wc.status, wc.byte_len);
up(&rdma->sq_sem);
break;
default:
pr_err("unexpected completion type, c->wc_op=%d, wc.opcode=%d, status=%d\n",
c->wc_op, wc.opcode, wc.status);
break;
}
kfree(c);
}
}
static void cq_event_handler(struct ib_event *e, void *v)
{
p9_debug(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v);
}
static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
{
if (!rdma)
return;
if (rdma->dma_mr && !IS_ERR(rdma->dma_mr))
ib_dereg_mr(rdma->dma_mr);
if (rdma->qp && !IS_ERR(rdma->qp))
ib_destroy_qp(rdma->qp);
if (rdma->pd && !IS_ERR(rdma->pd))
ib_dealloc_pd(rdma->pd);
if (rdma->cq && !IS_ERR(rdma->cq))
ib_destroy_cq(rdma->cq);
if (rdma->cm_id && !IS_ERR(rdma->cm_id))
rdma_destroy_id(rdma->cm_id);
kfree(rdma);
}
static int
post_recv(struct p9_client *client, struct p9_rdma_context *c)
{
struct p9_trans_rdma *rdma = client->trans;
struct ib_recv_wr wr, *bad_wr;
struct ib_sge sge;
c->busa = ib_dma_map_single(rdma->cm_id->device,
c->rc->sdata, client->msize,
DMA_FROM_DEVICE);
if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
goto error;
sge.addr = c->busa;
sge.length = client->msize;
sge.lkey = rdma->lkey;
wr.next = NULL;
c->wc_op = IB_WC_RECV;
wr.wr_id = (unsigned long) c;
wr.sg_list = &sge;
wr.num_sge = 1;
return ib_post_recv(rdma->qp, &wr, &bad_wr);
error:
p9_debug(P9_DEBUG_ERROR, "EIO\n");
return -EIO;
}
static int rdma_request(struct p9_client *client, struct p9_req_t *req)
{
struct p9_trans_rdma *rdma = client->trans;
struct ib_send_wr wr, *bad_wr;
struct ib_sge sge;
int err = 0;
unsigned long flags;
struct p9_rdma_context *c = NULL;
struct p9_rdma_context *rpl_context = NULL;
/* When an error occurs between posting the recv and the send,
* there will be a receive context posted without a pending request.
* Since there is no way to "un-post" it, we remember it and skip
* post_recv() for the next request.
* So here,
* see if we are this `next request' and need to absorb an excess rc.
* If yes, then drop and free our own, and do not recv_post().
**/
if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
/* Got one ! */
kfree(req->rc);
req->rc = NULL;
goto dont_need_post_recv;
} else {
/* We raced and lost. */
atomic_inc(&rdma->excess_rc);
}
}
/* Allocate an fcall for the reply */
rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
if (!rpl_context) {
err = -ENOMEM;
goto recv_error;
}
rpl_context->rc = req->rc;
/*
* Post a receive buffer for this request. We need to ensure
* there is a reply buffer available for every outstanding
* request. A flushed request can result in no reply for an
* outstanding request, so we must keep a count to avoid
* overflowing the RQ.
*/
if (down_interruptible(&rdma->rq_sem)) {
err = -EINTR;
goto recv_error;
}
err = post_recv(client, rpl_context);
if (err) {
p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
goto recv_error;
}
/* remove posted receive buffer from request structure */
req->rc = NULL;
dont_need_post_recv:
/* Post the request */
c = kmalloc(sizeof *c, GFP_NOFS);
if (!c) {
err = -ENOMEM;
goto send_error;
}
c->req = req;
c->busa = ib_dma_map_single(rdma->cm_id->device,
c->req->tc->sdata, c->req->tc->size,
DMA_TO_DEVICE);
if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
err = -EIO;
goto send_error;
}
sge.addr = c->busa;
sge.length = c->req->tc->size;
sge.lkey = rdma->lkey;
wr.next = NULL;
c->wc_op = IB_WC_SEND;
wr.wr_id = (unsigned long) c;
wr.opcode = IB_WR_SEND;
wr.send_flags = IB_SEND_SIGNALED;
wr.sg_list = &sge;
wr.num_sge = 1;
if (down_interruptible(&rdma->sq_sem)) {
err = -EINTR;
goto send_error;
}
/* Mark request as `sent' *before* we actually send it,
* because doing it after could erase the REQ_STATUS_RCVD
* status in case of a very fast reply.
*/
req->status = REQ_STATUS_SENT;
err = ib_post_send(rdma->qp, &wr, &bad_wr);
if (err)
goto send_error;
/* Success */
return 0;
/* Handle errors that happened during or while preparing the send: */
send_error:
req->status = REQ_STATUS_ERROR;
kfree(c);
p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);
/* Ach.
* We did recv_post(), but not send. We have one recv_post in excess.
*/
atomic_inc(&rdma->excess_rc);
return err;
/* Handle errors that happened during or while preparing post_recv(): */
recv_error:
kfree(rpl_context);
spin_lock_irqsave(&rdma->req_lock, flags);
if (rdma->state < P9_RDMA_CLOSING) {
rdma->state = P9_RDMA_CLOSING;
spin_unlock_irqrestore(&rdma->req_lock, flags);
rdma_disconnect(rdma->cm_id);
} else
spin_unlock_irqrestore(&rdma->req_lock, flags);
return err;
}
static void rdma_close(struct p9_client *client)
{
struct p9_trans_rdma *rdma;
if (!client)
return;
rdma = client->trans;
if (!rdma)
return;
client->status = Disconnected;
rdma_disconnect(rdma->cm_id);
rdma_destroy_trans(rdma);
}
/**
* alloc_rdma - Allocate and initialize the rdma transport structure
* @opts: Mount options structure
*/
static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
{
struct p9_trans_rdma *rdma;
rdma = kzalloc(sizeof(struct p9_trans_rdma), GFP_KERNEL);
if (!rdma)
return NULL;
rdma->sq_depth = opts->sq_depth;
rdma->rq_depth = opts->rq_depth;
rdma->timeout = opts->timeout;
spin_lock_init(&rdma->req_lock);
init_completion(&rdma->cm_done);
sema_init(&rdma->sq_sem, rdma->sq_depth);
sema_init(&rdma->rq_sem, rdma->rq_depth);
atomic_set(&rdma->excess_rc, 0);
return rdma;
}
static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
{
/* Nothing to do here.
* We will take care of it (if we have to) in rdma_cancelled()
*/
return 1;
}
/* A request has been fully flushed without a reply.
* That means we have posted one buffer in excess.
*/
static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
{
struct p9_trans_rdma *rdma = client->trans;
atomic_inc(&rdma->excess_rc);
return 0;
}
/**
* rdma_create_trans - transport method for creating a transport instance
* @client: client instance
* @addr: IP address string
* @args: Mount options string
*/
static int
rdma_create_trans(struct p9_client *client, const char *addr, char *args)
{
int err;
struct p9_rdma_opts opts;
struct p9_trans_rdma *rdma;
struct rdma_conn_param conn_param;
struct ib_qp_init_attr qp_attr;
struct ib_device_attr devattr;
/* Parse the transport specific mount options */
err = parse_opts(args, &opts);
if (err < 0)
return err;
/* Create and initialize the RDMA transport structure */
rdma = alloc_rdma(&opts);
if (!rdma)
return -ENOMEM;
/* Create the RDMA CM ID */
rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP,
IB_QPT_RC);
if (IS_ERR(rdma->cm_id))
goto error;
/* Associate the client with the transport */
client->trans = rdma;
/* Resolve the server's address */
rdma->addr.sin_family = AF_INET;
rdma->addr.sin_addr.s_addr = in_aton(addr);
rdma->addr.sin_port = htons(opts.port);
err = rdma_resolve_addr(rdma->cm_id, NULL,
(struct sockaddr *)&rdma->addr,
rdma->timeout);
if (err)
goto error;
err = wait_for_completion_interruptible(&rdma->cm_done);
if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED))
goto error;
/* Resolve the route to the server */
err = rdma_resolve_route(rdma->cm_id, rdma->timeout);
if (err)
goto error;
err = wait_for_completion_interruptible(&rdma->cm_done);
if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED))
goto error;
/* Query the device attributes */
err = ib_query_device(rdma->cm_id->device, &devattr);
if (err)
goto error;
/* Create the Completion Queue */
rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler,
cq_event_handler, client,
opts.sq_depth + opts.rq_depth + 1, 0);
if (IS_ERR(rdma->cq))
goto error;
ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);
/* Create the Protection Domain */
rdma->pd = ib_alloc_pd(rdma->cm_id->device);
if (IS_ERR(rdma->pd))
goto error;
/* Cache the DMA lkey in the transport */
rdma->dma_mr = NULL;
if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
rdma->lkey = rdma->cm_id->device->local_dma_lkey;
else {
rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(rdma->dma_mr))
goto error;
rdma->lkey = rdma->dma_mr->lkey;
}
/* Create the Queue Pair */
memset(&qp_attr, 0, sizeof qp_attr);
qp_attr.event_handler = qp_event_handler;
qp_attr.qp_context = client;
qp_attr.cap.max_send_wr = opts.sq_depth;
qp_attr.cap.max_recv_wr = opts.rq_depth;
qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE;
qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE;
qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
qp_attr.qp_type = IB_QPT_RC;
qp_attr.send_cq = rdma->cq;
qp_attr.recv_cq = rdma->cq;
err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr);
if (err)
goto error;
rdma->qp = rdma->cm_id->qp;
/* Request a connection */
memset(&conn_param, 0, sizeof(conn_param));
conn_param.private_data = NULL;
conn_param.private_data_len = 0;
conn_param.responder_resources = P9_RDMA_IRD;
conn_param.initiator_depth = P9_RDMA_ORD;
err = rdma_connect(rdma->cm_id, &conn_param);
if (err)
goto error;
err = wait_for_completion_interruptible(&rdma->cm_done);
if (err || (rdma->state != P9_RDMA_CONNECTED))
goto error;
client->status = Connected;
return 0;
error:
rdma_destroy_trans(rdma);
return -ENOTCONN;
}
static struct p9_trans_module p9_rdma_trans = {
.name = "rdma",
.maxsize = P9_RDMA_MAXSIZE,
.def = 0,
.owner = THIS_MODULE,
.create = rdma_create_trans,
.close = rdma_close,
.request = rdma_request,
.cancel = rdma_cancel,
.cancelled = rdma_cancelled,
};
/**
* p9_trans_rdma_init - Register the 9P RDMA transport driver
*/
static int __init p9_trans_rdma_init(void)
{
v9fs_register_trans(&p9_rdma_trans);
return 0;
}
static void __exit p9_trans_rdma_exit(void)
{
v9fs_unregister_trans(&p9_rdma_trans);
}
module_init(p9_trans_rdma_init);
module_exit(p9_trans_rdma_exit);
MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
MODULE_DESCRIPTION("RDMA Transport for 9P");
MODULE_LICENSE("Dual BSD/GPL");

net/9p/trans_virtio.c (new file, 730 lines)
@@ -0,0 +1,730 @@
/*
* The Virtio 9p transport driver
*
* This is a block based transport driver based on the lguest block driver
* code.
*
* Copyright (C) 2007, 2008 Eric Van Hensbergen, IBM Corporation
*
* Based on virtio console driver
* Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to:
* Free Software Foundation
* 51 Franklin Street, Fifth Floor
* Boston, MA 02111-1301 USA
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/in.h>
#include <linux/module.h>
#include <linux/net.h>
#include <linux/ipv6.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/un.h>
#include <linux/uaccess.h>
#include <linux/inet.h>
#include <linux/idr.h>
#include <linux/file.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <net/9p/9p.h>
#include <linux/parser.h>
#include <net/9p/client.h>
#include <net/9p/transport.h>
#include <linux/scatterlist.h>
#include <linux/swap.h>
#include <linux/virtio.h>
#include <linux/virtio_9p.h>
#include "trans_common.h"
#define VIRTQUEUE_NUM 128
/* a single mutex to manage channel initialization and attachment */
static DEFINE_MUTEX(virtio_9p_lock);
static DECLARE_WAIT_QUEUE_HEAD(vp_wq);
static atomic_t vp_pinned = ATOMIC_INIT(0);
/**
* struct virtio_chan - per-instance transport information
* @inuse: whether the channel is in use
* @lock: protects multiple elements within this structure
* @client: client instance
* @vdev: virtio dev associated with this channel
* @vq: virtio queue associated with this channel
* @sg: scatter gather list which is used to pack a request (protected?)
*
* We keep all per-channel information in a structure.
* This structure is allocated within the devices dev->mem space.
* A pointer to the structure will get put in the transport private.
*
*/
struct virtio_chan {
bool inuse;
spinlock_t lock;
struct p9_client *client;
struct virtio_device *vdev;
struct virtqueue *vq;
int ring_bufs_avail;
wait_queue_head_t *vc_wq;
/* This is a global limit. Since we don't have a global structure,
* we place it in each channel.
*/
unsigned long p9_max_pages;
/* Scatterlist: can be too big for stack. */
struct scatterlist sg[VIRTQUEUE_NUM];
int tag_len;
/*
* tag name to identify a mount; not NUL-terminated
*/
char *tag;
struct list_head chan_list;
};
static struct list_head virtio_chan_list;
/* How many bytes left in this page. */
static unsigned int rest_of_page(void *data)
{
return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE);
}
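/* Editor's note: e.g. with 4096-byte pages, a buffer whose address has
 * page offset 0xf00 (3840) has 256 bytes left in its page, so that is
 * the largest chunk the packing loops below can take in one segment.
 */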
/**
* p9_virtio_close - reclaim resources of a channel
* @client: client instance
*
* This reclaims a channel by freeing its resources and
* resetting its inuse flag.
*
*/
static void p9_virtio_close(struct p9_client *client)
{
struct virtio_chan *chan = client->trans;
mutex_lock(&virtio_9p_lock);
if (chan)
chan->inuse = false;
mutex_unlock(&virtio_9p_lock);
}
/**
* req_done - callback which signals activity from the server
* @vq: virtio queue activity was received on
*
* This notifies us that the server has triggered some activity
* on the virtio channel - most likely a response to a request we
* sent. Figure out which requests now have responses and wake up
* those threads.
*
* Bugs: could do with some additional sanity checking, but appears to work.
*
*/
static void req_done(struct virtqueue *vq)
{
struct virtio_chan *chan = vq->vdev->priv;
struct p9_fcall *rc;
unsigned int len;
struct p9_req_t *req;
unsigned long flags;
p9_debug(P9_DEBUG_TRANS, ": request done\n");
while (1) {
spin_lock_irqsave(&chan->lock, flags);
rc = virtqueue_get_buf(chan->vq, &len);
if (rc == NULL) {
spin_unlock_irqrestore(&chan->lock, flags);
break;
}
chan->ring_bufs_avail = 1;
spin_unlock_irqrestore(&chan->lock, flags);
/* Wakeup if anyone waiting for VirtIO ring space. */
wake_up(chan->vc_wq);
p9_debug(P9_DEBUG_TRANS, ": rc %p\n", rc);
p9_debug(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
req = p9_tag_lookup(chan->client, rc->tag);
p9_client_cb(chan->client, req, REQ_STATUS_RCVD);
}
}
/**
* pack_sg_list - pack a scatter gather list from a linear buffer
* @sg: scatter/gather list to pack into
* @start: which segment of the sg_list to start at
* @limit: maximum segment to pack data to
* @data: data to pack into scatter/gather list
* @count: amount of data to pack into the scatter/gather list
*
* sg_lists have multiple segments of various sizes. This will pack
* arbitrary data into an existing scatter gather list, segmenting the
* data as necessary within constraints.
*
*/
static int pack_sg_list(struct scatterlist *sg, int start,
int limit, char *data, int count)
{
int s;
int index = start;
while (count) {
s = rest_of_page(data);
if (s > count)
s = count;
BUG_ON(index > limit);
/* Make sure we don't terminate early. */
sg_unmark_end(&sg[index]);
sg_set_buf(&sg[index++], data, s);
count -= s;
data += s;
}
if (index-start)
sg_mark_end(&sg[index - 1]);
return index-start;
}
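/* Editor's illustration: packing 6000 bytes that start 3840 bytes into
 * a 4096-byte page yields three segments (256 + 4096 + 1648) and
 * returns 3, so a caller chains the next pack at index start + 3,
 * exactly as p9_virtio_request() does with 'out' below.
 */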
/* We don't currently allow canceling of virtio requests */
static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
{
return 1;
}
/**
* pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer,
* this takes a list of pages.
* @sg: scatter/gather list to pack into
* @start: which segment of the sg_list to start at
* @limit: maximum segment to pack data to
* @pdata: a list of pages to add into sg.
* @nr_pages: number of pages to pack into the scatter/gather list
* @data: data to pack into scatter/gather list
* @count: amount of data to pack into the scatter/gather list
*/
static int
pack_sg_list_p(struct scatterlist *sg, int start, int limit,
struct page **pdata, int nr_pages, char *data, int count)
{
int i = 0, s;
int data_off;
int index = start;
BUG_ON(nr_pages > (limit - start));
/*
* if the first page doesn't start at
* page boundary find the offset
*/
data_off = offset_in_page(data);
while (nr_pages) {
s = rest_of_page(data);
if (s > count)
s = count;
/* Make sure we don't terminate early. */
sg_unmark_end(&sg[index]);
sg_set_page(&sg[index++], pdata[i++], s, data_off);
data_off = 0;
data += s;
count -= s;
nr_pages--;
}
if (index-start)
sg_mark_end(&sg[index - 1]);
return index - start;
}
/**
* p9_virtio_request - issue a request
* @client: client instance issuing the request
* @req: request to be issued
*
*/
static int
p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
{
int err;
int in, out, out_sgs, in_sgs;
unsigned long flags;
struct virtio_chan *chan = client->trans;
struct scatterlist *sgs[2];
p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n");
req->status = REQ_STATUS_SENT;
req_retry:
spin_lock_irqsave(&chan->lock, flags);
out_sgs = in_sgs = 0;
/* Handle out VirtIO ring buffers */
out = pack_sg_list(chan->sg, 0,
VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
if (out)
sgs[out_sgs++] = chan->sg;
in = pack_sg_list(chan->sg, out,
VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
if (in)
sgs[out_sgs + in_sgs++] = chan->sg + out;
err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc,
GFP_ATOMIC);
if (err < 0) {
if (err == -ENOSPC) {
chan->ring_bufs_avail = 0;
spin_unlock_irqrestore(&chan->lock, flags);
err = wait_event_interruptible(*chan->vc_wq,
chan->ring_bufs_avail);
if (err == -ERESTARTSYS)
return err;
p9_debug(P9_DEBUG_TRANS, "Retry virtio request\n");
goto req_retry;
} else {
spin_unlock_irqrestore(&chan->lock, flags);
p9_debug(P9_DEBUG_TRANS,
"virtio rpc add_sgs returned failure\n");
return -EIO;
}
}
virtqueue_kick(chan->vq);
spin_unlock_irqrestore(&chan->lock, flags);
p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n");
return 0;
}
static int p9_get_mapped_pages(struct virtio_chan *chan,
struct page **pages, char *data,
int nr_pages, int write, int kern_buf)
{
int err;
if (!kern_buf) {
/*
* We allow only p9_max_pages to be pinned. We wait here
* for another zc request to finish first.
*/
if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
err = wait_event_interruptible(vp_wq,
(atomic_read(&vp_pinned) < chan->p9_max_pages));
if (err == -ERESTARTSYS)
return err;
}
err = p9_payload_gup(data, &nr_pages, pages, write);
if (err < 0)
return err;
atomic_add(nr_pages, &vp_pinned);
} else {
/* kernel buffer, no need to pin pages */
int s, index = 0;
int count = nr_pages;
while (nr_pages) {
s = rest_of_page(data);
if (is_vmalloc_addr(data))
pages[index++] = vmalloc_to_page(data);
else
pages[index++] = kmap_to_page(data);
data += s;
nr_pages--;
}
nr_pages = count;
}
return nr_pages;
}
/**
* p9_virtio_zc_request - issue a zero copy request
* @client: client instance issuing the request
* @req: request to be issued
* @uidata: user buffer that should be used for zero copy read
* @uodata: user buffer that should be used for zero copy write
* @inlen: read buffer size
* @outlen: write buffer size
* @in_hdr_len: reply header size; the size of the response protocol data
* @kern_buf: whether the buffers are kernel rather than user memory
*
*/
static int
p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
char *uidata, char *uodata, int inlen,
int outlen, int in_hdr_len, int kern_buf)
{
int in, out, err, out_sgs, in_sgs;
unsigned long flags;
int in_nr_pages = 0, out_nr_pages = 0;
struct page **in_pages = NULL, **out_pages = NULL;
struct virtio_chan *chan = client->trans;
struct scatterlist *sgs[4];
p9_debug(P9_DEBUG_TRANS, "virtio request\n");
if (uodata) {
out_nr_pages = p9_nr_pages(uodata, outlen);
out_pages = kmalloc(sizeof(struct page *) * out_nr_pages,
GFP_NOFS);
if (!out_pages) {
err = -ENOMEM;
goto err_out;
}
out_nr_pages = p9_get_mapped_pages(chan, out_pages, uodata,
out_nr_pages, 0, kern_buf);
if (out_nr_pages < 0) {
err = out_nr_pages;
kfree(out_pages);
out_pages = NULL;
goto err_out;
}
}
if (uidata) {
in_nr_pages = p9_nr_pages(uidata, inlen);
in_pages = kmalloc(sizeof(struct page *) * in_nr_pages,
GFP_NOFS);
if (!in_pages) {
err = -ENOMEM;
goto err_out;
}
in_nr_pages = p9_get_mapped_pages(chan, in_pages, uidata,
in_nr_pages, 1, kern_buf);
if (in_nr_pages < 0) {
err = in_nr_pages;
kfree(in_pages);
in_pages = NULL;
goto err_out;
}
}
req->status = REQ_STATUS_SENT;
req_retry_pinned:
spin_lock_irqsave(&chan->lock, flags);
out_sgs = in_sgs = 0;
/* out data */
out = pack_sg_list(chan->sg, 0,
VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
if (out)
sgs[out_sgs++] = chan->sg;
if (out_pages) {
sgs[out_sgs++] = chan->sg + out;
out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
out_pages, out_nr_pages, uodata, outlen);
}
/*
* Take care of in data.
* For example, TREAD has an 11-byte header:
* the read/write header = PDU header (7) + IO size (4).
* Arrange things so that the server places the header in the
* allocated memory and the payload in the user buffer.
*/
in = pack_sg_list(chan->sg, out,
VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len);
if (in)
sgs[out_sgs + in_sgs++] = chan->sg + out;
if (in_pages) {
sgs[out_sgs + in_sgs++] = chan->sg + out + in;
in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
in_pages, in_nr_pages, uidata, inlen);
}
BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs));
err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc,
GFP_ATOMIC);
if (err < 0) {
if (err == -ENOSPC) {
chan->ring_bufs_avail = 0;
spin_unlock_irqrestore(&chan->lock, flags);
err = wait_event_interruptible(*chan->vc_wq,
chan->ring_bufs_avail);
if (err == -ERESTARTSYS)
goto err_out;
p9_debug(P9_DEBUG_TRANS, "Retry virtio request\n");
goto req_retry_pinned;
} else {
spin_unlock_irqrestore(&chan->lock, flags);
p9_debug(P9_DEBUG_TRANS,
"virtio rpc add_sgs returned failure\n");
err = -EIO;
goto err_out;
}
}
virtqueue_kick(chan->vq);
spin_unlock_irqrestore(&chan->lock, flags);
p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n");
err = wait_event_interruptible(*req->wq,
req->status >= REQ_STATUS_RCVD);
/*
* Non-kernel buffers were pinned above; unpin them.
*/
err_out:
if (!kern_buf) {
if (in_pages) {
p9_release_pages(in_pages, in_nr_pages);
atomic_sub(in_nr_pages, &vp_pinned);
}
if (out_pages) {
p9_release_pages(out_pages, out_nr_pages);
atomic_sub(out_nr_pages, &vp_pinned);
}
/* wakeup anybody waiting for slots to pin pages */
wake_up(&vp_wq);
}
kfree(in_pages);
kfree(out_pages);
return err;
}
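/*
* Illustrative summary (an addition, not original code): a zero-copy
* request ends up with up to four scatterlist runs, matching the
* sgs[4] array above:
*
*   sgs[0]: request (tc) bytes                 driver-readable
*   sgs[1]: pinned out_pages (write payload)   driver-readable
*   sgs[2]: first in_hdr_len bytes of rc       device-writable
*   sgs[3]: pinned in_pages (read payload)     device-writable
*
* so the server writes the protocol header into kernel memory and the
* file payload straight into the caller's pages.
*/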
static ssize_t p9_mount_tag_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct virtio_chan *chan;
struct virtio_device *vdev;
vdev = dev_to_virtio(dev);
chan = vdev->priv;
return snprintf(buf, chan->tag_len + 1, "%s", chan->tag);
}
static DEVICE_ATTR(mount_tag, 0444, p9_mount_tag_show, NULL);
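/*
* Usage note (an addition; the exact sysfs path depends on device
* enumeration): userspace can read the tag to find the right device,
* e.g.
*
*   cat /sys/bus/virtio/devices/virtio0/mount_tag
*/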
/**
* p9_virtio_probe - probe for existence of 9P virtio channels
* @vdev: virtio device to probe
*
* This probes for existing virtio channels.
*
*/
static int p9_virtio_probe(struct virtio_device *vdev)
{
__u16 tag_len;
char *tag;
int err;
struct virtio_chan *chan;
chan = kmalloc(sizeof(struct virtio_chan), GFP_KERNEL);
if (!chan) {
pr_err("Failed to allocate virtio 9P channel\n");
err = -ENOMEM;
goto fail;
}
chan->vdev = vdev;
/* We expect one virtqueue, for requests. */
chan->vq = virtio_find_single_vq(vdev, req_done, "requests");
if (IS_ERR(chan->vq)) {
err = PTR_ERR(chan->vq);
goto out_free_vq;
}
chan->vq->vdev->priv = chan;
spin_lock_init(&chan->lock);
sg_init_table(chan->sg, VIRTQUEUE_NUM);
chan->inuse = false;
if (virtio_has_feature(vdev, VIRTIO_9P_MOUNT_TAG)) {
virtio_cread(vdev, struct virtio_9p_config, tag_len, &tag_len);
} else {
err = -EINVAL;
goto out_free_vq;
}
tag = kmalloc(tag_len, GFP_KERNEL);
if (!tag) {
err = -ENOMEM;
goto out_free_vq;
}
virtio_cread_bytes(vdev, offsetof(struct virtio_9p_config, tag),
tag, tag_len);
chan->tag = tag;
chan->tag_len = tag_len;
err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
if (err)
goto out_free_tag;
chan->vc_wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
if (!chan->vc_wq) {
err = -ENOMEM;
goto out_free_tag;
}
init_waitqueue_head(chan->vc_wq);
chan->ring_bufs_avail = 1;
/* Ceiling limit to avoid denial of service attacks */
chan->p9_max_pages = nr_free_buffer_pages()/4;
virtio_device_ready(vdev);
mutex_lock(&virtio_9p_lock);
list_add_tail(&chan->chan_list, &virtio_chan_list);
mutex_unlock(&virtio_9p_lock);
/* Let udev rules use the new mount_tag attribute. */
kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
return 0;
out_free_tag:
kfree(tag);
out_free_vq:
vdev->config->del_vqs(vdev);
kfree(chan);
fail:
return err;
}
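/*
* Host-side example (an addition; QEMU syntax shown purely for
* illustration): a channel that this probe will pick up can be
* created with
*
*   qemu ... -fsdev local,id=fs0,path=/srv/share,security_model=none \
*            -device virtio-9p-pci,fsdev=fs0,mount_tag=hostshare
*
* where "hostshare" becomes chan->tag via the VIRTIO_9P_MOUNT_TAG
* config field read above.
*/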
/**
* p9_virtio_create - allocate a new virtio channel
* @client: client instance invoking this transport
* @devname: string identifying the channel to connect to (unused)
* @args: args passed from sys_mount() for per-transport options (unused)
*
* This sets up a transport channel for 9p communication. Right now
* we only match the first available channel, but eventually we could look up
* alternate channels by matching devname versus a virtio_config entry.
* We use a simple reference count mechanism to ensure that only a single
* mount has a channel open at a time.
*
*/
static int
p9_virtio_create(struct p9_client *client, const char *devname, char *args)
{
struct virtio_chan *chan;
int ret = -ENOENT;
int found = 0;
mutex_lock(&virtio_9p_lock);
list_for_each_entry(chan, &virtio_chan_list, chan_list) {
if (!strncmp(devname, chan->tag, chan->tag_len) &&
strlen(devname) == chan->tag_len) {
if (!chan->inuse) {
chan->inuse = true;
found = 1;
break;
}
ret = -EBUSY;
}
}
mutex_unlock(&virtio_9p_lock);
if (!found) {
pr_err("no channels available\n");
return ret;
}
client->trans = (void *)chan;
client->status = Connected;
chan->client = client;
return 0;
}
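/*
* Guest-side example (an addition): the devname matched above is the
* mount tag, so a mount against the channel from the previous example
* would look like
*
*   mount -t 9p -o trans=virtio hostshare /mnt/host
*/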
/**
* p9_virtio_remove - clean up resources associated with a virtio device
* @vdev: virtio device to remove
*
*/
static void p9_virtio_remove(struct virtio_device *vdev)
{
struct virtio_chan *chan = vdev->priv;
if (chan->inuse)
p9_virtio_close(chan->client);
vdev->config->del_vqs(vdev);
mutex_lock(&virtio_9p_lock);
list_del(&chan->chan_list);
mutex_unlock(&virtio_9p_lock);
sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
kfree(chan->tag);
kfree(chan->vc_wq);
kfree(chan);
}
static struct virtio_device_id id_table[] = {
{ VIRTIO_ID_9P, VIRTIO_DEV_ANY_ID },
{ 0 },
};
static unsigned int features[] = {
VIRTIO_9P_MOUNT_TAG,
};
/* The standard virtio driver structure: */
static struct virtio_driver p9_virtio_drv = {
.feature_table = features,
.feature_table_size = ARRAY_SIZE(features),
.driver.name = KBUILD_MODNAME,
.driver.owner = THIS_MODULE,
.id_table = id_table,
.probe = p9_virtio_probe,
.remove = p9_virtio_remove,
};
static struct p9_trans_module p9_virtio_trans = {
.name = "virtio",
.create = p9_virtio_create,
.close = p9_virtio_close,
.request = p9_virtio_request,
.zc_request = p9_virtio_zc_request,
.cancel = p9_virtio_cancel,
/*
* We reserve one entry for the request header and one entry for
* the response header. We also skip one more entry to
* accommodate buffers that do not start on a page boundary,
* which can cost an extra page in zero copy.
*/
.maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3),
.def = 1,
.owner = THIS_MODULE,
};
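/*
* Worked example (an addition, assuming VIRTQUEUE_NUM == 128 and
* 4 KiB pages): maxsize = 4096 * (128 - 3) = 512000 bytes, i.e.
* 500 KiB of payload per request.
*/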
/* The standard init function */
static int __init p9_virtio_init(void)
{
INIT_LIST_HEAD(&virtio_chan_list);
v9fs_register_trans(&p9_virtio_trans);
return register_virtio_driver(&p9_virtio_drv);
}
static void __exit p9_virtio_cleanup(void)
{
unregister_virtio_driver(&p9_virtio_drv);
v9fs_unregister_trans(&p9_virtio_trans);
}
module_init(p9_virtio_init);
module_exit(p9_virtio_cleanup);
MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
MODULE_DESCRIPTION("Virtio 9p Transport");
MODULE_LICENSE("GPL");

141
net/9p/util.c Normal file
View file

@@ -0,0 +1,141 @@
/*
* net/9p/util.c
*
* This file contains some helper functions
*
* Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net>
* Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
* Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to:
* Free Software Foundation
* 51 Franklin Street, Fifth Floor
* Boston, MA 02111-1301 USA
*
*/
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/parser.h>
#include <linux/idr.h>
#include <linux/slab.h>
#include <net/9p/9p.h>
/**
* struct p9_idpool - per-connection accounting for tag idpool
* @lock: protects the pool
* @pool: idr to allocate tag id from
*
*/
struct p9_idpool {
spinlock_t lock;
struct idr pool;
};
/**
* p9_idpool_create - create a new per-connection id pool
*
*/
struct p9_idpool *p9_idpool_create(void)
{
struct p9_idpool *p;
p = kmalloc(sizeof(struct p9_idpool), GFP_KERNEL);
if (!p)
return ERR_PTR(-ENOMEM);
spin_lock_init(&p->lock);
idr_init(&p->pool);
return p;
}
EXPORT_SYMBOL(p9_idpool_create);
/**
* p9_idpool_destroy - destroy a per-connection id pool
* @p: idpool to destroy
*/
void p9_idpool_destroy(struct p9_idpool *p)
{
idr_destroy(&p->pool);
kfree(p);
}
EXPORT_SYMBOL(p9_idpool_destroy);
/**
* p9_idpool_get - allocate numeric id from pool
* @p: pool to allocate from
*
* Bugs: this seems to be an awfully generic function; should it be in
* idr.c with the lock included in struct idr?
*/
int p9_idpool_get(struct p9_idpool *p)
{
int i;
unsigned long flags;
idr_preload(GFP_NOFS);
spin_lock_irqsave(&p->lock, flags);
/* no need to store exactly p, we just need something non-null */
i = idr_alloc(&p->pool, p, 0, 0, GFP_NOWAIT);
spin_unlock_irqrestore(&p->lock, flags);
idr_preload_end();
if (i < 0)
return -1;
p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", i, p);
return i;
}
EXPORT_SYMBOL(p9_idpool_get);
/**
* p9_idpool_put - release numeric id from pool
* @id: numeric id which is being released
* @p: pool to release id into
*
* Bugs: this seems to be an awfully generic function; should it be in
* idr.c with the lock included in struct idr?
*/
void p9_idpool_put(int id, struct p9_idpool *p)
{
unsigned long flags;
p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", id, p);
spin_lock_irqsave(&p->lock, flags);
idr_remove(&p->pool, id);
spin_unlock_irqrestore(&p->lock, flags);
}
EXPORT_SYMBOL(p9_idpool_put);
/**
* p9_idpool_check - check whether the specified id is currently in use
* @id: id to check
* @p: pool to check
*/
int p9_idpool_check(int id, struct p9_idpool *p)
{
return idr_find(&p->pool, id) != NULL;
}
EXPORT_SYMBOL(p9_idpool_check);
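/*
* Illustrative usage sketch (an addition; error handling elided and
* do_something() is hypothetical): a typical lifetime of a pool and
* one id:
*
*   struct p9_idpool *pool = p9_idpool_create();
*   int id = p9_idpool_get(pool);        // e.g. 0 on the first call
*   if (id >= 0 && p9_idpool_check(id, pool))
*           do_something(id);            // id is currently in use
*   p9_idpool_put(id, pool);
*   p9_idpool_destroy(pool);
*/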