Fixed MTP to work with TWRP

awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions


@@ -0,0 +1,33 @@
infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_cm.o
user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o
obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \
ib_cm.o iw_cm.o ib_addr.o \
$(infiniband-y)
obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
$(user_access-y)
ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_mad-y := mad.o smi.o agent.o mad_rmpp.o
ib_sa-y := sa_query.o multicast.o
ib_cm-y := cm.o
iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o
rdma_cm-y := cma.o
rdma_ucm-y := ucma.o
ib_addr-y := addr.o
ib_umad-y := user_mad.o
ib_ucm-y := ucm.o
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o


@@ -0,0 +1,565 @@
/*
* Copyright (c) 2005 Voltaire Inc. All rights reserved.
* Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
* Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
* Copyright (c) 2005 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/mutex.h>
#include <linux/inetdevice.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/module.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
#include <net/netevent.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <rdma/ib_addr.h>
#include <rdma/ib.h>
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("IB Address Translation");
MODULE_LICENSE("Dual BSD/GPL");
struct addr_req {
struct list_head list;
struct sockaddr_storage src_addr;
struct sockaddr_storage dst_addr;
struct rdma_dev_addr *addr;
struct rdma_addr_client *client;
void *context;
void (*callback)(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context);
unsigned long timeout;
int status;
};
static void process_req(struct work_struct *work);
static DEFINE_MUTEX(lock);
static LIST_HEAD(req_list);
static DECLARE_DELAYED_WORK(work, process_req);
static struct workqueue_struct *addr_wq;
int rdma_addr_size(struct sockaddr *addr)
{
switch (addr->sa_family) {
case AF_INET:
return sizeof(struct sockaddr_in);
case AF_INET6:
return sizeof(struct sockaddr_in6);
case AF_IB:
return sizeof(struct sockaddr_ib);
default:
return 0;
}
}
EXPORT_SYMBOL(rdma_addr_size);
static struct rdma_addr_client self;
void rdma_addr_register_client(struct rdma_addr_client *client)
{
atomic_set(&client->refcount, 1);
init_completion(&client->comp);
}
EXPORT_SYMBOL(rdma_addr_register_client);
static inline void put_client(struct rdma_addr_client *client)
{
if (atomic_dec_and_test(&client->refcount))
complete(&client->comp);
}
void rdma_addr_unregister_client(struct rdma_addr_client *client)
{
put_client(client);
wait_for_completion(&client->comp);
}
EXPORT_SYMBOL(rdma_addr_unregister_client);
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
const unsigned char *dst_dev_addr)
{
dev_addr->dev_type = dev->type;
memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
if (dst_dev_addr)
memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
dev_addr->bound_dev_if = dev->ifindex;
return 0;
}
EXPORT_SYMBOL(rdma_copy_addr);
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
u16 *vlan_id)
{
struct net_device *dev;
int ret = -EADDRNOTAVAIL;
if (dev_addr->bound_dev_if) {
dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
if (!dev)
return -ENODEV;
ret = rdma_copy_addr(dev_addr, dev, NULL);
dev_put(dev);
return ret;
}
switch (addr->sa_family) {
case AF_INET:
dev = ip_dev_find(&init_net,
((struct sockaddr_in *) addr)->sin_addr.s_addr);
if (!dev)
return ret;
ret = rdma_copy_addr(dev_addr, dev, NULL);
if (vlan_id)
*vlan_id = rdma_vlan_dev_vlan_id(dev);
dev_put(dev);
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
rcu_read_lock();
for_each_netdev_rcu(&init_net, dev) {
if (ipv6_chk_addr(&init_net,
&((struct sockaddr_in6 *) addr)->sin6_addr,
dev, 1)) {
ret = rdma_copy_addr(dev_addr, dev, NULL);
if (vlan_id)
*vlan_id = rdma_vlan_dev_vlan_id(dev);
break;
}
}
rcu_read_unlock();
break;
#endif
}
return ret;
}
EXPORT_SYMBOL(rdma_translate_ip);
static void set_timeout(unsigned long time)
{
unsigned long delay;
delay = time - jiffies;
if ((long)delay <= 0)
delay = 1;
mod_delayed_work(addr_wq, &work, delay);
}
static void queue_req(struct addr_req *req)
{
struct addr_req *temp_req;
mutex_lock(&lock);
list_for_each_entry_reverse(temp_req, &req_list, list) {
if (time_after_eq(req->timeout, temp_req->timeout))
break;
}
list_add(&req->list, &temp_req->list);
if (req_list.next == &req->list)
set_timeout(req->timeout);
mutex_unlock(&lock);
}
static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, void *daddr)
{
struct neighbour *n;
int ret;
n = dst_neigh_lookup(dst, daddr);
rcu_read_lock();
if (!n || !(n->nud_state & NUD_VALID)) {
if (n)
neigh_event_send(n, NULL);
ret = -ENODATA;
} else {
ret = rdma_copy_addr(dev_addr, dst->dev, n->ha);
}
rcu_read_unlock();
if (n)
neigh_release(n);
return ret;
}
static int addr4_resolve(struct sockaddr_in *src_in,
struct sockaddr_in *dst_in,
struct rdma_dev_addr *addr)
{
__be32 src_ip = src_in->sin_addr.s_addr;
__be32 dst_ip = dst_in->sin_addr.s_addr;
struct rtable *rt;
struct flowi4 fl4;
int ret;
memset(&fl4, 0, sizeof(fl4));
fl4.daddr = dst_ip;
fl4.saddr = src_ip;
fl4.flowi4_oif = addr->bound_dev_if;
rt = ip_route_output_key(&init_net, &fl4);
if (IS_ERR(rt)) {
ret = PTR_ERR(rt);
goto out;
}
src_in->sin_family = AF_INET;
src_in->sin_addr.s_addr = fl4.saddr;
if (rt->dst.dev->flags & IFF_LOOPBACK) {
ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
goto put;
}
/* If the device does ARP internally, return 'done' */
if (rt->dst.dev->flags & IFF_NOARP) {
ret = rdma_copy_addr(addr, rt->dst.dev, NULL);
goto put;
}
ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr);
put:
ip_rt_put(rt);
out:
return ret;
}
#if IS_ENABLED(CONFIG_IPV6)
static int addr6_resolve(struct sockaddr_in6 *src_in,
struct sockaddr_in6 *dst_in,
struct rdma_dev_addr *addr)
{
struct flowi6 fl6;
struct dst_entry *dst;
int ret;
memset(&fl6, 0, sizeof fl6);
fl6.daddr = dst_in->sin6_addr;
fl6.saddr = src_in->sin6_addr;
fl6.flowi6_oif = addr->bound_dev_if;
dst = ip6_route_output(&init_net, NULL, &fl6);
if ((ret = dst->error))
goto put;
if (ipv6_addr_any(&fl6.saddr)) {
ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
&fl6.daddr, 0, &fl6.saddr);
if (ret)
goto put;
src_in->sin6_family = AF_INET6;
src_in->sin6_addr = fl6.saddr;
}
if (dst->dev->flags & IFF_LOOPBACK) {
ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
goto put;
}
/* If the device does ARP internally, return 'done' */
if (dst->dev->flags & IFF_NOARP) {
ret = rdma_copy_addr(addr, dst->dev, NULL);
goto put;
}
ret = dst_fetch_ha(dst, addr, &fl6.daddr);
put:
dst_release(dst);
return ret;
}
#else
static int addr6_resolve(struct sockaddr_in6 *src_in,
struct sockaddr_in6 *dst_in,
struct rdma_dev_addr *addr)
{
return -EADDRNOTAVAIL;
}
#endif
static int addr_resolve(struct sockaddr *src_in,
struct sockaddr *dst_in,
struct rdma_dev_addr *addr)
{
if (src_in->sa_family == AF_INET) {
return addr4_resolve((struct sockaddr_in *) src_in,
(struct sockaddr_in *) dst_in, addr);
} else
return addr6_resolve((struct sockaddr_in6 *) src_in,
(struct sockaddr_in6 *) dst_in, addr);
}
static void process_req(struct work_struct *work)
{
struct addr_req *req, *temp_req;
struct sockaddr *src_in, *dst_in;
struct list_head done_list;
INIT_LIST_HEAD(&done_list);
mutex_lock(&lock);
list_for_each_entry_safe(req, temp_req, &req_list, list) {
if (req->status == -ENODATA) {
src_in = (struct sockaddr *) &req->src_addr;
dst_in = (struct sockaddr *) &req->dst_addr;
req->status = addr_resolve(src_in, dst_in, req->addr);
if (req->status && time_after_eq(jiffies, req->timeout))
req->status = -ETIMEDOUT;
else if (req->status == -ENODATA)
continue;
}
list_move_tail(&req->list, &done_list);
}
if (!list_empty(&req_list)) {
req = list_entry(req_list.next, struct addr_req, list);
set_timeout(req->timeout);
}
mutex_unlock(&lock);
list_for_each_entry_safe(req, temp_req, &done_list, list) {
list_del(&req->list);
req->callback(req->status, (struct sockaddr *) &req->src_addr,
req->addr, req->context);
put_client(req->client);
kfree(req);
}
}
int rdma_resolve_ip(struct rdma_addr_client *client,
struct sockaddr *src_addr, struct sockaddr *dst_addr,
struct rdma_dev_addr *addr, int timeout_ms,
void (*callback)(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context),
void *context)
{
struct sockaddr *src_in, *dst_in;
struct addr_req *req;
int ret = 0;
req = kzalloc(sizeof *req, GFP_KERNEL);
if (!req)
return -ENOMEM;
src_in = (struct sockaddr *) &req->src_addr;
dst_in = (struct sockaddr *) &req->dst_addr;
if (src_addr) {
if (src_addr->sa_family != dst_addr->sa_family) {
ret = -EINVAL;
goto err;
}
memcpy(src_in, src_addr, rdma_addr_size(src_addr));
} else {
src_in->sa_family = dst_addr->sa_family;
}
memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
req->addr = addr;
req->callback = callback;
req->context = context;
req->client = client;
atomic_inc(&client->refcount);
req->status = addr_resolve(src_in, dst_in, addr);
switch (req->status) {
case 0:
req->timeout = jiffies;
queue_req(req);
break;
case -ENODATA:
req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
queue_req(req);
break;
default:
ret = req->status;
atomic_dec(&client->refcount);
goto err;
}
return ret;
err:
kfree(req);
return ret;
}
EXPORT_SYMBOL(rdma_resolve_ip);
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
struct addr_req *req, *temp_req;
mutex_lock(&lock);
list_for_each_entry_safe(req, temp_req, &req_list, list) {
if (req->addr == addr) {
req->status = -ECANCELED;
req->timeout = jiffies;
list_move(&req->list, &req_list);
set_timeout(req->timeout);
break;
}
}
mutex_unlock(&lock);
}
EXPORT_SYMBOL(rdma_addr_cancel);
struct resolve_cb_context {
struct rdma_dev_addr *addr;
struct completion comp;
};
static void resolve_cb(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context)
{
memcpy(((struct resolve_cb_context *)context)->addr, addr,
sizeof(struct rdma_dev_addr));
complete(&((struct resolve_cb_context *)context)->comp);
}
int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
u16 *vlan_id)
{
int ret = 0;
struct rdma_dev_addr dev_addr;
struct resolve_cb_context ctx;
struct net_device *dev;
union {
struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
} sgid_addr, dgid_addr;
ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid);
if (ret)
return ret;
ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid);
if (ret)
return ret;
memset(&dev_addr, 0, sizeof(dev_addr));
ctx.addr = &dev_addr;
init_completion(&ctx.comp);
ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
&dev_addr, 1000, resolve_cb, &ctx);
if (ret)
return ret;
wait_for_completion(&ctx.comp);
memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
if (!dev)
return -ENODEV;
if (vlan_id)
*vlan_id = rdma_vlan_dev_vlan_id(dev);
dev_put(dev);
return ret;
}
EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
{
int ret = 0;
struct rdma_dev_addr dev_addr;
union {
struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
} gid_addr;
ret = rdma_gid2ip(&gid_addr._sockaddr, sgid);
if (ret)
return ret;
memset(&dev_addr, 0, sizeof(dev_addr));
ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
if (ret)
return ret;
memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
return ret;
}
EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
static int netevent_callback(struct notifier_block *self, unsigned long event,
void *ctx)
{
if (event == NETEVENT_NEIGH_UPDATE) {
struct neighbour *neigh = ctx;
if (neigh->nud_state & NUD_VALID) {
set_timeout(jiffies);
}
}
return 0;
}
static struct notifier_block nb = {
.notifier_call = netevent_callback
};
static int __init addr_init(void)
{
addr_wq = create_singlethread_workqueue("ib_addr");
if (!addr_wq)
return -ENOMEM;
register_netevent_notifier(&nb);
rdma_addr_register_client(&self);
return 0;
}
static void __exit addr_cleanup(void)
{
rdma_addr_unregister_client(&self);
unregister_netevent_notifier(&nb);
destroy_workqueue(addr_wq);
}
module_init(addr_init);
module_exit(addr_cleanup);
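
The file above exposes an asynchronous resolution API: a consumer registers a struct rdma_addr_client, submits a request with rdma_resolve_ip(), and is notified through its callback once the workqueue has resolved the destination hardware address or the request has timed out. Below is a minimal consumer sketch, assuming kernel context and an IPv4 destination with sin_family already set; the my_* names and the synchronous completion wrapper are illustrative, not part of the file above (the in-file rdma_addr_find_dmac_by_grh() uses the same pattern).

#include <linux/completion.h>
#include <linux/string.h>
#include <rdma/ib_addr.h>

struct my_resolve_ctx {
	struct rdma_dev_addr dev_addr;	/* filled in before the callback runs */
	struct completion done;
	int status;
};

/* Signature must match the callback type taken by rdma_resolve_ip(). */
static void my_resolve_cb(int status, struct sockaddr *src_addr,
			  struct rdma_dev_addr *addr, void *context)
{
	struct my_resolve_ctx *ctx = context;

	ctx->status = status;
	complete(&ctx->done);
}

/* Resolve an IPv4 destination synchronously on top of the async API. */
static int my_resolve_ipv4(struct rdma_addr_client *client,
			   struct sockaddr_in *dst)
{
	struct my_resolve_ctx ctx;
	int ret;

	memset(&ctx, 0, sizeof(ctx));
	init_completion(&ctx.done);

	/* A NULL source lets addr4_resolve() pick the route's source address. */
	ret = rdma_resolve_ip(client, NULL, (struct sockaddr *)dst,
			      &ctx.dev_addr, 2000 /* ms */,
			      my_resolve_cb, &ctx);
	if (ret)
		return ret;

	wait_for_completion(&ctx.done);
	return ctx.status;	/* 0, or -ETIMEDOUT / -ECANCELED on failure */
}

The client is assumed to have been registered with rdma_addr_register_client() beforehand; a request whose rdma_dev_addr is still pending can be aborted with rdma_addr_cancel().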


@@ -0,0 +1,217 @@
/*
* Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved.
* Copyright (c) 2004, 2005 Intel Corporation. All rights reserved.
* Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved.
* Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <linux/slab.h>
#include <linux/string.h>
#include "agent.h"
#include "smi.h"
#include "mad_priv.h"
#define SPFX "ib_agent: "
struct ib_agent_port_private {
struct list_head port_list;
struct ib_mad_agent *agent[2];
};
static DEFINE_SPINLOCK(ib_agent_port_list_lock);
static LIST_HEAD(ib_agent_port_list);
static struct ib_agent_port_private *
__ib_get_agent_port(struct ib_device *device, int port_num)
{
struct ib_agent_port_private *entry;
list_for_each_entry(entry, &ib_agent_port_list, port_list) {
if (entry->agent[1]->device == device &&
entry->agent[1]->port_num == port_num)
return entry;
}
return NULL;
}
static struct ib_agent_port_private *
ib_get_agent_port(struct ib_device *device, int port_num)
{
struct ib_agent_port_private *entry;
unsigned long flags;
spin_lock_irqsave(&ib_agent_port_list_lock, flags);
entry = __ib_get_agent_port(device, port_num);
spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
return entry;
}
void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
struct ib_wc *wc, struct ib_device *device,
int port_num, int qpn)
{
struct ib_agent_port_private *port_priv;
struct ib_mad_agent *agent;
struct ib_mad_send_buf *send_buf;
struct ib_ah *ah;
struct ib_mad_send_wr_private *mad_send_wr;
if (device->node_type == RDMA_NODE_IB_SWITCH)
port_priv = ib_get_agent_port(device, 0);
else
port_priv = ib_get_agent_port(device, port_num);
if (!port_priv) {
dev_err(&device->dev, "Unable to find port agent\n");
return;
}
agent = port_priv->agent[qpn];
ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);
if (IS_ERR(ah)) {
dev_err(&device->dev, "ib_create_ah_from_wc error %ld\n",
PTR_ERR(ah));
return;
}
send_buf = ib_create_send_mad(agent, wc->src_qp, wc->pkey_index, 0,
IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
GFP_KERNEL);
if (IS_ERR(send_buf)) {
dev_err(&device->dev, "ib_create_send_mad error\n");
goto err1;
}
memcpy(send_buf->mad, mad, sizeof *mad);
send_buf->ah = ah;
if (device->node_type == RDMA_NODE_IB_SWITCH) {
mad_send_wr = container_of(send_buf,
struct ib_mad_send_wr_private,
send_buf);
mad_send_wr->send_wr.wr.ud.port_num = port_num;
}
if (ib_post_send_mad(send_buf, NULL)) {
dev_err(&device->dev, "ib_post_send_mad error\n");
goto err2;
}
return;
err2:
ib_free_send_mad(send_buf);
err1:
ib_destroy_ah(ah);
}
static void agent_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc)
{
ib_destroy_ah(mad_send_wc->send_buf->ah);
ib_free_send_mad(mad_send_wc->send_buf);
}
int ib_agent_port_open(struct ib_device *device, int port_num)
{
struct ib_agent_port_private *port_priv;
unsigned long flags;
int ret;
/* Create new device info */
port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
if (!port_priv) {
dev_err(&device->dev, "No memory for ib_agent_port_private\n");
ret = -ENOMEM;
goto error1;
}
if (rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND) {
/* Obtain send only MAD agent for SMI QP */
port_priv->agent[0] = ib_register_mad_agent(device, port_num,
IB_QPT_SMI, NULL, 0,
&agent_send_handler,
NULL, NULL, 0);
if (IS_ERR(port_priv->agent[0])) {
ret = PTR_ERR(port_priv->agent[0]);
goto error2;
}
}
/* Obtain send only MAD agent for GSI QP */
port_priv->agent[1] = ib_register_mad_agent(device, port_num,
IB_QPT_GSI, NULL, 0,
&agent_send_handler,
NULL, NULL, 0);
if (IS_ERR(port_priv->agent[1])) {
ret = PTR_ERR(port_priv->agent[1]);
goto error3;
}
spin_lock_irqsave(&ib_agent_port_list_lock, flags);
list_add_tail(&port_priv->port_list, &ib_agent_port_list);
spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
return 0;
error3:
if (port_priv->agent[0])
ib_unregister_mad_agent(port_priv->agent[0]);
error2:
kfree(port_priv);
error1:
return ret;
}
int ib_agent_port_close(struct ib_device *device, int port_num)
{
struct ib_agent_port_private *port_priv;
unsigned long flags;
spin_lock_irqsave(&ib_agent_port_list_lock, flags);
port_priv = __ib_get_agent_port(device, port_num);
if (port_priv == NULL) {
spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
dev_err(&device->dev, "Port %d not found\n", port_num);
return -ENODEV;
}
list_del(&port_priv->port_list);
spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
ib_unregister_mad_agent(port_priv->agent[1]);
if (port_priv->agent[0])
ib_unregister_mad_agent(port_priv->agent[0]);
kfree(port_priv);
return 0;
}
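
The file above maintains one send-only MAD agent pair per port and exposes agent_send_response() for sending response MADs on QP0 or QP1. A minimal sketch of the expected open/close pattern follows, using a hypothetical my_open_all_agent_ports() helper (in the kernel this is driven by the MAD layer as ports are brought up and torn down; switches use port 0, which this sketch ignores).

#include <rdma/ib_verbs.h>
#include "agent.h"

/* Open the send-only agents for every port of a CA, rolling back on error. */
static int my_open_all_agent_ports(struct ib_device *device, int num_ports)
{
	int port, ret;

	for (port = 1; port <= num_ports; ++port) {
		ret = ib_agent_port_open(device, port);
		if (ret)
			goto rollback;
	}
	return 0;

rollback:
	while (--port >= 1)
		ib_agent_port_close(device, port);
	return ret;
}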


@@ -0,0 +1,51 @@
/*
* Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2004 Infinicon Corporation. All rights reserved.
* Copyright (c) 2004 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef __AGENT_H_
#define __AGENT_H_
#include <linux/err.h>
#include <rdma/ib_mad.h>
extern int ib_agent_port_open(struct ib_device *device, int port_num);
extern int ib_agent_port_close(struct ib_device *device, int port_num);
extern void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
struct ib_wc *wc, struct ib_device *device,
int port_num, int qpn);
#endif /* __AGENT_H_ */


@@ -0,0 +1,439 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <rdma/ib_cache.h>
#include "core_priv.h"
struct ib_pkey_cache {
int table_len;
u16 table[0];
};
struct ib_gid_cache {
int table_len;
union ib_gid table[0];
};
struct ib_update_work {
struct work_struct work;
struct ib_device *device;
u8 port_num;
};
static inline int start_port(struct ib_device *device)
{
return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
}
static inline int end_port(struct ib_device *device)
{
return (device->node_type == RDMA_NODE_IB_SWITCH) ?
0 : device->phys_port_cnt;
}
int ib_get_cached_gid(struct ib_device *device,
u8 port_num,
int index,
union ib_gid *gid)
{
struct ib_gid_cache *cache;
unsigned long flags;
int ret = 0;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
cache = device->cache.gid_cache[port_num - start_port(device)];
if (index < 0 || index >= cache->table_len)
ret = -EINVAL;
else
*gid = cache->table[index];
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_get_cached_gid);
int ib_find_cached_gid(struct ib_device *device,
union ib_gid *gid,
u8 *port_num,
u16 *index)
{
struct ib_gid_cache *cache;
unsigned long flags;
int p, i;
int ret = -ENOENT;
*port_num = -1;
if (index)
*index = -1;
read_lock_irqsave(&device->cache.lock, flags);
for (p = 0; p <= end_port(device) - start_port(device); ++p) {
cache = device->cache.gid_cache[p];
for (i = 0; i < cache->table_len; ++i) {
if (!memcmp(gid, &cache->table[i], sizeof *gid)) {
*port_num = p + start_port(device);
if (index)
*index = i;
ret = 0;
goto found;
}
}
}
found:
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_find_cached_gid);
int ib_get_cached_pkey(struct ib_device *device,
u8 port_num,
int index,
u16 *pkey)
{
struct ib_pkey_cache *cache;
unsigned long flags;
int ret = 0;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
cache = device->cache.pkey_cache[port_num - start_port(device)];
if (index < 0 || index >= cache->table_len)
ret = -EINVAL;
else
*pkey = cache->table[index];
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_get_cached_pkey);
int ib_find_cached_pkey(struct ib_device *device,
u8 port_num,
u16 pkey,
u16 *index)
{
struct ib_pkey_cache *cache;
unsigned long flags;
int i;
int ret = -ENOENT;
int partial_ix = -1;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
cache = device->cache.pkey_cache[port_num - start_port(device)];
*index = -1;
for (i = 0; i < cache->table_len; ++i)
if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
if (cache->table[i] & 0x8000) {
*index = i;
ret = 0;
break;
} else
partial_ix = i;
}
if (ret && partial_ix >= 0) {
*index = partial_ix;
ret = 0;
}
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_find_cached_pkey);
int ib_find_exact_cached_pkey(struct ib_device *device,
u8 port_num,
u16 pkey,
u16 *index)
{
struct ib_pkey_cache *cache;
unsigned long flags;
int i;
int ret = -ENOENT;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
cache = device->cache.pkey_cache[port_num - start_port(device)];
*index = -1;
for (i = 0; i < cache->table_len; ++i)
if (cache->table[i] == pkey) {
*index = i;
ret = 0;
break;
}
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_find_exact_cached_pkey);
int ib_get_cached_lmc(struct ib_device *device,
u8 port_num,
u8 *lmc)
{
unsigned long flags;
int ret = 0;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
*lmc = device->cache.lmc_cache[port_num - start_port(device)];
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_get_cached_lmc);
static void ib_cache_update(struct ib_device *device,
u8 port)
{
struct ib_port_attr *tprops = NULL;
struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
struct ib_gid_cache *gid_cache = NULL, *old_gid_cache;
int i;
int ret;
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops)
return;
ret = ib_query_port(device, port, tprops);
if (ret) {
printk(KERN_WARNING "ib_query_port failed (%d) for %s\n",
ret, device->name);
goto err;
}
pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len *
sizeof *pkey_cache->table, GFP_KERNEL);
if (!pkey_cache)
goto err;
pkey_cache->table_len = tprops->pkey_tbl_len;
gid_cache = kmalloc(sizeof *gid_cache + tprops->gid_tbl_len *
sizeof *gid_cache->table, GFP_KERNEL);
if (!gid_cache)
goto err;
gid_cache->table_len = tprops->gid_tbl_len;
for (i = 0; i < pkey_cache->table_len; ++i) {
ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
if (ret) {
printk(KERN_WARNING "ib_query_pkey failed (%d) for %s (index %d)\n",
ret, device->name, i);
goto err;
}
}
for (i = 0; i < gid_cache->table_len; ++i) {
ret = ib_query_gid(device, port, i, gid_cache->table + i);
if (ret) {
printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n",
ret, device->name, i);
goto err;
}
}
write_lock_irq(&device->cache.lock);
old_pkey_cache = device->cache.pkey_cache[port - start_port(device)];
old_gid_cache = device->cache.gid_cache [port - start_port(device)];
device->cache.pkey_cache[port - start_port(device)] = pkey_cache;
device->cache.gid_cache [port - start_port(device)] = gid_cache;
device->cache.lmc_cache[port - start_port(device)] = tprops->lmc;
write_unlock_irq(&device->cache.lock);
kfree(old_pkey_cache);
kfree(old_gid_cache);
kfree(tprops);
return;
err:
kfree(pkey_cache);
kfree(gid_cache);
kfree(tprops);
}
static void ib_cache_task(struct work_struct *_work)
{
struct ib_update_work *work =
container_of(_work, struct ib_update_work, work);
ib_cache_update(work->device, work->port_num);
kfree(work);
}
static void ib_cache_event(struct ib_event_handler *handler,
struct ib_event *event)
{
struct ib_update_work *work;
if (event->event == IB_EVENT_PORT_ERR ||
event->event == IB_EVENT_PORT_ACTIVE ||
event->event == IB_EVENT_LID_CHANGE ||
event->event == IB_EVENT_PKEY_CHANGE ||
event->event == IB_EVENT_SM_CHANGE ||
event->event == IB_EVENT_CLIENT_REREGISTER ||
event->event == IB_EVENT_GID_CHANGE) {
work = kmalloc(sizeof *work, GFP_ATOMIC);
if (work) {
INIT_WORK(&work->work, ib_cache_task);
work->device = event->device;
work->port_num = event->element.port_num;
queue_work(ib_wq, &work->work);
}
}
}
static void ib_cache_setup_one(struct ib_device *device)
{
int p;
rwlock_init(&device->cache.lock);
device->cache.pkey_cache =
kmalloc(sizeof *device->cache.pkey_cache *
(end_port(device) - start_port(device) + 1), GFP_KERNEL);
device->cache.gid_cache =
kmalloc(sizeof *device->cache.gid_cache *
(end_port(device) - start_port(device) + 1), GFP_KERNEL);
device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache *
(end_port(device) -
start_port(device) + 1),
GFP_KERNEL);
if (!device->cache.pkey_cache || !device->cache.gid_cache ||
!device->cache.lmc_cache) {
printk(KERN_WARNING "Couldn't allocate cache "
"for %s\n", device->name);
goto err;
}
for (p = 0; p <= end_port(device) - start_port(device); ++p) {
device->cache.pkey_cache[p] = NULL;
device->cache.gid_cache [p] = NULL;
ib_cache_update(device, p + start_port(device));
}
INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
device, ib_cache_event);
if (ib_register_event_handler(&device->cache.event_handler))
goto err_cache;
return;
err_cache:
for (p = 0; p <= end_port(device) - start_port(device); ++p) {
kfree(device->cache.pkey_cache[p]);
kfree(device->cache.gid_cache[p]);
}
err:
kfree(device->cache.pkey_cache);
kfree(device->cache.gid_cache);
kfree(device->cache.lmc_cache);
}
static void ib_cache_cleanup_one(struct ib_device *device)
{
int p;
ib_unregister_event_handler(&device->cache.event_handler);
flush_workqueue(ib_wq);
for (p = 0; p <= end_port(device) - start_port(device); ++p) {
kfree(device->cache.pkey_cache[p]);
kfree(device->cache.gid_cache[p]);
}
kfree(device->cache.pkey_cache);
kfree(device->cache.gid_cache);
kfree(device->cache.lmc_cache);
}
static struct ib_client cache_client = {
.name = "cache",
.add = ib_cache_setup_one,
.remove = ib_cache_cleanup_one
};
int __init ib_cache_setup(void)
{
return ib_register_client(&cache_client);
}
void __exit ib_cache_cleanup(void)
{
ib_unregister_client(&cache_client);
}
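
Consumers reach this cache through the accessors exported above (declared in <rdma/ib_cache.h>) instead of issuing MAD queries on every lookup. A small illustrative sketch; my_query_port_identity() and its log line are hypothetical:

#include <linux/kernel.h>
#include <rdma/ib_cache.h>

/* Look up the index of the full-membership P_Key and GID 0 from the cache. */
static int my_query_port_identity(struct ib_device *device, u8 port_num)
{
	union ib_gid gid;
	u16 pkey_index;
	int ret;

	ret = ib_find_cached_pkey(device, port_num, 0xffff, &pkey_index);
	if (ret)
		return ret;	/* -ENOENT: P_Key not in this port's table */

	ret = ib_get_cached_gid(device, port_num, 0, &gid);
	if (ret)
		return ret;

	pr_info("%s port %u: pkey 0xffff at index %u, subnet prefix 0x%llx\n",
		device->name, port_num, pkey_index,
		(unsigned long long)be64_to_cpu(gid.global.subnet_prefix));
	return 0;
}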

drivers/infiniband/core/cm.c (3932 lines)

File diff suppressed because it is too large


@@ -0,0 +1,836 @@
/*
* Copyright (c) 2004, 2011 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if !defined(CM_MSGS_H)
#define CM_MSGS_H
#include <rdma/ib_mad.h>
#include <rdma/ib_cm.h>
/*
* Parameters to routines below should be in network-byte order, and values
* are returned in network-byte order.
*/
#define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */
enum cm_msg_sequence {
CM_MSG_SEQUENCE_REQ,
CM_MSG_SEQUENCE_LAP,
CM_MSG_SEQUENCE_DREQ,
CM_MSG_SEQUENCE_SIDR
};
struct cm_req_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 rsvd4;
__be64 service_id;
__be64 local_ca_guid;
__be32 rsvd24;
__be32 local_qkey;
/* local QPN:24, responder resources:8 */
__be32 offset32;
/* local EECN:24, initiator depth:8 */
__be32 offset36;
/*
* remote EECN:24, remote CM response timeout:5,
* transport service type:2, end-to-end flow control:1
*/
__be32 offset40;
/* starting PSN:24, local CM response timeout:5, retry count:3 */
__be32 offset44;
__be16 pkey;
/* path MTU:4, RDC exists:1, RNR retry count:3. */
u8 offset50;
/* max CM Retries:4, SRQ:1, extended transport type:3 */
u8 offset51;
__be16 primary_local_lid;
__be16 primary_remote_lid;
union ib_gid primary_local_gid;
union ib_gid primary_remote_gid;
/* flow label:20, rsvd:6, packet rate:6 */
__be32 primary_offset88;
u8 primary_traffic_class;
u8 primary_hop_limit;
/* SL:4, subnet local:1, rsvd:3 */
u8 primary_offset94;
/* local ACK timeout:5, rsvd:3 */
u8 primary_offset95;
__be16 alt_local_lid;
__be16 alt_remote_lid;
union ib_gid alt_local_gid;
union ib_gid alt_remote_gid;
/* flow label:20, rsvd:6, packet rate:6 */
__be32 alt_offset132;
u8 alt_traffic_class;
u8 alt_hop_limit;
/* SL:4, subnet local:1, rsvd:3 */
u8 alt_offset138;
/* local ACK timeout:5, rsvd:3 */
u8 alt_offset139;
u8 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
static inline __be32 cm_req_get_local_qpn(struct cm_req_msg *req_msg)
{
return cpu_to_be32(be32_to_cpu(req_msg->offset32) >> 8);
}
static inline void cm_req_set_local_qpn(struct cm_req_msg *req_msg, __be32 qpn)
{
req_msg->offset32 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
(be32_to_cpu(req_msg->offset32) &
0x000000FF));
}
static inline u8 cm_req_get_resp_res(struct cm_req_msg *req_msg)
{
return (u8) be32_to_cpu(req_msg->offset32);
}
static inline void cm_req_set_resp_res(struct cm_req_msg *req_msg, u8 resp_res)
{
req_msg->offset32 = cpu_to_be32(resp_res |
(be32_to_cpu(req_msg->offset32) &
0xFFFFFF00));
}
static inline u8 cm_req_get_init_depth(struct cm_req_msg *req_msg)
{
return (u8) be32_to_cpu(req_msg->offset36);
}
static inline void cm_req_set_init_depth(struct cm_req_msg *req_msg,
u8 init_depth)
{
req_msg->offset36 = cpu_to_be32(init_depth |
(be32_to_cpu(req_msg->offset36) &
0xFFFFFF00));
}
static inline u8 cm_req_get_remote_resp_timeout(struct cm_req_msg *req_msg)
{
return (u8) ((be32_to_cpu(req_msg->offset40) & 0xF8) >> 3);
}
static inline void cm_req_set_remote_resp_timeout(struct cm_req_msg *req_msg,
u8 resp_timeout)
{
req_msg->offset40 = cpu_to_be32((resp_timeout << 3) |
(be32_to_cpu(req_msg->offset40) &
0xFFFFFF07));
}
static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg)
{
u8 transport_type = (u8) (be32_to_cpu(req_msg->offset40) & 0x06) >> 1;
switch(transport_type) {
case 0: return IB_QPT_RC;
case 1: return IB_QPT_UC;
case 3:
switch (req_msg->offset51 & 0x7) {
case 1: return IB_QPT_XRC_TGT;
default: return 0;
}
default: return 0;
}
}
static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg,
enum ib_qp_type qp_type)
{
switch(qp_type) {
case IB_QPT_UC:
req_msg->offset40 = cpu_to_be32((be32_to_cpu(
req_msg->offset40) &
0xFFFFFFF9) | 0x2);
break;
case IB_QPT_XRC_INI:
req_msg->offset40 = cpu_to_be32((be32_to_cpu(
req_msg->offset40) &
0xFFFFFFF9) | 0x6);
req_msg->offset51 = (req_msg->offset51 & 0xF8) | 1;
break;
default:
req_msg->offset40 = cpu_to_be32(be32_to_cpu(
req_msg->offset40) &
0xFFFFFFF9);
}
}
static inline u8 cm_req_get_flow_ctrl(struct cm_req_msg *req_msg)
{
return be32_to_cpu(req_msg->offset40) & 0x1;
}
static inline void cm_req_set_flow_ctrl(struct cm_req_msg *req_msg,
u8 flow_ctrl)
{
req_msg->offset40 = cpu_to_be32((flow_ctrl & 0x1) |
(be32_to_cpu(req_msg->offset40) &
0xFFFFFFFE));
}
static inline __be32 cm_req_get_starting_psn(struct cm_req_msg *req_msg)
{
return cpu_to_be32(be32_to_cpu(req_msg->offset44) >> 8);
}
static inline void cm_req_set_starting_psn(struct cm_req_msg *req_msg,
__be32 starting_psn)
{
req_msg->offset44 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) |
(be32_to_cpu(req_msg->offset44) & 0x000000FF));
}
static inline u8 cm_req_get_local_resp_timeout(struct cm_req_msg *req_msg)
{
return (u8) ((be32_to_cpu(req_msg->offset44) & 0xF8) >> 3);
}
static inline void cm_req_set_local_resp_timeout(struct cm_req_msg *req_msg,
u8 resp_timeout)
{
req_msg->offset44 = cpu_to_be32((resp_timeout << 3) |
(be32_to_cpu(req_msg->offset44) & 0xFFFFFF07));
}
static inline u8 cm_req_get_retry_count(struct cm_req_msg *req_msg)
{
return (u8) (be32_to_cpu(req_msg->offset44) & 0x7);
}
static inline void cm_req_set_retry_count(struct cm_req_msg *req_msg,
u8 retry_count)
{
req_msg->offset44 = cpu_to_be32((retry_count & 0x7) |
(be32_to_cpu(req_msg->offset44) & 0xFFFFFFF8));
}
static inline u8 cm_req_get_path_mtu(struct cm_req_msg *req_msg)
{
return req_msg->offset50 >> 4;
}
static inline void cm_req_set_path_mtu(struct cm_req_msg *req_msg, u8 path_mtu)
{
req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF) | (path_mtu << 4));
}
static inline u8 cm_req_get_rnr_retry_count(struct cm_req_msg *req_msg)
{
return req_msg->offset50 & 0x7;
}
static inline void cm_req_set_rnr_retry_count(struct cm_req_msg *req_msg,
u8 rnr_retry_count)
{
req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF8) |
(rnr_retry_count & 0x7));
}
static inline u8 cm_req_get_max_cm_retries(struct cm_req_msg *req_msg)
{
return req_msg->offset51 >> 4;
}
static inline void cm_req_set_max_cm_retries(struct cm_req_msg *req_msg,
u8 retries)
{
req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF) | (retries << 4));
}
static inline u8 cm_req_get_srq(struct cm_req_msg *req_msg)
{
return (req_msg->offset51 & 0x8) >> 3;
}
static inline void cm_req_set_srq(struct cm_req_msg *req_msg, u8 srq)
{
req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF7) |
((srq & 0x1) << 3));
}
static inline __be32 cm_req_get_primary_flow_label(struct cm_req_msg *req_msg)
{
return cpu_to_be32(be32_to_cpu(req_msg->primary_offset88) >> 12);
}
static inline void cm_req_set_primary_flow_label(struct cm_req_msg *req_msg,
__be32 flow_label)
{
req_msg->primary_offset88 = cpu_to_be32(
(be32_to_cpu(req_msg->primary_offset88) &
0x00000FFF) |
(be32_to_cpu(flow_label) << 12));
}
static inline u8 cm_req_get_primary_packet_rate(struct cm_req_msg *req_msg)
{
return (u8) (be32_to_cpu(req_msg->primary_offset88) & 0x3F);
}
static inline void cm_req_set_primary_packet_rate(struct cm_req_msg *req_msg,
u8 rate)
{
req_msg->primary_offset88 = cpu_to_be32(
(be32_to_cpu(req_msg->primary_offset88) &
0xFFFFFFC0) | (rate & 0x3F));
}
static inline u8 cm_req_get_primary_sl(struct cm_req_msg *req_msg)
{
return (u8) (req_msg->primary_offset94 >> 4);
}
static inline void cm_req_set_primary_sl(struct cm_req_msg *req_msg, u8 sl)
{
req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0x0F) |
(sl << 4));
}
static inline u8 cm_req_get_primary_subnet_local(struct cm_req_msg *req_msg)
{
return (u8) ((req_msg->primary_offset94 & 0x08) >> 3);
}
static inline void cm_req_set_primary_subnet_local(struct cm_req_msg *req_msg,
u8 subnet_local)
{
req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0xF7) |
((subnet_local & 0x1) << 3));
}
static inline u8 cm_req_get_primary_local_ack_timeout(struct cm_req_msg *req_msg)
{
return (u8) (req_msg->primary_offset95 >> 3);
}
static inline void cm_req_set_primary_local_ack_timeout(struct cm_req_msg *req_msg,
u8 local_ack_timeout)
{
req_msg->primary_offset95 = (u8) ((req_msg->primary_offset95 & 0x07) |
(local_ack_timeout << 3));
}
static inline __be32 cm_req_get_alt_flow_label(struct cm_req_msg *req_msg)
{
return cpu_to_be32(be32_to_cpu(req_msg->alt_offset132) >> 12);
}
static inline void cm_req_set_alt_flow_label(struct cm_req_msg *req_msg,
__be32 flow_label)
{
req_msg->alt_offset132 = cpu_to_be32(
(be32_to_cpu(req_msg->alt_offset132) &
0x00000FFF) |
(be32_to_cpu(flow_label) << 12));
}
static inline u8 cm_req_get_alt_packet_rate(struct cm_req_msg *req_msg)
{
return (u8) (be32_to_cpu(req_msg->alt_offset132) & 0x3F);
}
static inline void cm_req_set_alt_packet_rate(struct cm_req_msg *req_msg,
u8 rate)
{
req_msg->alt_offset132 = cpu_to_be32(
(be32_to_cpu(req_msg->alt_offset132) &
0xFFFFFFC0) | (rate & 0x3F));
}
static inline u8 cm_req_get_alt_sl(struct cm_req_msg *req_msg)
{
return (u8) (req_msg->alt_offset138 >> 4);
}
static inline void cm_req_set_alt_sl(struct cm_req_msg *req_msg, u8 sl)
{
req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0x0F) |
(sl << 4));
}
static inline u8 cm_req_get_alt_subnet_local(struct cm_req_msg *req_msg)
{
return (u8) ((req_msg->alt_offset138 & 0x08) >> 3);
}
static inline void cm_req_set_alt_subnet_local(struct cm_req_msg *req_msg,
u8 subnet_local)
{
req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0xF7) |
((subnet_local & 0x1) << 3));
}
static inline u8 cm_req_get_alt_local_ack_timeout(struct cm_req_msg *req_msg)
{
return (u8) (req_msg->alt_offset139 >> 3);
}
static inline void cm_req_set_alt_local_ack_timeout(struct cm_req_msg *req_msg,
u8 local_ack_timeout)
{
req_msg->alt_offset139 = (u8) ((req_msg->alt_offset139 & 0x07) |
(local_ack_timeout << 3));
}
/* Message REJected or MRAed */
enum cm_msg_response {
CM_MSG_RESPONSE_REQ = 0x0,
CM_MSG_RESPONSE_REP = 0x1,
CM_MSG_RESPONSE_OTHER = 0x2
};
struct cm_mra_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 remote_comm_id;
/* message MRAed:2, rsvd:6 */
u8 offset8;
/* service timeout:5, rsvd:3 */
u8 offset9;
u8 private_data[IB_CM_MRA_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
static inline u8 cm_mra_get_msg_mraed(struct cm_mra_msg *mra_msg)
{
return (u8) (mra_msg->offset8 >> 6);
}
static inline void cm_mra_set_msg_mraed(struct cm_mra_msg *mra_msg, u8 msg)
{
mra_msg->offset8 = (u8) ((mra_msg->offset8 & 0x3F) | (msg << 6));
}
static inline u8 cm_mra_get_service_timeout(struct cm_mra_msg *mra_msg)
{
return (u8) (mra_msg->offset9 >> 3);
}
static inline void cm_mra_set_service_timeout(struct cm_mra_msg *mra_msg,
u8 service_timeout)
{
mra_msg->offset9 = (u8) ((mra_msg->offset9 & 0x07) |
(service_timeout << 3));
}
struct cm_rej_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 remote_comm_id;
/* message REJected:2, rsvd:6 */
u8 offset8;
/* reject info length:7, rsvd:1. */
u8 offset9;
__be16 reason;
u8 ari[IB_CM_REJ_ARI_LENGTH];
u8 private_data[IB_CM_REJ_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
static inline u8 cm_rej_get_msg_rejected(struct cm_rej_msg *rej_msg)
{
return (u8) (rej_msg->offset8 >> 6);
}
static inline void cm_rej_set_msg_rejected(struct cm_rej_msg *rej_msg, u8 msg)
{
rej_msg->offset8 = (u8) ((rej_msg->offset8 & 0x3F) | (msg << 6));
}
static inline u8 cm_rej_get_reject_info_len(struct cm_rej_msg *rej_msg)
{
return (u8) (rej_msg->offset9 >> 1);
}
static inline void cm_rej_set_reject_info_len(struct cm_rej_msg *rej_msg,
u8 len)
{
rej_msg->offset9 = (u8) ((rej_msg->offset9 & 0x1) | (len << 1));
}
struct cm_rep_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 remote_comm_id;
__be32 local_qkey;
/* local QPN:24, rsvd:8 */
__be32 offset12;
/* local EECN:24, rsvd:8 */
__be32 offset16;
/* starting PSN:24 rsvd:8 */
__be32 offset20;
u8 resp_resources;
u8 initiator_depth;
/* target ACK delay:5, failover accepted:2, end-to-end flow control:1 */
u8 offset26;
/* RNR retry count:3, SRQ:1, rsvd:5 */
u8 offset27;
__be64 local_ca_guid;
u8 private_data[IB_CM_REP_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
static inline __be32 cm_rep_get_local_qpn(struct cm_rep_msg *rep_msg)
{
return cpu_to_be32(be32_to_cpu(rep_msg->offset12) >> 8);
}
static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn)
{
rep_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
(be32_to_cpu(rep_msg->offset12) & 0x000000FF));
}
static inline __be32 cm_rep_get_local_eecn(struct cm_rep_msg *rep_msg)
{
return cpu_to_be32(be32_to_cpu(rep_msg->offset16) >> 8);
}
static inline void cm_rep_set_local_eecn(struct cm_rep_msg *rep_msg, __be32 eecn)
{
rep_msg->offset16 = cpu_to_be32((be32_to_cpu(eecn) << 8) |
(be32_to_cpu(rep_msg->offset16) & 0x000000FF));
}
static inline __be32 cm_rep_get_qpn(struct cm_rep_msg *rep_msg, enum ib_qp_type qp_type)
{
return (qp_type == IB_QPT_XRC_INI) ?
cm_rep_get_local_eecn(rep_msg) : cm_rep_get_local_qpn(rep_msg);
}
static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg)
{
return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8);
}
static inline void cm_rep_set_starting_psn(struct cm_rep_msg *rep_msg,
__be32 starting_psn)
{
rep_msg->offset20 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) |
(be32_to_cpu(rep_msg->offset20) & 0x000000FF));
}
static inline u8 cm_rep_get_target_ack_delay(struct cm_rep_msg *rep_msg)
{
return (u8) (rep_msg->offset26 >> 3);
}
static inline void cm_rep_set_target_ack_delay(struct cm_rep_msg *rep_msg,
u8 target_ack_delay)
{
rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0x07) |
(target_ack_delay << 3));
}
static inline u8 cm_rep_get_failover(struct cm_rep_msg *rep_msg)
{
return (u8) ((rep_msg->offset26 & 0x06) >> 1);
}
static inline void cm_rep_set_failover(struct cm_rep_msg *rep_msg, u8 failover)
{
rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xF9) |
((failover & 0x3) << 1));
}
static inline u8 cm_rep_get_flow_ctrl(struct cm_rep_msg *rep_msg)
{
return (u8) (rep_msg->offset26 & 0x01);
}
static inline void cm_rep_set_flow_ctrl(struct cm_rep_msg *rep_msg,
u8 flow_ctrl)
{
rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xFE) |
(flow_ctrl & 0x1));
}
static inline u8 cm_rep_get_rnr_retry_count(struct cm_rep_msg *rep_msg)
{
return (u8) (rep_msg->offset27 >> 5);
}
static inline void cm_rep_set_rnr_retry_count(struct cm_rep_msg *rep_msg,
u8 rnr_retry_count)
{
rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0x1F) |
(rnr_retry_count << 5));
}
static inline u8 cm_rep_get_srq(struct cm_rep_msg *rep_msg)
{
return (u8) ((rep_msg->offset27 >> 4) & 0x1);
}
static inline void cm_rep_set_srq(struct cm_rep_msg *rep_msg, u8 srq)
{
rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0xEF) |
((srq & 0x1) << 4));
}
struct cm_rtu_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 remote_comm_id;
u8 private_data[IB_CM_RTU_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
struct cm_dreq_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 remote_comm_id;
/* remote QPN/EECN:24, rsvd:8 */
__be32 offset8;
u8 private_data[IB_CM_DREQ_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
static inline __be32 cm_dreq_get_remote_qpn(struct cm_dreq_msg *dreq_msg)
{
return cpu_to_be32(be32_to_cpu(dreq_msg->offset8) >> 8);
}
static inline void cm_dreq_set_remote_qpn(struct cm_dreq_msg *dreq_msg, __be32 qpn)
{
dreq_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
(be32_to_cpu(dreq_msg->offset8) & 0x000000FF));
}
struct cm_drep_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 remote_comm_id;
u8 private_data[IB_CM_DREP_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
struct cm_lap_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 remote_comm_id;
__be32 rsvd8;
/* remote QPN/EECN:24, remote CM response timeout:5, rsvd:3 */
__be32 offset12;
__be32 rsvd16;
__be16 alt_local_lid;
__be16 alt_remote_lid;
union ib_gid alt_local_gid;
union ib_gid alt_remote_gid;
/* flow label:20, rsvd:4, traffic class:8 */
__be32 offset56;
u8 alt_hop_limit;
/* rsvd:2, packet rate:6 */
u8 offset61;
/* SL:4, subnet local:1, rsvd:3 */
u8 offset62;
/* local ACK timeout:5, rsvd:3 */
u8 offset63;
u8 private_data[IB_CM_LAP_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
static inline __be32 cm_lap_get_remote_qpn(struct cm_lap_msg *lap_msg)
{
return cpu_to_be32(be32_to_cpu(lap_msg->offset12) >> 8);
}
static inline void cm_lap_set_remote_qpn(struct cm_lap_msg *lap_msg, __be32 qpn)
{
lap_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
(be32_to_cpu(lap_msg->offset12) &
0x000000FF));
}
static inline u8 cm_lap_get_remote_resp_timeout(struct cm_lap_msg *lap_msg)
{
return (u8) ((be32_to_cpu(lap_msg->offset12) & 0xF8) >> 3);
}
static inline void cm_lap_set_remote_resp_timeout(struct cm_lap_msg *lap_msg,
u8 resp_timeout)
{
lap_msg->offset12 = cpu_to_be32((resp_timeout << 3) |
(be32_to_cpu(lap_msg->offset12) &
0xFFFFFF07));
}
static inline __be32 cm_lap_get_flow_label(struct cm_lap_msg *lap_msg)
{
return cpu_to_be32(be32_to_cpu(lap_msg->offset56) >> 12);
}
static inline void cm_lap_set_flow_label(struct cm_lap_msg *lap_msg,
__be32 flow_label)
{
lap_msg->offset56 = cpu_to_be32(
(be32_to_cpu(lap_msg->offset56) & 0x00000FFF) |
(be32_to_cpu(flow_label) << 12));
}
static inline u8 cm_lap_get_traffic_class(struct cm_lap_msg *lap_msg)
{
return (u8) be32_to_cpu(lap_msg->offset56);
}
static inline void cm_lap_set_traffic_class(struct cm_lap_msg *lap_msg,
u8 traffic_class)
{
lap_msg->offset56 = cpu_to_be32(traffic_class |
(be32_to_cpu(lap_msg->offset56) &
0xFFFFFF00));
}
static inline u8 cm_lap_get_packet_rate(struct cm_lap_msg *lap_msg)
{
return lap_msg->offset61 & 0x3F;
}
static inline void cm_lap_set_packet_rate(struct cm_lap_msg *lap_msg,
u8 packet_rate)
{
lap_msg->offset61 = (packet_rate & 0x3F) | (lap_msg->offset61 & 0xC0);
}
static inline u8 cm_lap_get_sl(struct cm_lap_msg *lap_msg)
{
return lap_msg->offset62 >> 4;
}
static inline void cm_lap_set_sl(struct cm_lap_msg *lap_msg, u8 sl)
{
lap_msg->offset62 = (sl << 4) | (lap_msg->offset62 & 0x0F);
}
static inline u8 cm_lap_get_subnet_local(struct cm_lap_msg *lap_msg)
{
return (lap_msg->offset62 >> 3) & 0x1;
}
static inline void cm_lap_set_subnet_local(struct cm_lap_msg *lap_msg,
u8 subnet_local)
{
lap_msg->offset62 = ((subnet_local & 0x1) << 3) |
(lap_msg->offset62 & 0xF7);
}
static inline u8 cm_lap_get_local_ack_timeout(struct cm_lap_msg *lap_msg)
{
return lap_msg->offset63 >> 3;
}
static inline void cm_lap_set_local_ack_timeout(struct cm_lap_msg *lap_msg,
u8 local_ack_timeout)
{
lap_msg->offset63 = (local_ack_timeout << 3) |
(lap_msg->offset63 & 0x07);
}
struct cm_apr_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 remote_comm_id;
u8 info_length;
u8 ap_status;
__be16 rsvd;
u8 info[IB_CM_APR_INFO_LENGTH];
u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
struct cm_sidr_req_msg {
struct ib_mad_hdr hdr;
__be32 request_id;
__be16 pkey;
__be16 rsvd;
__be64 service_id;
u8 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
struct cm_sidr_rep_msg {
struct ib_mad_hdr hdr;
__be32 request_id;
u8 status;
u8 info_length;
__be16 rsvd;
/* QPN:24, rsvd:8 */
__be32 offset8;
__be64 service_id;
__be32 qkey;
u8 info[IB_CM_SIDR_REP_INFO_LENGTH];
u8 private_data[IB_CM_SIDR_REP_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
static inline __be32 cm_sidr_rep_get_qpn(struct cm_sidr_rep_msg *sidr_rep_msg)
{
return cpu_to_be32(be32_to_cpu(sidr_rep_msg->offset8) >> 8);
}
static inline void cm_sidr_rep_set_qpn(struct cm_sidr_rep_msg *sidr_rep_msg,
__be32 qpn)
{
sidr_rep_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
(be32_to_cpu(sidr_rep_msg->offset8) &
0x000000FF));
}
#endif /* CM_MSGS_H */
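
All of the accessors above follow one packing convention: a field narrower than its storage word is kept in big-endian layout, so each setter converts to CPU order, masks out the field's bit positions, ORs in the new value, and converts back, while each getter shifts and masks in CPU order. Below is a standalone user-space illustration of the 24-bit QPN / 8-bit responder-resources layout of offset32 (htonl/ntohl stand in for the kernel's cpu_to_be32/be32_to_cpu; this mirrors cm_req_set_local_qpn() and cm_req_get_resp_res() but is not kernel code):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	uint32_t offset32 = htonl(0x00000055);	/* responder resources = 0x55 */
	uint32_t qpn      = htonl(0x000ABCDE);	/* 24-bit local QPN */

	/* set_local_qpn: shift the QPN into the top three bytes, keep byte 0 */
	offset32 = htonl((ntohl(qpn) << 8) | (ntohl(offset32) & 0x000000FF));

	/* get_local_qpn / get_resp_res mirror the layout comments above */
	printf("local QPN = 0x%06X, resp_res = 0x%02X\n",
	       (unsigned)(ntohl(offset32) >> 8),
	       (unsigned)(ntohl(offset32) & 0xFF));
	return 0;	/* prints: local QPN = 0x0ABCDE, resp_res = 0x55 */
}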

File diff suppressed because it is too large


@@ -0,0 +1,54 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _CORE_PRIV_H
#define _CORE_PRIV_H
#include <linux/list.h>
#include <linux/spinlock.h>
#include <rdma/ib_verbs.h>
int ib_device_register_sysfs(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *));
void ib_device_unregister_sysfs(struct ib_device *device);
int ib_sysfs_setup(void);
void ib_sysfs_cleanup(void);
int ib_cache_setup(void);
void ib_cache_cleanup(void);
int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
struct ib_qp_attr *qp_attr, int *qp_attr_mask);
#endif /* _CORE_PRIV_H */


@@ -0,0 +1,785 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/module.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <rdma/rdma_netlink.h>
#include "core_priv.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("core kernel InfiniBand API");
MODULE_LICENSE("Dual BSD/GPL");
struct ib_client_data {
struct list_head list;
struct ib_client *client;
void * data;
};
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);
static LIST_HEAD(device_list);
static LIST_HEAD(client_list);
/*
* device_mutex protects access to both device_list and client_list.
* There's no real point to using multiple locks or something fancier
* like an rwsem: we always access both lists, and we're always
* modifying one list or the other list. In any case this is not a
* hot path so there's no point in trying to optimize.
*/
static DEFINE_MUTEX(device_mutex);
static int ib_device_check_mandatory(struct ib_device *device)
{
#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x }
static const struct {
size_t offset;
char *name;
} mandatory_table[] = {
IB_MANDATORY_FUNC(query_device),
IB_MANDATORY_FUNC(query_port),
IB_MANDATORY_FUNC(query_pkey),
IB_MANDATORY_FUNC(query_gid),
IB_MANDATORY_FUNC(alloc_pd),
IB_MANDATORY_FUNC(dealloc_pd),
IB_MANDATORY_FUNC(create_ah),
IB_MANDATORY_FUNC(destroy_ah),
IB_MANDATORY_FUNC(create_qp),
IB_MANDATORY_FUNC(modify_qp),
IB_MANDATORY_FUNC(destroy_qp),
IB_MANDATORY_FUNC(post_send),
IB_MANDATORY_FUNC(post_recv),
IB_MANDATORY_FUNC(create_cq),
IB_MANDATORY_FUNC(destroy_cq),
IB_MANDATORY_FUNC(poll_cq),
IB_MANDATORY_FUNC(req_notify_cq),
IB_MANDATORY_FUNC(get_dma_mr),
IB_MANDATORY_FUNC(dereg_mr)
};
int i;
for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
printk(KERN_WARNING "Device %s is missing mandatory function %s\n",
device->name, mandatory_table[i].name);
return -EINVAL;
}
}
return 0;
}
static struct ib_device *__ib_device_get_by_name(const char *name)
{
struct ib_device *device;
list_for_each_entry(device, &device_list, core_list)
if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX))
return device;
return NULL;
}
static int alloc_name(char *name)
{
unsigned long *inuse;
char buf[IB_DEVICE_NAME_MAX];
struct ib_device *device;
int i;
inuse = (unsigned long *) get_zeroed_page(GFP_KERNEL);
if (!inuse)
return -ENOMEM;
list_for_each_entry(device, &device_list, core_list) {
if (!sscanf(device->name, name, &i))
continue;
if (i < 0 || i >= PAGE_SIZE * 8)
continue;
snprintf(buf, sizeof buf, name, i);
if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX))
set_bit(i, inuse);
}
i = find_first_zero_bit(inuse, PAGE_SIZE * 8);
free_page((unsigned long) inuse);
snprintf(buf, sizeof buf, name, i);
if (__ib_device_get_by_name(buf))
return -ENFILE;
strlcpy(name, buf, IB_DEVICE_NAME_MAX);
return 0;
}
static int start_port(struct ib_device *device)
{
return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
}
static int end_port(struct ib_device *device)
{
return (device->node_type == RDMA_NODE_IB_SWITCH) ?
0 : device->phys_port_cnt;
}
/**
* ib_alloc_device - allocate an IB device struct
* @size:size of structure to allocate
*
* Low-level drivers should use ib_alloc_device() to allocate &struct
* ib_device. @size is the size of the structure to be allocated,
* including any private data used by the low-level driver.
* ib_dealloc_device() must be used to free structures allocated with
* ib_alloc_device().
*/
struct ib_device *ib_alloc_device(size_t size)
{
BUG_ON(size < sizeof (struct ib_device));
return kzalloc(size, GFP_KERNEL);
}
EXPORT_SYMBOL(ib_alloc_device);
/**
* ib_dealloc_device - free an IB device struct
* @device:structure to free
*
* Free a structure allocated with ib_alloc_device().
*/
void ib_dealloc_device(struct ib_device *device)
{
if (device->reg_state == IB_DEV_UNINITIALIZED) {
kfree(device);
return;
}
BUG_ON(device->reg_state != IB_DEV_UNREGISTERED);
kobject_put(&device->dev.kobj);
}
EXPORT_SYMBOL(ib_dealloc_device);
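/*
 * Illustrative sketch (not part of the original file): a low-level driver
 * typically embeds struct ib_device at the start of its own private
 * structure and passes the full size to ib_alloc_device(), so the two
 * pointers alias.  "struct example_hca" and its fields are hypothetical.
 *
 *	struct example_hca {
 *		struct ib_device ibdev;
 *		spinlock_t       lock;
 *	};
 *
 *	struct example_hca *hca =
 *		(struct example_hca *) ib_alloc_device(sizeof *hca);
 *	if (!hca)
 *		return -ENOMEM;
 *	...
 *	ib_dealloc_device(&hca->ibdev);
 */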
static int add_client_context(struct ib_device *device, struct ib_client *client)
{
struct ib_client_data *context;
unsigned long flags;
context = kmalloc(sizeof *context, GFP_KERNEL);
if (!context) {
printk(KERN_WARNING "Couldn't allocate client context for %s/%s\n",
device->name, client->name);
return -ENOMEM;
}
context->client = client;
context->data = NULL;
spin_lock_irqsave(&device->client_data_lock, flags);
list_add(&context->list, &device->client_data_list);
spin_unlock_irqrestore(&device->client_data_lock, flags);
return 0;
}
static int read_port_table_lengths(struct ib_device *device)
{
struct ib_port_attr *tprops = NULL;
int num_ports, ret = -ENOMEM;
u8 port_index;
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops)
goto out;
num_ports = end_port(device) - start_port(device) + 1;
device->pkey_tbl_len = kmalloc(sizeof *device->pkey_tbl_len * num_ports,
GFP_KERNEL);
device->gid_tbl_len = kmalloc(sizeof *device->gid_tbl_len * num_ports,
GFP_KERNEL);
if (!device->pkey_tbl_len || !device->gid_tbl_len)
goto err;
for (port_index = 0; port_index < num_ports; ++port_index) {
ret = ib_query_port(device, port_index + start_port(device),
tprops);
if (ret)
goto err;
device->pkey_tbl_len[port_index] = tprops->pkey_tbl_len;
device->gid_tbl_len[port_index] = tprops->gid_tbl_len;
}
ret = 0;
goto out;
err:
kfree(device->gid_tbl_len);
kfree(device->pkey_tbl_len);
out:
kfree(tprops);
return ret;
}
/**
* ib_register_device - Register an IB device with IB core
* @device:Device to register
*
* Low-level drivers use ib_register_device() to register their
* devices with the IB core. All registered clients will receive a
* callback for each device that is added. @device must be allocated
* with ib_alloc_device().
*/
int ib_register_device(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *))
{
int ret;
mutex_lock(&device_mutex);
if (strchr(device->name, '%')) {
ret = alloc_name(device->name);
if (ret)
goto out;
}
if (ib_device_check_mandatory(device)) {
ret = -EINVAL;
goto out;
}
INIT_LIST_HEAD(&device->event_handler_list);
INIT_LIST_HEAD(&device->client_data_list);
spin_lock_init(&device->event_handler_lock);
spin_lock_init(&device->client_data_lock);
ret = read_port_table_lengths(device);
if (ret) {
printk(KERN_WARNING "Couldn't create table lengths cache for device %s\n",
device->name);
goto out;
}
ret = ib_device_register_sysfs(device, port_callback);
if (ret) {
printk(KERN_WARNING "Couldn't register device %s with driver model\n",
device->name);
kfree(device->gid_tbl_len);
kfree(device->pkey_tbl_len);
goto out;
}
list_add_tail(&device->core_list, &device_list);
device->reg_state = IB_DEV_REGISTERED;
{
struct ib_client *client;
list_for_each_entry(client, &client_list, list)
if (client->add && !add_client_context(device, client))
client->add(device);
}
out:
mutex_unlock(&device_mutex);
return ret;
}
EXPORT_SYMBOL(ib_register_device);
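/*
 * Illustrative sketch (not part of the original file): after filling in the
 * methods checked by ib_device_check_mandatory(), a driver registers the
 * device.  A '%d' in the name asks alloc_name() to pick the next free index
 * (e.g. "example_%d" becomes "example_0").  All names here are hypothetical.
 *
 *	strlcpy(hca->ibdev.name, "example_%d", IB_DEVICE_NAME_MAX);
 *	hca->ibdev.node_type     = RDMA_NODE_IB_CA;
 *	hca->ibdev.phys_port_cnt = 1;
 *	... set query_device, query_port, create_qp, ... ...
 *	ret = ib_register_device(&hca->ibdev, NULL);
 *	if (ret)
 *		goto err_free;
 */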
/**
* ib_unregister_device - Unregister an IB device
* @device:Device to unregister
*
* Unregister an IB device. All clients will receive a remove callback.
*/
void ib_unregister_device(struct ib_device *device)
{
struct ib_client *client;
struct ib_client_data *context, *tmp;
unsigned long flags;
mutex_lock(&device_mutex);
list_for_each_entry_reverse(client, &client_list, list)
if (client->remove)
client->remove(device);
list_del(&device->core_list);
kfree(device->gid_tbl_len);
kfree(device->pkey_tbl_len);
mutex_unlock(&device_mutex);
ib_device_unregister_sysfs(device);
spin_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
kfree(context);
spin_unlock_irqrestore(&device->client_data_lock, flags);
device->reg_state = IB_DEV_UNREGISTERED;
}
EXPORT_SYMBOL(ib_unregister_device);
/**
* ib_register_client - Register an IB client
* @client:Client to register
*
* Upper level users of the IB drivers can use ib_register_client() to
* register callbacks for IB device addition and removal. When an IB
* device is added, each registered client's add method will be called
* (in the order the clients were registered), and when a device is
* removed, each client's remove method will be called (in the reverse
* order that clients were registered). In addition, when
* ib_register_client() is called, the client will receive an add
* callback for all devices already registered.
*/
int ib_register_client(struct ib_client *client)
{
struct ib_device *device;
mutex_lock(&device_mutex);
list_add_tail(&client->list, &client_list);
list_for_each_entry(device, &device_list, core_list)
if (client->add && !add_client_context(device, client))
client->add(device);
mutex_unlock(&device_mutex);
return 0;
}
EXPORT_SYMBOL(ib_register_client);
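/*
 * Illustrative sketch (not part of the original file): a minimal client that
 * is told about every IB device, including devices registered before the
 * client itself.  All identifiers prefixed "example_" are hypothetical.
 *
 *	static void example_add_one(struct ib_device *device)
 *	{
 *		pr_info("example: device %s added\n", device->name);
 *	}
 *
 *	static void example_remove_one(struct ib_device *device)
 *	{
 *		pr_info("example: device %s removed\n", device->name);
 *	}
 *
 *	static struct ib_client example_client = {
 *		.name   = "example",
 *		.add    = example_add_one,
 *		.remove = example_remove_one,
 *	};
 *
 *	ib_register_client(&example_client);
 *	...
 *	ib_unregister_client(&example_client);
 */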
/**
* ib_unregister_client - Unregister an IB client
* @client:Client to unregister
*
* Upper level users use ib_unregister_client() to remove their client
* registration. When ib_unregister_client() is called, the client
* will receive a remove callback for each IB device still registered.
*/
void ib_unregister_client(struct ib_client *client)
{
struct ib_client_data *context, *tmp;
struct ib_device *device;
unsigned long flags;
mutex_lock(&device_mutex);
list_for_each_entry(device, &device_list, core_list) {
if (client->remove)
client->remove(device);
spin_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
if (context->client == client) {
list_del(&context->list);
kfree(context);
}
spin_unlock_irqrestore(&device->client_data_lock, flags);
}
list_del(&client->list);
mutex_unlock(&device_mutex);
}
EXPORT_SYMBOL(ib_unregister_client);
/**
* ib_get_client_data - Get IB client context
* @device:Device to get context for
* @client:Client to get context for
*
* ib_get_client_data() returns client context set with
* ib_set_client_data().
*/
void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
{
struct ib_client_data *context;
void *ret = NULL;
unsigned long flags;
spin_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry(context, &device->client_data_list, list)
if (context->client == client) {
ret = context->data;
break;
}
spin_unlock_irqrestore(&device->client_data_lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_get_client_data);
/**
* ib_set_client_data - Set IB client context
* @device:Device to set context for
* @client:Client to set context for
* @data:Context to set
*
* ib_set_client_data() sets client context that can be retrieved with
* ib_get_client_data().
*/
void ib_set_client_data(struct ib_device *device, struct ib_client *client,
void *data)
{
struct ib_client_data *context;
unsigned long flags;
spin_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry(context, &device->client_data_list, list)
if (context->client == client) {
context->data = data;
goto out;
}
printk(KERN_WARNING "No client context found for %s/%s\n",
device->name, client->name);
out:
spin_unlock_irqrestore(&device->client_data_lock, flags);
}
EXPORT_SYMBOL(ib_set_client_data);
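/*
 * Illustrative sketch (not part of the original file): a client usually
 * allocates per-device state in its add callback and looks it up later with
 * ib_get_client_data().  "example_client" and "struct example_state" are
 * hypothetical.
 *
 *	static void example_add_one(struct ib_device *device)
 *	{
 *		struct example_state *st = kzalloc(sizeof *st, GFP_KERNEL);
 *		if (!st)
 *			return;
 *		ib_set_client_data(device, &example_client, st);
 *	}
 *
 *	static void example_remove_one(struct ib_device *device)
 *	{
 *		kfree(ib_get_client_data(device, &example_client));
 *	}
 */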
/**
* ib_register_event_handler - Register an IB event handler
* @event_handler:Handler to register
*
* ib_register_event_handler() registers an event handler that will be
* called back when asynchronous IB events occur (as defined in
* chapter 11 of the InfiniBand Architecture Specification). This
* callback may occur in interrupt context.
*/
int ib_register_event_handler(struct ib_event_handler *event_handler)
{
unsigned long flags;
spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
list_add_tail(&event_handler->list,
&event_handler->device->event_handler_list);
spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
return 0;
}
EXPORT_SYMBOL(ib_register_event_handler);
/**
* ib_unregister_event_handler - Unregister an event handler
* @event_handler:Handler to unregister
*
* Unregister an event handler registered with
* ib_register_event_handler().
*/
int ib_unregister_event_handler(struct ib_event_handler *event_handler)
{
unsigned long flags;
spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
list_del(&event_handler->list);
spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
return 0;
}
EXPORT_SYMBOL(ib_unregister_event_handler);
/**
* ib_dispatch_event - Dispatch an asynchronous event
* @event:Event to dispatch
*
* Low-level drivers must call ib_dispatch_event() to dispatch the
* event to all registered event handlers when an asynchronous event
* occurs.
*/
void ib_dispatch_event(struct ib_event *event)
{
unsigned long flags;
struct ib_event_handler *handler;
spin_lock_irqsave(&event->device->event_handler_lock, flags);
list_for_each_entry(handler, &event->device->event_handler_list, list)
handler->handler(handler, event);
spin_unlock_irqrestore(&event->device->event_handler_lock, flags);
}
EXPORT_SYMBOL(ib_dispatch_event);
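/*
 * Illustrative sketch (not part of the original file): consumers attach an
 * event handler per device with the INIT_IB_EVENT_HANDLER() helper from
 * <rdma/ib_verbs.h>; since the callback may run in interrupt context it
 * must not sleep.  "example_event" is hypothetical.
 *
 *	static void example_event(struct ib_event_handler *handler,
 *				  struct ib_event *event)
 *	{
 *		pr_info("example: event %d on %s port %d\n",
 *			event->event, event->device->name,
 *			event->element.port_num);
 *	}
 *
 *	struct ib_event_handler handler;
 *
 *	INIT_IB_EVENT_HANDLER(&handler, device, example_event);
 *	ib_register_event_handler(&handler);
 *	...
 *	ib_unregister_event_handler(&handler);
 */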
/**
* ib_query_device - Query IB device attributes
* @device:Device to query
* @device_attr:Device attributes
*
* ib_query_device() returns the attributes of a device through the
* @device_attr pointer.
*/
int ib_query_device(struct ib_device *device,
struct ib_device_attr *device_attr)
{
return device->query_device(device, device_attr);
}
EXPORT_SYMBOL(ib_query_device);
/**
* ib_query_port - Query IB port attributes
* @device:Device to query
* @port_num:Port number to query
* @port_attr:Port attributes
*
* ib_query_port() returns the attributes of a port through the
* @port_attr pointer.
*/
int ib_query_port(struct ib_device *device,
u8 port_num,
struct ib_port_attr *port_attr)
{
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
return device->query_port(device, port_num, port_attr);
}
EXPORT_SYMBOL(ib_query_port);
/**
* ib_query_gid - Get GID table entry
* @device:Device to query
* @port_num:Port number to query
* @index:GID table index to query
* @gid:Returned GID
*
* ib_query_gid() fetches the specified GID table entry.
*/
int ib_query_gid(struct ib_device *device,
u8 port_num, int index, union ib_gid *gid)
{
return device->query_gid(device, port_num, index, gid);
}
EXPORT_SYMBOL(ib_query_gid);
/**
* ib_query_pkey - Get P_Key table entry
* @device:Device to query
* @port_num:Port number to query
* @index:P_Key table index to query
* @pkey:Returned P_Key
*
* ib_query_pkey() fetches the specified P_Key table entry.
*/
int ib_query_pkey(struct ib_device *device,
u8 port_num, u16 index, u16 *pkey)
{
return device->query_pkey(device, port_num, index, pkey);
}
EXPORT_SYMBOL(ib_query_pkey);
/**
* ib_modify_device - Change IB device attributes
* @device:Device to modify
* @device_modify_mask:Mask of attributes to change
* @device_modify:New attribute values
*
* ib_modify_device() changes a device's attributes as specified by
* the @device_modify_mask and @device_modify structure.
*/
int ib_modify_device(struct ib_device *device,
int device_modify_mask,
struct ib_device_modify *device_modify)
{
if (!device->modify_device)
return -ENOSYS;
return device->modify_device(device, device_modify_mask,
device_modify);
}
EXPORT_SYMBOL(ib_modify_device);
/**
* ib_modify_port - Modifies the attributes for the specified port.
* @device: The device to modify.
* @port_num: The number of the port to modify.
* @port_modify_mask: Mask used to specify which attributes of the port
* to change.
* @port_modify: New attribute values for the port.
*
* ib_modify_port() changes a port's attributes as specified by the
* @port_modify_mask and @port_modify structure.
*/
int ib_modify_port(struct ib_device *device,
u8 port_num, int port_modify_mask,
struct ib_port_modify *port_modify)
{
if (!device->modify_port)
return -ENOSYS;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
return device->modify_port(device, port_num, port_modify_mask,
port_modify);
}
EXPORT_SYMBOL(ib_modify_port);
/**
* ib_find_gid - Returns the port number and GID table index where
* a specified GID value occurs.
* @device: The device to query.
* @gid: The GID value to search for.
* @port_num: The port number of the device where the GID value was found.
* @index: The index into the GID table where the GID was found. This
* parameter may be NULL.
*/
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
u8 *port_num, u16 *index)
{
union ib_gid tmp_gid;
int ret, port, i;
for (port = start_port(device); port <= end_port(device); ++port) {
for (i = 0; i < device->gid_tbl_len[port - start_port(device)]; ++i) {
ret = ib_query_gid(device, port, i, &tmp_gid);
if (ret)
return ret;
if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
*port_num = port;
if (index)
*index = i;
return 0;
}
}
}
return -ENOENT;
}
EXPORT_SYMBOL(ib_find_gid);
/**
* ib_find_pkey - Returns the PKey table index where a specified
* PKey value occurs.
* @device: The device to query.
* @port_num: The port number of the device to search for the PKey.
* @pkey: The PKey value to search for.
* @index: The index into the PKey table where the PKey was found.
*/
int ib_find_pkey(struct ib_device *device,
u8 port_num, u16 pkey, u16 *index)
{
int ret, i;
u16 tmp_pkey;
int partial_ix = -1;
for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) {
ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
if (ret)
return ret;
if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) {
/* if there is a full-member pkey, take it. */
if (tmp_pkey & 0x8000) {
*index = i;
return 0;
}
if (partial_ix < 0)
partial_ix = i;
}
}
/* no full-member; if a limited-member pkey exists, take it */
if (partial_ix >= 0) {
*index = partial_ix;
return 0;
}
return -ENOENT;
}
EXPORT_SYMBOL(ib_find_pkey);
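/*
 * Illustrative sketch (not part of the original file): a typical lookup
 * first resolves which port owns a GID and then finds the table index of
 * the default partition key 0xffff on that port.  "device" and "gid" are
 * assumed to come from the caller; the variable names are hypothetical.
 *
 *	u8  port_num;
 *	u16 pkey_index;
 *
 *	if (!ib_find_gid(device, &gid, &port_num, NULL) &&
 *	    !ib_find_pkey(device, port_num, 0xffff, &pkey_index))
 *		pr_info("GID found on port %d, default pkey index %d\n",
 *			port_num, pkey_index);
 */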
static int __init ib_core_init(void)
{
int ret;
ib_wq = alloc_workqueue("infiniband", 0, 0);
if (!ib_wq)
return -ENOMEM;
ret = ib_sysfs_setup();
if (ret) {
printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
goto err;
}
ret = ibnl_init();
if (ret) {
printk(KERN_WARNING "Couldn't init IB netlink interface\n");
goto err_sysfs;
}
ret = ib_cache_setup();
if (ret) {
printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n");
goto err_nl;
}
return 0;
err_nl:
ibnl_cleanup();
err_sysfs:
ib_sysfs_cleanup();
err:
destroy_workqueue(ib_wq);
return ret;
}
static void __exit ib_core_cleanup(void)
{
ib_cache_cleanup();
ibnl_cleanup();
ib_sysfs_cleanup();
/* Make sure that any pending umem accounting work is done. */
destroy_workqueue(ib_wq);
}
module_init(ib_core_init);
module_exit(ib_core_cleanup);

@@ -0,0 +1,544 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <linux/kthread.h>
#include <rdma/ib_fmr_pool.h>
#include "core_priv.h"
#define PFX "fmr_pool: "
enum {
IB_FMR_MAX_REMAPS = 32,
IB_FMR_HASH_BITS = 8,
IB_FMR_HASH_SIZE = 1 << IB_FMR_HASH_BITS,
IB_FMR_HASH_MASK = IB_FMR_HASH_SIZE - 1
};
/*
* If an FMR is not in use, then the list member will point to either
* its pool's free_list (if the FMR can be mapped again; that is,
* remap_count < pool->max_remaps) or its pool's dirty_list (if the
* FMR needs to be unmapped before being remapped). In either of
* these cases it is a bug if the ref_count is not 0. In other words,
* if ref_count is > 0, then the list member must not be linked into
* either free_list or dirty_list.
*
* The cache_node member is used to link the FMR into a cache bucket
* (if caching is enabled). This is independent of the reference
* count of the FMR. When a valid FMR is released, its ref_count is
* decremented, and if ref_count reaches 0, the FMR is placed in
* either free_list or dirty_list as appropriate. However, it is not
* removed from the cache and may be "revived" if a call to
* ib_fmr_register_physical() occurs before the FMR is remapped. In
* this case we just increment the ref_count and remove the FMR from
* free_list/dirty_list.
*
* Before we remap an FMR from free_list, we remove it from the cache
* (to prevent another user from obtaining a stale FMR). When an FMR
* is released, we add it to the tail of the free list, so that our
* cache eviction policy is "least recently used."
*
* All manipulation of ref_count, list and cache_node is protected by
* pool_lock to maintain consistency.
*/
struct ib_fmr_pool {
spinlock_t pool_lock;
int pool_size;
int max_pages;
int max_remaps;
int dirty_watermark;
int dirty_len;
struct list_head free_list;
struct list_head dirty_list;
struct hlist_head *cache_bucket;
void (*flush_function)(struct ib_fmr_pool *pool,
void *arg);
void *flush_arg;
struct task_struct *thread;
atomic_t req_ser;
atomic_t flush_ser;
wait_queue_head_t force_wait;
};
static inline u32 ib_fmr_hash(u64 first_page)
{
return jhash_2words((u32) first_page, (u32) (first_page >> 32), 0) &
(IB_FMR_HASH_SIZE - 1);
}
/* Caller must hold pool_lock */
static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,
u64 *page_list,
int page_list_len,
u64 io_virtual_address)
{
struct hlist_head *bucket;
struct ib_pool_fmr *fmr;
if (!pool->cache_bucket)
return NULL;
bucket = pool->cache_bucket + ib_fmr_hash(*page_list);
hlist_for_each_entry(fmr, bucket, cache_node)
if (io_virtual_address == fmr->io_virtual_address &&
page_list_len == fmr->page_list_len &&
!memcmp(page_list, fmr->page_list,
page_list_len * sizeof *page_list))
return fmr;
return NULL;
}
static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
{
int ret;
struct ib_pool_fmr *fmr;
LIST_HEAD(unmap_list);
LIST_HEAD(fmr_list);
spin_lock_irq(&pool->pool_lock);
list_for_each_entry(fmr, &pool->dirty_list, list) {
hlist_del_init(&fmr->cache_node);
fmr->remap_count = 0;
list_add_tail(&fmr->fmr->list, &fmr_list);
#ifdef DEBUG
if (fmr->ref_count != 0) {
printk(KERN_WARNING PFX "Unmapping FMR %p with ref count %d\n",
fmr, fmr->ref_count);
}
#endif
}
list_splice_init(&pool->dirty_list, &unmap_list);
pool->dirty_len = 0;
spin_unlock_irq(&pool->pool_lock);
if (list_empty(&unmap_list)) {
return;
}
ret = ib_unmap_fmr(&fmr_list);
if (ret)
printk(KERN_WARNING PFX "ib_unmap_fmr returned %d\n", ret);
spin_lock_irq(&pool->pool_lock);
list_splice(&unmap_list, &pool->free_list);
spin_unlock_irq(&pool->pool_lock);
}
static int ib_fmr_cleanup_thread(void *pool_ptr)
{
struct ib_fmr_pool *pool = pool_ptr;
do {
if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) {
ib_fmr_batch_release(pool);
atomic_inc(&pool->flush_ser);
wake_up_interruptible(&pool->force_wait);
if (pool->flush_function)
pool->flush_function(pool, pool->flush_arg);
}
set_current_state(TASK_INTERRUPTIBLE);
if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 &&
!kthread_should_stop())
schedule();
__set_current_state(TASK_RUNNING);
} while (!kthread_should_stop());
return 0;
}
/**
* ib_create_fmr_pool - Create an FMR pool
* @pd:Protection domain for FMRs
* @params:FMR pool parameters
*
* Create a pool of FMRs. Return value is pointer to new pool or
* error code if creation failed.
*/
struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
struct ib_fmr_pool_param *params)
{
struct ib_device *device;
struct ib_fmr_pool *pool;
struct ib_device_attr *attr;
int i;
int ret;
int max_remaps;
if (!params)
return ERR_PTR(-EINVAL);
device = pd->device;
if (!device->alloc_fmr || !device->dealloc_fmr ||
!device->map_phys_fmr || !device->unmap_fmr) {
printk(KERN_INFO PFX "Device %s does not support FMRs\n",
device->name);
return ERR_PTR(-ENOSYS);
}
attr = kmalloc(sizeof *attr, GFP_KERNEL);
if (!attr) {
printk(KERN_WARNING PFX "couldn't allocate device attr struct\n");
return ERR_PTR(-ENOMEM);
}
ret = ib_query_device(device, attr);
if (ret) {
printk(KERN_WARNING PFX "couldn't query device: %d\n", ret);
kfree(attr);
return ERR_PTR(ret);
}
if (!attr->max_map_per_fmr)
max_remaps = IB_FMR_MAX_REMAPS;
else
max_remaps = attr->max_map_per_fmr;
kfree(attr);
pool = kmalloc(sizeof *pool, GFP_KERNEL);
if (!pool) {
printk(KERN_WARNING PFX "couldn't allocate pool struct\n");
return ERR_PTR(-ENOMEM);
}
pool->cache_bucket = NULL;
pool->flush_function = params->flush_function;
pool->flush_arg = params->flush_arg;
INIT_LIST_HEAD(&pool->free_list);
INIT_LIST_HEAD(&pool->dirty_list);
if (params->cache) {
pool->cache_bucket =
kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket,
GFP_KERNEL);
if (!pool->cache_bucket) {
printk(KERN_WARNING PFX "Failed to allocate cache in pool\n");
ret = -ENOMEM;
goto out_free_pool;
}
for (i = 0; i < IB_FMR_HASH_SIZE; ++i)
INIT_HLIST_HEAD(pool->cache_bucket + i);
}
pool->pool_size = 0;
pool->max_pages = params->max_pages_per_fmr;
pool->max_remaps = max_remaps;
pool->dirty_watermark = params->dirty_watermark;
pool->dirty_len = 0;
spin_lock_init(&pool->pool_lock);
atomic_set(&pool->req_ser, 0);
atomic_set(&pool->flush_ser, 0);
init_waitqueue_head(&pool->force_wait);
pool->thread = kthread_run(ib_fmr_cleanup_thread,
pool,
"ib_fmr(%s)",
device->name);
if (IS_ERR(pool->thread)) {
printk(KERN_WARNING PFX "couldn't start cleanup thread\n");
ret = PTR_ERR(pool->thread);
goto out_free_pool;
}
{
struct ib_pool_fmr *fmr;
struct ib_fmr_attr fmr_attr = {
.max_pages = params->max_pages_per_fmr,
.max_maps = pool->max_remaps,
.page_shift = params->page_shift
};
int bytes_per_fmr = sizeof *fmr;
if (pool->cache_bucket)
bytes_per_fmr += params->max_pages_per_fmr * sizeof (u64);
for (i = 0; i < params->pool_size; ++i) {
fmr = kmalloc(bytes_per_fmr, GFP_KERNEL);
if (!fmr) {
printk(KERN_WARNING PFX "failed to allocate fmr "
"struct for FMR %d\n", i);
goto out_fail;
}
fmr->pool = pool;
fmr->remap_count = 0;
fmr->ref_count = 0;
INIT_HLIST_NODE(&fmr->cache_node);
fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);
if (IS_ERR(fmr->fmr)) {
printk(KERN_WARNING PFX "fmr_create failed "
"for FMR %d\n", i);
kfree(fmr);
goto out_fail;
}
list_add_tail(&fmr->list, &pool->free_list);
++pool->pool_size;
}
}
return pool;
out_free_pool:
kfree(pool->cache_bucket);
kfree(pool);
return ERR_PTR(ret);
out_fail:
ib_destroy_fmr_pool(pool);
return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(ib_create_fmr_pool);
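/*
 * Illustrative sketch (not part of the original file): a ULP typically
 * creates one pool per protection domain.  The parameter values below are
 * arbitrary examples, not recommendations.
 *
 *	struct ib_fmr_pool_param params = {
 *		.max_pages_per_fmr = 64,
 *		.page_shift        = PAGE_SHIFT,
 *		.access            = IB_ACCESS_LOCAL_WRITE |
 *				     IB_ACCESS_REMOTE_READ |
 *				     IB_ACCESS_REMOTE_WRITE,
 *		.pool_size         = 1024,
 *		.dirty_watermark   = 32,
 *		.cache             = 1,
 *		.flush_function    = NULL,
 *	};
 *	struct ib_fmr_pool *fmr_pool = ib_create_fmr_pool(pd, &params);
 *
 *	if (IS_ERR(fmr_pool))
 *		return PTR_ERR(fmr_pool);
 */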
/**
* ib_destroy_fmr_pool - Free FMR pool
* @pool:FMR pool to free
*
* Destroy an FMR pool and free all associated resources.
*/
void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
{
struct ib_pool_fmr *fmr;
struct ib_pool_fmr *tmp;
LIST_HEAD(fmr_list);
int i;
kthread_stop(pool->thread);
ib_fmr_batch_release(pool);
i = 0;
list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) {
if (fmr->remap_count) {
INIT_LIST_HEAD(&fmr_list);
list_add_tail(&fmr->fmr->list, &fmr_list);
ib_unmap_fmr(&fmr_list);
}
ib_dealloc_fmr(fmr->fmr);
list_del(&fmr->list);
kfree(fmr);
++i;
}
if (i < pool->pool_size)
printk(KERN_WARNING PFX "pool still has %d regions registered\n",
pool->pool_size - i);
kfree(pool->cache_bucket);
kfree(pool);
}
EXPORT_SYMBOL(ib_destroy_fmr_pool);
/**
* ib_flush_fmr_pool - Invalidate all unmapped FMRs
* @pool:FMR pool to flush
*
* Ensure that all unmapped FMRs are fully invalidated.
*/
int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
{
int serial;
struct ib_pool_fmr *fmr, *next;
/*
* The free_list holds FMRs that may have been used
* but have not been remapped enough times to be dirty.
* Put them on the dirty list now so that the cleanup
* thread will reap them too.
*/
spin_lock_irq(&pool->pool_lock);
list_for_each_entry_safe(fmr, next, &pool->free_list, list) {
if (fmr->remap_count > 0)
list_move(&fmr->list, &pool->dirty_list);
}
spin_unlock_irq(&pool->pool_lock);
serial = atomic_inc_return(&pool->req_ser);
wake_up_process(pool->thread);
if (wait_event_interruptible(pool->force_wait,
atomic_read(&pool->flush_ser) - serial >= 0))
return -EINTR;
return 0;
}
EXPORT_SYMBOL(ib_flush_fmr_pool);
/**
* ib_fmr_pool_map_phys -
* @pool:FMR pool to allocate FMR from
* @page_list:List of pages to map
* @list_len:Number of pages in @page_list
* @io_virtual_address:I/O virtual address for new FMR
*
* Map an FMR from an FMR pool.
*/
struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
u64 *page_list,
int list_len,
u64 io_virtual_address)
{
struct ib_fmr_pool *pool = pool_handle;
struct ib_pool_fmr *fmr;
unsigned long flags;
int result;
if (list_len < 1 || list_len > pool->max_pages)
return ERR_PTR(-EINVAL);
spin_lock_irqsave(&pool->pool_lock, flags);
fmr = ib_fmr_cache_lookup(pool,
page_list,
list_len,
io_virtual_address);
if (fmr) {
/* found in cache */
++fmr->ref_count;
if (fmr->ref_count == 1) {
list_del(&fmr->list);
}
spin_unlock_irqrestore(&pool->pool_lock, flags);
return fmr;
}
if (list_empty(&pool->free_list)) {
spin_unlock_irqrestore(&pool->pool_lock, flags);
return ERR_PTR(-EAGAIN);
}
fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list);
list_del(&fmr->list);
hlist_del_init(&fmr->cache_node);
spin_unlock_irqrestore(&pool->pool_lock, flags);
result = ib_map_phys_fmr(fmr->fmr, page_list, list_len,
io_virtual_address);
if (result) {
spin_lock_irqsave(&pool->pool_lock, flags);
list_add(&fmr->list, &pool->free_list);
spin_unlock_irqrestore(&pool->pool_lock, flags);
printk(KERN_WARNING PFX "fmr_map returns %d\n", result);
return ERR_PTR(result);
}
++fmr->remap_count;
fmr->ref_count = 1;
if (pool->cache_bucket) {
fmr->io_virtual_address = io_virtual_address;
fmr->page_list_len = list_len;
memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list));
spin_lock_irqsave(&pool->pool_lock, flags);
hlist_add_head(&fmr->cache_node,
pool->cache_bucket + ib_fmr_hash(fmr->page_list[0]));
spin_unlock_irqrestore(&pool->pool_lock, flags);
}
return fmr;
}
EXPORT_SYMBOL(ib_fmr_pool_map_phys);
/**
* ib_fmr_pool_unmap - Unmap FMR
* @fmr:FMR to unmap
*
* Unmap an FMR. The FMR mapping may remain valid until the FMR is
* reused (or until ib_flush_fmr_pool() is called).
*/
int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
{
struct ib_fmr_pool *pool;
unsigned long flags;
pool = fmr->pool;
spin_lock_irqsave(&pool->pool_lock, flags);
--fmr->ref_count;
if (!fmr->ref_count) {
if (fmr->remap_count < pool->max_remaps) {
list_add_tail(&fmr->list, &pool->free_list);
} else {
list_add_tail(&fmr->list, &pool->dirty_list);
if (++pool->dirty_len >= pool->dirty_watermark) {
atomic_inc(&pool->req_ser);
wake_up_process(pool->thread);
}
}
}
#ifdef DEBUG
if (fmr->ref_count < 0)
printk(KERN_WARNING PFX "FMR %p has ref count %d < 0\n",
fmr, fmr->ref_count);
#endif
spin_unlock_irqrestore(&pool->pool_lock, flags);
return 0;
}
EXPORT_SYMBOL(ib_fmr_pool_unmap);
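/*
 * Illustrative sketch (not part of the original file): mapping a page list
 * through the pool and releasing it again.  "pages", "npages" and "io_addr"
 * are hypothetical; an -EAGAIN from the map call means the free list is
 * currently empty and the caller may retry, e.g. after a pool flush.
 *
 *	struct ib_pool_fmr *fmr;
 *
 *	fmr = ib_fmr_pool_map_phys(fmr_pool, pages, npages, io_addr);
 *	if (IS_ERR(fmr))
 *		return PTR_ERR(fmr);
 *	... post work requests using fmr->fmr->lkey / fmr->fmr->rkey ...
 *	ib_fmr_pool_unmap(fmr);
 */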

File diff suppressed because it is too large

@@ -0,0 +1,62 @@
/*
* Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef IWCM_H
#define IWCM_H
enum iw_cm_state {
IW_CM_STATE_IDLE, /* unbound, inactive */
IW_CM_STATE_LISTEN, /* listen waiting for connect */
IW_CM_STATE_CONN_RECV, /* inbound waiting for user accept */
IW_CM_STATE_CONN_SENT, /* outbound waiting for peer accept */
IW_CM_STATE_ESTABLISHED, /* established */
IW_CM_STATE_CLOSING, /* disconnect */
IW_CM_STATE_DESTROYING /* object being deleted */
};
struct iwcm_id_private {
struct iw_cm_id id;
enum iw_cm_state state;
unsigned long flags;
struct ib_qp *qp;
struct completion destroy_comp;
wait_queue_head_t connect_wait;
struct list_head work_list;
spinlock_t lock;
atomic_t refcount;
struct list_head work_free_list;
};
#define IWCM_F_CALLBACK_DESTROY 1
#define IWCM_F_CONNECT_WAIT 2
#endif /* IWCM_H */

@@ -0,0 +1,685 @@
/*
* Copyright (c) 2014 Intel Corporation. All rights reserved.
* Copyright (c) 2014 Chelsio, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "iwpm_util.h"
static const char iwpm_ulib_name[] = "iWarpPortMapperUser";
static int iwpm_ulib_version = 3;
static int iwpm_user_pid = IWPM_PID_UNDEFINED;
static atomic_t echo_nlmsg_seq;
int iwpm_valid_pid(void)
{
return iwpm_user_pid > 0;
}
EXPORT_SYMBOL(iwpm_valid_pid);
/*
* iwpm_register_pid - Send a netlink query to user space
* for the iwarp port mapper pid
*
* nlmsg attributes:
* [IWPM_NLA_REG_PID_SEQ]
* [IWPM_NLA_REG_IF_NAME]
* [IWPM_NLA_REG_IBDEV_NAME]
* [IWPM_NLA_REG_ULIB_NAME]
*/
int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
{
struct sk_buff *skb = NULL;
struct iwpm_nlmsg_request *nlmsg_request = NULL;
struct nlmsghdr *nlh;
u32 msg_seq;
const char *err_str = "";
int ret = -EINVAL;
if (!iwpm_valid_client(nl_client)) {
err_str = "Invalid port mapper client";
goto pid_query_error;
}
if (iwpm_registered_client(nl_client))
return 0;
skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REG_PID, &nlh, nl_client);
if (!skb) {
err_str = "Unable to create a nlmsg";
goto pid_query_error;
}
nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq, nl_client, GFP_KERNEL);
if (!nlmsg_request) {
err_str = "Unable to allocate netlink request";
goto pid_query_error;
}
msg_seq = atomic_read(&echo_nlmsg_seq);
/* fill in the pid request message */
err_str = "Unable to put attribute of the nlmsg";
ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_REG_PID_SEQ);
if (ret)
goto pid_query_error;
ret = ibnl_put_attr(skb, nlh, IWPM_IFNAME_SIZE,
pm_msg->if_name, IWPM_NLA_REG_IF_NAME);
if (ret)
goto pid_query_error;
ret = ibnl_put_attr(skb, nlh, IWPM_DEVNAME_SIZE,
pm_msg->dev_name, IWPM_NLA_REG_IBDEV_NAME);
if (ret)
goto pid_query_error;
ret = ibnl_put_attr(skb, nlh, IWPM_ULIBNAME_SIZE,
(char *)iwpm_ulib_name, IWPM_NLA_REG_ULIB_NAME);
if (ret)
goto pid_query_error;
pr_debug("%s: Multicasting a nlmsg (dev = %s ifname = %s iwpm = %s)\n",
__func__, pm_msg->dev_name, pm_msg->if_name, iwpm_ulib_name);
ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_IWPM, GFP_KERNEL);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
iwpm_set_registered(nl_client, 1);
iwpm_user_pid = IWPM_PID_UNAVAILABLE;
err_str = "Unable to send a nlmsg";
goto pid_query_error;
}
nlmsg_request->req_buffer = pm_msg;
ret = iwpm_wait_complete_req(nlmsg_request);
return ret;
pid_query_error:
pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
if (skb)
dev_kfree_skb(skb);
if (nlmsg_request)
iwpm_free_nlmsg_request(&nlmsg_request->kref);
return ret;
}
EXPORT_SYMBOL(iwpm_register_pid);
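/*
 * Illustrative sketch (not part of the original file): an iWARP driver first
 * registers itself as a port mapper client with iwpm_init() and then queries
 * the user space daemon's pid.  RDMA_NL_C4IW is just one possible client id,
 * and "dev" / "netdev" are hypothetical driver objects.
 *
 *	struct iwpm_dev_data pm_reg_msg;
 *
 *	iwpm_init(RDMA_NL_C4IW);
 *	memcpy(pm_reg_msg.dev_name, dev->ibdev.name, IWPM_DEVNAME_SIZE);
 *	memcpy(pm_reg_msg.if_name, netdev->name, IWPM_IFNAME_SIZE);
 *	iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW);
 */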
/*
* iwpm_add_mapping - Send a netlink add mapping message
* to the port mapper
* nlmsg attributes:
* [IWPM_NLA_MANAGE_MAPPING_SEQ]
* [IWPM_NLA_MANAGE_ADDR]
*/
int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
{
struct sk_buff *skb = NULL;
struct iwpm_nlmsg_request *nlmsg_request = NULL;
struct nlmsghdr *nlh;
u32 msg_seq;
const char *err_str = "";
int ret = -EINVAL;
if (!iwpm_valid_client(nl_client)) {
err_str = "Invalid port mapper client";
goto add_mapping_error;
}
if (!iwpm_registered_client(nl_client)) {
err_str = "Unregistered port mapper client";
goto add_mapping_error;
}
if (!iwpm_valid_pid())
return 0;
skb = iwpm_create_nlmsg(RDMA_NL_IWPM_ADD_MAPPING, &nlh, nl_client);
if (!skb) {
err_str = "Unable to create a nlmsg";
goto add_mapping_error;
}
nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq, nl_client, GFP_KERNEL);
if (!nlmsg_request) {
err_str = "Unable to allocate netlink request";
goto add_mapping_error;
}
msg_seq = atomic_read(&echo_nlmsg_seq);
/* fill in the add mapping message */
err_str = "Unable to put attribute of the nlmsg";
ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq,
IWPM_NLA_MANAGE_MAPPING_SEQ);
if (ret)
goto add_mapping_error;
ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage),
&pm_msg->loc_addr, IWPM_NLA_MANAGE_ADDR);
if (ret)
goto add_mapping_error;
nlmsg_request->req_buffer = pm_msg;
ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
iwpm_user_pid = IWPM_PID_UNDEFINED;
err_str = "Unable to send a nlmsg";
goto add_mapping_error;
}
ret = iwpm_wait_complete_req(nlmsg_request);
return ret;
add_mapping_error:
pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
if (skb)
dev_kfree_skb(skb);
if (nlmsg_request)
iwpm_free_nlmsg_request(&nlmsg_request->kref);
return ret;
}
EXPORT_SYMBOL(iwpm_add_mapping);
/*
* iwpm_add_and_query_mapping - Send a netlink add and query
* mapping message to the port mapper
* nlmsg attributes:
* [IWPM_NLA_QUERY_MAPPING_SEQ]
* [IWPM_NLA_QUERY_LOCAL_ADDR]
* [IWPM_NLA_QUERY_REMOTE_ADDR]
*/
int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
{
struct sk_buff *skb = NULL;
struct iwpm_nlmsg_request *nlmsg_request = NULL;
struct nlmsghdr *nlh;
u32 msg_seq;
const char *err_str = "";
int ret = -EINVAL;
if (!iwpm_valid_client(nl_client)) {
err_str = "Invalid port mapper client";
goto query_mapping_error;
}
if (!iwpm_registered_client(nl_client)) {
err_str = "Unregistered port mapper client";
goto query_mapping_error;
}
if (!iwpm_valid_pid())
return 0;
ret = -ENOMEM;
skb = iwpm_create_nlmsg(RDMA_NL_IWPM_QUERY_MAPPING, &nlh, nl_client);
if (!skb) {
err_str = "Unable to create a nlmsg";
goto query_mapping_error;
}
nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq,
nl_client, GFP_KERNEL);
if (!nlmsg_request) {
err_str = "Unable to allocate netlink request";
goto query_mapping_error;
}
msg_seq = atomic_read(&echo_nlmsg_seq);
/* fill in the query message */
err_str = "Unable to put attribute of the nlmsg";
ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq,
IWPM_NLA_QUERY_MAPPING_SEQ);
if (ret)
goto query_mapping_error;
ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage),
&pm_msg->loc_addr, IWPM_NLA_QUERY_LOCAL_ADDR);
if (ret)
goto query_mapping_error;
ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage),
&pm_msg->rem_addr, IWPM_NLA_QUERY_REMOTE_ADDR);
if (ret)
goto query_mapping_error;
nlmsg_request->req_buffer = pm_msg;
ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
err_str = "Unable to send a nlmsg";
goto query_mapping_error;
}
ret = iwpm_wait_complete_req(nlmsg_request);
return ret;
query_mapping_error:
pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
if (skb)
dev_kfree_skb(skb);
if (nlmsg_request)
iwpm_free_nlmsg_request(&nlmsg_request->kref);
return ret;
}
EXPORT_SYMBOL(iwpm_add_and_query_mapping);
/*
* iwpm_remove_mapping - Send a netlink remove mapping message
* to the port mapper
* nlmsg attributes:
* [IWPM_NLA_MANAGE_MAPPING_SEQ]
* [IWPM_NLA_MANAGE_ADDR]
*/
int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
{
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
u32 msg_seq;
const char *err_str = "";
int ret = -EINVAL;
if (!iwpm_valid_client(nl_client)) {
err_str = "Invalid port mapper client";
goto remove_mapping_error;
}
if (!iwpm_registered_client(nl_client)) {
err_str = "Unregistered port mapper client";
goto remove_mapping_error;
}
if (!iwpm_valid_pid())
return 0;
skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REMOVE_MAPPING, &nlh, nl_client);
if (!skb) {
ret = -ENOMEM;
err_str = "Unable to create a nlmsg";
goto remove_mapping_error;
}
msg_seq = atomic_read(&echo_nlmsg_seq);
nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
err_str = "Unable to put attribute of the nlmsg";
ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq,
IWPM_NLA_MANAGE_MAPPING_SEQ);
if (ret)
goto remove_mapping_error;
ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage),
local_addr, IWPM_NLA_MANAGE_ADDR);
if (ret)
goto remove_mapping_error;
ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
iwpm_user_pid = IWPM_PID_UNDEFINED;
err_str = "Unable to send a nlmsg";
goto remove_mapping_error;
}
iwpm_print_sockaddr(local_addr,
"remove_mapping: Local sockaddr:");
return 0;
remove_mapping_error:
pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
if (skb)
dev_kfree_skb_any(skb);
return ret;
}
EXPORT_SYMBOL(iwpm_remove_mapping);
/* netlink attribute policy for the received response to register pid request */
static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = {
[IWPM_NLA_RREG_PID_SEQ] = { .type = NLA_U32 },
[IWPM_NLA_RREG_IBDEV_NAME] = { .type = NLA_STRING,
.len = IWPM_DEVNAME_SIZE - 1 },
[IWPM_NLA_RREG_ULIB_NAME] = { .type = NLA_STRING,
.len = IWPM_ULIBNAME_SIZE - 1 },
[IWPM_NLA_RREG_ULIB_VER] = { .type = NLA_U16 },
[IWPM_NLA_RREG_PID_ERR] = { .type = NLA_U16 }
};
/*
* iwpm_register_pid_cb - Process a port mapper response to
* iwpm_register_pid()
*/
int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb)
{
struct iwpm_nlmsg_request *nlmsg_request = NULL;
struct nlattr *nltb[IWPM_NLA_RREG_PID_MAX];
struct iwpm_dev_data *pm_msg;
char *dev_name, *iwpm_name;
u32 msg_seq;
u8 nl_client;
u16 iwpm_version;
const char *msg_type = "Register Pid response";
if (iwpm_parse_nlmsg(cb, IWPM_NLA_RREG_PID_MAX,
resp_reg_policy, nltb, msg_type))
return -EINVAL;
msg_seq = nla_get_u32(nltb[IWPM_NLA_RREG_PID_SEQ]);
nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
if (!nlmsg_request) {
pr_info("%s: Could not find a matching request (seq = %u)\n",
__func__, msg_seq);
return -EINVAL;
}
pm_msg = nlmsg_request->req_buffer;
nl_client = nlmsg_request->nl_client;
dev_name = (char *)nla_data(nltb[IWPM_NLA_RREG_IBDEV_NAME]);
iwpm_name = (char *)nla_data(nltb[IWPM_NLA_RREG_ULIB_NAME]);
iwpm_version = nla_get_u16(nltb[IWPM_NLA_RREG_ULIB_VER]);
/* check device name, ulib name and version */
if (strcmp(pm_msg->dev_name, dev_name) ||
strcmp(iwpm_ulib_name, iwpm_name) ||
iwpm_version != iwpm_ulib_version) {
pr_info("%s: Incorrect info (dev = %s name = %s version = %d)\n",
__func__, dev_name, iwpm_name, iwpm_version);
nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
goto register_pid_response_exit;
}
iwpm_user_pid = cb->nlh->nlmsg_pid;
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n",
__func__, iwpm_user_pid);
if (iwpm_valid_client(nl_client))
iwpm_set_registered(nl_client, 1);
register_pid_response_exit:
nlmsg_request->request_done = 1;
/* always for found nlmsg_request */
kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
barrier();
wake_up(&nlmsg_request->waitq);
return 0;
}
EXPORT_SYMBOL(iwpm_register_pid_cb);
/* netlink attribute policy for the received response to add mapping request */
static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = {
[IWPM_NLA_MANAGE_MAPPING_SEQ] = { .type = NLA_U32 },
[IWPM_NLA_MANAGE_ADDR] = { .len = sizeof(struct sockaddr_storage) },
[IWPM_NLA_MANAGE_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) },
[IWPM_NLA_RMANAGE_MAPPING_ERR] = { .type = NLA_U16 }
};
/*
* iwpm_add_mapping_cb - Process a port mapper response to
* iwpm_add_mapping()
*/
int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb)
{
struct iwpm_sa_data *pm_msg;
struct iwpm_nlmsg_request *nlmsg_request = NULL;
struct nlattr *nltb[IWPM_NLA_RMANAGE_MAPPING_MAX];
struct sockaddr_storage *local_sockaddr;
struct sockaddr_storage *mapped_sockaddr;
const char *msg_type;
u32 msg_seq;
msg_type = "Add Mapping response";
if (iwpm_parse_nlmsg(cb, IWPM_NLA_RMANAGE_MAPPING_MAX,
resp_add_policy, nltb, msg_type))
return -EINVAL;
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
msg_seq = nla_get_u32(nltb[IWPM_NLA_MANAGE_MAPPING_SEQ]);
nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
if (!nlmsg_request) {
pr_info("%s: Could not find a matching request (seq = %u)\n",
__func__, msg_seq);
return -EINVAL;
}
pm_msg = nlmsg_request->req_buffer;
local_sockaddr = (struct sockaddr_storage *)
nla_data(nltb[IWPM_NLA_MANAGE_ADDR]);
mapped_sockaddr = (struct sockaddr_storage *)
nla_data(nltb[IWPM_NLA_MANAGE_MAPPED_LOC_ADDR]);
if (iwpm_compare_sockaddr(local_sockaddr, &pm_msg->loc_addr)) {
nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
goto add_mapping_response_exit;
}
if (mapped_sockaddr->ss_family != local_sockaddr->ss_family) {
pr_info("%s: Sockaddr family doesn't match the requested one\n",
__func__);
nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
goto add_mapping_response_exit;
}
memcpy(&pm_msg->mapped_loc_addr, mapped_sockaddr,
sizeof(*mapped_sockaddr));
iwpm_print_sockaddr(&pm_msg->loc_addr,
"add_mapping: Local sockaddr:");
iwpm_print_sockaddr(&pm_msg->mapped_loc_addr,
"add_mapping: Mapped local sockaddr:");
add_mapping_response_exit:
nlmsg_request->request_done = 1;
/* always for found request */
kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
barrier();
wake_up(&nlmsg_request->waitq);
return 0;
}
EXPORT_SYMBOL(iwpm_add_mapping_cb);
/* netlink attribute policy for the response to add and query mapping request */
static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = {
[IWPM_NLA_QUERY_MAPPING_SEQ] = { .type = NLA_U32 },
[IWPM_NLA_QUERY_LOCAL_ADDR] = { .len = sizeof(struct sockaddr_storage) },
[IWPM_NLA_QUERY_REMOTE_ADDR] = { .len = sizeof(struct sockaddr_storage) },
[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) },
[IWPM_NLA_RQUERY_MAPPED_REM_ADDR] = { .len = sizeof(struct sockaddr_storage) },
[IWPM_NLA_RQUERY_MAPPING_ERR] = { .type = NLA_U16 }
};
/*
* iwpm_add_and_query_mapping_cb - Process a port mapper response to
* iwpm_add_and_query_mapping()
*/
int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
struct netlink_callback *cb)
{
struct iwpm_sa_data *pm_msg;
struct iwpm_nlmsg_request *nlmsg_request = NULL;
struct nlattr *nltb[IWPM_NLA_RQUERY_MAPPING_MAX];
struct sockaddr_storage *local_sockaddr, *remote_sockaddr;
struct sockaddr_storage *mapped_loc_sockaddr, *mapped_rem_sockaddr;
const char *msg_type;
u32 msg_seq;
u16 err_code;
msg_type = "Query Mapping response";
if (iwpm_parse_nlmsg(cb, IWPM_NLA_RQUERY_MAPPING_MAX,
resp_query_policy, nltb, msg_type))
return -EINVAL;
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
msg_seq = nla_get_u32(nltb[IWPM_NLA_QUERY_MAPPING_SEQ]);
nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
if (!nlmsg_request) {
pr_info("%s: Could not find a matching request (seq = %u)\n",
__func__, msg_seq);
return -EINVAL;
}
pm_msg = nlmsg_request->req_buffer;
local_sockaddr = (struct sockaddr_storage *)
nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]);
remote_sockaddr = (struct sockaddr_storage *)
nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]);
mapped_loc_sockaddr = (struct sockaddr_storage *)
nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]);
mapped_rem_sockaddr = (struct sockaddr_storage *)
nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_REM_ADDR]);
err_code = nla_get_u16(nltb[IWPM_NLA_RQUERY_MAPPING_ERR]);
if (err_code == IWPM_REMOTE_QUERY_REJECT) {
pr_info("%s: Received a Reject (pid = %u, echo seq = %u)\n",
__func__, cb->nlh->nlmsg_pid, msg_seq);
nlmsg_request->err_code = IWPM_REMOTE_QUERY_REJECT;
}
if (iwpm_compare_sockaddr(local_sockaddr, &pm_msg->loc_addr) ||
iwpm_compare_sockaddr(remote_sockaddr, &pm_msg->rem_addr)) {
pr_info("%s: Incorrect local sockaddr\n", __func__);
nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
goto query_mapping_response_exit;
}
if (mapped_loc_sockaddr->ss_family != local_sockaddr->ss_family ||
mapped_rem_sockaddr->ss_family != remote_sockaddr->ss_family) {
pr_info("%s: Sockaddr family doesn't match the requested one\n",
__func__);
nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
goto query_mapping_response_exit;
}
memcpy(&pm_msg->mapped_loc_addr, mapped_loc_sockaddr,
sizeof(*mapped_loc_sockaddr));
memcpy(&pm_msg->mapped_rem_addr, mapped_rem_sockaddr,
sizeof(*mapped_rem_sockaddr));
iwpm_print_sockaddr(&pm_msg->loc_addr,
"query_mapping: Local sockaddr:");
iwpm_print_sockaddr(&pm_msg->mapped_loc_addr,
"query_mapping: Mapped local sockaddr:");
iwpm_print_sockaddr(&pm_msg->rem_addr,
"query_mapping: Remote sockaddr:");
iwpm_print_sockaddr(&pm_msg->mapped_rem_addr,
"query_mapping: Mapped remote sockaddr:");
query_mapping_response_exit:
nlmsg_request->request_done = 1;
/* always for found request */
kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
barrier();
wake_up(&nlmsg_request->waitq);
return 0;
}
EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb);
/* netlink attribute policy for the received request for mapping info */
static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = {
[IWPM_NLA_MAPINFO_ULIB_NAME] = { .type = NLA_STRING,
.len = IWPM_ULIBNAME_SIZE - 1 },
[IWPM_NLA_MAPINFO_ULIB_VER] = { .type = NLA_U16 }
};
/*
* iwpm_mapping_info_cb - Process a port mapper request for mapping info
*/
int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
{
struct nlattr *nltb[IWPM_NLA_MAPINFO_REQ_MAX];
const char *msg_type = "Mapping Info response";
int iwpm_pid;
u8 nl_client;
char *iwpm_name;
u16 iwpm_version;
int ret = -EINVAL;
if (iwpm_parse_nlmsg(cb, IWPM_NLA_MAPINFO_REQ_MAX,
resp_mapinfo_policy, nltb, msg_type)) {
pr_info("%s: Unable to parse nlmsg\n", __func__);
return ret;
}
iwpm_name = (char *)nla_data(nltb[IWPM_NLA_MAPINFO_ULIB_NAME]);
iwpm_version = nla_get_u16(nltb[IWPM_NLA_MAPINFO_ULIB_VER]);
if (strcmp(iwpm_ulib_name, iwpm_name) ||
iwpm_version != iwpm_ulib_version) {
pr_info("%s: Invalid port mapper name = %s version = %d\n",
__func__, iwpm_name, iwpm_version);
return ret;
}
nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
if (!iwpm_valid_client(nl_client)) {
pr_info("%s: Invalid port mapper client = %d\n",
__func__, nl_client);
return ret;
}
iwpm_set_registered(nl_client, 0);
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
if (!iwpm_mapinfo_available())
return 0;
iwpm_pid = cb->nlh->nlmsg_pid;
pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n",
__func__, iwpm_pid);
ret = iwpm_send_mapinfo(nl_client, iwpm_pid);
return ret;
}
EXPORT_SYMBOL(iwpm_mapping_info_cb);
/* netlink attribute policy for the received mapping info ack */
static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = {
[IWPM_NLA_MAPINFO_SEQ] = { .type = NLA_U32 },
[IWPM_NLA_MAPINFO_SEND_NUM] = { .type = NLA_U32 },
[IWPM_NLA_MAPINFO_ACK_NUM] = { .type = NLA_U32 }
};
/*
* iwpm_ack_mapping_info_cb - Process a port mapper ack for
* the provided mapping info records
*/
int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
{
struct nlattr *nltb[IWPM_NLA_MAPINFO_NUM_MAX];
u32 mapinfo_send, mapinfo_ack;
const char *msg_type = "Mapping Info Ack";
if (iwpm_parse_nlmsg(cb, IWPM_NLA_MAPINFO_NUM_MAX,
ack_mapinfo_policy, nltb, msg_type))
return -EINVAL;
mapinfo_send = nla_get_u32(nltb[IWPM_NLA_MAPINFO_SEND_NUM]);
mapinfo_ack = nla_get_u32(nltb[IWPM_NLA_MAPINFO_ACK_NUM]);
if (mapinfo_ack != mapinfo_send)
pr_info("%s: Invalid mapinfo number (sent = %u ack-ed = %u)\n",
__func__, mapinfo_send, mapinfo_ack);
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
return 0;
}
EXPORT_SYMBOL(iwpm_ack_mapping_info_cb);
/* netlink attribute policy for the received port mapper error message */
static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = {
[IWPM_NLA_ERR_SEQ] = { .type = NLA_U32 },
[IWPM_NLA_ERR_CODE] = { .type = NLA_U16 },
};
/*
* iwpm_mapping_error_cb - Process a port mapper error message
*/
int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb)
{
struct iwpm_nlmsg_request *nlmsg_request = NULL;
int nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
struct nlattr *nltb[IWPM_NLA_ERR_MAX];
u32 msg_seq;
u16 err_code;
const char *msg_type = "Mapping Error Msg";
if (iwpm_parse_nlmsg(cb, IWPM_NLA_ERR_MAX,
map_error_policy, nltb, msg_type))
return -EINVAL;
msg_seq = nla_get_u32(nltb[IWPM_NLA_ERR_SEQ]);
err_code = nla_get_u16(nltb[IWPM_NLA_ERR_CODE]);
pr_info("%s: Received msg seq = %u err code = %u client = %d\n",
__func__, msg_seq, err_code, nl_client);
/* look for nlmsg_request */
nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
if (!nlmsg_request) {
/* not all errors have associated requests */
pr_debug("Could not find matching req (seq = %u)\n", msg_seq);
return 0;
}
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
nlmsg_request->err_code = err_code;
nlmsg_request->request_done = 1;
/* always for found request */
kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
barrier();
wake_up(&nlmsg_request->waitq);
return 0;
}
EXPORT_SYMBOL(iwpm_mapping_error_cb);

@@ -0,0 +1,607 @@
/*
* Copyright (c) 2014 Chelsio, Inc. All rights reserved.
* Copyright (c) 2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "iwpm_util.h"
#define IWPM_HASH_BUCKET_SIZE 512
#define IWPM_HASH_BUCKET_MASK (IWPM_HASH_BUCKET_SIZE - 1)
static LIST_HEAD(iwpm_nlmsg_req_list);
static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock);
static struct hlist_head *iwpm_hash_bucket;
static DEFINE_SPINLOCK(iwpm_mapinfo_lock);
static DEFINE_MUTEX(iwpm_admin_lock);
static struct iwpm_admin_data iwpm_admin;
int iwpm_init(u8 nl_client)
{
if (iwpm_valid_client(nl_client))
return -EINVAL;
mutex_lock(&iwpm_admin_lock);
if (atomic_read(&iwpm_admin.refcount) == 0) {
iwpm_hash_bucket = kzalloc(IWPM_HASH_BUCKET_SIZE *
sizeof(struct hlist_head), GFP_KERNEL);
if (!iwpm_hash_bucket) {
mutex_unlock(&iwpm_admin_lock);
pr_err("%s Unable to create mapinfo hash table\n", __func__);
return -ENOMEM;
}
}
atomic_inc(&iwpm_admin.refcount);
mutex_unlock(&iwpm_admin_lock);
iwpm_set_valid(nl_client, 1);
return 0;
}
EXPORT_SYMBOL(iwpm_init);
static void free_hash_bucket(void);
int iwpm_exit(u8 nl_client)
{
if (!iwpm_valid_client(nl_client))
return -EINVAL;
mutex_lock(&iwpm_admin_lock);
if (atomic_read(&iwpm_admin.refcount) == 0) {
mutex_unlock(&iwpm_admin_lock);
pr_err("%s Incorrect usage - negative refcount\n", __func__);
return -EINVAL;
}
if (atomic_dec_and_test(&iwpm_admin.refcount)) {
free_hash_bucket();
pr_debug("%s: Mapinfo hash table is destroyed\n", __func__);
}
mutex_unlock(&iwpm_admin_lock);
iwpm_set_valid(nl_client, 0);
return 0;
}
EXPORT_SYMBOL(iwpm_exit);
static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage *,
struct sockaddr_storage *);
int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
struct sockaddr_storage *mapped_sockaddr,
u8 nl_client)
{
struct hlist_head *hash_bucket_head;
struct iwpm_mapping_info *map_info;
unsigned long flags;
if (!iwpm_valid_client(nl_client))
return -EINVAL;
map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL);
if (!map_info) {
pr_err("%s: Unable to allocate a mapping info\n", __func__);
return -ENOMEM;
}
memcpy(&map_info->local_sockaddr, local_sockaddr,
sizeof(struct sockaddr_storage));
memcpy(&map_info->mapped_sockaddr, mapped_sockaddr,
sizeof(struct sockaddr_storage));
map_info->nl_client = nl_client;
spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
if (iwpm_hash_bucket) {
hash_bucket_head = get_hash_bucket_head(
&map_info->local_sockaddr,
&map_info->mapped_sockaddr);
hlist_add_head(&map_info->hlist_node, hash_bucket_head);
}
spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
return 0;
}
EXPORT_SYMBOL(iwpm_create_mapinfo);
int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr,
struct sockaddr_storage *mapped_local_addr)
{
struct hlist_node *tmp_hlist_node;
struct hlist_head *hash_bucket_head;
struct iwpm_mapping_info *map_info = NULL;
unsigned long flags;
int ret = -EINVAL;
spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
if (iwpm_hash_bucket) {
hash_bucket_head = get_hash_bucket_head(
local_sockaddr,
mapped_local_addr);
hlist_for_each_entry_safe(map_info, tmp_hlist_node,
hash_bucket_head, hlist_node) {
if (!iwpm_compare_sockaddr(&map_info->mapped_sockaddr,
mapped_local_addr)) {
hlist_del_init(&map_info->hlist_node);
kfree(map_info);
ret = 0;
break;
}
}
}
spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
return ret;
}
EXPORT_SYMBOL(iwpm_remove_mapinfo);
static void free_hash_bucket(void)
{
struct hlist_node *tmp_hlist_node;
struct iwpm_mapping_info *map_info;
unsigned long flags;
int i;
/* remove all the mapinfo data from the list */
spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) {
hlist_for_each_entry_safe(map_info, tmp_hlist_node,
&iwpm_hash_bucket[i], hlist_node) {
hlist_del_init(&map_info->hlist_node);
kfree(map_info);
}
}
/* free the hash list */
kfree(iwpm_hash_bucket);
iwpm_hash_bucket = NULL;
spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
}
struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
u8 nl_client, gfp_t gfp)
{
struct iwpm_nlmsg_request *nlmsg_request = NULL;
unsigned long flags;
nlmsg_request = kzalloc(sizeof(struct iwpm_nlmsg_request), gfp);
if (!nlmsg_request) {
pr_err("%s Unable to allocate a nlmsg_request\n", __func__);
return NULL;
}
spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags);
list_add_tail(&nlmsg_request->inprocess_list, &iwpm_nlmsg_req_list);
spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags);
kref_init(&nlmsg_request->kref);
kref_get(&nlmsg_request->kref);
nlmsg_request->nlmsg_seq = nlmsg_seq;
nlmsg_request->nl_client = nl_client;
nlmsg_request->request_done = 0;
nlmsg_request->err_code = 0;
return nlmsg_request;
}
void iwpm_free_nlmsg_request(struct kref *kref)
{
struct iwpm_nlmsg_request *nlmsg_request;
unsigned long flags;
nlmsg_request = container_of(kref, struct iwpm_nlmsg_request, kref);
spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags);
list_del_init(&nlmsg_request->inprocess_list);
spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags);
if (!nlmsg_request->request_done)
pr_debug("%s Freeing incomplete nlmsg request (seq = %u).\n",
__func__, nlmsg_request->nlmsg_seq);
kfree(nlmsg_request);
}
struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq)
{
struct iwpm_nlmsg_request *nlmsg_request;
struct iwpm_nlmsg_request *found_request = NULL;
unsigned long flags;
spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags);
list_for_each_entry(nlmsg_request, &iwpm_nlmsg_req_list,
inprocess_list) {
if (nlmsg_request->nlmsg_seq == echo_seq) {
found_request = nlmsg_request;
kref_get(&nlmsg_request->kref);
break;
}
}
spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags);
return found_request;
}
int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request)
{
int ret;
init_waitqueue_head(&nlmsg_request->waitq);
ret = wait_event_timeout(nlmsg_request->waitq,
(nlmsg_request->request_done != 0), IWPM_NL_TIMEOUT);
if (!ret) {
ret = -EINVAL;
pr_info("%s: Timeout %d sec for netlink request (seq = %u)\n",
__func__, (IWPM_NL_TIMEOUT/HZ), nlmsg_request->nlmsg_seq);
} else {
ret = nlmsg_request->err_code;
}
kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
return ret;
}
int iwpm_get_nlmsg_seq(void)
{
return atomic_inc_return(&iwpm_admin.nlmsg_seq);
}
int iwpm_valid_client(u8 nl_client)
{
if (nl_client >= RDMA_NL_NUM_CLIENTS)
return 0;
return iwpm_admin.client_list[nl_client];
}
void iwpm_set_valid(u8 nl_client, int valid)
{
if (nl_client >= RDMA_NL_NUM_CLIENTS)
return;
iwpm_admin.client_list[nl_client] = valid;
}
/* check whether the client has registered with the port mapper */
int iwpm_registered_client(u8 nl_client)
{
return iwpm_admin.reg_list[nl_client];
}
/* set the registration state of the client */
void iwpm_set_registered(u8 nl_client, int reg)
{
iwpm_admin.reg_list[nl_client] = reg;
}
int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr,
struct sockaddr_storage *b_sockaddr)
{
if (a_sockaddr->ss_family != b_sockaddr->ss_family)
return 1;
if (a_sockaddr->ss_family == AF_INET) {
struct sockaddr_in *a4_sockaddr =
(struct sockaddr_in *)a_sockaddr;
struct sockaddr_in *b4_sockaddr =
(struct sockaddr_in *)b_sockaddr;
if (!memcmp(&a4_sockaddr->sin_addr,
&b4_sockaddr->sin_addr, sizeof(struct in_addr))
&& a4_sockaddr->sin_port == b4_sockaddr->sin_port)
return 0;
} else if (a_sockaddr->ss_family == AF_INET6) {
struct sockaddr_in6 *a6_sockaddr =
(struct sockaddr_in6 *)a_sockaddr;
struct sockaddr_in6 *b6_sockaddr =
(struct sockaddr_in6 *)b_sockaddr;
if (!memcmp(&a6_sockaddr->sin6_addr,
&b6_sockaddr->sin6_addr, sizeof(struct in6_addr))
&& a6_sockaddr->sin6_port == b6_sockaddr->sin6_port)
return 0;
} else {
pr_err("%s: Invalid sockaddr family\n", __func__);
}
return 1;
}
struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh,
int nl_client)
{
struct sk_buff *skb = NULL;
skb = dev_alloc_skb(NLMSG_GOODSIZE);
if (!skb) {
pr_err("%s Unable to allocate skb\n", __func__);
goto create_nlmsg_exit;
}
if (!(ibnl_put_msg(skb, nlh, 0, 0, nl_client, nl_op,
NLM_F_REQUEST))) {
pr_warn("%s: Unable to put the nlmsg header\n", __func__);
dev_kfree_skb(skb);
skb = NULL;
}
create_nlmsg_exit:
return skb;
}
int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max,
const struct nla_policy *nlmsg_policy,
struct nlattr *nltb[], const char *msg_type)
{
int nlh_len = 0;
int ret;
const char *err_str = "";
ret = nlmsg_validate(cb->nlh, nlh_len, policy_max-1, nlmsg_policy);
if (ret) {
err_str = "Invalid attribute";
goto parse_nlmsg_error;
}
ret = nlmsg_parse(cb->nlh, nlh_len, nltb, policy_max-1, nlmsg_policy);
if (ret) {
err_str = "Unable to parse the nlmsg";
goto parse_nlmsg_error;
}
ret = iwpm_validate_nlmsg_attr(nltb, policy_max);
if (ret) {
err_str = "Invalid NULL attribute";
goto parse_nlmsg_error;
}
return 0;
parse_nlmsg_error:
pr_warn("%s: %s (msg type %s ret = %d)\n",
__func__, err_str, msg_type, ret);
return ret;
}
void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg)
{
struct sockaddr_in6 *sockaddr_v6;
struct sockaddr_in *sockaddr_v4;
switch (sockaddr->ss_family) {
case AF_INET:
sockaddr_v4 = (struct sockaddr_in *)sockaddr;
pr_debug("%s IPV4 %pI4: %u(0x%04X)\n",
msg, &sockaddr_v4->sin_addr,
ntohs(sockaddr_v4->sin_port),
ntohs(sockaddr_v4->sin_port));
break;
case AF_INET6:
sockaddr_v6 = (struct sockaddr_in6 *)sockaddr;
pr_debug("%s IPV6 %pI6: %u(0x%04X)\n",
msg, &sockaddr_v6->sin6_addr,
ntohs(sockaddr_v6->sin6_port),
ntohs(sockaddr_v6->sin6_port));
break;
default:
break;
}
}
static u32 iwpm_ipv6_jhash(struct sockaddr_in6 *ipv6_sockaddr)
{
u32 ipv6_hash = jhash(&ipv6_sockaddr->sin6_addr, sizeof(struct in6_addr), 0);
u32 hash = jhash_2words(ipv6_hash, (__force u32) ipv6_sockaddr->sin6_port, 0);
return hash;
}
static u32 iwpm_ipv4_jhash(struct sockaddr_in *ipv4_sockaddr)
{
u32 ipv4_hash = jhash(&ipv4_sockaddr->sin_addr, sizeof(struct in_addr), 0);
u32 hash = jhash_2words(ipv4_hash, (__force u32) ipv4_sockaddr->sin_port, 0);
return hash;
}
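/*
 * Select the hash bucket for a (local, mapped) address pair: each sockaddr
 * is hashed over its address and port with jhash, and the two hashes are
 * combined, unless they are equal (nothing rewrote the address), in which
 * case the single hash is used directly.
 */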
static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage
*local_sockaddr,
struct sockaddr_storage
*mapped_sockaddr)
{
u32 local_hash, mapped_hash, hash;
if (local_sockaddr->ss_family == AF_INET) {
local_hash = iwpm_ipv4_jhash((struct sockaddr_in *) local_sockaddr);
mapped_hash = iwpm_ipv4_jhash((struct sockaddr_in *) mapped_sockaddr);
} else if (local_sockaddr->ss_family == AF_INET6) {
local_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) local_sockaddr);
mapped_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) mapped_sockaddr);
} else {
pr_err("%s: Invalid sockaddr family\n", __func__);
return NULL;
}
if (local_hash == mapped_hash) /* if port mapper isn't available */
hash = local_hash;
else
hash = jhash_2words(local_hash, mapped_hash, 0);
return &iwpm_hash_bucket[hash & IWPM_HASH_BUCKET_MASK];
}
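/* report the number of mapping records just sent to the userspace port mapper */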
static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid)
{
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
u32 msg_seq;
const char *err_str = "";
int ret = -EINVAL;
skb = iwpm_create_nlmsg(RDMA_NL_IWPM_MAPINFO_NUM, &nlh, nl_client);
if (!skb) {
err_str = "Unable to create a nlmsg";
goto mapinfo_num_error;
}
nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
msg_seq = 0;
err_str = "Unable to put attribute of mapinfo number nlmsg";
ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_MAPINFO_SEQ);
if (ret)
goto mapinfo_num_error;
ret = ibnl_put_attr(skb, nlh, sizeof(u32),
&mapping_num, IWPM_NLA_MAPINFO_SEND_NUM);
if (ret)
goto mapinfo_num_error;
ret = ibnl_unicast(skb, nlh, iwpm_pid);
if (ret) {
skb = NULL;
err_str = "Unable to send a nlmsg";
goto mapinfo_num_error;
}
pr_debug("%s: Sent mapping number = %d\n", __func__, mapping_num);
return 0;
mapinfo_num_error:
pr_info("%s: %s\n", __func__, err_str);
if (skb)
dev_kfree_skb(skb);
return ret;
}
static int send_nlmsg_done(struct sk_buff *skb, u8 nl_client, int iwpm_pid)
{
struct nlmsghdr *nlh = NULL;
int ret = 0;
if (!skb)
return ret;
if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client,
RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) {
pr_warn("%s Unable to put NLMSG_DONE\n", __func__);
return -ENOMEM;
}
nlh->nlmsg_type = NLMSG_DONE;
ret = ibnl_unicast(skb, (struct nlmsghdr *)skb->data, iwpm_pid);
if (ret)
pr_warn("%s Unable to send a nlmsg\n", __func__);
return ret;
}
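/*
 * Send every mapping recorded for this client to the userspace port mapper,
 * batching the records into as many skbs as needed and finishing with the
 * total count via send_mapinfo_num().
 */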
int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid)
{
struct iwpm_mapping_info *map_info;
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
int skb_num = 0, mapping_num = 0;
int i = 0, nlmsg_bytes = 0;
unsigned long flags;
const char *err_str = "";
int ret;
skb = dev_alloc_skb(NLMSG_GOODSIZE);
if (!skb) {
ret = -ENOMEM;
err_str = "Unable to allocate skb";
goto send_mapping_info_exit;
}
skb_num++;
spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) {
hlist_for_each_entry(map_info, &iwpm_hash_bucket[i],
hlist_node) {
if (map_info->nl_client != nl_client)
continue;
nlh = NULL;
if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client,
RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) {
ret = -ENOMEM;
err_str = "Unable to put the nlmsg header";
goto send_mapping_info_unlock;
}
err_str = "Unable to put attribute of the nlmsg";
ret = ibnl_put_attr(skb, nlh,
sizeof(struct sockaddr_storage),
&map_info->local_sockaddr,
IWPM_NLA_MAPINFO_LOCAL_ADDR);
if (ret)
goto send_mapping_info_unlock;
ret = ibnl_put_attr(skb, nlh,
sizeof(struct sockaddr_storage),
&map_info->mapped_sockaddr,
IWPM_NLA_MAPINFO_MAPPED_ADDR);
if (ret)
goto send_mapping_info_unlock;
iwpm_print_sockaddr(&map_info->local_sockaddr,
"send_mapping_info: Local sockaddr:");
iwpm_print_sockaddr(&map_info->mapped_sockaddr,
"send_mapping_info: Mapped local sockaddr:");
mapping_num++;
nlmsg_bytes += nlh->nlmsg_len;
/* check if all mappings can fit in one skb */
if (NLMSG_GOODSIZE - nlmsg_bytes < nlh->nlmsg_len * 2) {
/* and leave room for NLMSG_DONE */
nlmsg_bytes = 0;
skb_num++;
spin_unlock_irqrestore(&iwpm_mapinfo_lock,
flags);
/* send the skb */
ret = send_nlmsg_done(skb, nl_client, iwpm_pid);
skb = NULL;
if (ret) {
err_str = "Unable to send map info";
goto send_mapping_info_exit;
}
if (skb_num == IWPM_MAPINFO_SKB_COUNT) {
ret = -ENOMEM;
err_str = "Insufficient skbs for map info";
goto send_mapping_info_exit;
}
skb = dev_alloc_skb(NLMSG_GOODSIZE);
if (!skb) {
ret = -ENOMEM;
err_str = "Unable to allocate skb";
goto send_mapping_info_exit;
}
spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
}
}
}
send_mapping_info_unlock:
spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
send_mapping_info_exit:
if (ret) {
pr_warn("%s: %s (ret = %d)\n", __func__, err_str, ret);
if (skb)
dev_kfree_skb(skb);
return ret;
}
send_nlmsg_done(skb, nl_client, iwpm_pid);
return send_mapinfo_num(mapping_num, nl_client, iwpm_pid);
}
int iwpm_mapinfo_available(void)
{
unsigned long flags;
int full_bucket = 0, i = 0;
spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
if (iwpm_hash_bucket) {
for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) {
if (!hlist_empty(&iwpm_hash_bucket[i])) {
full_bucket = 1;
break;
}
}
}
spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
return full_bucket;
}

View file

@ -0,0 +1,238 @@
/*
* Copyright (c) 2014 Intel Corporation. All rights reserved.
* Copyright (c) 2014 Chelsio, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _IWPM_UTIL_H
#define _IWPM_UTIL_H
#include <linux/module.h>
#include <linux/io.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/delay.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/jhash.h>
#include <linux/kref.h>
#include <net/netlink.h>
#include <linux/errno.h>
#include <rdma/iw_portmap.h>
#include <rdma/rdma_netlink.h>
#define IWPM_NL_RETRANS 3
#define IWPM_NL_TIMEOUT (10*HZ)
#define IWPM_MAPINFO_SKB_COUNT 20
#define IWPM_PID_UNDEFINED -1
#define IWPM_PID_UNAVAILABLE -2
struct iwpm_nlmsg_request {
struct list_head inprocess_list;
__u32 nlmsg_seq;
void *req_buffer;
u8 nl_client;
u8 request_done;
u16 err_code;
wait_queue_head_t waitq;
struct kref kref;
};
struct iwpm_mapping_info {
struct hlist_node hlist_node;
struct sockaddr_storage local_sockaddr;
struct sockaddr_storage mapped_sockaddr;
u8 nl_client;
};
struct iwpm_admin_data {
atomic_t refcount;
atomic_t nlmsg_seq;
int client_list[RDMA_NL_NUM_CLIENTS];
int reg_list[RDMA_NL_NUM_CLIENTS];
};
/**
* iwpm_get_nlmsg_request - Allocate and initialize netlink message request
* @nlmsg_seq: Sequence number of the netlink message
* @nl_client: The index of the netlink client
* @gfp: Indicates how the memory for the request should be allocated
*
* Returns the newly allocated netlink request object if successful,
* otherwise returns NULL
*/
struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
u8 nl_client, gfp_t gfp);
/**
* iwpm_free_nlmsg_request - Deallocate netlink message request
* @kref: Holds reference of netlink message request
*/
void iwpm_free_nlmsg_request(struct kref *kref);
/**
* iwpm_find_nlmsg_request - Find netlink message request in the request list
* @echo_seq: Sequence number of the netlink request to find
*
 * Returns the matching netlink message request,
 * or NULL if no request with that sequence number is found
*/
struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq);
/**
* iwpm_wait_complete_req - Block while servicing the netlink request
* @nlmsg_request: Netlink message request to service
*
 * Wakes up after the request is completed or has timed out
 * Returns 0 if the request completed without error
*/
int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request);
/**
* iwpm_get_nlmsg_seq - Get the sequence number for a netlink
* message to send to the port mapper
*
* Returns the sequence number for the netlink message.
*/
int iwpm_get_nlmsg_seq(void);
/**
* iwpm_valid_client - Check if the port mapper client is valid
* @nl_client: The index of the netlink client
*
* Valid clients need to call iwpm_init() before using
* the port mapper
*/
int iwpm_valid_client(u8 nl_client);
/**
* iwpm_set_valid - Set the port mapper client to valid or not
* @nl_client: The index of the netlink client
* @valid: 1 if valid or 0 if invalid
*/
void iwpm_set_valid(u8 nl_client, int valid);
/**
* iwpm_registered_client - Check if the port mapper client is registered
* @nl_client: The index of the netlink client
*
* Call iwpm_register_pid() to register a client
*/
int iwpm_registered_client(u8 nl_client);
/**
* iwpm_set_registered - Set the port mapper client to registered or not
* @nl_client: The index of the netlink client
* @reg: 1 if registered or 0 if not
*/
void iwpm_set_registered(u8 nl_client, int reg);
/**
* iwpm_send_mapinfo - Send local and mapped IPv4/IPv6 address info of
* a client to the user space port mapper
* @nl_client: The index of the netlink client
* @iwpm_pid: The pid of the user space port mapper
*
 * Returns 0 if the mapping info is sent successfully, otherwise a negative error code
*/
int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid);
/**
 * iwpm_mapinfo_available - Check if any mapping info records are available
* in the hash table
*
* Returns 1 if mapping information is available, otherwise returns 0
*/
int iwpm_mapinfo_available(void);
/**
* iwpm_compare_sockaddr - Compare two sockaddr storage structs
*
* Returns 0 if they are holding the same ip/tcp address info,
* otherwise returns 1
*/
int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr,
struct sockaddr_storage *b_sockaddr);
/**
* iwpm_validate_nlmsg_attr - Check for NULL netlink attributes
 * @nltb: Holds the address of each netlink message attribute
 * @nla_count: Number of netlink message attributes
 *
 * Returns an error if any of the nla_count attributes is NULL
*/
static inline int iwpm_validate_nlmsg_attr(struct nlattr *nltb[],
int nla_count)
{
int i;
for (i = 1; i < nla_count; i++) {
if (!nltb[i])
return -EINVAL;
}
return 0;
}
/**
* iwpm_create_nlmsg - Allocate skb and form a netlink message
* @nl_op: Netlink message opcode
 * @nlh: Holds the address of the netlink message header in the skb
 * @nl_client: The index of the netlink client
 *
 * Returns the newly allocated skb, or NULL if the tailroom of the skb
 * is insufficient to store the message header and payload
*/
struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh,
int nl_client);
/**
* iwpm_parse_nlmsg - Validate and parse the received netlink message
* @cb: Netlink callback structure
* @policy_max: Maximum attribute type to be expected
* @nlmsg_policy: Validation policy
* @nltb: Array to store policy_max parsed elements
* @msg_type: Type of netlink message
*
* Returns 0 on success or a negative error code
*/
int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max,
const struct nla_policy *nlmsg_policy,
struct nlattr *nltb[], const char *msg_type);
/**
* iwpm_print_sockaddr - Print IPv4/IPv6 address and TCP port
* @sockaddr: Socket address to print
* @msg: Message to print
*/
void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg);
#endif
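
The declarations above form the in-kernel client side of the iWarp port mapper. The sketch below only illustrates the calling order they imply (initialize the client, record a mapping, tear everything down); example_iwpm_client(), example_iwpm_client_teardown() and the use of RDMA_NL_C4IW as the client index are hypothetical illustrations, not part of this commit.

/* Hypothetical usage sketch of the iwpm helpers declared above. */
#include <rdma/iw_portmap.h>
#include <rdma/rdma_netlink.h>

static int example_iwpm_client(struct sockaddr_storage *local,
			       struct sockaddr_storage *mapped)
{
	int ret;

	/* mark this netlink client as a valid port mapper user */
	ret = iwpm_init(RDMA_NL_C4IW);
	if (ret)
		return ret;

	/* remember the local <-> mapped association in the mapinfo hash table */
	ret = iwpm_create_mapinfo(local, mapped, RDMA_NL_C4IW);
	if (ret)
		iwpm_exit(RDMA_NL_C4IW);
	return ret;
}

static void example_iwpm_client_teardown(struct sockaddr_storage *local,
					 struct sockaddr_storage *mapped)
{
	/* drop the recorded mapping and release the client slot */
	iwpm_remove_mapinfo(local, mapped);
	iwpm_exit(RDMA_NL_C4IW);
}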

File diff suppressed because it is too large

View file

@ -0,0 +1,227 @@
/*
* Copyright (c) 2004, 2005, Voltaire, Inc. All rights reserved.
* Copyright (c) 2005 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2009 HNR Consulting. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef __IB_MAD_PRIV_H__
#define __IB_MAD_PRIV_H__
#include <linux/completion.h>
#include <linux/err.h>
#include <linux/workqueue.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_smi.h>
#define IB_MAD_QPS_CORE 2 /* Always QP0 and QP1 as a minimum */
/* QP and CQ parameters */
#define IB_MAD_QP_SEND_SIZE 128
#define IB_MAD_QP_RECV_SIZE 512
#define IB_MAD_QP_MIN_SIZE 64
#define IB_MAD_QP_MAX_SIZE 8192
#define IB_MAD_SEND_REQ_MAX_SG 2
#define IB_MAD_RECV_REQ_MAX_SG 1
#define IB_MAD_SEND_Q_PSN 0
/* Registration table sizes */
#define MAX_MGMT_CLASS 80
#define MAX_MGMT_VERSION 8
#define MAX_MGMT_OUI 8
#define MAX_MGMT_VENDOR_RANGE2 (IB_MGMT_CLASS_VENDOR_RANGE2_END - \
IB_MGMT_CLASS_VENDOR_RANGE2_START + 1)
struct ib_mad_list_head {
struct list_head list;
struct ib_mad_queue *mad_queue;
};
struct ib_mad_private_header {
struct ib_mad_list_head mad_list;
struct ib_mad_recv_wc recv_wc;
struct ib_wc wc;
u64 mapping;
} __attribute__ ((packed));
struct ib_mad_private {
struct ib_mad_private_header header;
struct ib_grh grh;
union {
struct ib_mad mad;
struct ib_rmpp_mad rmpp_mad;
struct ib_smp smp;
} mad;
} __attribute__ ((packed));
struct ib_rmpp_segment {
struct list_head list;
u32 num;
u8 data[0];
};
struct ib_mad_agent_private {
struct list_head agent_list;
struct ib_mad_agent agent;
struct ib_mad_reg_req *reg_req;
struct ib_mad_qp_info *qp_info;
spinlock_t lock;
struct list_head send_list;
struct list_head wait_list;
struct list_head done_list;
struct delayed_work timed_work;
unsigned long timeout;
struct list_head local_list;
struct work_struct local_work;
struct list_head rmpp_list;
atomic_t refcount;
struct completion comp;
};
struct ib_mad_snoop_private {
struct ib_mad_agent agent;
struct ib_mad_qp_info *qp_info;
int snoop_index;
int mad_snoop_flags;
atomic_t refcount;
struct completion comp;
};
struct ib_mad_send_wr_private {
struct ib_mad_list_head mad_list;
struct list_head agent_list;
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_buf send_buf;
u64 header_mapping;
u64 payload_mapping;
struct ib_send_wr send_wr;
struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
__be64 tid;
unsigned long timeout;
int max_retries;
int retries_left;
int retry;
int refcount;
enum ib_wc_status status;
/* RMPP control */
struct list_head rmpp_list;
struct ib_rmpp_segment *last_ack_seg;
struct ib_rmpp_segment *cur_seg;
int last_ack;
int seg_num;
int newwin;
int pad;
};
struct ib_mad_local_private {
struct list_head completion_list;
struct ib_mad_private *mad_priv;
struct ib_mad_agent_private *recv_mad_agent;
struct ib_mad_send_wr_private *mad_send_wr;
};
struct ib_mad_mgmt_method_table {
struct ib_mad_agent_private *agent[IB_MGMT_MAX_METHODS];
};
struct ib_mad_mgmt_class_table {
struct ib_mad_mgmt_method_table *method_table[MAX_MGMT_CLASS];
};
struct ib_mad_mgmt_vendor_class {
u8 oui[MAX_MGMT_OUI][3];
struct ib_mad_mgmt_method_table *method_table[MAX_MGMT_OUI];
};
struct ib_mad_mgmt_vendor_class_table {
struct ib_mad_mgmt_vendor_class *vendor_class[MAX_MGMT_VENDOR_RANGE2];
};
struct ib_mad_mgmt_version_table {
struct ib_mad_mgmt_class_table *class;
struct ib_mad_mgmt_vendor_class_table *vendor;
};
struct ib_mad_queue {
spinlock_t lock;
struct list_head list;
int count;
int max_active;
struct ib_mad_qp_info *qp_info;
};
struct ib_mad_qp_info {
struct ib_mad_port_private *port_priv;
struct ib_qp *qp;
struct ib_mad_queue send_queue;
struct ib_mad_queue recv_queue;
struct list_head overflow_list;
spinlock_t snoop_lock;
struct ib_mad_snoop_private **snoop_table;
int snoop_table_size;
atomic_t snoop_count;
};
struct ib_mad_port_private {
struct list_head port_list;
struct ib_device *device;
int port_num;
struct ib_cq *cq;
struct ib_pd *pd;
struct ib_mr *mr;
spinlock_t reg_lock;
struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION];
struct list_head agent_list;
struct workqueue_struct *wq;
struct work_struct work;
struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
};
int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr);
struct ib_mad_send_wr_private *
ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv,
struct ib_mad_recv_wc *mad_recv_wc);
void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
struct ib_mad_send_wc *mad_send_wc);
void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr);
void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
int timeout_ms);
#endif /* __IB_MAD_PRIV_H__ */

View file

@ -0,0 +1,953 @@
/*
* Copyright (c) 2005 Intel Inc. All rights reserved.
* Copyright (c) 2005-2006 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/slab.h>
#include "mad_priv.h"
#include "mad_rmpp.h"
enum rmpp_state {
RMPP_STATE_ACTIVE,
RMPP_STATE_TIMEOUT,
RMPP_STATE_COMPLETE,
RMPP_STATE_CANCELING
};
struct mad_rmpp_recv {
struct ib_mad_agent_private *agent;
struct list_head list;
struct delayed_work timeout_work;
struct delayed_work cleanup_work;
struct completion comp;
enum rmpp_state state;
spinlock_t lock;
atomic_t refcount;
struct ib_ah *ah;
struct ib_mad_recv_wc *rmpp_wc;
struct ib_mad_recv_buf *cur_seg_buf;
int last_ack;
int seg_num;
int newwin;
int repwin;
__be64 tid;
u32 src_qp;
u16 slid;
u8 mgmt_class;
u8 class_version;
u8 method;
};
static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
{
if (atomic_dec_and_test(&rmpp_recv->refcount))
complete(&rmpp_recv->comp);
}
static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
{
deref_rmpp_recv(rmpp_recv);
wait_for_completion(&rmpp_recv->comp);
ib_destroy_ah(rmpp_recv->ah);
kfree(rmpp_recv);
}
void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent)
{
struct mad_rmpp_recv *rmpp_recv, *temp_rmpp_recv;
unsigned long flags;
spin_lock_irqsave(&agent->lock, flags);
list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
if (rmpp_recv->state != RMPP_STATE_COMPLETE)
ib_free_recv_mad(rmpp_recv->rmpp_wc);
rmpp_recv->state = RMPP_STATE_CANCELING;
}
spin_unlock_irqrestore(&agent->lock, flags);
list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
cancel_delayed_work(&rmpp_recv->timeout_work);
cancel_delayed_work(&rmpp_recv->cleanup_work);
}
flush_workqueue(agent->qp_info->port_priv->wq);
list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv,
&agent->rmpp_list, list) {
list_del(&rmpp_recv->list);
destroy_rmpp_recv(rmpp_recv);
}
}
static void format_ack(struct ib_mad_send_buf *msg,
struct ib_rmpp_mad *data,
struct mad_rmpp_recv *rmpp_recv)
{
struct ib_rmpp_mad *ack = msg->mad;
unsigned long flags;
memcpy(ack, &data->mad_hdr, msg->hdr_len);
ack->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
ack->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ACK;
ib_set_rmpp_flags(&ack->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
spin_lock_irqsave(&rmpp_recv->lock, flags);
rmpp_recv->last_ack = rmpp_recv->seg_num;
ack->rmpp_hdr.seg_num = cpu_to_be32(rmpp_recv->seg_num);
ack->rmpp_hdr.paylen_newwin = cpu_to_be32(rmpp_recv->newwin);
spin_unlock_irqrestore(&rmpp_recv->lock, flags);
}
static void ack_recv(struct mad_rmpp_recv *rmpp_recv,
struct ib_mad_recv_wc *recv_wc)
{
struct ib_mad_send_buf *msg;
int ret, hdr_len;
hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp,
recv_wc->wc->pkey_index, 1, hdr_len,
0, GFP_KERNEL);
if (IS_ERR(msg))
return;
format_ack(msg, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv);
msg->ah = rmpp_recv->ah;
ret = ib_post_send_mad(msg, NULL);
if (ret)
ib_free_send_mad(msg);
}
static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
struct ib_mad_recv_wc *recv_wc)
{
struct ib_mad_send_buf *msg;
struct ib_ah *ah;
int hdr_len;
ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc,
recv_wc->recv_buf.grh, agent->port_num);
if (IS_ERR(ah))
return (void *) ah;
hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
msg = ib_create_send_mad(agent, recv_wc->wc->src_qp,
recv_wc->wc->pkey_index, 1,
hdr_len, 0, GFP_KERNEL);
if (IS_ERR(msg))
ib_destroy_ah(ah);
else {
msg->ah = ah;
msg->context[0] = ah;
}
return msg;
}
static void ack_ds_ack(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *recv_wc)
{
struct ib_mad_send_buf *msg;
struct ib_rmpp_mad *rmpp_mad;
int ret;
msg = alloc_response_msg(&agent->agent, recv_wc);
if (IS_ERR(msg))
return;
rmpp_mad = msg->mad;
memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len);
rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
rmpp_mad->rmpp_hdr.seg_num = 0;
rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(1);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
ib_destroy_ah(msg->ah);
ib_free_send_mad(msg);
}
}
void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
{
if (mad_send_wc->send_buf->context[0] == mad_send_wc->send_buf->ah)
ib_destroy_ah(mad_send_wc->send_buf->ah);
ib_free_send_mad(mad_send_wc->send_buf);
}
static void nack_recv(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *recv_wc, u8 rmpp_status)
{
struct ib_mad_send_buf *msg;
struct ib_rmpp_mad *rmpp_mad;
int ret;
msg = alloc_response_msg(&agent->agent, recv_wc);
if (IS_ERR(msg))
return;
rmpp_mad = msg->mad;
memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len);
rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
rmpp_mad->rmpp_hdr.rmpp_version = IB_MGMT_RMPP_VERSION;
rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ABORT;
ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
rmpp_mad->rmpp_hdr.rmpp_status = rmpp_status;
rmpp_mad->rmpp_hdr.seg_num = 0;
rmpp_mad->rmpp_hdr.paylen_newwin = 0;
ret = ib_post_send_mad(msg, NULL);
if (ret) {
ib_destroy_ah(msg->ah);
ib_free_send_mad(msg);
}
}
static void recv_timeout_handler(struct work_struct *work)
{
struct mad_rmpp_recv *rmpp_recv =
container_of(work, struct mad_rmpp_recv, timeout_work.work);
struct ib_mad_recv_wc *rmpp_wc;
unsigned long flags;
spin_lock_irqsave(&rmpp_recv->agent->lock, flags);
if (rmpp_recv->state != RMPP_STATE_ACTIVE) {
spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
return;
}
rmpp_recv->state = RMPP_STATE_TIMEOUT;
list_del(&rmpp_recv->list);
spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
rmpp_wc = rmpp_recv->rmpp_wc;
nack_recv(rmpp_recv->agent, rmpp_wc, IB_MGMT_RMPP_STATUS_T2L);
destroy_rmpp_recv(rmpp_recv);
ib_free_recv_mad(rmpp_wc);
}
static void recv_cleanup_handler(struct work_struct *work)
{
struct mad_rmpp_recv *rmpp_recv =
container_of(work, struct mad_rmpp_recv, cleanup_work.work);
unsigned long flags;
spin_lock_irqsave(&rmpp_recv->agent->lock, flags);
if (rmpp_recv->state == RMPP_STATE_CANCELING) {
spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
return;
}
list_del(&rmpp_recv->list);
spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
destroy_rmpp_recv(rmpp_recv);
}
static struct mad_rmpp_recv *
create_rmpp_recv(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct mad_rmpp_recv *rmpp_recv;
struct ib_mad_hdr *mad_hdr;
rmpp_recv = kmalloc(sizeof *rmpp_recv, GFP_KERNEL);
if (!rmpp_recv)
return NULL;
rmpp_recv->ah = ib_create_ah_from_wc(agent->agent.qp->pd,
mad_recv_wc->wc,
mad_recv_wc->recv_buf.grh,
agent->agent.port_num);
if (IS_ERR(rmpp_recv->ah))
goto error;
rmpp_recv->agent = agent;
init_completion(&rmpp_recv->comp);
INIT_DELAYED_WORK(&rmpp_recv->timeout_work, recv_timeout_handler);
INIT_DELAYED_WORK(&rmpp_recv->cleanup_work, recv_cleanup_handler);
spin_lock_init(&rmpp_recv->lock);
rmpp_recv->state = RMPP_STATE_ACTIVE;
atomic_set(&rmpp_recv->refcount, 1);
rmpp_recv->rmpp_wc = mad_recv_wc;
rmpp_recv->cur_seg_buf = &mad_recv_wc->recv_buf;
rmpp_recv->newwin = 1;
rmpp_recv->seg_num = 1;
rmpp_recv->last_ack = 0;
rmpp_recv->repwin = 1;
mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr;
rmpp_recv->tid = mad_hdr->tid;
rmpp_recv->src_qp = mad_recv_wc->wc->src_qp;
rmpp_recv->slid = mad_recv_wc->wc->slid;
rmpp_recv->mgmt_class = mad_hdr->mgmt_class;
rmpp_recv->class_version = mad_hdr->class_version;
rmpp_recv->method = mad_hdr->method;
return rmpp_recv;
error: kfree(rmpp_recv);
return NULL;
}
static struct mad_rmpp_recv *
find_rmpp_recv(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct mad_rmpp_recv *rmpp_recv;
struct ib_mad_hdr *mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr;
list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
if (rmpp_recv->tid == mad_hdr->tid &&
rmpp_recv->src_qp == mad_recv_wc->wc->src_qp &&
rmpp_recv->slid == mad_recv_wc->wc->slid &&
rmpp_recv->mgmt_class == mad_hdr->mgmt_class &&
rmpp_recv->class_version == mad_hdr->class_version &&
rmpp_recv->method == mad_hdr->method)
return rmpp_recv;
}
return NULL;
}
static struct mad_rmpp_recv *
acquire_rmpp_recv(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct mad_rmpp_recv *rmpp_recv;
unsigned long flags;
spin_lock_irqsave(&agent->lock, flags);
rmpp_recv = find_rmpp_recv(agent, mad_recv_wc);
if (rmpp_recv)
atomic_inc(&rmpp_recv->refcount);
spin_unlock_irqrestore(&agent->lock, flags);
return rmpp_recv;
}
static struct mad_rmpp_recv *
insert_rmpp_recv(struct ib_mad_agent_private *agent,
struct mad_rmpp_recv *rmpp_recv)
{
struct mad_rmpp_recv *cur_rmpp_recv;
cur_rmpp_recv = find_rmpp_recv(agent, rmpp_recv->rmpp_wc);
if (!cur_rmpp_recv)
list_add_tail(&rmpp_recv->list, &agent->rmpp_list);
return cur_rmpp_recv;
}
static inline int get_last_flag(struct ib_mad_recv_buf *seg)
{
struct ib_rmpp_mad *rmpp_mad;
rmpp_mad = (struct ib_rmpp_mad *) seg->mad;
return ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_LAST;
}
static inline int get_seg_num(struct ib_mad_recv_buf *seg)
{
struct ib_rmpp_mad *rmpp_mad;
rmpp_mad = (struct ib_rmpp_mad *) seg->mad;
return be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num);
}
static inline struct ib_mad_recv_buf * get_next_seg(struct list_head *rmpp_list,
struct ib_mad_recv_buf *seg)
{
if (seg->list.next == rmpp_list)
return NULL;
return container_of(seg->list.next, struct ib_mad_recv_buf, list);
}
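/* advertise a receive window of 1/8 of the receive queue depth, at least 1 */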
static inline int window_size(struct ib_mad_agent_private *agent)
{
return max(agent->qp_info->recv_queue.max_active >> 3, 1);
}
static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list,
int seg_num)
{
struct ib_mad_recv_buf *seg_buf;
int cur_seg_num;
list_for_each_entry_reverse(seg_buf, rmpp_list, list) {
cur_seg_num = get_seg_num(seg_buf);
if (seg_num > cur_seg_num)
return seg_buf;
if (seg_num == cur_seg_num)
break;
}
return NULL;
}
static void update_seg_num(struct mad_rmpp_recv *rmpp_recv,
struct ib_mad_recv_buf *new_buf)
{
struct list_head *rmpp_list = &rmpp_recv->rmpp_wc->rmpp_list;
while (new_buf && (get_seg_num(new_buf) == rmpp_recv->seg_num + 1)) {
rmpp_recv->cur_seg_buf = new_buf;
rmpp_recv->seg_num++;
new_buf = get_next_seg(rmpp_list, new_buf);
}
}
static inline int get_mad_len(struct mad_rmpp_recv *rmpp_recv)
{
struct ib_rmpp_mad *rmpp_mad;
int hdr_size, data_size, pad;
rmpp_mad = (struct ib_rmpp_mad *)rmpp_recv->cur_seg_buf->mad;
hdr_size = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class);
data_size = sizeof(struct ib_rmpp_mad) - hdr_size;
pad = IB_MGMT_RMPP_DATA - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
if (pad > IB_MGMT_RMPP_DATA || pad < 0)
pad = 0;
return hdr_size + rmpp_recv->seg_num * data_size - pad;
}
static struct ib_mad_recv_wc * complete_rmpp(struct mad_rmpp_recv *rmpp_recv)
{
struct ib_mad_recv_wc *rmpp_wc;
ack_recv(rmpp_recv, rmpp_recv->rmpp_wc);
if (rmpp_recv->seg_num > 1)
cancel_delayed_work(&rmpp_recv->timeout_work);
rmpp_wc = rmpp_recv->rmpp_wc;
rmpp_wc->mad_len = get_mad_len(rmpp_recv);
/* schedule cleanup after 10 seconds, until the packet lifetime can be determined */
queue_delayed_work(rmpp_recv->agent->qp_info->port_priv->wq,
&rmpp_recv->cleanup_work, msecs_to_jiffies(10000));
return rmpp_wc;
}
static struct ib_mad_recv_wc *
continue_rmpp(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct mad_rmpp_recv *rmpp_recv;
struct ib_mad_recv_buf *prev_buf;
struct ib_mad_recv_wc *done_wc;
int seg_num;
unsigned long flags;
rmpp_recv = acquire_rmpp_recv(agent, mad_recv_wc);
if (!rmpp_recv)
goto drop1;
seg_num = get_seg_num(&mad_recv_wc->recv_buf);
spin_lock_irqsave(&rmpp_recv->lock, flags);
if ((rmpp_recv->state == RMPP_STATE_TIMEOUT) ||
(seg_num > rmpp_recv->newwin))
goto drop3;
if ((seg_num <= rmpp_recv->last_ack) ||
(rmpp_recv->state == RMPP_STATE_COMPLETE)) {
spin_unlock_irqrestore(&rmpp_recv->lock, flags);
ack_recv(rmpp_recv, mad_recv_wc);
goto drop2;
}
prev_buf = find_seg_location(&rmpp_recv->rmpp_wc->rmpp_list, seg_num);
if (!prev_buf)
goto drop3;
done_wc = NULL;
list_add(&mad_recv_wc->recv_buf.list, &prev_buf->list);
if (rmpp_recv->cur_seg_buf == prev_buf) {
update_seg_num(rmpp_recv, &mad_recv_wc->recv_buf);
if (get_last_flag(rmpp_recv->cur_seg_buf)) {
rmpp_recv->state = RMPP_STATE_COMPLETE;
spin_unlock_irqrestore(&rmpp_recv->lock, flags);
done_wc = complete_rmpp(rmpp_recv);
goto out;
} else if (rmpp_recv->seg_num == rmpp_recv->newwin) {
rmpp_recv->newwin += window_size(agent);
spin_unlock_irqrestore(&rmpp_recv->lock, flags);
ack_recv(rmpp_recv, mad_recv_wc);
goto out;
}
}
spin_unlock_irqrestore(&rmpp_recv->lock, flags);
out:
deref_rmpp_recv(rmpp_recv);
return done_wc;
drop3: spin_unlock_irqrestore(&rmpp_recv->lock, flags);
drop2: deref_rmpp_recv(rmpp_recv);
drop1: ib_free_recv_mad(mad_recv_wc);
return NULL;
}
static struct ib_mad_recv_wc *
start_rmpp(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct mad_rmpp_recv *rmpp_recv;
unsigned long flags;
rmpp_recv = create_rmpp_recv(agent, mad_recv_wc);
if (!rmpp_recv) {
ib_free_recv_mad(mad_recv_wc);
return NULL;
}
spin_lock_irqsave(&agent->lock, flags);
if (insert_rmpp_recv(agent, rmpp_recv)) {
spin_unlock_irqrestore(&agent->lock, flags);
/* duplicate first MAD */
destroy_rmpp_recv(rmpp_recv);
return continue_rmpp(agent, mad_recv_wc);
}
atomic_inc(&rmpp_recv->refcount);
if (get_last_flag(&mad_recv_wc->recv_buf)) {
rmpp_recv->state = RMPP_STATE_COMPLETE;
spin_unlock_irqrestore(&agent->lock, flags);
complete_rmpp(rmpp_recv);
} else {
spin_unlock_irqrestore(&agent->lock, flags);
/* time out after 40 seconds, until the packet lifetimes can be determined */
queue_delayed_work(agent->qp_info->port_priv->wq,
&rmpp_recv->timeout_work,
msecs_to_jiffies(40000));
rmpp_recv->newwin += window_size(agent);
ack_recv(rmpp_recv, mad_recv_wc);
mad_recv_wc = NULL;
}
deref_rmpp_recv(rmpp_recv);
return mad_recv_wc;
}
static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
{
struct ib_rmpp_mad *rmpp_mad;
int timeout;
u32 paylen = 0;
rmpp_mad = mad_send_wr->send_buf.mad;
ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(++mad_send_wr->seg_num);
if (mad_send_wr->seg_num == 1) {
rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_FIRST;
paylen = mad_send_wr->send_buf.seg_count * IB_MGMT_RMPP_DATA -
mad_send_wr->pad;
}
if (mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count) {
rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_LAST;
paylen = IB_MGMT_RMPP_DATA - mad_send_wr->pad;
}
rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen);
/* cap the ACK timeout at 2 seconds, until the packet lifetime can be determined */
timeout = mad_send_wr->send_buf.timeout_ms;
if (!timeout || timeout > 2000)
mad_send_wr->timeout = msecs_to_jiffies(2000);
return ib_send_mad(mad_send_wr);
}
static void abort_send(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc, u8 rmpp_status)
{
struct ib_mad_send_wr_private *mad_send_wr;
struct ib_mad_send_wc wc;
unsigned long flags;
spin_lock_irqsave(&agent->lock, flags);
mad_send_wr = ib_find_send_mad(agent, mad_recv_wc);
if (!mad_send_wr)
goto out; /* Unmatched send */
if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
(!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
goto out; /* Send is already done */
ib_mark_mad_done(mad_send_wr);
spin_unlock_irqrestore(&agent->lock, flags);
wc.status = IB_WC_REM_ABORT_ERR;
wc.vendor_err = rmpp_status;
wc.send_buf = &mad_send_wr->send_buf;
ib_mad_complete_send_wr(mad_send_wr, &wc);
return;
out:
spin_unlock_irqrestore(&agent->lock, flags);
}
static inline void adjust_last_ack(struct ib_mad_send_wr_private *wr,
int seg_num)
{
struct list_head *list;
wr->last_ack = seg_num;
list = &wr->last_ack_seg->list;
list_for_each_entry(wr->last_ack_seg, list, list)
if (wr->last_ack_seg->num == seg_num)
break;
}
static void process_ds_ack(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc, int newwin)
{
struct mad_rmpp_recv *rmpp_recv;
rmpp_recv = find_rmpp_recv(agent, mad_recv_wc);
if (rmpp_recv && rmpp_recv->state == RMPP_STATE_COMPLETE)
rmpp_recv->repwin = newwin;
}
static void process_rmpp_ack(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_mad_send_wr_private *mad_send_wr;
struct ib_rmpp_mad *rmpp_mad;
unsigned long flags;
int seg_num, newwin, ret;
rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
if (rmpp_mad->rmpp_hdr.rmpp_status) {
abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
return;
}
seg_num = be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num);
newwin = be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
if (newwin < seg_num) {
abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S);
nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S);
return;
}
spin_lock_irqsave(&agent->lock, flags);
mad_send_wr = ib_find_send_mad(agent, mad_recv_wc);
if (!mad_send_wr) {
if (!seg_num)
process_ds_ack(agent, mad_recv_wc, newwin);
goto out; /* Unmatched or DS RMPP ACK */
}
if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) &&
(mad_send_wr->timeout)) {
spin_unlock_irqrestore(&agent->lock, flags);
ack_ds_ack(agent, mad_recv_wc);
return; /* Repeated ACK for DS RMPP transaction */
}
if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
(!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
goto out; /* Send is already done */
if (seg_num > mad_send_wr->send_buf.seg_count ||
seg_num > mad_send_wr->newwin) {
spin_unlock_irqrestore(&agent->lock, flags);
abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B);
nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B);
return;
}
if (newwin < mad_send_wr->newwin || seg_num < mad_send_wr->last_ack)
goto out; /* Old ACK */
if (seg_num > mad_send_wr->last_ack) {
adjust_last_ack(mad_send_wr, seg_num);
mad_send_wr->retries_left = mad_send_wr->max_retries;
}
mad_send_wr->newwin = newwin;
if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) {
/* If no response is expected, the ACK completes the send */
if (!mad_send_wr->send_buf.timeout_ms) {
struct ib_mad_send_wc wc;
ib_mark_mad_done(mad_send_wr);
spin_unlock_irqrestore(&agent->lock, flags);
wc.status = IB_WC_SUCCESS;
wc.vendor_err = 0;
wc.send_buf = &mad_send_wr->send_buf;
ib_mad_complete_send_wr(mad_send_wr, &wc);
return;
}
if (mad_send_wr->refcount == 1)
ib_reset_mad_timeout(mad_send_wr,
mad_send_wr->send_buf.timeout_ms);
spin_unlock_irqrestore(&agent->lock, flags);
ack_ds_ack(agent, mad_recv_wc);
return;
} else if (mad_send_wr->refcount == 1 &&
mad_send_wr->seg_num < mad_send_wr->newwin &&
mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) {
/* Send failure will just result in a timeout/retry */
ret = send_next_seg(mad_send_wr);
if (ret)
goto out;
mad_send_wr->refcount++;
list_move_tail(&mad_send_wr->agent_list,
&mad_send_wr->mad_agent_priv->send_list);
}
out:
spin_unlock_irqrestore(&agent->lock, flags);
}
static struct ib_mad_recv_wc *
process_rmpp_data(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_rmpp_hdr *rmpp_hdr;
u8 rmpp_status;
rmpp_hdr = &((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr;
if (rmpp_hdr->rmpp_status) {
rmpp_status = IB_MGMT_RMPP_STATUS_BAD_STATUS;
goto bad;
}
if (rmpp_hdr->seg_num == cpu_to_be32(1)) {
if (!(ib_get_rmpp_flags(rmpp_hdr) & IB_MGMT_RMPP_FLAG_FIRST)) {
rmpp_status = IB_MGMT_RMPP_STATUS_BAD_SEG;
goto bad;
}
return start_rmpp(agent, mad_recv_wc);
} else {
if (ib_get_rmpp_flags(rmpp_hdr) & IB_MGMT_RMPP_FLAG_FIRST) {
rmpp_status = IB_MGMT_RMPP_STATUS_BAD_SEG;
goto bad;
}
return continue_rmpp(agent, mad_recv_wc);
}
bad:
nack_recv(agent, mad_recv_wc, rmpp_status);
ib_free_recv_mad(mad_recv_wc);
return NULL;
}
static void process_rmpp_stop(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_rmpp_mad *rmpp_mad;
rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
if (rmpp_mad->rmpp_hdr.rmpp_status != IB_MGMT_RMPP_STATUS_RESX) {
abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
} else
abort_send(agent, mad_recv_wc, rmpp_mad->rmpp_hdr.rmpp_status);
}
static void process_rmpp_abort(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_rmpp_mad *rmpp_mad;
rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
if (rmpp_mad->rmpp_hdr.rmpp_status < IB_MGMT_RMPP_STATUS_ABORT_MIN ||
rmpp_mad->rmpp_hdr.rmpp_status > IB_MGMT_RMPP_STATUS_ABORT_MAX) {
abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
} else
abort_send(agent, mad_recv_wc, rmpp_mad->rmpp_hdr.rmpp_status);
}
struct ib_mad_recv_wc *
ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_rmpp_mad *rmpp_mad;
rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
if (!(rmpp_mad->rmpp_hdr.rmpp_rtime_flags & IB_MGMT_RMPP_FLAG_ACTIVE))
return mad_recv_wc;
if (rmpp_mad->rmpp_hdr.rmpp_version != IB_MGMT_RMPP_VERSION) {
abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV);
nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV);
goto out;
}
switch (rmpp_mad->rmpp_hdr.rmpp_type) {
case IB_MGMT_RMPP_TYPE_DATA:
return process_rmpp_data(agent, mad_recv_wc);
case IB_MGMT_RMPP_TYPE_ACK:
process_rmpp_ack(agent, mad_recv_wc);
break;
case IB_MGMT_RMPP_TYPE_STOP:
process_rmpp_stop(agent, mad_recv_wc);
break;
case IB_MGMT_RMPP_TYPE_ABORT:
process_rmpp_abort(agent, mad_recv_wc);
break;
default:
abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT);
nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT);
break;
}
out:
ib_free_recv_mad(mad_recv_wc);
return NULL;
}
static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr)
{
struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv;
struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad;
struct mad_rmpp_recv *rmpp_recv;
struct ib_ah_attr ah_attr;
unsigned long flags;
int newwin = 1;
if (!(mad_hdr->method & IB_MGMT_METHOD_RESP))
goto out;
spin_lock_irqsave(&agent->lock, flags);
list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
if (rmpp_recv->tid != mad_hdr->tid ||
rmpp_recv->mgmt_class != mad_hdr->mgmt_class ||
rmpp_recv->class_version != mad_hdr->class_version ||
(rmpp_recv->method & IB_MGMT_METHOD_RESP))
continue;
if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr))
continue;
if (rmpp_recv->slid == ah_attr.dlid) {
newwin = rmpp_recv->repwin;
break;
}
}
spin_unlock_irqrestore(&agent->lock, flags);
out:
return newwin;
}
int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
{
struct ib_rmpp_mad *rmpp_mad;
int ret;
rmpp_mad = mad_send_wr->send_buf.mad;
if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
IB_MGMT_RMPP_FLAG_ACTIVE))
return IB_RMPP_RESULT_UNHANDLED;
if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) {
mad_send_wr->seg_num = 1;
return IB_RMPP_RESULT_INTERNAL;
}
mad_send_wr->newwin = init_newwin(mad_send_wr);
/* We need to wait for the final ACK even if there isn't a response */
mad_send_wr->refcount += (mad_send_wr->timeout == 0);
ret = send_next_seg(mad_send_wr);
if (!ret)
return IB_RMPP_RESULT_CONSUMED;
return ret;
}
int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
struct ib_mad_send_wc *mad_send_wc)
{
struct ib_rmpp_mad *rmpp_mad;
int ret;
rmpp_mad = mad_send_wr->send_buf.mad;
if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
IB_MGMT_RMPP_FLAG_ACTIVE))
return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */
if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA)
return IB_RMPP_RESULT_INTERNAL; /* ACK, STOP, or ABORT */
if (mad_send_wc->status != IB_WC_SUCCESS ||
mad_send_wr->status != IB_WC_SUCCESS)
return IB_RMPP_RESULT_PROCESSED; /* Canceled or send error */
if (!mad_send_wr->timeout)
return IB_RMPP_RESULT_PROCESSED; /* Response received */
if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) {
mad_send_wr->timeout =
msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
return IB_RMPP_RESULT_PROCESSED; /* Send done */
}
if (mad_send_wr->seg_num == mad_send_wr->newwin ||
mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count)
return IB_RMPP_RESULT_PROCESSED; /* Wait for ACK */
ret = send_next_seg(mad_send_wr);
if (ret) {
mad_send_wc->status = IB_WC_GENERAL_ERR;
return IB_RMPP_RESULT_PROCESSED;
}
return IB_RMPP_RESULT_CONSUMED;
}
int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr)
{
struct ib_rmpp_mad *rmpp_mad;
int ret;
rmpp_mad = mad_send_wr->send_buf.mad;
if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
IB_MGMT_RMPP_FLAG_ACTIVE))
return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */
if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count)
return IB_RMPP_RESULT_PROCESSED;
mad_send_wr->seg_num = mad_send_wr->last_ack;
mad_send_wr->cur_seg = mad_send_wr->last_ack_seg;
ret = send_next_seg(mad_send_wr);
if (ret)
return IB_RMPP_RESULT_PROCESSED;
return IB_RMPP_RESULT_CONSUMED;
}

View file

@ -0,0 +1,58 @@
/*
* Copyright (c) 2005 Intel Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef __MAD_RMPP_H__
#define __MAD_RMPP_H__
enum {
IB_RMPP_RESULT_PROCESSED,
IB_RMPP_RESULT_CONSUMED,
IB_RMPP_RESULT_INTERNAL,
IB_RMPP_RESULT_UNHANDLED
};
int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr);
struct ib_mad_recv_wc *
ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc);
int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
struct ib_mad_send_wc *mad_send_wc);
void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc);
void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent);
int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr);
#endif /* __MAD_RMPP_H__ */
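
ib_send_rmpp_mad() either takes over the send itself or tells the caller to fall back to a plain MAD send, signalled through the IB_RMPP_RESULT_* codes above. The sketch below shows one way a caller could act on those codes; example_post_send() is a hypothetical wrapper for illustration, not the dispatch actually used by mad.c.

/* Hypothetical dispatch on the result codes returned by ib_send_rmpp_mad(). */
#include "mad_priv.h"
#include "mad_rmpp.h"

static int example_post_send(struct ib_mad_send_wr_private *mad_send_wr)
{
	int ret = ib_send_rmpp_mad(mad_send_wr);

	if (ret < 0)
		return ret;		/* sending the first segment failed */

	switch (ret) {
	case IB_RMPP_RESULT_CONSUMED:
		return 0;		/* RMPP layer already posted the segment */
	case IB_RMPP_RESULT_INTERNAL:	/* ACK, STOP or ABORT: post unmodified */
	case IB_RMPP_RESULT_UNHANDLED:	/* RMPP not active on this MAD */
		return ib_send_mad(mad_send_wr);
	default:
		return -EINVAL;
	}
}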

View file

@ -0,0 +1,898 @@
/*
* Copyright (c) 2006 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/random.h>
#include <rdma/ib_cache.h>
#include "sa.h"
static void mcast_add_one(struct ib_device *device);
static void mcast_remove_one(struct ib_device *device);
static struct ib_client mcast_client = {
.name = "ib_multicast",
.add = mcast_add_one,
.remove = mcast_remove_one
};
static struct ib_sa_client sa_client;
static struct workqueue_struct *mcast_wq;
static union ib_gid mgid0;
struct mcast_device;
struct mcast_port {
struct mcast_device *dev;
spinlock_t lock;
struct rb_root table;
atomic_t refcount;
struct completion comp;
u8 port_num;
};
struct mcast_device {
struct ib_device *device;
struct ib_event_handler event_handler;
int start_port;
int end_port;
struct mcast_port port[0];
};
enum mcast_state {
MCAST_JOINING,
MCAST_MEMBER,
MCAST_ERROR,
};
enum mcast_group_state {
MCAST_IDLE,
MCAST_BUSY,
MCAST_GROUP_ERROR,
MCAST_PKEY_EVENT
};
enum {
MCAST_INVALID_PKEY_INDEX = 0xFFFF
};
struct mcast_member;
struct mcast_group {
struct ib_sa_mcmember_rec rec;
struct rb_node node;
struct mcast_port *port;
spinlock_t lock;
struct work_struct work;
struct list_head pending_list;
struct list_head active_list;
struct mcast_member *last_join;
int members[3];
atomic_t refcount;
enum mcast_group_state state;
struct ib_sa_query *query;
int query_id;
u16 pkey_index;
u8 leave_state;
int retries;
};
struct mcast_member {
struct ib_sa_multicast multicast;
struct ib_sa_client *client;
struct mcast_group *group;
struct list_head list;
enum mcast_state state;
atomic_t refcount;
struct completion comp;
};
static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
void *context);
static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
void *context);
static struct mcast_group *mcast_find(struct mcast_port *port,
union ib_gid *mgid)
{
struct rb_node *node = port->table.rb_node;
struct mcast_group *group;
int ret;
while (node) {
group = rb_entry(node, struct mcast_group, node);
ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid);
if (!ret)
return group;
if (ret < 0)
node = node->rb_left;
else
node = node->rb_right;
}
return NULL;
}
static struct mcast_group *mcast_insert(struct mcast_port *port,
struct mcast_group *group,
int allow_duplicates)
{
struct rb_node **link = &port->table.rb_node;
struct rb_node *parent = NULL;
struct mcast_group *cur_group;
int ret;
while (*link) {
parent = *link;
cur_group = rb_entry(parent, struct mcast_group, node);
ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw,
sizeof group->rec.mgid);
if (ret < 0)
link = &(*link)->rb_left;
else if (ret > 0)
link = &(*link)->rb_right;
else if (allow_duplicates)
link = &(*link)->rb_left;
else
return cur_group;
}
rb_link_node(&group->node, parent, link);
rb_insert_color(&group->node, &port->table);
return NULL;
}
static void deref_port(struct mcast_port *port)
{
if (atomic_dec_and_test(&port->refcount))
complete(&port->comp);
}
static void release_group(struct mcast_group *group)
{
struct mcast_port *port = group->port;
unsigned long flags;
spin_lock_irqsave(&port->lock, flags);
if (atomic_dec_and_test(&group->refcount)) {
rb_erase(&group->node, &port->table);
spin_unlock_irqrestore(&port->lock, flags);
kfree(group);
deref_port(port);
} else
spin_unlock_irqrestore(&port->lock, flags);
}
static void deref_member(struct mcast_member *member)
{
if (atomic_dec_and_test(&member->refcount))
complete(&member->comp);
}
static void queue_join(struct mcast_member *member)
{
struct mcast_group *group = member->group;
unsigned long flags;
spin_lock_irqsave(&group->lock, flags);
list_add_tail(&member->list, &group->pending_list);
if (group->state == MCAST_IDLE) {
group->state = MCAST_BUSY;
atomic_inc(&group->refcount);
queue_work(mcast_wq, &group->work);
}
spin_unlock_irqrestore(&group->lock, flags);
}
/*
* A multicast group has three types of members: full member, non member, and
* send only member. We need to keep track of the number of members of each
* type based on their join state. Adjust the number of members that belong to
* the specified join states.
*/
static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
{
int i;
for (i = 0; i < 3; i++, join_state >>= 1)
if (join_state & 0x1)
group->members[i] += inc;
}
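/*
* The join_state bits follow the IBA MCMemberRecord layout: 0x1 is full
* member, 0x2 is non member and 0x4 is send-only non member, so
* members[0], members[1] and members[2] count those states in turn. For
* example, a member joining with join_state 0x5 bumps members[0] and
* members[2]; when both counts fall back to zero, get_leave_state()
* below reports those bits so they can be dropped with one SA leave.
*/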
/*
* If a multicast group has zero members left for a particular join state, but
* the group is still a member with the SA, we need to leave that join state.
* Determine which join states we still belong to, but that do not have any
* active members.
*/
static u8 get_leave_state(struct mcast_group *group)
{
u8 leave_state = 0;
int i;
for (i = 0; i < 3; i++)
if (!group->members[i])
leave_state |= (0x1 << i);
return leave_state & group->rec.join_state;
}
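/*
* check_selector() returns 0 when the source value satisfies the
* requested comparison against the destination record (for example
* IB_SA_GT requires src_value > dst_value), and non-zero otherwise.
* If the selector or the value is absent from comp_mask the check
* passes trivially, matching how cmp_rec() treats optional fields.
*/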
static int check_selector(ib_sa_comp_mask comp_mask,
ib_sa_comp_mask selector_mask,
ib_sa_comp_mask value_mask,
u8 selector, u8 src_value, u8 dst_value)
{
int err;
if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
return 0;
switch (selector) {
case IB_SA_GT:
err = (src_value <= dst_value);
break;
case IB_SA_LT:
err = (src_value >= dst_value);
break;
case IB_SA_EQ:
err = (src_value != dst_value);
break;
default:
err = 0;
break;
}
return err;
}
static int cmp_rec(struct ib_sa_mcmember_rec *src,
struct ib_sa_mcmember_rec *dst, ib_sa_comp_mask comp_mask)
{
/* MGID must already match */
if (comp_mask & IB_SA_MCMEMBER_REC_PORT_GID &&
memcmp(&src->port_gid, &dst->port_gid, sizeof src->port_gid))
return -EINVAL;
if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey)
return -EINVAL;
if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
return -EINVAL;
if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
IB_SA_MCMEMBER_REC_MTU, dst->mtu_selector,
src->mtu, dst->mtu))
return -EINVAL;
if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
src->traffic_class != dst->traffic_class)
return -EINVAL;
if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
return -EINVAL;
if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
IB_SA_MCMEMBER_REC_RATE, dst->rate_selector,
src->rate, dst->rate))
return -EINVAL;
if (check_selector(comp_mask,
IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
dst->packet_life_time_selector,
src->packet_life_time, dst->packet_life_time))
return -EINVAL;
if (comp_mask & IB_SA_MCMEMBER_REC_SL && src->sl != dst->sl)
return -EINVAL;
if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL &&
src->flow_label != dst->flow_label)
return -EINVAL;
if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT &&
src->hop_limit != dst->hop_limit)
return -EINVAL;
if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE && src->scope != dst->scope)
return -EINVAL;
/* join_state checked separately, proxy_join ignored */
return 0;
}
static int send_join(struct mcast_group *group, struct mcast_member *member)
{
struct mcast_port *port = group->port;
int ret;
group->last_join = member;
ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
port->port_num, IB_MGMT_METHOD_SET,
&member->multicast.rec,
member->multicast.comp_mask,
3000, GFP_KERNEL, join_handler, group,
&group->query);
if (ret >= 0) {
group->query_id = ret;
ret = 0;
}
return ret;
}
static int send_leave(struct mcast_group *group, u8 leave_state)
{
struct mcast_port *port = group->port;
struct ib_sa_mcmember_rec rec;
int ret;
rec = group->rec;
rec.join_state = leave_state;
group->leave_state = leave_state;
ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
port->port_num, IB_SA_METHOD_DELETE, &rec,
IB_SA_MCMEMBER_REC_MGID |
IB_SA_MCMEMBER_REC_PORT_GID |
IB_SA_MCMEMBER_REC_JOIN_STATE,
3000, GFP_KERNEL, leave_handler,
group, &group->query);
if (ret >= 0) {
group->query_id = ret;
ret = 0;
}
return ret;
}
static void join_group(struct mcast_group *group, struct mcast_member *member,
u8 join_state)
{
member->state = MCAST_MEMBER;
adjust_membership(group, join_state, 1);
group->rec.join_state |= join_state;
member->multicast.rec = group->rec;
member->multicast.rec.join_state = join_state;
list_move(&member->list, &group->active_list);
}
static int fail_join(struct mcast_group *group, struct mcast_member *member,
int status)
{
spin_lock_irq(&group->lock);
list_del_init(&member->list);
spin_unlock_irq(&group->lock);
return member->multicast.callback(status, &member->multicast);
}
static void process_group_error(struct mcast_group *group)
{
struct mcast_member *member;
int ret = 0;
u16 pkey_index;
if (group->state == MCAST_PKEY_EVENT)
ret = ib_find_pkey(group->port->dev->device,
group->port->port_num,
be16_to_cpu(group->rec.pkey), &pkey_index);
spin_lock_irq(&group->lock);
if (group->state == MCAST_PKEY_EVENT && !ret &&
group->pkey_index == pkey_index)
goto out;
while (!list_empty(&group->active_list)) {
member = list_entry(group->active_list.next,
struct mcast_member, list);
atomic_inc(&member->refcount);
list_del_init(&member->list);
adjust_membership(group, member->multicast.rec.join_state, -1);
member->state = MCAST_ERROR;
spin_unlock_irq(&group->lock);
ret = member->multicast.callback(-ENETRESET,
&member->multicast);
deref_member(member);
if (ret)
ib_sa_free_multicast(&member->multicast);
spin_lock_irq(&group->lock);
}
group->rec.join_state = 0;
out:
group->state = MCAST_BUSY;
spin_unlock_irq(&group->lock);
}
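/*
* mcast_work_handler() drains the group's pending list. A request whose
* join_state is already covered by the group's SA membership is completed
* locally via join_group(); otherwise a join is sent to the SA and the
* handler returns until join_handler() re-runs it. Once nothing is
* pending, any join states left without members are dropped through
* send_leave(), or the group goes idle and its work reference is released.
*/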
static void mcast_work_handler(struct work_struct *work)
{
struct mcast_group *group;
struct mcast_member *member;
struct ib_sa_multicast *multicast;
int status, ret;
u8 join_state;
group = container_of(work, typeof(*group), work);
retest:
spin_lock_irq(&group->lock);
while (!list_empty(&group->pending_list) ||
(group->state != MCAST_BUSY)) {
if (group->state != MCAST_BUSY) {
spin_unlock_irq(&group->lock);
process_group_error(group);
goto retest;
}
member = list_entry(group->pending_list.next,
struct mcast_member, list);
multicast = &member->multicast;
join_state = multicast->rec.join_state;
atomic_inc(&member->refcount);
if (join_state == (group->rec.join_state & join_state)) {
status = cmp_rec(&group->rec, &multicast->rec,
multicast->comp_mask);
if (!status)
join_group(group, member, join_state);
else
list_del_init(&member->list);
spin_unlock_irq(&group->lock);
ret = multicast->callback(status, multicast);
} else {
spin_unlock_irq(&group->lock);
status = send_join(group, member);
if (!status) {
deref_member(member);
return;
}
ret = fail_join(group, member, status);
}
deref_member(member);
if (ret)
ib_sa_free_multicast(&member->multicast);
spin_lock_irq(&group->lock);
}
join_state = get_leave_state(group);
if (join_state) {
group->rec.join_state &= ~join_state;
spin_unlock_irq(&group->lock);
if (send_leave(group, join_state))
goto retest;
} else {
group->state = MCAST_IDLE;
spin_unlock_irq(&group->lock);
release_group(group);
}
}
/*
* Fail a join request if it is still active - at the head of the pending queue.
*/
static void process_join_error(struct mcast_group *group, int status)
{
struct mcast_member *member;
int ret;
spin_lock_irq(&group->lock);
member = list_entry(group->pending_list.next,
struct mcast_member, list);
if (group->last_join == member) {
atomic_inc(&member->refcount);
list_del_init(&member->list);
spin_unlock_irq(&group->lock);
ret = member->multicast.callback(status, &member->multicast);
deref_member(member);
if (ret)
ib_sa_free_multicast(&member->multicast);
} else
spin_unlock_irq(&group->lock);
}
static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
void *context)
{
struct mcast_group *group = context;
u16 pkey_index = MCAST_INVALID_PKEY_INDEX;
if (status)
process_join_error(group, status);
else {
ib_find_pkey(group->port->dev->device, group->port->port_num,
be16_to_cpu(rec->pkey), &pkey_index);
spin_lock_irq(&group->port->lock);
group->rec = *rec;
if (group->state == MCAST_BUSY &&
group->pkey_index == MCAST_INVALID_PKEY_INDEX)
group->pkey_index = pkey_index;
if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) {
rb_erase(&group->node, &group->port->table);
mcast_insert(group->port, group, 1);
}
spin_unlock_irq(&group->port->lock);
}
mcast_work_handler(&group->work);
}
static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
void *context)
{
struct mcast_group *group = context;
if (status && group->retries > 0 &&
!send_leave(group, group->leave_state))
group->retries--;
else
mcast_work_handler(&group->work);
}
static struct mcast_group *acquire_group(struct mcast_port *port,
union ib_gid *mgid, gfp_t gfp_mask)
{
struct mcast_group *group, *cur_group;
unsigned long flags;
int is_mgid0;
is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0);
if (!is_mgid0) {
spin_lock_irqsave(&port->lock, flags);
group = mcast_find(port, mgid);
if (group)
goto found;
spin_unlock_irqrestore(&port->lock, flags);
}
group = kzalloc(sizeof *group, gfp_mask);
if (!group)
return NULL;
group->retries = 3;
group->port = port;
group->rec.mgid = *mgid;
group->pkey_index = MCAST_INVALID_PKEY_INDEX;
INIT_LIST_HEAD(&group->pending_list);
INIT_LIST_HEAD(&group->active_list);
INIT_WORK(&group->work, mcast_work_handler);
spin_lock_init(&group->lock);
spin_lock_irqsave(&port->lock, flags);
cur_group = mcast_insert(port, group, is_mgid0);
if (cur_group) {
kfree(group);
group = cur_group;
} else
atomic_inc(&port->refcount);
found:
atomic_inc(&group->refcount);
spin_unlock_irqrestore(&port->lock, flags);
return group;
}
/*
* We serialize all join requests to a single group to make our lives much
* easier. Otherwise, two users could try to join the same group
* simultaneously, with different configurations, one could leave while the
* join is in progress, etc., which makes locking around error recovery
* difficult.
*/
struct ib_sa_multicast *
ib_sa_join_multicast(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
int (*callback)(int status,
struct ib_sa_multicast *multicast),
void *context)
{
struct mcast_device *dev;
struct mcast_member *member;
struct ib_sa_multicast *multicast;
int ret;
dev = ib_get_client_data(device, &mcast_client);
if (!dev)
return ERR_PTR(-ENODEV);
member = kmalloc(sizeof *member, gfp_mask);
if (!member)
return ERR_PTR(-ENOMEM);
ib_sa_client_get(client);
member->client = client;
member->multicast.rec = *rec;
member->multicast.comp_mask = comp_mask;
member->multicast.callback = callback;
member->multicast.context = context;
init_completion(&member->comp);
atomic_set(&member->refcount, 1);
member->state = MCAST_JOINING;
member->group = acquire_group(&dev->port[port_num - dev->start_port],
&rec->mgid, gfp_mask);
if (!member->group) {
ret = -ENOMEM;
goto err;
}
/*
* The user will get the multicast structure in their callback. They
* could then free the multicast structure before we can return from
* this routine. So we save the pointer to return before queuing
* any callback.
*/
multicast = &member->multicast;
queue_join(member);
return multicast;
err:
ib_sa_client_put(client);
kfree(member);
return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_sa_join_multicast);
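/*
* Note on the completion callback: a non-zero return from the caller's
* callback (for either a join result or an -ENETRESET group error) makes
* the core call ib_sa_free_multicast() on the caller's behalf, as done in
* mcast_work_handler() and process_group_error() above.
*/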
void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
{
struct mcast_member *member;
struct mcast_group *group;
member = container_of(multicast, struct mcast_member, multicast);
group = member->group;
spin_lock_irq(&group->lock);
if (member->state == MCAST_MEMBER)
adjust_membership(group, multicast->rec.join_state, -1);
list_del_init(&member->list);
if (group->state == MCAST_IDLE) {
group->state = MCAST_BUSY;
spin_unlock_irq(&group->lock);
/* Continue to hold reference on group until callback */
queue_work(mcast_wq, &group->work);
} else {
spin_unlock_irq(&group->lock);
release_group(group);
}
deref_member(member);
wait_for_completion(&member->comp);
ib_sa_client_put(member->client);
kfree(member);
}
EXPORT_SYMBOL(ib_sa_free_multicast);
int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
union ib_gid *mgid, struct ib_sa_mcmember_rec *rec)
{
struct mcast_device *dev;
struct mcast_port *port;
struct mcast_group *group;
unsigned long flags;
int ret = 0;
dev = ib_get_client_data(device, &mcast_client);
if (!dev)
return -ENODEV;
port = &dev->port[port_num - dev->start_port];
spin_lock_irqsave(&port->lock, flags);
group = mcast_find(port, mgid);
if (group)
*rec = group->rec;
else
ret = -EADDRNOTAVAIL;
spin_unlock_irqrestore(&port->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
struct ib_sa_mcmember_rec *rec,
struct ib_ah_attr *ah_attr)
{
int ret;
u16 gid_index;
u8 p;
ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index);
if (ret)
return ret;
memset(ah_attr, 0, sizeof *ah_attr);
ah_attr->dlid = be16_to_cpu(rec->mlid);
ah_attr->sl = rec->sl;
ah_attr->port_num = port_num;
ah_attr->static_rate = rec->rate;
ah_attr->ah_flags = IB_AH_GRH;
ah_attr->grh.dgid = rec->mgid;
ah_attr->grh.sgid_index = (u8) gid_index;
ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
ah_attr->grh.hop_limit = rec->hop_limit;
ah_attr->grh.traffic_class = rec->traffic_class;
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_mcmember);
static void mcast_groups_event(struct mcast_port *port,
enum mcast_group_state state)
{
struct mcast_group *group;
struct rb_node *node;
unsigned long flags;
spin_lock_irqsave(&port->lock, flags);
for (node = rb_first(&port->table); node; node = rb_next(node)) {
group = rb_entry(node, struct mcast_group, node);
spin_lock(&group->lock);
if (group->state == MCAST_IDLE) {
atomic_inc(&group->refcount);
queue_work(mcast_wq, &group->work);
}
if (group->state != MCAST_GROUP_ERROR)
group->state = state;
spin_unlock(&group->lock);
}
spin_unlock_irqrestore(&port->lock, flags);
}
static void mcast_event_handler(struct ib_event_handler *handler,
struct ib_event *event)
{
struct mcast_device *dev;
int index;
dev = container_of(handler, struct mcast_device, event_handler);
if (rdma_port_get_link_layer(dev->device, event->element.port_num) !=
IB_LINK_LAYER_INFINIBAND)
return;
index = event->element.port_num - dev->start_port;
switch (event->event) {
case IB_EVENT_PORT_ERR:
case IB_EVENT_LID_CHANGE:
case IB_EVENT_SM_CHANGE:
case IB_EVENT_CLIENT_REREGISTER:
mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR);
break;
case IB_EVENT_PKEY_CHANGE:
mcast_groups_event(&dev->port[index], MCAST_PKEY_EVENT);
break;
default:
break;
}
}
static void mcast_add_one(struct ib_device *device)
{
struct mcast_device *dev;
struct mcast_port *port;
int i;
int count = 0;
if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
return;
dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port,
GFP_KERNEL);
if (!dev)
return;
if (device->node_type == RDMA_NODE_IB_SWITCH)
dev->start_port = dev->end_port = 0;
else {
dev->start_port = 1;
dev->end_port = device->phys_port_cnt;
}
for (i = 0; i <= dev->end_port - dev->start_port; i++) {
if (rdma_port_get_link_layer(device, dev->start_port + i) !=
IB_LINK_LAYER_INFINIBAND)
continue;
port = &dev->port[i];
port->dev = dev;
port->port_num = dev->start_port + i;
spin_lock_init(&port->lock);
port->table = RB_ROOT;
init_completion(&port->comp);
atomic_set(&port->refcount, 1);
++count;
}
if (!count) {
kfree(dev);
return;
}
dev->device = device;
ib_set_client_data(device, &mcast_client, dev);
INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler);
ib_register_event_handler(&dev->event_handler);
}
static void mcast_remove_one(struct ib_device *device)
{
struct mcast_device *dev;
struct mcast_port *port;
int i;
dev = ib_get_client_data(device, &mcast_client);
if (!dev)
return;
ib_unregister_event_handler(&dev->event_handler);
flush_workqueue(mcast_wq);
for (i = 0; i <= dev->end_port - dev->start_port; i++) {
if (rdma_port_get_link_layer(device, dev->start_port + i) ==
IB_LINK_LAYER_INFINIBAND) {
port = &dev->port[i];
deref_port(port);
wait_for_completion(&port->comp);
}
}
kfree(dev);
}
int mcast_init(void)
{
int ret;
mcast_wq = create_singlethread_workqueue("ib_mcast");
if (!mcast_wq)
return -ENOMEM;
ib_sa_register_client(&sa_client);
ret = ib_register_client(&mcast_client);
if (ret)
goto err;
return 0;
err:
ib_sa_unregister_client(&sa_client);
destroy_workqueue(mcast_wq);
return ret;
}
void mcast_cleanup(void)
{
ib_unregister_client(&mcast_client);
ib_sa_unregister_client(&sa_client);
destroy_workqueue(mcast_wq);
}

View file

@ -0,0 +1,216 @@
/*
* Copyright (c) 2010 Voltaire Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
#include <linux/export.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <rdma/rdma_netlink.h>
struct ibnl_client {
struct list_head list;
int index;
int nops;
const struct ibnl_client_cbs *cb_table;
};
static DEFINE_MUTEX(ibnl_mutex);
static struct sock *nls;
static LIST_HEAD(client_list);
int ibnl_add_client(int index, int nops,
const struct ibnl_client_cbs cb_table[])
{
struct ibnl_client *cur;
struct ibnl_client *nl_client;
nl_client = kmalloc(sizeof *nl_client, GFP_KERNEL);
if (!nl_client)
return -ENOMEM;
nl_client->index = index;
nl_client->nops = nops;
nl_client->cb_table = cb_table;
mutex_lock(&ibnl_mutex);
list_for_each_entry(cur, &client_list, list) {
if (cur->index == index) {
pr_warn("Client for %d already exists\n", index);
mutex_unlock(&ibnl_mutex);
kfree(nl_client);
return -EINVAL;
}
}
list_add_tail(&nl_client->list, &client_list);
mutex_unlock(&ibnl_mutex);
return 0;
}
EXPORT_SYMBOL(ibnl_add_client);
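/*
* Illustrative registration (hypothetical callback names): a client
* passes its index from <rdma/rdma_netlink.h> together with a table of
* dump handlers, for example:
*
*	static const struct ibnl_client_cbs demo_cb_table[] = {
*		[0] = { .dump = demo_dump, .module = THIS_MODULE },
*	};
*
*	ibnl_add_client(RDMA_NL_RDMA_CM, ARRAY_SIZE(demo_cb_table),
*			demo_cb_table);
*
* ibnl_remove_client() below undoes the registration at unload time.
*/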
int ibnl_remove_client(int index)
{
struct ibnl_client *cur, *next;
mutex_lock(&ibnl_mutex);
list_for_each_entry_safe(cur, next, &client_list, list) {
if (cur->index == index) {
list_del(&(cur->list));
mutex_unlock(&ibnl_mutex);
kfree(cur);
return 0;
}
}
pr_warn("Can't remove callback for client idx %d. Not found\n", index);
mutex_unlock(&ibnl_mutex);
return -EINVAL;
}
EXPORT_SYMBOL(ibnl_remove_client);
void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
int len, int client, int op, int flags)
{
unsigned char *prev_tail;
prev_tail = skb_tail_pointer(skb);
*nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op),
len, flags);
if (!*nlh)
goto out_nlmsg_trim;
(*nlh)->nlmsg_len = skb_tail_pointer(skb) - prev_tail;
return nlmsg_data(*nlh);
out_nlmsg_trim:
nlmsg_trim(skb, prev_tail);
return NULL;
}
EXPORT_SYMBOL(ibnl_put_msg);
int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
int len, void *data, int type)
{
unsigned char *prev_tail;
prev_tail = skb_tail_pointer(skb);
if (nla_put(skb, type, len, data))
goto nla_put_failure;
nlh->nlmsg_len += skb_tail_pointer(skb) - prev_tail;
return 0;
nla_put_failure:
nlmsg_trim(skb, prev_tail - nlh->nlmsg_len);
return -EMSGSIZE;
}
EXPORT_SYMBOL(ibnl_put_attr);
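/*
* ibnl_put_msg() and ibnl_put_attr() keep nlmsg_len up to date by hand,
* measuring how far the skb tail moved past the saved prev_tail, so a
* header reserved first and attributes appended afterwards carry a
* consistent length before the message is sent with ibnl_unicast() or
* ibnl_multicast().
*/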
static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct ibnl_client *client;
int type = nlh->nlmsg_type;
int index = RDMA_NL_GET_CLIENT(type);
int op = RDMA_NL_GET_OP(type);
list_for_each_entry(client, &client_list, list) {
if (client->index == index) {
if (op < 0 || op >= client->nops ||
!client->cb_table[op].dump)
return -EINVAL;
{
struct netlink_dump_control c = {
.dump = client->cb_table[op].dump,
.module = client->cb_table[op].module,
};
return netlink_dump_start(nls, skb, nlh, &c);
}
}
}
pr_info("Index %d wasn't found in client list\n", index);
return -EINVAL;
}
static void ibnl_rcv(struct sk_buff *skb)
{
mutex_lock(&ibnl_mutex);
netlink_rcv_skb(skb, &ibnl_rcv_msg);
mutex_unlock(&ibnl_mutex);
}
int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh,
__u32 pid)
{
return nlmsg_unicast(nls, skb, pid);
}
EXPORT_SYMBOL(ibnl_unicast);
int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh,
unsigned int group, gfp_t flags)
{
return nlmsg_multicast(nls, skb, 0, group, flags);
}
EXPORT_SYMBOL(ibnl_multicast);
int __init ibnl_init(void)
{
struct netlink_kernel_cfg cfg = {
.input = ibnl_rcv,
};
nls = netlink_kernel_create(&init_net, NETLINK_RDMA, &cfg);
if (!nls) {
pr_warn("Failed to create netlink socket\n");
return -ENOMEM;
}
return 0;
}
void ibnl_cleanup(void)
{
struct ibnl_client *cur, *next;
mutex_lock(&ibnl_mutex);
list_for_each_entry_safe(cur, next, &client_list, list) {
list_del(&(cur->list));
kfree(cur);
}
mutex_unlock(&ibnl_mutex);
netlink_kernel_release(nls);
}

View file

@ -0,0 +1,203 @@
/*
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/export.h>
#include <linux/string.h>
#include <rdma/ib_pack.h>
static u64 value_read(int offset, int size, void *structure)
{
switch (size) {
case 1: return *(u8 *) (structure + offset);
case 2: return be16_to_cpup((__be16 *) (structure + offset));
case 4: return be32_to_cpup((__be32 *) (structure + offset));
case 8: return be64_to_cpup((__be64 *) (structure + offset));
default:
printk(KERN_WARNING "Field size %d bits not handled\n", size * 8);
return 0;
}
}
/**
* ib_pack - Pack a structure into a buffer
* @desc:Array of structure field descriptions
* @desc_len:Number of entries in @desc
* @structure:Structure to pack from
* @buf:Buffer to pack into
*
* ib_pack() packs a list of structure fields into a buffer,
* controlled by the array of fields in @desc.
*/
void ib_pack(const struct ib_field *desc,
int desc_len,
void *structure,
void *buf)
{
int i;
for (i = 0; i < desc_len; ++i) {
if (desc[i].size_bits <= 32) {
int shift;
u32 val;
__be32 mask;
__be32 *addr;
shift = 32 - desc[i].offset_bits - desc[i].size_bits;
if (desc[i].struct_size_bytes)
val = value_read(desc[i].struct_offset_bytes,
desc[i].struct_size_bytes,
structure) << shift;
else
val = 0;
mask = cpu_to_be32(((1ull << desc[i].size_bits) - 1) << shift);
addr = (__be32 *) buf + desc[i].offset_words;
*addr = (*addr & ~mask) | (cpu_to_be32(val) & mask);
} else if (desc[i].size_bits <= 64) {
int shift;
u64 val;
__be64 mask;
__be64 *addr;
shift = 64 - desc[i].offset_bits - desc[i].size_bits;
if (desc[i].struct_size_bytes)
val = value_read(desc[i].struct_offset_bytes,
desc[i].struct_size_bytes,
structure) << shift;
else
val = 0;
mask = cpu_to_be64((~0ull >> (64 - desc[i].size_bits)) << shift);
addr = (__be64 *) ((__be32 *) buf + desc[i].offset_words);
*addr = (*addr & ~mask) | (cpu_to_be64(val) & mask);
} else {
if (desc[i].offset_bits % 8 ||
desc[i].size_bits % 8) {
printk(KERN_WARNING "Structure field %s of size %d "
"bits is not byte-aligned\n",
desc[i].field_name, desc[i].size_bits);
}
if (desc[i].struct_size_bytes)
memcpy(buf + desc[i].offset_words * 4 +
desc[i].offset_bits / 8,
structure + desc[i].struct_offset_bytes,
desc[i].size_bits / 8);
else
memset(buf + desc[i].offset_words * 4 +
desc[i].offset_bits / 8,
0,
desc[i].size_bits / 8);
}
}
}
EXPORT_SYMBOL(ib_pack);
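/*
* Illustrative use (hypothetical struct and table): packing a u8
* "version" member that lives at byte offset 0 of a local structure
* into bits 0-7 of the first 32-bit word of the wire buffer:
*
*	struct demo_hdr { u8 version; };
*	static const struct ib_field demo_table[] = {
*		{ .struct_offset_bytes = 0,
*		  .struct_size_bytes   = 1,
*		  .offset_words        = 0,
*		  .offset_bits         = 0,
*		  .size_bits           = 8,
*		  .field_name          = "demo:version" },
*	};
*
*	struct demo_hdr hdr = { .version = 1 };
*	ib_pack(demo_table, ARRAY_SIZE(demo_table), &hdr, buf);
*
* ib_unpack() below reverses the transformation using the same table.
*/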
static void value_write(int offset, int size, u64 val, void *structure)
{
switch (size * 8) {
case 8: *( u8 *) (structure + offset) = val; break;
case 16: *(__be16 *) (structure + offset) = cpu_to_be16(val); break;
case 32: *(__be32 *) (structure + offset) = cpu_to_be32(val); break;
case 64: *(__be64 *) (structure + offset) = cpu_to_be64(val); break;
default:
printk(KERN_WARNING "Field size %d bits not handled\n", size * 8);
}
}
/**
* ib_unpack - Unpack a buffer into a structure
* @desc:Array of structure field descriptions
* @desc_len:Number of entries in @desc
* @buf:Buffer to unpack from
* @structure:Structure to unpack into
*
* ib_unpack() unpacks a list of structure fields from a buffer,
* controlled by the array of fields in @desc.
*/
void ib_unpack(const struct ib_field *desc,
int desc_len,
void *buf,
void *structure)
{
int i;
for (i = 0; i < desc_len; ++i) {
if (!desc[i].struct_size_bytes)
continue;
if (desc[i].size_bits <= 32) {
int shift;
u32 val;
u32 mask;
__be32 *addr;
shift = 32 - desc[i].offset_bits - desc[i].size_bits;
mask = ((1ull << desc[i].size_bits) - 1) << shift;
addr = (__be32 *) buf + desc[i].offset_words;
val = (be32_to_cpup(addr) & mask) >> shift;
value_write(desc[i].struct_offset_bytes,
desc[i].struct_size_bytes,
val,
structure);
} else if (desc[i].size_bits <= 64) {
int shift;
u64 val;
u64 mask;
__be64 *addr;
shift = 64 - desc[i].offset_bits - desc[i].size_bits;
mask = (~0ull >> (64 - desc[i].size_bits)) << shift;
addr = (__be64 *) buf + desc[i].offset_words;
val = (be64_to_cpup(addr) & mask) >> shift;
value_write(desc[i].struct_offset_bytes,
desc[i].struct_size_bytes,
val,
structure);
} else {
if (desc[i].offset_bits % 8 ||
desc[i].size_bits % 8) {
printk(KERN_WARNING "Structure field %s of size %d "
"bits is not byte-aligned\n",
desc[i].field_name, desc[i].size_bits);
}
memcpy(structure + desc[i].struct_offset_bytes,
buf + desc[i].offset_words * 4 +
desc[i].offset_bits / 8,
desc[i].size_bits / 8);
}
}
}
EXPORT_SYMBOL(ib_unpack);

View file

@ -0,0 +1,66 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Voltaire, Inc. All rights reserved.
* Copyright (c) 2006 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef SA_H
#define SA_H
#include <rdma/ib_sa.h>
static inline void ib_sa_client_get(struct ib_sa_client *client)
{
atomic_inc(&client->users);
}
static inline void ib_sa_client_put(struct ib_sa_client *client)
{
if (atomic_dec_and_test(&client->users))
complete(&client->comp);
}
int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
u8 method,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask,
int timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_mcmember_rec *resp,
void *context),
void *context,
struct ib_sa_query **sa_query);
int mcast_init(void);
void mcast_cleanup(void);
#endif /* SA_H */

File diff suppressed because it is too large

View file

@ -0,0 +1,253 @@
/*
* Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved.
* Copyright (c) 2004, 2005 Intel Corporation. All rights reserved.
* Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved.
* Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <rdma/ib_smi.h>
#include "smi.h"
/*
* Fixup a directed route SMP for sending
* Return IB_SMI_DISCARD if the SMP should be discarded
*/
enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
u8 node_type, int port_num)
{
u8 hop_ptr, hop_cnt;
hop_ptr = smp->hop_ptr;
hop_cnt = smp->hop_cnt;
/* See section 14.2.2.2, Vol 1 IB spec */
/* C14-6 -- valid hop_cnt values are from 0 to 63 */
if (hop_cnt >= IB_SMP_MAX_PATH_HOPS)
return IB_SMI_DISCARD;
if (!ib_get_smp_direction(smp)) {
/* C14-9:1 */
if (hop_cnt && hop_ptr == 0) {
smp->hop_ptr++;
return (smp->initial_path[smp->hop_ptr] ==
port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-9:2 */
if (hop_ptr && hop_ptr < hop_cnt) {
if (node_type != RDMA_NODE_IB_SWITCH)
return IB_SMI_DISCARD;
/* smp->return_path set when received */
smp->hop_ptr++;
return (smp->initial_path[smp->hop_ptr] ==
port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-9:3 -- We're at the end of the DR segment of path */
if (hop_ptr == hop_cnt) {
/* smp->return_path set when received */
smp->hop_ptr++;
return (node_type == RDMA_NODE_IB_SWITCH ||
smp->dr_dlid == IB_LID_PERMISSIVE ?
IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
/* C14-9:5 -- Fail unreasonable hop pointer */
return (hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD);
} else {
/* C14-13:1 */
if (hop_cnt && hop_ptr == hop_cnt + 1) {
smp->hop_ptr--;
return (smp->return_path[smp->hop_ptr] ==
port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-13:2 */
if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
if (node_type != RDMA_NODE_IB_SWITCH)
return IB_SMI_DISCARD;
smp->hop_ptr--;
return (smp->return_path[smp->hop_ptr] ==
port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-13:3 -- at the end of the DR segment of path */
if (hop_ptr == 1) {
smp->hop_ptr--;
/* C14-13:3 -- SMPs destined for SM shouldn't be here */
return (node_type == RDMA_NODE_IB_SWITCH ||
smp->dr_slid == IB_LID_PERMISSIVE ?
IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-13:4 -- hop_ptr = 0 -> should have gone to SM */
if (hop_ptr == 0)
return IB_SMI_HANDLE;
/* C14-13:5 -- Check for unreasonable hop pointer */
return IB_SMI_DISCARD;
}
}
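/*
* Worked example for the outgoing direction above: a DR SMP with
* hop_cnt = 2 leaves the requester with hop_ptr = 0. C14-9:1 advances
* hop_ptr to 1 and checks that initial_path[1] names the port the SMP
* is being sent on; each switch along the path then hits C14-9:2, and
* the final node matches C14-9:3 once hop_ptr reaches hop_cnt. The
* returning SMP walks return_path the same way in reverse.
*/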
/*
* Adjust information for a received SMP
* Return IB_SMI_DISCARD if the SMP should be dropped
*/
enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type,
int port_num, int phys_port_cnt)
{
u8 hop_ptr, hop_cnt;
hop_ptr = smp->hop_ptr;
hop_cnt = smp->hop_cnt;
/* See section 14.2.2.2, Vol 1 IB spec */
/* C14-6 -- valid hop_cnt values are from 0 to 63 */
if (hop_cnt >= IB_SMP_MAX_PATH_HOPS)
return IB_SMI_DISCARD;
if (!ib_get_smp_direction(smp)) {
/* C14-9:1 -- sender should have incremented hop_ptr */
if (hop_cnt && hop_ptr == 0)
return IB_SMI_DISCARD;
/* C14-9:2 -- intermediate hop */
if (hop_ptr && hop_ptr < hop_cnt) {
if (node_type != RDMA_NODE_IB_SWITCH)
return IB_SMI_DISCARD;
smp->return_path[hop_ptr] = port_num;
/* smp->hop_ptr updated when sending */
return (smp->initial_path[hop_ptr+1] <= phys_port_cnt ?
IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-9:3 -- We're at the end of the DR segment of path */
if (hop_ptr == hop_cnt) {
if (hop_cnt)
smp->return_path[hop_ptr] = port_num;
/* smp->hop_ptr updated when sending */
return (node_type == RDMA_NODE_IB_SWITCH ||
smp->dr_dlid == IB_LID_PERMISSIVE ?
IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
/* C14-9:5 -- fail unreasonable hop pointer */
return (hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD);
} else {
/* C14-13:1 */
if (hop_cnt && hop_ptr == hop_cnt + 1) {
smp->hop_ptr--;
return (smp->return_path[smp->hop_ptr] ==
port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-13:2 */
if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
if (node_type != RDMA_NODE_IB_SWITCH)
return IB_SMI_DISCARD;
/* smp->hop_ptr updated when sending */
return (smp->return_path[hop_ptr-1] <= phys_port_cnt ?
IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-13:3 -- We're at the end of the DR segment of path */
if (hop_ptr == 1) {
if (smp->dr_slid == IB_LID_PERMISSIVE) {
/* giving SMP to SM - update hop_ptr */
smp->hop_ptr--;
return IB_SMI_HANDLE;
}
/* smp->hop_ptr updated when sending */
return (node_type == RDMA_NODE_IB_SWITCH ?
IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-13:4 -- hop_ptr = 0 -> give to SM */
/* C14-13:5 -- Check for unreasonable hop pointer */
return (hop_ptr == 0 ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
}
enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp)
{
u8 hop_ptr, hop_cnt;
hop_ptr = smp->hop_ptr;
hop_cnt = smp->hop_cnt;
if (!ib_get_smp_direction(smp)) {
/* C14-9:2 -- intermediate hop */
if (hop_ptr && hop_ptr < hop_cnt)
return IB_SMI_FORWARD;
/* C14-9:3 -- at the end of the DR segment of path */
if (hop_ptr == hop_cnt)
return (smp->dr_dlid == IB_LID_PERMISSIVE ?
IB_SMI_SEND : IB_SMI_LOCAL);
/* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
if (hop_ptr == hop_cnt + 1)
return IB_SMI_SEND;
} else {
/* C14-13:2 -- intermediate hop */
if (2 <= hop_ptr && hop_ptr <= hop_cnt)
return IB_SMI_FORWARD;
/* C14-13:3 -- at the end of the DR segment of path */
if (hop_ptr == 1)
return (smp->dr_slid != IB_LID_PERMISSIVE ?
IB_SMI_SEND : IB_SMI_LOCAL);
}
return IB_SMI_LOCAL;
}
/*
* Return the forwarding port number from initial_path for outgoing SMP and
* from return_path for returning SMP
*/
int smi_get_fwd_port(struct ib_smp *smp)
{
return (!ib_get_smp_direction(smp) ? smp->initial_path[smp->hop_ptr+1] :
smp->return_path[smp->hop_ptr-1]);
}

View file

@ -0,0 +1,90 @@
/*
* Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2004 Infinicon Corporation. All rights reserved.
* Copyright (c) 2004 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#ifndef __SMI_H_
#define __SMI_H_
#include <rdma/ib_smi.h>
enum smi_action {
IB_SMI_DISCARD,
IB_SMI_HANDLE
};
enum smi_forward_action {
IB_SMI_LOCAL, /* SMP should be completed up the stack */
IB_SMI_SEND, /* received DR SMP should be forwarded to the send queue */
IB_SMI_FORWARD /* SMP should be forwarded (for switches only) */
};
enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type,
int port_num, int phys_port_cnt);
int smi_get_fwd_port(struct ib_smp *smp);
extern enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp);
extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
u8 node_type, int port_num);
/*
* Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM
* via process_mad
*/
static inline enum smi_action smi_check_local_smp(struct ib_smp *smp,
struct ib_device *device)
{
/* C14-9:3 -- We're at the end of the DR segment of path */
/* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */
return ((device->process_mad &&
!ib_get_smp_direction(smp) &&
(smp->hop_ptr == smp->hop_cnt + 1)) ?
IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/*
* Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM
* via process_mad
*/
static inline enum smi_action smi_check_local_returning_smp(struct ib_smp *smp,
struct ib_device *device)
{
/* C14-13:3 -- We're at the end of the DR segment of path */
/* C14-13:4 -- Hop Pointer == 0 -> give to SM */
return ((device->process_mad &&
ib_get_smp_direction(smp) &&
!smp->hop_ptr) ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
#endif /* __SMI_H_ */

View file

@ -0,0 +1,922 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "core_priv.h"
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <rdma/ib_mad.h>
struct ib_port {
struct kobject kobj;
struct ib_device *ibdev;
struct attribute_group gid_group;
struct attribute_group pkey_group;
u8 port_num;
};
struct port_attribute {
struct attribute attr;
ssize_t (*show)(struct ib_port *, struct port_attribute *, char *buf);
ssize_t (*store)(struct ib_port *, struct port_attribute *,
const char *buf, size_t count);
};
#define PORT_ATTR(_name, _mode, _show, _store) \
struct port_attribute port_attr_##_name = __ATTR(_name, _mode, _show, _store)
#define PORT_ATTR_RO(_name) \
struct port_attribute port_attr_##_name = __ATTR_RO(_name)
struct port_table_attribute {
struct port_attribute attr;
char name[8];
int index;
};
static ssize_t port_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
struct port_attribute *port_attr =
container_of(attr, struct port_attribute, attr);
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
if (!port_attr->show)
return -EIO;
return port_attr->show(p, port_attr, buf);
}
static const struct sysfs_ops port_sysfs_ops = {
.show = port_attr_show
};
static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
static const char *state_name[] = {
[IB_PORT_NOP] = "NOP",
[IB_PORT_DOWN] = "DOWN",
[IB_PORT_INIT] = "INIT",
[IB_PORT_ARMED] = "ARMED",
[IB_PORT_ACTIVE] = "ACTIVE",
[IB_PORT_ACTIVE_DEFER] = "ACTIVE_DEFER"
};
ret = ib_query_port(p->ibdev, p->port_num, &attr);
if (ret)
return ret;
return sprintf(buf, "%d: %s\n", attr.state,
attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ?
state_name[attr.state] : "UNKNOWN");
}
static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused,
char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
ret = ib_query_port(p->ibdev, p->port_num, &attr);
if (ret)
return ret;
return sprintf(buf, "0x%x\n", attr.lid);
}
static ssize_t lid_mask_count_show(struct ib_port *p,
struct port_attribute *unused,
char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
ret = ib_query_port(p->ibdev, p->port_num, &attr);
if (ret)
return ret;
return sprintf(buf, "%d\n", attr.lmc);
}
static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused,
char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
ret = ib_query_port(p->ibdev, p->port_num, &attr);
if (ret)
return ret;
return sprintf(buf, "0x%x\n", attr.sm_lid);
}
static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused,
char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
ret = ib_query_port(p->ibdev, p->port_num, &attr);
if (ret)
return ret;
return sprintf(buf, "%d\n", attr.sm_sl);
}
static ssize_t cap_mask_show(struct ib_port *p, struct port_attribute *unused,
char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
ret = ib_query_port(p->ibdev, p->port_num, &attr);
if (ret)
return ret;
return sprintf(buf, "0x%08x\n", attr.port_cap_flags);
}
static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
char *buf)
{
struct ib_port_attr attr;
char *speed = "";
int rate; /* in deci-Gb/sec */
ssize_t ret;
ret = ib_query_port(p->ibdev, p->port_num, &attr);
if (ret)
return ret;
switch (attr.active_speed) {
case IB_SPEED_DDR:
speed = " DDR";
rate = 50;
break;
case IB_SPEED_QDR:
speed = " QDR";
rate = 100;
break;
case IB_SPEED_FDR10:
speed = " FDR10";
rate = 100;
break;
case IB_SPEED_FDR:
speed = " FDR";
rate = 140;
break;
case IB_SPEED_EDR:
speed = " EDR";
rate = 250;
break;
case IB_SPEED_SDR:
default: /* default to SDR for invalid rates */
rate = 25;
break;
}
rate *= ib_width_enum_to_int(attr.active_width);
if (rate < 0)
return -EINVAL;
return sprintf(buf, "%d%s Gb/sec (%dX%s)\n",
rate / 10, rate % 10 ? ".5" : "",
ib_width_enum_to_int(attr.active_width), speed);
}
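/*
* The rate is kept in deci-Gb/sec per lane and multiplied by the link
* width, so a 4X QDR port reports 100 * 4 = 400 and prints as
* "40 Gb/sec (4X QDR)". The ".5" case covers SDR's 2.5 Gb/sec lanes:
* a 1X SDR port prints "2.5 Gb/sec (1X)", SDR keeping the empty speed
* suffix above.
*/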
static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused,
char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
ret = ib_query_port(p->ibdev, p->port_num, &attr);
if (ret)
return ret;
switch (attr.phys_state) {
case 1: return sprintf(buf, "1: Sleep\n");
case 2: return sprintf(buf, "2: Polling\n");
case 3: return sprintf(buf, "3: Disabled\n");
case 4: return sprintf(buf, "4: PortConfigurationTraining\n");
case 5: return sprintf(buf, "5: LinkUp\n");
case 6: return sprintf(buf, "6: LinkErrorRecovery\n");
case 7: return sprintf(buf, "7: Phy Test\n");
default: return sprintf(buf, "%d: <unknown>\n", attr.phys_state);
}
}
static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused,
char *buf)
{
switch (rdma_port_get_link_layer(p->ibdev, p->port_num)) {
case IB_LINK_LAYER_INFINIBAND:
return sprintf(buf, "%s\n", "InfiniBand");
case IB_LINK_LAYER_ETHERNET:
return sprintf(buf, "%s\n", "Ethernet");
default:
return sprintf(buf, "%s\n", "Unknown");
}
}
static PORT_ATTR_RO(state);
static PORT_ATTR_RO(lid);
static PORT_ATTR_RO(lid_mask_count);
static PORT_ATTR_RO(sm_lid);
static PORT_ATTR_RO(sm_sl);
static PORT_ATTR_RO(cap_mask);
static PORT_ATTR_RO(rate);
static PORT_ATTR_RO(phys_state);
static PORT_ATTR_RO(link_layer);
static struct attribute *port_default_attrs[] = {
&port_attr_state.attr,
&port_attr_lid.attr,
&port_attr_lid_mask_count.attr,
&port_attr_sm_lid.attr,
&port_attr_sm_sl.attr,
&port_attr_cap_mask.attr,
&port_attr_rate.attr,
&port_attr_phys_state.attr,
&port_attr_link_layer.attr,
NULL
};
static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
char *buf)
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
union ib_gid gid;
ssize_t ret;
ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid);
if (ret)
return ret;
return sprintf(buf, "%pI6\n", gid.raw);
}
static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
char *buf)
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
u16 pkey;
ssize_t ret;
ret = ib_query_pkey(p->ibdev, p->port_num, tab_attr->index, &pkey);
if (ret)
return ret;
return sprintf(buf, "0x%04x\n", pkey);
}
#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \
struct port_table_attribute port_pma_attr_##_name = { \
.attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
.index = (_offset) | ((_width) << 16) | ((_counter) << 24) \
}
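/*
* The index packs three values: bits 0-15 hold the counter's bit offset
* inside the PortCounters attribute, bits 16-23 its width in bits, and
* bits 24-31 the counter number (kept for reference; show_pma_counter()
* below only unpacks the offset and the width).
*/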
static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
char *buf)
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
int offset = tab_attr->index & 0xffff;
int width = (tab_attr->index >> 16) & 0xff;
struct ib_mad *in_mad = NULL;
struct ib_mad *out_mad = NULL;
ssize_t ret;
if (!p->ibdev->process_mad)
return sprintf(buf, "N/A (no PMA)\n");
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad) {
ret = -ENOMEM;
goto out;
}
in_mad->mad_hdr.base_version = 1;
in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT;
in_mad->mad_hdr.class_version = 1;
in_mad->mad_hdr.method = IB_MGMT_METHOD_GET;
in_mad->mad_hdr.attr_id = cpu_to_be16(0x12); /* PortCounters */
in_mad->data[41] = p->port_num; /* PortSelect field */
if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY,
p->port_num, NULL, NULL, in_mad, out_mad) &
(IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) !=
(IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) {
ret = -EINVAL;
goto out;
}
switch (width) {
case 4:
ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >>
(4 - (offset % 8))) & 0xf);
break;
case 8:
ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]);
break;
case 16:
ret = sprintf(buf, "%u\n",
be16_to_cpup((__be16 *)(out_mad->data + 40 + offset / 8)));
break;
case 32:
ret = sprintf(buf, "%u\n",
be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8)));
break;
default:
ret = 0;
}
out:
kfree(in_mad);
kfree(out_mad);
return ret;
}
static PORT_PMA_ATTR(symbol_error , 0, 16, 32);
static PORT_PMA_ATTR(link_error_recovery , 1, 8, 48);
static PORT_PMA_ATTR(link_downed , 2, 8, 56);
static PORT_PMA_ATTR(port_rcv_errors , 3, 16, 64);
static PORT_PMA_ATTR(port_rcv_remote_physical_errors, 4, 16, 80);
static PORT_PMA_ATTR(port_rcv_switch_relay_errors , 5, 16, 96);
static PORT_PMA_ATTR(port_xmit_discards , 6, 16, 112);
static PORT_PMA_ATTR(port_xmit_constraint_errors , 7, 8, 128);
static PORT_PMA_ATTR(port_rcv_constraint_errors , 8, 8, 136);
static PORT_PMA_ATTR(local_link_integrity_errors , 9, 4, 152);
static PORT_PMA_ATTR(excessive_buffer_overrun_errors, 10, 4, 156);
static PORT_PMA_ATTR(VL15_dropped , 11, 16, 176);
static PORT_PMA_ATTR(port_xmit_data , 12, 32, 192);
static PORT_PMA_ATTR(port_rcv_data , 13, 32, 224);
static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256);
static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288);
static struct attribute *pma_attrs[] = {
&port_pma_attr_symbol_error.attr.attr,
&port_pma_attr_link_error_recovery.attr.attr,
&port_pma_attr_link_downed.attr.attr,
&port_pma_attr_port_rcv_errors.attr.attr,
&port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
&port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
&port_pma_attr_port_xmit_discards.attr.attr,
&port_pma_attr_port_xmit_constraint_errors.attr.attr,
&port_pma_attr_port_rcv_constraint_errors.attr.attr,
&port_pma_attr_local_link_integrity_errors.attr.attr,
&port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
&port_pma_attr_VL15_dropped.attr.attr,
&port_pma_attr_port_xmit_data.attr.attr,
&port_pma_attr_port_rcv_data.attr.attr,
&port_pma_attr_port_xmit_packets.attr.attr,
&port_pma_attr_port_rcv_packets.attr.attr,
NULL
};
static struct attribute_group pma_group = {
.name = "counters",
.attrs = pma_attrs
};
static void ib_port_release(struct kobject *kobj)
{
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
struct attribute *a;
int i;
if (p->gid_group.attrs) {
for (i = 0; (a = p->gid_group.attrs[i]); ++i)
kfree(a);
kfree(p->gid_group.attrs);
}
if (p->pkey_group.attrs) {
for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
kfree(a);
kfree(p->pkey_group.attrs);
}
kfree(p);
}
static struct kobj_type port_type = {
.release = ib_port_release,
.sysfs_ops = &port_sysfs_ops,
.default_attrs = port_default_attrs
};
static void ib_device_release(struct device *device)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
kfree(dev);
}
static int ib_device_uevent(struct device *device,
struct kobj_uevent_env *env)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
if (add_uevent_var(env, "NAME=%s", dev->name))
return -ENOMEM;
/*
* It would be nice to pass the node GUID with the event...
*/
return 0;
}
static struct attribute **
alloc_group_attrs(ssize_t (*show)(struct ib_port *,
struct port_attribute *, char *buf),
int len)
{
struct attribute **tab_attr;
struct port_table_attribute *element;
int i;
tab_attr = kcalloc(1 + len, sizeof(struct attribute *), GFP_KERNEL);
if (!tab_attr)
return NULL;
for (i = 0; i < len; i++) {
element = kzalloc(sizeof(struct port_table_attribute),
GFP_KERNEL);
if (!element)
goto err;
if (snprintf(element->name, sizeof(element->name),
"%d", i) >= sizeof(element->name)) {
kfree(element);
goto err;
}
element->attr.attr.name = element->name;
element->attr.attr.mode = S_IRUGO;
element->attr.show = show;
element->index = i;
sysfs_attr_init(&element->attr.attr);
tab_attr[i] = &element->attr.attr;
}
return tab_attr;
err:
while (--i >= 0)
kfree(tab_attr[i]);
kfree(tab_attr);
return NULL;
}
static int add_port(struct ib_device *device, int port_num,
int (*port_callback)(struct ib_device *,
u8, struct kobject *))
{
struct ib_port *p;
struct ib_port_attr attr;
int i;
int ret;
ret = ib_query_port(device, port_num, &attr);
if (ret)
return ret;
p = kzalloc(sizeof *p, GFP_KERNEL);
if (!p)
return -ENOMEM;
p->ibdev = device;
p->port_num = port_num;
ret = kobject_init_and_add(&p->kobj, &port_type,
device->ports_parent,
"%d", port_num);
if (ret) {
kfree(p);
return ret;
}
ret = sysfs_create_group(&p->kobj, &pma_group);
if (ret)
goto err_put;
p->gid_group.name = "gids";
p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
if (!p->gid_group.attrs) {
ret = -ENOMEM;
goto err_remove_pma;
}
ret = sysfs_create_group(&p->kobj, &p->gid_group);
if (ret)
goto err_free_gid;
p->pkey_group.name = "pkeys";
p->pkey_group.attrs = alloc_group_attrs(show_port_pkey,
attr.pkey_tbl_len);
if (!p->pkey_group.attrs) {
ret = -ENOMEM;
goto err_remove_gid;
}
ret = sysfs_create_group(&p->kobj, &p->pkey_group);
if (ret)
goto err_free_pkey;
if (port_callback) {
ret = port_callback(device, port_num, &p->kobj);
if (ret)
goto err_remove_pkey;
}
list_add_tail(&p->kobj.entry, &device->port_list);
kobject_uevent(&p->kobj, KOBJ_ADD);
return 0;
err_remove_pkey:
sysfs_remove_group(&p->kobj, &p->pkey_group);
err_free_pkey:
for (i = 0; i < attr.pkey_tbl_len; ++i)
kfree(p->pkey_group.attrs[i]);
kfree(p->pkey_group.attrs);
p->pkey_group.attrs = NULL;
err_remove_gid:
sysfs_remove_group(&p->kobj, &p->gid_group);
err_free_gid:
for (i = 0; i < attr.gid_tbl_len; ++i)
kfree(p->gid_group.attrs[i]);
kfree(p->gid_group.attrs);
p->gid_group.attrs = NULL;
err_remove_pma:
sysfs_remove_group(&p->kobj, &pma_group);
err_put:
kobject_put(&p->kobj);
return ret;
}
static ssize_t show_node_type(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
switch (dev->node_type) {
case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type);
case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type);
case RDMA_NODE_USNIC: return sprintf(buf, "%d: usNIC\n", dev->node_type);
case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type);
case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
}
}
static ssize_t show_sys_image_guid(struct device *device,
struct device_attribute *dev_attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
struct ib_device_attr attr;
ssize_t ret;
ret = ib_query_device(dev, &attr);
if (ret)
return ret;
return sprintf(buf, "%04x:%04x:%04x:%04x\n",
be16_to_cpu(((__be16 *) &attr.sys_image_guid)[0]),
be16_to_cpu(((__be16 *) &attr.sys_image_guid)[1]),
be16_to_cpu(((__be16 *) &attr.sys_image_guid)[2]),
be16_to_cpu(((__be16 *) &attr.sys_image_guid)[3]));
}
static ssize_t show_node_guid(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
return sprintf(buf, "%04x:%04x:%04x:%04x\n",
be16_to_cpu(((__be16 *) &dev->node_guid)[0]),
be16_to_cpu(((__be16 *) &dev->node_guid)[1]),
be16_to_cpu(((__be16 *) &dev->node_guid)[2]),
be16_to_cpu(((__be16 *) &dev->node_guid)[3]));
}
static ssize_t show_node_desc(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
return sprintf(buf, "%.64s\n", dev->node_desc);
}
static ssize_t set_node_desc(struct device *device,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
struct ib_device_modify desc = {};
int ret;
if (!dev->modify_device)
return -EIO;
memcpy(desc.node_desc, buf, min_t(int, count, 64));
ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc);
if (ret)
return ret;
return count;
}
static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL);
static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL);
static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL);
static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc);
static struct device_attribute *ib_class_attributes[] = {
&dev_attr_node_type,
&dev_attr_sys_image_guid,
&dev_attr_node_guid,
&dev_attr_node_desc
};
static struct class ib_class = {
.name = "infiniband",
.dev_release = ib_device_release,
.dev_uevent = ib_device_uevent,
};
/* Show a given attribute in the statistics group */
static ssize_t show_protocol_stat(const struct device *device,
struct device_attribute *attr, char *buf,
unsigned offset)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
union rdma_protocol_stats stats;
ssize_t ret;
ret = dev->get_protocol_stats(dev, &stats);
if (ret)
return ret;
return sprintf(buf, "%llu\n",
(unsigned long long) ((u64 *) &stats)[offset]);
}
/* generate a read-only iwarp statistics attribute */
#define IW_STATS_ENTRY(name) \
static ssize_t show_##name(struct device *device, \
struct device_attribute *attr, char *buf) \
{ \
return show_protocol_stat(device, attr, buf, \
offsetof(struct iw_protocol_stats, name) / \
sizeof (u64)); \
} \
static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
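/*
 * Each IW_STATS_ENTRY() use below generates a show routine plus a read-only
 * device attribute; IW_STATS_ENTRY(tcpInSegs), for instance, expands to
 * roughly:
 *
 *	static ssize_t show_tcpInSegs(struct device *device,
 *				      struct device_attribute *attr, char *buf)
 *	{
 *		return show_protocol_stat(device, attr, buf,
 *					  offsetof(struct iw_protocol_stats, tcpInSegs) /
 *					  sizeof (u64));
 *	}
 *	static DEVICE_ATTR(tcpInSegs, S_IRUGO, show_tcpInSegs, NULL);
 */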
IW_STATS_ENTRY(ipInReceives);
IW_STATS_ENTRY(ipInHdrErrors);
IW_STATS_ENTRY(ipInTooBigErrors);
IW_STATS_ENTRY(ipInNoRoutes);
IW_STATS_ENTRY(ipInAddrErrors);
IW_STATS_ENTRY(ipInUnknownProtos);
IW_STATS_ENTRY(ipInTruncatedPkts);
IW_STATS_ENTRY(ipInDiscards);
IW_STATS_ENTRY(ipInDelivers);
IW_STATS_ENTRY(ipOutForwDatagrams);
IW_STATS_ENTRY(ipOutRequests);
IW_STATS_ENTRY(ipOutDiscards);
IW_STATS_ENTRY(ipOutNoRoutes);
IW_STATS_ENTRY(ipReasmTimeout);
IW_STATS_ENTRY(ipReasmReqds);
IW_STATS_ENTRY(ipReasmOKs);
IW_STATS_ENTRY(ipReasmFails);
IW_STATS_ENTRY(ipFragOKs);
IW_STATS_ENTRY(ipFragFails);
IW_STATS_ENTRY(ipFragCreates);
IW_STATS_ENTRY(ipInMcastPkts);
IW_STATS_ENTRY(ipOutMcastPkts);
IW_STATS_ENTRY(ipInBcastPkts);
IW_STATS_ENTRY(ipOutBcastPkts);
IW_STATS_ENTRY(tcpRtoAlgorithm);
IW_STATS_ENTRY(tcpRtoMin);
IW_STATS_ENTRY(tcpRtoMax);
IW_STATS_ENTRY(tcpMaxConn);
IW_STATS_ENTRY(tcpActiveOpens);
IW_STATS_ENTRY(tcpPassiveOpens);
IW_STATS_ENTRY(tcpAttemptFails);
IW_STATS_ENTRY(tcpEstabResets);
IW_STATS_ENTRY(tcpCurrEstab);
IW_STATS_ENTRY(tcpInSegs);
IW_STATS_ENTRY(tcpOutSegs);
IW_STATS_ENTRY(tcpRetransSegs);
IW_STATS_ENTRY(tcpInErrs);
IW_STATS_ENTRY(tcpOutRsts);
static struct attribute *iw_proto_stats_attrs[] = {
&dev_attr_ipInReceives.attr,
&dev_attr_ipInHdrErrors.attr,
&dev_attr_ipInTooBigErrors.attr,
&dev_attr_ipInNoRoutes.attr,
&dev_attr_ipInAddrErrors.attr,
&dev_attr_ipInUnknownProtos.attr,
&dev_attr_ipInTruncatedPkts.attr,
&dev_attr_ipInDiscards.attr,
&dev_attr_ipInDelivers.attr,
&dev_attr_ipOutForwDatagrams.attr,
&dev_attr_ipOutRequests.attr,
&dev_attr_ipOutDiscards.attr,
&dev_attr_ipOutNoRoutes.attr,
&dev_attr_ipReasmTimeout.attr,
&dev_attr_ipReasmReqds.attr,
&dev_attr_ipReasmOKs.attr,
&dev_attr_ipReasmFails.attr,
&dev_attr_ipFragOKs.attr,
&dev_attr_ipFragFails.attr,
&dev_attr_ipFragCreates.attr,
&dev_attr_ipInMcastPkts.attr,
&dev_attr_ipOutMcastPkts.attr,
&dev_attr_ipInBcastPkts.attr,
&dev_attr_ipOutBcastPkts.attr,
&dev_attr_tcpRtoAlgorithm.attr,
&dev_attr_tcpRtoMin.attr,
&dev_attr_tcpRtoMax.attr,
&dev_attr_tcpMaxConn.attr,
&dev_attr_tcpActiveOpens.attr,
&dev_attr_tcpPassiveOpens.attr,
&dev_attr_tcpAttemptFails.attr,
&dev_attr_tcpEstabResets.attr,
&dev_attr_tcpCurrEstab.attr,
&dev_attr_tcpInSegs.attr,
&dev_attr_tcpOutSegs.attr,
&dev_attr_tcpRetransSegs.attr,
&dev_attr_tcpInErrs.attr,
&dev_attr_tcpOutRsts.attr,
NULL
};
static struct attribute_group iw_stats_group = {
.name = "proto_stats",
.attrs = iw_proto_stats_attrs,
};
static void free_port_list_attributes(struct ib_device *device)
{
struct kobject *p, *t;
list_for_each_entry_safe(p, t, &device->port_list, entry) {
struct ib_port *port = container_of(p, struct ib_port, kobj);
list_del(&p->entry);
sysfs_remove_group(p, &pma_group);
sysfs_remove_group(p, &port->pkey_group);
sysfs_remove_group(p, &port->gid_group);
kobject_put(p);
}
kobject_put(device->ports_parent);
}
int ib_device_register_sysfs(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *))
{
struct device *class_dev = &device->dev;
int ret;
int i;
class_dev->class = &ib_class;
class_dev->parent = device->dma_device;
dev_set_name(class_dev, "%s", device->name);
dev_set_drvdata(class_dev, device);
INIT_LIST_HEAD(&device->port_list);
ret = device_register(class_dev);
if (ret)
goto err;
for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) {
ret = device_create_file(class_dev, ib_class_attributes[i]);
if (ret)
goto err_unregister;
}
device->ports_parent = kobject_create_and_add("ports",
&class_dev->kobj);
if (!device->ports_parent) {
ret = -ENOMEM;
goto err_put;
}
if (device->node_type == RDMA_NODE_IB_SWITCH) {
ret = add_port(device, 0, port_callback);
if (ret)
goto err_put;
} else {
for (i = 1; i <= device->phys_port_cnt; ++i) {
ret = add_port(device, i, port_callback);
if (ret)
goto err_put;
}
}
if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats) {
ret = sysfs_create_group(&class_dev->kobj, &iw_stats_group);
if (ret)
goto err_put;
}
return 0;
err_put:
free_port_list_attributes(device);
err_unregister:
device_unregister(class_dev);
err:
return ret;
}
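/*
 * With the attributes and groups created above, a registered device ends up
 * with a sysfs tree along these lines (paths illustrative):
 *
 *	/sys/class/infiniband/<name>/node_type
 *	/sys/class/infiniband/<name>/sys_image_guid
 *	/sys/class/infiniband/<name>/node_guid
 *	/sys/class/infiniband/<name>/node_desc
 *	/sys/class/infiniband/<name>/ports/<N>/counters/...	(pma_group)
 *	/sys/class/infiniband/<name>/ports/<N>/gids/<idx>
 *	/sys/class/infiniband/<name>/ports/<N>/pkeys/<idx>
 *	/sys/class/infiniband/<name>/proto_stats/...		(RNICs providing
 *								 get_protocol_stats only)
 */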
void ib_device_unregister_sysfs(struct ib_device *device)
{
/* Hold kobject until ib_dealloc_device() */
struct kobject *kobj_dev = kobject_get(&device->dev.kobj);
int i;
if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats)
sysfs_remove_group(kobj_dev, &iw_stats_group);
free_port_list_attributes(device);
for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i)
device_remove_file(&device->dev, ib_class_attributes[i]);
device_unregister(&device->dev);
}
int ib_sysfs_setup(void)
{
return class_register(&ib_class);
}
void ib_sysfs_cleanup(void)
{
class_unregister(&ib_class);
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -0,0 +1,414 @@
/*
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/export.h>
#include <linux/if_ether.h>
#include <rdma/ib_pack.h>
#define STRUCT_FIELD(header, field) \
.struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \
.struct_size_bytes = sizeof ((struct ib_unpacked_ ## header *) 0)->field, \
.field_name = #header ":" #field
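/*
 * Each table entry below pairs a wire-format position with a member of the
 * corresponding struct ib_unpacked_* header; STRUCT_FIELD(lrh, service_level),
 * for example, expands to:
 *
 *	.struct_offset_bytes = offsetof(struct ib_unpacked_lrh, service_level),
 *	.struct_size_bytes   = sizeof ((struct ib_unpacked_lrh *) 0)->service_level,
 *	.field_name          = "lrh:service_level"
 */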
static const struct ib_field lrh_table[] = {
{ STRUCT_FIELD(lrh, virtual_lane),
.offset_words = 0,
.offset_bits = 0,
.size_bits = 4 },
{ STRUCT_FIELD(lrh, link_version),
.offset_words = 0,
.offset_bits = 4,
.size_bits = 4 },
{ STRUCT_FIELD(lrh, service_level),
.offset_words = 0,
.offset_bits = 8,
.size_bits = 4 },
{ RESERVED,
.offset_words = 0,
.offset_bits = 12,
.size_bits = 2 },
{ STRUCT_FIELD(lrh, link_next_header),
.offset_words = 0,
.offset_bits = 14,
.size_bits = 2 },
{ STRUCT_FIELD(lrh, destination_lid),
.offset_words = 0,
.offset_bits = 16,
.size_bits = 16 },
{ RESERVED,
.offset_words = 1,
.offset_bits = 0,
.size_bits = 5 },
{ STRUCT_FIELD(lrh, packet_length),
.offset_words = 1,
.offset_bits = 5,
.size_bits = 11 },
{ STRUCT_FIELD(lrh, source_lid),
.offset_words = 1,
.offset_bits = 16,
.size_bits = 16 }
};
static const struct ib_field eth_table[] = {
{ STRUCT_FIELD(eth, dmac_h),
.offset_words = 0,
.offset_bits = 0,
.size_bits = 32 },
{ STRUCT_FIELD(eth, dmac_l),
.offset_words = 1,
.offset_bits = 0,
.size_bits = 16 },
{ STRUCT_FIELD(eth, smac_h),
.offset_words = 1,
.offset_bits = 16,
.size_bits = 16 },
{ STRUCT_FIELD(eth, smac_l),
.offset_words = 2,
.offset_bits = 0,
.size_bits = 32 },
{ STRUCT_FIELD(eth, type),
.offset_words = 3,
.offset_bits = 0,
.size_bits = 16 }
};
static const struct ib_field vlan_table[] = {
{ STRUCT_FIELD(vlan, tag),
.offset_words = 0,
.offset_bits = 0,
.size_bits = 16 },
{ STRUCT_FIELD(vlan, type),
.offset_words = 0,
.offset_bits = 16,
.size_bits = 16 }
};
static const struct ib_field grh_table[] = {
{ STRUCT_FIELD(grh, ip_version),
.offset_words = 0,
.offset_bits = 0,
.size_bits = 4 },
{ STRUCT_FIELD(grh, traffic_class),
.offset_words = 0,
.offset_bits = 4,
.size_bits = 8 },
{ STRUCT_FIELD(grh, flow_label),
.offset_words = 0,
.offset_bits = 12,
.size_bits = 20 },
{ STRUCT_FIELD(grh, payload_length),
.offset_words = 1,
.offset_bits = 0,
.size_bits = 16 },
{ STRUCT_FIELD(grh, next_header),
.offset_words = 1,
.offset_bits = 16,
.size_bits = 8 },
{ STRUCT_FIELD(grh, hop_limit),
.offset_words = 1,
.offset_bits = 24,
.size_bits = 8 },
{ STRUCT_FIELD(grh, source_gid),
.offset_words = 2,
.offset_bits = 0,
.size_bits = 128 },
{ STRUCT_FIELD(grh, destination_gid),
.offset_words = 6,
.offset_bits = 0,
.size_bits = 128 }
};
static const struct ib_field bth_table[] = {
{ STRUCT_FIELD(bth, opcode),
.offset_words = 0,
.offset_bits = 0,
.size_bits = 8 },
{ STRUCT_FIELD(bth, solicited_event),
.offset_words = 0,
.offset_bits = 8,
.size_bits = 1 },
{ STRUCT_FIELD(bth, mig_req),
.offset_words = 0,
.offset_bits = 9,
.size_bits = 1 },
{ STRUCT_FIELD(bth, pad_count),
.offset_words = 0,
.offset_bits = 10,
.size_bits = 2 },
{ STRUCT_FIELD(bth, transport_header_version),
.offset_words = 0,
.offset_bits = 12,
.size_bits = 4 },
{ STRUCT_FIELD(bth, pkey),
.offset_words = 0,
.offset_bits = 16,
.size_bits = 16 },
{ RESERVED,
.offset_words = 1,
.offset_bits = 0,
.size_bits = 8 },
{ STRUCT_FIELD(bth, destination_qpn),
.offset_words = 1,
.offset_bits = 8,
.size_bits = 24 },
{ STRUCT_FIELD(bth, ack_req),
.offset_words = 2,
.offset_bits = 0,
.size_bits = 1 },
{ RESERVED,
.offset_words = 2,
.offset_bits = 1,
.size_bits = 7 },
{ STRUCT_FIELD(bth, psn),
.offset_words = 2,
.offset_bits = 8,
.size_bits = 24 }
};
static const struct ib_field deth_table[] = {
{ STRUCT_FIELD(deth, qkey),
.offset_words = 0,
.offset_bits = 0,
.size_bits = 32 },
{ RESERVED,
.offset_words = 1,
.offset_bits = 0,
.size_bits = 8 },
{ STRUCT_FIELD(deth, source_qpn),
.offset_words = 1,
.offset_bits = 8,
.size_bits = 24 }
};
/**
* ib_ud_header_init - Initialize UD header structure
* @payload_bytes:Length of packet payload
* @lrh_present: specify if LRH is present
* @eth_present: specify if Eth header is present
 * @vlan_present: specify if a VLAN header is present
* @grh_present:GRH flag (if non-zero, GRH will be included)
* @immediate_present: specify if immediate data is present
* @header:Structure to initialize
*/
void ib_ud_header_init(int payload_bytes,
int lrh_present,
int eth_present,
int vlan_present,
int grh_present,
int immediate_present,
struct ib_ud_header *header)
{
memset(header, 0, sizeof *header);
if (lrh_present) {
u16 packet_length;
header->lrh.link_version = 0;
header->lrh.link_next_header =
grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL;
packet_length = (IB_LRH_BYTES +
IB_BTH_BYTES +
IB_DETH_BYTES +
(grh_present ? IB_GRH_BYTES : 0) +
payload_bytes +
4 + /* ICRC */
3) / 4; /* round up */
header->lrh.packet_length = cpu_to_be16(packet_length);
}
if (vlan_present)
header->eth.type = cpu_to_be16(ETH_P_8021Q);
if (grh_present) {
header->grh.ip_version = 6;
header->grh.payload_length =
cpu_to_be16((IB_BTH_BYTES +
IB_DETH_BYTES +
payload_bytes +
4 + /* ICRC */
3) & ~3); /* round up */
header->grh.next_header = 0x1b;
}
if (immediate_present)
header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
else
header->bth.opcode = IB_OPCODE_UD_SEND_ONLY;
header->bth.pad_count = (4 - payload_bytes) & 3;
header->bth.transport_header_version = 0;
header->lrh_present = lrh_present;
header->eth_present = eth_present;
header->vlan_present = vlan_present;
header->grh_present = grh_present;
header->immediate_present = immediate_present;
}
EXPORT_SYMBOL(ib_ud_header_init);
/**
* ib_ud_header_pack - Pack UD header struct into wire format
* @header:UD header struct
* @buf:Buffer to pack into
*
* ib_ud_header_pack() packs the UD header structure @header into wire
* format in the buffer @buf.
*/
int ib_ud_header_pack(struct ib_ud_header *header,
void *buf)
{
int len = 0;
if (header->lrh_present) {
ib_pack(lrh_table, ARRAY_SIZE(lrh_table),
&header->lrh, buf + len);
len += IB_LRH_BYTES;
}
if (header->eth_present) {
ib_pack(eth_table, ARRAY_SIZE(eth_table),
&header->eth, buf + len);
len += IB_ETH_BYTES;
}
if (header->vlan_present) {
ib_pack(vlan_table, ARRAY_SIZE(vlan_table),
&header->vlan, buf + len);
len += IB_VLAN_BYTES;
}
if (header->grh_present) {
ib_pack(grh_table, ARRAY_SIZE(grh_table),
&header->grh, buf + len);
len += IB_GRH_BYTES;
}
ib_pack(bth_table, ARRAY_SIZE(bth_table),
&header->bth, buf + len);
len += IB_BTH_BYTES;
ib_pack(deth_table, ARRAY_SIZE(deth_table),
&header->deth, buf + len);
len += IB_DETH_BYTES;
if (header->immediate_present) {
memcpy(buf + len, &header->immediate_data, sizeof header->immediate_data);
len += sizeof header->immediate_data;
}
return len;
}
EXPORT_SYMBOL(ib_ud_header_pack);
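/*
 * A minimal caller sketch (hypothetical; dlid, slid, remote_qpn, local_qpn,
 * psn, qkey, payload_bytes and buf are illustrative).  Local routing only:
 * LRH present, no Ethernet/VLAN/GRH headers, no immediate data.  The fields
 * set here follow the unpacked header layouts described by the tables above:
 *
 *	struct ib_ud_header hdr;
 *	int len;
 *
 *	ib_ud_header_init(payload_bytes, 1, 0, 0, 0, 0, &hdr);
 *	hdr.lrh.destination_lid = cpu_to_be16(dlid);
 *	hdr.lrh.source_lid      = cpu_to_be16(slid);
 *	hdr.bth.pkey            = cpu_to_be16(0xffff);	(default partition key)
 *	hdr.bth.destination_qpn = cpu_to_be32(remote_qpn);
 *	hdr.bth.psn             = cpu_to_be32(psn);
 *	hdr.deth.qkey           = cpu_to_be32(qkey);
 *	hdr.deth.source_qpn     = cpu_to_be32(local_qpn);
 *	len = ib_ud_header_pack(&hdr, buf);	(len == header bytes written)
 */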
/**
* ib_ud_header_unpack - Unpack UD header struct from wire format
* @header:UD header struct
 * @buf:Buffer to unpack from
*
 * ib_ud_header_unpack() unpacks the UD header structure @header from wire
* format in the buffer @buf.
*/
int ib_ud_header_unpack(void *buf,
struct ib_ud_header *header)
{
ib_unpack(lrh_table, ARRAY_SIZE(lrh_table),
buf, &header->lrh);
buf += IB_LRH_BYTES;
if (header->lrh.link_version != 0) {
printk(KERN_WARNING "Invalid LRH.link_version %d\n",
header->lrh.link_version);
return -EINVAL;
}
switch (header->lrh.link_next_header) {
case IB_LNH_IBA_LOCAL:
header->grh_present = 0;
break;
case IB_LNH_IBA_GLOBAL:
header->grh_present = 1;
ib_unpack(grh_table, ARRAY_SIZE(grh_table),
buf, &header->grh);
buf += IB_GRH_BYTES;
if (header->grh.ip_version != 6) {
printk(KERN_WARNING "Invalid GRH.ip_version %d\n",
header->grh.ip_version);
return -EINVAL;
}
if (header->grh.next_header != 0x1b) {
printk(KERN_WARNING "Invalid GRH.next_header 0x%02x\n",
header->grh.next_header);
return -EINVAL;
}
break;
default:
printk(KERN_WARNING "Invalid LRH.link_next_header %d\n",
header->lrh.link_next_header);
return -EINVAL;
}
ib_unpack(bth_table, ARRAY_SIZE(bth_table),
buf, &header->bth);
buf += IB_BTH_BYTES;
switch (header->bth.opcode) {
case IB_OPCODE_UD_SEND_ONLY:
header->immediate_present = 0;
break;
case IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE:
header->immediate_present = 1;
break;
default:
printk(KERN_WARNING "Invalid BTH.opcode 0x%02x\n",
header->bth.opcode);
return -EINVAL;
}
if (header->bth.transport_header_version != 0) {
printk(KERN_WARNING "Invalid BTH.transport_header_version %d\n",
header->bth.transport_header_version);
return -EINVAL;
}
ib_unpack(deth_table, ARRAY_SIZE(deth_table),
buf, &header->deth);
buf += IB_DETH_BYTES;
if (header->immediate_present)
memcpy(&header->immediate_data, buf, sizeof header->immediate_data);
return 0;
}
EXPORT_SYMBOL(ib_ud_header_unpack);

View file

@ -0,0 +1,305 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/sched.h>
#include <linux/export.h>
#include <linux/hugetlb.h>
#include <linux/dma-attrs.h>
#include <linux/slab.h>
#include "uverbs.h"
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
struct scatterlist *sg;
struct page *page;
int i;
if (umem->nmap > 0)
ib_dma_unmap_sg(dev, umem->sg_head.sgl,
umem->nmap,
DMA_BIDIRECTIONAL);
for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
page = sg_page(sg);
if (umem->writable && dirty)
set_page_dirty_lock(page);
put_page(page);
}
sg_free_table(&umem->sg_head);
return;
}
/**
* ib_umem_get - Pin and DMA map userspace memory.
* @context: userspace context to pin memory for
* @addr: userspace virtual address to start at
* @size: length of region to pin
* @access: IB_ACCESS_xxx flags for memory being pinned
* @dmasync: flush in-flight DMA when the memory region is written
*/
struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
size_t size, int access, int dmasync)
{
struct ib_umem *umem;
struct page **page_list;
struct vm_area_struct **vma_list;
unsigned long locked;
unsigned long lock_limit;
unsigned long cur_base;
unsigned long npages;
int ret;
int i;
DEFINE_DMA_ATTRS(attrs);
struct scatterlist *sg, *sg_list_start;
int need_release = 0;
if (dmasync)
dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
if (!size)
return ERR_PTR(-EINVAL);
/*
* If the combination of the addr and size requested for this memory
* region causes an integer overflow, return error.
*/
if (((addr + size) < addr) ||
PAGE_ALIGN(addr + size) < (addr + size))
return ERR_PTR(-EINVAL);
if (!can_do_mlock())
return ERR_PTR(-EPERM);
umem = kzalloc(sizeof *umem, GFP_KERNEL);
if (!umem)
return ERR_PTR(-ENOMEM);
umem->context = context;
umem->length = size;
umem->offset = addr & ~PAGE_MASK;
umem->page_size = PAGE_SIZE;
umem->pid = get_task_pid(current, PIDTYPE_PID);
/*
* We ask for writable memory if any access flags other than
* "remote read" are set. "Local write" and "remote write"
* obviously require write access. "Remote atomic" can do
* things like fetch and add, which will modify memory, and
* "MW bind" can change permissions by binding a window.
*/
umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);
/* We assume the memory is from hugetlb until proved otherwise */
umem->hugetlb = 1;
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list) {
kfree(umem);
return ERR_PTR(-ENOMEM);
}
/*
* if we can't alloc the vma_list, it's not so bad;
* just assume the memory is not hugetlb memory
*/
vma_list = (struct vm_area_struct **) __get_free_page(GFP_KERNEL);
if (!vma_list)
umem->hugetlb = 0;
npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;
down_write(&current->mm->mmap_sem);
locked = npages + current->mm->pinned_vm;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
ret = -ENOMEM;
goto out;
}
cur_base = addr & PAGE_MASK;
if (npages == 0) {
ret = -EINVAL;
goto out;
}
ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
if (ret)
goto out;
need_release = 1;
sg_list_start = umem->sg_head.sgl;
while (npages) {
ret = get_user_pages(current, current->mm, cur_base,
min_t(unsigned long, npages,
PAGE_SIZE / sizeof (struct page *)),
1, !umem->writable, page_list, vma_list);
if (ret < 0)
goto out;
umem->npages += ret;
cur_base += ret * PAGE_SIZE;
npages -= ret;
for_each_sg(sg_list_start, sg, ret, i) {
if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
umem->hugetlb = 0;
sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
}
/* preparing for next loop */
sg_list_start = sg;
}
umem->nmap = ib_dma_map_sg_attrs(context->device,
umem->sg_head.sgl,
umem->npages,
DMA_BIDIRECTIONAL,
&attrs);
if (umem->nmap <= 0) {
ret = -ENOMEM;
goto out;
}
ret = 0;
out:
if (ret < 0) {
if (need_release)
__ib_umem_release(context->device, umem, 0);
put_pid(umem->pid);
kfree(umem);
} else
current->mm->pinned_vm = locked;
up_write(&current->mm->mmap_sem);
if (vma_list)
free_page((unsigned long) vma_list);
free_page((unsigned long) page_list);
return ret < 0 ? ERR_PTR(ret) : umem;
}
EXPORT_SYMBOL(ib_umem_get);
static void ib_umem_account(struct work_struct *work)
{
struct ib_umem *umem = container_of(work, struct ib_umem, work);
down_write(&umem->mm->mmap_sem);
umem->mm->pinned_vm -= umem->diff;
up_write(&umem->mm->mmap_sem);
mmput(umem->mm);
kfree(umem);
}
/**
* ib_umem_release - release memory pinned with ib_umem_get
* @umem: umem struct to release
*/
void ib_umem_release(struct ib_umem *umem)
{
struct ib_ucontext *context = umem->context;
struct mm_struct *mm;
struct task_struct *task;
unsigned long diff;
__ib_umem_release(umem->context->device, umem, 1);
task = get_pid_task(umem->pid, PIDTYPE_PID);
put_pid(umem->pid);
if (!task)
goto out;
mm = get_task_mm(task);
put_task_struct(task);
if (!mm)
goto out;
diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
/*
* We may be called with the mm's mmap_sem already held. This
* can happen when a userspace munmap() is the call that drops
* the last reference to our file and calls our release
* method. If there are memory regions to destroy, we'll end
* up here and not be able to take the mmap_sem. In that case
 * we defer the pinned_vm accounting to the ib_wq workqueue.
*/
if (context->closing) {
if (!down_write_trylock(&mm->mmap_sem)) {
INIT_WORK(&umem->work, ib_umem_account);
umem->mm = mm;
umem->diff = diff;
queue_work(ib_wq, &umem->work);
return;
}
} else
down_write(&mm->mmap_sem);
mm->pinned_vm -= diff;
up_write(&mm->mmap_sem);
mmput(mm);
out:
kfree(umem);
}
EXPORT_SYMBOL(ib_umem_release);
int ib_umem_page_count(struct ib_umem *umem)
{
int shift;
int i;
int n;
struct scatterlist *sg;
shift = ilog2(umem->page_size);
n = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
n += sg_dma_len(sg) >> shift;
return n;
}
EXPORT_SYMBOL(ib_umem_page_count);
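/*
 * Typical use from a driver's memory-registration path (sketch; pd, start,
 * length and access_flags are illustrative, error handling trimmed):
 *
 *	struct ib_umem *umem;
 *	int npages;
 *
 *	umem = ib_umem_get(pd->uobject->context, start, length,
 *			   access_flags, 0);
 *	if (IS_ERR(umem))
 *		return ERR_CAST(umem);
 *
 *	npages = ib_umem_page_count(umem);
 *	(walk umem->sg_head.sgl to build the HCA's page list)
 *
 *	ib_umem_release(umem);	(on deregistration, or on a later error)
 */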

File diff suppressed because it is too large

View file

@ -0,0 +1,262 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2005 Voltaire, Inc. All rights reserved.
* Copyright (c) 2005 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef UVERBS_H
#define UVERBS_H
#include <linux/kref.h>
#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/completion.h>
#include <linux/cdev.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
do { \
(udata)->inbuf = (const void __user *) (ibuf); \
(udata)->outbuf = (void __user *) (obuf); \
(udata)->inlen = (ilen); \
(udata)->outlen = (olen); \
} while (0)
#define INIT_UDATA_BUF_OR_NULL(udata, ibuf, obuf, ilen, olen) \
do { \
(udata)->inbuf = (ilen) ? (const void __user *) (ibuf) : NULL; \
(udata)->outbuf = (olen) ? (void __user *) (obuf) : NULL; \
(udata)->inlen = (ilen); \
(udata)->outlen = (olen); \
} while (0)
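/*
 * The write() command handlers use these to describe the driver-private
 * portions of a request/response, typically along these lines (sketch; cmd,
 * resp, buf, in_len and out_len are the handler's locals/arguments):
 *
 *	struct ib_udata udata;
 *
 *	INIT_UDATA(&udata, buf + sizeof(cmd),
 *		   (unsigned long) cmd.response + sizeof(resp),
 *		   in_len - sizeof(cmd), out_len - sizeof(resp));
 *
 * i.e. the vendor-specific input starts right after the fixed command
 * structure, and the vendor-specific output follows the fixed response
 * structure in the user-supplied response buffer.
 */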
/*
* Our lifetime rules for these structs are the following:
*
* struct ib_uverbs_device: One reference is held by the module and
* released in ib_uverbs_remove_one(). Another reference is taken by
* ib_uverbs_open() each time the character special file is opened,
* and released in ib_uverbs_release_file() when the file is released.
*
* struct ib_uverbs_file: One reference is held by the VFS and
* released when the file is closed. Another reference is taken when
* an asynchronous event queue file is created and released when the
* event file is closed.
*
* struct ib_uverbs_event_file: One reference is held by the VFS and
* released when the file is closed. For asynchronous event files,
* another reference is held by the corresponding main context file
* and released when that file is closed. For completion event files,
* a reference is taken when a CQ is created that uses the file, and
* released when the CQ is destroyed.
*/
struct ib_uverbs_device {
struct kref ref;
int num_comp_vectors;
struct completion comp;
struct device *dev;
struct ib_device *ib_dev;
int devnum;
struct cdev cdev;
struct rb_root xrcd_tree;
struct mutex xrcd_tree_mutex;
};
struct ib_uverbs_event_file {
struct kref ref;
int is_async;
struct ib_uverbs_file *uverbs_file;
spinlock_t lock;
int is_closed;
wait_queue_head_t poll_wait;
struct fasync_struct *async_queue;
struct list_head event_list;
};
struct ib_uverbs_file {
struct kref ref;
struct mutex mutex;
struct ib_uverbs_device *device;
struct ib_ucontext *ucontext;
struct ib_event_handler event_handler;
struct ib_uverbs_event_file *async_file;
};
struct ib_uverbs_event {
union {
struct ib_uverbs_async_event_desc async;
struct ib_uverbs_comp_event_desc comp;
} desc;
struct list_head list;
struct list_head obj_list;
u32 *counter;
};
struct ib_uverbs_mcast_entry {
struct list_head list;
union ib_gid gid;
u16 lid;
};
struct ib_uevent_object {
struct ib_uobject uobject;
struct list_head event_list;
u32 events_reported;
};
struct ib_uxrcd_object {
struct ib_uobject uobject;
atomic_t refcnt;
};
struct ib_usrq_object {
struct ib_uevent_object uevent;
struct ib_uxrcd_object *uxrcd;
};
struct ib_uqp_object {
struct ib_uevent_object uevent;
struct list_head mcast_list;
struct ib_uxrcd_object *uxrcd;
};
struct ib_ucq_object {
struct ib_uobject uobject;
struct ib_uverbs_file *uverbs_file;
struct list_head comp_list;
struct list_head async_list;
u32 comp_events_reported;
u32 async_events_reported;
};
extern spinlock_t ib_uverbs_idr_lock;
extern struct idr ib_uverbs_pd_idr;
extern struct idr ib_uverbs_mr_idr;
extern struct idr ib_uverbs_mw_idr;
extern struct idr ib_uverbs_ah_idr;
extern struct idr ib_uverbs_cq_idr;
extern struct idr ib_uverbs_qp_idr;
extern struct idr ib_uverbs_srq_idr;
extern struct idr ib_uverbs_xrcd_idr;
extern struct idr ib_uverbs_rule_idr;
void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
int is_async);
struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
struct ib_uverbs_event_file *ev_file,
struct ib_ucq_object *uobj);
void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
struct ib_uevent_object *uobj);
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
struct ib_uverbs_flow_spec {
union {
union {
struct ib_uverbs_flow_spec_hdr hdr;
struct {
__u32 type;
__u16 size;
__u16 reserved;
};
};
struct ib_uverbs_flow_spec_eth eth;
struct ib_uverbs_flow_spec_ipv4 ipv4;
struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
};
};
#define IB_UVERBS_DECLARE_CMD(name) \
ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
const char __user *buf, int in_len, \
int out_len)
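/*
 * For example, IB_UVERBS_DECLARE_CMD(alloc_pd) below declares:
 *
 *	ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
 *				   const char __user *buf, int in_len,
 *				   int out_len);
 */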
IB_UVERBS_DECLARE_CMD(get_context);
IB_UVERBS_DECLARE_CMD(query_device);
IB_UVERBS_DECLARE_CMD(query_port);
IB_UVERBS_DECLARE_CMD(alloc_pd);
IB_UVERBS_DECLARE_CMD(dealloc_pd);
IB_UVERBS_DECLARE_CMD(reg_mr);
IB_UVERBS_DECLARE_CMD(rereg_mr);
IB_UVERBS_DECLARE_CMD(dereg_mr);
IB_UVERBS_DECLARE_CMD(alloc_mw);
IB_UVERBS_DECLARE_CMD(dealloc_mw);
IB_UVERBS_DECLARE_CMD(create_comp_channel);
IB_UVERBS_DECLARE_CMD(create_cq);
IB_UVERBS_DECLARE_CMD(resize_cq);
IB_UVERBS_DECLARE_CMD(poll_cq);
IB_UVERBS_DECLARE_CMD(req_notify_cq);
IB_UVERBS_DECLARE_CMD(destroy_cq);
IB_UVERBS_DECLARE_CMD(create_qp);
IB_UVERBS_DECLARE_CMD(open_qp);
IB_UVERBS_DECLARE_CMD(query_qp);
IB_UVERBS_DECLARE_CMD(modify_qp);
IB_UVERBS_DECLARE_CMD(destroy_qp);
IB_UVERBS_DECLARE_CMD(post_send);
IB_UVERBS_DECLARE_CMD(post_recv);
IB_UVERBS_DECLARE_CMD(post_srq_recv);
IB_UVERBS_DECLARE_CMD(create_ah);
IB_UVERBS_DECLARE_CMD(destroy_ah);
IB_UVERBS_DECLARE_CMD(attach_mcast);
IB_UVERBS_DECLARE_CMD(detach_mcast);
IB_UVERBS_DECLARE_CMD(create_srq);
IB_UVERBS_DECLARE_CMD(modify_srq);
IB_UVERBS_DECLARE_CMD(query_srq);
IB_UVERBS_DECLARE_CMD(destroy_srq);
IB_UVERBS_DECLARE_CMD(create_xsrq);
IB_UVERBS_DECLARE_CMD(open_xrcd);
IB_UVERBS_DECLARE_CMD(close_xrcd);
#define IB_UVERBS_DECLARE_EX_CMD(name) \
int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \
struct ib_udata *ucore, \
struct ib_udata *uhw)
IB_UVERBS_DECLARE_EX_CMD(create_flow);
IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
#endif /* UVERBS_H */

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -0,0 +1,148 @@
/*
* Copyright (c) 2005 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/export.h>
#include <rdma/ib_marshall.h>
void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
struct ib_ah_attr *src)
{
memcpy(dst->grh.dgid, src->grh.dgid.raw, sizeof src->grh.dgid);
dst->grh.flow_label = src->grh.flow_label;
dst->grh.sgid_index = src->grh.sgid_index;
dst->grh.hop_limit = src->grh.hop_limit;
dst->grh.traffic_class = src->grh.traffic_class;
memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved));
dst->dlid = src->dlid;
dst->sl = src->sl;
dst->src_path_bits = src->src_path_bits;
dst->static_rate = src->static_rate;
dst->is_global = src->ah_flags & IB_AH_GRH ? 1 : 0;
dst->port_num = src->port_num;
dst->reserved = 0;
}
EXPORT_SYMBOL(ib_copy_ah_attr_to_user);
void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
struct ib_qp_attr *src)
{
dst->qp_state = src->qp_state;
dst->cur_qp_state = src->cur_qp_state;
dst->path_mtu = src->path_mtu;
dst->path_mig_state = src->path_mig_state;
dst->qkey = src->qkey;
dst->rq_psn = src->rq_psn;
dst->sq_psn = src->sq_psn;
dst->dest_qp_num = src->dest_qp_num;
dst->qp_access_flags = src->qp_access_flags;
dst->max_send_wr = src->cap.max_send_wr;
dst->max_recv_wr = src->cap.max_recv_wr;
dst->max_send_sge = src->cap.max_send_sge;
dst->max_recv_sge = src->cap.max_recv_sge;
dst->max_inline_data = src->cap.max_inline_data;
ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr);
ib_copy_ah_attr_to_user(&dst->alt_ah_attr, &src->alt_ah_attr);
dst->pkey_index = src->pkey_index;
dst->alt_pkey_index = src->alt_pkey_index;
dst->en_sqd_async_notify = src->en_sqd_async_notify;
dst->sq_draining = src->sq_draining;
dst->max_rd_atomic = src->max_rd_atomic;
dst->max_dest_rd_atomic = src->max_dest_rd_atomic;
dst->min_rnr_timer = src->min_rnr_timer;
dst->port_num = src->port_num;
dst->timeout = src->timeout;
dst->retry_cnt = src->retry_cnt;
dst->rnr_retry = src->rnr_retry;
dst->alt_port_num = src->alt_port_num;
dst->alt_timeout = src->alt_timeout;
memset(dst->reserved, 0, sizeof(dst->reserved));
}
EXPORT_SYMBOL(ib_copy_qp_attr_to_user);
void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
struct ib_sa_path_rec *src)
{
memcpy(dst->dgid, src->dgid.raw, sizeof src->dgid);
memcpy(dst->sgid, src->sgid.raw, sizeof src->sgid);
dst->dlid = src->dlid;
dst->slid = src->slid;
dst->raw_traffic = src->raw_traffic;
dst->flow_label = src->flow_label;
dst->hop_limit = src->hop_limit;
dst->traffic_class = src->traffic_class;
dst->reversible = src->reversible;
dst->numb_path = src->numb_path;
dst->pkey = src->pkey;
dst->sl = src->sl;
dst->mtu_selector = src->mtu_selector;
dst->mtu = src->mtu;
dst->rate_selector = src->rate_selector;
dst->rate = src->rate;
dst->packet_life_time = src->packet_life_time;
dst->preference = src->preference;
dst->packet_life_time_selector = src->packet_life_time_selector;
}
EXPORT_SYMBOL(ib_copy_path_rec_to_user);
void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
struct ib_user_path_rec *src)
{
memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid);
memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid);
dst->dlid = src->dlid;
dst->slid = src->slid;
dst->raw_traffic = src->raw_traffic;
dst->flow_label = src->flow_label;
dst->hop_limit = src->hop_limit;
dst->traffic_class = src->traffic_class;
dst->reversible = src->reversible;
dst->numb_path = src->numb_path;
dst->pkey = src->pkey;
dst->sl = src->sl;
dst->mtu_selector = src->mtu_selector;
dst->mtu = src->mtu;
dst->rate_selector = src->rate_selector;
dst->rate = src->rate;
dst->packet_life_time = src->packet_life_time;
dst->preference = src->preference;
dst->packet_life_time_selector = src->packet_life_time_selector;
memset(dst->smac, 0, sizeof(dst->smac));
memset(dst->dmac, 0, sizeof(dst->dmac));
dst->vlan_id = 0xffff;
}
EXPORT_SYMBOL(ib_copy_path_rec_from_user);

File diff suppressed because it is too large