Fixed MTP to work with TWRP

This commit is contained in:
awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions

View file

@ -0,0 +1,49 @@
config INFINIBAND_IPOIB
tristate "IP-over-InfiniBand"
depends on NETDEVICES && INET && (IPV6 || IPV6=n)
---help---
Support for the IP-over-InfiniBand protocol (IPoIB). This
transports IP packets over InfiniBand so you can use your IB
device as a fancy NIC.
See Documentation/infiniband/ipoib.txt for more information
config INFINIBAND_IPOIB_CM
bool "IP-over-InfiniBand Connected Mode support"
depends on INFINIBAND_IPOIB
default n
---help---
This option enables support for IPoIB connected mode. After
enabling this option, you need to switch to connected mode
through /sys/class/net/ibXXX/mode to actually create
connections, and then increase the interface MTU with
e.g. ifconfig ib0 mtu 65520.
WARNING: Enabling connected mode will trigger some packet
drops for multicast and UD mode traffic from this interface,
unless you limit mtu for these destinations to 2044.
config INFINIBAND_IPOIB_DEBUG
bool "IP-over-InfiniBand debugging" if EXPERT
depends on INFINIBAND_IPOIB
default y
---help---
This option causes debugging code to be compiled into the
IPoIB driver. The output can be turned on via the
debug_level and mcast_debug_level module parameters (which
can also be set after the driver is loaded through sysfs).
This option also creates a directory tree under ipoib/ in
debugfs, which contains files that expose debugging
information about IB multicast groups used by the IPoIB
driver.
config INFINIBAND_IPOIB_DEBUG_DATA
bool "IP-over-InfiniBand data path debugging"
depends on INFINIBAND_IPOIB_DEBUG
---help---
This option compiles debugging code into the data path
of the IPoIB driver. The output can be turned on via the
data_debug_level module parameter; however, even with output
turned off, this debugging code will have some performance
impact.

View file

@ -0,0 +1,12 @@
obj-$(CONFIG_INFINIBAND_IPOIB) += ib_ipoib.o
ib_ipoib-y := ipoib_main.o \
ipoib_ib.o \
ipoib_multicast.o \
ipoib_verbs.o \
ipoib_vlan.o \
ipoib_ethtool.o \
ipoib_netlink.o
ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_CM) += ipoib_cm.o
ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_DEBUG) += ipoib_fs.o

View file

@ -0,0 +1,773 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _IPOIB_H
#define _IPOIB_H
#include <linux/list.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/workqueue.h>
#include <linux/kref.h>
#include <linux/if_infiniband.h>
#include <linux/mutex.h>
#include <net/neighbour.h>
#include <net/sch_generic.h>
#include <linux/atomic.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_sa.h>
#include <linux/sched.h>
/* constants */
enum ipoib_flush_level {
IPOIB_FLUSH_LIGHT,
IPOIB_FLUSH_NORMAL,
IPOIB_FLUSH_HEAVY
};
enum {
IPOIB_ENCAP_LEN = 4,
IPOIB_UD_HEAD_SIZE = IB_GRH_BYTES + IPOIB_ENCAP_LEN,
IPOIB_UD_RX_SG = 2, /* max buffer needed for 4K mtu */
IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header to 16 */
IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN,
IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE,
IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE,
IPOIB_RX_RING_SIZE = 256,
IPOIB_TX_RING_SIZE = 128,
IPOIB_MAX_QUEUE_SIZE = 8192,
IPOIB_MIN_QUEUE_SIZE = 2,
IPOIB_CM_MAX_CONN_QP = 4096,
IPOIB_NUM_WC = 4,
IPOIB_MAX_PATH_REC_QUEUE = 3,
IPOIB_MAX_MCAST_QUEUE = 3,
IPOIB_FLAG_OPER_UP = 0,
IPOIB_FLAG_INITIALIZED = 1,
IPOIB_FLAG_ADMIN_UP = 2,
IPOIB_PKEY_ASSIGNED = 3,
IPOIB_FLAG_SUBINTERFACE = 5,
IPOIB_MCAST_RUN = 6,
IPOIB_STOP_REAPER = 7,
IPOIB_FLAG_ADMIN_CM = 9,
IPOIB_FLAG_UMCAST = 10,
IPOIB_STOP_NEIGH_GC = 11,
IPOIB_NEIGH_TBL_FLUSH = 12,
IPOIB_MAX_BACKOFF_SECONDS = 16,
IPOIB_MCAST_FLAG_FOUND = 0, /* used in set_multicast_list */
IPOIB_MCAST_FLAG_SENDONLY = 1,
IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
IPOIB_MCAST_FLAG_ATTACHED = 3,
IPOIB_MCAST_JOIN_STARTED = 4,
MAX_SEND_CQE = 16,
IPOIB_CM_COPYBREAK = 256,
IPOIB_NON_CHILD = 0,
IPOIB_LEGACY_CHILD = 1,
IPOIB_RTNL_CHILD = 2,
};
#define IPOIB_OP_RECV (1ul << 31)
#ifdef CONFIG_INFINIBAND_IPOIB_CM
#define IPOIB_OP_CM (1ul << 30)
#else
#define IPOIB_OP_CM (0)
#endif
#define IPOIB_QPN_MASK ((__force u32) cpu_to_be32(0xFFFFFF))
/* structs */
struct ipoib_header {
__be16 proto;
u16 reserved;
};
struct ipoib_cb {
struct qdisc_skb_cb qdisc_cb;
u8 hwaddr[INFINIBAND_ALEN];
};
static inline struct ipoib_cb *ipoib_skb_cb(const struct sk_buff *skb)
{
BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct ipoib_cb));
return (struct ipoib_cb *)skb->cb;
}
/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
struct ipoib_mcast {
struct ib_sa_mcmember_rec mcmember;
struct ib_sa_multicast *mc;
struct ipoib_ah *ah;
struct rb_node rb_node;
struct list_head list;
unsigned long created;
unsigned long backoff;
unsigned long flags;
unsigned char logcount;
struct list_head neigh_list;
struct sk_buff_head pkt_queue;
struct net_device *dev;
struct completion done;
};
struct ipoib_rx_buf {
struct sk_buff *skb;
u64 mapping[IPOIB_UD_RX_SG];
};
struct ipoib_tx_buf {
struct sk_buff *skb;
u64 mapping[MAX_SKB_FRAGS + 1];
};
struct ipoib_cm_tx_buf {
struct sk_buff *skb;
u64 mapping;
};
struct ib_cm_id;
struct ipoib_cm_data {
__be32 qpn; /* High byte MUST be ignored on receive */
__be32 mtu;
};
/*
* Quoting 10.3.1 Queue Pair and EE Context States:
*
* Note, for QPs that are associated with an SRQ, the Consumer should take the
* QP through the Error State before invoking a Destroy QP or a Modify QP to the
* Reset State. The Consumer may invoke the Destroy QP without first performing
* a Modify QP to the Error State and waiting for the Affiliated Asynchronous
* Last WQE Reached Event. However, if the Consumer does not wait for the
* Affiliated Asynchronous Last WQE Reached Event, then WQE and Data Segment
* leakage may occur. Therefore, it is good programming practice to tear down a
* QP that is associated with an SRQ by using the following process:
*
* - Put the QP in the Error State
* - Wait for the Affiliated Asynchronous Last WQE Reached Event;
* - either:
* drain the CQ by invoking the Poll CQ verb and either wait for CQ
* to be empty or the number of Poll CQ operations has exceeded
* CQ capacity size;
* - or
* post another WR that completes on the same CQ and wait for this
* WR to return as a WC;
* - and then invoke a Destroy QP or Reset QP.
*
* We use the second option and wait for a completion on the
* same CQ before destroying QPs attached to our SRQ.
*/
enum ipoib_cm_state {
IPOIB_CM_RX_LIVE,
IPOIB_CM_RX_ERROR, /* Ignored by stale task */
IPOIB_CM_RX_FLUSH /* Last WQE Reached event observed */
};
struct ipoib_cm_rx {
struct ib_cm_id *id;
struct ib_qp *qp;
struct ipoib_cm_rx_buf *rx_ring;
struct list_head list;
struct net_device *dev;
unsigned long jiffies;
enum ipoib_cm_state state;
int recv_count;
};
struct ipoib_cm_tx {
struct ib_cm_id *id;
struct ib_qp *qp;
struct list_head list;
struct net_device *dev;
struct ipoib_neigh *neigh;
struct ipoib_path *path;
struct ipoib_cm_tx_buf *tx_ring;
unsigned tx_head;
unsigned tx_tail;
unsigned long flags;
u32 mtu;
};
struct ipoib_cm_rx_buf {
struct sk_buff *skb;
u64 mapping[IPOIB_CM_RX_SG];
};
struct ipoib_cm_dev_priv {
struct ib_srq *srq;
struct ipoib_cm_rx_buf *srq_ring;
struct ib_cm_id *id;
struct list_head passive_ids; /* state: LIVE */
struct list_head rx_error_list; /* state: ERROR */
struct list_head rx_flush_list; /* state: FLUSH, drain not started */
struct list_head rx_drain_list; /* state: FLUSH, drain started */
struct list_head rx_reap_list; /* state: FLUSH, drain done */
struct work_struct start_task;
struct work_struct reap_task;
struct work_struct skb_task;
struct work_struct rx_reap_task;
struct delayed_work stale_task;
struct sk_buff_head skb_queue;
struct list_head start_list;
struct list_head reap_list;
struct ib_wc ibwc[IPOIB_NUM_WC];
struct ib_sge rx_sge[IPOIB_CM_RX_SG];
struct ib_recv_wr rx_wr;
int nonsrq_conn_qp;
int max_cm_mtu;
int num_frags;
};
struct ipoib_ethtool_st {
u16 coalesce_usecs;
u16 max_coalesced_frames;
};
struct ipoib_neigh_table;
struct ipoib_neigh_hash {
struct ipoib_neigh_table *ntbl;
struct ipoib_neigh __rcu **buckets;
struct rcu_head rcu;
u32 mask;
u32 size;
};
struct ipoib_neigh_table {
struct ipoib_neigh_hash __rcu *htbl;
atomic_t entries;
struct completion flushed;
struct completion deleted;
};
/*
* Device private locking: network stack tx_lock protects members used
* in TX fast path, lock protects everything else. lock nests inside
* of tx_lock (ie tx_lock must be acquired first if needed).
*/
struct ipoib_dev_priv {
spinlock_t lock;
struct net_device *dev;
struct napi_struct napi;
unsigned long flags;
struct rw_semaphore vlan_rwsem;
struct rb_root path_tree;
struct list_head path_list;
struct ipoib_neigh_table ntbl;
struct ipoib_mcast *broadcast;
struct list_head multicast_list;
struct rb_root multicast_tree;
struct delayed_work mcast_task;
struct work_struct carrier_on_task;
struct work_struct flush_light;
struct work_struct flush_normal;
struct work_struct flush_heavy;
struct work_struct restart_task;
struct delayed_work ah_reap_task;
struct delayed_work neigh_reap_task;
struct ib_device *ca;
u8 port;
u16 pkey;
u16 pkey_index;
struct ib_pd *pd;
struct ib_mr *mr;
struct ib_cq *recv_cq;
struct ib_cq *send_cq;
struct ib_qp *qp;
u32 qkey;
union ib_gid local_gid;
u16 local_lid;
unsigned int admin_mtu;
unsigned int mcast_mtu;
unsigned int max_ib_mtu;
struct ipoib_rx_buf *rx_ring;
struct ipoib_tx_buf *tx_ring;
unsigned tx_head;
unsigned tx_tail;
struct ib_sge tx_sge[MAX_SKB_FRAGS + 1];
struct ib_send_wr tx_wr;
unsigned tx_outstanding;
struct ib_wc send_wc[MAX_SEND_CQE];
struct ib_recv_wr rx_wr;
struct ib_sge rx_sge[IPOIB_UD_RX_SG];
struct ib_wc ibwc[IPOIB_NUM_WC];
struct list_head dead_ahs;
struct ib_event_handler event_handler;
struct net_device *parent;
struct list_head child_intfs;
struct list_head list;
int child_type;
#ifdef CONFIG_INFINIBAND_IPOIB_CM
struct ipoib_cm_dev_priv cm;
#endif
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
struct list_head fs_list;
struct dentry *mcg_dentry;
struct dentry *path_dentry;
#endif
int hca_caps;
struct ipoib_ethtool_st ethtool;
struct timer_list poll_timer;
};
struct ipoib_ah {
struct net_device *dev;
struct ib_ah *ah;
struct list_head list;
struct kref ref;
unsigned last_send;
};
struct ipoib_path {
struct net_device *dev;
struct ib_sa_path_rec pathrec;
struct ipoib_ah *ah;
struct sk_buff_head queue;
struct list_head neigh_list;
int query_id;
struct ib_sa_query *query;
struct completion done;
struct rb_node rb_node;
struct list_head list;
int valid;
};
struct ipoib_neigh {
struct ipoib_ah *ah;
#ifdef CONFIG_INFINIBAND_IPOIB_CM
struct ipoib_cm_tx *cm;
#endif
u8 daddr[INFINIBAND_ALEN];
struct sk_buff_head queue;
struct net_device *dev;
struct list_head list;
struct ipoib_neigh __rcu *hnext;
struct rcu_head rcu;
atomic_t refcnt;
unsigned long alive;
};
#define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN)
#define IPOIB_UD_BUF_SIZE(ib_mtu) (ib_mtu + IB_GRH_BYTES)
static inline int ipoib_ud_need_sg(unsigned int ib_mtu)
{
return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE;
}
void ipoib_neigh_dtor(struct ipoib_neigh *neigh);
static inline void ipoib_neigh_put(struct ipoib_neigh *neigh)
{
if (atomic_dec_and_test(&neigh->refcnt))
ipoib_neigh_dtor(neigh);
}
struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr);
struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
struct net_device *dev);
void ipoib_neigh_free(struct ipoib_neigh *neigh);
void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid);
extern struct workqueue_struct *ipoib_workqueue;
/* functions */
int ipoib_poll(struct napi_struct *napi, int budget);
void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr);
struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
struct ib_pd *pd, struct ib_ah_attr *attr);
void ipoib_free_ah(struct kref *kref);
static inline void ipoib_put_ah(struct ipoib_ah *ah)
{
kref_put(&ah->ref, ipoib_free_ah);
}
int ipoib_open(struct net_device *dev);
int ipoib_add_pkey_attr(struct net_device *dev);
int ipoib_add_umcast_attr(struct net_device *dev);
void ipoib_send(struct net_device *dev, struct sk_buff *skb,
struct ipoib_ah *address, u32 qpn);
void ipoib_reap_ah(struct work_struct *work);
void ipoib_mark_paths_invalid(struct net_device *dev);
void ipoib_flush_paths(struct net_device *dev);
struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
void ipoib_ib_dev_flush_light(struct work_struct *work);
void ipoib_ib_dev_flush_normal(struct work_struct *work);
void ipoib_ib_dev_flush_heavy(struct work_struct *work);
void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);
int ipoib_ib_dev_open(struct net_device *dev, int flush);
int ipoib_ib_dev_up(struct net_device *dev);
int ipoib_ib_dev_down(struct net_device *dev, int flush);
int ipoib_ib_dev_stop(struct net_device *dev, int flush);
void ipoib_pkey_dev_check_presence(struct net_device *dev);
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
void ipoib_dev_cleanup(struct net_device *dev);
void ipoib_mcast_join_task(struct work_struct *work);
void ipoib_mcast_carrier_on_task(struct work_struct *work);
void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
void ipoib_mcast_restart_task(struct work_struct *work);
int ipoib_mcast_start_thread(struct net_device *dev);
int ipoib_mcast_stop_thread(struct net_device *dev, int flush);
void ipoib_mcast_dev_down(struct net_device *dev);
void ipoib_mcast_dev_flush(struct net_device *dev);
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev);
int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter);
void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter,
union ib_gid *gid,
unsigned long *created,
unsigned int *queuelen,
unsigned int *complete,
unsigned int *send_only);
struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev);
int ipoib_path_iter_next(struct ipoib_path_iter *iter);
void ipoib_path_iter_read(struct ipoib_path_iter *iter,
struct ipoib_path *path);
#endif
int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
union ib_gid *mgid, int set_qkey);
int ipoib_init_qp(struct net_device *dev);
int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
void ipoib_transport_dev_cleanup(struct net_device *dev);
void ipoib_event(struct ib_event_handler *handler,
struct ib_event *record);
int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey);
int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey);
int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
u16 pkey, int child_type);
int __init ipoib_netlink_init(void);
void __exit ipoib_netlink_fini(void);
void ipoib_set_umcast(struct net_device *ndev, int umcast_val);
int ipoib_set_mode(struct net_device *dev, const char *buf);
void ipoib_setup(struct net_device *dev);
void ipoib_pkey_open(struct ipoib_dev_priv *priv);
void ipoib_drain_cq(struct net_device *dev);
void ipoib_set_ethtool_ops(struct net_device *dev);
int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca);
#define IPOIB_FLAGS_RC 0x80
#define IPOIB_FLAGS_UC 0x40
/* We don't support UC connections at the moment */
#define IPOIB_CM_SUPPORTED(ha) (ha[0] & (IPOIB_FLAGS_RC))
#ifdef CONFIG_INFINIBAND_IPOIB_CM
extern int ipoib_max_conn_qp;
static inline int ipoib_cm_admin_enabled(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
return IPOIB_CM_SUPPORTED(dev->dev_addr) &&
test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
}
static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
return IPOIB_CM_SUPPORTED(hwaddr) &&
test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
}
static inline int ipoib_cm_up(struct ipoib_neigh *neigh)
{
return test_bit(IPOIB_FLAG_OPER_UP, &neigh->cm->flags);
}
static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh)
{
return neigh->cm;
}
static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx)
{
neigh->cm = tx;
}
static inline int ipoib_cm_has_srq(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
return !!priv->cm.srq;
}
static inline unsigned int ipoib_cm_max_mtu(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
return priv->cm.max_cm_mtu;
}
void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx);
int ipoib_cm_dev_open(struct net_device *dev);
void ipoib_cm_dev_stop(struct net_device *dev);
int ipoib_cm_dev_init(struct net_device *dev);
int ipoib_cm_add_mode_attr(struct net_device *dev);
void ipoib_cm_dev_cleanup(struct net_device *dev);
struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
struct ipoib_neigh *neigh);
void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx);
void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
unsigned int mtu);
void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc);
void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc);
#else
struct ipoib_cm_tx;
#define ipoib_max_conn_qp 0
static inline int ipoib_cm_admin_enabled(struct net_device *dev)
{
return 0;
}
static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr)
{
return 0;
}
static inline int ipoib_cm_up(struct ipoib_neigh *neigh)
{
return 0;
}
static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh)
{
return NULL;
}
static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx)
{
}
static inline int ipoib_cm_has_srq(struct net_device *dev)
{
return 0;
}
static inline unsigned int ipoib_cm_max_mtu(struct net_device *dev)
{
return 0;
}
static inline
void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
{
return;
}
static inline
int ipoib_cm_dev_open(struct net_device *dev)
{
return 0;
}
static inline
void ipoib_cm_dev_stop(struct net_device *dev)
{
return;
}
static inline
int ipoib_cm_dev_init(struct net_device *dev)
{
return -ENOSYS;
}
static inline
void ipoib_cm_dev_cleanup(struct net_device *dev)
{
return;
}
static inline
struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
struct ipoib_neigh *neigh)
{
return NULL;
}
static inline
void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
{
return;
}
static inline
int ipoib_cm_add_mode_attr(struct net_device *dev)
{
return 0;
}
static inline void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
unsigned int mtu)
{
dev_kfree_skb_any(skb);
}
static inline void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
}
static inline void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{
}
#endif
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
void ipoib_create_debug_files(struct net_device *dev);
void ipoib_delete_debug_files(struct net_device *dev);
int ipoib_register_debugfs(void);
void ipoib_unregister_debugfs(void);
#else
static inline void ipoib_create_debug_files(struct net_device *dev) { }
static inline void ipoib_delete_debug_files(struct net_device *dev) { }
static inline int ipoib_register_debugfs(void) { return 0; }
static inline void ipoib_unregister_debugfs(void) { }
#endif
#define ipoib_printk(level, priv, format, arg...) \
printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg)
#define ipoib_warn(priv, format, arg...) \
ipoib_printk(KERN_WARNING, priv, format , ## arg)
extern int ipoib_sendq_size;
extern int ipoib_recvq_size;
extern struct ib_sa_client ipoib_sa_client;
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
extern int ipoib_debug_level;
#define ipoib_dbg(priv, format, arg...) \
do { \
if (ipoib_debug_level > 0) \
ipoib_printk(KERN_DEBUG, priv, format , ## arg); \
} while (0)
#define ipoib_dbg_mcast(priv, format, arg...) \
do { \
if (mcast_debug_level > 0) \
ipoib_printk(KERN_DEBUG, priv, format , ## arg); \
} while (0)
#else /* CONFIG_INFINIBAND_IPOIB_DEBUG */
#define ipoib_dbg(priv, format, arg...) \
do { (void) (priv); } while (0)
#define ipoib_dbg_mcast(priv, format, arg...) \
do { (void) (priv); } while (0)
#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
#define ipoib_dbg_data(priv, format, arg...) \
do { \
if (data_debug_level > 0) \
ipoib_printk(KERN_DEBUG, priv, format , ## arg); \
} while (0)
#else /* CONFIG_INFINIBAND_IPOIB_DEBUG_DATA */
#define ipoib_dbg_data(priv, format, arg...) \
do { (void) (priv); } while (0)
#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG_DATA */
#define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff)
extern const char ipoib_driver_version[];
#endif /* _IPOIB_H */

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,109 @@
/*
* Copyright (c) 2007 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/kernel.h>
#include <linux/ethtool.h>
#include <linux/netdevice.h>
#include "ipoib.h"
static void ipoib_get_drvinfo(struct net_device *netdev,
struct ethtool_drvinfo *drvinfo)
{
struct ipoib_dev_priv *priv = netdev_priv(netdev);
struct ib_device_attr *attr;
attr = kmalloc(sizeof(*attr), GFP_KERNEL);
if (attr && !ib_query_device(priv->ca, attr))
snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
"%d.%d.%d", (int)(attr->fw_ver >> 32),
(int)(attr->fw_ver >> 16) & 0xffff,
(int)attr->fw_ver & 0xffff);
kfree(attr);
strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device),
sizeof(drvinfo->bus_info));
strlcpy(drvinfo->version, ipoib_driver_version,
sizeof(drvinfo->version));
strlcpy(drvinfo->driver, "ib_ipoib", sizeof(drvinfo->driver));
}
static int ipoib_get_coalesce(struct net_device *dev,
struct ethtool_coalesce *coal)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
coal->rx_coalesce_usecs = priv->ethtool.coalesce_usecs;
coal->rx_max_coalesced_frames = priv->ethtool.max_coalesced_frames;
return 0;
}
static int ipoib_set_coalesce(struct net_device *dev,
struct ethtool_coalesce *coal)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
/*
* These values are saved in the private data and returned
* when ipoib_get_coalesce() is called
*/
if (coal->rx_coalesce_usecs > 0xffff ||
coal->rx_max_coalesced_frames > 0xffff)
return -EINVAL;
ret = ib_modify_cq(priv->recv_cq, coal->rx_max_coalesced_frames,
coal->rx_coalesce_usecs);
if (ret && ret != -ENOSYS) {
ipoib_warn(priv, "failed modifying CQ (%d)\n", ret);
return ret;
}
priv->ethtool.coalesce_usecs = coal->rx_coalesce_usecs;
priv->ethtool.max_coalesced_frames = coal->rx_max_coalesced_frames;
return 0;
}
static const struct ethtool_ops ipoib_ethtool_ops = {
.get_drvinfo = ipoib_get_drvinfo,
.get_coalesce = ipoib_get_coalesce,
.set_coalesce = ipoib_set_coalesce,
};
void ipoib_set_ethtool_ops(struct net_device *dev)
{
dev->ethtool_ops = &ipoib_ethtool_ops;
}

View file

@ -0,0 +1,297 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/err.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
struct file_operations;
#include <linux/debugfs.h>
#include <linux/export.h>
#include "ipoib.h"
static struct dentry *ipoib_root;
static void format_gid(union ib_gid *gid, char *buf)
{
int i, n;
for (n = 0, i = 0; i < 8; ++i) {
n += sprintf(buf + n, "%x",
be16_to_cpu(((__be16 *) gid->raw)[i]));
if (i < 7)
buf[n++] = ':';
}
}
static void *ipoib_mcg_seq_start(struct seq_file *file, loff_t *pos)
{
struct ipoib_mcast_iter *iter;
loff_t n = *pos;
iter = ipoib_mcast_iter_init(file->private);
if (!iter)
return NULL;
while (n--) {
if (ipoib_mcast_iter_next(iter)) {
kfree(iter);
return NULL;
}
}
return iter;
}
static void *ipoib_mcg_seq_next(struct seq_file *file, void *iter_ptr,
loff_t *pos)
{
struct ipoib_mcast_iter *iter = iter_ptr;
(*pos)++;
if (ipoib_mcast_iter_next(iter)) {
kfree(iter);
return NULL;
}
return iter;
}
static void ipoib_mcg_seq_stop(struct seq_file *file, void *iter_ptr)
{
/* nothing for now */
}
static int ipoib_mcg_seq_show(struct seq_file *file, void *iter_ptr)
{
struct ipoib_mcast_iter *iter = iter_ptr;
char gid_buf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"];
union ib_gid mgid;
unsigned long created;
unsigned int queuelen, complete, send_only;
if (!iter)
return 0;
ipoib_mcast_iter_read(iter, &mgid, &created, &queuelen,
&complete, &send_only);
format_gid(&mgid, gid_buf);
seq_printf(file,
"GID: %s\n"
" created: %10ld\n"
" queuelen: %9d\n"
" complete: %9s\n"
" send_only: %8s\n"
"\n",
gid_buf, created, queuelen,
complete ? "yes" : "no",
send_only ? "yes" : "no");
return 0;
}
static const struct seq_operations ipoib_mcg_seq_ops = {
.start = ipoib_mcg_seq_start,
.next = ipoib_mcg_seq_next,
.stop = ipoib_mcg_seq_stop,
.show = ipoib_mcg_seq_show,
};
static int ipoib_mcg_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
int ret;
ret = seq_open(file, &ipoib_mcg_seq_ops);
if (ret)
return ret;
seq = file->private_data;
seq->private = inode->i_private;
return 0;
}
static const struct file_operations ipoib_mcg_fops = {
.owner = THIS_MODULE,
.open = ipoib_mcg_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release
};
static void *ipoib_path_seq_start(struct seq_file *file, loff_t *pos)
{
struct ipoib_path_iter *iter;
loff_t n = *pos;
iter = ipoib_path_iter_init(file->private);
if (!iter)
return NULL;
while (n--) {
if (ipoib_path_iter_next(iter)) {
kfree(iter);
return NULL;
}
}
return iter;
}
static void *ipoib_path_seq_next(struct seq_file *file, void *iter_ptr,
loff_t *pos)
{
struct ipoib_path_iter *iter = iter_ptr;
(*pos)++;
if (ipoib_path_iter_next(iter)) {
kfree(iter);
return NULL;
}
return iter;
}
static void ipoib_path_seq_stop(struct seq_file *file, void *iter_ptr)
{
/* nothing for now */
}
static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr)
{
struct ipoib_path_iter *iter = iter_ptr;
char gid_buf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"];
struct ipoib_path path;
int rate;
if (!iter)
return 0;
ipoib_path_iter_read(iter, &path);
format_gid(&path.pathrec.dgid, gid_buf);
seq_printf(file,
"GID: %s\n"
" complete: %6s\n",
gid_buf, path.pathrec.dlid ? "yes" : "no");
if (path.pathrec.dlid) {
rate = ib_rate_to_mbps(path.pathrec.rate);
seq_printf(file,
" DLID: 0x%04x\n"
" SL: %12d\n"
" rate: %8d.%d Gb/sec\n",
be16_to_cpu(path.pathrec.dlid),
path.pathrec.sl,
rate / 1000, rate % 1000);
}
seq_putc(file, '\n');
return 0;
}
static const struct seq_operations ipoib_path_seq_ops = {
.start = ipoib_path_seq_start,
.next = ipoib_path_seq_next,
.stop = ipoib_path_seq_stop,
.show = ipoib_path_seq_show,
};
static int ipoib_path_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
int ret;
ret = seq_open(file, &ipoib_path_seq_ops);
if (ret)
return ret;
seq = file->private_data;
seq->private = inode->i_private;
return 0;
}
static const struct file_operations ipoib_path_fops = {
.owner = THIS_MODULE,
.open = ipoib_path_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release
};
void ipoib_create_debug_files(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
char name[IFNAMSIZ + sizeof "_path"];
snprintf(name, sizeof name, "%s_mcg", dev->name);
priv->mcg_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO,
ipoib_root, dev, &ipoib_mcg_fops);
if (!priv->mcg_dentry)
ipoib_warn(priv, "failed to create mcg debug file\n");
snprintf(name, sizeof name, "%s_path", dev->name);
priv->path_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO,
ipoib_root, dev, &ipoib_path_fops);
if (!priv->path_dentry)
ipoib_warn(priv, "failed to create path debug file\n");
}
void ipoib_delete_debug_files(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
debugfs_remove(priv->mcg_dentry);
debugfs_remove(priv->path_dentry);
}
int ipoib_register_debugfs(void)
{
ipoib_root = debugfs_create_dir("ipoib", NULL);
return ipoib_root ? 0 : -ENOMEM;
}
void ipoib_unregister_debugfs(void)
{
debugfs_remove(ipoib_root);
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,963 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/moduleparam.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/igmp.h>
#include <linux/inetdevice.h>
#include <linux/delay.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <net/dst.h>
#include "ipoib.h"
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
static int mcast_debug_level;
module_param(mcast_debug_level, int, 0644);
MODULE_PARM_DESC(mcast_debug_level,
"Enable multicast debug tracing if > 0");
#endif
static DEFINE_MUTEX(mcast_mutex);
struct ipoib_mcast_iter {
struct net_device *dev;
union ib_gid mgid;
unsigned long created;
unsigned int queuelen;
unsigned int complete;
unsigned int send_only;
};
static void ipoib_mcast_free(struct ipoib_mcast *mcast)
{
struct net_device *dev = mcast->dev;
int tx_dropped = 0;
ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n",
mcast->mcmember.mgid.raw);
/* remove all neigh connected to this mcast */
ipoib_del_neighs_by_gid(dev, mcast->mcmember.mgid.raw);
if (mcast->ah)
ipoib_put_ah(mcast->ah);
while (!skb_queue_empty(&mcast->pkt_queue)) {
++tx_dropped;
dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
}
netif_tx_lock_bh(dev);
dev->stats.tx_dropped += tx_dropped;
netif_tx_unlock_bh(dev);
kfree(mcast);
}
static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
int can_sleep)
{
struct ipoib_mcast *mcast;
mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC);
if (!mcast)
return NULL;
mcast->dev = dev;
mcast->created = jiffies;
mcast->backoff = 1;
INIT_LIST_HEAD(&mcast->list);
INIT_LIST_HEAD(&mcast->neigh_list);
skb_queue_head_init(&mcast->pkt_queue);
return mcast;
}
static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct rb_node *n = priv->multicast_tree.rb_node;
while (n) {
struct ipoib_mcast *mcast;
int ret;
mcast = rb_entry(n, struct ipoib_mcast, rb_node);
ret = memcmp(mgid, mcast->mcmember.mgid.raw,
sizeof (union ib_gid));
if (ret < 0)
n = n->rb_left;
else if (ret > 0)
n = n->rb_right;
else
return mcast;
}
return NULL;
}
static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;
while (*n) {
struct ipoib_mcast *tmcast;
int ret;
pn = *n;
tmcast = rb_entry(pn, struct ipoib_mcast, rb_node);
ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw,
sizeof (union ib_gid));
if (ret < 0)
n = &pn->rb_left;
else if (ret > 0)
n = &pn->rb_right;
else
return -EEXIST;
}
rb_link_node(&mcast->rb_node, pn, n);
rb_insert_color(&mcast->rb_node, &priv->multicast_tree);
return 0;
}
static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
struct ib_sa_mcmember_rec *mcmember)
{
struct net_device *dev = mcast->dev;
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_ah *ah;
int ret;
int set_qkey = 0;
mcast->mcmember = *mcmember;
/* Set the multicast MTU and cached Q_Key before we attach if it's
* the broadcast group.
*/
if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
sizeof (union ib_gid))) {
spin_lock_irq(&priv->lock);
if (!priv->broadcast) {
spin_unlock_irq(&priv->lock);
return -EAGAIN;
}
priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
spin_unlock_irq(&priv->lock);
priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
set_qkey = 1;
if (!ipoib_cm_admin_enabled(dev)) {
rtnl_lock();
dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
rtnl_unlock();
}
}
if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
ipoib_warn(priv, "multicast group %pI6 already attached\n",
mcast->mcmember.mgid.raw);
return 0;
}
ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
&mcast->mcmember.mgid, set_qkey);
if (ret < 0) {
ipoib_warn(priv, "couldn't attach QP to multicast group %pI6\n",
mcast->mcmember.mgid.raw);
clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags);
return ret;
}
}
{
struct ib_ah_attr av = {
.dlid = be16_to_cpu(mcast->mcmember.mlid),
.port_num = priv->port,
.sl = mcast->mcmember.sl,
.ah_flags = IB_AH_GRH,
.static_rate = mcast->mcmember.rate,
.grh = {
.flow_label = be32_to_cpu(mcast->mcmember.flow_label),
.hop_limit = mcast->mcmember.hop_limit,
.sgid_index = 0,
.traffic_class = mcast->mcmember.traffic_class
}
};
av.grh.dgid = mcast->mcmember.mgid;
ah = ipoib_create_ah(dev, priv->pd, &av);
if (IS_ERR(ah)) {
ipoib_warn(priv, "ib_address_create failed %ld\n",
-PTR_ERR(ah));
/* use original error */
return PTR_ERR(ah);
} else {
spin_lock_irq(&priv->lock);
mcast->ah = ah;
spin_unlock_irq(&priv->lock);
ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n",
mcast->mcmember.mgid.raw,
mcast->ah->ah,
be16_to_cpu(mcast->mcmember.mlid),
mcast->mcmember.sl);
}
}
/* actually send any queued packets */
netif_tx_lock_bh(dev);
while (!skb_queue_empty(&mcast->pkt_queue)) {
struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
netif_tx_unlock_bh(dev);
skb->dev = dev;
if (dev_queue_xmit(skb))
ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n");
netif_tx_lock_bh(dev);
}
netif_tx_unlock_bh(dev);
return 0;
}
static int
ipoib_mcast_sendonly_join_complete(int status,
struct ib_sa_multicast *multicast)
{
struct ipoib_mcast *mcast = multicast->context;
struct net_device *dev = mcast->dev;
/* We trap for port events ourselves. */
if (status == -ENETRESET)
return 0;
if (!status)
status = ipoib_mcast_join_finish(mcast, &multicast->rec);
if (status) {
if (mcast->logcount++ < 20)
ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n",
mcast->mcmember.mgid.raw, status);
/* Flush out any queued packets */
netif_tx_lock_bh(dev);
while (!skb_queue_empty(&mcast->pkt_queue)) {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
}
netif_tx_unlock_bh(dev);
/* Clear the busy flag so we try again */
status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
&mcast->flags);
}
return status;
}
static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
{
struct net_device *dev = mcast->dev;
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_sa_mcmember_rec rec = {
#if 0 /* Some SMs don't support send-only yet */
.join_state = 4
#else
.join_state = 1
#endif
};
int ret = 0;
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
return -ENODEV;
}
if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
return -EBUSY;
}
rec.mgid = mcast->mcmember.mgid;
rec.port_gid = priv->local_gid;
rec.pkey = cpu_to_be16(priv->pkey);
mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
priv->port, &rec,
IB_SA_MCMEMBER_REC_MGID |
IB_SA_MCMEMBER_REC_PORT_GID |
IB_SA_MCMEMBER_REC_PKEY |
IB_SA_MCMEMBER_REC_JOIN_STATE,
GFP_ATOMIC,
ipoib_mcast_sendonly_join_complete,
mcast);
if (IS_ERR(mcast->mc)) {
ret = PTR_ERR(mcast->mc);
clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
ret);
} else {
ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n",
mcast->mcmember.mgid.raw);
}
return ret;
}
void ipoib_mcast_carrier_on_task(struct work_struct *work)
{
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
carrier_on_task);
struct ib_port_attr attr;
/*
* Take rtnl_lock to avoid racing with ipoib_stop() and
* turning the carrier back on while a device is being
* removed.
*/
if (ib_query_port(priv->ca, priv->port, &attr) ||
attr.state != IB_PORT_ACTIVE) {
ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
return;
}
rtnl_lock();
netif_carrier_on(priv->dev);
rtnl_unlock();
}
static int ipoib_mcast_join_complete(int status,
struct ib_sa_multicast *multicast)
{
struct ipoib_mcast *mcast = multicast->context;
struct net_device *dev = mcast->dev;
struct ipoib_dev_priv *priv = netdev_priv(dev);
ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
mcast->mcmember.mgid.raw, status);
/* We trap for port events ourselves. */
if (status == -ENETRESET) {
status = 0;
goto out;
}
if (!status)
status = ipoib_mcast_join_finish(mcast, &multicast->rec);
if (!status) {
mcast->backoff = 1;
mutex_lock(&mcast_mutex);
if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
queue_delayed_work(ipoib_workqueue,
&priv->mcast_task, 0);
mutex_unlock(&mcast_mutex);
/*
* Defer carrier on work to ipoib_workqueue to avoid a
* deadlock on rtnl_lock here.
*/
if (mcast == priv->broadcast)
queue_work(ipoib_workqueue, &priv->carrier_on_task);
status = 0;
goto out;
}
if (mcast->logcount++ < 20) {
if (status == -ETIMEDOUT || status == -EAGAIN) {
ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
mcast->mcmember.mgid.raw, status);
} else {
ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
mcast->mcmember.mgid.raw, status);
}
}
mcast->backoff *= 2;
if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
/* Clear the busy flag so we try again */
status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
mutex_lock(&mcast_mutex);
spin_lock_irq(&priv->lock);
if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
mcast->backoff * HZ);
spin_unlock_irq(&priv->lock);
mutex_unlock(&mcast_mutex);
out:
complete(&mcast->done);
return status;
}
static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
int create)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_sa_mcmember_rec rec = {
.join_state = 1
};
ib_sa_comp_mask comp_mask;
int ret = 0;
ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw);
rec.mgid = mcast->mcmember.mgid;
rec.port_gid = priv->local_gid;
rec.pkey = cpu_to_be16(priv->pkey);
comp_mask =
IB_SA_MCMEMBER_REC_MGID |
IB_SA_MCMEMBER_REC_PORT_GID |
IB_SA_MCMEMBER_REC_PKEY |
IB_SA_MCMEMBER_REC_JOIN_STATE;
if (create) {
comp_mask |=
IB_SA_MCMEMBER_REC_QKEY |
IB_SA_MCMEMBER_REC_MTU_SELECTOR |
IB_SA_MCMEMBER_REC_MTU |
IB_SA_MCMEMBER_REC_TRAFFIC_CLASS |
IB_SA_MCMEMBER_REC_RATE_SELECTOR |
IB_SA_MCMEMBER_REC_RATE |
IB_SA_MCMEMBER_REC_SL |
IB_SA_MCMEMBER_REC_FLOW_LABEL |
IB_SA_MCMEMBER_REC_HOP_LIMIT;
rec.qkey = priv->broadcast->mcmember.qkey;
rec.mtu_selector = IB_SA_EQ;
rec.mtu = priv->broadcast->mcmember.mtu;
rec.traffic_class = priv->broadcast->mcmember.traffic_class;
rec.rate_selector = IB_SA_EQ;
rec.rate = priv->broadcast->mcmember.rate;
rec.sl = priv->broadcast->mcmember.sl;
rec.flow_label = priv->broadcast->mcmember.flow_label;
rec.hop_limit = priv->broadcast->mcmember.hop_limit;
}
set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
init_completion(&mcast->done);
set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags);
mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
&rec, comp_mask, GFP_KERNEL,
ipoib_mcast_join_complete, mcast);
if (IS_ERR(mcast->mc)) {
clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
complete(&mcast->done);
ret = PTR_ERR(mcast->mc);
ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
mcast->backoff *= 2;
if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
mutex_lock(&mcast_mutex);
if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
queue_delayed_work(ipoib_workqueue,
&priv->mcast_task,
mcast->backoff * HZ);
mutex_unlock(&mcast_mutex);
}
}
void ipoib_mcast_join_task(struct work_struct *work)
{
struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, mcast_task.work);
struct net_device *dev = priv->dev;
struct ib_port_attr port_attr;
if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
return;
if (ib_query_port(priv->ca, priv->port, &port_attr) ||
port_attr.state != IB_PORT_ACTIVE) {
ipoib_dbg(priv, "port state is not ACTIVE (state = %d) suspending join task\n",
port_attr.state);
return;
}
priv->local_lid = port_attr.lid;
if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
ipoib_warn(priv, "ib_query_gid() failed\n");
else
memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
if (!priv->broadcast) {
struct ipoib_mcast *broadcast;
if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
return;
broadcast = ipoib_mcast_alloc(dev, 1);
if (!broadcast) {
ipoib_warn(priv, "failed to allocate broadcast group\n");
mutex_lock(&mcast_mutex);
if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
queue_delayed_work(ipoib_workqueue,
&priv->mcast_task, HZ);
mutex_unlock(&mcast_mutex);
return;
}
spin_lock_irq(&priv->lock);
memcpy(broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
sizeof (union ib_gid));
priv->broadcast = broadcast;
__ipoib_mcast_add(dev, priv->broadcast);
spin_unlock_irq(&priv->lock);
}
if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
ipoib_mcast_join(dev, priv->broadcast, 0);
return;
}
while (1) {
struct ipoib_mcast *mcast = NULL;
spin_lock_irq(&priv->lock);
list_for_each_entry(mcast, &priv->multicast_list, list) {
if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
&& !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
&& !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
/* Found the next unjoined group */
break;
}
}
spin_unlock_irq(&priv->lock);
if (&mcast->list == &priv->multicast_list) {
/* All done */
break;
}
ipoib_mcast_join(dev, mcast, 1);
return;
}
ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
clear_bit(IPOIB_MCAST_RUN, &priv->flags);
}
int ipoib_mcast_start_thread(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
ipoib_dbg_mcast(priv, "starting multicast thread\n");
mutex_lock(&mcast_mutex);
if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
mutex_unlock(&mcast_mutex);
return 0;
}
int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
ipoib_dbg_mcast(priv, "stopping multicast thread\n");
mutex_lock(&mcast_mutex);
clear_bit(IPOIB_MCAST_RUN, &priv->flags);
cancel_delayed_work(&priv->mcast_task);
mutex_unlock(&mcast_mutex);
if (flush)
flush_workqueue(ipoib_workqueue);
return 0;
}
static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret = 0;
if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
ib_sa_free_multicast(mcast->mc);
if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
ipoib_dbg_mcast(priv, "leaving MGID %pI6\n",
mcast->mcmember.mgid.raw);
/* Remove ourselves from the multicast group */
ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid,
be16_to_cpu(mcast->mcmember.mlid));
if (ret)
ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
}
return 0;
}
void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_mcast *mcast;
unsigned long flags;
void *mgid = daddr + 4;
spin_lock_irqsave(&priv->lock, flags);
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags) ||
!priv->broadcast ||
!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
goto unlock;
}
mcast = __ipoib_mcast_find(dev, mgid);
if (!mcast) {
/* Let's create a new send only group now */
ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
mgid);
mcast = ipoib_mcast_alloc(dev, 0);
if (!mcast) {
ipoib_warn(priv, "unable to allocate memory for "
"multicast structure\n");
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
goto out;
}
set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
__ipoib_mcast_add(dev, mcast);
list_add_tail(&mcast->list, &priv->multicast_list);
}
if (!mcast->ah) {
if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)
skb_queue_tail(&mcast->pkt_queue, skb);
else {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
}
if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
ipoib_dbg_mcast(priv, "no address vector, "
"but multicast join already started\n");
else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
ipoib_mcast_sendonly_join(mcast);
/*
* If lookup completes between here and out:, don't
* want to send packet twice.
*/
mcast = NULL;
}
out:
if (mcast && mcast->ah) {
struct ipoib_neigh *neigh;
spin_unlock_irqrestore(&priv->lock, flags);
neigh = ipoib_neigh_get(dev, daddr);
spin_lock_irqsave(&priv->lock, flags);
if (!neigh) {
neigh = ipoib_neigh_alloc(daddr, dev);
if (neigh) {
kref_get(&mcast->ah->ref);
neigh->ah = mcast->ah;
list_add_tail(&neigh->list, &mcast->neigh_list);
}
}
spin_unlock_irqrestore(&priv->lock, flags);
ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
if (neigh)
ipoib_neigh_put(neigh);
return;
}
unlock:
spin_unlock_irqrestore(&priv->lock, flags);
}
void ipoib_mcast_dev_flush(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
LIST_HEAD(remove_list);
struct ipoib_mcast *mcast, *tmcast;
unsigned long flags;
ipoib_dbg_mcast(priv, "flushing multicast list\n");
spin_lock_irqsave(&priv->lock, flags);
list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
list_del(&mcast->list);
rb_erase(&mcast->rb_node, &priv->multicast_tree);
list_add_tail(&mcast->list, &remove_list);
}
if (priv->broadcast) {
rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
list_add_tail(&priv->broadcast->list, &remove_list);
priv->broadcast = NULL;
}
spin_unlock_irqrestore(&priv->lock, flags);
/* seperate between the wait to the leave*/
list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags))
wait_for_completion(&mcast->done);
list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
ipoib_mcast_leave(dev, mcast);
ipoib_mcast_free(mcast);
}
}
static int ipoib_mcast_addr_is_valid(const u8 *addr, const u8 *broadcast)
{
/* reserved QPN, prefix, scope */
if (memcmp(addr, broadcast, 6))
return 0;
/* signature lower, pkey */
if (memcmp(addr + 7, broadcast + 7, 3))
return 0;
return 1;
}
void ipoib_mcast_restart_task(struct work_struct *work)
{
struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, restart_task);
struct net_device *dev = priv->dev;
struct netdev_hw_addr *ha;
struct ipoib_mcast *mcast, *tmcast;
LIST_HEAD(remove_list);
unsigned long flags;
struct ib_sa_mcmember_rec rec;
ipoib_dbg_mcast(priv, "restarting multicast task\n");
ipoib_mcast_stop_thread(dev, 0);
local_irq_save(flags);
netif_addr_lock(dev);
spin_lock(&priv->lock);
/*
* Unfortunately, the networking core only gives us a list of all of
* the multicast hardware addresses. We need to figure out which ones
* are new and which ones have been removed
*/
/* Clear out the found flag */
list_for_each_entry(mcast, &priv->multicast_list, list)
clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
/* Mark all of the entries that are found or don't exist */
netdev_for_each_mc_addr(ha, dev) {
union ib_gid mgid;
if (!ipoib_mcast_addr_is_valid(ha->addr, dev->broadcast))
continue;
memcpy(mgid.raw, ha->addr + 4, sizeof mgid);
mcast = __ipoib_mcast_find(dev, &mgid);
if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
struct ipoib_mcast *nmcast;
/* ignore group which is directly joined by userspace */
if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) &&
!ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) {
ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %pI6\n",
mgid.raw);
continue;
}
/* Not found or send-only group, let's add a new entry */
ipoib_dbg_mcast(priv, "adding multicast entry for mgid %pI6\n",
mgid.raw);
nmcast = ipoib_mcast_alloc(dev, 0);
if (!nmcast) {
ipoib_warn(priv, "unable to allocate memory for multicast structure\n");
continue;
}
set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags);
nmcast->mcmember.mgid = mgid;
if (mcast) {
/* Destroy the send only entry */
list_move_tail(&mcast->list, &remove_list);
rb_replace_node(&mcast->rb_node,
&nmcast->rb_node,
&priv->multicast_tree);
} else
__ipoib_mcast_add(dev, nmcast);
list_add_tail(&nmcast->list, &priv->multicast_list);
}
if (mcast)
set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
}
/* Remove all of the entries don't exist anymore */
list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) &&
!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
ipoib_dbg_mcast(priv, "deleting multicast group %pI6\n",
mcast->mcmember.mgid.raw);
rb_erase(&mcast->rb_node, &priv->multicast_tree);
/* Move to the remove list */
list_move_tail(&mcast->list, &remove_list);
}
}
spin_unlock(&priv->lock);
netif_addr_unlock(dev);
local_irq_restore(flags);
/* We have to cancel outside of the spinlock */
list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
ipoib_mcast_leave(mcast->dev, mcast);
ipoib_mcast_free(mcast);
}
if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
ipoib_mcast_start_thread(dev);
}
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)
{
struct ipoib_mcast_iter *iter;
iter = kmalloc(sizeof *iter, GFP_KERNEL);
if (!iter)
return NULL;
iter->dev = dev;
memset(iter->mgid.raw, 0, 16);
if (ipoib_mcast_iter_next(iter)) {
kfree(iter);
return NULL;
}
return iter;
}
int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)
{
struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
struct rb_node *n;
struct ipoib_mcast *mcast;
int ret = 1;
spin_lock_irq(&priv->lock);
n = rb_first(&priv->multicast_tree);
while (n) {
mcast = rb_entry(n, struct ipoib_mcast, rb_node);
if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw,
sizeof (union ib_gid)) < 0) {
iter->mgid = mcast->mcmember.mgid;
iter->created = mcast->created;
iter->queuelen = skb_queue_len(&mcast->pkt_queue);
iter->complete = !!mcast->ah;
iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY));
ret = 0;
break;
}
n = rb_next(n);
}
spin_unlock_irq(&priv->lock);
return ret;
}
void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter,
union ib_gid *mgid,
unsigned long *created,
unsigned int *queuelen,
unsigned int *complete,
unsigned int *send_only)
{
*mgid = iter->mgid;
*created = iter->created;
*queuelen = iter->queuelen;
*complete = iter->complete;
*send_only = iter->send_only;
}
#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */

View file

@ -0,0 +1,182 @@
/*
* Copyright (c) 2012 Mellanox Technologies. - All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/netdevice.h>
#include <linux/if_arp.h> /* For ARPHRD_xxx */
#include <linux/module.h>
#include <net/rtnetlink.h>
#include "ipoib.h"
static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = {
[IFLA_IPOIB_PKEY] = { .type = NLA_U16 },
[IFLA_IPOIB_MODE] = { .type = NLA_U16 },
[IFLA_IPOIB_UMCAST] = { .type = NLA_U16 },
};
static int ipoib_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
u16 val;
if (nla_put_u16(skb, IFLA_IPOIB_PKEY, priv->pkey))
goto nla_put_failure;
val = test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
if (nla_put_u16(skb, IFLA_IPOIB_MODE, val))
goto nla_put_failure;
val = test_bit(IPOIB_FLAG_UMCAST, &priv->flags);
if (nla_put_u16(skb, IFLA_IPOIB_UMCAST, val))
goto nla_put_failure;
return 0;
nla_put_failure:
return -EMSGSIZE;
}
static int ipoib_changelink(struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
u16 mode, umcast;
int ret = 0;
if (data[IFLA_IPOIB_MODE]) {
mode = nla_get_u16(data[IFLA_IPOIB_MODE]);
if (mode == IPOIB_MODE_DATAGRAM)
ret = ipoib_set_mode(dev, "datagram\n");
else if (mode == IPOIB_MODE_CONNECTED)
ret = ipoib_set_mode(dev, "connected\n");
else
ret = -EINVAL;
if (ret < 0)
goto out_err;
}
if (data[IFLA_IPOIB_UMCAST]) {
umcast = nla_get_u16(data[IFLA_IPOIB_UMCAST]);
ipoib_set_umcast(dev, umcast);
}
out_err:
return ret;
}
static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct net_device *pdev;
struct ipoib_dev_priv *ppriv;
u16 child_pkey;
int err;
if (!tb[IFLA_LINK])
return -EINVAL;
pdev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
if (!pdev || pdev->type != ARPHRD_INFINIBAND)
return -ENODEV;
ppriv = netdev_priv(pdev);
if (test_bit(IPOIB_FLAG_SUBINTERFACE, &ppriv->flags)) {
ipoib_warn(ppriv, "child creation disallowed for child devices\n");
return -EINVAL;
}
if (!data || !data[IFLA_IPOIB_PKEY]) {
ipoib_dbg(ppriv, "no pkey specified, using parent pkey\n");
child_pkey = ppriv->pkey;
} else
child_pkey = nla_get_u16(data[IFLA_IPOIB_PKEY]);
if (child_pkey == 0 || child_pkey == 0x8000)
return -EINVAL;
/*
* Set the full membership bit, so that we join the right
* broadcast group, etc.
*/
child_pkey |= 0x8000;
err = __ipoib_vlan_add(ppriv, netdev_priv(dev), child_pkey, IPOIB_RTNL_CHILD);
if (!err && data)
err = ipoib_changelink(dev, tb, data);
return err;
}
static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head *head)
{
struct ipoib_dev_priv *priv, *ppriv;
priv = netdev_priv(dev);
ppriv = netdev_priv(priv->parent);
down_write(&ppriv->vlan_rwsem);
unregister_netdevice_queue(dev, head);
list_del(&priv->list);
up_write(&ppriv->vlan_rwsem);
}
static size_t ipoib_get_size(const struct net_device *dev)
{
return nla_total_size(2) + /* IFLA_IPOIB_PKEY */
nla_total_size(2) + /* IFLA_IPOIB_MODE */
nla_total_size(2); /* IFLA_IPOIB_UMCAST */
}
static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
.kind = "ipoib",
.maxtype = IFLA_IPOIB_MAX,
.policy = ipoib_policy,
.priv_size = sizeof(struct ipoib_dev_priv),
.setup = ipoib_setup,
.newlink = ipoib_new_child_link,
.changelink = ipoib_changelink,
.dellink = ipoib_unregister_child_dev,
.get_size = ipoib_get_size,
.fill_info = ipoib_fill_info,
};
int __init ipoib_netlink_init(void)
{
return rtnl_link_register(&ipoib_link_ops);
}
void __exit ipoib_netlink_fini(void)
{
rtnl_link_unregister(&ipoib_link_ops);
}
MODULE_ALIAS_RTNL_LINK("ipoib");

View file

@ -0,0 +1,297 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/slab.h>
#include "ipoib.h"
int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_attr *qp_attr = NULL;
int ret;
u16 pkey_index;
if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
ret = -ENXIO;
goto out;
}
set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
if (set_qkey) {
ret = -ENOMEM;
qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
if (!qp_attr)
goto out;
/* set correct QKey for QP */
qp_attr->qkey = priv->qkey;
ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY);
if (ret) {
ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret);
goto out;
}
}
/* attach QP to multicast group */
ret = ib_attach_mcast(priv->qp, mgid, mlid);
if (ret)
ipoib_warn(priv, "failed to attach to multicast group, ret = %d\n", ret);
out:
kfree(qp_attr);
return ret;
}
int ipoib_init_qp(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
struct ib_qp_attr qp_attr;
int attr_mask;
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
return -1;
qp_attr.qp_state = IB_QPS_INIT;
qp_attr.qkey = 0;
qp_attr.port_num = priv->port;
qp_attr.pkey_index = priv->pkey_index;
attr_mask =
IB_QP_QKEY |
IB_QP_PORT |
IB_QP_PKEY_INDEX |
IB_QP_STATE;
ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
if (ret) {
ipoib_warn(priv, "failed to modify QP to init, ret = %d\n", ret);
goto out_fail;
}
qp_attr.qp_state = IB_QPS_RTR;
/* Can't set this in a INIT->RTR transition */
attr_mask &= ~IB_QP_PORT;
ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
if (ret) {
ipoib_warn(priv, "failed to modify QP to RTR, ret = %d\n", ret);
goto out_fail;
}
qp_attr.qp_state = IB_QPS_RTS;
qp_attr.sq_psn = 0;
attr_mask |= IB_QP_SQ_PSN;
attr_mask &= ~IB_QP_PKEY_INDEX;
ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
if (ret) {
ipoib_warn(priv, "failed to modify QP to RTS, ret = %d\n", ret);
goto out_fail;
}
return 0;
out_fail:
qp_attr.qp_state = IB_QPS_RESET;
if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
ipoib_warn(priv, "Failed to modify QP to RESET state\n");
return ret;
}
int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_init_attr init_attr = {
.cap = {
.max_send_wr = ipoib_sendq_size,
.max_recv_wr = ipoib_recvq_size,
.max_send_sge = 1,
.max_recv_sge = IPOIB_UD_RX_SG
},
.sq_sig_type = IB_SIGNAL_ALL_WR,
.qp_type = IB_QPT_UD
};
int ret, size;
int i;
priv->pd = ib_alloc_pd(priv->ca);
if (IS_ERR(priv->pd)) {
printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name);
return -ENODEV;
}
priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(priv->mr)) {
printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name);
goto out_free_pd;
}
size = ipoib_recvq_size + 1;
ret = ipoib_cm_dev_init(dev);
if (!ret) {
size += ipoib_sendq_size;
if (ipoib_cm_has_srq(dev))
size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */
else
size += ipoib_recvq_size * ipoib_max_conn_qp;
}
priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
if (IS_ERR(priv->recv_cq)) {
printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name);
goto out_free_mr;
}
priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL,
dev, ipoib_sendq_size, 0);
if (IS_ERR(priv->send_cq)) {
printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name);
goto out_free_recv_cq;
}
if (ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP))
goto out_free_send_cq;
init_attr.send_cq = priv->send_cq;
init_attr.recv_cq = priv->recv_cq;
if (priv->hca_caps & IB_DEVICE_UD_TSO)
init_attr.create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK)
init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING)
init_attr.create_flags |= IB_QP_CREATE_NETIF_QP;
if (dev->features & NETIF_F_SG)
init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
priv->qp = ib_create_qp(priv->pd, &init_attr);
if (IS_ERR(priv->qp)) {
printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
goto out_free_send_cq;
}
priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff;
priv->dev->dev_addr[3] = (priv->qp->qp_num ) & 0xff;
for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
priv->tx_sge[i].lkey = priv->mr->lkey;
priv->tx_wr.opcode = IB_WR_SEND;
priv->tx_wr.sg_list = priv->tx_sge;
priv->tx_wr.send_flags = IB_SEND_SIGNALED;
priv->rx_sge[0].lkey = priv->mr->lkey;
if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
priv->rx_sge[0].length = IPOIB_UD_HEAD_SIZE;
priv->rx_sge[1].length = PAGE_SIZE;
priv->rx_sge[1].lkey = priv->mr->lkey;
priv->rx_wr.num_sge = IPOIB_UD_RX_SG;
} else {
priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
priv->rx_wr.num_sge = 1;
}
priv->rx_wr.next = NULL;
priv->rx_wr.sg_list = priv->rx_sge;
return 0;
out_free_send_cq:
ib_destroy_cq(priv->send_cq);
out_free_recv_cq:
ib_destroy_cq(priv->recv_cq);
out_free_mr:
ib_dereg_mr(priv->mr);
ipoib_cm_dev_cleanup(dev);
out_free_pd:
ib_dealloc_pd(priv->pd);
return -ENODEV;
}
void ipoib_transport_dev_cleanup(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
if (priv->qp) {
if (ib_destroy_qp(priv->qp))
ipoib_warn(priv, "ib_qp_destroy failed\n");
priv->qp = NULL;
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
}
if (ib_destroy_cq(priv->send_cq))
ipoib_warn(priv, "ib_cq_destroy (send) failed\n");
if (ib_destroy_cq(priv->recv_cq))
ipoib_warn(priv, "ib_cq_destroy (recv) failed\n");
ipoib_cm_dev_cleanup(dev);
if (ib_dereg_mr(priv->mr))
ipoib_warn(priv, "ib_dereg_mr failed\n");
if (ib_dealloc_pd(priv->pd))
ipoib_warn(priv, "ib_dealloc_pd failed\n");
}
void ipoib_event(struct ib_event_handler *handler,
struct ib_event *record)
{
struct ipoib_dev_priv *priv =
container_of(handler, struct ipoib_dev_priv, event_handler);
if (record->element.port_num != priv->port)
return;
ipoib_dbg(priv, "Event %d on device %s port %d\n", record->event,
record->device->name, record->element.port_num);
if (record->event == IB_EVENT_SM_CHANGE ||
record->event == IB_EVENT_CLIENT_REREGISTER) {
queue_work(ipoib_workqueue, &priv->flush_light);
} else if (record->event == IB_EVENT_PORT_ERR ||
record->event == IB_EVENT_PORT_ACTIVE ||
record->event == IB_EVENT_LID_CHANGE) {
queue_work(ipoib_workqueue, &priv->flush_normal);
} else if (record->event == IB_EVENT_PKEY_CHANGE) {
queue_work(ipoib_workqueue, &priv->flush_heavy);
}
}

View file

@ -0,0 +1,209 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/seq_file.h>
#include <asm/uaccess.h>
#include "ipoib.h"
static ssize_t show_parent(struct device *d, struct device_attribute *attr,
char *buf)
{
struct net_device *dev = to_net_dev(d);
struct ipoib_dev_priv *priv = netdev_priv(dev);
return sprintf(buf, "%s\n", priv->parent->name);
}
static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL);
int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
u16 pkey, int type)
{
int result;
priv->max_ib_mtu = ppriv->max_ib_mtu;
/* MTU will be reset when mcast join happens */
priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu;
set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
result = ipoib_set_dev_features(priv, ppriv->ca);
if (result)
goto err;
priv->pkey = pkey;
memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN);
priv->dev->broadcast[8] = pkey >> 8;
priv->dev->broadcast[9] = pkey & 0xff;
result = ipoib_dev_init(priv->dev, ppriv->ca, ppriv->port);
if (result < 0) {
ipoib_warn(ppriv, "failed to initialize subinterface: "
"device %s, port %d",
ppriv->ca->name, ppriv->port);
goto err;
}
result = register_netdevice(priv->dev);
if (result) {
ipoib_warn(priv, "failed to initialize; error %i", result);
goto register_failed;
}
priv->parent = ppriv->dev;
ipoib_create_debug_files(priv->dev);
/* RTNL childs don't need proprietary sysfs entries */
if (type == IPOIB_LEGACY_CHILD) {
if (ipoib_cm_add_mode_attr(priv->dev))
goto sysfs_failed;
if (ipoib_add_pkey_attr(priv->dev))
goto sysfs_failed;
if (ipoib_add_umcast_attr(priv->dev))
goto sysfs_failed;
if (device_create_file(&priv->dev->dev, &dev_attr_parent))
goto sysfs_failed;
}
priv->child_type = type;
priv->dev->iflink = ppriv->dev->ifindex;
list_add_tail(&priv->list, &ppriv->child_intfs);
return 0;
sysfs_failed:
result = -ENOMEM;
ipoib_delete_debug_files(priv->dev);
unregister_netdevice(priv->dev);
register_failed:
ipoib_dev_cleanup(priv->dev);
err:
return result;
}
int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
{
struct ipoib_dev_priv *ppriv, *priv;
char intf_name[IFNAMSIZ];
struct ipoib_dev_priv *tpriv;
int result;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
ppriv = netdev_priv(pdev);
snprintf(intf_name, sizeof intf_name, "%s.%04x",
ppriv->dev->name, pkey);
priv = ipoib_intf_alloc(intf_name);
if (!priv)
return -ENOMEM;
if (!rtnl_trylock())
return restart_syscall();
down_write(&ppriv->vlan_rwsem);
/*
* First ensure this isn't a duplicate. We check the parent device and
* then all of the legacy child interfaces to make sure the Pkey
* doesn't match.
*/
if (ppriv->pkey == pkey) {
result = -ENOTUNIQ;
goto out;
}
list_for_each_entry(tpriv, &ppriv->child_intfs, list) {
if (tpriv->pkey == pkey &&
tpriv->child_type == IPOIB_LEGACY_CHILD) {
result = -ENOTUNIQ;
goto out;
}
}
result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD);
out:
up_write(&ppriv->vlan_rwsem);
if (result)
free_netdev(priv->dev);
rtnl_unlock();
return result;
}
int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
{
struct ipoib_dev_priv *ppriv, *priv, *tpriv;
struct net_device *dev = NULL;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
ppriv = netdev_priv(pdev);
if (!rtnl_trylock())
return restart_syscall();
down_write(&ppriv->vlan_rwsem);
list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
if (priv->pkey == pkey &&
priv->child_type == IPOIB_LEGACY_CHILD) {
unregister_netdevice(priv->dev);
list_del(&priv->list);
dev = priv->dev;
break;
}
}
up_write(&ppriv->vlan_rwsem);
rtnl_unlock();
if (dev) {
free_netdev(dev);
return 0;
}
return -ENODEV;
}