/* blob: 21602edd1b65413a7b379901a799ac0ebc8bde61 [file] [log] [blame] */
/*********************************************************************
*
* Description: Blue Gene driver exposing col and torus as a NIC
*
* Copyright (c) 2007, 2010 International Business Machines
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, see <http://www.gnu.org/licenses>.
*
* Authors:
* Chris Ward <tjcw@uk.ibm.com>
* Volkmar Uhlig <vuhlig@us.ibm.com>
* Andrew Tauferner <ataufer@us.ibm.com>
*
********************************************************************/
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/interrupt.h>
#include <linux/ioport.h>
#include <linux/mm.h>
#include <linux/cdev.h>
#include <linux/proc_fs.h>
#include <linux/etherdevice.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ip.h>
#include <linux/workqueue.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/page.h>
#include <asm/bgp_personality.h>
#include <asm/delay.h>
#include <asm/bluegene.h>
#include "bglink.h"
#include "bgnet.h"
#include "bgcol.h"
/* #include "bgtor.h" */
/**********************************************************************
* defines
**********************************************************************/
#define DRV_NAME "bgnet"
#define DRV_VERSION "0.5"
#define DRV_DESC "Blue Gene NIC (IBM)"
MODULE_DESCRIPTION(DRV_DESC);
MODULE_AUTHOR("IBM");
/* #define TRUST_TREE_CRC */
#include <linux/KernelFxLog.h>
#include "../bgp_network/bgp_net_traceflags.h"
#define XTRACEN(i,x...)
#if defined(REQUIRE_TRACE)
#define TRACE(x...) { printk(KERN_EMERG x) ; }
#define TRACE1(x...) { printk(KERN_EMERG x) ; }
#define TRACE2(x...) { printk(KERN_EMERG x) ; }
#define TRACEN(i,x...) { printk(KERN_EMERG x) ; }
#define TRACED(x...) { printk(KERN_EMERG x) ; }
#elif defined(CONFIG_BLUEGENE_COLLECTIVE_TRACE)
#define TRACE(x...) KernelFxLog(bgcol_debug_tracemask & k_t_general,x)
#define TRACE1(x...) KernelFxLog(bgcol_debug_tracemask & k_t_lowvol,x)
#define TRACE2(x...) KernelFxLog(bgcol_debug_tracemask & k_t_detail,x)
#define TRACEN(i,x...) KernelFxLog(bgcol_debug_tracemask & (i),x)
#define TRACED(x...) KernelFxLog(1,x)
#define TRACES(x...) KernelFxLog(1,x)
#else
#define TRACE(x...)
#define TRACE1(x...)
#define TRACE2(x...)
#define TRACEN(i,x...)
#define TRACED(x...)
#define TRACES(x...)
#endif
/* An IPv4 address for slotting into a trace message */
#define NIPQ(X) ((X)>>24)&0xff,((X)>>16)&0xff,((X)>>8)&0xff,(X)&0xff
#define BGNET_FRAG_MTU 240
#define BGNET_MAX_MTU (BGNET_FRAG_MTU * 254)
#define BGNET_DEFAULT_MTU ETH_DATA_LEN
static BGP_Personality_t bgnet_personality;
/* static struct net_device *static_dev ; */
/* static struct bglink_proto bgnet_lnk; */
/* static DEFINE_SPINLOCK(bgnet_lock); */
static LIST_HEAD(bgnet_list);
/* Per-skb link state carried in the skb control buffer while a frame is
 * in flight on the collective network: the collective link header and the
 * destination routing header. */
struct skb_cb_lnk {
struct bglink_hdr_col lnkhdr;
union bgcol_header dest;
};
int bgtorus_start_xmit(struct sk_buff *skb, struct net_device *dev, unsigned int x, unsigned int y, unsigned int z) ;
/**********************************************************************
* Linux module
**********************************************************************/
MODULE_DESCRIPTION("BlueGene Ethernet driver");
MODULE_LICENSE("GPL");
int bgnic_driverparm = 0 ;
/* Dump up to 256 bytes of memory to the trace log, 32 bytes (8 words) per
 * line, tagged with 'label'.  Emits nothing unless the k_t_fifocontents
 * trace bit is enabled.
 *
 * The final row is copied into a zero-padded local buffer before printing
 * so we never read past 'address + length' when length is not a multiple
 * of 32 (the original printed a full 8-word row regardless, over-reading
 * by up to 31 bytes).
 */
static void dumpmem(const void *address, unsigned int length, const char * label)
{
unsigned int x ; /* unsigned: avoids signed/unsigned compare against 'length' */
TRACEN(k_t_fifocontents,"Memory dump, length=%d: %s",length,label) ;
if( length > 256 ) {
length = 256 ;
}
for (x=0;x<length;x+=32)
{
int v[8] ;
unsigned int remaining = length - x ;
memset(v, 0, sizeof(v)) ;
memcpy(v, address+x, remaining < 32 ? remaining : 32) ;
TRACEN(k_t_fifocontents,"%p: %08x %08x %08x %08x %08x %08x %08x %08x",
address+x,v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]
) ;
}
}
/**********************************************************************
* Linux' packet and skb management
**********************************************************************/
/* net_device open callback: mark the collective link as carrying ethernet
 * traffic and allow the stack to start queuing packets for transmit. */
static int bgnet_open(struct net_device* dev)
{
struct bgnet_dev* priv = netdev_priv(dev);

bgcol_eth_up(priv->bgcol) ; /* we now operate as an ethernet device */
TRACEN(k_t_napi,"netif_start_queue(dev=%p)",dev) ;
netif_start_queue(dev);
return 0;
}
/* net_device stop callback: stop the transmit queue and tell the collective
 * link we are no longer operating as an ethernet device. */
static int bgnet_stop(struct net_device* dev)
{
struct bgnet_dev* priv = netdev_priv(dev);

bgcol_eth_down(priv->bgcol) ; /* stop operating as ethernet */
TRACEN(k_t_napi,"netif_stop_queue(dev=%p)",dev) ;
netif_stop_queue(dev);
return 0;
}
/* net_device change_mtu callback.  Accepts 60..BGNET_MAX_MTU; propagates the
 * new size plus the ethernet header to the collective layer. */
static int bgnet_change_mtu(struct net_device *dev, int new_mtu)
{
struct bgnet_dev *priv = netdev_priv(dev);
int in_range = (new_mtu >= 60) && (new_mtu <= BGNET_MAX_MTU) ;

if( !in_range )
return -EINVAL;
dev->mtu = new_mtu;
bgcol_set_mtu(priv->bgcol, new_mtu + sizeof(struct ethhdr)) ;
return 0;
}
/* Record where the L4 checksum lives in this skb, for CHECKSUM_PARTIAL
 * handling: csum_start points at the transport header, csum_offset at the
 * TCP or UDP checksum field.  For non-IP frames, or IP protocols other than
 * TCP/UDP, csum_offset is left untouched.
 */
static inline void stamp_checksum_place_in_skb(struct sk_buff *skb)
{
struct ethhdr *eth = (struct ethhdr *)skb->data;
struct iphdr *iph = (struct iphdr *)((skb->data)+sizeof(struct ethhdr)) ;
unsigned int ip_proto = iph->protocol ;
skb->csum_start = skb_transport_header(skb) - skb->head;
/* h_proto is in network (big-endian) byte order on the wire; compare
 * against htons(ETH_P_IP) rather than the host-order constant.  This is
 * a no-op on this big-endian PowerPC target but is correct everywhere. */
if( eth->h_proto == htons(ETH_P_IP)) {
if( ip_proto == IPPROTO_TCP) skb->csum_offset = offsetof(struct tcphdr, check);
else if( ip_proto == IPPROTO_UDP) skb->csum_offset = offsetof(struct udphdr, check);
}
}
/*
* The hardware data rate on 'collective' is 6 bits/cycle, i.e. 5100Mb/s .
* We carry 240 bytes of payload in each 256 byte packet, and there are some bytes of 'overhead' as well
* (CRC, opcode, and a few others); giving a 'peak performance' TCP/IP data rate of a little under 4781 Mb/s .
* The 'collective' hardware should be able to do this in both directions simultaneously.
*
* Driving data into the compute fabric from the 10gE link can achieve more or less this, by using one core as
* interrupt handler for the 10gE and another core as interrupt handler for the collective, if you run (say)
* 16 TCP/IP sessions through the 10gE and the IO node, one to each compute node in the PSET.
*
* Driving data out of the compute fabric and into the 10gE in the normal way for linux device drivers causes
* the core handling the collective interrupt to go 100% busy; there are not enough cycles to drain the collective
* FIFO and also go through the linux networking stack. I have seen about 4Gb/s this way.
* To get the last 15% or so, it seems necessary to have more than one core helping with this work.
*
* I'm trying to do this by having one core handle the 'collective' interrupt and drain the FIFO, and then
* hand the sk_buff off to another core via a 'work queue', so that this second core can drive the linux
* network stack.
*
* I haven't measured the simultaneous-bidirectional data rate capability.
*
*/
/* Receive handler for the ethernet-over-collective protocol.
 * Called by the collective link layer with a reassembled skb and the link
 * header carrying head/tail padding byte counts.  Strips the padding, drops
 * frames we sent to ourselves (the hardware echoes broadcasts back), stamps
 * checksum metadata and hands the frame to the network stack.
 * Always returns 0.
 * NOTE(review): the '-1 != (int) proto' test suggests the link layer can
 * pass a (void *)-1 sentinel as 'proto' -- confirm against bglink. */
static int bgnet_receive(struct bg_col *bgcol, struct sk_buff *skb, struct bglink_hdr_col *lnkhdr, struct bglink_proto* proto)
{
TRACE("(>) skb=%p lnkhdr=%p proto=%p", skb,lnkhdr,proto) ;
if( skb != NULL && lnkhdr != NULL && proto != NULL && -1 != (int) proto )
{
/* 'private' was set to the net_device when the proto was registered */
struct net_device *dev = (struct net_device*)proto->private;
struct bgnet_dev *bgnet = netdev_priv(dev);
/* struct net_device *dev = (struct net_device*)((void *)bgnet - */
/* netdev_priv(NULL)); */
TRACE("bgnet rcvd pkt: data=%p, len=%d, head=%d, tail=%d, res len=%d [%s:%d]",
skb->data, skb->len, lnkhdr->opt.opt_net.pad_head,
lnkhdr->opt.opt_net.pad_tail, skb->len - lnkhdr->opt.opt_net.pad_head - lnkhdr->opt.opt_net.pad_tail, __func__, __LINE__);
/* if (skb->len % BGNET_FRAG_MTU != 0) */
/* printk("bgnet: received packet size not multiple of %d\n", BGNET_FRAG_MTU); */
/* skb_pull and trim check for over/underruns. For 0 size the
 * add/subtract is the same as a test */
__skb_pull(skb, lnkhdr->opt.opt_net.pad_head);
__skb_trim(skb, skb->len - lnkhdr->opt.opt_net.pad_tail);
if (lnkhdr->src_key == bgnet->bgcol_vector) {
/* drop ether packets that are from ourselves */
/* bg tree device sends packets to itself when broadcasting */
kfree_skb(skb);
return 0;
}
/* dump_skb(skb); */
dumpmem(skb->data,skb->len,"Frame delivered via collective") ;
skb->dev = dev;
skb->protocol = eth_type_trans(skb, dev);
/* The collective hardware CRC-checked the frame; if we trust it, mark
 * the skb so the stack does not re-verify (but stamp where a checksum
 * would go, in case the frame is forwarded off the BGP fabric). */
if ( k_trust_collective_crc) skb->ip_summed = CHECKSUM_PARTIAL ;
stamp_checksum_place_in_skb(skb) ;
/* #if defined(TRUST_TREE_CRC) */
/* skb->ip_summed = CHECKSUM_PARTIAL ; // Frame was checked by CRC, but we would need a checksum if it is being forwarded off the BGP fabric */
/* // // Packets from tree-local addresses have been verified by tree hardware */
/* // { */
/* // struct ethhdr *eth = (struct ethhdr *)skb->data; */
/* // if (bgnet->eth_mask == 0 || */
/* // ((bgnet->eth_mask & *(unsigned int *)(&eth->h_source[0])) == */
/* // (bgnet->eth_local))) */
/* // { */
/* // skb->ip_summed = CHECKSUM_UNNECESSARY ; */
/* // } */
/* // else */
/* // { */
/* // skb->ip_summed = CHECKSUM_NONE ; */
/* // } */
/* // } */
/* #endif */
TRACE("Delivering skb->dev=%p skb->protocol=%d skb->pkt_type=%d skb->ip_summed=%d ",
skb->dev, skb->protocol, skb->pkt_type, skb->ip_summed ) ;
dumpmem(skb->data,skb->len,"Frame after stripping header") ;
dev->last_rx = jiffies;
bgnet->stats.rx_packets++;
bgnet->stats.rx_bytes += skb->len;
TRACE("bgnet_receive before-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.tx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.tx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
bgnet->stats.rx_packets, bgnet->stats.tx_packets, bgnet->stats.rx_bytes, bgnet->stats.tx_bytes, bgnet->stats.rx_frame_errors) ;
/* TRACEN(k_t_napi,"netif_rx(skb=%p)",skb) ; // Only tracing the torus ... */
/* if( k_deliver_via_workqueue && bgnet->bgcol->deliver_via_workqueue ) */
/* { */
/* bgnet_deliver_via_workqueue(skb) ; */
/* } */
/* else */
/* { */
/* Hand the frame to the stack: direct delivery when NAPI is selected,
 * otherwise via the softirq backlog queue. */
#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
if( bgcol_diagnostic_use_napi)
{
{
TRACEN(k_t_napi|k_t_request,"netif_receive_skb(%p)",skb) ;
netif_receive_skb(skb) ;
}
}
else
{
netif_rx(skb);
}
#else
netif_rx(skb);
#endif
/* } */
TRACE("bgnet_receive after-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ;
}
TRACE("(<)") ;
return 0;
}
static int bgnet_receive_trimmed(struct bg_col *bgcol, struct sk_buff *skb, struct bglink_proto* proto, unsigned int src_key )
{
TRACE("(>) skb=%p proto=%p", skb,proto) ;
if( skb != NULL && proto != NULL && -1)
{
struct net_device *dev = (struct net_device*)proto->private;
struct bgnet_dev *bgnet = netdev_priv(dev);
/* struct net_device *dev = (struct net_device*)((void *)bgnet - */
/* netdev_priv(NULL)); */
TRACE("bgnet rcvd pkt: data=%p, len=%d",
skb->data, skb->len);
if( src_key != bgnet->bgcol_vector)
{
dumpmem(skb->data,skb->len,"Frame delivered via collective") ;
skb->dev = dev;
skb->protocol = eth_type_trans(skb, dev);
if ( k_trust_collective_crc) skb->ip_summed = CHECKSUM_PARTIAL ;
stamp_checksum_place_in_skb(skb) ;
TRACE("Delivering skb->dev=%p skb->protocol=%d skb->pkt_type=%d skb->ip_summed=%d ",
skb->dev, skb->protocol, skb->pkt_type, skb->ip_summed ) ;
dumpmem(skb->data,skb->len,"Frame after stripping header") ;
dev->last_rx = jiffies;
bgnet->stats.rx_packets++;
bgnet->stats.rx_bytes += skb->len;
TRACE("bgnet_receive before-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.tx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.tx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
bgnet->stats.rx_packets, bgnet->stats.tx_packets, bgnet->stats.rx_bytes, bgnet->stats.tx_bytes, bgnet->stats.rx_frame_errors) ;
/* TRACEN(k_t_napi,"netif_rx(skb=%p)",skb) ; // Only tracing the torus ... */
#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
if( bgcol_diagnostic_use_napi)
{
{
TRACEN(k_t_napi|k_t_request,"netif_receive_skb(%p)",skb) ;
netif_receive_skb(skb) ;
}
}
else
{
netif_rx(skb);
}
#else
netif_rx_ni(skb); // In a workqueue handler ...
#endif
TRACE("bgnet_receive after-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ;
}
else
{
/* a discardable self-send */
dev_kfree_skb(skb) ;
}
}
TRACE("(<)") ;
return 0;
}
/* A packet gets to the IO node, and needs 'reflecting' to the compute node(s) that want it. */
/* Reflection handler: a packet has reached the IO node and must be sent
 * back onto the collective so the addressed compute node(s) receive it.
 * Broadcast frames are additionally cloned and delivered to the local
 * stack before being re-transmitted.  Always returns 0.
 * NOTE(review): skb_clone(..., GFP_KERNEL) may sleep -- confirm this path
 * never runs in interrupt/atomic context. */
static int col_reflect(struct bg_col *bgcol, struct sk_buff *skb, struct bglink_hdr_col *lnkhdr,
struct bglink_proto* proto)
{
TRACE("(>) col_reflect skb=%p lnkhdr=%p proto=%p", skb,lnkhdr,proto) ;
if( skb != NULL && lnkhdr != NULL && proto != NULL && -1 != (int) proto )
{
/* 'private' was set to the net_device when the proto was registered */
struct net_device *dev = (struct net_device*)proto->private;
struct bgnet_dev *bgnet = netdev_priv(dev);
TRACE("bgnet rcvd pkt for reflection: data=%p, len=%d, head=%d, tail=%d, res len=%d [%s:%d]",
skb->data, skb->len, lnkhdr->opt.opt_net.pad_head,
lnkhdr->opt.opt_net.pad_tail, skb->len - lnkhdr->opt.opt_net.pad_head - lnkhdr->opt.opt_net.pad_tail, __func__, __LINE__);
/* if (skb->len % BGNET_FRAG_MTU != 0) */
/* printk("bgnet: received packet size not multiple of %d\n", BGNET_FRAG_MTU); */
/* skb_pull and trim check for over/underruns. For 0 size the
 * add/subtract is the same as a test */
__skb_pull(skb, lnkhdr->opt.opt_net.pad_head);
__skb_trim(skb, skb->len - lnkhdr->opt.opt_net.pad_tail);
/* A 'broadcast' packet needs delivering locally as well as reflecting */
{
struct ethhdr *eth = (struct ethhdr *)skb->data;
if (is_broadcast_ether_addr(eth->h_dest)) {
struct sk_buff *localskb = skb_clone(skb, GFP_KERNEL);
/* If the clone fails we silently skip local delivery and still reflect */
if( localskb )
{
dumpmem(localskb->data,localskb->len,"Frame delivered via tree (broadcast reflection)") ;
localskb->dev = dev;
localskb->protocol = eth_type_trans(localskb, dev);
localskb->ip_summed = CHECKSUM_UNNECESSARY ; /* Packet was from tree, h/w verified it */
TRACE("Delivering localskb->dev=%p localskb->protocol=%d localskb->pkt_type=%d localskb->ip_summed=%d ",
localskb->dev, localskb->protocol, localskb->pkt_type, localskb->ip_summed ) ;
dumpmem(localskb->data,localskb->len,"Frame after stripping header") ;
dev->last_rx = jiffies;
bgnet->stats.rx_packets++;
bgnet->stats.rx_bytes += localskb->len;
TRACE("col_reflect before-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ;
/* TRACEN(k_t_napi,"netif_rx(skb=%p)",localskb) ; // Only tracing the torus ... */
#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
if( bgcol_diagnostic_use_napi)
{
TRACEN(k_t_napi,"netif_receive_skb(%p)",localskb) ;
netif_receive_skb(localskb) ;
}
else
{
netif_rx(localskb);
}
#else
netif_rx(localskb) ;
#endif
TRACE("col_reflect after-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ;
}
}
}
/* dump_skb(skb); */
/* Re-transmit the original skb back onto the collective network */
col_start_xmit(skb, dev) ;
}
TRACE("(<) col_reflect") ;
return 0;
}
/* A packet gets to the IO node, and needs 'reflecting' to the compute node(s) that want it. */
/* Reflection handler for frames whose padding has already been stripped
 * ('trimmed' variant of col_reflect): re-transmit to the compute node(s),
 * delivering broadcast frames to the local stack as well.
 * 'src_key' is accepted for interface symmetry but not used here.
 * Always returns 0.
 * NOTE(review): skb_clone(..., GFP_KERNEL) may sleep -- confirm this path
 * never runs in interrupt/atomic context. */
static int col_reflect_trimmed(struct bg_col *bgcol, struct sk_buff *skb,
struct bglink_proto* proto, unsigned int src_key )
{
TRACE("(>) col_reflect skb=%p proto=%p", skb,proto) ;
if( skb != NULL && proto != NULL && -1 != (int) proto )
{
/* 'private' was set to the net_device when the proto was registered */
struct net_device *dev = (struct net_device*)proto->private;
struct bgnet_dev *bgnet = netdev_priv(dev);
TRACE("bgnet rcvd pkt for reflection: data=%p, len=%d",
skb->data, skb->len);
/* A 'broadcast' packet needs delivering locally as well as reflecting */
{
struct ethhdr *eth = (struct ethhdr *)skb->data;
if (is_broadcast_ether_addr(eth->h_dest)) {
struct sk_buff *localskb = skb_clone(skb, GFP_KERNEL);
/* If the clone fails we silently skip local delivery and still reflect */
if( localskb )
{
dumpmem(localskb->data,localskb->len,"Frame delivered via tree (broadcast reflection)") ;
localskb->dev = dev;
localskb->protocol = eth_type_trans(localskb, dev);
localskb->ip_summed = CHECKSUM_UNNECESSARY ; /* Packet was from tree, h/w verified it */
TRACE("Delivering localskb->dev=%p localskb->protocol=%d localskb->pkt_type=%d localskb->ip_summed=%d ",
localskb->dev, localskb->protocol, localskb->pkt_type, localskb->ip_summed ) ;
dumpmem(localskb->data,localskb->len,"Frame after stripping header") ;
dev->last_rx = jiffies;
bgnet->stats.rx_packets++;
bgnet->stats.rx_bytes += localskb->len;
TRACE("col_reflect before-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ;
/* TRACEN(k_t_napi,"netif_rx(skb=%p)",localskb) ; // Only tracing the torus ... */
#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
if( bgcol_diagnostic_use_napi)
{
TRACEN(k_t_napi,"netif_receive_skb(%p)",localskb) ;
netif_receive_skb(localskb) ;
}
else
{
netif_rx(localskb);
}
#else
netif_rx(localskb) ;
#endif
TRACE("col_reflect after-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ;
}
}
}
/* dump_skb(skb); */
/* Re-transmit the original skb back onto the collective network */
col_start_xmit(skb, dev) ;
}
TRACE("(<) col_reflect") ;
return 0;
}
#ifdef CONFIG_NET_POLL_CONTROLLER
/* netpoll poll_controller hook (e.g. for netconsole).  Intentionally empty:
 * received frames are pushed up by the collective device's own delivery
 * path rather than polled from here. */
static void bgnet_poll(struct net_device *dev)
{
/* no-op; packets are fed by the col device */
}
#endif
/* Return nonzero when the low three bits of the first MAC octet are 0b110,
 * the pattern this driver uses to mark torus-routed station addresses. */
static inline int is_torus_ether_addr(const u8 *addr)
{
unsigned int low_bits = addr[0] & 0x7 ;
return low_bits == 0x6 ;
}
unsigned int find_xyz_address(unsigned int ip) ;
/* net_device transmit hook: all outbound traffic is forwarded to the
 * collective-network transmit path.  Always reports success (0). */
static int bgnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
col_start_xmit(skb, dev) ;
return 0 ;
}
/* net_device uninit callback: detach the two link-protocol handlers that
 * bgnet_init registered (normal receive and reflection). */
static void bgnet_uninit(struct net_device *dev)
{
struct bgnet_dev *priv = netdev_priv(dev);

bglink_unregister_proto(&priv->lnk);
bglink_unregister_proto(&priv->lnkreflect);
}
/* net_device get_stats callback: expose the per-device counter block. */
static struct net_device_stats *bgnet_get_stats(struct net_device *dev)
{
struct bgnet_dev* priv = netdev_priv(dev);

return &priv->stats;
}
/* net_device set_mac_address callback: validate the requested address and,
 * if it is a usable unicast MAC, install it on the device.
 * Returns 0 on success, -EADDRNOTAVAIL for an invalid address. */
static int bgnet_set_mac_addr(struct net_device* netDev,
void* p)
{
struct sockaddr* sa = p;

if (!is_valid_ether_addr(sa->sa_data))
return -EADDRNOTAVAIL;
memcpy(netDev->dev_addr, sa->sa_data, netDev->addr_len);
return 0;
}
/* net_device set_config callback: map ifmap fields onto collective-network
 * parameters.  Always returns 0. */
static int bgnet_set_config(struct net_device* netDev,
struct ifmap* map)
{
struct bgnet_dev* priv = netdev_priv(netDev);

/* 'ifconfig <interface> port <n>' selects the collective virtual channel */
if (map->port != 0)
priv->bgcol_channel = map->port;
/* 'ifconfig <interface> io_addr <n>' selects the collective route */
if (map->base_addr != 0)
priv->bgcol_route = map->base_addr;
return 0;
}
/* net_device init callback (invoked by register_netdev).  Binds the device
 * to the collective hardware, synthesizes a MAC address from the node's
 * rank, registers the two link-protocol handlers (receive and reflection)
 * and enables collective interrupts.
 * Returns 0 on success, -1 if the collective device is unavailable. */
static int bgnet_init(struct net_device *dev)
{
struct bgnet_dev *bgnet = netdev_priv(dev);
TRACE("(>) bgnet_init") ;
bgnet->bgcol_route = 0 /*15*/;
#define ETH_COL_CHANNEL 0
bgnet->bgcol_channel = 0 ;
/* bgnet->bgcol_channel = (bgnet_personality.Block_Config & BGP_PERS_BLKCFG_IPOverCollectiveVC) ? 1 : 0; */
/* bgnet->eth_bridge_vector = -1; */
/* bgnet->link_protocol = BGLINK_P_NET; */
/* bgnet->net_device = dev; */
bgnet->bgcol = bgcol_get_dev();
TRACE("(=) bgnet->bgcol=%p",bgnet->bgcol) ;
if (!bgnet->bgcol)
return -1;
bgnet->bgcol->bgnet_channel = bgnet->bgcol_channel ;
/* bgnet->phandle_tree = 3; */
/* bgnet->phandle_torus = 0; */
/* // bgnet->tree_route = 15; // 15 is 'partition flood' */
/* bgnet->tree_route = 0 ; // 0 is 'compute to IO' or 'IO to compute' */
/* bgnet->tree_channel = BGNET_TREE_CHANNEL ; */
/* bgnet->eth_mask = 0; */
/* // bgnet->eth_bridge_vector = 0; // route through the I/O node? (personality.Network_Config.IONodeRank) */
/* bgnet->eth_bridge_vector = personality.Network_Config.IOnodeRank; // route through the I/O node? (personality.Network_Config.IONodeRank) */
bgnet->eth_bridge_vector = bgnet_personality.Network_Config.IOnodeRank; /* route through the I/O node? (personality.Network_Config.IONodeRank) */
bgnet->bgcol_protocol = 1;
bgnet->bgcol_reflector_protocol = 2 ; /* CN requests reflection from ION */
/* Ranks differ => this is a compute node, not the I/O node */
if( bgnet_personality.Network_Config.Rank != bgnet_personality.Network_Config.IOnodeRank)
{
// On compute nodes, run a global interrupt barrier here with a view to aligning the printk timestamps
bgcol_align_timebase() ;
}
/* bgnet->i_am_ionode = ( personality.Network_Config.IOnodeRank == personality.Network_Config.Rank) ; */
#if 0
p = get_property(np, "local-mac-address", NULL);
if (p == NULL) {
printk(KERN_ERR "%s: Can't find local-mac-address property\n",
np->full_name);
goto err;
}
memcpy(dev->dev_addr, p, 6);
#endif
/* Synthesize a MAC: 00:80:46:<24-bit rank>.  The low 4 bytes double as
 * this node's collective 'vector' identity for self-send filtering. */
dev->dev_addr[0] = 0x00;
dev->dev_addr[1] = 0x80;
*((unsigned*)(&dev->dev_addr[2])) = 0x46000000u | bgnet_personality.Network_Config.Rank; /* why 0x46yyyyyy ??? */
bgnet->bgcol_vector = *(unsigned int *)(&dev->dev_addr[2]);
bgnet->eth_local = bgnet->eth_mask & *(unsigned int *)&dev->dev_addr[0];
/* spin_lock(&bgnet_lock); */
/* Only the first device registers the protocol handlers */
if (list_empty(&bgnet_list)) {
/* register with col */
/* bgnet_lnk.lnk_proto = bgnet->link_protocol; */
/* bgnet_lnk.receive_from_self = 0; */
/* bgnet_lnk.col_rcv = col_receive; */
/* bgnet_lnk.col_flush = col_flush; */
/* bgnet_lnk.torus_rcv = torus_receive; */
/* bglink_register_proto(&bgnet_lnk); */
bgnet->lnk.lnk_proto = bgnet->bgcol_protocol;
bgnet->lnk.col_rcv = bgnet_receive;
bgnet->lnk.col_rcv_trimmed = bgnet_receive_trimmed;
bgnet->lnk.private = dev;
bglink_register_proto(&bgnet->lnk);
bgnet->lnkreflect.lnk_proto = bgnet->bgcol_reflector_protocol;
bgnet->lnkreflect.col_rcv = col_reflect;
bgnet->lnkreflect.col_rcv_trimmed = col_reflect_trimmed;
bgnet->lnkreflect.private = dev;
bglink_register_proto(&bgnet->lnkreflect);
/* Hook for the tree interrupt handler to find the 'bgnet' */
bgnet->bgcol->bgnet = bgnet ;
}
/* list_add_rcu(&bgnet->list, &bgnet_list); */
/* */
/* spin_unlock(&bgnet_lock); */
/* */
/* skb_queue_head_init(&bgnet->pending_skb_list); */
bgcol_enable_interrupts(bgnet->bgcol) ; /* Should be able to run tree interrupts now */
TRACE("(<) bgnet_init") ;
return 0;
}
#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
/* NAPI poll function: delegates to the collective second-level interrupt
 * handler to drain the hardware, then reports 0 packets consumed.
 * NOTE(review): 'budget' is ignored and napi_complete() is never called
 * here -- presumably completion is handled inside bgcol_duplex_slih();
 * confirm against the bgcol implementation. */
static int bgnet_poll_napi(struct napi_struct * napi, int budget)
{
TRACEN(k_t_napi,"(>) napi=%p budget%d",napi,budget) ;
bgcol_duplex_slih(0) ;
TRACEN(k_t_napi,"(<)") ;
return 0 ;
}
#endif
#if defined(HAVE_NET_DEVICE_OPS)
/* net_device callback table, listed in device-lifecycle order. */
static const struct net_device_ops netdev_ops = {
.ndo_init = bgnet_init ,
.ndo_uninit = bgnet_uninit ,
.ndo_open = bgnet_open ,
.ndo_stop = bgnet_stop ,
.ndo_start_xmit = bgnet_start_xmit ,
.ndo_get_stats = bgnet_get_stats ,
.ndo_change_mtu = bgnet_change_mtu ,
.ndo_set_config = bgnet_set_config ,
.ndo_set_mac_address = bgnet_set_mac_addr,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = bgnet_poll,
#endif
};
#endif
/* Module entry point: allocate the ethernet device, read the Blue Gene
 * personality, initialize the collective layer, wire up the net_device
 * callbacks and register the interface.  bgnet_init() runs as the ndo_init
 * hook during register_netdev().
 * Returns 0 on success, -ENOMEM or -1 on failure. */
static int __init
bgnet_module_init(void)
{
struct bgnet_dev *bgnet;
struct net_device *dev;
TRACEN(k_t_general, "(>) bgnet_module_init") ;
dev = alloc_etherdev(sizeof(struct bgnet_dev));
TRACEN(k_t_general, "(=) bgnet_module_init dev=%p", dev) ;
if (!dev)
return -ENOMEM;
/* SET_MODULE_OWNER(dev); // Anachronism */
/* Read personality. */
bluegene_getPersonality((void*) &bgnet_personality, sizeof(bgnet_personality));
bgnet = (struct bgnet_dev*) netdev_priv(dev);
/* NOTE(review): alloc_etherdev normally zeroes the private area already,
 * so this memset looks redundant -- harmless, but confirm. */
memset(bgnet, 0, sizeof(*bgnet));
bgcol_module_init() ;
/* bgnet_init(dev); */
/* // Set the MAC address for this interface. */
/* if (bluegene_isIONode()) { */
/* unsigned char ipOctet2 = (bgnet_personality.Ethernet_Config.IPAddress.octet[13] + 1) & 0xfc; */
/* */
/* dev->dev_addr[0] = ipOctet2 | 2; */
/* dev->dev_addr[1] = bgnet_personality.Ethernet_Config.IPAddress.octet[14]; */
/* dev->dev_addr[2] = bgnet_personality.Ethernet_Config.IPAddress.octet[15]; */
/* dev->dev_addr[3] = ((bgnet_personality.Network_Config.Rank >> 16) & 0x3f) | (ipOctet2 << 6); */
/* dev->dev_addr[4] = (unsigned char) ((bgnet_personality.Network_Config.Rank >> 8)); */
/* dev->dev_addr[5] = (unsigned char) bgnet_personality.Network_Config.Rank; */
/* } else */
/* memcpy(dev->dev_addr, bgnet_personality.Ethernet_Config.EmacID, sizeof(dev->dev_addr)); */
/* Install callbacks via net_device_ops on newer kernels, or the legacy
 * per-field function pointers on older ones. */
#if defined(HAVE_NET_DEVICE_OPS)
dev->netdev_ops = &netdev_ops ;
#else
dev->init = bgnet_init;
dev->uninit = bgnet_uninit;
dev->get_stats = bgnet_get_stats;
dev->hard_start_xmit = bgnet_start_xmit;
dev->change_mtu = bgnet_change_mtu;
dev->open = bgnet_open;
dev->stop = bgnet_stop;
dev->set_config = bgnet_set_config;
dev->set_mac_address = bgnet_set_mac_addr;
#ifdef CONFIG_NET_POLL_CONTROLLER
dev->poll_controller = bgnet_poll;
#endif
#endif
dev->mtu = BGNET_DEFAULT_MTU;
/* Tried turning checksum generation off, but this resulted in packets routed off the BGP not having checksums */
/* and lack of interoperability with front-end nodes */
/* (try CHECKSUM_PARTIAL above to see if the TOMAL will generate an IP checksum in this circumstance) */
dev->features = k_trust_collective_crc
? (NETIF_F_HIGHDMA | NETIF_F_NO_CSUM)
: NETIF_F_HIGHDMA ;
/* if( k_trust_collective_crc) */
/* { */
/* dev->features = NETIF_F_HIGHDMA | NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM ; */
/* } */
/* else */
/* { */
/* dev->features = NETIF_F_HIGHDMA ; */
/* } */
/* #if defined(TRUST_TREE_CRC) */
/* dev->features = NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA ; */
/* #endif */
/* dev->features |= NETIF_F_NO_CSUM; */
TRACEN(k_t_general,"(=) dev->name=%s",
dev->name
) ;
{
/* register_netdev invokes bgnet_init (ndo_init) internally */
int rc = register_netdev(dev) ;
TRACEN(k_t_general, "(=) bgnet_module_init register_netdev rc=%d", rc) ;
if( rc != 0 )
goto err;
}
#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
netif_napi_add(dev,&bgnet->napi, bgnet_poll_napi, k_collective_budget) ;
napi_enable(&bgnet->napi) ;
#endif
/* increase header size to fit torus hardware header */
/* if (bgnet->torus) */
/* dev->hard_header_len += 16; */
/* eth_bridge_vector was set by bgnet_init during register_netdev */
if (bgnet->eth_bridge_vector != -1)
printk(KERN_INFO " bridge 0x%06x\n", bgnet->eth_bridge_vector);
TRACEN(k_t_general, "(<) bgnet_module_init rc=0") ;
return 0;
err:
/* NOTE(review): returns -1 rather than the rc from register_netdev;
 * propagating rc would give a more informative module-load error. */
free_netdev(dev);
TRACEN(k_t_general, "(<) bgnet_module_init err rc=-1") ;
return -1;
}
/* static void __exit */
/* bgnet_module_exit (void) */
/* { */
/* return; */
/* } */
module_init(bgnet_module_init);
/* module_exit(bgnet_module_exit); */