blob: 07b0aa9a9bfb8d64dac91cd30b0d987b6f340632 [file] [log] [blame]
/*********************************************************************
*
* (C) Copyright IBM Corp. 2010
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, see <http://www.gnu.org/licenses>.
*
* Author: Chris Ward <tjcw@uk.ibm.com>
*
* Description: Blue Gene low-level driver for sockets over torus
*
*
* Intent: Carry torus packets as messages into memory FIFOs, and interpret them
* as eth frames for TCP
* Later on, add token-based flow control with a view to preventing
* congestion collapse as the machine gets larger and the loading gets higher
*
********************************************************************/
#define REQUIRES_DUMPMEM
#include <linux/version.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/interrupt.h>
#include <linux/ioport.h>
#include <linux/mm.h>
#include <linux/cdev.h>
#include <linux/proc_fs.h>
#include <linux/highmem.h>
#include <linux/mman.h>
#include <linux/syscalls.h>
#include <linux/skbuff.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/page.h>
#include <asm/time.h>
#include <asm/bitops.h>
#include <linux/vmalloc.h>
#include <linux/dma-mapping.h>
#include <net/inet_connection_sock.h>
#include <net/inet_sock.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
/* #include "bglink.h" */
#include <spi/linux_kernel_spi.h>
#include <asm/time.h>
/* #define CONFIG_BLUEGENE_TORUS_TRACE */
/* #define CRC_CHECK_FRAMES */
#define VERIFY_TARGET
/* #define SIDEBAND_TIMESTAMP */
#include "bgp_dma_tcp.h"
/* void bgp_dma_diag_reissue_rec_counters(dma_tcp_t *dma_tcp) */
/* { */
/* unsigned int x; */
/* for(x=0;x<DMA_NUM_COUNTERS_PER_GROUP;x+=1) */
/* { */
/* struct sk_buff *skb=dma_tcp->rcv_skbs[x] ; */
/* if( skb) */
/* { */
/* frame_injection_cb * ficb = (frame_injection_cb *) skb->cb ; */
/* TRACEN(k_t_general,"Redriving x=%d skb=%p",x,skb) ; */
/* inject_dma_descriptor_propose_accept(dma_tcp,0,&ficb->desc) ; */
/* } */
/* } */
/* } */
/* Diagnostic: walk every (node-slot, connection) transmit slot and trace each
 * sk_buff still parked there awaiting transmission, then trace the total.
 * Trace output only; no driver state is modified. */
static inline void show_tx_skbs(tx_t *tx, unsigned int node_count)
{
	unsigned int slot ;
	unsigned int conn ;
	unsigned int pending = 0 ;
	for( slot=0; slot<node_count; slot+=1 )
	{
		for( conn=0; conn<k_connids_per_node; conn+=1 )
		{
			struct sk_buff * skb = get_tx_skb(tx,slot,conn) ;
			if( NULL == skb ) continue ;
			{
				/* Interpret the queued frame as ethernet+IP to show length and target */
				struct ethhdr *eth = (struct ethhdr *)(skb->data) ;
				struct iphdr *iph = (struct iphdr *)(eth+1) ;
				unsigned int frame_len = iph->tot_len ;
				unsigned int dest = iph->daddr ;
				pending += 1 ;
				TRACEN(k_t_request,"(---) slot_index=0x%08x conn_id=0x%02x skb=%p tot_len=0x%04x daddr=%d.%d.%d.%d",
						slot,conn,skb,frame_len,dest>>24, (dest >> 16) & 0xff,(dest >> 8) & 0xff, dest & 0xff) ;
			}
		}
	}
	TRACEN(k_t_request,"tx_skb_count=%d",pending) ;
}
/* Diagnostic dump of the reception side of the driver (driven from the /proc
 * sysctl handler).  Traces: the hit-zero state and value of every reception
 * DMA counter; the transmit-side pending skbs; the head of the pending-receive
 * skb queue; and, per node slot, a consistency check between the proposal
 * bookkeeping and the counters actually located for that slot.  Trace output
 * only, apart from refreshing the bgp_dma_tcp_counter_copies[] snapshot. */
void dma_tcp_show_reception(dma_tcp_t * dma_tcp)
{
int x ;
int slot ;
unsigned int inUseCount = 0 ;
TRACEN(k_t_request,"rec hitZero 0x%08x 0x%08x",DMA_CounterGetHitZero(&dma_tcp->recCounterGroup,0),DMA_CounterGetHitZero(&dma_tcp->recCounterGroup,1)) ;
/* Snapshot every reception counter and report the ones that look active */
for(x=0;x<DMA_NUM_COUNTERS_PER_GROUP;x+=1)
{
bgp_dma_tcp_counter_copies[x] = DMA_CounterGetValueNoMsync(dma_tcp->recCounterGroup.counter+x) ;
if( bgp_dma_tcp_counter_copies[x] != 0 || dma_tcp->recCntrInUse[x] != 0)
{
inUseCount += 1 ;
TRACEN(k_t_request,"rec_counter[0x%02x] value=0x%08x inUse=%d", x,bgp_dma_tcp_counter_copies[x],dma_tcp->recCntrInUse[x]) ;
if(dma_tcp->recCntrInUse[x])
{
dma_tcp_show_reception_one(dma_tcp,x,bgp_dma_tcp_counter_copies[x]) ;
/* struct sk_buff *skb=dma_tcp->rcv_skbs[x] ; */
/* if( skb) */
/* { */
/* struct ethhdr *eth = (struct ethhdr *)(skb->data) ; */
/* unsigned int eth_proto = eth->h_proto ; */
/* */
/* struct iphdr *iph = (struct iphdr *) (eth+1) ; */
/* unsigned int tot_len=iph->tot_len ; */
/* unsigned int saddr=iph->saddr ; */
/* if( tot_len != tot_len_for_rcv[x]) */
/* { */
/* TRACEN(k_t_error,"(!!!) tot_len trampled") ; */
/* } */
/* */
/* TRACEN(k_t_request,"(---) skb=%p eth_proto=0x%04x tot_len=0x%04x saddr=%d.%d.%d.%d slot=0x%08x conn_id=0x%02x tot_len_for_rcv=0x%04x", */
/* skb,eth_proto,tot_len,saddr>>24, (saddr >> 16) & 0xff,(saddr >> 8) & 0xff, saddr & 0xff, dma_tcp->slot_for_rcv[x], dma_tcp->conn_for_rcv[x], tot_len_for_rcv[x] */
/* ) ; */
/* dumpmem(skb->data,0x42,"eth-ip-tcp header") ; */
/* show_dma_descriptor((DMA_InjDescriptor_t *)&skb->cb) ; */
/* #if defined(AUDIT_FRAME_HEADER) */
/* if(memcmp(skb->data,((char *)(all_headers_in_counters+x)),32)) */
/* { */
/* TRACEN(k_t_request,"(!!!) header not as first seen") ; */
/* dumpmem(skb->data-14,sizeof(frame_header_t),"header-now") ; */
/* dumpmem(all_headers_in_counters+x,sizeof(frame_header_t),"header-in-propose") ; */
/* */
/* } */
/* #endif */
/* } */
/* else */
/* { */
/* TRACEN(k_t_error|k_t_request,"(E) x=%d Counter in use but no skb !",x) ; */
/* } */
}
}
}
TRACEN(k_t_request,"inUseCount=%d",inUseCount) ;
show_tx_skbs(&dma_tcp->tx_mux,dma_tcp->node_count) ;
TRACEN(k_t_request,"skb_queue_len(pending_rcv_skbs)=%d",skb_queue_len(&dma_tcp->balancer.b[0].pending_rcv_skbs)) ;
{
/* Peek the oldest pending-receive skb and show its eth/IP summary.
 * NOTE(review): skb_peek is done without taking the queue lock here --
 * presumably acceptable for a diagnostic-only path; confirm. */
struct sk_buff *skb = skb_peek(&dma_tcp->balancer.b[0].pending_rcv_skbs) ;
if(skb)
{
struct ethhdr *eth = (struct ethhdr *)(skb->data) ;
unsigned int eth_proto = eth->h_proto ;
struct iphdr *iph = (struct iphdr *) (eth+1) ;
unsigned int tot_len=iph->tot_len ;
unsigned int saddr=iph->saddr ;
TRACEN(k_t_request,"skb=%p eth_proto=0x%04x tot_len=0x%04x saddr=%d.%d.%d.%d",skb,eth_proto,tot_len,saddr>>24, (saddr >> 16) & 0xff,(saddr >> 8) & 0xff, saddr & 0xff ) ;
}
}
/* Cross-check, per node slot: the number of reception counters located for
 * the slot plus the pending-flow count should equal the active proposals */
for( slot=0;slot<dma_tcp->node_count; slot+=1)
{
unsigned int proposals_active=get_proposals_active(&dma_tcp->rcvdemux,slot) ;
unsigned int count_pending_f=count_pending_flow(&dma_tcp->rcvdemux,slot) ;
unsigned int located_counters=0 ;
if( proposals_active || count_pending_f )
{
TRACEN(k_t_request,"slot=0x%08x proposals_active=%d count_pending_flow=%d",slot,proposals_active,count_pending_f) ;
}
for(x=0;x<DMA_NUM_COUNTERS_PER_GROUP;x+=1)
{
struct sk_buff *skb=dma_tcp->rcv_skbs[x] ;
if ( skb && slot == dma_tcp->slot_for_rcv[x] )
{
located_counters += 1 ;
}
}
/* Inconsistency here suggests lost or double-counted proposal state */
if( located_counters + count_pending_f != proposals_active || ( 0 == located_counters && count_pending_f > 0 ))
{
TRACEN(k_t_request|k_t_error,"(E) slot=0x%08x located_counters=%d count_pending_f=%d proposals_active=%d",
slot,located_counters,count_pending_f,proposals_active) ;
}
}
}
/* /proc sysctl handler: dumps the reception-side diagnostics as a side effect
 * of any read/write of the ctl, then delegates to the stock integer handler.
 * Returns whatever proc_dointvec returns. */
int proc_do_dma_rec_counters(struct ctl_table *ctl, int write, struct file * filp,
		void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int rc ;
	dma_tcp_show_reception(&dma_tcp_state ) ;
	/* *lenp is size_t; cast so it matches the %d conversion in the trace */
	TRACEN(k_t_entryexit,"(>)ctl=%p write=%d len=%d", ctl,write,(int)(*lenp)) ;
	rc = proc_dointvec(ctl,write,filp,buffer,lenp,ppos) ;
	TRACEN(k_t_entryexit,"(<)") ;
	return rc ;
}
/* Routine to report how full the outgoing FIFOs are */
/* Report to user space how full each outgoing injection FIFO is.
 * Writes one int per (core, direction) pair -- the count of descriptors
 * currently occupying that FIFO -- followed by three flow-control statistics:
 * free reception counters, proposes-sent minus accepts-acted, and
 * proposes-acted minus accepts-sent.
 * 'report' must point to user memory large enough for all of these.
 * Fix over the original: put_user is __must_check and can return -EFAULT;
 * the original ignored every return value.  On the first fault we stop
 * writing, since the rest of the buffer is equally inaccessible. */
void bgp_dma_diag_report_transmission_queue(int __user * report)
{
	dma_tcp_t *dma_tcp = &dma_tcp_state ;
	unsigned int core ;
	TRACEN(k_t_general,"report=%p",report) ;
	for( core=0 ; core<k_injecting_cores; core += 1)
	{
		unsigned int desired_fifo ;
		for(desired_fifo=0; desired_fifo<k_injecting_directions; desired_fifo += 1 )
		{
			unsigned int fifo_initial_head = dma_tcp->idma.idma_core[core].idma_direction[desired_fifo].fifo_initial_head ;
			unsigned int fifo_current_head =
					(unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+desired_fifo]) ;
			unsigned int fifo_current_tail =
					(unsigned int) DMA_InjFifoGetTailById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+desired_fifo]) ;
			/* >>5 : descriptors are 32 bytes, convert byte offsets to indices */
			unsigned int headx = (fifo_current_head-fifo_initial_head) >> 5 ;
			unsigned int tailx = (fifo_current_tail-fifo_initial_head) >> 5 ;
			unsigned int current_injection_used=packet_mod(tailx-headx) ;
			if( put_user(current_injection_used, report) )
			{
				TRACEN(k_t_error,"(E) fault writing transmission-queue report at %p",report) ;
				return ;
			}
			report += 1 ;
			TRACEN(k_t_detail,"core=%d desired_fifo=%d current_injection_used=%d",core,desired_fifo,current_injection_used) ;
		}
	}
	if( put_user(dma_tcp->qtyFreeRecCounters, report) ) return ;
	report += 1 ;
	if( put_user(flow_count(dma_tcp,k_send_propose_rpc)-flow_count(dma_tcp,k_act_accept_rpc), report) ) return ;
	report += 1 ;
	if( put_user(flow_count(dma_tcp,k_act_propose_rpc)-flow_count(dma_tcp,k_send_accept_rpc), report) ) return ;
}
/* Receive-side handler for 'diagnose' packets: traces the packet's software
 * key, put-offset, and the first four words of payload.  Always returns 0.
 * Fix over the original: the payload was dereferenced for 16 bytes
 * unconditionally, even when payload_bytes said fewer were present -- a
 * potential over-read past the FIFO payload.  Copy at most payload_bytes
 * (clamped to 0..16) into a zeroed local buffer before tracing. */
static int issueDiagnose(
		DMA_RecFifo_t *f_ptr,
		DMA_PacketHeader_t *packet_ptr,
		dma_tcp_t * dma_tcp,
		void * request ,
		int payload_bytes,
		unsigned int src_key,
		int Put_Offset
)
{
	unsigned int words[4] = { 0, 0, 0, 0 } ;  /* missing bytes trace as zero */
	const unsigned char *src = (const unsigned char *) request ;
	unsigned char *dst = (unsigned char *) words ;
	int copy_bytes = payload_bytes ;
	int i ;
	if( copy_bytes < 0 ) copy_bytes = 0 ;
	if( copy_bytes > (int) sizeof(words) ) copy_bytes = (int) sizeof(words) ;
	for( i=0; i<copy_bytes; i+=1 )
	{
		dst[i] = src[i] ;
	}
	TRACEN(k_t_request,"src_key=0x%08x Put_Offset=0x%08x payload_bytes=0x%02x [%08x %08x %08x %08x]",
			src_key,Put_Offset, payload_bytes,words[0],words[1],words[2],words[3]) ;
	return 0 ;
}
/* Thin adapter registered with the DMA reception layer: unpacks the software
 * word and put-offset from the packet header and forwards everything, with
 * the opaque parameter cast back to the driver state, to issueDiagnose. */
static int issueDiagnoseActor(DMA_RecFifo_t *f_ptr,
		DMA_PacketHeader_t *packet_ptr,
		void *recv_func_parm,
		char *payload_ptr,
		int payload_bytes
		)
{
	unsigned int sw_arg = packet_ptr->SW_Arg ;
	int put_offset = packet_ptr->Put_Offset ;
	enable_kernel_fp() ; // TODO: don't think this is needed nowadays
	TRACEN(k_t_detail,"recv_func_parm=%p payload_ptr=%p SW_Arg=0x%08x payload_bytes=0x%08x Put_Offset=0x%08x",
			recv_func_parm,payload_ptr,sw_arg,payload_bytes,put_offset) ;
	return issueDiagnose(f_ptr,
			packet_ptr,
			(dma_tcp_t *) recv_func_parm,
			(void *) payload_ptr,
			payload_bytes,
			sw_arg,
			put_offset) ;
}
/* Build and inject one torus memory-FIFO descriptor carrying a diagnostic
 * 'sync' message of 'length' bytes at 'address', addressed to node (x,y,z).
 * SW_Arg travels in the packet's software word (the receiver's src_key);
 * proto_start selects the registered receive function on the far side.
 * Returns the number of descriptors injected (always 1).
 * NOTE(review): the dma_map_single return value is not checked for mapping
 * failure, and no matching dma_unmap_single is visible in this function --
 * presumably handled (or deliberately skipped) elsewhere; confirm. */
static inline int inject_into_dma_diag_sync(dma_tcp_t *dma_tcp, void * address, unsigned int length, unsigned int x, unsigned int y, unsigned int z, unsigned int my_injection_group, unsigned int desired_fifo, unsigned int SW_Arg ,
unsigned int proto_start )
{
dma_addr_t dataAddr ;
DMA_InjDescriptor_t desc;
int ret1, ret2 __attribute__((unused));
unsigned int firstpacketlength = length ;
TRACEN(k_t_general , "(>) injecting address=%p length=0x%08x x=%d y=%d z=%d my_injection_group=%d desired_fifo=%d",address,length,x,y,z,my_injection_group,desired_fifo);
dataAddr = dma_map_single(NULL, address, length, DMA_TO_DEVICE);
/* First injection is 'start of frame/fragment' */
ret1 = DMA_TorusMemFifoDescriptor( &desc,
x, y, z,
dma_tcp_ReceptionFifoGroup(dma_tcp), /* recv fifo grp id */
0, /* hints */
k_VC_anyway, /* vc - adaptive */
SW_Arg, /* softw arg */
proto_start, /* function id */
dma_tcp_InjectionCounterGroup(dma_tcp), /* inj cntr group id */
k_injCounterId, /* inj counter id */
dataAddr, /* send address */
firstpacketlength /* msg len */
);
#if defined(SIDEBAND_TIMESTAMP)
{
/* Pack a low-resolution timebase sample into the upper half of the
 * (otherwise software-defined) put offset, message length in the lower */
unsigned long now_lo=get_tbl() ;
DMA_DescriptorSetPutOffset(&desc,((-length) & 0x0000ffff ) | (now_lo & 0xffff0000)) ;
}
#else
DMA_DescriptorSetPutOffset(&desc,-length) ; /* For 'memory FIFO packets', the put offset has no hardware use. Set it to indicate the message (fragment) length */
#endif
ret2 = wrapped_DMA_InjFifoInjectDescriptorById( &dma_tcp->injFifoGroupFrames,
dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo],
&desc );
TRACEN(k_t_general , "(<)proto_start=%d firstpacketlength=%d ret1=%d ret2=%d",proto_start,firstpacketlength,ret1, ret2);
return 1 ;
}
/* Send one diagnostic 'sync' message of sendBytes bytes (from the shared
 * diag_block_buffer) to node (x,y,z).  Takes the per-direction injection
 * lock, checks there is room in the injection FIFO (backing off with a trace
 * if not), bumps the injection counter, injects the descriptor, and advances
 * the ring-buffer head/tail bookkeeping, freeing any skbs whose descriptors
 * the DMA unit has already consumed. */
static void bgp_dma_diag_drive_sync_at(dma_tcp_t *dma_tcp, int x,int y,int z, int sendBytes)
{
unsigned int desired_fifo= select_transmission_fifo(dma_tcp,x,y,z) ;
unsigned long flags ;
unsigned int current_injection_used=0xffffffff ;
unsigned int aligned_payload_address = (unsigned int)dma_tcp->diag_block_buffer ;
unsigned int aligned_payload_length = sendBytes ;
unsigned int pad_head = 0 ;
int ret = 0;
int ring_ok ;
int my_injection_group ;
skb_group_t skb_group ;
TRACEN(k_t_general ,"(>) at (%02x,%02x,%02x)", x,y,z);
skb_group_init(&skb_group) ;
my_injection_group=injection_group_hash(dma_tcp,x,y,z) ;
spin_lock_irqsave(&dma_tcp->dirInjectionLock[my_injection_group*k_injecting_directions+desired_fifo],flags) ;
{
unsigned int src_key = (dma_tcp->src_key << 6) | (my_injection_group << 4) | pad_head ;
idma_direction_t * buffer = dma_tcp->idma.idma_core[my_injection_group].idma_direction+desired_fifo ;
/* Set up the payload */
unsigned int bhx = buffer->buffer_head_index ;
unsigned int lastx = packet_mod(bhx) ;
unsigned int fifo_initial_head = dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].fifo_initial_head ;
unsigned int fifo_current_head =
(unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo]) ;
unsigned int fifo_current_tail =
(unsigned int) DMA_InjFifoGetTailById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo]) ;
/* >>5 : descriptors are 32 bytes, convert byte offsets to indices */
unsigned int headx = (fifo_current_head-fifo_initial_head) >> 5 ;
unsigned int tailx = (fifo_current_tail-fifo_initial_head) >> 5 ;
unsigned int injection_count ;
#if defined(TRACK_LIFETIME_IN_FIFO)
/* NOTE(review): 'skb' is not declared in this function, so this branch
 * cannot compile if TRACK_LIFETIME_IN_FIFO is ever defined -- looks like
 * it was copied from the skb-based transmit path; confirm and fix. */
unsigned long long now=get_powerpc_tb() ;
*(unsigned long long*)(skb->cb) = now ;
#endif
current_injection_used=packet_mod(tailx-headx) ;
/* If the network is backing up, we may have to skip out here, */
/* so that we don't overwrite unsent data. */
TRACEN(k_t_general ,"Runway desired_fifo=%d headx=%d tailx=%d bhx=%d current_injection_used=%d",
desired_fifo,headx,tailx,bhx,current_injection_used) ;
if( current_injection_used > buffer->injection_high_watermark )
{
buffer->injection_high_watermark=current_injection_used ; /* Congestion statistic */
}
{
/* Need to have room to inject the in-skbuff data plus all attached 'fragments', each of which may be sent in 3 injections */
if( current_injection_used+3*(MAX_SKB_FRAGS+1) < k_injection_packet_count-1)
{
ring_ok = 1 ;
TRACEN(k_t_general,"Runway slot granted") ;
}
else
{
ring_ok = 0 ;
TRACEN(k_t_congestion,"Runway slot denied tailx=%08x headx=%08x",tailx,headx) ;
}
}
TRACEN(k_t_general ,"Injection my_injection_group=%d desired_fifo=%d bhx=0x%08x headx=%08x tailx=%08x",
my_injection_group, desired_fifo, bhx, headx,tailx
) ;
if ( ring_ok )
{
/* We are going to send something. Display its protocol headers .. */
/* Bump the injection counter. Actually only needs doing once per 4GB or so */
ret=DMA_CounterSetValueWideOpenById ( & dma_tcp->injCounterGroup, k_injCounterId, 0xffffffff );
/* and inject it */
{
injection_count = inject_into_dma_diag_sync(dma_tcp,(void *)aligned_payload_address,aligned_payload_length,x,y,z,my_injection_group,desired_fifo,
src_key,
dma_tcp->proto_issue_diag_sync
) ;
}
{
unsigned int nhx=packet_mod(bhx+injection_count) ;
/* Record the skbuff so it can be freed later, after data is DMA'd out */
/* (no skbuff for this diagnostic payload, so the slot is cleared) */
dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].idma_skb_array->skb_array[nhx] = NULL ;
/* Remember where we will be pushing the next injection in */
buffer->buffer_head_index = nhx ;
}
/* hang on to the skbs until they are sent ... */
if( current_injection_used != 0xffffffff)
{
unsigned int btx = buffer->buffer_tail_index ; /* This indexes the oldest skbuff that might still be pending send by the DMA unit */
int skql2 = packet_mod(bhx-btx) ;
int count_needing_freeing = skql2-current_injection_used ;
int count_to_free = ( count_needing_freeing > k_skb_group_count) ? k_skb_group_count : count_needing_freeing ;
TRACEN(k_t_detail ,"current_injection_used=%d btx=%d skql2=%d count_needing_freeing=%d count_to_free=%d",current_injection_used,btx,skql2,count_needing_freeing,count_to_free);
skb_group_queue(&skb_group,dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].idma_skb_array->skb_array,btx,count_to_free
#if defined(TRACK_LIFETIME_IN_FIFO)
, my_injection_group, desired_fifo, now
#endif
) ;
btx = packet_mod(btx+count_to_free) ;
buffer->buffer_tail_index = btx ;
TRACEN(k_t_detail ,"buffer=%p buffer->buffer_tail_index=%d",buffer,buffer->buffer_tail_index);
}
}
else
{
TRACEN(k_t_congestion,"Would overrun my_injection_group=%d desired_fifo=%d bhx=0x%08x headx=%08x tailx=%08x lastx=%08x",
my_injection_group, desired_fifo, bhx, headx,tailx, lastx
) ;
}
}
spin_unlock_irqrestore(&dma_tcp->dirInjectionLock[my_injection_group*k_injecting_directions+desired_fifo],flags) ;
/* Free collected skbs outside the spinlock */
skb_group_free(&skb_group) ;
if( k_async_free ) mod_timer(&dma_tcp->transmission_free_skb_timer, jiffies+1) ;
/* NOTE(review): TRACE here where the rest of the file uses TRACEN */
TRACE("(<) desired_fifo=%d",desired_fifo);
}
/* Fill shuffle_vector with the packed coordinates of every node in an
 * (xe x ye x ze) partition, in lexicographic order.  Each entry packs the
 * coordinates as (x<<16)|(y<<8)|z.  The array must hold xe*ye*ze entries. */
static void init_shuffle_vector(unsigned int * shuffle_vector, unsigned int xe, unsigned int ye, unsigned int ze)
{
	unsigned int xi ;
	unsigned int yi ;
	unsigned int zi ;
	unsigned int idx = 0 ;
	for( xi=0; xi<xe; xi+=1 )
	{
		for( yi=0; yi<ye; yi+=1 )
		{
			for( zi=0; zi<ze; zi+=1 )
			{
				shuffle_vector[idx] = (xi<<16) | (yi<<8) | zi ;
				idx += 1 ;
			}
		}
	}
}
/* Advance a 32-bit PRBS (LFSR) one step using the CRC-32-IEEE 802.3
 * polynomial 0x04C11DB7 (http://en.wikipedia.org/wiki/Cyclic_redundancy_check):
 * shift left one bit and xor in the taps when the bit shifted out was set.
 * Fix over the original: (seed << 1) on a negative signed int is undefined
 * behaviour in C; do the shift/xor in unsigned arithmetic, which yields the
 * same results the original produced on two's-complement targets. */
static inline int next_prbs(int seed)
{
	unsigned int bits = (unsigned int) seed ;
	unsigned int taps = (seed < 0) ? 0x04C11DB7u : 0u ; /* top bit set -> apply polynomial */
	return (int) ((bits << 1) ^ taps) ;
}
/* Run the PRBS generator forward 32 steps (one full word of shifts) so the
 * returned seed is well scattered from the input seed. */
static int scatter_prbs(int seed)
{
	int remaining = 32 ;
	while( remaining > 0 )
	{
		seed = next_prbs(seed) ;
		remaining -= 1 ;
	}
	return seed ;
}
/* One shuffling pass over the (xe x ye x ze)-entry vector: for each index,
 * swap it with a pseudo-randomly chosen partner drawn from the PRBS stream.
 * Returns the advanced seed so successive calls continue the stream.
 * NOTE(review): the partner index is formed as (seed & (vsize-1)), which only
 * selects every slot when vsize is a power of two -- presumably torus extents
 * are; confirm. */
static int shuffle_shuffle_vector(unsigned int * shuffle_vector, unsigned int xe, unsigned int ye, unsigned int ze, int seed)
{
	unsigned int total = xe*ye*ze ;
	unsigned int index_mask = total-1 ;
	unsigned int i ;
	for( i=0; i<total; i+=1 )
	{
		unsigned int partner = seed & index_mask ;
		unsigned int held = shuffle_vector[i] ;
		shuffle_vector[i] = shuffle_vector[partner] ;
		shuffle_vector[partner] = held ;
		seed = next_prbs(seed) ;
	}
	return seed ;
}
#if 0
void dma_tcp_transfer_activate(int sendBytes)
{
dma_tcp_t *dma_tcp = &dma_tcp_state ;
int a ;
int my_x=dma_tcp->location.coordinate[0] ;
int my_y=dma_tcp->location.coordinate[1] ;
int my_z=dma_tcp->location.coordinate[2] ;
int ext_x=dma_tcp->extent.coordinate[0] ;
int ext_y=dma_tcp->extent.coordinate[1] ;
int ext_z=dma_tcp->extent.coordinate[2] ;
int vsize=ext_x*ext_y*ext_z ;
/* Push the 'diagnostic block' through the DMA unit */
TRACEN(k_t_request,"diagnostic transfer request, sendBytes=0x%08x",sendBytes) ;
dma_tcp->shuffle_seed = shuffle_shuffle_vector(dma_tcp->shuffle_vector,ext_x,ext_y,ext_z,dma_tcp->shuffle_seed) ;
for(a=0;a<vsize;a+=1)
{
unsigned int tg=dma_tcp->shuffle_vector[a] ;
unsigned int tg_x=tg>>16 ;
unsigned int tg_y=(tg>>8) & 0xff ;
unsigned int tg_z=tg & 0xff ;
TRACEN(k_t_detail,"shuffle_vector[%d]=0x%08x",a,dma_tcp->shuffle_vector[a]) ;
if( my_x != tg_x || my_y != tg_y || my_z != tg_z )
{
bgp_dma_diag_drive_block_at(dma_tcp,tg_x,tg_y,tg_z,sendBytes) ;
}
}
}
void dma_tcp_transfer_activate_to_one(int sendBytes, unsigned int tg)
{
dma_tcp_t *dma_tcp = &dma_tcp_state ;
int my_x=dma_tcp->location.coordinate[0] ;
int my_y=dma_tcp->location.coordinate[1] ;
int my_z=dma_tcp->location.coordinate[2] ;
/* Push the 'diagnostic block' through the DMA unit */
TRACEN(k_t_request,"diagnostic transfer request, sendBytes=0x%08x tg=0x%08x",sendBytes,tg) ;
{
unsigned int tg_x=tg>>16 ;
unsigned int tg_y=(tg>>8) & 0xff ;
unsigned int tg_z=tg & 0xff ;
if( my_x != tg_x || my_y != tg_y || my_z != tg_z )
{
bgp_dma_diag_drive_block_at(dma_tcp,tg_x,tg_y,tg_z,sendBytes) ;
}
}
}
#endif
/* Diagnostic 'sync' broadcast: reshuffle the node-order vector (advancing the
 * stored PRBS seed), then fire one sync message of sendBytes bytes at every
 * node in the partition except this node itself. */
void dma_tcp_transfer_activate_sync(int sendBytes)
{
	dma_tcp_t *dma_tcp = &dma_tcp_state ;
	int self_x = dma_tcp->location.coordinate[0] ;
	int self_y = dma_tcp->location.coordinate[1] ;
	int self_z = dma_tcp->location.coordinate[2] ;
	int span_x = dma_tcp->extent.coordinate[0] ;
	int span_y = dma_tcp->extent.coordinate[1] ;
	int span_z = dma_tcp->extent.coordinate[2] ;
	int node_total = span_x*span_y*span_z ;
	int index ;
	/* Push the 'diagnostic block' through the DMA unit */
	TRACEN(k_t_general,"diagnostic transfer request, sendBytes=0x%08x",sendBytes) ;
	dma_tcp->shuffle_seed = shuffle_shuffle_vector(dma_tcp->shuffle_vector,span_x,span_y,span_z,dma_tcp->shuffle_seed) ;
	for( index=0; index<node_total; index+=1 )
	{
		/* Entries pack the target coordinates as (x<<16)|(y<<8)|z */
		unsigned int packed = dma_tcp->shuffle_vector[index] ;
		unsigned int dest_x = packed >> 16 ;
		unsigned int dest_y = (packed >> 8) & 0xff ;
		unsigned int dest_z = packed & 0xff ;
		TRACEN(k_t_detail,"shuffle_vector[%d]=0x%08x",index,dma_tcp->shuffle_vector[index]) ;
		if( dest_x != self_x || dest_y != self_y || dest_z != self_z )
		{
			bgp_dma_diag_drive_sync_at(dma_tcp,dest_x,dest_y,dest_z,sendBytes) ;
		}
	}
}
/* 'across faces' transfer in x,y,z directions, as a 'towards peak performance' test */
#if 0
void dma_tcp_transfer_activate_minicube(int sendBytes)
{
dma_tcp_t *dma_tcp = &dma_tcp_state ;
int my_x=dma_tcp->location.coordinate[0] ;
int my_y=dma_tcp->location.coordinate[1] ;
int my_z=dma_tcp->location.coordinate[2] ;
/* Push the 'diagnostic block' through the DMA unit */
TRACEN(k_t_request,"diagnostic transfer request, sendBytes=0x%08x",sendBytes) ;
bgp_dma_diag_drive_block_at(dma_tcp,my_x^1,my_y,my_z,sendBytes) ;
bgp_dma_diag_drive_block_at(dma_tcp,my_x,my_y^1,my_z,sendBytes) ;
bgp_dma_diag_drive_block_at(dma_tcp,my_x,my_y,my_z^1,sendBytes) ;
}
int dma_tcp_transfer_wait(int demandCount)
{
int spincount = 0 ;
TRACEN(k_t_request,"(>) demandCount=%d",demandCount) ;
while(DiagEndCount < demandCount && spincount < 100 )
{
int rc ;
set_current_state(TASK_INTERRUPTIBLE);
rc=schedule_timeout(1) ;
if( 0 != rc) break ;
spincount += 1 ;
}
TRACEN(k_t_request,"(<) DiagEndCount=%d spincount=%d",DiagEndCount,spincount) ;
return DiagEndCount >= demandCount ;
}
#endif
/* Count of diagnostic 'sync' packets received.  It is advanced by the receive
 * actor below (compiled only under BARRIER_WITH_IOCTL) but polled by
 * dma_tcp_transfer_wait_sync and reset by dma_tcp_transfer_clearcount, which
 * are compiled unconditionally -- so the definition must live outside the
 * conditional.  (Previously it was declared inside the #if block, breaking
 * the build whenever BARRIER_WITH_IOCTL was not defined.) */
static volatile int DiagSyncCount ;
#if defined(BARRIER_WITH_IOCTL)
/* Receive-side handler for a 'diag sync' packet: record its arrival time in
 * the timing histogram and bump the sync counter.  Always returns 0. */
static int issueInlineFrameDiagSync(
		DMA_RecFifo_t *f_ptr,
		DMA_PacketHeader_t *packet_ptr,
		dma_tcp_t * dma_tcp,
		void * request ,
		int payload_bytes,
		unsigned int src_key,
		int Put_Offset
)
{
	timing_histogram(dma_tcp) ;
	DiagSyncCount += 1 ;
	return 0 ;
}
/* Adapter registered with the DMA reception layer: unpacks header fields and
 * forwards to issueInlineFrameDiagSync with typed arguments. */
static int issueInlineFrameDiagSyncActor(DMA_RecFifo_t *f_ptr,
		DMA_PacketHeader_t *packet_ptr,
		void *recv_func_parm,
		char *payload_ptr,
		int payload_bytes
		)
{
	unsigned int SW_Arg=packet_ptr->SW_Arg ;
	int Put_Offset=packet_ptr->Put_Offset ;
	enable_kernel_fp() ; // TODO: don't think this is needed nowadays
	TRACEN(k_t_detail,"recv_func_parm=%p payload_ptr=%p SW_Arg=0x%08x payload_bytes=0x%08x Put_Offset=0x%08x",
			recv_func_parm,payload_ptr,SW_Arg,payload_bytes,Put_Offset) ;
	return issueInlineFrameDiagSync(
			f_ptr,
			packet_ptr,
			(dma_tcp_t *) recv_func_parm,
			(void *) payload_ptr,
			payload_bytes,
			SW_Arg,
			Put_Offset
	) ;
}
#endif
/* Poll until at least demandCount sync packets have arrived, sleeping one
 * jiffy (interruptibly) between checks, for at most 100 tries.  Bails out
 * early if the sleep is interrupted by a signal.  Returns nonzero when the
 * demanded count was reached. */
int dma_tcp_transfer_wait_sync(int demandCount)
{
	int spincount = 0 ;
	TRACEN(k_t_general,"(>) demandCount=%d",demandCount) ;
	while(DiagSyncCount < demandCount && spincount < 100 )
	{
		int rc ;
		set_current_state(TASK_INTERRUPTIBLE);
		rc=schedule_timeout(1) ;
		if( 0 != rc) break ;  /* woken by a signal; give up waiting */
		spincount += 1 ;
	}
	TRACEN(k_t_general,"(<) DiagSyncCount=%d spincount=%d",DiagSyncCount,spincount) ;
	return DiagSyncCount >= demandCount ;
}
/* Reset the sync-packet counter ahead of a new barrier/sync cycle. */
void dma_tcp_transfer_clearcount(void)
{
	TRACEN(k_t_general,"count cleared") ;
	/* DiagEndCount = 0 ; */
	DiagSyncCount = 0 ;
}
/* One-time initialisation of the diagnostic subsystem: allocates the
 * diagnostic transfer buffer and node-shuffle vector, seeds and fills the
 * shuffle vector, zeroes the timing histogram, and registers the receive
 * functions, recording the protocol ids the registrations return.
 * NOTE(review): registration order appears to determine the protocol ids
 * carried in packets, so the call order here should not be changed --
 * confirm against DMA_RecFifoRegisterRecvFunction. */
void dma_tcp_diagnose_init(dma_tcp_t *dma_tcp)
{
#if defined(BARRIER_WITH_IOCTL)
dma_tcp->diag_block_buffer=allocate_diag_block_buffer() ;
dma_tcp->shuffle_vector=allocate_shuffle_vector(dma_tcp->extent.coordinate[0],dma_tcp->extent.coordinate[1],dma_tcp->extent.coordinate[2]) ;
/* +1 so a zero SW_Arg still yields a nonzero PRBS seed */
dma_tcp->shuffle_seed = scatter_prbs(dma_tcp->SW_Arg + 1) ;
init_shuffle_vector(dma_tcp->shuffle_vector,dma_tcp->extent.coordinate[0],dma_tcp->extent.coordinate[1],dma_tcp->extent.coordinate[2]) ;
dma_tcp->proto_issue_diag_sync=DMA_RecFifoRegisterRecvFunction(issueInlineFrameDiagSyncActor, dma_tcp, 0, 0);
/* 33 entries -- presumably 32 log2 buckets plus an overflow slot; confirm
 * against timing_histogram() */
memset(dma_tcp->timing_histogram_buckets,0,33*sizeof(int)) ;
#endif
dma_tcp->proto_diagnose=DMA_RecFifoRegisterRecvFunction(issueDiagnoseActor, dma_tcp, 0, 0);
}