/* blob: a3375d4b22e7cf097babb6c9c3329872a47cf20d [file] [log] [blame] */
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* Implementation of the Transmission Control Protocol(TCP).
*
* Version: @(#)tcp.c 1.0.16 05/25/93
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Mark Evans, <evansmp@uhura.aston.ac.uk>
* Corey Minyard <wf-rch!minyard@relay.EU.net>
* Florian La Roche, <flla@stud.uni-sb.de>
*
* Fixes:
* Alan Cox : Numerous verify_area() calls
* Alan Cox : Set the ACK bit on a reset
* Alan Cox : Stopped it crashing if it closed while sk->inuse=1
* and was trying to connect (tcp_err()).
* Alan Cox : All icmp error handling was broken
* pointers passed where wrong and the
* socket was looked up backwards. Nobody
* tested any icmp error code obviously.
* Alan Cox : tcp_err() now handled properly. It wakes people
* on errors. select behaves and the icmp error race
* has gone by moving it into sock.c
* Alan Cox : tcp_reset() fixed to work for everything not just
* packets for unknown sockets.
* Alan Cox : tcp option processing.
* Alan Cox : Reset tweaked (still not 100%) [Had syn rule wrong]
* Herp Rosmanith : More reset fixes
* Alan Cox : No longer acks invalid rst frames. Acking
* any kind of RST is right out.
* Alan Cox : Sets an ignore me flag on an rst receive
* otherwise odd bits of prattle escape still
* Alan Cox : Fixed another acking RST frame bug. Should stop
* LAN workplace lockups.
* Alan Cox : Some tidyups using the new skb list facilities
* Alan Cox : sk->keepopen now seems to work
* Alan Cox : Pulls options out correctly on accepts
* Alan Cox : Fixed assorted sk->rqueue->next errors
* Alan Cox : PSH doesn't end a TCP read. Switched a bit to skb ops.
* Alan Cox : Tidied tcp_data to avoid a potential nasty.
* Alan Cox : Added some better commenting, as the tcp is hard to follow
* Alan Cox : Removed incorrect check for 20 * psh
* Michael O'Reilly : ack < copied bug fix.
* Johannes Stille : Misc tcp fixes (not all in yet).
* Alan Cox : FIN with no memory -> CRASH
* Alan Cox : Added socket option proto entries. Also added awareness of them to accept.
* Alan Cox : Added TCP options (SOL_TCP)
* Alan Cox : Switched wakeup calls to callbacks, so the kernel can layer network sockets.
* Alan Cox : Use ip_tos/ip_ttl settings.
* Alan Cox : Handle FIN (more) properly (we hope).
* Alan Cox : RST frames sent on unsynchronised state ack error/
* Alan Cox : Put in missing check for SYN bit.
* Alan Cox : Added tcp_select_window() aka NET2E
* window non shrink trick.
* Alan Cox : Added a couple of small NET2E timer fixes
* Charles Hedrick : TCP fixes
* Toomas Tamm : TCP window fixes
*
*
* To Fix:
* Possibly a problem with accept(). BSD accept never fails after
* it causes a select. Linux can - given the official select semantics I
* feel that _really_ its the BSD network programs that are bust (notably
* inetd, which hangs occasionally because of this).
* Add VJ Fastrecovery algorithm ?
* Protocol closedown badly messed up.
* Incompatibility with spider ports (tcp hangs on that
* socket occasionally).
* MSG_PEEK and read on same socket at once can cause crashes.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or(at your option) any later version.
*/
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/termios.h>
#include <linux/in.h>
#include <linux/fcntl.h>
#include "inet.h"
#include "dev.h"
#include "ip.h"
#include "protocol.h"
#include "icmp.h"
#include "tcp.h"
#include "skbuff.h"
#include "sock.h"
#include "arp.h"
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/segment.h>
#include <linux/mm.h>
/* NOTE(review): SEQ_TICK is not referenced in the visible part of this file. */
#define SEQ_TICK 3
/* Global with external linkage; NOTE(review): its users are outside the visible part of this file. */
unsigned long seq_offset;
/*
 * Return the smaller of two values.  Both arguments are compared as
 * unsigned ints; the winner is handed back as an int.
 */
static __inline__ int
min(unsigned int a, unsigned int b)
{
	return (b > a) ? a : b;
}
/*
 * Debug helper: print the fields of a TCP header, followed by the four
 * bytes located directly after the fixed-size header, using printk.
 * Prints nothing unless inet_debug equals DBG_TCP.
 */
void
print_th(struct tcphdr *th)
{
	unsigned char *opt;

	if (inet_debug != DBG_TCP)
		return;

	/* First byte past the fixed header; printed as "options" below. */
	opt = (unsigned char *)(th + 1);

	printk("TCP header:\n");
	printk(" source=%d, dest=%d, seq =%ld, ack_seq = %ld\n",
	       ntohs(th->source), ntohs(th->dest),
	       ntohl(th->seq), ntohl(th->ack_seq));
	printk(" fin=%d, syn=%d, rst=%d, psh=%d, ack=%d, urg=%d res1=%d res2=%d\n",
	       th->fin, th->syn, th->rst, th->psh, th->ack,
	       th->urg, th->res1, th->res2);
	printk(" window = %d, check = %d urg_ptr = %d\n",
	       ntohs(th->window), ntohs(th->check), ntohs(th->urg_ptr));
	printk(" doff = %d\n", th->doff);
	printk(" options = %d %d %d %d\n", opt[0], opt[1], opt[2], opt[3]);
}
/*
 * Pull the first buffer off the socket's receive queue and return
 * whatever skb_dequeue() hands back for sk->rqueue.
 */
static struct sk_buff *
get_firstr(struct sock *sk)
{
	struct sk_buff *head;

	head = skb_dequeue(&sk->rqueue);
	return head;
}
/*
 * Difference between two values in tcp ack terms.
 *
 * Returns the absolute distance between seq1 and seq2.  The
 * subtraction is performed in unsigned arithmetic (so it wraps) and
 * the result is then read as a signed long, which lets values that
 * straddle the wrap-around point come out as a small distance.
 *
 * The negation is also done on the unsigned value: negating the most
 * negative long as a signed quantity would be signed overflow.
 */
static long
diff(unsigned long seq1, unsigned long seq2)
{
	unsigned long delta = seq1 - seq2;

	if ((long) delta > 0)
		return (long) delta;
	return (long) (0UL - delta);
}
/*
 * Pick the TCP window to advertise for a socket, based on the
 * following constraints:
 *   1. The window can never be shrunk once it is offered (RFC 793).
 *   2. We limit memory per socket.
 *
 * For now we use NET2E3's heuristic of offering half the memory we
 * have handy.  All is not as bad as this seems however because of two
 * things.  Firstly we will bin packets even within the window in order
 * to get the data we are waiting for into the memory limit.  Secondly
 * we bin common duplicate forms at receive time.
 *
 * Better heuristics welcome.
 *
 * Returns the larger of sk->prot->rspace(sk) and the current
 * sk->window; sk->window itself is not modified here.
 */
static int tcp_select_window(struct sock *sk)
{
	int space = sk->prot->rspace(sk);

	if (space >= sk->window)
		return space;

	/* Enforce RFC793 - we've offered it, we must live with it. */
	return sk->window;
}
/*
 * Move a socket into the TIME_WAIT state: mark it fully shut down,
 * run the state-change callback unless the socket is flagged dead,
 * and arm the TIME_CLOSE timer for TCP_TIMEWAIT_LEN.
 */
static void tcp_time_wait(struct sock *sk)
{
	sk->state = TCP_TIME_WAIT;
	sk->shutdown = SHUTDOWN_MASK;

	if (sk->dead == 0)
		sk->state_change(sk);

	reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
}
/*
 * A timer event has triggered a tcp retransmit timeout.  The socket
 * xmit queue is ready and set up to send.  Because the ack receive
 * code keeps the queue straight we do nothing clever here.
 *
 * When "all" is zero the congestion window is collapsed to one packet
 * and exp_growth is cleared before retransmitting; when "all" is
 * non-zero the congestion state is left untouched.
 */
static void
tcp_retransmit(struct sock *sk, int all)
{
	if (!all) {
		/*
		 * If we had the full V-J mechanism, this might be right. But
		 * for the moment we want simple slow start after error.
		 *
		 * if (sk->cong_window > 4)
		 *	sk->cong_window = sk->cong_window / 2;
		 */
		sk->cong_window = 1;
		sk->exp_growth = 0;
	}

	/* Do the actual retransmit. */
	ip_retransmit(sk, all);
}
/*
 * Called by the ICMP module when it gets some sort of error condition.
 *
 * err < 0: the negated value is stored in sk->err and the socket's
 *          error_report callback is run.
 * err > 0: it is the icmp type << 8 | icmp code.  A source quench
 *          shrinks the congestion window by one (when above 4); any
 *          other code is translated through icmp_err_convert[], and a
 *          fatal one closes a socket that is still in SYN_SENT.
 *
 * "header" points at an IP header; it is advanced by 4 * ihl so that
 * it points at the TCP header, whose ports are used to find the socket.
 */
void
tcp_err(int err, unsigned char *header, unsigned long daddr,
	unsigned long saddr, struct inet_protocol *protocol)
{
	struct iphdr *ip_hdr = (struct iphdr *)header;
	struct tcphdr *th;
	struct sock *sk;
	int code;

	header += 4*ip_hdr->ihl;
	DPRINTF((DBG_TCP, "TCP: tcp_err(%d, hdr=%X, daddr=%X saddr=%X, protocol=%X)\n",
		 err, header, daddr, saddr, protocol));

	th = (struct tcphdr *)header;
	sk = get_sock(&tcp_prot, th->source/*dest*/, daddr, th->dest/*source*/, saddr);
	print_th(th);
	if (sk == NULL)
		return;

	if (err < 0) {
		sk->err = -err;
		sk->error_report(sk);
		return;
	}

	if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) {
		/*
		 * FIXME:
		 * For now we will just trigger a linear backoff.
		 * The slow start code should cause a real backoff here.
		 */
		if (sk->cong_window > 4)
			sk->cong_window--;
		return;
	}

	DPRINTF((DBG_TCP, "TCP: icmp_err got error\n"));
	code = err & 0xff;
	sk->err = icmp_err_convert[code].errno;

	/*
	 * If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 */
	if (icmp_err_convert[code].fatal && sk->state == TCP_SYN_SENT) {
		sk->state = TCP_CLOSE;
		/* Wake people up to see the error (see connect in sock.c) */
		sk->error_report(sk);
	}
}
/*
 * Walk down the receive queue counting readable data until we hit the end or we find a gap
 * in the received data queue (ie a frame missing that needs sending to us).
 *
 * Returns the number of bytes counted, or 0 when sk is NULL or the receive
 * queue is empty.  Counting also stops after a buffer with PSH set once at
 * least one byte has been counted.  The running total is kept in an
 * unsigned long but returned as an int.
 */
static int
tcp_readable(struct sock *sk)
{
unsigned long counted;
unsigned long amount;
struct sk_buff *skb;
int count=0;
int sum;
unsigned long flags;
DPRINTF((DBG_TCP, "tcp_readable(sk=%X)\n", sk));
if(sk && sk->debug)
printk("tcp_readable: %p - ",sk);
if (sk == NULL || skb_peek(&sk->rqueue) == NULL) /* Empty sockets are easy! */
{
if(sk && sk->debug)
printk("empty\n");
return(0);
}
counted = sk->copied_seq+1; /* Where we are at the moment */
amount = 0;
save_flags(flags); /* So nobody adds things at the wrong moment */
cli();
/* Start at the queue head; the queue is walked until skb comes back round to sk->rqueue. */
skb =(struct sk_buff *)sk->rqueue;
/* Do until a push or until we are out of data. */
do {
count++;
#ifdef OLD
/* This is wrong: It breaks Chameleon amongst other stacks */
if (count > 20) {
restore_flags(flags);
DPRINTF((DBG_TCP, "tcp_readable, more than 20 packets without a psh\n"));
printk("tcp_read: possible read_queue corruption.\n");
return(amount);
}
#endif
if (before(counted, skb->h.th->seq)) /* Found a hole so stops here */
break;
sum = skb->len -(counted - skb->h.th->seq); /* Length - header but start from where we are up to (avoid overlaps) */
/* A SYN is added to sum here and taken back off amount below, so it advances counted only. */
if (skb->h.th->syn) sum++;
if (skb->h.th->urg) {
sum -= ntohs(skb->h.th->urg_ptr); /* Dont count urg data */
}
/* A negative sum means this buffer adds nothing new; it is skipped without being counted. */
if (sum >= 0) { /* Add it up, move on */
amount += sum;
if (skb->h.th->syn) amount--;
counted += sum;
}
if (amount && skb->h.th->psh) break;
skb =(struct sk_buff *)skb->next; /* Move along */
} while(skb != sk->rqueue);
restore_flags(flags);
DPRINTF((DBG_TCP, "tcp readable returning %d bytes\n", amount));
if(sk->debug)
printk("got %lu bytes.\n",amount);
return(amount);
}
/*
 * Wait for a TCP event.  Note the oddity with SEL_IN and reading: the
 * listening socket has a receive queue of sockets to accept.
 *
 * Returns 1 when the requested condition already holds and 0
 * otherwise.  In every case select_wait() is called on sk->sleep
 * first, and the socket is marked in use on entry and released before
 * returning.
 *
 *   SEL_IN  - queued data is readable (or the socket is listening and
 *             its queue is non-empty), an error is pending, or the
 *             receive side has been shut down.
 *   SEL_OUT - at least sk->mtu bytes of write space and the socket is
 *             not in SYN_SENT/SYN_RECV; never ready once the send
 *             side has been shut down.
 *   SEL_EX  - an error is pending.
 */
static int
tcp_select(struct sock *sk, int sel_type, select_table *wait)
{
	int pending;

	DPRINTF((DBG_TCP, "tcp_select(sk=%X, sel_type = %d, wait = %X)\n",
		 sk, sel_type, wait));

	sk->inuse = 1;

	switch (sel_type) {
	case SEL_IN:
		if (sk->debug)
			printk("select in");
		select_wait(sk->sleep, wait);
		if (sk->debug)
			printk("-select out");

		if (skb_peek(&sk->rqueue) != NULL &&
		    (sk->state == TCP_LISTEN || tcp_readable(sk))) {
			release_sock(sk);
			if (sk->debug)
				printk("-select ok data\n");
			return(1);
		}

		/* Receiver error */
		if (sk->err != 0) {
			release_sock(sk);
			if (sk->debug)
				printk("-select ok error");
			return(1);
		}

		if (sk->shutdown & RCV_SHUTDOWN) {
			release_sock(sk);
			if (sk->debug)
				printk("-select ok down\n");
			return(1);
		}

		release_sock(sk);
		if (sk->debug)
			printk("-select fail\n");
		return(0);

	case SEL_OUT:
		select_wait(sk->sleep, wait);

		if (sk->shutdown & SEND_SHUTDOWN) {
			DPRINTF((DBG_TCP,
				 "write select on shutdown socket.\n"));
			/* FIXME: should this return an error? */
			release_sock(sk);
			return(0);
		}

		/*
		 * FIXME:
		 * Hack so it will probably be able to write
		 * something if it says it's ok to write.
		 */
		if (sk->prot->wspace(sk) >= sk->mtu) {
			release_sock(sk);
			/* This should cause connect to work ok. */
			return !(sk->state == TCP_SYN_RECV ||
				 sk->state == TCP_SYN_SENT);
		}

		DPRINTF((DBG_TCP,
			 "tcp_select: sleeping on write sk->wmem_alloc = %d, "
			 "sk->packets_out = %d\n"
			 "sk->wback = %X, sk->wfront = %X\n"
			 "sk->send_seq = %u, sk->window_seq=%u\n",
			 sk->wmem_alloc, sk->packets_out,
			 sk->wback, sk->wfront,
			 sk->send_seq, sk->window_seq));
		release_sock(sk);
		return(0);

	case SEL_EX:
		select_wait(sk->sleep,wait);
		/* Sample the error before letting go of the socket. */
		pending = sk->err ? 1 : 0;
		release_sock(sk);
		return(pending);
	}

	/* Unknown selection type: nothing to report. */
	release_sock(sk);
	return(0);
}
/*
 * Handle TCP-level ioctl requests on a socket.
 *
 *   DDIOCSDBG  - handed to dbg_ioctl() together with DBG_TCP.
 *   TIOCINQ    - stores tcp_readable(sk) at arg.
 *   SIOCATMARK - stores 1 at arg when the first queued buffer starts
 *                at copied_seq+1 and has URG set, otherwise 0.
 *   TIOCOUTQ   - stores sk->prot->wspace(sk) at arg.
 *
 * For the last three, arg is used as the address of an unsigned long
 * that is checked with verify_area(VERIFY_WRITE, ...) and then written
 * with put_fs_long().
 *
 * Returns 0 on success, the verify_area() error if the address check
 * fails, and -EINVAL for TIOCINQ/TIOCOUTQ on a listening socket or for
 * an unrecognised command.
 */
int
tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int err;

	DPRINTF((DBG_TCP, "tcp_ioctl(sk=%X, cmd = %d, arg=%X)\n", sk, cmd, arg));
	switch(cmd) {
	case DDIOCSDBG:
		return(dbg_ioctl((void *) arg, DBG_TCP));

	case TIOCINQ:
#ifdef FIXME /* FIXME: */
	case FIONREAD:
#endif
		{
			unsigned long amount;

			if (sk->state == TCP_LISTEN) return(-EINVAL);

			/* Hold the socket while the receive queue is walked. */
			sk->inuse = 1;
			amount = tcp_readable(sk);
			release_sock(sk);
			DPRINTF((DBG_TCP, "returning %d\n", amount));
			err=verify_area(VERIFY_WRITE,(void *)arg,
					sizeof(unsigned long));
			if(err)
				return err;
			put_fs_long(amount,(unsigned long *)arg);
			return(0);
		}

	case SIOCATMARK:
		{
			struct sk_buff *skb;
			int answ = 0;

			/*
			 * Try to figure out if we need to read
			 * some urgent data.
			 */
			sk->inuse = 1;
			if ((skb=skb_peek(&sk->rqueue)) != NULL)
			{
				if (sk->copied_seq+1 == skb->h.th->seq && skb->h.th->urg)
					answ = 1;
			}
			release_sock(sk);
			err=verify_area(VERIFY_WRITE,(void *) arg,
					sizeof(unsigned long));
			if(err)
				return err;
			/*
			 * Store through the same pointer type that was
			 * verified above and that the other commands use.
			 */
			put_fs_long(answ,(unsigned long *) arg);
			return(0);
		}

	case TIOCOUTQ:
		{
			unsigned long amount;

			if (sk->state == TCP_LISTEN) return(-EINVAL);
			amount = sk->prot->wspace(sk);
			err=verify_area(VERIFY_WRITE,(void *)arg,
					sizeof(unsigned long));
			if(err)
				return err;
			put_fs_long(amount,(unsigned long *)arg);
			return(0);
		}

	default:
		return(-EINVAL);
	}
}
/*
 * This routine computes a TCP checksum.
 *
 * th    - start of the bytes to sum
 * len   - number of bytes to sum starting at th
 * saddr - source address; when 0, my_addr() is used instead
 * daddr - destination address
 *
 * The sum starts from daddr + saddr + a word built from len and
 * IPPROTO_TCP, then adds the len bytes at th.  Returns the low 16 bits
 * of the complemented sum.  All arithmetic is x86 inline assembly.
 */
unsigned short
tcp_check(struct tcphdr *th, int len,
unsigned long saddr, unsigned long daddr)
{
unsigned long sum;
if (saddr == 0) saddr = my_addr();
print_th(th);
/* Seed the sum: ebx (daddr) += ecx (saddr), += edx (length/protocol word), then fold in the final carry. */
__asm__("\t addl %%ecx,%%ebx\n"
"\t adcl %%edx,%%ebx\n"
"\t adcl $0, %%ebx\n"
: "=b"(sum)
: "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
: "cx","bx","dx" );
/* Add len/4 32-bit words from th with carry; th (in esi) is advanced past them by lodsl. */
if (len > 3) {
__asm__("\tclc\n"
"1:\n"
"\t lodsl\n"
"\t adcl %%eax, %%ebx\n"
"\t loop 1b\n"
"\t adcl $0, %%ebx\n"
: "=b"(sum) , "=S"(th)
: "0"(sum), "c"(len/4) ,"1"(th)
: "ax", "cx", "bx", "si" );
}
/* Convert from 32 bits to 16 bits. */
__asm__("\t movl %%ebx, %%ecx\n"
"\t shrl $16,%%ecx\n"
"\t addw %%cx, %%bx\n"
"\t adcw $0, %%bx\n"
: "=b"(sum)
: "0"(sum)
: "bx", "cx");
/* Check for an extra word. */
if ((len & 2) != 0) {
__asm__("\t lodsw\n"
"\t addw %%ax,%%bx\n"
"\t adcw $0, %%bx\n"
: "=b"(sum), "=S"(th)
: "0"(sum) ,"1"(th)
: "si", "ax", "bx");
}
/* Now check for the extra byte. */
if ((len & 1) != 0) {
/* The byte is loaded into al and ah is zeroed before the 16-bit add. */
__asm__("\t lodsb\n"
"\t movb $0,%%ah\n"
"\t addw %%ax,%%bx\n"
"\t adcw $0, %%bx\n"
: "=b"(sum)
: "0"(sum) ,"S"(th)
: "si", "ax", "bx");
}
/* We only want the bottom 16 bits, but we never cleared the top 16. */
return((~sum) & 0xffff);
}
/*
 * Fill in the checksum field of an outgoing TCP header.
 *
 * The field is zeroed first so that it does not contribute to the sum
 * computed by tcp_check() over the len bytes at th.  The sk argument
 * is not used.
 */
void
tcp_send_check(struct tcphdr *th, unsigned long saddr,
	       unsigned long daddr, int len, struct sock *sk)
{
	unsigned short csum;

	th->check = 0;
	csum = tcp_check(th, len, saddr, daddr);
	th->check = csum;
}
/*
 * Detach and return the first buffer on the socket's send_tmp list,
 * or NULL when the list is empty.  The list is updated between
 * save_flags()/cli() and restore_flags(); the returned buffer's next
 * pointer is cleared.
 */
static struct sk_buff * dequeue_partial(struct sock * sk)
{
	struct sk_buff *head;
	unsigned long saved;

	save_flags(saved);
	cli();

	head = sk->send_tmp;
	if (head != NULL) {
		sk->send_tmp = head->next;
		head->next = NULL;
	}

	restore_flags(saved);
	return head;
}
/*
 * Push a buffer onto the front of the socket's send_tmp list.  The
 * list is updated between save_flags()/cli() and restore_flags().
 */
static void enqueue_partial(struct sk_buff * skb, struct sock * sk)
{
	unsigned long saved;

	save_flags(saved);
	cli();

	/* Link in front of the current head, then become the head. */
	skb->next = sk->send_tmp;
	sk->send_tmp = skb;

	restore_flags(saved);
}
static void tcp_send_partial(struct sock *sk)
{
struct sk_buff *skb;
if (sk == NULL)
return;
while ((skb = dequeue_partial(sk)) != NULL) {
/* If we have queued a header size packet.. */
if(skb->len-(unsigned long)skb->h.th + (unsigned long)skb->data == sizeof(struct tcphdr)) {
/* If its got a syn or fin its notionally included in the size..*/
if(!skb->h.th->syn && !skb->h.th->fin) {
printk("tcp_send_partial: attempt to queue a bogon.\n");
kfree_skb(skb,FREE_WRITE);
return;
}
}
/* We need to complete and send the packet. */
tcp_send_check(skb->h.th, sk->saddr, sk->daddr,
skb->len-(unsigned long)skb->h.th +
(unsigned long)skb->data, sk);
skb->h.seq = sk->send_seq;
if (after(sk->send_seq , sk->window_seq) ||
(sk->retransmits && sk->timeout == TIME_WRITE) ||
sk->packets_out >= sk->cong_window) {
DPRINTF((DBG_TCP, "sk->cong_window = %d, sk->packets_out = %d\n",
sk->cong_window, sk->packets_out));
DPRINTF((DBG_TCP, "sk->send_seq = %d, sk->window_seq = %d\n",
sk->send_seq, sk->window_seq));
skb->next = NULL;
skb->magic = TCP_WRITE_QUEUE_MAGIC;
if (sk->wback == NULL) {
sk->wfront=skb;
} else {
sk->wback->next = skb;
}
sk->wback = skb;
if (before(sk->window_seq, sk->wfront->h.seq) &&
sk->send_head == NULL &&
sk->ack_backlog == 0)
reset_timer(sk, TIME_PROBE0,
backoff(sk->backoff) * (2 * sk->mdev + sk->rtt));
} else {
sk->prot->queue_xmit(sk, skb->dev, skb,0);
}
}
}
/*
 * This routine sends an ack and also updates the window.
 *
 * sequence - value placed in the outgoing seq field
 * ack      - value placed in the outgoing ack_seq field
 * sk       - socket the ack is sent for
 * th       - header being answered; copied as a template and its
 *            source/dest ports are swapped into the reply
 * daddr    - destination address for the reply
 *
 * Nothing is sent when sk->zapped is set.  If no buffer can be
 * allocated, sk->ack_backlog is incremented and, for a connected socket
 * not already on TIME_WRITE, a short TIME_WRITE timer is started.
 */
static void
tcp_send_ack(unsigned long sequence, unsigned long ack,
struct sock *sk,
struct tcphdr *th, unsigned long daddr)
{
struct sk_buff *buff;
struct tcphdr *t1;
struct device *dev = NULL;
int tmp;
if(sk->zapped)
return; /* We have been reset, we may not send again */
/*
* We need to grab some memory, and put together an ack,
* and then put it into the queue to be sent.
*/
buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
if (buff == NULL) {
/* Force it to send an ack. */
sk->ack_backlog++;
if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) {
reset_timer(sk, TIME_WRITE, 10);
}
if (inet_debug == DBG_SLIP) printk("\rtcp_ack: malloc failed\n");
return;
}
buff->mem_addr = buff;
buff->mem_len = MAX_ACK_SIZE;
buff->len = sizeof(struct tcphdr);
buff->sk = sk;
t1 =(struct tcphdr *) buff->data;
/* Put in the IP header and routing stuff. */
tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
if (tmp < 0) {
buff->free=1;
sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
if (inet_debug == DBG_SLIP) printk("\rtcp_ack: build_header failed\n");
return;
}
/* tmp is the number of bytes build_header() put in front; the TCP header goes right after them. */
buff->len += tmp;
t1 =(struct tcphdr *)((char *)t1 +tmp);
/* FIXME: */
memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */
/* swap the send and the receive. */
t1->dest = th->source;
t1->source = th->dest;
t1->seq = ntohl(sequence);
t1->ack = 1;
sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/
t1->window = ntohs(sk->window);
/* Clear every flag inherited from the copied header except ACK. */
t1->res1 = 0;
t1->res2 = 0;
t1->rst = 0;
t1->urg = 0;
t1->syn = 0;
t1->psh = 0;
t1->fin = 0;
/* This ack covers everything received so far: reset the delayed-ack bookkeeping. */
if (ack == sk->acked_seq) {
sk->ack_backlog = 0;
sk->bytes_rcv = 0;
sk->ack_timed = 0;
/* Nothing queued for sending and a TIME_WRITE timer pending: swap it for keepalive, or drop it. */
if (sk->send_head == NULL && sk->wfront == NULL && sk->timeout == TIME_WRITE)
{
if(sk->keepopen)
reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
else
delete_timer(sk);
}
}
t1->ack_seq = ntohl(ack);
t1->doff = sizeof(*t1)/4;
tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
if (sk->debug)
printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
sk->prot->queue_xmit(sk, dev, buff, 1);
}
/*
 * Build a generic TCP header for outgoing data at th.
 *
 * The socket's template header (sk->dummy_th) is copied in and then
 * the sequence number, acknowledgement and window are filled from the
 * socket.  PSH is set only when "push" is zero.  Because the header
 * carries an ack, the socket's pending-ack counters are cleared, and
 * sk->window is updated to the value advertised.
 *
 * Returns the size of the header written.
 */
static int
tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
{
	/* FIXME: want to get rid of this. */
	memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));

	/* Flags and header length. */
	th->ack = 1;
	th->fin = 0;
	th->psh = (push == 0) ? 1 : 0;
	th->doff = sizeof(*th)/4;

	/* Sequence space. */
	th->seq = htonl(sk->send_seq);
	th->ack_seq = htonl(sk->acked_seq);

	/* This header acknowledges everything received so far. */
	sk->ack_backlog = 0;
	sk->bytes_rcv = 0;
	sk->ack_timed = 0;

	sk->window = tcp_select_window(sk)/*sk->prot->rspace(sk)*/;
	th->window = htons(sk->window);

	return(sizeof(*th));
}
/*
 * This routine copies from a user buffer into a socket,
 * and starts the transmit system.
 *
 * sk       - socket to write to
 * from     - source buffer (read with memcpy_fromfs())
 * len      - number of bytes to write
 * nonblock - when non-zero, return -EAGAIN instead of sleeping
 * flags    - MSG_OOB marks the data urgent and bypasses packet coalescing
 *
 * Returns the number of bytes copied if any were, otherwise a negative
 * error: the pending socket error, -EPIPE, -EAGAIN, -ERESTARTSYS, or the
 * negative value returned by build_header().
 */
static int
tcp_write(struct sock *sk, unsigned char *from,
int len, int nonblock, unsigned flags)
{
int copied = 0;
int copy;
int tmp;
struct sk_buff *skb;
struct sk_buff *send_tmp;
unsigned char *buff;
struct proto *prot;
struct device *dev = NULL;
DPRINTF((DBG_TCP, "tcp_write(sk=%X, from=%X, len=%d, nonblock=%d, flags=%X)\n",
sk, from, len, nonblock, flags));
/* Mark the socket in use; it is released again before every sleep and on the exit paths below. */
sk->inuse=1;
prot = sk->prot;
while(len > 0) {
if (sk->err) { /* Stop on an error */
release_sock(sk);
if (copied) return(copied);
tmp = -sk->err;
sk->err = 0;
return(tmp);
}
/* First thing we do is make sure that we are established. */
if (sk->shutdown & SEND_SHUTDOWN) {
release_sock(sk);
/* NOTE(review): sk->err is written after release_sock() here; on a partial write EPIPE is left pending. */
sk->err = EPIPE;
if (copied) return(copied);
sk->err = 0;
return(-EPIPE);
}
/* Wait for a connection to finish. */
while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) {
if (sk->err) {
release_sock(sk);
if (copied) return(copied);
tmp = -sk->err;
sk->err = 0;
return(tmp);
}
/* Not connected and not in the middle of connecting: the write cannot succeed. */
if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) {
release_sock(sk);
DPRINTF((DBG_TCP, "tcp_write: return 1\n"));
if (copied) return(copied);
if (sk->err) {
tmp = -sk->err;
sk->err = 0;
return(tmp);
}
if (sk->keepopen) {
send_sig(SIGPIPE, current, 0);
}
return(-EPIPE);
}
if (nonblock || copied) {
release_sock(sk);
DPRINTF((DBG_TCP, "tcp_write: return 2\n"));
if (copied) return(copied);
return(-EAGAIN);
}
release_sock(sk);
/* Re-test the state after cli() before sleeping, then sleep until woken. */
cli();
if (sk->state != TCP_ESTABLISHED &&
sk->state != TCP_CLOSE_WAIT && sk->err == 0) {
interruptible_sleep_on(sk->sleep);
if (current->signal & ~current->blocked) {
sti();
DPRINTF((DBG_TCP, "tcp_write: return 3\n"));
if (copied) return(copied);
return(-ERESTARTSYS);
}
}
sk->inuse = 1;
sti();
}
/* Now we need to check if we have a half built packet. */
if ((skb = dequeue_partial(sk)) != NULL) {
int hdrlen;
/* IP header + TCP header */
hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
+ sizeof(struct tcphdr);
/* If sk->mtu has been changed this could cause problems. */
/* Add more stuff to the end of skb->len */
if (!(flags & MSG_OOB)) {
/*
* NOTE(review): min() takes unsigned arguments, so if the queued payload
* already exceeds sk->mtu the subtraction wraps and copy becomes len
* rather than a value <= 0 - confirm whether the check below can fire.
*/
copy = min(sk->mtu - (skb->len - hdrlen), len);
/* FIXME: this is really a bug. */
if (copy <= 0) {
printk("TCP: **bug**: \"copy\" <= 0!!\n");
copy = 0;
}
memcpy_fromfs(skb->data + skb->len, from, copy);
skb->len += copy;
from += copy;
copied += copy;
len -= copy;
sk->send_seq += copy;
}
/* Put it back on the partial list; flush the list if the packet is full or the data is urgent. */
enqueue_partial(skb, sk);
if ((skb->len - hdrlen) >= sk->mtu || (flags & MSG_OOB)) {
tcp_send_partial(sk);
}
continue;
}
/*
* We also need to worry about the window.
* If window < 1/4 offered window, don't use it. That's
* silly window prevention. What we actually do is
* use the whole MTU. Since the results in the right
* edge of the packet being outside the window, it will
* be queued for later rather than sent.
*/
copy = diff(sk->window_seq, sk->send_seq);
if (copy < (diff(sk->window_seq, sk->rcv_ack_seq) >> 2))
copy = sk->mtu;
copy = min(copy, sk->mtu);
copy = min(copy, len);
/* We should really check the window here also. */
/*
* With packets already in flight and less than a full MTU to send, allocate
* a full-sized buffer and remember it in send_tmp so it is parked on the
* partial list instead of being sent straight away.
*/
if (sk->packets_out && copy < sk->mtu && !(flags & MSG_OOB)) {
/* We will release the socket incase we sleep here. */
release_sock(sk);
skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header + sizeof(*skb), 0, GFP_KERNEL);
sk->inuse = 1;
send_tmp = skb;
} else {
/* We will release the socket incase we sleep here. */
release_sock(sk);
skb = prot->wmalloc(sk, copy + prot->max_header + sizeof(*skb), 0, GFP_KERNEL);
sk->inuse = 1;
send_tmp = NULL;
}
/* If we didn't get any memory, we need to sleep. */
if (skb == NULL) {
if (nonblock /* || copied */) {
release_sock(sk);
DPRINTF((DBG_TCP, "tcp_write: return 4\n"));
if (copied) return(copied);
return(-EAGAIN);
}
/* FIXME: here is another race condition. */
tmp = sk->wmem_alloc;
release_sock(sk);
cli();
/* Again we will try to avoid it. */
/* Only sleep if no write memory was freed while the socket was released. */
if (tmp <= sk->wmem_alloc &&
(sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
&& sk->err == 0) {
interruptible_sleep_on(sk->sleep);
if (current->signal & ~current->blocked) {
sti();
DPRINTF((DBG_TCP, "tcp_write: return 5\n"));
if (copied) return(copied);
return(-ERESTARTSYS);
}
}
sk->inuse = 1;
sti();
continue;
}
skb->len = 0;
skb->sk = sk;
skb->free = 0;
buff = skb->data;
/*
* FIXME: we need to optimize this.
* Perhaps some hints here would be good.
*/
tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
if (tmp < 0 ) {
prot->wfree(sk, skb->mem_addr, skb->mem_len);
release_sock(sk);
DPRINTF((DBG_TCP, "tcp_write: return 6\n"));
if (copied) return(copied);
return(tmp);
}
/* tmp bytes of lower-level header are in place; the TCP header follows them. */
skb->len += tmp;
skb->dev = dev;
buff += tmp;
skb->h.th =(struct tcphdr *) buff;
/* The third argument is the data left over after this packet; PSH is set when it is zero. */
tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
if (tmp < 0) {
prot->wfree(sk, skb->mem_addr, skb->mem_len);
release_sock(sk);
DPRINTF((DBG_TCP, "tcp_write: return 7\n"));
if (copied) return(copied);
return(tmp);
}
if (flags & MSG_OOB) {
((struct tcphdr *)buff)->urg = 1;
((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
}
skb->len += tmp;
memcpy_fromfs(buff+tmp, from, copy);
from += copy;
copied += copy;
len -= copy;
skb->len += copy;
skb->free = 0;
sk->send_seq += copy;
/* A buffer earmarked as partial is parked for later coalescing rather than sent now. */
if (send_tmp != NULL) {
enqueue_partial(send_tmp, sk);
continue;
}
tcp_send_check((struct tcphdr *)buff, sk->saddr, sk->daddr,
copy + sizeof(struct tcphdr), sk);
skb->h.seq = sk->send_seq;
/* Queue instead of sending if outside the window, mid-retransmit, or the congestion window is full. */
if (after(sk->send_seq , sk->window_seq) ||
(sk->retransmits && sk->timeout == TIME_WRITE) ||
sk->packets_out >= sk->cong_window) {
DPRINTF((DBG_TCP, "sk->cong_window = %d, sk->packets_out = %d\n",
sk->cong_window, sk->packets_out));
DPRINTF((DBG_TCP, "sk->send_seq = %d, sk->window_seq = %d\n",
sk->send_seq, sk->window_seq));
skb->next = NULL;
skb->magic = TCP_WRITE_QUEUE_MAGIC;
if (sk->wback == NULL) {
sk->wfront = skb;
} else {
sk->wback->next = skb;
}
sk->wback = skb;
if (before(sk->window_seq, sk->wfront->h.seq) &&
sk->send_head == NULL &&
sk->ack_backlog == 0)
reset_timer(sk, TIME_PROBE0,
backoff(sk->backoff) * (2 * sk->mdev + sk->rtt));
} else {
prot->queue_xmit(sk, dev, skb,0);
}
}
sk->err = 0;
/*
* Nagles rule. Turn Nagle off with TCP_NODELAY for highly
* interactive fast network servers. It's meant to be on and
* it really improves the throughput though not the echo time
* on my slow slip link - Alan
*/
/* Avoid possible race on send_tmp - c/o Johannes Stille */
if(sk->send_tmp &&
((!sk->packets_out)
/* If not nagling we can send on the before case too.. */
|| (sk->nonagle && before(sk->send_seq , sk->window_seq))
))
tcp_send_partial(sk);
/* -- */
release_sock(sk);
DPRINTF((DBG_TCP, "tcp_write: return 8\n"));
return(copied);
}
/*
 * sendto() on a TCP socket.
 *
 * The destination in addr must match the peer the socket is already
 * talking to (same port as sk->dummy_th.dest, same address as
 * sk->daddr) and have family AF_INET or 0; the data is then passed on
 * to tcp_write().
 *
 * Returns whatever tcp_write() returns, or -EINVAL if addr_len is too
 * small (including negative) or the address does not match.
 */
static int
tcp_sendto(struct sock *sk, unsigned char *from,
	   int len, int nonblock, unsigned flags,
	   struct sockaddr_in *addr, int addr_len)
{
	struct sockaddr_in sin;

	/*
	 * Compare as signed: against the unsigned sizeof a negative
	 * addr_len would be converted to a huge value and slip through.
	 */
	if (addr_len < (int) sizeof(sin))
		return(-EINVAL);

	memcpy_fromfs(&sin, addr, sizeof(sin));

	if (sin.sin_family && sin.sin_family != AF_INET)
		return(-EINVAL);
	if (sin.sin_port != sk->dummy_th.dest)
		return(-EINVAL);
	if (sin.sin_addr.s_addr != sk->daddr)
		return(-EINVAL);

	return(tcp_write(sk, from, len, nonblock, flags));
}
/*
 * Send a bare ack carrying the current window, provided acks are owed
 * (sk->ack_backlog is non-zero).
 *
 * The header is built from the socket's template (sk->dummy_th) with
 * only ACK set among the flags cleared here.  If no buffer is
 * available a TIME_WRITE timer is armed so the ack is retried shortly;
 * if build_header() fails the buffer is freed and nothing is sent.
 */
static void
tcp_read_wakeup(struct sock *sk)
{
	struct sk_buff *ack_skb;
	struct tcphdr *hdr;
	struct device *dev = NULL;
	int ip_len;

	DPRINTF((DBG_TCP, "in tcp read wakeup\n"));
	if (sk->ack_backlog == 0)
		return;

	/*
	 * FIXME: we need to put code here to prevent this routine from
	 * being called.  Being called once in a while is ok, so only check
	 * if this is the second time in a row.
	 */

	/*
	 * We need to grab some memory, and put together an ack,
	 * and then put it into the queue to be sent.
	 */
	ack_skb = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
	if (ack_skb == NULL) {
		/* Try again real soon. */
		reset_timer(sk, TIME_WRITE, 10);
		return;
	}

	ack_skb->mem_addr = ack_skb;
	ack_skb->mem_len = MAX_ACK_SIZE;
	ack_skb->len = sizeof(struct tcphdr);
	ack_skb->sk = sk;

	/* Put in the IP header and routing stuff. */
	ip_len = sk->prot->build_header(ack_skb, sk->saddr, sk->daddr, &dev,
					IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
	if (ip_len < 0) {
		ack_skb->free=1;
		sk->prot->wfree(sk, ack_skb->mem_addr, ack_skb->mem_len);
		return;
	}
	ack_skb->len += ip_len;

	/* The TCP header sits right behind what build_header() wrote. */
	hdr = (struct tcphdr *)(ack_skb->data +ip_len);
	memcpy(hdr,(void *) &sk->dummy_th, sizeof(*hdr));

	hdr->seq = ntohl(sk->send_seq);
	hdr->ack_seq = ntohl(sk->acked_seq);
	hdr->doff = sizeof(*hdr)/4;
	hdr->ack = 1;
	hdr->res1 = 0;
	hdr->res2 = 0;
	hdr->rst = 0;
	hdr->urg = 0;
	hdr->syn = 0;
	hdr->psh = 0;

	/* The backlog is being acknowledged now. */
	sk->ack_backlog = 0;
	sk->bytes_rcv = 0;

	sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/
	hdr->window = ntohs(sk->window);

	tcp_send_check(hdr, sk->saddr, sk->daddr, sizeof(*hdr), sk);
	sk->prot->queue_xmit(sk, dev, ack_skb, 1);
}
/*
 * FIXME:
 * This routine frees used buffers.
 * It should consider sending an ACK to let the
 * other end know we now have a bigger window.
 *
 * Buffers are removed from the head of the receive queue for as long as
 * the head buffer is marked used.  If the receive space reported by
 * rspace() differs from its value on entry, an ack is either sent at once
 * or scheduled through the TIME_WRITE timer.
 */
static void
cleanup_rbuf(struct sock *sk)
{
unsigned long flags;
int left;
struct sk_buff *skb;
if(sk->debug)
printk("cleaning rbuf for sk=%p\n", sk);
save_flags(flags);
cli();
/* Remember the receive space before anything is freed, for comparison afterwards. */
left = sk->prot->rspace(sk);
/*
* We have to loop through all the buffer headers,
* and try to free up all the space we can.
*/
while((skb=skb_peek(&sk->rqueue)) != NULL )
{
/* Stop at the first buffer that has not been marked used. */
if (!skb->used)
break;
skb_unlink(skb);
skb->sk = sk;
kfree_skb(skb, FREE_READ);
}
restore_flags(flags);
/*
* FIXME:
* At this point we should send an ack if the difference
* in the window, and the amount of space is bigger than
* TCP_WINDOW_DIFF.
*/
DPRINTF((DBG_TCP, "sk->window left = %d, sk->prot->rspace(sk)=%d\n",
sk->window - sk->bytes_rcv, sk->prot->rspace(sk)));
if(sk->debug)
printk("sk->rspace = %lu, was %d\n", sk->prot->rspace(sk),
left);
if (sk->prot->rspace(sk) != left)
{
/*
* This area has caused the most trouble. The current strategy
* is to simply do nothing if the other end has room to send at
* least 3 full packets, because the ack from those will auto-
* matically update the window. If the other end doesn't think
* we have much space left, but we have room for at least 1 more
* complete packet than it thinks we do, we will send an ack
* immediately. Otherwise we will wait up to .5 seconds in case
* the user reads some more.
*/
sk->ack_backlog++;
/* (sk->window - sk->bytes_rcv) is the part of the offered window not yet consumed. */
if ((sk->prot->rspace(sk) > (sk->window - sk->bytes_rcv + sk->mtu))) {
/* Send an ack right now. */
tcp_read_wakeup(sk);
} else {
/* Force it to send an ack soon. */
/* Re-arm for TCP_ACK_TIME if no timer was pending or the pending expiry is larger; otherwise restore it. */
int was_active = del_timer(&sk->timer);
if (!was_active || TCP_ACK_TIME < sk->timer.expires) {
reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
} else
add_timer(&sk->timer);
}
}
}
/*
 * Handle reading urgent data.
 *
 * sk       - socket to read from
 * nonblock - when non-zero, return -EAGAIN instead of sleeping
 * to       - destination buffer (written with memcpy_tofs())
 * len      - maximum number of bytes to copy
 * flags    - with MSG_PEEK the urgent data is left marked as unread
 *
 * Returns the number of bytes copied from the first queued buffer that
 * has URG set and has not yet been consumed, 0 at end of stream, or a
 * negative error (pending socket error, -ENOTCONN, -EAGAIN, -ERESTARTSYS).
 */
static int
tcp_read_urg(struct sock * sk, int nonblock,
unsigned char *to, int len, unsigned flags)
{
int copied = 0;
struct sk_buff *skb;
DPRINTF((DBG_TCP, "tcp_read_urg(sk=%X, to=%X, len=%d, flags=%X)\n",
sk, to, len, flags));
while(len > 0)
{
sk->inuse = 1;
/* Wait until the socket has urgent data outstanding and something is queued. */
while(sk->urg==0 || skb_peek(&sk->rqueue) == NULL) {
if (sk->err) {
int tmp;
release_sock(sk);
if (copied) return(copied);
tmp = -sk->err;
sk->err = 0;
return(tmp);
}
/* On a closed socket the first call returns 0 and sets sk->done; later calls get -ENOTCONN. */
if (sk->state == TCP_CLOSE || sk->done) {
release_sock(sk);
if (copied) return(copied);
if (!sk->done) {
sk->done = 1;
return(0);
}
return(-ENOTCONN);
}
if (sk->shutdown & RCV_SHUTDOWN) {
release_sock(sk);
if (copied == 0)
sk->done = 1;
return(copied);
}
if (nonblock || copied) {
release_sock(sk);
if (copied) return(copied);
return(-EAGAIN);
}
/* Now at this point, we may have gotten some data. */
release_sock(sk);
/* Re-test the wait condition after cli() before going to sleep. */
cli();
if ((sk->urg == 0 || skb_peek(&sk->rqueue) == NULL) &&
sk->err == 0 && !(sk->shutdown & RCV_SHUTDOWN)) {
interruptible_sleep_on(sk->sleep);
if (current->signal & ~current->blocked) {
sti();
if (copied) return(copied);
return(-ERESTARTSYS);
}
}
sk->inuse = 1;
sti();
}
/* Scan the receive queue for the first buffer with unread urgent data. */
skb = skb_peek(&sk->rqueue);
do {
int amt;
if (skb->h.th->urg && !skb->urg_used) {
/* A zero urgent pointer is rewritten in place to the buffer length. */
if (skb->h.th->urg_ptr == 0) {
skb->h.th->urg_ptr = ntohs(skb->len);
}
amt = min(ntohs(skb->h.th->urg_ptr),len);
if(amt)
{
/* The data starts doff*4 bytes past the start of the TCP header. */
memcpy_tofs(to,(unsigned char *)(skb->h.th) +
skb->h.th->doff*4, amt);
}
if (!(flags & MSG_PEEK)) {
skb->urg_used = 1;
sk->urg--;
}
release_sock(sk);
copied += amt;
return(copied);
}
skb =(struct sk_buff *)skb->next;
} while(skb != sk->rqueue);
/* NOTE(review): if sk->urg is non-zero but no queued buffer qualifies, the outer loop comes straight back here without sleeping - confirm this cannot spin. */
}
/*sk->urg = 0;*/
release_sock(sk);
return(0);
}
/* This routine copies from a sock struct into the user buffer. */
/*
 * tcp_read - copy in-sequence received data from sk's receive queue to
 * a user buffer.
 *
 * sk:       socket being read
 * to:       user-space destination (validated with verify_area)
 * len:      maximum number of bytes to copy
 * nonblock: when non-zero, return -EAGAIN instead of sleeping if no
 *           data is available
 * flags:    MSG_OOB passes the whole request to tcp_read_urg();
 *           MSG_PEEK copies data without advancing sk->copied_seq and
 *           without marking buffers as used
 *
 * Returns the number of bytes copied.  Returns 0 when len is 0, and on
 * the first read after the connection reached TCP_CLOSE or the receive
 * side was shut down with nothing copied.  Negative returns are:
 * -EINVAL (len < 0), the verify_area() error, -ENOTCONN (listening
 * socket, or closed and already reported via sk->done), -sk->err,
 * -EAGAIN (non-blocking with nothing to read), -ERESTARTSYS (signal
 * while sleeping with nothing copied) and -EINTR (unread urgent data
 * reached with nothing copied; SIGURG is sent to the caller first).
 */
static int
tcp_read(struct sock *sk, unsigned char *to,
int len, int nonblock, unsigned flags)
{
int copied=0; /* will be used to say how much has been copied. */
struct sk_buff *skb;
unsigned long offset;
unsigned long used;
int err;
if (len == 0) return(0);
if (len < 0) {
return(-EINVAL);
}
err=verify_area(VERIFY_WRITE,to,len);
if(err)
return err;
/* This error should be checked. */
if (sk->state == TCP_LISTEN) return(-ENOTCONN);
/* Urgent data needs to be handled specially. */
if ((flags & MSG_OOB))
return(tcp_read_urg(sk, nonblock, to, len, flags));
/* So no-one else will use this socket. */
sk->inuse = 1;
skb=skb_peek(&sk->rqueue);
DPRINTF((DBG_TCP, "tcp_read(sk=%X, to=%X, len=%d, nonblock=%d, flags=%X)\n",
sk, to, len, nonblock, flags));
/* Outer loop: one pass per receive buffer until the request is filled. */
while(len > 0) {
/* skb->used just checks to see if we've gone all the way around. */
/* While no data, or first data indicates some is missing, or data is used */
while(skb == NULL ||
before(sk->copied_seq+1, skb->h.th->seq) || skb->used) {
DPRINTF((DBG_TCP, "skb = %X:\n", skb));
cleanup_rbuf(sk);
/* A pending socket error is reported (and cleared) only if nothing was copied. */
if (sk->err)
{
int tmp;
release_sock(sk);
if (copied)
{
DPRINTF((DBG_TCP, "tcp_read: returning %d\n",
copied));
return(copied);
}
tmp = -sk->err;
sk->err = 0;
return(tmp);
}
/*
 * Closed connection: hand back what we have; otherwise the first
 * such read returns 0 and sets sk->done, later ones get -ENOTCONN.
 */
if (sk->state == TCP_CLOSE)
{
release_sock(sk);
if (copied) {
DPRINTF((DBG_TCP, "tcp_read: returning %d\n",
copied));
return(copied);
}
if (!sk->done) {
sk->done = 1;
return(0);
}
return(-ENOTCONN);
}
/* Receive side shut down: return what was copied (possibly 0). */
if (sk->shutdown & RCV_SHUTDOWN)
{
release_sock(sk);
if (copied == 0) sk->done = 1;
DPRINTF((DBG_TCP, "tcp_read: returning %d\n", copied));
return(copied);
}
/* Never sleep once something was copied, or when non-blocking. */
if (nonblock || copied)
{
release_sock(sk);
if(sk->debug)
printk("read: EAGAIN\n");
if (copied)
{
DPRINTF((DBG_TCP, "tcp_read: returning %d\n",
copied));
return(copied);
}
return(-EAGAIN);
}
/*
 * NOTE(review): this branch looks unreachable - the test above has
 * already returned whenever copied is non-zero.
 */
if ((flags & MSG_PEEK) && copied != 0)
{
release_sock(sk);
DPRINTF((DBG_TCP, "tcp_read: returning %d\n", copied));
return(copied);
}
DPRINTF((DBG_TCP, "tcp_read about to sleep. state = %d\n",
sk->state));
release_sock(sk);
/*
* Now we may have some data waiting or we could
* have changed state.
*/
/* Interrupts are disabled between re-checking the queue and sleeping. */
cli();
if (sk->shutdown & RCV_SHUTDOWN || sk->err != 0) {
sk->inuse = 1;
sti();
continue;
}
/* Sleep only if the queue is still empty or its head is not yet in sequence. */
if (skb_peek(&sk->rqueue) == NULL ||
before(sk->copied_seq+1, sk->rqueue->h.th->seq)) {
if(sk->debug)
printk("Read wait sleep\n");
interruptible_sleep_on(sk->sleep);
if(sk->debug)
printk("Read wait wakes\n");
/* Woken by an unblocked signal: the socket was already released above. */
if (current->signal & ~current->blocked) {
sti();
if (copied) {
DPRINTF((DBG_TCP, "tcp_read: returning %d\n",
copied));
return(copied);
}
return(-ERESTARTSYS);
}
}
sk->inuse = 1;
sti();
DPRINTF((DBG_TCP, "tcp_read woke up. \n"));
skb=skb_peek(&sk->rqueue);
/* That may have been null if we were beaten, if so we loop again */
}
/*
* Copy anything from the current block that needs
* to go into the user buffer.
*/
/* offset = index of the next unread byte within this buffer's data. */
offset = sk->copied_seq+1 - skb->h.th->seq;
/* A SYN occupies one sequence number but carries no data byte. */
if (skb->h.th->syn) offset--;
if (offset < skb->len) /* Some of the packet is useful */
{
/*
* If there is urgent data we must either
* return or skip over it.
*/
if (skb->h.th->urg)
{
if (skb->urg_used)
{
/* Urgent bytes were already consumed: step over them. */
sk->copied_seq += ntohs(skb->h.th->urg_ptr);
offset += ntohs(skb->h.th->urg_ptr);
if (offset >= skb->len)
{
skb->used = 1;
skb =(struct sk_buff *)skb->next;
continue;
}
}
else
{
/* Unread urgent data: stop here; signal the caller if nothing was copied. */
release_sock(sk);
if (copied)
return(copied);
send_sig(SIGURG, current, 0);
return(-EINTR);
}
}
/* Ok so how much can we use ? */
used = min(skb->len - offset, len);
/* Copy it */
memcpy_tofs(to,((unsigned char *)skb->h.th) +
skb->h.th->doff*4 + offset, used);
copied += used;
len -= used;
to += used;
/* If we were reading the data is 'eaten' */
if (!(flags & MSG_PEEK))
sk->copied_seq += used;
/*
* Mark this data used if we are really reading it,
* and if it doesn't contain any urgent data. And we
* have used all the data.
*/
if (!(flags & MSG_PEEK) &&
(!skb->h.th->urg || skb->urg_used) &&
(used + offset >= skb->len))
skb->used = 1;
/*
* See if this is the end of a message or if the
* remaining data is urgent.
*/
/* The PSH test is commented out, so only an urgent segment ends the read here. */
if (/*skb->h.th->psh || */skb->h.th->urg)
{
break;
}
}
else
{ /* already used this data, must be a retransmit */
skb->used = 1;
}
/* Move along a packet */
skb =(struct sk_buff *)skb->next;
}
/* Clean up data we have read: This will do ACK frames */
cleanup_rbuf(sk);
release_sock(sk);
DPRINTF((DBG_TCP, "tcp_read: returning %d\n", copied));
if (copied == 0 && nonblock)
return(-EAGAIN);
return(copied);
}
/*
* Send a FIN without closing the connection.
* Not called at interrupt time.
*/
void
tcp_shutdown(struct sock *sk, int how)
{
	struct sk_buff *fin_skb;
	struct tcphdr *fin_hdr, *tmpl;
	struct proto *prot;
	int hdr_len;
	struct device *dev = NULL;

	/*
	 * Build a FIN segment and either transmit it at once or append
	 * it to the write queue behind data that is still waiting.
	 * Only a send-side shutdown (SEND_SHUTDOWN set in 'how') does
	 * anything; the function returns silently on allocation or
	 * header-build failure.
	 * FIXME:
	 *	Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
	 *	Most of this is guesswork, so maybe it will work...
	 */

	/* Nothing to do if a FIN has already been sent. */
	if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2)
		return;
	if ((how & SEND_SHUTDOWN) == 0)
		return;

	sk->inuse = 1;

	/* Flush any half completed packet first. */
	if (sk->send_tmp)
		tcp_send_partial(sk);

	prot = (struct proto *)sk->prot;
	tmpl = (struct tcphdr *)&sk->dummy_th;

	/* Let go of the socket in case the allocation sleeps. */
	release_sock(sk);
	fin_skb = prot->wmalloc(sk, MAX_RESET_SIZE, 1, GFP_KERNEL);
	if (fin_skb == NULL)
		return;
	sk->inuse = 1;

	DPRINTF((DBG_TCP, "tcp_shutdown_send buff = %X\n", fin_skb));
	fin_skb->mem_addr = fin_skb;
	fin_skb->mem_len = MAX_RESET_SIZE;
	fin_skb->sk = sk;
	fin_skb->len = sizeof(struct tcphdr);
	fin_hdr = (struct tcphdr *)fin_skb->data;

	/* Lower-layer header and routing; returns the header length. */
	hdr_len = prot->build_header(fin_skb, sk->saddr, sk->daddr, &dev,
				     IPPROTO_TCP, sk->opt,
				     sizeof(struct tcphdr), sk->ip_tos, sk->ip_ttl);
	if (hdr_len < 0) {
		fin_skb->free = 1;
		prot->wfree(sk, fin_skb->mem_addr, fin_skb->mem_len);
		release_sock(sk);
		DPRINTF((DBG_TCP, "Unable to build header for fin.\n"));
		return;
	}

	/* The TCP header sits right behind what build_header wrote. */
	fin_hdr = (struct tcphdr *)((char *)fin_hdr + hdr_len);
	fin_skb->len += hdr_len;
	fin_skb->dev = dev;

	/* Start from the socket's template header, then fill in the FIN. */
	memcpy(fin_hdr, tmpl, sizeof(struct tcphdr));
	fin_hdr->seq = ntohl(sk->send_seq);
	sk->send_seq++;			/* the FIN uses one sequence number */
	fin_skb->h.seq = sk->send_seq;
	fin_hdr->ack = 1;
	fin_hdr->ack_seq = ntohl(sk->acked_seq);
	sk->window = tcp_select_window(sk);
	fin_hdr->window = ntohs(sk->window);
	fin_hdr->fin = 1;
	fin_hdr->rst = 0;
	fin_hdr->doff = sizeof(struct tcphdr) / 4;
	tcp_send_check(fin_hdr, sk->saddr, sk->daddr, sizeof(struct tcphdr), sk);

	/*
	 * The FIN must follow any data still on the write queue, so it
	 * is only sent directly when that queue has no tail.
	 */
	if (sk->wback == NULL) {
		sk->prot->queue_xmit(sk, dev, fin_skb, 0);
	} else {
		fin_skb->free = 0;
		fin_skb->next = NULL;
		sk->wback->next = fin_skb;
		sk->wback = fin_skb;
		fin_skb->magic = TCP_WRITE_QUEUE_MAGIC;
	}

	if (sk->state == TCP_ESTABLISHED)
		sk->state = TCP_FIN_WAIT1;
	else
		sk->state = TCP_FIN_WAIT2;

	release_sock(sk);
}
static int
tcp_recvfrom(struct sock *sk, unsigned char *to,
int to_len, int nonblock, unsigned flags,
struct sockaddr_in *addr, int *addr_len)
{
struct sockaddr_in sin;
int len;
int err;
int result;
/* Have to check these first unlike the old code. If
we check them after we lose data on an error
which is wrong */
err = verify_area(VERIFY_WRITE,addr_len,sizeof(long));
if(err)
return err;
len = get_fs_long(addr_len);
if(len > sizeof(sin))
len = sizeof(sin);
err=verify_area(VERIFY_WRITE, addr, len);
if(err)
return err;
result=tcp_read(sk, to, to_len, nonblock, flags);
if (result < 0) return(result);
sin.sin_family = AF_INET;
sin.sin_port = sk->dummy_th.dest;
sin.sin_addr.s_addr = sk->daddr;
memcpy_tofs(addr, &sin, len);
put_fs_long(len, addr_len);
return(result);
}
/* This routine will send an RST to the other tcp. */
/*
 * tcp_reset - answer the segment 'th' with an RST.
 *
 * saddr/daddr are used as given for the lower-layer header and the
 * checksum; the port numbers are taken from 'th' and swapped.  The
 * reply is not tied to any socket (NULL is passed wherever a socket
 * would go).  Returns silently if no buffer can be allocated or the
 * header cannot be built.
 */
static void
tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
	  struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
{
	struct sk_buff *rst_skb;
	struct tcphdr *rst_hdr;
	int hdr_len;

	/* Atomic allocation: we cannot sleep here. */
	rst_skb = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
	if (rst_skb == NULL)
		return;

	DPRINTF((DBG_TCP, "tcp_reset buff = %X\n", rst_skb));
	rst_skb->mem_addr = rst_skb;
	rst_skb->mem_len = MAX_RESET_SIZE;
	rst_skb->len = sizeof(struct tcphdr);
	rst_skb->sk = NULL;
	rst_skb->dev = dev;

	rst_hdr = (struct tcphdr *)rst_skb->data;

	/* Lower-layer header and routing; returns the header length. */
	hdr_len = prot->build_header(rst_skb, saddr, daddr, &dev, IPPROTO_TCP, opt,
				     sizeof(struct tcphdr), tos, ttl);
	if (hdr_len < 0) {
		rst_skb->free = 1;
		prot->wfree(NULL, rst_skb->mem_addr, rst_skb->mem_len);
		return;
	}

	rst_hdr = (struct tcphdr *)((char *)rst_hdr + hdr_len);
	rst_skb->len += hdr_len;

	/* Clone the offending header, then turn it around. */
	memcpy(rst_hdr, th, sizeof(struct tcphdr));
	rst_hdr->dest = th->source;
	rst_hdr->source = th->dest;

	/* Only RST (and possibly ACK) may be set in the reply. */
	rst_hdr->rst = 1;
	rst_hdr->syn = 0;
	rst_hdr->urg = 0;
	rst_hdr->fin = 0;
	rst_hdr->psh = 0;
	rst_hdr->window = 0;

	if (th->ack) {
		/* Offending segment had an ACK: reply from its ack number. */
		rst_hdr->ack = 0;
		rst_hdr->seq = th->ack_seq;
		rst_hdr->ack_seq = 0;
	} else {
		/* No ACK: use sequence 0 and acknowledge what was sent (a SYN counts as one). */
		rst_hdr->ack = 1;
		rst_hdr->ack_seq = th->syn ? htonl(th->seq+1) : htonl(th->seq);
		rst_hdr->seq = 0;
	}

	rst_hdr->doff = sizeof(struct tcphdr) / 4;
	tcp_send_check(rst_hdr, saddr, daddr, sizeof(struct tcphdr), NULL);
	prot->queue_xmit(NULL, dev, rst_skb, 1);
}
/*
* Look for tcp options. Parses everything but only knows about MSS
*/
/*
 * tcp_options - walk the option area of a TCP header.
 *
 * sk: socket whose mtu is lowered to the peer's advertised MSS
 * th: TCP header; options occupy the bytes between the fixed header
 *     and th->doff*4
 *
 * Every option is parsed but only MSS is acted upon.  Parsing stops at
 * End-of-Option-List or at the first malformed option (length byte
 * missing, length below 2, or length running past the header).
 *
 * Fixes relative to the previous version:
 *  - No-Operation is a single octet; it no longer swallows the byte
 *    that follows it as a bogus length.
 *  - An option length larger than the remaining option space is
 *    rejected instead of being read past the end of the header.
 *  - A valid two-octet option (kind + length only) no longer aborts
 *    parsing of the options behind it.
 *  - The MSS value is assembled from bytes, avoiding an unaligned
 *    16-bit load.
 */
static void
tcp_options(struct sock *sk, struct tcphdr *th)
{
	unsigned char *ptr = (unsigned char *)(th + 1);
	int length = (th->doff * 4) - (int)sizeof(struct tcphdr);

	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return;
		case TCPOPT_NOP:
			/* One-octet option: no length byte follows. */
			length--;
			continue;
		default:
			/* Every other option has a length octet; it must be present. */
			if (length < 2)
				return;
			opsize = *ptr++;
			/* Reject silly lengths so we can neither loop forever nor overrun. */
			if (opsize < 2 || opsize > length)
				return;
			switch (opcode) {
			case TCPOPT_MSS:
				if (opsize == 4) {
					/* 16-bit value in network byte order. */
					unsigned int mss = ((unsigned int)ptr[0] << 8) | ptr[1];

					sk->mtu = min(sk->mtu, mss);
				}
				break;
			/* Add other options here as people feel the urge to implement stuff like large windows */
			}
			ptr += opsize - 2;
			length -= opsize;
		}
	}
}
/*
* This routine handles a connection request.
* It should make sure we haven't already responded.
* Because of the way BSD works, we have to send a syn/ack now.
* This also means it will be harder to close a socket which is
* listening.
*/
/*
 * tcp_conn_request - handle a connection request arriving on the
 * listening socket 'sk'.
 *
 * sk:    listening socket
 * skb:   received buffer holding the SYN; on success it is queued on
 *        sk->rqueue with skb->sk pointing at the new socket, otherwise
 *        it is freed
 * daddr: destination address of the SYN (becomes the new socket's saddr)
 * saddr: source address of the SYN (becomes the new socket's daddr)
 * opt:   options, passed on to tcp_reset() for a dead listener
 * dev:   device the SYN arrived on
 *
 * A new struct sock is cloned from the listener, put into TCP_SYN_RECV
 * and a SYN/ACK carrying an MSS option is transmitted.
 *
 * sk->err holds a positive errno everywhere else in this file (tcp_read
 * returns -sk->err), so the failure paths here now store positive
 * values too; previously they stored negative ones.
 */
static void
tcp_conn_request(struct sock *sk, struct sk_buff *skb,
		 unsigned long daddr, unsigned long saddr,
		 struct options *opt, struct device *dev)
{
	struct sk_buff *buff;
	struct tcphdr *t1;
	unsigned char *ptr;
	struct sock *newsk;
	struct tcphdr *th;
	int tmp;

	DPRINTF((DBG_TCP, "tcp_conn_request(sk = %X, skb = %X, daddr = %X, sadd4= %X, \n"
		 " opt = %X, dev = %X)\n",
		 sk, skb, daddr, saddr, opt, dev));

	th = skb->h.th;

	/* If the socket is dead, don't accept the connection. */
	if (!sk->dead) {
		sk->data_ready(sk,0);
	} else {
		DPRINTF((DBG_TCP, "tcp_conn_request on dead socket\n"));
		tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
		kfree_skb(skb, FREE_READ);
		return;
	}

	/*
	 * Make sure we can accept more. This will prevent a
	 * flurry of syns from eating up all our memory.
	 */
	if (sk->ack_backlog >= sk->max_ack_backlog) {
		kfree_skb(skb, FREE_READ);
		return;
	}

	/*
	 * We need to build a new sock struct.
	 * It is sort of bad to have a socket without an inode attached
	 * to it, but the wake_up's will just wake up the listening socket,
	 * and if the listening socket is destroyed before this is taken
	 * off of the queue, this will take care of it.
	 */
	newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
	if (newsk == NULL) {
		/* just ignore the syn. It will get retransmitted. */
		kfree_skb(skb, FREE_READ);
		return;
	}

	DPRINTF((DBG_TCP, "newsk = %X\n", newsk));

	/* Start as a copy of the listener, then reset all per-connection state. */
	memcpy((void *)newsk,(void *)sk, sizeof(*newsk));
	newsk->wback = NULL;
	newsk->wfront = NULL;
	newsk->rqueue = NULL;
	newsk->send_head = NULL;
	newsk->send_tail = NULL;
	newsk->back_log = NULL;
	newsk->rtt = TCP_CONNECT_TIME;
	newsk->mdev = 0;
	newsk->backoff = 0;
	newsk->blog = 0;
	newsk->intr = 0;
	newsk->proc = 0;
	newsk->done = 0;
	newsk->send_tmp = NULL;
	newsk->pair = NULL;
	newsk->wmem_alloc = 0;
	newsk->rmem_alloc = 0;
	newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
	newsk->err = 0;
	newsk->shutdown = 0;
	newsk->ack_backlog = 0;
	/* The peer's SYN consumes one sequence number. */
	newsk->acked_seq = th->seq+1;
	newsk->fin_seq = th->seq;
	newsk->copied_seq = th->seq;
	newsk->state = TCP_SYN_RECV;
	newsk->timeout = 0;
	/* Initial send sequence is derived from the clock. */
	newsk->send_seq = jiffies * SEQ_TICK - seq_offset;
	newsk->rcv_ack_seq = newsk->send_seq;
	newsk->urg =0;
	newsk->retransmits = 0;
	newsk->destroy = 0;
	newsk->timer.data = (unsigned long)newsk;
	newsk->timer.function = &net_timer;
	newsk->dummy_th.source = th->dest;
	newsk->dummy_th.dest = th->source;

	/* Swap these two, they are from our point of view. */
	newsk->daddr = saddr;
	newsk->saddr = daddr;

	put_sock(newsk->num,newsk);

	/* Template header: doff = 6 words, all flag bits cleared. */
	newsk->dummy_th.res1 = 0;
	newsk->dummy_th.doff = 6;
	newsk->dummy_th.fin = 0;
	newsk->dummy_th.syn = 0;
	newsk->dummy_th.rst = 0;
	newsk->dummy_th.psh = 0;
	newsk->dummy_th.ack = 0;
	newsk->dummy_th.urg = 0;
	newsk->dummy_th.res2 = 0;

	/* Grab the ttl and tos values and use them */
	newsk->ip_ttl=sk->ip_ttl;
	newsk->ip_tos=skb->ip_hdr->tos;

	/* use 512 or whatever user asked for */
	/* note use of sk->mss, since user has no direct access to newsk */
	if (sk->mss)
		newsk->mtu = sk->mss;
	else
		newsk->mtu = 576 - HEADER_SIZE;
	/* but not bigger than device MTU */
	newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);

	/* this will min with what arrived in the packet */
	tcp_options(newsk,th);

	buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
	if (buff == NULL) {
		sk->err = ENOMEM;	/* positive errno, like every other sk->err */
		newsk->dead = 1;
		release_sock(newsk);
		kfree_skb(skb, FREE_READ);
		return;
	}

	buff->mem_addr = buff;
	buff->mem_len = MAX_SYN_SIZE;
	/* TCP header plus the 4-byte MSS option. */
	buff->len = sizeof(struct tcphdr)+4;
	buff->sk = newsk;

	t1 =(struct tcphdr *) buff->data;

	/* Put in the IP header and routing stuff. */
	tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &dev,
				     IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);

	/* Something went wrong. */
	if (tmp < 0) {
		sk->err = -tmp;		/* tmp is negative; store it as a positive errno */
		buff->free=1;
		kfree_skb(buff,FREE_WRITE);
		newsk->dead = 1;
		release_sock(newsk);
		skb->sk = sk;
		kfree_skb(skb, FREE_READ);
		return;
	}

	buff->len += tmp;
	t1 =(struct tcphdr *)((char *)t1 +tmp);

	memcpy(t1, th, sizeof(*t1));
	buff->h.seq = newsk->send_seq;

	/* Swap the send and the receive. */
	t1->dest = th->source;
	t1->source = newsk->dummy_th.source;
	/* Our SYN consumes one sequence number as well. */
	t1->seq = ntohl(newsk->send_seq++);
	t1->ack = 1;
	newsk->window = tcp_select_window(newsk);/*newsk->prot->rspace(newsk);*/
	t1->window = ntohs(newsk->window);
	t1->res1 = 0;
	t1->res2 = 0;
	t1->rst = 0;
	t1->urg = 0;
	t1->psh = 0;
	t1->syn = 1;
	t1->ack_seq = ntohl(th->seq+1);
	/* One extra 32-bit word of options. */
	t1->doff = sizeof(*t1)/4+1;

	/* MSS option: kind 2, length 4, value in network byte order. */
	ptr =(unsigned char *)(t1+1);
	ptr[0] = 2;
	ptr[1] = 4;
	ptr[2] = ((newsk->mtu) >> 8) & 0xff;
	ptr[3] =(newsk->mtu) & 0xff;

	tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
	newsk->prot->queue_xmit(newsk, dev, buff, 0);

	reset_timer(newsk, TIME_WRITE /* -1 ? FIXME ??? */, TCP_CONNECT_TIME);
	skb->sk = newsk;

	/* Charge the sock_buff to newsk. */
	sk->rmem_alloc -= skb->mem_len;
	newsk->rmem_alloc += skb->mem_len;

	/* Park the SYN on the listener's receive queue. */
	skb_queue_tail(&sk->rqueue,skb);
	sk->ack_backlog++;
	release_sock(newsk);
}
/*
 * tcp_close - close a TCP socket.
 *
 * sk:      socket being closed
 * timeout: non-zero forces the faster paths (tcp_time_wait() for the
 *          FIN_WAIT/LAST_ACK states, straight to TCP_CLOSE from
 *          TIME_WAIT)
 *
 * The receive queue is discarded.  For connections that still need it
 * a FIN is built and either sent at once or appended to the write
 * queue; if unread data was thrown away the segment also carries RST.
 */
static void
tcp_close(struct sock *sk, int timeout)
{
	struct sk_buff *fin_skb;
	int need_reset = 0;
	struct tcphdr *fin_hdr, *tmpl;
	struct proto *prot;
	struct device *dev=NULL;
	int hdr_len;

	DPRINTF((DBG_TCP, "tcp_close((struct sock *)%X, %d)\n",sk, timeout));

	sk->inuse = 1;
	sk->keepopen = 1;
	sk->shutdown = SHUTDOWN_MASK;

	if (!sk->dead)
		sk->state_change(sk);

	/* Throw away everything still sitting on the receive queue. */
	if (skb_peek(&sk->rqueue) != NULL) {
		struct sk_buff *skb;

		if (sk->debug)
			printk("Clean rcv queue\n");
		for (skb = skb_dequeue(&sk->rqueue); skb != NULL;
		     skb = skb_dequeue(&sk->rqueue)) {
			/* Data reaching past what the user has read: remember to send RST. */
			if (skb->len > 0 &&
			    after(skb->h.th->seq + skb->len + 1 , sk->copied_seq))
				need_reset = 1;
			kfree_skb(skb, FREE_READ);
		}
		if (sk->debug)
			printk("Cleaned.\n");
	}
	sk->rqueue = NULL;

	/* Push out any half-completed packet. */
	if (sk->send_tmp)
		tcp_send_partial(sk);

	switch (sk->state) {
	case TCP_FIN_WAIT1:
	case TCP_FIN_WAIT2:
	case TCP_LAST_ACK:
		/* A FIN has already gone out: just arm the close timer. */
		reset_timer(sk, TIME_CLOSE, 4 * sk->rtt);
		if (timeout)
			tcp_time_wait(sk);
		release_sock(sk);
		return;

	case TCP_TIME_WAIT:
		if (timeout)
			sk->state = TCP_CLOSE;
		release_sock(sk);
		return;

	case TCP_LISTEN:
		sk->state = TCP_CLOSE;
		release_sock(sk);
		return;

	case TCP_CLOSE_WAIT:
	case TCP_ESTABLISHED:
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* These states need a FIN; it is built below. */
		break;

	case TCP_CLOSE:
	default:
		release_sock(sk);
		return;
	}

	prot = (struct proto *)sk->prot;
	tmpl = (struct tcphdr *)&sk->dummy_th;

	fin_skb = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
	if (fin_skb == NULL) {
		/* This will force it to try again later. */
		/* Or it would have if someone released the socket
		   first. Anyway it might work now */
		release_sock(sk);
		if (sk->state != TCP_CLOSE_WAIT)
			sk->state = TCP_ESTABLISHED;
		reset_timer(sk, TIME_CLOSE, 100);
		return;
	}

	fin_skb->mem_addr = fin_skb;
	fin_skb->mem_len = MAX_FIN_SIZE;
	fin_skb->sk = sk;
	fin_skb->free = 1;
	fin_skb->len = sizeof(struct tcphdr);
	fin_hdr = (struct tcphdr *)fin_skb->data;

	/* Lower-layer header and routing; returns the header length. */
	hdr_len = prot->build_header(fin_skb, sk->saddr, sk->daddr, &dev,
				     IPPROTO_TCP, sk->opt,
				     sizeof(struct tcphdr), sk->ip_tos, sk->ip_ttl);
	if (hdr_len < 0) {
		kfree_skb(fin_skb, FREE_WRITE);
		DPRINTF((DBG_TCP, "Unable to build header for fin.\n"));
		release_sock(sk);
		return;
	}

	fin_hdr = (struct tcphdr *)((char *)fin_hdr + hdr_len);
	fin_skb->len += hdr_len;
	fin_skb->dev = dev;

	/* Fill in the FIN from the socket's template header. */
	memcpy(fin_hdr, tmpl, sizeof(struct tcphdr));
	fin_hdr->seq = ntohl(sk->send_seq);
	sk->send_seq++;			/* the FIN uses one sequence number */
	fin_skb->h.seq = sk->send_seq;
	fin_hdr->ack = 1;

	/* Ack everything immediately from now on. */
	sk->delay_acks = 0;

	fin_hdr->ack_seq = ntohl(sk->acked_seq);
	sk->window = tcp_select_window(sk);
	fin_hdr->window = ntohs(sk->window);
	fin_hdr->fin = 1;
	fin_hdr->rst = need_reset;
	fin_hdr->doff = sizeof(struct tcphdr) / 4;
	tcp_send_check(fin_hdr, sk->saddr, sk->daddr, sizeof(struct tcphdr), sk);

	if (sk->wfront != NULL) {
		/* Data is still waiting: the FIN goes to the back of the write queue. */
		reset_timer(sk, TIME_WRITE,
			    backoff(sk->backoff) * (2 * sk->mdev + sk->rtt));
		fin_skb->next = NULL;
		if (sk->wback == NULL)
			sk->wfront = fin_skb;
		else
			sk->wback->next = fin_skb;
		sk->wback = fin_skb;
		fin_skb->magic = TCP_WRITE_QUEUE_MAGIC;
	} else {
		prot->queue_xmit(sk, dev, fin_skb, 0);
	}

	sk->state = (sk->state == TCP_CLOSE_WAIT) ? TCP_FIN_WAIT2 : TCP_FIN_WAIT1;

	release_sock(sk);
}
/*
* This routine takes stuff off of the write queue,
* and puts it in the xmit queue.
*/
/*
 * tcp_write_xmit - move buffers from the front of the write queue to
 * the transmit path for as long as the offered window, the retransmit
 * state and the congestion window allow it.  Buffers that turn out to
 * be acknowledged already are freed instead of being sent.
 */
static void
tcp_write_xmit(struct sock *sk)
{
	struct sk_buff *skb;

	DPRINTF((DBG_TCP, "tcp_write_xmit(sk=%X)\n", sk));

	/* The bytes will have to remain here. In time closedown will
	   empty the write queue and all will be happy */
	if (sk->zapped)
		return;

	for (;;) {
		skb = sk->wfront;

		/* Queue drained. */
		if (skb == NULL)
			break;
		/* Head does not fit into the window the peer offered. */
		if (!before(skb->h.seq, sk->window_seq +1))
			break;
		/*
		 * While retransmitting under the write timer, only buffers
		 * that are already acknowledged may leave the queue.
		 */
		if (sk->retransmits != 0 &&
		    sk->timeout == TIME_WRITE &&
		    !before(skb->h.seq, sk->rcv_ack_seq +1))
			break;
		/* Congestion window is full. */
		if (!(sk->packets_out < sk->cong_window))
			break;

		IS_SKB(skb);

		/* Unhook the head of the write queue. */
		sk->wfront = (struct sk_buff *)skb->next;
		if (sk->wfront == NULL)
			sk->wback = NULL;
		skb->next = NULL;

		if (skb->magic != TCP_WRITE_QUEUE_MAGIC) {
			printk("tcp.c skb with bad magic(%X) on write queue. Squashing "
			       "queue\n", skb->magic);
			sk->wfront = NULL;
			sk->wback = NULL;
			return;
		}
		skb->magic = 0;

		DPRINTF((DBG_TCP, "Sending a packet.\n"));

		/* See if we really need to send the packet. */
		if (!before(skb->h.seq, sk->rcv_ack_seq +1)) {
			sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
		} else {
			/* Already acknowledged: drop it and tell the writer there is room. */
			sk->retransmits = 0;
			kfree_skb(skb, FREE_WRITE);
			if (!sk->dead)
				sk->write_space(sk);
		}
	}
}
/*
* This routine sorts the send list, and resets the
* sk->send_head and sk->send_tail pointers.
*/
/*
 * sort_send - re-sort sk's retransmit list (chained through link3) into
 * ascending h.seq order using an insertion sort, and reset
 * sk->send_head and sk->send_tail to match.
 *
 * Fixes relative to the previous version:
 *  - The head-insert test compared the sequence number of the *next*
 *    unsorted buffer (a NULL pointer for the last element) instead of
 *    the buffer being inserted.
 *  - send_tail was overwritten whenever a buffer was inserted at the
 *    head, even when the sorted list already had a tail.
 *  - An empty list now leaves send_tail NULL as well.
 */
void
sort_send(struct sock *sk)
{
	struct sk_buff *list = NULL;	/* head of the sorted list being built */
	struct sk_buff *skb, *next, *pos;

	if (sk->send_head == NULL)
		sk->send_tail = NULL;

	for (skb = sk->send_head; skb != NULL; skb = next) {
		/* Remember the successor before skb->link3 is rewritten. */
		next = (struct sk_buff *)skb->link3;

		if (list == NULL || before(skb->h.seq, list->h.seq)) {
			/* New head; it is also the tail only if the list was empty. */
			if (list == NULL)
				sk->send_tail = skb;
			skb->link3 = list;
			list = skb;
			continue;
		}

		/* Find the last element that does not sort after skb. */
		for (pos = list; ; pos = (struct sk_buff *)pos->link3) {
			if (pos->link3 == NULL ||
			    before(skb->h.seq, pos->link3->h.seq)) {
				skb->link3 = pos->link3;
				pos->link3 = skb;
				if (skb->link3 == NULL)
					sk->send_tail = skb;
				break;
			}
		}
	}
	sk->send_head = list;
}
/* This routine deals with incoming acks, but not outgoing ones. */
/*
 * tcp_ack - process the acknowledgement carried by an incoming segment.
 *
 * sk:    socket the segment belongs to
 * th:    TCP header of the segment (ack_seq and window are read here)
 * saddr: not referenced in this function
 * len:   segment length; compared with th->doff*4 to see whether the
 *        segment is more than a bare header
 *
 * Returns 0 when the ack is rejected (beyond what we have sent, or out
 * of range while not ESTABLISHED/CLOSE_WAIT), 1 otherwise.
 *
 * Local 'flag' bits as used below:
 *   1 - segment length differs from the header length, or this call
 *       sent something / closed the connection
 *   2 - a buffer was acked in this call (suppresses further RTT samples)
 *   4 - the window shrank, or a buffer was acked
 * A retransmit at the end is only considered when flag is 0 or has
 * bit 4 set.
 */
static int
tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
{
unsigned long ack;
int flag = 0;
if(sk->zapped)
return(1); /* Dead, cant ack any more so why bother */
ack = ntohl(th->ack_seq);
DPRINTF((DBG_TCP, "tcp_ack ack=%d, window=%d, "
"sk->rcv_ack_seq=%d, sk->window_seq = %d\n",
ack, ntohs(th->window), sk->rcv_ack_seq, sk->window_seq));
if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
sk->retransmits = 0;
/* Ack outside the range [rcv_ack_seq-1, send_seq+1]: reject or ignore it. */
if (after(ack, sk->send_seq+1) || before(ack, sk->rcv_ack_seq-1)) {
if (after(ack, sk->send_seq) ||
(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) {
return(0);
}
if (sk->keepopen) {
reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
}
return(1);
}
if (len != th->doff*4) flag |= 1;
/* See if our window has been shrunk. */
if (after(sk->window_seq, ack+ntohs(th->window))) {
/*
* We may need to move packets from the send queue
* to the write queue, if the window has been shrunk on us.
* The RFC says you are not allowed to shrink your window
* like this, but if the other end does, you must be able
* to deal with it.
*/
struct sk_buff *skb;
struct sk_buff *skb2;
struct sk_buff *wskb = NULL;
/* Detach the whole retransmit list; it is rebuilt in the loop below. */
skb2 = sk->send_head;
sk->send_head = NULL;
sk->send_tail = NULL;
flag |= 4;
sk->window_seq = ack + ntohs(th->window);
cli();
while (skb2 != NULL) {
skb = skb2;
skb2 = (struct sk_buff *)skb->link3;
skb->link3 = NULL;
/* Buffers beyond the new window go back to the front of the write queue, in order. */
if (after(skb->h.seq, sk->window_seq)) {
if (sk->packets_out > 0) sk->packets_out--;
/* We may need to remove this from the dev send list. */
if (skb->next != NULL) {
skb_unlink(skb);
}
/* Now add it to the write_queue. */
skb->magic = TCP_WRITE_QUEUE_MAGIC;
/* wskb is the previously re-queued buffer; the first one becomes the new wfront. */
if (wskb == NULL) {
skb->next = sk->wfront;
sk->wfront = skb;
} else {
skb->next = wskb->next;
wskb->next = skb;
}
if (sk->wback == wskb) sk->wback = skb;
wskb = skb;
} else {
/* Still inside the window: append to the rebuilt retransmit list. */
if (sk->send_head == NULL) {
sk->send_head = skb;
sk->send_tail = skb;
} else {
sk->send_tail->link3 = skb;
sk->send_tail = skb;
}
skb->link3 = NULL;
}
}
sti();
}
/* Keep head, tail and packets_out consistent when the list is empty. */
if (sk->send_tail == NULL || sk->send_head == NULL) {
sk->send_head = NULL;
sk->send_tail = NULL;
sk->packets_out= 0;
}
sk->window_seq = ack + ntohs(th->window);
/* We don't want too many packets out there. */
/* Grow the congestion window on a new ack: doubling while exp_growth is set, else by one. */
if (sk->timeout == TIME_WRITE &&
sk->cong_window < 2048 && ack != sk->rcv_ack_seq) {
if (sk->exp_growth) sk->cong_window *= 2;
else sk->cong_window++;
}
DPRINTF((DBG_TCP, "tcp_ack: Updating rcv ack sequence.\n"));
sk->rcv_ack_seq = ack;
/*
* if this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission
*/
if (sk->timeout == TIME_PROBE0) {
if (sk->wfront != NULL && /* should always be non-null */
! before (sk->window_seq, sk->wfront->h.seq)) {
sk->retransmits = 0;
sk->backoff = 0;
}
}
/* See if we can take anything off of the retransmit queue. */
while(sk->send_head != NULL) {
/* Check for a bug. */
if (sk->send_head->link3 &&
after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) {
printk("INET: tcp.c: *** bug send_list out of order.\n");
sort_send(sk);
}
/* Head buffer is fully covered by this ack. */
if (before(sk->send_head->h.seq, ack+1)) {
struct sk_buff *oskb;
if (sk->retransmits) {
/* if we're retransmitting, don't start any new
* packets until after everything in retransmit queue
* is acked. That's as close as I can come at the
* moment to slow start the way this code is organized
*/
if (sk->send_head->link3)
sk->retransmits = 1;
else
sk->retransmits = 0;
}
/*
* need to restart backoff whenever we get a response,
* or things get impossible if we lose a window-full of
* data with very small MSS
*/
sk->backoff = 0;
/* We have one less packet out there. */
if (sk->packets_out > 0) sk->packets_out --;
DPRINTF((DBG_TCP, "skb=%X skb->h.seq = %d acked ack=%d\n",
sk->send_head, sk->send_head->h.seq, ack));
/* Wake up the process, it can probably write more. */
if (!sk->dead) sk->write_space(sk);
oskb = sk->send_head;
/*
* In theory we're supposed to ignore rtt's when there's
* retransmission in process. Unfortunately this means
* that if there's a sharp increase in RTT, we may
* never get out of retransmission. For the moment
* ignore the test.
*/
/* Only the first buffer acked in this call yields an RTT sample (flag bit 2). */
if (/* sk->retransmits == 0 && */ !(flag&2)) {
long abserr, rtt = jiffies - oskb->when;
/*
* Berkeley's code puts these limits on a separate timeout
* field, not on the RTT estimate itself. However the way this
* code is done, that would complicate things. If we're going
* to clamp the values, we have to do so before calculating
* the mdev, or we'll get unreasonably large mdev's. Experience
* shows that with a minimum rtt of .1 sec, we get spurious
* retransmits, due to delayed acks on some hosts. Berkeley uses
* 1 sec, so why not?
*/
if (rtt < 100) rtt = 100; /* 1 sec */
if (rtt > 12000) rtt = 12000; /* 2 min - max rtt allowed by protocol */
if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) {
/* first ack, so nothing else to average with */
sk->rtt = rtt;
sk->mdev = rtt; /* overcautious initial estimate */
}
else {
/* Smoothed estimates: rtt weighted 7/8 old, mdev weighted 3/4 old. */
abserr = (rtt > sk->rtt) ? rtt - sk->rtt : sk->rtt - rtt;
sk->rtt = (7 * sk->rtt + rtt) >> 3;
sk->mdev = (3 * sk->mdev + abserr) >> 2;
}
sk->backoff = 0;
}
flag |= (2|4);
/* Unlink the acked buffer from the retransmit list with interrupts off. */
cli();
oskb = sk->send_head;
IS_SKB(oskb);
sk->send_head =(struct sk_buff *)oskb->link3;
if (sk->send_head == NULL) {
sk->send_tail = NULL;
}
/* We may need to remove this from the dev send list. */
skb_unlink(oskb); /* Much easier! */
sti();
oskb->magic = 0;
kfree_skb(oskb, FREE_WRITE); /* write. */
if (!sk->dead) sk->write_space(sk);
} else {
break;
}
}
/*
* Maybe we can take some stuff off of the write queue,
* and put it onto the xmit queue.
*/
if (sk->wfront != NULL) {
/* Same admission test that tcp_write_xmit() applies to the queue head. */
if (after (sk->window_seq+1, sk->wfront->h.seq) &&
(sk->retransmits == 0 ||
sk->timeout != TIME_WRITE ||
before(sk->wfront->h.seq, sk->rcv_ack_seq +1))
&& sk->packets_out < sk->cong_window) {
flag |= 1;
tcp_write_xmit(sk);
} else if (before(sk->window_seq, sk->wfront->h.seq) &&
sk->send_head == NULL &&
sk->ack_backlog == 0 &&
sk->state != TCP_TIME_WAIT) {
/* Window closed and nothing outstanding: start the zero-window probe timer. */
reset_timer(sk, TIME_PROBE0,
backoff(sk->backoff) * (2 * sk->mdev + sk->rtt));
}
} else {
if (sk->send_head == NULL && sk->ack_backlog == 0 &&
sk->state != TCP_TIME_WAIT && !sk->keepopen) {
DPRINTF((DBG_TCP, "Nothing to do, going to sleep.\n"));
if (!sk->dead) sk->write_space(sk);
/*
 * NOTE(review): sk->keepopen is known to be zero here because of
 * the enclosing condition, so the reset_timer() arm looks dead.
 */
if (sk->keepopen)
reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
else
delete_timer(sk);
} else {
/*
 * NOTE(review): this compares the connection state with the
 * keepopen flag, which looks unintended - confirm what was meant.
 */
if (sk->state != (unsigned char) sk->keepopen) {
reset_timer(sk, TIME_WRITE,
backoff(sk->backoff) * (2 * sk->mdev + sk->rtt));
}
if (sk->state == TCP_TIME_WAIT) {
reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
}
}
}
/* Everything is acked and queued: flush a pending partial segment. */
if (sk->packets_out == 0 && sk->send_tmp != NULL &&
sk->wfront == NULL && sk->send_head == NULL) {
flag |= 1;
tcp_send_partial(sk);
}
/* See if we are done. */
if (sk->state == TCP_TIME_WAIT) {
if (!sk->dead)
sk->state_change(sk);
if (sk->rcv_ack_seq == sk->send_seq && sk->acked_seq == sk->fin_seq) {
flag |= 1;
sk->state = TCP_CLOSE;
sk->shutdown = SHUTDOWN_MASK;
}
}
/* Our FIN is acked: close if the peer's FIN was acked too, else enter time-wait. */
if (sk->state == TCP_LAST_ACK || sk->state == TCP_FIN_WAIT2) {
if (!sk->dead) sk->state_change(sk);
if (sk->rcv_ack_seq == sk->send_seq) {
flag |= 1;
if (sk->acked_seq != sk->fin_seq) {
tcp_time_wait(sk);
} else {
DPRINTF((DBG_TCP, "tcp_ack closing socket - %X\n", sk));
tcp_send_ack(sk->send_seq, sk->acked_seq, sk,
th, sk->daddr);
sk->shutdown = SHUTDOWN_MASK;
sk->state = TCP_CLOSE;
}
}
}
/* Retransmit if the oldest unacked buffer has been out longer than the current timeout. */
if (((!flag) || (flag&4)) && sk->send_head != NULL &&
(sk->send_head->when + backoff(sk->backoff) * (2 * sk->mdev + sk->rtt)
< jiffies)) {
sk->exp_growth = 0;
ip_retransmit(sk, 1);
}
DPRINTF((DBG_TCP, "leaving tcp_ack\n"));
return(1);
}
/*
* This routine handles the data. If there is room in the buffer,
* it will have already been moved into it. If there is no
* room, then we will just have to discard the packet.
*/
/*
 * tcp_data - queue the payload of an incoming segment on the socket.
 *
 * skb:   buffer holding the segment; it is inserted into sk->rqueue or
 *        freed
 * sk:    receiving socket
 * saddr: address passed on to tcp_send_ack()
 * len:   length of the TCP segment including its header; skb->len is
 *        set to len minus the header length
 *
 * The buffer is placed into the receive queue in sequence order,
 * sk->acked_seq is advanced over every buffer that is now contiguous,
 * an ack is sent or scheduled, and the reader is woken.
 * Always returns 0.
 */
static int
tcp_data(struct sk_buff *skb, struct sock *sk,
unsigned long saddr, unsigned short len)
{
struct sk_buff *skb1, *skb2;
struct tcphdr *th;
int dup_dumped=0;
th = skb->h.th;
print_th(th);
skb->len = len -(th->doff*4);
DPRINTF((DBG_TCP, "tcp_data len = %d sk = %X:\n", skb->len, sk));
sk->bytes_rcv += skb->len;
/* Nothing to queue: no payload and none of FIN, URG or PSH set. */
if (skb->len == 0 && !th->fin && !th->urg && !th->psh) {
/* Don't want to keep passing ack's back and forth. */
if (!th->ack) tcp_send_ack(sk->send_seq, sk->acked_seq,sk, th, saddr);
kfree_skb(skb, FREE_READ);
return(0);
}
/* Data after the receive side was shut down: reset the peer and close. */
if (sk->shutdown & RCV_SHUTDOWN) {
sk->acked_seq = th->seq + skb->len + th->syn + th->fin;
tcp_reset(sk->saddr, sk->daddr, skb->h.th,
sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
sk->state = TCP_CLOSE;
sk->err = EPIPE;
sk->shutdown = SHUTDOWN_MASK;
DPRINTF((DBG_TCP, "tcp_data: closing socket - %X\n", sk));
kfree_skb(skb, FREE_READ);
if (!sk->dead) sk->state_change(sk);
return(0);
}
/*
* Now we have to walk the chain, and figure out where this one
* goes into it. This is set up so that the last packet we received
* will be the first one we look at, that way if everything comes
* in order, there will be no performance loss, and if they come
* out of order we will be able to fit things in nicely.
*/
/* This should start at the last one, and then go around forwards. */
/*
 * After this if/else, skb1 is NULL when skb became the only buffer on
 * the queue or replaced a duplicate; otherwise it is the buffer at
 * which the backwards walk stopped.
 */
if (sk->rqueue == NULL) {
DPRINTF((DBG_TCP, "tcp_data: skb = %X:\n", skb));
#ifdef OLDWAY
sk->rqueue = skb;
skb->next = skb;
skb->prev = skb;
skb->list = &sk->rqueue;
#else
skb_queue_head(&sk->rqueue,skb);
#endif
skb1= NULL;
} else {
DPRINTF((DBG_TCP, "tcp_data adding to chain sk = %X:\n", sk));
/* Walk backwards from the tail of the queue. */
for(skb1=sk->rqueue->prev; ; skb1 =(struct sk_buff *)skb1->prev) {
if(sk->debug)
{
printk("skb1=%p :", skb1);
printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
sk->acked_seq);
}
#ifdef OLD
if (after(th->seq+1, skb1->h.th->seq)) {
skb->prev = skb1;
skb->next = skb1->next;
skb->next->prev = skb;
skb1->next = skb;
if (skb1 == sk->rqueue) sk->rqueue = skb;
break;
}
if (skb1->prev == sk->rqueue) {
skb->next= skb1;
skb->prev = skb1->prev;
skb->prev->next = skb;
skb1->prev = skb;
skb1 = NULL; /* so we know we might be able
to ack stuff. */
break;
}
#else
/* Same start and at least as long: the new buffer replaces the queued one. */
if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
{
skb_append(skb1,skb);
skb_unlink(skb1);
kfree_skb(skb1,FREE_READ);
dup_dumped=1;
skb1=NULL;
break;
}
/* New buffer does not start before skb1: it goes right after it. */
if (after(th->seq+1, skb1->h.th->seq))
{
skb_append(skb1,skb);
break;
}
/* Reached the head of the queue: the new buffer goes in front. */
if (skb1 == sk->rqueue)
{
skb_queue_head(&sk->rqueue, skb);
break;
}
#endif
}
DPRINTF((DBG_TCP, "skb = %X:\n", skb));
}
/* ack_seq in the header is reused to hold the sequence number just past this segment. */
th->ack_seq = th->seq + skb->len;
if (th->syn) th->ack_seq++;
if (th->fin) th->ack_seq++;
if (before(sk->acked_seq, sk->copied_seq)) {
printk("*** tcp.c:tcp_data bug acked < copied\n");
sk->acked_seq = sk->copied_seq;
}
/* Now figure out if we can ack anything. */
if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) {
if (before(th->seq, sk->acked_seq+1)) {
if (after(th->ack_seq, sk->acked_seq))
sk->acked_seq = th->ack_seq;
skb->acked = 1;
/* When we ack the fin, we turn on the RCV_SHUTDOWN flag. */
if (skb->h.th->fin) {
if (!sk->dead) sk->state_change(sk);
sk->shutdown |= RCV_SHUTDOWN;
}
/* Sweep forward: later buffers that are now contiguous get acked as well. */
for(skb2 = (struct sk_buff *)skb->next;
skb2 !=(struct sk_buff *) sk->rqueue;
skb2 = (struct sk_buff *)skb2->next) {
if (before(skb2->h.th->seq, sk->acked_seq+1)) {
if (after(skb2->h.th->ack_seq, sk->acked_seq))
{
long old_acked_seq = sk->acked_seq;
sk->acked_seq = skb2->h.th->ack_seq;
/* Shrink the window by the amount just acked, never below 0. */
if((int)(sk->acked_seq - old_acked_seq) >0)
{
int new_window=sk->window-sk->acked_seq+
old_acked_seq;
if(new_window<0)
new_window=0;
sk->window = new_window;
}
}
skb2->acked = 1;
/*
* When we ack the fin, we turn on
* the RCV_SHUTDOWN flag.
*/
if (skb2->h.th->fin) {
sk->shutdown |= RCV_SHUTDOWN;
if (!sk->dead) sk->state_change(sk);
}
/* Force an immediate ack. */
sk->ack_backlog = sk->max_ack_backlog;
} else {
break;
}
}
/*
* This also takes care of updating the window.
* This if statement needs to be simplified.
*/
/*
 * The first arm is empty because its tcp_send_ack() call is
 * commented out; the ack for that case is sent further down.
 */
if (!sk->delay_acks ||
sk->ack_backlog >= sk->max_ack_backlog ||
sk->bytes_rcv > sk->max_unacked || th->fin) {
/* tcp_send_ack(sk->send_seq, sk->acked_seq,sk,th, saddr); */
} else {
sk->ack_backlog++;
if(sk->debug)
printk("Ack queued.\n");
reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
}
}
}
/*
* If we've missed a packet, send an ack.
* Also start a timer to send another.
*/
/*
 * NOTE(review): skb->acked being clear means this buffer could not be
 * acked yet; the "We missed a packet" comment in the else arm below
 * appears to describe this arm instead - confirm.
 */
if (!skb->acked) {
/*
* This is important. If we don't have much room left,
* we need to throw out a few packets so we have a good
* window.
*/
while (sk->prot->rspace(sk) < sk->mtu) {
skb1 = skb_peek(&sk->rqueue);
if (skb1 == NULL) {
printk("INET: tcp.c:tcp_data memory leak detected.\n");
break;
}
/* Don't throw out something that has been acked. */
if (skb1->acked) {
break;
}
skb_unlink(skb1);
#ifdef OLDWAY
if (skb1->prev == skb1) {
sk->rqueue = NULL;
} else {
sk->rqueue = (struct sk_buff *)skb1->prev;
skb1->next->prev = skb1->prev;
skb1->prev->next = skb1->next;
}
#endif
kfree_skb(skb1, FREE_READ);
}
tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr);
sk->ack_backlog++;
reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
} else {
/* We missed a packet. Send an ack to try to resync things. */
tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr);
}
/* Now tell the user we may have some data. */
if (!sk->dead) {
if(sk->debug)
printk("Data wakeup.\n");
sk->data_ready(sk,0);
} else {
DPRINTF((DBG_TCP, "data received on dead socket.\n"));
}
/* Both directions are finished: our FIN is acked and the peer's FIN has been acked. */
if (sk->state == TCP_FIN_WAIT2 &&
sk->acked_seq == sk->fin_seq && sk->rcv_ack_seq == sk->send_seq) {
DPRINTF((DBG_TCP, "tcp_data: entering last_ack state sk = %X\n", sk));
/* tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr); */
sk->shutdown = SHUTDOWN_MASK;
sk->state = TCP_LAST_ACK;
if (!sk->dead) sk->state_change(sk);
}
return(0);
}
/*
 * Handle a segment that arrived with the URG flag set.
 *
 * The reader is woken first (unless the socket is dead).  If the socket
 * wants urgent data inline, the header is rewritten so that the segment
 * looks like ordinary pushed data and nothing else happens.  Otherwise
 * SIGURG is raised -- via kill_proc() when sk->proc is positive, via
 * kill_pg() on the negated value when it is negative -- but only if no
 * urgent data was already pending, and the pending count is incremented.
 *
 * saddr is not used.  Always returns 0.
 */
static int
tcp_urg(struct sock *sk, struct tcphdr *th, unsigned long saddr)
{
	extern int kill_proc(int pid, int sig, int priv);
	extern int kill_pg(int pg, int sig, int priv);

	if (!sk->dead)
		sk->data_ready(sk,0);

	if (sk->urginline) {
		/* Deliver in-band: drop URG, force PSH. */
		th->psh = 1;
		th->urg = 0;
		return(0);
	}

	/* Only the first outstanding urgent segment signals the owner. */
	if (sk->urg == 0 && sk->proc != 0) {
		if (sk->proc < 0)
			kill_pg(-sk->proc, SIGURG, 1);
		else
			kill_proc(sk->proc, SIGURG, 1);
	}

	sk->urg++;
	return(0);
}
/*
 * Process a FIN from the peer and advance the connection state.
 *
 * Anyone watching the socket is notified first (unless it is dead).
 * For the states that accept the FIN, sk->fin_seq records the sequence
 * number just past it and one more pending ack is counted in
 * sk->ack_backlog.  A repeated FIN in CLOSE_WAIT or FIN_WAIT2 only adds
 * to the ack count.  TIME_WAIT and every state not listed move to
 * LAST_ACK, arm the close timer and return without counting an ack.
 *
 * saddr and dev are only used for the debug trace.  Always returns 0.
 */
static int
tcp_fin(struct sock *sk, struct tcphdr *th,
	unsigned long saddr, struct device *dev)
{
	DPRINTF((DBG_TCP, "tcp_fin(sk=%X, th=%X, saddr=%X, dev=%X)\n",
		sk, th, saddr, dev));

	if (!sk->dead)
		sk->state_change(sk);

	switch(sk->state) {
		case TCP_FIN_WAIT1:
			/* Remember the sequence number that acks this FIN. */
			sk->fin_seq = th->seq+1;
			sk->state = TCP_FIN_WAIT2;
			break;

		case TCP_ESTABLISHED:
		case TCP_SYN_SENT:
		case TCP_SYN_RECV:
			/* Remember the sequence number that acks this FIN. */
			sk->fin_seq = th->seq+1;
			sk->state = TCP_CLOSE_WAIT;
			if (th->rst)
				sk->shutdown = SHUTDOWN_MASK;
			break;

		case TCP_FIN_WAIT2:
		case TCP_CLOSE_WAIT:
			/* Retransmitted FIN: state is already right. */
			break;

		case TCP_TIME_WAIT:
		default:
			sk->state = TCP_LAST_ACK;
			/* Start the timers. */
			reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
			return(0);
	}

	sk->ack_backlog++;
	return(0);
}
/*
 * Accept the next outstanding connection on a listening socket.
 *
 * Returns the sock of the new connection, or NULL with sk->err set to:
 *   EINVAL       - sk is not in TCP_LISTEN
 *   EAGAIN       - nothing is pending and flags contains O_NONBLOCK
 *   ERESTARTSYS  - an unblocked signal arrived while waiting
 *
 * Unless O_NONBLOCK is given, the caller sleeps on sk->sleep until a
 * connection shows up.
 */
static struct sock *
tcp_accept(struct sock *sk, int flags)
{
struct sock *newsk;
struct sk_buff *skb;
DPRINTF((DBG_TCP, "tcp_accept(sk=%X, flags=%X, addr=%s)\n",
sk, flags, in_ntoa(sk->saddr)));
/*
 * We need to make sure that this socket is listening,
 * and that it has something pending.
 */
if (sk->state != TCP_LISTEN) {
sk->err = EINVAL;
return(NULL);
}
/*
 * Avoid the race: interrupts stay off while the socket is marked in
 * use and the pending queue is examined.
 */
cli();
sk->inuse = 1;
/*
 * NOTE(review): get_firstr() is defined elsewhere; presumably it removes
 * and returns the oldest queued connection request -- confirm.
 */
while((skb = get_firstr(sk)) == NULL) {
if (flags & O_NONBLOCK) {
/* Nothing queued and the caller refuses to wait. */
sti();
release_sock(sk);
sk->err = EAGAIN;
return(NULL);
}
/* Drop the in-use mark before sleeping; it is set again after waking. */
release_sock(sk);
interruptible_sleep_on(sk->sleep);
if (current->signal & ~current->blocked) {
/* Woken by a signal; the socket was already released above. */
sti();
sk->err = ERESTARTSYS;
return(NULL);
}
sk->inuse = 1;
}
sti();
/*
 * Now all we need to do is return skb->sk: the queued buffer carries
 * the sock of the new connection, so the buffer itself can be freed.
 */
newsk = skb->sk;
kfree_skb(skb, FREE_READ);
/*
 * NOTE(review): presumably ack_backlog counts pending connections on a
 * listening socket -- confirm against the code that queues them.
 */
sk->ack_backlog--;
release_sock(sk);
return(newsk);
}
/* This will initiate an outgoing connection. */
static int
tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
{
struct sk_buff *buff;
struct sockaddr_in sin;
struct device *dev=NULL;
unsigned char *ptr;
int tmp;
struct tcphdr *t1;
int err;
if (sk->state != TCP_CLOSE) return(-EISCONN);
if (addr_len < 8) return(-EINVAL);
err=verify_area(VERIFY_READ, usin, addr_len);
if(err)
return err;
memcpy_fromfs(&sin,usin, min(sizeof(sin), addr_len));
if (sin.sin_family && sin.sin_family != AF_INET) return(-EAFNOSUPPORT);
DPRINTF((DBG_TCP, "TCP connect daddr=%s\n", in_ntoa(sin.sin_addr.s_addr)));
/* Don't want a TCP connection going to a broadcast address */
if (chk_addr(sin.sin_addr.s_addr) == IS_BROADCAST) {
DPRINTF((DBG_TCP, "TCP connection to broadcast address not allowed\n"));
return(-ENETUNREACH);
}
sk->inuse = 1;
sk->daddr = sin.sin_addr.s_addr;
sk->send_seq = jiffies * SEQ_TICK - seq_offset;
sk->rcv_ack_seq = sk->send_seq -1;
sk->err = 0;
sk->dummy_th.dest = sin.sin_port;
release_sock(sk);
buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
if (buff == NULL) {
return(-ENOMEM);
}
sk->inuse = 1;
buff->mem_addr = buff;
buff->mem_len = MAX_SYN_SIZE;
buff->len = 24;
buff->sk = sk;
buff->free = 1;
t1 = (struct tcphdr *) buff->data;
/* Put in the IP header and routing stuff. */
/* We need to build the routing stuff fromt the things saved in skb. */
tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
if (tmp < 0) {
sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
release_sock(sk);
return(-ENETUNREACH);
}
buff->len += tmp;
t1 = (struct tcphdr *)((char *)t1 +tmp);
memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
t1->seq = ntohl(sk->send_seq++);
buff->h.seq = sk->send_seq;
t1->ack = 0;
t1->window = 2;
t1->res1=0;
t1->res2=0;
t1->rst = 0;
t1->urg = 0;
t1->psh = 0;
t1->syn = 1;
t1->urg_ptr = 0;
t1->doff = 6;
/* use 512 or whatever user asked for */
if (sk->mss)
sk->mtu = sk->mss;
else
sk->mtu = 576 - HEADER_SIZE;
/* but not bigger than device MTU */
sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
/* Put in the TCP options to say MTU. */
ptr = (unsigned char *)(t1+1);
ptr[0] = 2;
ptr[1] = 4;
ptr[2] = (sk->mtu) >> 8;
ptr[3] = (sk->mtu) & 0xff;
tcp_send_check(t1, sk->saddr, sk->daddr,
sizeof(struct tcphdr) + 4, sk);
/* This must go first otherwise a really quick response will get reset. */
sk->state = TCP_SYN_SENT;
sk->rtt = TCP_CONNECT_TIME;
reset_timer(sk, TIME_WRITE, TCP_CONNECT_TIME); /* Timer for repeating the SYN until an answer */
sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
sk->prot->queue_xmit(sk, dev, buff, 0);
release_sock(sk);
return(0);
}
/*
 * Check whether a segment's sequence numbers are acceptable.
 *
 * len is the TCP header plus data, so len - doff*4 is the data length.
 * Returns 1 when the caller should go on processing the segment and 0
 * when it should be dropped.
 *
 * The window test is deliberately loose: sk->acked_seq may be more
 * recent than sk->window, so slightly more segments are accepted than
 * strictly should be.  That is harmless unless someone forges packets.
 */
static int
tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
	     struct options *opt, unsigned long saddr, struct device *dev)
{
	DPRINTF((DBG_TCP, "tcp_sequence(sk=%X, th=%X, len = %d, opt=%d, saddr=%X)\n",
		sk, th, len, opt, saddr));

	/* First byte lies within [acked_seq, acked_seq + window]. */
	if (between(th->seq, sk->acked_seq, sk->acked_seq + sk->window))
		return(1);

	/* End of the data lies within (acked_seq, acked_seq + window]. */
	if (between(th->seq + len-(th->doff*4), sk->acked_seq + 1,
		    sk->acked_seq + sk->window))
		return(1);

	/* Starts before the window and runs past its far edge. */
	if (before(th->seq, sk->acked_seq) &&
	    after(th->seq + len -(th->doff*4), sk->acked_seq + sk->window))
		return(1);

	DPRINTF((DBG_TCP, "tcp_sequence: rejecting packet.\n"));

	/*
	 * Not ours and we are not synchronized yet: reset the bogus remote
	 * connection.  Nothing is done to our end; the segment is still
	 * passed on, and with luck the next connect attempt works.
	 */
	if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) {
		tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
		return(1);
	}

	/* Too far ahead: tell the other end what we actually expect. */
	if (after(th->seq, sk->acked_seq + sk->window)) {
		if(!th->rst)
			tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr);
		return(0);
	}

#ifdef undef
	/*
	 * If we do this, we won't respond to keepalive packets, since those
	 * are slightly out of window and we have to generate an ack.
	 * Berkeley doesn't seem to do this, but it's always hard to be sure.
	 */
	/* In case it's just a late ack, let it through. */
	if (th->ack && len == (th->doff * 4) &&
	    after(th->seq, sk->acked_seq - 32767) &&
	    !th->fin && !th->syn) return(1);
#endif

	/* Anything else: try to resync, but never answer a reset. */
	if (!th->rst) {
		tcp_send_ack(sk->send_seq, sk->acked_seq, sk, th, saddr);
	}
	return(0);
}
/*
 * tcp_rcv - receive one TCP segment and run it through the state machine.
 *
 *	skb	  buffer holding the segment; skb->h.th addresses the TCP header
 *	dev	  device the segment arrived on
 *	opt	  IP options; may be NULL, only handed on to helpers
 *	daddr	  destination address of the segment (our side)
 *	len	  length of the TCP header plus data
 *	saddr	  source address of the segment (the peer)
 *	redo	  non-zero skips the checksum test and the per-buffer setup;
 *		  the socket must then already exist (presumably used when a
 *		  back-logged segment is replayed -- confirm against caller)
 *	protocol  not used; may be NULL
 *
 * On first delivery (!redo) the checksum is verified, a segment for an
 * unknown or zapped socket is answered with a reset (unless it is itself a
 * reset), and the buffer is either appended to sk->back_log -- when the
 * socket is already in use -- or the socket is marked in use here.  The
 * buffer's memory is then charged to the socket and the segment is
 * dispatched on sk->state.
 *
 * Whenever the buffer is not queued for the reader it is freed here, and
 * the socket is released before returning.  Always returns 0.
 *
 * IP security/compartment options are not examined.
 */
int
tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
	unsigned long daddr, unsigned short len,
	unsigned long saddr, int redo, struct inet_protocol * protocol)
{
	struct tcphdr *th;
	struct sock *sk;

	if (!skb) {
		DPRINTF((DBG_TCP, "tcp.c: tcp_rcv skb = NULL\n"));
		return(0);
	}

	/* protocol and opt may legitimately be NULL, so neither is checked. */

	/* NOTE(review): skb is not freed on this path -- confirm ownership. */
	if (!dev) {
		DPRINTF((DBG_TCP, "tcp.c: tcp_rcv dev = NULL\n"));
		return(0);
	}
	th = skb->h.th;

	/* Find the socket. */
	sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
	DPRINTF((DBG_TCP, "<<\n"));
	DPRINTF((DBG_TCP, "len = %d, redo = %d, skb=%X\n", len, redo, skb));

	/*
	 * If this socket has got a reset it is to all intents and
	 * purposes really dead.
	 */
	if (sk!=NULL && sk->zapped)
		sk=NULL;

	if (sk) {
		DPRINTF((DBG_TCP, "sk = %X:\n", sk));
	}

	if (!redo) {
		if (tcp_check(th, len, saddr, daddr )) {
			skb->sk = NULL;
			DPRINTF((DBG_TCP, "packet dropped with bad checksum.\n"));
			if (inet_debug == DBG_SLIP) printk("\rtcp_rcv: bad checksum\n");
			kfree_skb(skb,FREE_READ);
			/*
			 * We don't release the socket because it was
			 * never marked in use.
			 */
			return(0);
		}

		/* See if we know about the socket. */
		if (sk == NULL) {
			if (!th->rst) {
				/* tcp_reset() is always called with th->seq in host order. */
				th->seq = ntohl(th->seq);
				tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
			}
			skb->sk = NULL;
			kfree_skb(skb, FREE_READ);
			return(0);
		}

		skb->len = len;
		skb->sk = sk;
		skb->acked = 0;
		skb->used = 0;
		skb->free = 0;
		skb->urg_used = 0;
		/* The buffer records the addresses swapped: its saddr is ours. */
		skb->saddr = daddr;
		skb->daddr = saddr;

		th->seq = ntohl(th->seq);

		/*
		 * If the socket is busy, append the buffer to the tail of the
		 * circular back_log list instead of processing it now.
		 */
		cli();
		if (sk->inuse) {
			if (sk->back_log == NULL) {
				sk->back_log = skb;
				skb->next = skb;
				skb->prev = skb;
			} else {
				skb->next = sk->back_log;
				skb->prev = sk->back_log->prev;
				skb->prev->next = skb;
				skb->next->prev = skb;
			}
			sti();
			return(0);
		}
		sk->inuse = 1;
		sti();
	} else {
		if (!sk) {
			DPRINTF((DBG_TCP, "tcp.c: tcp_rcv bug sk=NULL redo = 1\n"));
			return(0);
		}
	}

	/*
	 * NOTE(review): this path returns with the socket still marked in
	 * use and without freeing skb -- confirm that it cannot happen.
	 */
	if (!sk->prot) {
		DPRINTF((DBG_TCP, "tcp.c: tcp_rcv sk->prot = NULL \n"));
		return(0);
	}

	/* Charge the memory to the socket. */
	if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) {
		skb->sk = NULL;
		DPRINTF((DBG_TCP, "dropping packet due to lack of buffer space.\n"));
		kfree_skb(skb, FREE_READ);
		release_sock(sk);
		return(0);
	}
	sk->rmem_alloc += skb->mem_len;

	DPRINTF((DBG_TCP, "About to do switch.\n"));

	/* Now deal with it. */
	switch(sk->state) {
		/*
		 * This should close the system down if it's waiting
		 * for an ack that is never going to be sent.
		 */
		case TCP_LAST_ACK:
			if (th->rst) {
				sk->zapped=1;
				sk->err = ECONNRESET;
				sk->state = TCP_CLOSE;
				sk->shutdown = SHUTDOWN_MASK;
				if (!sk->dead) {
					sk->state_change(sk);
				}
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}
			/* fall through */

		case TCP_ESTABLISHED:
		case TCP_CLOSE_WAIT:
		case TCP_FIN_WAIT1:
		case TCP_FIN_WAIT2:
		case TCP_TIME_WAIT:
			/* tcp_sequence() sends any resync ack itself. */
			if (!tcp_sequence(sk, th, len, opt, saddr,dev)) {
				if (inet_debug == DBG_SLIP) printk("\rtcp_rcv: not in seq\n");
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}

			if (th->rst) {
				sk->zapped=1;
				/* This means the thing should really be closed. */
				sk->err = ECONNRESET;
				if (sk->state == TCP_CLOSE_WAIT) {
					sk->err = EPIPE;
				}
				/*
				 * A reset with a fin just means that
				 * the data was not all read.
				 */
				sk->state = TCP_CLOSE;
				sk->shutdown = SHUTDOWN_MASK;
				if (!sk->dead) {
					sk->state_change(sk);
				}
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}

			/* A SYN on a synchronized connection kills it. */
			if (th->syn) {
				sk->err = ECONNRESET;
				sk->state = TCP_CLOSE;
				sk->shutdown = SHUTDOWN_MASK;
				tcp_reset(daddr, saddr, th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
				if (!sk->dead) {
					sk->state_change(sk);
				}
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}

			if (th->ack) {
				if (!tcp_ack(sk, th, saddr, len)) {
					kfree_skb(skb, FREE_READ);
					release_sock(sk);
					return(0);
				}
			}

			if (th->urg) {
				if (tcp_urg(sk, th, saddr)) {
					kfree_skb(skb, FREE_READ);
					release_sock(sk);
					return(0);
				}
			}

			if (tcp_data(skb, sk, saddr, len)) {
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}

			/* Moved: you must do data then fin bit. */
			if (th->fin && tcp_fin(sk, th, saddr, dev)) {
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}

			release_sock(sk);
			return(0);

		case TCP_CLOSE:
			if (sk->dead || sk->daddr) {
				DPRINTF((DBG_TCP, "packet received for closed,dead socket\n"));
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}

			if (!th->rst) {
				if (!th->ack)
					th->ack_seq = 0;
				tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
			}
			kfree_skb(skb, FREE_READ);
			release_sock(sk);
			return(0);

		case TCP_LISTEN:
			if (th->rst) {
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}
			if (th->ack) {
				tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}

			if (th->syn) {
				/*
				 * Now we just put the whole thing including
				 * the header and saddr, and protocol pointer
				 * into the buffer.  We can't respond until the
				 * user tells us to accept the connection.
				 * The buffer is handed over, so it is not
				 * freed here.
				 */
				tcp_conn_request(sk, skb, daddr, saddr, opt, dev);
				release_sock(sk);
				return(0);
			}

			kfree_skb(skb, FREE_READ);
			release_sock(sk);
			return(0);

		default:
			if (!tcp_sequence(sk, th, len, opt, saddr,dev)) {
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}
			/* fall through */

		case TCP_SYN_SENT:
			if (th->rst) {
				sk->err = ECONNREFUSED;
				sk->state = TCP_CLOSE;
				sk->shutdown = SHUTDOWN_MASK;
				sk->zapped = 1;
				if (!sk->dead) {
					sk->state_change(sk);
				}
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}

			if (!th->ack) {
				/* A bare SYN: note it and wait for the ack. */
				if (th->syn) {
					sk->state = TCP_SYN_RECV;
				}
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}

			switch(sk->state) {
				case TCP_SYN_SENT:
					if (!tcp_ack(sk, th, saddr, len)) {
						tcp_reset(daddr, saddr, th,
							  sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
						kfree_skb(skb, FREE_READ);
						release_sock(sk);
						return(0);
					}

					/*
					 * If the syn bit is also set, switch to
					 * tcp_syn_recv, and then to established.
					 */
					if (!th->syn) {
						kfree_skb(skb, FREE_READ);
						release_sock(sk);
						return(0);
					}

					/* Ack the syn and fall through. */
					sk->acked_seq = th->seq+1;
					sk->fin_seq = th->seq;
					tcp_send_ack(sk->send_seq, th->seq+1,
						     sk, th, sk->daddr);
					/* fall through */

				case TCP_SYN_RECV:
					if (!tcp_ack(sk, th, saddr, len)) {
						tcp_reset(daddr, saddr, th,
							  sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
						kfree_skb(skb, FREE_READ);
						release_sock(sk);
						return(0);
					}
					sk->state = TCP_ESTABLISHED;

					/*
					 * Now we need to finish filling out
					 * some of the tcp header.
					 */
					/* We need to check for mtu info. */
					tcp_options(sk, th);
					sk->dummy_th.dest = th->source;
					sk->copied_seq = sk->acked_seq-1;
					if (!sk->dead) {
						sk->state_change(sk);
					}

					/*
					 * Now process the rest like we were
					 * already in the established state.
					 */
					if (th->urg) {
						if (tcp_urg(sk, th, saddr)) {
							kfree_skb(skb, FREE_READ);
							release_sock(sk);
							return(0);
						}
					}

					/*
					 * th points into skb, so once the
					 * buffer is freed the header must not
					 * be looked at again: stop here, as the
					 * other tcp_data() callers do.
					 */
					if (tcp_data(skb, sk, saddr, len)) {
						kfree_skb(skb, FREE_READ);
						release_sock(sk);
						return(0);
					}

					if (th->fin) tcp_fin(sk, th, saddr, dev);
					release_sock(sk);
					return(0);

				default:
					break;
			}

			/* Remaining states: handle urgent data, data and fin. */
			if (th->urg) {
				if (tcp_urg(sk, th, saddr)) {
					kfree_skb(skb, FREE_READ);
					release_sock(sk);
					return(0);
				}
			}

			if (tcp_data(skb, sk, saddr, len)) {
				kfree_skb(skb, FREE_READ);
				release_sock(sk);
				return(0);
			}

			if (!th->fin) {
				release_sock(sk);
				return(0);
			}
			tcp_fin(sk, th, saddr, dev);
			release_sock(sk);
			return(0);
	}
}
/*
 * Send a bare ACK carrying an out-of-date sequence number (send_seq - 1).
 * The expectation is that the other end answers it with an ack of its own.
 *
 * Nothing is sent after a reset has been received, outside the
 * ESTABLISHED and CLOSE_WAIT states, or when no buffer or header can be
 * obtained.
 */
static void
tcp_write_wakeup(struct sock *sk)
{
	struct device *dev=NULL;
	struct sk_buff *skb;
	struct tcphdr *th;
	int hdr_len;

	/* After a valid reset we can send no more. */
	if (sk->zapped)
		return;

	if (!(sk->state == TCP_ESTABLISHED || sk->state == TCP_CLOSE_WAIT))
		return;

	skb = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
	if (skb == NULL)
		return;

	skb->sk = sk;
	skb->free = 1;
	skb->len = sizeof(struct tcphdr);
	skb->mem_addr = skb;
	skb->mem_len = MAX_ACK_SIZE;

	DPRINTF((DBG_TCP, "in tcp_write_wakeup\n"));
	th = (struct tcphdr *) skb->data;

	/* Put in the IP header and routing stuff. */
	hdr_len = sk->prot->build_header(skb, sk->saddr, sk->daddr, &dev,
					 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
	if (hdr_len < 0) {
		sk->prot->wfree(sk, skb->mem_addr, skb->mem_len);
		return;
	}

	skb->len += hdr_len;
	th = (struct tcphdr *)((char *)th +hdr_len);

	/* Start from the socket's template header. */
	memcpy(th,(void *) &sk->dummy_th, sizeof(*th));

	/*
	 * One behind the current sequence number: this is what should
	 * provoke the ack from the other end.
	 */
	th->seq = ntohl(sk->send_seq-1);

	/* Pure ACK: every other flag and reserved bit is cleared. */
	th->ack = 1;
	th->syn = 0;
	th->fin = 0;
	th->rst = 0;
	th->psh = 0;
	th->urg = 0;
	th->res1= 0;
	th->res2= 0;

	th->ack_seq = ntohl(sk->acked_seq);
	th->window = ntohs(tcp_select_window(sk));
	th->doff = sizeof(*th)/4;
	tcp_send_check(th, sk->saddr, sk->daddr, sizeof(*th), sk);

	/*
	 * Send it and free it.
	 * This will prevent the timer from automatically being restarted.
	 */
	sk->prot->queue_xmit(sk, dev, skb, 1);
}
/*
 * Probe a zero window.
 *
 * The probe is a copy of the headers of the first packet on the write
 * queue followed by at most one byte of its data, with the ack number,
 * window and checksum refreshed.  After sending, the backoff is
 * increased and the probe timer re-armed.
 *
 * Nothing happens after a reset, outside the ESTABLISHED, CLOSE_WAIT,
 * FIN_WAIT1 and FIN_WAIT2 states, or when the write queue is empty.  If
 * no buffer can be allocated the probe timer is set to retry shortly.
 */
void
tcp_send_probe0(struct sock *sk)
{
	struct sk_buff *skb, *probe;
	struct device *dev;
	struct iphdr *iph;
	struct tcphdr *th;
	unsigned char *raw;
	int hdr_len, payload, probe_len;

	/* After a valid reset we can send no more. */
	if (sk->zapped)
		return;

	if (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT &&
	    sk->state != TCP_FIN_WAIT1 && sk->state != TCP_FIN_WAIT2)
		return;

	skb = sk->wfront;
	if (skb == NULL)
		return;

	dev = skb->dev;
	/* I know this can't happen but as it does.. */
	if (dev == NULL) {
		printk("tcp_send_probe0: NULL device bug!\n");
		return;
	}
	IS_SKB(skb);

	/* Work out how much of the queued packet is header. */
	raw = skb->data;
	iph = (struct iphdr *) (raw + dev->hard_header_len);
	hdr_len = (iph->ihl * sizeof(unsigned long)) + dev->hard_header_len;
	payload = skb->len - hdr_len - sizeof(struct tcphdr);
	/* Headers, plus a single data byte when the packet has any data. */
	probe_len = hdr_len + sizeof(struct tcphdr) + (payload ? 1 : 0);

	/* Allocate buffer. */
	probe = alloc_skb(sizeof(struct sk_buff) + probe_len,GFP_KERNEL);
	if (probe == NULL) {
		/* Try again real soon. */
		reset_timer (sk, TIME_PROBE0, 10);
		return;
	}

	probe->arp = skb->arp;
	probe->len = probe_len;
	probe->h.raw = (char *)(probe->data);

	sk->wmem_alloc += probe->mem_len;

	/* Copy the packet header (and the one data byte) into the new buffer. */
	memcpy(probe->h.raw, raw, probe_len);

	/* Step over the lower-layer headers: h.raw is now the TCP header. */
	probe->h.raw += hdr_len;
	th = probe->h.th;

	/*
	 * source, dest, seq, doff and fin are kept from the existing
	 * packet.  Fin is safe because Linux always sends fin in a
	 * separate packet; syn and rst had better be zero in the original.
	 */
	th->ack = 1;
	th->ack_seq = ntohl(sk->acked_seq);
	/* The urgent pointer might be beyond this fragment. */
	th->urg = 0;
	th->urg_ptr = 0;
	th->res1 = 0;
	th->res2 = 0;
	th->window = ntohs(tcp_select_window(sk));
	tcp_send_check(th, sk->saddr, sk->daddr, probe_len - hdr_len, sk);

	/*
	 * Send it and free it.
	 * This will prevent the timer from automatically being restarted.
	 */
	sk->prot->queue_xmit(sk, dev, probe, 1);

	/* Back off further before the next probe. */
	sk->backoff++;
	reset_timer (sk, TIME_PROBE0,
		     backoff (sk->backoff) * (2 * sk->mdev + sk->rtt));
	sk->retransmits++;
	sk->prot->retransmits ++;
}
/*
 * Set a TCP-level socket option.
 *
 * Requests for any level other than SOL_TCP are passed to
 * ip_setsockopt().  optval must point to an int in user space; optlen is
 * not examined at this level.
 *
 *	TCP_MAXSEG	value must lie in 200..2048 and not exceed sk->mtu
 *	TCP_NODELAY	any non-zero value turns sk->nonagle on, zero off
 *
 * Returns 0 on success, -EINVAL for a NULL optval or an out-of-range
 * segment size, -ENOPROTOOPT for an unknown option, or the error from
 * verify_area().
 */
int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
{
	int val,err;

	if (level != SOL_TCP)
		return ip_setsockopt(sk,level,optname,optval,optlen);

	if (optval == NULL)
		return(-EINVAL);

	err = verify_area(VERIFY_READ, optval, sizeof(int));
	if (err)
		return err;

	val = get_fs_long((unsigned long *)optval);

	if (optname == TCP_MAXSEG) {
		if (val < 200 || val > 2048 || val > sk->mtu)
			return -EINVAL;
		sk->mss = val;
		return 0;
	}

	if (optname == TCP_NODELAY) {
		sk->nonagle = (val != 0);
		return 0;
	}

	return(-ENOPROTOOPT);
}
/*
 * Read a TCP-level socket option.
 *
 * Requests for any level other than SOL_TCP are passed to
 * ip_getsockopt().
 *
 *	TCP_MAXSEG	reports sk->mss
 *	TCP_NODELAY	reports sk->nonagle
 *
 * On success the value is stored at optval, sizeof(int) is stored at
 * optlen, and 0 is returned.  Returns -ENOPROTOOPT for an unknown option
 * or the error from verify_area() for a bad user pointer; in either case
 * nothing is written to user space.
 */
int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
{
	int val,err;

	if(level!=SOL_TCP)
		return ip_getsockopt(sk,level,optname,optval,optlen);

	switch(optname)
	{
		case TCP_MAXSEG:
			val=sk->mss;
			break;
		case TCP_NODELAY:
			val=sk->nonagle;	/* Until Johannes stuff is in */
			break;
		default:
			return(-ENOPROTOOPT);
	}

	/*
	 * Check both user buffers before touching either, so that a bad
	 * optval does not leave *optlen overwritten by a failed call.
	 */
	err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
	if(err)
		return err;
	err=verify_area(VERIFY_WRITE, optval, sizeof(int));
	if(err)
		return err;

	put_fs_long(sizeof(int),(unsigned long *) optlen);
	put_fs_long(val,(unsigned long *)optval);

	return(0);
}
/*
 * The protocol descriptor for TCP: the table of handlers and parameters
 * through which the rest of the INET code reaches this file (for example
 * the sk->prot->wmalloc, build_header, queue_xmit and wfree calls above).
 *
 * The initializer is positional, so every entry must stay in the order in
 * which struct proto declares its members.  That declaration is not in
 * this file; check it before adding, removing or moving an entry.
 */
struct proto tcp_prot = {
/* Buffer allocation and accounting helpers (generic socket versions). */
sock_wmalloc,
sock_rmalloc,
sock_wfree,
sock_rfree,
sock_rspace,
sock_wspace,
/* TCP's own socket operations. */
tcp_close,
tcp_read,
tcp_write,
tcp_sendto,
tcp_recvfrom,
ip_build_header,
tcp_connect,
tcp_accept,
ip_queue_xmit,
tcp_retransmit,
tcp_write_wakeup,
tcp_read_wakeup,
tcp_rcv,
tcp_select,
tcp_ioctl,
/* NOTE(review): this slot is left without a handler for TCP -- see struct proto for which member it is. */
NULL,
tcp_shutdown,
tcp_setsockopt,
tcp_getsockopt,
/* NOTE(review): the meaning of the two numbers and the array below depends on struct proto -- confirm there. */
128,
0,
{NULL,},
"TCP"
};