| /* |
| * INET An implementation of the TCP/IP protocol suite for the LINUX |
| * operating system. INET is implemented using the BSD Socket |
| * interface as the means of communication with the user level. |
| * |
| * Implementation of the Transmission Control Protocol(TCP). |
| * |
| * Version: @(#)tcp.c 1.0.16 05/25/93 |
| * |
| * Authors: Ross Biro, <bir7@leland.Stanford.Edu> |
| * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> |
| * Mark Evans, <evansmp@uhura.aston.ac.uk> |
| * Corey Minyard <wf-rch!minyard@relay.EU.net> |
| * Florian La Roche, <flla@stud.uni-sb.de> |
| * |
| * Fixes: |
| * Alan Cox : Numerous verify_area() calls |
| * Alan Cox : Set the ACK bit on a reset |
| * Alan Cox : Stopped it crashing if it closed while sk->inuse=1 |
| * and was trying to connect (tcp_err()). |
| * Alan Cox : All icmp error handling was broken |
| * pointers passed where wrong and the |
| * socket was looked up backwards. Nobody |
| * tested any icmp error code obviously. |
| * Alan Cox : tcp_err() now handled properly. It wakes people |
| * on errors. select behaves and the icmp error race |
| * has gone by moving it into sock.c |
| * Alan Cox : tcp_reset() fixed to work for everything not just |
| * packets for unknown sockets. |
| * Alan Cox : tcp option processing. |
| * Alan Cox : Reset tweaked (still not 100%) [Had syn rule wrong] |
| * Herp Rosmanith : More reset fixes |
| * Alan Cox : No longer acks invalid rst frames. Acking |
| * any kind of RST is right out. |
| * Alan Cox : Sets an ignore me flag on an rst receive |
| * otherwise odd bits of prattle escape still |
| * Alan Cox : Fixed another acking RST frame bug. Should stop |
| * LAN workplace lockups. |
| * Alan Cox : Some tidyups using the new skb list facilities |
| * Alan Cox : sk->keepopen now seems to work |
| * Alan Cox : Pulls options out correctly on accepts |
| * Alan Cox : Fixed assorted sk->rqueue->next errors |
| * Alan Cox : PSH doesn't end a TCP read. Switched a bit to skb ops. |
| * Alan Cox : Tidied tcp_data to avoid a potential nasty. |
| * Alan Cox : Added some beter commenting, as the tcp is hard to follow |
| * Alan Cox : Removed incorrect check for 20 * psh |
| * Michael O'Reilly : ack < copied bug fix. |
| * Johannes Stille : Misc tcp fixes (not all in yet). |
| * Alan Cox : FIN with no memory -> CRASH |
| * Alan Cox : Added socket option proto entries. Also added awareness of them to accept. |
| * Alan Cox : Added TCP options (SOL_TCP) |
| * Alan Cox : Switched wakeup calls to callbacks, so the kernel can layer network sockets. |
| * Alan Cox : Use ip_tos/ip_ttl settings. |
| * Alan Cox : Handle FIN (more) properly (we hope). |
| * Alan Cox : RST frames sent on unsynchronised state ack error/ |
| * Alan Cox : Put in missing check for SYN bit. |
| * Alan Cox : Added tcp_select_window() aka NET2E |
| * window non shrink trick. |
| * Alan Cox : Added a couple of small NET2E timer fixes |
| * Charles Hedrick : TCP fixes |
| * Toomas Tamm : TCP window fixes |
| * Alan Cox : Small URG fix to rlogin ^C ack fight |
| * Charles Hedrick : Window fix |
| * Linus : Rewrote tcp_read() and URG handling |
| * completely |
| * |
| * |
| * To Fix: |
| * Possibly a problem with accept(). BSD accept never fails after |
| * it causes a select. Linux can - given the official select semantics I |
| * feel that _really_ its the BSD network programs that are bust (notably |
| * inetd, which hangs occasionally because of this). |
| * Add VJ Fastrecovery algorithm ? |
| * Protocol closedown badly messed up. |
| * Incompatiblity with spider ports (tcp hangs on that |
| * socket occasionally). |
| * MSG_PEEK and read on same socket at once can cause crashes. |
| * |
| * This program is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU General Public License |
| * as published by the Free Software Foundation; either version |
| * 2 of the License, or(at your option) any later version. |
| */ |
| #include <linux/types.h> |
| #include <linux/sched.h> |
| #include <linux/mm.h> |
| #include <linux/string.h> |
| #include <linux/socket.h> |
| #include <linux/sockios.h> |
| #include <linux/termios.h> |
| #include <linux/in.h> |
| #include <linux/fcntl.h> |
| #include "inet.h" |
| #include "dev.h" |
| #include "ip.h" |
| #include "protocol.h" |
| #include "icmp.h" |
| #include "tcp.h" |
| #include "skbuff.h" |
| #include "sock.h" |
| #include "arp.h" |
| #include <linux/errno.h> |
| #include <linux/timer.h> |
| #include <asm/system.h> |
| #include <asm/segment.h> |
| #include <linux/mm.h> |
| |
| #define SEQ_TICK 3 |
| unsigned long seq_offset; |
| #define SUBNETSARELOCAL |
| |
| static __inline__ int |
| min(unsigned int a, unsigned int b) |
| { |
| if (a < b) return(a); |
| return(b); |
| } |
| |
| |
| static void __print_th(struct tcphdr *th) |
| { |
| unsigned char *ptr; |
| |
| printk("TCP header:\n"); |
| printk(" source=%d, dest=%d, seq =%ld, ack_seq = %ld\n", |
| ntohs(th->source), ntohs(th->dest), |
| ntohl(th->seq), ntohl(th->ack_seq)); |
| printk(" fin=%d, syn=%d, rst=%d, psh=%d, ack=%d, urg=%d res1=%d res2=%d\n", |
| th->fin, th->syn, th->rst, th->psh, th->ack, |
| th->urg, th->res1, th->res2); |
| printk(" window = %d, check = %d urg_ptr = %d\n", |
| ntohs(th->window), ntohs(th->check), ntohs(th->urg_ptr)); |
| printk(" doff = %d\n", th->doff); |
| ptr =(unsigned char *)(th + 1); |
| printk(" options = %d %d %d %d\n", ptr[0], ptr[1], ptr[2], ptr[3]); |
| } |
| |
| static inline void print_th(struct tcphdr *th) |
| { |
| if (inet_debug == DBG_TCP) |
| __print_th(th); |
| } |
| |
| /* This routine grabs the first thing off of a rcv queue. */ |
| static struct sk_buff * |
| get_firstr(struct sock *sk) |
| { |
| return skb_dequeue(&sk->rqueue); |
| } |
| |
| /* |
| * Difference between two values in tcp ack terms. |
| */ |
| |
| static long |
| diff(unsigned long seq1, unsigned long seq2) |
| { |
| long d; |
| |
| d = seq1 - seq2; |
| if (d > 0) return(d); |
| |
| /* I hope this returns what I want. */ |
| return(~d+1); |
| } |
| |
| /* This routine picks a TCP windows for a socket based on |
| the following constraints |
| |
| 1. The window can never be shrunk once it is offered (RFC 793) |
| 2. We limit memory per socket |
| |
| For now we use NET2E3's heuristic of offering half the memory |
| we have handy. All is not as bad as this seems however because |
| of two things. Firstly we will bin packets even within the window |
| in order to get the data we are waiting for into the memory limit. |
| Secondly we bin common duplicate forms at receive time |
| |
| Better heuristics welcome |
| */ |
| |
| static int tcp_select_window(struct sock *sk) |
| { |
| int new_window = sk->prot->rspace(sk); |
| |
| /* |
| * two things are going on here. First, we don't ever offer a |
| * window less than min(sk->mss, MAX_WINDOW/2). This is the |
| * receiver side of SWS as specified in RFC1122. |
| * Second, we always give them at least the window they |
| * had before, in order to avoid retracting window. This |
| * is technically allowed, but RFC1122 advises against it and |
| * in practice it causes trouble. |
| */ |
| if (new_window < min(sk->mss, MAX_WINDOW/2) || |
| new_window < sk->window) |
| return(sk->window); |
| return(new_window); |
| } |
| |
| /* Enter the time wait state. */ |
| |
| static void tcp_time_wait(struct sock *sk) |
| { |
| sk->state = TCP_TIME_WAIT; |
| sk->shutdown = SHUTDOWN_MASK; |
| if (!sk->dead) |
| sk->state_change(sk); |
| reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); |
| } |
| |
| /* |
| * A timer event has trigger a tcp retransmit timeout. The |
| * socket xmit queue is ready and set up to send. Because |
| * the ack receive code keeps the queue straight we do |
| * nothing clever here. |
| */ |
| |
| static void |
| tcp_retransmit(struct sock *sk, int all) |
| { |
| if (all) { |
| ip_retransmit(sk, all); |
| return; |
| } |
| |
| sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */ |
| /* sk->ssthresh in theory can be zero. I guess that's OK */ |
| sk->cong_count = 0; |
| |
| sk->cong_window = 1; |
| |
| /* Do the actual retransmit. */ |
| ip_retransmit(sk, all); |
| } |
| |
| |
| /* |
| * This routine is called by the ICMP module when it gets some |
| * sort of error condition. If err < 0 then the socket should |
| * be closed and the error returned to the user. If err > 0 |
| * it's just the icmp type << 8 | icmp code. After adjustment |
| * header points to the first 8 bytes of the tcp header. We need |
| * to find the appropriate port. |
| */ |
| void |
| tcp_err(int err, unsigned char *header, unsigned long daddr, |
| unsigned long saddr, struct inet_protocol *protocol) |
| { |
| struct tcphdr *th; |
| struct sock *sk; |
| struct iphdr *iph=(struct iphdr *)header; |
| |
| header+=4*iph->ihl; |
| |
| DPRINTF((DBG_TCP, "TCP: tcp_err(%d, hdr=%X, daddr=%X saddr=%X, protocol=%X)\n", |
| err, header, daddr, saddr, protocol)); |
| |
| th =(struct tcphdr *)header; |
| sk = get_sock(&tcp_prot, th->source/*dest*/, daddr, th->dest/*source*/, saddr); |
| print_th(th); |
| |
| if (sk == NULL) return; |
| |
| if(err<0) |
| { |
| sk->err = -err; |
| sk->error_report(sk); |
| return; |
| } |
| |
| if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) { |
| /* |
| * FIXME: |
| * For now we will just trigger a linear backoff. |
| * The slow start code should cause a real backoff here. |
| */ |
| if (sk->cong_window > 4) sk->cong_window--; |
| return; |
| } |
| |
| DPRINTF((DBG_TCP, "TCP: icmp_err got error\n")); |
| sk->err = icmp_err_convert[err & 0xff].errno; |
| |
| /* |
| * If we've already connected we will keep trying |
| * until we time out, or the user gives up. |
| */ |
| if (icmp_err_convert[err & 0xff].fatal) { |
| if (sk->state == TCP_SYN_SENT) { |
| sk->state = TCP_CLOSE; |
| sk->error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ |
| } |
| } |
| return; |
| } |
| |
| |
| /* |
| * Walk down the receive queue counting readable data until we hit the end or we find a gap |
| * in the received data queue (ie a frame missing that needs sending to us) |
| */ |
| |
| static int |
| tcp_readable(struct sock *sk) |
| { |
| unsigned long counted; |
| unsigned long amount; |
| struct sk_buff *skb; |
| int count=0; |
| int sum; |
| unsigned long flags; |
| |
| DPRINTF((DBG_TCP, "tcp_readable(sk=%X)\n", sk)); |
| if(sk && sk->debug) |
| printk("tcp_readable: %p - ",sk); |
| |
| if (sk == NULL || skb_peek(&sk->rqueue) == NULL) /* Empty sockets are easy! */ |
| { |
| if(sk && sk->debug) |
| printk("empty\n"); |
| return(0); |
| } |
| |
| counted = sk->copied_seq+1; /* Where we are at the moment */ |
| amount = 0; |
| |
| save_flags(flags); /* So nobody adds things at the wrong moment */ |
| cli(); |
| skb =(struct sk_buff *)sk->rqueue; |
| |
| /* Do until a push or until we are out of data. */ |
| do { |
| count++; |
| #ifdef OLD |
| /* This is wrong: It breaks Chameleon amongst other stacks */ |
| if (count > 20) { |
| restore_flags(flags); |
| DPRINTF((DBG_TCP, "tcp_readable, more than 20 packets without a psh\n")); |
| printk("tcp_read: possible read_queue corruption.\n"); |
| return(amount); |
| } |
| #endif |
| if (before(counted, skb->h.th->seq)) /* Found a hole so stops here */ |
| break; |
| sum = skb->len -(counted - skb->h.th->seq); /* Length - header but start from where we are up to (avoid overlaps) */ |
| if (skb->h.th->syn) |
| sum++; |
| if (sum >= 0) { /* Add it up, move on */ |
| amount += sum; |
| if (skb->h.th->syn) amount--; |
| counted += sum; |
| } |
| if (amount && skb->h.th->psh) break; |
| skb =(struct sk_buff *)skb->next; /* Move along */ |
| } while(skb != sk->rqueue); |
| if (amount && !sk->urginline && sk->urg_data && |
| (sk->urg_seq - sk->copied_seq) <= (counted - sk->copied_seq)) |
| amount--; /* don't count urg data */ |
| restore_flags(flags); |
| DPRINTF((DBG_TCP, "tcp readable returning %d bytes\n", amount)); |
| if(sk->debug) |
| printk("got %lu bytes.\n",amount); |
| return(amount); |
| } |
| |
| |
| /* |
| * Wait for a TCP event. Note the oddity with SEL_IN and reading. The |
| * listening socket has a receive queue of sockets to accept. |
| */ |
| |
| static int |
| tcp_select(struct sock *sk, int sel_type, select_table *wait) |
| { |
| DPRINTF((DBG_TCP, "tcp_select(sk=%X, sel_type = %d, wait = %X)\n", |
| sk, sel_type, wait)); |
| |
| sk->inuse = 1; |
| switch(sel_type) { |
| case SEL_IN: |
| if(sk->debug) |
| printk("select in"); |
| select_wait(sk->sleep, wait); |
| if(sk->debug) |
| printk("-select out"); |
| if (skb_peek(&sk->rqueue) != NULL) { |
| if (sk->state == TCP_LISTEN || tcp_readable(sk)) { |
| release_sock(sk); |
| if(sk->debug) |
| printk("-select ok data\n"); |
| return(1); |
| } |
| } |
| if (sk->err != 0) /* Receiver error */ |
| { |
| release_sock(sk); |
| if(sk->debug) |
| printk("-select ok error"); |
| return(1); |
| } |
| if (sk->shutdown & RCV_SHUTDOWN) { |
| release_sock(sk); |
| if(sk->debug) |
| printk("-select ok down\n"); |
| return(1); |
| } else { |
| release_sock(sk); |
| if(sk->debug) |
| printk("-select fail\n"); |
| return(0); |
| } |
| case SEL_OUT: |
| select_wait(sk->sleep, wait); |
| if (sk->shutdown & SEND_SHUTDOWN) { |
| DPRINTF((DBG_TCP, |
| "write select on shutdown socket.\n")); |
| |
| /* FIXME: should this return an error? */ |
| release_sock(sk); |
| return(0); |
| } |
| |
| /* |
| * FIXME: |
| * Hack so it will probably be able to write |
| * something if it says it's ok to write. |
| */ |
| if (sk->prot->wspace(sk) >= sk->mss) { |
| release_sock(sk); |
| /* This should cause connect to work ok. */ |
| if (sk->state == TCP_SYN_RECV || |
| sk->state == TCP_SYN_SENT) return(0); |
| return(1); |
| } |
| DPRINTF((DBG_TCP, |
| "tcp_select: sleeping on write sk->wmem_alloc = %d, " |
| "sk->packets_out = %d\n" |
| "sk->wback = %X, sk->wfront = %X\n" |
| "sk->write_seq = %u, sk->window_seq=%u\n", |
| sk->wmem_alloc, sk->packets_out, |
| sk->wback, sk->wfront, |
| sk->write_seq, sk->window_seq)); |
| |
| release_sock(sk); |
| return(0); |
| case SEL_EX: |
| select_wait(sk->sleep,wait); |
| if (sk->err || sk->urg_data) { |
| release_sock(sk); |
| return(1); |
| } |
| release_sock(sk); |
| return(0); |
| } |
| |
| release_sock(sk); |
| return(0); |
| } |
| |
| |
| int |
| tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) |
| { |
| int err; |
| DPRINTF((DBG_TCP, "tcp_ioctl(sk=%X, cmd = %d, arg=%X)\n", sk, cmd, arg)); |
| switch(cmd) { |
| case DDIOCSDBG: |
| return(dbg_ioctl((void *) arg, DBG_TCP)); |
| |
| case TIOCINQ: |
| #ifdef FIXME /* FIXME: */ |
| case FIONREAD: |
| #endif |
| { |
| unsigned long amount; |
| |
| if (sk->state == TCP_LISTEN) return(-EINVAL); |
| |
| sk->inuse = 1; |
| amount = tcp_readable(sk); |
| release_sock(sk); |
| DPRINTF((DBG_TCP, "returning %d\n", amount)); |
| err=verify_area(VERIFY_WRITE,(void *)arg, |
| sizeof(unsigned long)); |
| if(err) |
| return err; |
| put_fs_long(amount,(unsigned long *)arg); |
| return(0); |
| } |
| case SIOCATMARK: |
| { |
| int answ = sk->urg_data && sk->urg_seq == sk->copied_seq+1; |
| |
| err = verify_area(VERIFY_WRITE,(void *) arg, |
| sizeof(unsigned long)); |
| if (err) |
| return err; |
| put_fs_long(answ,(int *) arg); |
| return(0); |
| } |
| case TIOCOUTQ: |
| { |
| unsigned long amount; |
| |
| if (sk->state == TCP_LISTEN) return(-EINVAL); |
| amount = sk->prot->wspace(sk); |
| err=verify_area(VERIFY_WRITE,(void *)arg, |
| sizeof(unsigned long)); |
| if(err) |
| return err; |
| put_fs_long(amount,(unsigned long *)arg); |
| return(0); |
| } |
| default: |
| return(-EINVAL); |
| } |
| } |
| |
| |
| /* This routine computes a TCP checksum. */ |
| unsigned short |
| tcp_check(struct tcphdr *th, int len, |
| unsigned long saddr, unsigned long daddr) |
| { |
| unsigned long sum; |
| |
| if (saddr == 0) saddr = my_addr(); |
| print_th(th); |
| __asm__("\t addl %%ecx,%%ebx\n" |
| "\t adcl %%edx,%%ebx\n" |
| "\t adcl $0, %%ebx\n" |
| : "=b"(sum) |
| : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256) |
| : "cx","bx","dx" ); |
| |
| if (len > 3) { |
| __asm__("\tclc\n" |
| "1:\n" |
| "\t lodsl\n" |
| "\t adcl %%eax, %%ebx\n" |
| "\t loop 1b\n" |
| "\t adcl $0, %%ebx\n" |
| : "=b"(sum) , "=S"(th) |
| : "0"(sum), "c"(len/4) ,"1"(th) |
| : "ax", "cx", "bx", "si" ); |
| } |
| |
| /* Convert from 32 bits to 16 bits. */ |
| __asm__("\t movl %%ebx, %%ecx\n" |
| "\t shrl $16,%%ecx\n" |
| "\t addw %%cx, %%bx\n" |
| "\t adcw $0, %%bx\n" |
| : "=b"(sum) |
| : "0"(sum) |
| : "bx", "cx"); |
| |
| /* Check for an extra word. */ |
| if ((len & 2) != 0) { |
| __asm__("\t lodsw\n" |
| "\t addw %%ax,%%bx\n" |
| "\t adcw $0, %%bx\n" |
| : "=b"(sum), "=S"(th) |
| : "0"(sum) ,"1"(th) |
| : "si", "ax", "bx"); |
| } |
| |
| /* Now check for the extra byte. */ |
| if ((len & 1) != 0) { |
| __asm__("\t lodsb\n" |
| "\t movb $0,%%ah\n" |
| "\t addw %%ax,%%bx\n" |
| "\t adcw $0, %%bx\n" |
| : "=b"(sum) |
| : "0"(sum) ,"S"(th) |
| : "si", "ax", "bx"); |
| } |
| |
| /* We only want the bottom 16 bits, but we never cleared the top 16. */ |
| return((~sum) & 0xffff); |
| } |
| |
| |
| void tcp_send_check(struct tcphdr *th, unsigned long saddr, |
| unsigned long daddr, int len, struct sock *sk) |
| { |
| th->check = 0; |
| th->check = tcp_check(th, len, saddr, daddr); |
| return; |
| } |
| |
| static void tcp_send_skb(struct sock *sk, struct sk_buff *skb) |
| { |
| int size; |
| struct tcphdr * th = skb->h.th; |
| |
| /* length of packet (not counting length of pre-tcp headers) */ |
| size = skb->len - ((unsigned char *) th - skb->data); |
| |
| /* sanity check it.. */ |
| if (size < sizeof(struct tcphdr) || size > skb->len) { |
| printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n", |
| skb, skb->data, th, skb->len); |
| kfree_skb(skb, FREE_WRITE); |
| return; |
| } |
| |
| /* If we have queued a header size packet.. */ |
| if (size == sizeof(struct tcphdr)) { |
| /* If its got a syn or fin its notionally included in the size..*/ |
| if(!th->syn && !th->fin) { |
| printk("tcp_send_skb: attempt to queue a bogon.\n"); |
| kfree_skb(skb,FREE_WRITE); |
| return; |
| } |
| } |
| |
| /* We need to complete and send the packet. */ |
| tcp_send_check(th, sk->saddr, sk->daddr, size, sk); |
| |
| skb->h.seq = ntohl(th->seq) + size - 4*th->doff; |
| if (after(skb->h.seq, sk->window_seq) || |
| (sk->retransmits && sk->timeout == TIME_WRITE) || |
| sk->packets_out >= sk->cong_window) { |
| DPRINTF((DBG_TCP, "sk->cong_window = %d, sk->packets_out = %d\n", |
| sk->cong_window, sk->packets_out)); |
| DPRINTF((DBG_TCP, "sk->write_seq = %d, sk->window_seq = %d\n", |
| sk->write_seq, sk->window_seq)); |
| skb->next = NULL; |
| skb->magic = TCP_WRITE_QUEUE_MAGIC; |
| if (sk->wback == NULL) { |
| sk->wfront = skb; |
| } else { |
| sk->wback->next = skb; |
| } |
| sk->wback = skb; |
| if (before(sk->window_seq, sk->wfront->h.seq) && |
| sk->send_head == NULL && |
| sk->ack_backlog == 0) |
| reset_timer(sk, TIME_PROBE0, sk->rto); |
| } else { |
| sk->sent_seq = sk->write_seq; |
| sk->prot->queue_xmit(sk, skb->dev, skb, 0); |
| } |
| } |
| |
| struct sk_buff * tcp_dequeue_partial(struct sock * sk) |
| { |
| struct sk_buff * skb; |
| unsigned long flags; |
| |
| save_flags(flags); |
| cli(); |
| skb = sk->partial; |
| if (skb) { |
| sk->partial = NULL; |
| del_timer(&sk->partial_timer); |
| } |
| restore_flags(flags); |
| return skb; |
| } |
| |
| static void tcp_send_partial(struct sock *sk) |
| { |
| struct sk_buff *skb; |
| |
| if (sk == NULL) |
| return; |
| while ((skb = tcp_dequeue_partial(sk)) != NULL) |
| tcp_send_skb(sk, skb); |
| } |
| |
| void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk) |
| { |
| struct sk_buff * tmp; |
| unsigned long flags; |
| |
| save_flags(flags); |
| cli(); |
| tmp = sk->partial; |
| if (tmp) |
| del_timer(&sk->partial_timer); |
| sk->partial = skb; |
| sk->partial_timer.expires = HZ; |
| sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial; |
| sk->partial_timer.data = (unsigned long) sk; |
| add_timer(&sk->partial_timer); |
| restore_flags(flags); |
| if (tmp) |
| tcp_send_skb(sk, tmp); |
| } |
| |
| |
| /* This routine sends an ack and also updates the window. */ |
| static void |
| tcp_send_ack(unsigned long sequence, unsigned long ack, |
| struct sock *sk, |
| struct tcphdr *th, unsigned long daddr) |
| { |
| struct sk_buff *buff; |
| struct tcphdr *t1; |
| struct device *dev = NULL; |
| int tmp; |
| |
| if(sk->zapped) |
| return; /* We have been reset, we may not send again */ |
| /* |
| * We need to grab some memory, and put together an ack, |
| * and then put it into the queue to be sent. |
| */ |
| buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC); |
| if (buff == NULL) { |
| /* Force it to send an ack. */ |
| sk->ack_backlog++; |
| if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) { |
| reset_timer(sk, TIME_WRITE, 10); |
| } |
| if (inet_debug == DBG_SLIP) printk("\rtcp_ack: malloc failed\n"); |
| return; |
| } |
| |
| buff->mem_addr = buff; |
| buff->mem_len = MAX_ACK_SIZE; |
| buff->len = sizeof(struct tcphdr); |
| buff->sk = sk; |
| t1 =(struct tcphdr *) buff->data; |
| |
| /* Put in the IP header and routing stuff. */ |
| tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev, |
| IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl); |
| if (tmp < 0) { |
| buff->free=1; |
| sk->prot->wfree(sk, buff->mem_addr, buff->mem_len); |
| if (inet_debug == DBG_SLIP) printk("\rtcp_ack: build_header failed\n"); |
| return; |
| } |
| buff->len += tmp; |
| t1 =(struct tcphdr *)((char *)t1 +tmp); |
| |
| /* FIXME: */ |
| memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */ |
| |
| /* swap the send and the receive. */ |
| t1->dest = th->source; |
| t1->source = th->dest; |
| t1->seq = ntohl(sequence); |
| t1->ack = 1; |
| sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/ |
| t1->window = ntohs(sk->window); |
| t1->res1 = 0; |
| t1->res2 = 0; |
| t1->rst = 0; |
| t1->urg = 0; |
| t1->syn = 0; |
| t1->psh = 0; |
| t1->fin = 0; |
| if (ack == sk->acked_seq) { |
| sk->ack_backlog = 0; |
| sk->bytes_rcv = 0; |
| sk->ack_timed = 0; |
| if (sk->send_head == NULL && sk->wfront == NULL && sk->timeout == TIME_WRITE) |
| { |
| if(sk->keepopen) |
| reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN); |
| else |
| delete_timer(sk); |
| } |
| } |
| t1->ack_seq = ntohl(ack); |
| t1->doff = sizeof(*t1)/4; |
| tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk); |
| if (sk->debug) |
| printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack); |
| sk->prot->queue_xmit(sk, dev, buff, 1); |
| } |
| |
| |
| /* This routine builds a generic TCP header. */ |
| static int |
| tcp_build_header(struct tcphdr *th, struct sock *sk, int push) |
| { |
| |
| /* FIXME: want to get rid of this. */ |
| memcpy(th,(void *) &(sk->dummy_th), sizeof(*th)); |
| th->seq = htonl(sk->write_seq); |
| th->psh =(push == 0) ? 1 : 0; |
| th->doff = sizeof(*th)/4; |
| th->ack = 1; |
| th->fin = 0; |
| sk->ack_backlog = 0; |
| sk->bytes_rcv = 0; |
| sk->ack_timed = 0; |
| th->ack_seq = htonl(sk->acked_seq); |
| sk->window = tcp_select_window(sk)/*sk->prot->rspace(sk)*/; |
| th->window = htons(sk->window); |
| |
| return(sizeof(*th)); |
| } |
| |
| /* |
| * This routine copies from a user buffer into a socket, |
| * and starts the transmit system. |
| */ |
| static int |
| tcp_write(struct sock *sk, unsigned char *from, |
| int len, int nonblock, unsigned flags) |
| { |
| int copied = 0; |
| int copy; |
| int tmp; |
| struct sk_buff *skb; |
| struct sk_buff *send_tmp; |
| unsigned char *buff; |
| struct proto *prot; |
| struct device *dev = NULL; |
| |
| DPRINTF((DBG_TCP, "tcp_write(sk=%X, from=%X, len=%d, nonblock=%d, flags=%X)\n", |
| sk, from, len, nonblock, flags)); |
| |
| sk->inuse=1; |
| prot = sk->prot; |
| while(len > 0) { |
| if (sk->err) { /* Stop on an error */ |
| release_sock(sk); |
| if (copied) return(copied); |
| tmp = -sk->err; |
| sk->err = 0; |
| return(tmp); |
| } |
| |
| /* First thing we do is make sure that we are established. */ |
| if (sk->shutdown & SEND_SHUTDOWN) { |
| release_sock(sk); |
| sk->err = EPIPE; |
| if (copied) return(copied); |
| sk->err = 0; |
| return(-EPIPE); |
| } |
| |
| |
| /* Wait for a connection to finish. */ |
| |
| while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) { |
| if (sk->err) { |
| release_sock(sk); |
| if (copied) return(copied); |
| tmp = -sk->err; |
| sk->err = 0; |
| return(tmp); |
| } |
| |
| if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) { |
| release_sock(sk); |
| DPRINTF((DBG_TCP, "tcp_write: return 1\n")); |
| if (copied) return(copied); |
| |
| if (sk->err) { |
| tmp = -sk->err; |
| sk->err = 0; |
| return(tmp); |
| } |
| |
| if (sk->keepopen) { |
| send_sig(SIGPIPE, current, 0); |
| } |
| return(-EPIPE); |
| } |
| |
| if (nonblock || copied) { |
| release_sock(sk); |
| DPRINTF((DBG_TCP, "tcp_write: return 2\n")); |
| if (copied) return(copied); |
| return(-EAGAIN); |
| } |
| |
| release_sock(sk); |
| cli(); |
| if (sk->state != TCP_ESTABLISHED && |
| sk->state != TCP_CLOSE_WAIT && sk->err == 0) { |
| interruptible_sleep_on(sk->sleep); |
| if (current->signal & ~current->blocked) { |
| sti(); |
| DPRINTF((DBG_TCP, "tcp_write: return 3\n")); |
| if (copied) return(copied); |
| return(-ERESTARTSYS); |
| } |
| } |
| sk->inuse = 1; |
| sti(); |
| } |
| |
| /* |
| * The following code can result in copy <= if sk->mss is ever |
| * decreased. It shouldn't be. sk->mss is min(sk->mtu, sk->max_window). |
| * sk->mtu is constant once SYN processing is finished. I.e. we |
| * had better not get here until we've seen his SYN and at least one |
| * valid ack. (The SYN sets sk->mtu and the ack sets sk->max_window.) |
| * But ESTABLISHED should guarantee that. sk->max_window is by definition |
| * non-decreasing. Note that any ioctl to set user_mss must be done |
| * before the exchange of SYN's. If the initial ack from the other |
| * end has a window of 0, max_window and thus mss will both be 0. |
| */ |
| |
| /* Now we need to check if we have a half built packet. */ |
| if ((skb = tcp_dequeue_partial(sk)) != NULL) { |
| int hdrlen; |
| |
| /* IP header + TCP header */ |
| hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data) |
| + sizeof(struct tcphdr); |
| |
| /* Add more stuff to the end of skb->len */ |
| if (!(flags & MSG_OOB)) { |
| copy = min(sk->mss - (skb->len - hdrlen), len); |
| /* FIXME: this is really a bug. */ |
| if (copy <= 0) { |
| printk("TCP: **bug**: \"copy\" <= 0!!\n"); |
| copy = 0; |
| } |
| |
| memcpy_fromfs(skb->data + skb->len, from, copy); |
| skb->len += copy; |
| from += copy; |
| copied += copy; |
| len -= copy; |
| sk->write_seq += copy; |
| } |
| if ((skb->len - hdrlen) >= sk->mss || |
| (flags & MSG_OOB) || |
| !sk->packets_out) |
| tcp_send_skb(sk, skb); |
| else |
| tcp_enqueue_partial(skb, sk); |
| continue; |
| } |
| |
| /* |
| * We also need to worry about the window. |
| * If window < 1/2 the maximum window we've seen from this |
| * host, don't use it. This is sender side |
| * silly window prevention, as specified in RFC1122. |
| * (Note that this is diffferent than earlier versions of |
| * SWS prevention, e.g. RFC813.). What we actually do is |
| * use the whole MSS. Since the results in the right |
| * edge of the packet being outside the window, it will |
| * be queued for later rather than sent. |
| */ |
| |
| copy = sk->window_seq - sk->write_seq; |
| if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss) |
| copy = sk->mss; |
| if (copy > len) |
| copy = len; |
| |
| /* We should really check the window here also. */ |
| send_tmp = NULL; |
| if (copy < sk->mss && !(flags & MSG_OOB)) { |
| /* We will release the socket incase we sleep here. */ |
| release_sock(sk); |
| /* NB: following must be mtu, because mss can be increased. |
| * mss is always <= mtu */ |
| skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header + sizeof(*skb), 0, GFP_KERNEL); |
| sk->inuse = 1; |
| send_tmp = skb; |
| } else { |
| /* We will release the socket incase we sleep here. */ |
| release_sock(sk); |
| skb = prot->wmalloc(sk, copy + prot->max_header + sizeof(*skb), 0, GFP_KERNEL); |
| sk->inuse = 1; |
| } |
| |
| /* If we didn't get any memory, we need to sleep. */ |
| if (skb == NULL) { |
| if (nonblock /* || copied */) { |
| release_sock(sk); |
| DPRINTF((DBG_TCP, "tcp_write: return 4\n")); |
| if (copied) return(copied); |
| return(-EAGAIN); |
| } |
| |
| /* FIXME: here is another race condition. */ |
| tmp = sk->wmem_alloc; |
| release_sock(sk); |
| cli(); |
| /* Again we will try to avoid it. */ |
| if (tmp <= sk->wmem_alloc && |
| (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT) |
| && sk->err == 0) { |
| interruptible_sleep_on(sk->sleep); |
| if (current->signal & ~current->blocked) { |
| sti(); |
| DPRINTF((DBG_TCP, "tcp_write: return 5\n")); |
| if (copied) return(copied); |
| return(-ERESTARTSYS); |
| } |
| } |
| sk->inuse = 1; |
| sti(); |
| continue; |
| } |
| |
| skb->len = 0; |
| skb->sk = sk; |
| skb->free = 0; |
| |
| buff = skb->data; |
| |
| /* |
| * FIXME: we need to optimize this. |
| * Perhaps some hints here would be good. |
| */ |
| tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev, |
| IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl); |
| if (tmp < 0 ) { |
| prot->wfree(sk, skb->mem_addr, skb->mem_len); |
| release_sock(sk); |
| DPRINTF((DBG_TCP, "tcp_write: return 6\n")); |
| if (copied) return(copied); |
| return(tmp); |
| } |
| skb->len += tmp; |
| skb->dev = dev; |
| buff += tmp; |
| skb->h.th =(struct tcphdr *) buff; |
| tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy); |
| if (tmp < 0) { |
| prot->wfree(sk, skb->mem_addr, skb->mem_len); |
| release_sock(sk); |
| DPRINTF((DBG_TCP, "tcp_write: return 7\n")); |
| if (copied) return(copied); |
| return(tmp); |
| } |
| |
| if (flags & MSG_OOB) { |
| ((struct tcphdr *)buff)->urg = 1; |
| ((struct tcphdr *)buff)->urg_ptr = ntohs(copy); |
| } |
| skb->len += tmp; |
| memcpy_fromfs(buff+tmp, from, copy); |
| |
| from += copy; |
| copied += copy; |
| len -= copy; |
| skb->len += copy; |
| skb->free = 0; |
| sk->write_seq += copy; |
| |
| if (send_tmp != NULL && sk->packets_out) { |
| tcp_enqueue_partial(send_tmp, sk); |
| continue; |
| } |
| tcp_send_skb(sk, skb); |
| } |
| sk->err = 0; |
| |
| /* |
| * Nagles rule. Turn Nagle off with TCP_NODELAY for highly |
| * interactive fast network servers. It's meant to be on and |
| * it really improves the throughput though not the echo time |
| * on my slow slip link - Alan |
| */ |
| |
| /* Avoid possible race on send_tmp - c/o Johannes Stille */ |
| if(sk->partial && |
| ((!sk->packets_out) |
| /* If not nagling we can send on the before case too.. */ |
| || (sk->nonagle && before(sk->write_seq , sk->window_seq)) |
| )) |
| tcp_send_partial(sk); |
| /* -- */ |
| release_sock(sk); |
| DPRINTF((DBG_TCP, "tcp_write: return 8\n")); |
| return(copied); |
| } |
| |
| |
| static int |
| tcp_sendto(struct sock *sk, unsigned char *from, |
| int len, int nonblock, unsigned flags, |
| struct sockaddr_in *addr, int addr_len) |
| { |
| struct sockaddr_in sin; |
| |
| if (addr_len < sizeof(sin)) return(-EINVAL); |
| memcpy_fromfs(&sin, addr, sizeof(sin)); |
| if (sin.sin_family && sin.sin_family != AF_INET) return(-EINVAL); |
| if (sin.sin_port != sk->dummy_th.dest) return(-EINVAL); |
| if (sin.sin_addr.s_addr != sk->daddr) return(-EINVAL); |
| return(tcp_write(sk, from, len, nonblock, flags)); |
| } |
| |
| |
| static void |
| tcp_read_wakeup(struct sock *sk) |
| { |
| int tmp; |
| struct device *dev = NULL; |
| struct tcphdr *t1; |
| struct sk_buff *buff; |
| |
| DPRINTF((DBG_TCP, "in tcp read wakeup\n")); |
| if (!sk->ack_backlog) return; |
| |
| /* |
| * FIXME: we need to put code here to prevent this routine from |
| * being called. Being called once in a while is ok, so only check |
| * if this is the second time in a row. |
| */ |
| |
| /* |
| * We need to grab some memory, and put together an ack, |
| * and then put it into the queue to be sent. |
| */ |
| buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC); |
| if (buff == NULL) { |
| /* Try again real soon. */ |
| reset_timer(sk, TIME_WRITE, 10); |
| return; |
| } |
| |
| buff->mem_addr = buff; |
| buff->mem_len = MAX_ACK_SIZE; |
| buff->len = sizeof(struct tcphdr); |
| buff->sk = sk; |
| |
| /* Put in the IP header and routing stuff. */ |
| tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev, |
| IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl); |
| if (tmp < 0) { |
| buff->free=1; |
| sk->prot->wfree(sk, buff->mem_addr, buff->mem_len); |
| return; |
| } |
| |
| buff->len += tmp; |
| t1 =(struct tcphdr *)(buff->data +tmp); |
| |
| memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1)); |
| t1->seq = htonl(sk->sent_seq); |
| t1->ack = 1; |
| t1->res1 = 0; |
| t1->res2 = 0; |
| t1->rst = 0; |
| t1->urg = 0; |
| t1->syn = 0; |
| t1->psh = 0; |
| sk->ack_backlog = 0; |
| sk->bytes_rcv = 0; |
| sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/ |
| t1->window = ntohs(sk->window); |
| t1->ack_seq = ntohl(sk->acked_seq); |
| t1->doff = sizeof(*t1)/4; |
| tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk); |
| sk->prot->queue_xmit(sk, dev, buff, 1); |
| } |
| |
| |
| /* |
| * FIXME: |
| * This routine frees used buffers. |
| * It should consider sending an ACK to let the |
| * other end know we now have a bigger window. |
| */ |
| static void |
| cleanup_rbuf(struct sock *sk) |
| { |
| unsigned long flags; |
| int left; |
| struct sk_buff *skb; |
| |
| if(sk->debug) |
| printk("cleaning rbuf for sk=%p\n", sk); |
| |
| save_flags(flags); |
| cli(); |
| |
| left = sk->prot->rspace(sk); |
| |
| /* |
| * We have to loop through all the buffer headers, |
| * and try to free up all the space we can. |
| */ |
| while((skb=skb_peek(&sk->rqueue)) != NULL ) |
| { |
| if (!skb->used) |
| break; |
| skb_unlink(skb); |
| skb->sk = sk; |
| kfree_skb(skb, FREE_READ); |
| } |
| |
| restore_flags(flags); |
| |
| /* |
| * FIXME: |
| * At this point we should send an ack if the difference |
| * in the window, and the amount of space is bigger than |
| * TCP_WINDOW_DIFF. |
| */ |
| DPRINTF((DBG_TCP, "sk->window left = %d, sk->prot->rspace(sk)=%d\n", |
| sk->window - sk->bytes_rcv, sk->prot->rspace(sk))); |
| |
| if(sk->debug) |
| printk("sk->rspace = %lu, was %d\n", sk->prot->rspace(sk), |
| left); |
| if (sk->prot->rspace(sk) != left) |
| { |
| /* |
| * This area has caused the most trouble. The current strategy |
| * is to simply do nothing if the other end has room to send at |
| * least 3 full packets, because the ack from those will auto- |
| * matically update the window. If the other end doesn't think |
| * we have much space left, but we have room for atleast 1 more |
| * complete packet than it thinks we do, we will send an ack |
| * immediatedly. Otherwise we will wait up to .5 seconds in case |
| * the user reads some more. |
| */ |
| sk->ack_backlog++; |
| /* |
| * It's unclear whether to use sk->mtu or sk->mss here. They differ only |
| * if the other end is offering a window smaller than the agreed on MSS |
| * (called sk->mtu here). In theory there's no connection between send |
| * and receive, and so no reason to think that they're going to send |
| * small packets. For the moment I'm using the hack of reducing the mss |
| * only on the send side, so I'm putting mtu here. |
| */ |
| if ((sk->prot->rspace(sk) > (sk->window - sk->bytes_rcv + sk->mtu))) { |
| /* Send an ack right now. */ |
| tcp_read_wakeup(sk); |
| } else { |
| /* Force it to send an ack soon. */ |
| int was_active = del_timer(&sk->timer); |
| if (!was_active || TCP_ACK_TIME < sk->timer.expires) { |
| reset_timer(sk, TIME_WRITE, TCP_ACK_TIME); |
| } else |
| add_timer(&sk->timer); |
| } |
| } |
| } |
| |
| |
| /* Handle reading urgent data. */ |
| static int |
| tcp_read_urg(struct sock * sk, int nonblock, |
| unsigned char *to, int len, unsigned flags) |
| { |
| struct wait_queue wait = { current, NULL }; |
| |
| while (len > 0) { |
| if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ) |
| return -EINVAL; |
| if (sk->urg_data & URG_VALID) { |
| char c = sk->urg_data; |
| if (!(flags & MSG_PEEK)) |
| sk->urg_data = URG_READ; |
| put_fs_byte(c, to); |
| return 1; |
| } |
| |
| if (sk->err) { |
| int tmp = -sk->err; |
| sk->err = 0; |
| return tmp; |
| } |
| |
| if (sk->state == TCP_CLOSE || sk->done) { |
| if (!sk->done) { |
| sk->done = 1; |
| return 0; |
| } |
| return -ENOTCONN; |
| } |
| |
| if (sk->shutdown & RCV_SHUTDOWN) { |
| sk->done = 1; |
| return 0; |
| } |
| |
| if (nonblock) |
| return -EAGAIN; |
| |
| if (current->signal & ~current->blocked) |
| return -ERESTARTSYS; |
| |
| current->state = TASK_INTERRUPTIBLE; |
| add_wait_queue(sk->sleep, &wait); |
| if ((sk->urg_data & URG_NOTYET) && sk->err == 0 && |
| !(sk->shutdown & RCV_SHUTDOWN)) |
| schedule(); |
| remove_wait_queue(sk->sleep, &wait); |
| current->state = TASK_RUNNING; |
| } |
| return 0; |
| } |
| |
| |
| /* This routine copies from a sock struct into the user buffer. */ |
| static int tcp_read(struct sock *sk, unsigned char *to, |
| int len, int nonblock, unsigned flags) |
| { |
| struct wait_queue wait = { current, NULL }; |
| int copied = 0; |
| unsigned long peek_seq; |
| unsigned long *seq; |
| unsigned long used; |
| int err; |
| |
| if (len == 0) |
| return 0; |
| |
| if (len < 0) |
| return -EINVAL; |
| |
| err = verify_area(VERIFY_WRITE, to, len); |
| if (err) |
| return err; |
| |
| /* This error should be checked. */ |
| if (sk->state == TCP_LISTEN) |
| return -ENOTCONN; |
| |
| /* Urgent data needs to be handled specially. */ |
| if (flags & MSG_OOB) |
| return tcp_read_urg(sk, nonblock, to, len, flags); |
| |
| peek_seq = sk->copied_seq; |
| seq = &sk->copied_seq; |
| if (flags & MSG_PEEK) |
| seq = &peek_seq; |
| |
| add_wait_queue(sk->sleep, &wait); |
| sk->inuse = 1; |
| while (len > 0) { |
| struct sk_buff * skb; |
| unsigned long offset; |
| |
| /* |
| * are we at urgent data? Stop if we have read anything. |
| */ |
| if (copied && sk->urg_data && sk->urg_seq == 1+*seq) |
| break; |
| |
| current->state = TASK_INTERRUPTIBLE; |
| |
| skb = sk->rqueue; |
| do { |
| if (!skb) |
| break; |
| if (before(1+*seq, skb->h.th->seq)) |
| break; |
| offset = 1 + *seq - skb->h.th->seq; |
| if (skb->h.th->syn) |
| offset--; |
| if (offset < skb->len) |
| goto found_ok_skb; |
| if (!(flags & MSG_PEEK)) |
| skb->used = 1; |
| skb = (struct sk_buff *)skb->next; |
| } while (skb != sk->rqueue); |
| |
| if (copied) |
| break; |
| |
| if (sk->err) { |
| copied = -sk->err; |
| sk->err = 0; |
| break; |
| } |
| |
| if (sk->state == TCP_CLOSE) { |
| if (!sk->done) { |
| sk->done = 1; |
| break; |
| } |
| copied = -ENOTCONN; |
| break; |
| } |
| |
| if (sk->shutdown & RCV_SHUTDOWN) { |
| sk->done = 1; |
| break; |
| } |
| |
| if (nonblock) { |
| copied = -EAGAIN; |
| break; |
| } |
| |
| cleanup_rbuf(sk); |
| release_sock(sk); |
| schedule(); |
| sk->inuse = 1; |
| |
| if (current->signal & ~current->blocked) { |
| copied = -ERESTARTSYS; |
| break; |
| } |
| continue; |
| |
| found_ok_skb: |
| /* Ok so how much can we use ? */ |
| used = skb->len - offset; |
| if (len < used) |
| used = len; |
| /* do we have urgent data here? */ |
| if (sk->urg_data) { |
| unsigned long urg_offset = sk->urg_seq - (1 + *seq); |
| if (urg_offset < used) { |
| if (!urg_offset) { |
| if (!sk->urginline) { |
| ++*seq; |
| offset++; |
| used--; |
| } |
| } else |
| used = urg_offset; |
| } |
| } |
| /* Copy it */ |
| memcpy_tofs(to,((unsigned char *)skb->h.th) + |
| skb->h.th->doff*4 + offset, used); |
| copied += used; |
| len -= used; |
| to += used; |
| *seq += used; |
| if (after(sk->copied_seq+1,sk->urg_seq)) |
| sk->urg_data = 0; |
| if (!(flags & MSG_PEEK) && (used + offset >= skb->len)) |
| skb->used = 1; |
| } |
| remove_wait_queue(sk->sleep, &wait); |
| current->state = TASK_RUNNING; |
| |
| /* Clean up data we have read: This will do ACK frames */ |
| cleanup_rbuf(sk); |
| release_sock(sk); |
| DPRINTF((DBG_TCP, "tcp_read: returning %d\n", copied)); |
| return copied; |
| } |
| |
| |
| /* |
| * Send a FIN without closing the connection. |
| * Not called at interrupt time. |
| */ |
| void |
| tcp_shutdown(struct sock *sk, int how) |
| { |
| struct sk_buff *buff; |
| struct tcphdr *t1, *th; |
| struct proto *prot; |
| int tmp; |
| struct device *dev = NULL; |
| |
| /* |
| * We need to grab some memory, and put together a FIN, |
| * and then put it into the queue to be sent. |
| * FIXME: |
| * Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92. |
| * Most of this is guesswork, so maybe it will work... |
| */ |
| /* If we've already sent a FIN, return. */ |
| if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2) return; |
| if (!(how & SEND_SHUTDOWN)) return; |
| sk->inuse = 1; |
| |
| /* Clear out any half completed packets. */ |
| if (sk->partial) |
| tcp_send_partial(sk); |
| |
| prot =(struct proto *)sk->prot; |
| th =(struct tcphdr *)&sk->dummy_th; |
| release_sock(sk); /* incase the malloc sleeps. */ |
| buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL); |
| if (buff == NULL) return; |
| sk->inuse = 1; |
| |
| DPRINTF((DBG_TCP, "tcp_shutdown_send buff = %X\n", buff)); |
| buff->mem_addr = buff; |
| buff->mem_len = MAX_RESET_SIZE; |
| buff->sk = sk; |
| buff->len = sizeof(*t1); |
| t1 =(struct tcphdr *) buff->data; |
| |
| /* Put in the IP header and routing stuff. */ |
| tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev, |
| IPPROTO_TCP, sk->opt, |
| sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl); |
| if (tmp < 0) { |
| buff->free=1; |
| prot->wfree(sk,buff->mem_addr, buff->mem_len); |
| release_sock(sk); |
| DPRINTF((DBG_TCP, "Unable to build header for fin.\n")); |
| return; |
| } |
| |
| t1 =(struct tcphdr *)((char *)t1 +tmp); |
| buff->len += tmp; |
| buff->dev = dev; |
| memcpy(t1, th, sizeof(*t1)); |
| t1->seq = ntohl(sk->write_seq); |
| sk->write_seq++; |
| buff->h.seq = sk->write_seq; |
| t1->ack = 1; |
| t1->ack_seq = ntohl(sk->acked_seq); |
| t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/); |
| t1->fin = 1; |
| t1->rst = 0; |
| t1->doff = sizeof(*t1)/4; |
| tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk); |
| |
| /* |
| * Can't just queue this up. |
| * It should go at the end of the write queue. |
| */ |
| if (sk->wback != NULL) { |
| buff->free=0; |
| buff->next = NULL; |
| sk->wback->next = buff; |
| sk->wback = buff; |
| buff->magic = TCP_WRITE_QUEUE_MAGIC; |
| } else { |
| sk->sent_seq = sk->write_seq; |
| sk->prot->queue_xmit(sk, dev, buff, 0); |
| } |
| |
| if (sk->state == TCP_ESTABLISHED) sk->state = TCP_FIN_WAIT1; |
| else sk->state = TCP_FIN_WAIT2; |
| |
| release_sock(sk); |
| } |
| |
| |
| static int |
| tcp_recvfrom(struct sock *sk, unsigned char *to, |
| int to_len, int nonblock, unsigned flags, |
| struct sockaddr_in *addr, int *addr_len) |
| { |
| struct sockaddr_in sin; |
| int len; |
| int err; |
| int result; |
| |
| /* Have to check these first unlike the old code. If |
| we check them after we lose data on an error |
| which is wrong */ |
| err = verify_area(VERIFY_WRITE,addr_len,sizeof(long)); |
| if(err) |
| return err; |
| len = get_fs_long(addr_len); |
| if(len > sizeof(sin)) |
| len = sizeof(sin); |
| err=verify_area(VERIFY_WRITE, addr, len); |
| if(err) |
| return err; |
| |
| result=tcp_read(sk, to, to_len, nonblock, flags); |
| |
| if (result < 0) return(result); |
| |
| sin.sin_family = AF_INET; |
| sin.sin_port = sk->dummy_th.dest; |
| sin.sin_addr.s_addr = sk->daddr; |
| |
| memcpy_tofs(addr, &sin, len); |
| put_fs_long(len, addr_len); |
| return(result); |
| } |
| |
| |
| /* This routine will send an RST to the other tcp. */ |
| static void |
| tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th, |
| struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl) |
| { |
| struct sk_buff *buff; |
| struct tcphdr *t1; |
| int tmp; |
| |
| /* |
| * We need to grab some memory, and put together an RST, |
| * and then put it into the queue to be sent. |
| */ |
| buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC); |
| if (buff == NULL) |
| return; |
| |
| DPRINTF((DBG_TCP, "tcp_reset buff = %X\n", buff)); |
| buff->mem_addr = buff; |
| buff->mem_len = MAX_RESET_SIZE; |
| buff->len = sizeof(*t1); |
| buff->sk = NULL; |
| buff->dev = dev; |
| |
| t1 =(struct tcphdr *) buff->data; |
| |
| /* Put in the IP header and routing stuff. */ |
| tmp = prot->build_header(buff, saddr, daddr, &dev, IPPROTO_TCP, opt, |
| sizeof(struct tcphdr),tos,ttl); |
| if (tmp < 0) { |
| buff->free = 1; |
| prot->wfree(NULL, buff->mem_addr, buff->mem_len); |
| return; |
| } |
| t1 =(struct tcphdr *)((char *)t1 +tmp); |
| buff->len += tmp; |
| memcpy(t1, th, sizeof(*t1)); |
| |
| /* Swap the send and the receive. */ |
| t1->dest = th->source; |
| t1->source = th->dest; |
| t1->rst = 1; |
| t1->window = 0; |
| |
| if(th->ack) |
| { |
| t1->ack = 0; |
| t1->seq = th->ack_seq; |
| t1->ack_seq = 0; |
| } |
| else |
| { |
| t1->ack = 1; |
| if(!th->syn) |
| t1->ack_seq=htonl(th->seq); |
| else |
| t1->ack_seq=htonl(th->seq+1); |
| t1->seq=0; |
| } |
| |
| t1->syn = 0; |
| t1->urg = 0; |
| t1->fin = 0; |
| t1->psh = 0; |
| t1->doff = sizeof(*t1)/4; |
| tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL); |
| prot->queue_xmit(NULL, dev, buff, 1); |
| } |
| |
| |
| /* |
| * Look for tcp options. Parses everything but only knows about MSS. |
| * This routine is always called with the packet containing the SYN. |
| * However it may also be called with the ack to the SYN. So you |
| * can't assume this is always the SYN. It's always called after |
| * we have set up sk->mtu to our own MTU. |
| */ |
| |
| static void |
| tcp_options(struct sock *sk, struct tcphdr *th) |
| { |
| unsigned char *ptr; |
| int length=(th->doff*4)-sizeof(struct tcphdr); |
| int mss_seen = 0; |
| |
| ptr = (unsigned char *)(th + 1); |
| |
| while(length>0) |
| { |
| int opcode=*ptr++; |
| int opsize=*ptr++; |
| switch(opcode) |
| { |
| case TCPOPT_EOL: |
| return; |
| case TCPOPT_NOP: |
| length-=2; |
| continue; |
| |
| default: |
| if(opsize<=2) /* Avoid silly options looping forever */ |
| return; |
| switch(opcode) |
| { |
| case TCPOPT_MSS: |
| if(opsize==4 && th->syn) |
| { |
| sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr)); |
| mss_seen = 1; |
| } |
| break; |
| /* Add other options here as people feel the urge to implement stuff like large windows */ |
| } |
| ptr+=opsize-2; |
| length-=opsize; |
| } |
| } |
| if (th->syn) { |
| if (! mss_seen) |
| sk->mtu=min(sk->mtu, 536); /* default MSS if none sent */ |
| } |
| sk->mss = min(sk->max_window, sk->mtu); |
| } |
| |
| static inline unsigned long default_mask(unsigned long dst) |
| { |
| dst = ntohl(dst); |
| if (IN_CLASSA(dst)) |
| return htonl(IN_CLASSA_NET); |
| if (IN_CLASSB(dst)) |
| return htonl(IN_CLASSB_NET); |
| return htonl(IN_CLASSC_NET); |
| } |
| |
| /* |
| * This routine handles a connection request. |
| * It should make sure we haven't already responded. |
| * Because of the way BSD works, we have to send a syn/ack now. |
| * This also means it will be harder to close a socket which is |
| * listening. |
| */ |
| static void |
| tcp_conn_request(struct sock *sk, struct sk_buff *skb, |
| unsigned long daddr, unsigned long saddr, |
| struct options *opt, struct device *dev) |
| { |
| struct sk_buff *buff; |
| struct tcphdr *t1; |
| unsigned char *ptr; |
| struct sock *newsk; |
| struct tcphdr *th; |
| int tmp; |
| |
| DPRINTF((DBG_TCP, "tcp_conn_request(sk = %X, skb = %X, daddr = %X, sadd4= %X, \n" |
| " opt = %X, dev = %X)\n", |
| sk, skb, daddr, saddr, opt, dev)); |
| |
| th = skb->h.th; |
| |
| /* If the socket is dead, don't accept the connection. */ |
| if (!sk->dead) { |
| sk->data_ready(sk,0); |
| } else { |
| DPRINTF((DBG_TCP, "tcp_conn_request on dead socket\n")); |
| tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl); |
| kfree_skb(skb, FREE_READ); |
| return; |
| } |
| |
| /* |
| * Make sure we can accept more. This will prevent a |
| * flurry of syns from eating up all our memory. |
| */ |
| if (sk->ack_backlog >= sk->max_ack_backlog) { |
| kfree_skb(skb, FREE_READ); |
| return; |
| } |
| |
| /* |
| * We need to build a new sock struct. |
| * It is sort of bad to have a socket without an inode attached |
| * to it, but the wake_up's will just wake up the listening socket, |
| * and if the listening socket is destroyed before this is taken |
| * off of the queue, this will take care of it. |
| */ |
| newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC); |
| if (newsk == NULL) { |
| /* just ignore the syn. It will get retransmitted. */ |
| kfree_skb(skb, FREE_READ); |
| return; |
| } |
| |
| DPRINTF((DBG_TCP, "newsk = %X\n", newsk)); |
| memcpy((void *)newsk,(void *)sk, sizeof(*newsk)); |
| newsk->wback = NULL; |
| newsk->wfront = NULL; |
| newsk->rqueue = NULL; |
| newsk->send_head = NULL; |
| newsk->send_tail = NULL; |
| newsk->back_log = NULL; |
| newsk->rtt = TCP_CONNECT_TIME << 3; |
| newsk->rto = TCP_CONNECT_TIME; |
| newsk->mdev = 0; |
| newsk->max_window = 0; |
| newsk->cong_window = 1; |
| newsk->cong_count = 0; |
| newsk->ssthresh = 0; |
| newsk->backoff = 0; |
| newsk->blog = 0; |
| newsk->intr = 0; |
| newsk->proc = 0; |
| newsk->done = 0; |
| newsk->partial = NULL; |
| newsk->pair = NULL; |
| newsk->wmem_alloc = 0; |
| newsk->rmem_alloc = 0; |
| |
| newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF; |
| |
| newsk->err = 0; |
| newsk->shutdown = 0; |
| newsk->ack_backlog = 0; |
| newsk->acked_seq = skb->h.th->seq+1; |
| newsk->fin_seq = skb->h.th->seq; |
| newsk->copied_seq = skb->h.th->seq; |
| newsk->state = TCP_SYN_RECV; |
| newsk->timeout = 0; |
| newsk->write_seq = jiffies * SEQ_TICK - seq_offset; |
| newsk->window_seq = newsk->write_seq; |
| newsk->rcv_ack_seq = newsk->write_seq; |
| newsk->urg_data = 0; |
| newsk->retransmits = 0; |
| newsk->destroy = 0; |
| newsk->timer.data = (unsigned long)newsk; |
| newsk->timer.function = &net_timer; |
| newsk->dummy_th.source = skb->h.th->dest; |
| newsk->dummy_th.dest = skb->h.th->source; |
| |
| /* Swap these two, they are from our point of view. */ |
| newsk->daddr = saddr; |
| newsk->saddr = daddr; |
| |
| put_sock(newsk->num,newsk); |
| newsk->dummy_th.res1 = 0; |
| newsk->dummy_th.doff = 6; |
| newsk->dummy_th.fin = 0; |
| newsk->dummy_th.syn = 0; |
| newsk->dummy_th.rst = 0; |
| newsk->dummy_th.psh = 0; |
| newsk->dummy_th.ack = 0; |
| newsk->dummy_th.urg = 0; |
| newsk->dummy_th.res2 = 0; |
| newsk->acked_seq = skb->h.th->seq + 1; |
| newsk->copied_seq = skb->h.th->seq; |
| |
| /* Grab the ttl and tos values and use them */ |
| newsk->ip_ttl=sk->ip_ttl; |
| newsk->ip_tos=skb->ip_hdr->tos; |
| |
| /* use 512 or whatever user asked for */ |
| /* note use of sk->user_mss, since user has no direct access to newsk */ |
| if (sk->user_mss) |
| newsk->mtu = sk->user_mss; |
| else { |
| #ifdef SUBNETSARELOCAL |
| if ((saddr ^ daddr) & default_mask(saddr)) |
| #else |
| if ((saddr ^ daddr) & dev->pa_mask) |
| #endif |
| newsk->mtu = 576 - HEADER_SIZE; |
| else |
| newsk->mtu = MAX_WINDOW; |
| } |
| /* but not bigger than device MTU */ |
| newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE); |
| |
| /* this will min with what arrived in the packet */ |
| tcp_options(newsk,skb->h.th); |
| |
| buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC); |
| if (buff == NULL) { |
| sk->err = -ENOMEM; |
| newsk->dead = 1; |
| release_sock(newsk); |
| kfree_skb(skb, FREE_READ); |
| return; |
| } |
| |
| buff->mem_addr = buff; |
| buff->mem_len = MAX_SYN_SIZE; |
| buff->len = sizeof(struct tcphdr)+4; |
| buff->sk = newsk; |
| |
| t1 =(struct tcphdr *) buff->data; |
| |
| /* Put in the IP header and routing stuff. */ |
| tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &dev, |
| IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl); |
| |
| /* Something went wrong. */ |
| if (tmp < 0) { |
| sk->err = tmp; |
| buff->free=1; |
| kfree_skb(buff,FREE_WRITE); |
| newsk->dead = 1; |
| release_sock(newsk); |
| skb->sk = sk; |
| kfree_skb(skb, FREE_READ); |
| return; |
| } |
| |
| buff->len += tmp; |
| t1 =(struct tcphdr *)((char *)t1 +tmp); |
| |
| memcpy(t1, skb->h.th, sizeof(*t1)); |
| buff->h.seq = newsk->write_seq; |
| |
| /* Swap the send and the receive. */ |
| t1->dest = skb->h.th->source; |
| t1->source = newsk->dummy_th.source; |
| t1->seq = ntohl(newsk->write_seq++); |
| t1->ack = 1; |
| newsk->window = tcp_select_window(newsk);/*newsk->prot->rspace(newsk);*/ |
| newsk->sent_seq = newsk->write_seq; |
| t1->window = ntohs(newsk->window); |
| t1->res1 = 0; |
| t1->res2 = 0; |
| t1->rst = 0; |
| t1->urg = 0; |
| t1->psh = 0; |
| t1->syn = 1; |
| t1->ack_seq = ntohl(skb->h.th->seq+1); |
| t1->doff = sizeof(*t1)/4+1; |
| |
| ptr =(unsigned char *)(t1+1); |
| ptr[0] = 2; |
| ptr[1] = 4; |
| ptr[2] = ((newsk->mtu) >> 8) & 0xff; |
| ptr[3] =(newsk->mtu) & 0xff; |
| |
| tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk); |
| newsk->prot->queue_xmit(newsk, dev, buff, 0); |
| |
| reset_timer(newsk, TIME_WRITE /* -1 ? FIXME ??? */, TCP_CONNECT_TIME); |
| skb->sk = newsk; |
| |
| /* Charge the sock_buff to newsk. */ |
| sk->rmem_alloc -= skb->mem_len; |
| newsk->rmem_alloc += skb->mem_len; |
| |
| skb_queue_tail(&sk->rqueue,skb); |
| sk->ack_backlog++; |
| release_sock(newsk); |
| } |
| |
| |
| static void |
| tcp_close(struct sock *sk, int timeout) |
| { |
| struct sk_buff *buff; |
| int need_reset = 0; |
| struct tcphdr *t1, *th; |
| struct proto *prot; |
| struct device *dev=NULL; |
| int tmp; |
| |
| /* |
| * We need to grab some memory, and put together a FIN, |
| * and then put it into the queue to be sent. |
| */ |
| DPRINTF((DBG_TCP, "tcp_close((struct sock *)%X, %d)\n",sk, timeout)); |
| sk->inuse = 1; |
| sk->keepopen = 1; |
| sk->shutdown = SHUTDOWN_MASK; |
| |
| if (!sk->dead) |
| sk->state_change(sk); |
| |
| /* We need to flush the recv. buffs. */ |
| if (skb_peek(&sk->rqueue) != NULL) |
| { |
| struct sk_buff *skb; |
| if(sk->debug) |
| printk("Clean rcv queue\n"); |
| while((skb=skb_dequeue(&sk->rqueue))!=NULL) |
| { |
| if(skb->len > 0 && after(skb->h.th->seq + skb->len + 1 , sk->copied_seq)) |
| need_reset = 1; |
| kfree_skb(skb, FREE_READ); |
| } |
| if(sk->debug) |
| printk("Cleaned.\n"); |
| } |
| sk->rqueue = NULL; |
| |
| /* Get rid off any half-completed packets. */ |
| if (sk->partial) { |
| tcp_send_partial(sk); |
| } |
| |
| switch(sk->state) { |
| case TCP_FIN_WAIT1: |
| case TCP_FIN_WAIT2: |
| case TCP_LAST_ACK: |
| /* start a timer. */ |
| /* original code was 4 * sk->rtt. In converting to the |
| * new rtt representation, we can't quite use that. |
| * it seems to make most sense to use the backed off value |
| */ |
| reset_timer(sk, TIME_CLOSE, 4 * sk->rto); |
| if (timeout) tcp_time_wait(sk); |
| release_sock(sk); |
| return; /* break causes a double release - messy */ |
| case TCP_TIME_WAIT: |
| if (timeout) { |
| sk->state = TCP_CLOSE; |
| } |
| release_sock(sk); |
| return; |
| case TCP_LISTEN: |
| sk->state = TCP_CLOSE; |
| release_sock(sk); |
| return; |
| case TCP_CLOSE: |
| release_sock(sk); |
| return; |
| case TCP_CLOSE_WAIT: |
| case TCP_ESTABLISHED: |
| case TCP_SYN_SENT: |
| case TCP_SYN_RECV: |
| prot =(struct proto *)sk->prot; |
| th =(struct tcphdr *)&sk->dummy_th; |
| buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC); |
| if (buff == NULL) { |
| /* This will force it to try again later. */ |
| /* Or it would have if someone released the socket |
| first. Anyway it might work now */ |
| release_sock(sk); |
| if (sk->state != TCP_CLOSE_WAIT) |
| sk->state = TCP_ESTABLISHED; |
| reset_timer(sk, TIME_CLOSE, 100); |
| return; |
| } |
| buff->mem_addr = buff; |
| buff->mem_len = MAX_FIN_SIZE; |
| buff->sk = sk; |
| buff->free = 1; |
| buff->len = sizeof(*t1); |
| t1 =(struct tcphdr *) buff->data; |
| |
| /* Put in the IP header and routing stuff. */ |
| tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev, |
| IPPROTO_TCP, sk->opt, |
| sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl); |
| if (tmp < 0) { |
| kfree_skb(buff,FREE_WRITE); |
| DPRINTF((DBG_TCP, "Unable to build header for fin.\n")); |
| release_sock(sk); |
| return; |
| } |
| |
| t1 =(struct tcphdr *)((char *)t1 +tmp); |
| buff->len += tmp; |
| buff->dev = dev; |
| memcpy(t1, th, sizeof(*t1)); |
| t1->seq = ntohl(sk->write_seq); |
| sk->write_seq++; |
| buff->h.seq = sk->write_seq; |
| t1->ack = 1; |
| |
| /* Ack everything immediately from now on. */ |
| sk->delay_acks = 0; |
| t1->ack_seq = ntohl(sk->acked_seq); |
| t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/); |
| t1->fin = 1; |
| t1->rst = need_reset; |
| t1->doff = sizeof(*t1)/4; |
| tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk); |
| |
| if (sk->wfront == NULL) { |
| sk->sent_seq = sk->write_seq; |
| prot->queue_xmit(sk, dev, buff, 0); |
| } else { |
| reset_timer(sk, TIME_WRITE, sk->rto); |
| buff->next = NULL; |
| if (sk->wback == NULL) { |
| sk->wfront = buff; |
| } else { |
| sk->wback->next = buff; |
| } |
| sk->wback = buff; |
| buff->magic = TCP_WRITE_QUEUE_MAGIC; |
| } |
| |
| if (sk->state == TCP_CLOSE_WAIT) { |
| sk->state = TCP_FIN_WAIT2; |
| } else { |
| sk->state = TCP_FIN_WAIT1; |
| } |
| } |
| release_sock(sk); |
| } |
| |
| |
| /* |
| * This routine takes stuff off of the write queue, |
| * and puts it in the xmit queue. |
| */ |
| static void |
| tcp_write_xmit(struct sock *sk) |
| { |
| struct sk_buff *skb; |
| |
| DPRINTF((DBG_TCP, "tcp_write_xmit(sk=%X)\n", sk)); |
| |
| /* The bytes will have to remain here. In time closedown will |
| empty the write queue and all will be happy */ |
| if(sk->zapped) |
| return; |
| |
| while(sk->wfront != NULL && |
| before(sk->wfront->h.seq, sk->window_seq +1) && |
| (sk->retransmits == 0 || |
| sk->timeout != TIME_WRITE || |
| before(sk->wfront->h.seq, sk->rcv_ack_seq +1)) |
| && sk->packets_out < sk->cong_window) { |
| skb = sk->wfront; |
| IS_SKB(skb); |
| sk->wfront = skb->next; |
| if (sk->wfront == NULL) sk->wback = NULL; |
| skb->next = NULL; |
| if (skb->magic != TCP_WRITE_QUEUE_MAGIC) { |
| printk("tcp.c skb with bad magic(%X) on write queue. Squashing " |
| "queue\n", skb->magic); |
| sk->wfront = NULL; |
| sk->wback = NULL; |
| return; |
| } |
| skb->magic = 0; |
| DPRINTF((DBG_TCP, "Sending a packet.\n")); |
| |
| /* See if we really need to send the packet. */ |
| if (before(skb->h.seq, sk->rcv_ack_seq +1)) { |
| sk->retransmits = 0; |
| kfree_skb(skb, FREE_WRITE); |
| if (!sk->dead) sk->write_space(sk); |
| } else { |
| sk->sent_seq = skb->h.seq; |
| sk->prot->queue_xmit(sk, skb->dev, skb, skb->free); |
| } |
| } |
| } |
| |
| |
| /* |
| * This routine sorts the send list, and resets the |
| * sk->send_head and sk->send_tail pointers. |
| */ |
| void |
| sort_send(struct sock *sk) |
| { |
| struct sk_buff *list = NULL; |
| struct sk_buff *skb,*skb2,*skb3; |
| |
| for (skb = sk->send_head; skb != NULL; skb = skb2) { |
| skb2 = (struct sk_buff *)skb->link3; |
| if (list == NULL || before (skb2->h.seq, list->h.seq)) { |
| skb->link3 = list; |
| sk->send_tail = skb; |
| list = skb; |
| } else { |
| for (skb3 = list; ; skb3 = (struct sk_buff *)skb3->link3) { |
| if (skb3->link3 == NULL || |
| before(skb->h.seq, skb3->link3->h.seq)) { |
| skb->link3 = skb3->link3; |
| skb3->link3 = skb; |
| if (skb->link3 == NULL) sk->send_tail = skb; |
| break; |
| } |
| } |
| } |
| } |
| sk->send_head = list; |
| } |
| |
| |
| /* This routine deals with incoming acks, but not outgoing ones. */ |
| static int |
| tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len) |
| { |
| unsigned long ack; |
| int flag = 0; |
| /* |
| * 1 - there was data in packet as well as ack or new data is sent or |
| * in shutdown state |
| * 2 - data from retransmit queue was acked and removed |
| * 4 - window shrunk or data from retransmit queue was acked and removed |
| */ |
| |
| if(sk->zapped) |
| return(1); /* Dead, cant ack any more so why bother */ |
| |
| ack = ntohl(th->ack_seq); |
| DPRINTF((DBG_TCP, "tcp_ack ack=%d, window=%d, " |
| "sk->rcv_ack_seq=%d, sk->window_seq = %d\n", |
| ack, ntohs(th->window), sk->rcv_ack_seq, sk->window_seq)); |
| |
| if (ntohs(th->window) > sk->max_window) { |
| sk->max_window = ntohs(th->window); |
| sk->mss = min(sk->max_window, sk->mtu); |
| } |
| |
| if (sk->retransmits && sk->timeout == TIME_KEEPOPEN) |
| sk->retransmits = 0; |
| |
| /* not quite clear why the +1 and -1 here, and why not +1 in next line */ |
| if (after(ack, sk->sent_seq+1) || before(ack, sk->rcv_ack_seq-1)) { |
| if (after(ack, sk->sent_seq) || |
| (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) { |
| return(0); |
| } |
| if (sk->keepopen) { |
| reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN); |
| } |
| return(1); |
| } |
| |
| if (len != th->doff*4) flag |= 1; |
| |
| /* See if our window has been shrunk. */ |
| if (after(sk->window_seq, ack+ntohs(th->window))) { |
| /* |
| * We may need to move packets from the send queue |
| * to the write queue, if the window has been shrunk on us. |
| * The RFC says you are not allowed to shrink your window |
| * like this, but if the other end does, you must be able |
| * to deal with it. |
| */ |
| struct sk_buff *skb; |
| struct sk_buff *skb2; |
| struct sk_buff *wskb = NULL; |
| |
| skb2 = sk->send_head; |
| sk->send_head = NULL; |
| sk->send_tail = NULL; |
| |
| flag |= 4; |
| |
| sk->window_seq = ack + ntohs(th->window); |
| cli(); |
| while (skb2 != NULL) { |
| skb = skb2; |
| skb2 = (struct sk_buff *)skb->link3; |
| skb->link3 = NULL; |
| if (after(skb->h.seq, sk->window_seq)) { |
| if (sk->packets_out > 0) sk->packets_out--; |
| /* We may need to remove this from the dev send list. */ |
| if (skb->next != NULL) { |
| skb_unlink(skb); |
| } |
| /* Now add it to the write_queue. */ |
| skb->magic = TCP_WRITE_QUEUE_MAGIC; |
| if (wskb == NULL) { |
| skb->next = sk->wfront; |
| sk->wfront = skb; |
| } else { |
| skb->next = wskb->next; |
| wskb->next = skb; |
| } |
| if (sk->wback == wskb) sk->wback = skb; |
| wskb = skb; |
| } else { |
| if (sk->send_head == NULL) { |
| sk->send_head = skb; |
| sk->send_tail = skb; |
| } else { |
| sk->send_tail->link3 = skb; |
| sk->send_tail = skb; |
| } |
| skb->link3 = NULL; |
| } |
| } |
| sti(); |
| } |
| |
| if (sk->send_tail == NULL || sk->send_head == NULL) { |
| sk->send_head = NULL; |
| sk->send_tail = NULL; |
| sk->packets_out= 0; |
| } |
| |
| sk->window_seq = ack + ntohs(th->window); |
| |
| /* We don't want too many packets out there. */ |
| if (sk->timeout == TIME_WRITE && |
| sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) { |
| /* |
| * This is Jacobson's slow start and congestion avoidance. |
| * SIGCOMM '88, p. 328. Because we keep cong_window in integral |
| * mss's, we can't do cwnd += 1 / cwnd. Instead, maintain a |
| * counter and increment it once every cwnd times. It's possible |
| * that this should be done only if sk->retransmits == 0. I'm |
| * interpreting "new data is acked" as including data that has |
| * been retransmitted but is just now being acked. |
| */ |
| if (sk->cong_window < sk->ssthresh) |
| /* in "safe" area, increase */ |
| sk->cong_window++; |
| else { |
| /* in dangerous area, increase slowly. In theory this is |
| sk->cong_window += 1 / sk->cong_window |
| */ |
| if (sk->cong_count >= sk->cong_window) { |
| sk->cong_window++; |
| sk->cong_count = 0; |
| } else |
| sk->cong_count++; |
| } |
| } |
| |
| DPRINTF((DBG_TCP, "tcp_ack: Updating rcv ack sequence.\n")); |
| sk->rcv_ack_seq = ack; |
| |
| /* |
| * if this ack opens up a zero window, clear backoff. It was |
| * being used to time the probes, and is probably far higher than |
| * it needs to be for normal retransmission |
| */ |
| if (sk->timeout == TIME_PROBE0) { |
| if (sk->wfront != NULL && /* should always be non-null */ |
| ! before (sk->window_seq, sk->wfront->h.seq)) { |
| sk->retransmits = 0; |
| sk->backoff = 0; |
| /* recompute rto from rtt. this eliminates any backoff */ |
| sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1; |
| if (sk->rto > 120*HZ) |
| sk->rto = 120*HZ; |
| if (sk->rto < 1*HZ) |
| sk->rto = 1*HZ; |
| } |
| } |
| |
| /* See if we can take anything off of the retransmit queue. */ |
| while(sk->send_head != NULL) { |
| /* Check for a bug. */ |
| if (sk->send_head->link3 && |
| after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) { |
| printk("INET: tcp.c: *** bug send_list out of order.\n"); |
| sort_send(sk); |
| } |
| |
| if (before(sk->send_head->h.seq, ack+1)) { |
| struct sk_buff *oskb; |
| |
| if (sk->retransmits) { |
| |
| /* we were retransmitting. don't count this in RTT est */ |
| flag |= 2; |
| |
| /* |
| * even though we've gotten an ack, we're still |
| * retransmitting as long as we're sending from |
| * the retransmit queue. Keeping retransmits non-zero |
| * prevents us from getting new data interspersed with |
| * retransmissions. |
| */ |
| |
| if (sk->send_head->link3) |
| sk->retransmits = 1; |
| else |
| sk->retransmits = 0; |
| |
| } |
| |
| /* |
| * Note that we only reset backoff and rto in the |
| * rtt recomputation code. And that doesn't happen |
| * if there were retransmissions in effect. So the |
| * first new packet after the retransmissions is |
| * sent with the backoff still in effect. Not until |
| * we get an ack from a non-retransmitted packet do |
| * we reset the backoff and rto. This allows us to deal |
| * with a situation where the network delay has increased |
| * suddenly. I.e. Karn's algorithm. (SIGCOMM '87, p5.) |
| */ |
| |
| /* We have one less packet out there. */ |
| if (sk->packets_out > 0) sk->packets_out --; |
| DPRINTF((DBG_TCP, "skb=%X skb->h.seq = %d acked ack=%d\n", |
| sk->send_head, sk->send_head->h.seq, ack)); |
| |
| /* Wake up the process, it can probably write more. */ |
| if (!sk->dead) sk->write_space(sk); |
| |
| oskb = sk->send_head; |
| |
| if (!(flag&2)) { |
| long m; |
| |
| /* The following amusing code comes from Jacobson's |
| * article in SIGCOMM '88. Note that rtt and mdev |
| * are scaled versions of rtt and mean deviation. |
| * This is designed to be as fast as possible |
| * m stands for "measurement". |
| */ |
| |
| m = jiffies - oskb->when; /* RTT */ |
| m -= (sk->rtt >> 3); /* m is now error in rtt est */ |
| sk->rtt += m; /* rtt = 7/8 rtt + 1/8 new */ |
| if (m < 0) |
| m = -m; /* m is now abs(error) */ |
| m -= (sk->mdev >> 2); /* similar update on mdev */ |
| sk->mdev += m; /* mdev = 3/4 mdev + 1/4 new */ |
| |
| /* now update timeout. Note that this removes any backoff */ |
| sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1; |
| if (sk->rto > 120*HZ) |
| sk->rto = 120*HZ; |
| if (sk->rto < 1*HZ) |
| sk->rto = 1*HZ; |
| sk->backoff = 0; |
| |
| } |
| flag |= (2|4); |
| |
| cli(); |
| |
| oskb = sk->send_head; |
| IS_SKB(oskb); |
| sk->send_head =(struct sk_buff *)oskb->link3; |
| if (sk->send_head == NULL) { |
| sk->send_tail = NULL; |
| } |
| |
| /* We may need to remove this from the dev send list. */ |
| skb_unlink(oskb); /* Much easier! */ |
| sti(); |
| oskb->magic = 0; |
| kfree_skb(oskb, FREE_WRITE); /* write. */ |
| if (!sk->dead) sk->write_space(sk); |
| } else { |
| break; |
| } |
| } |
| |
| /* |
| * Maybe we can take some stuff off of the write queue, |
| * and put it onto the xmit queue. |
| */ |
| if (sk->wfront != NULL) { |
| if (after (sk->window_seq+1, sk->wfront->h.seq) && |
| (sk->retransmits == 0 || |
| sk->timeout != TIME_WRITE || |
| before(sk->wfront->h.seq, sk->rcv_ack_seq +1)) |
| && sk->packets_out < sk->cong_window) { |
| flag |= 1; |
| tcp_write_xmit(sk); |
| } else if (before(sk->window_seq, sk->wfront->h.seq) && |
| sk->send_head == NULL && |
| sk->ack_backlog == 0 && |
| sk->state != TCP_TIME_WAIT) { |
| reset_timer(sk, TIME_PROBE0, sk->rto); |
| } |
| } else { |
| if (sk->send_head == NULL && sk->ack_backlog == 0 && |
| sk->state != TCP_TIME_WAIT && !sk->keepopen) { |
| DPRINTF((DBG_TCP, "Nothing to do, going to sleep.\n")); |
| if (!sk->dead) sk->write_space(sk); |
| |
| if (sk->keepopen) |
| reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN); |
| else |
| delete_timer(sk); |
| } else { |
| if (sk->state != (unsigned char) sk->keepopen) { |
| reset_timer(sk, TIME_WRITE, sk->rto); |
| } |
| if (sk->state == TCP_TIME_WAIT) { |
| reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); |
| } |
| } |
| } |
| |
| if (sk->packets_out == 0 && sk->partial != NULL && |
| sk->wfront == NULL && sk->send_head == NULL) { |
| flag |= 1; |
| tcp_send_partial(sk); |
| } |
| |
| /* See if we are done. */ |
| if (sk->state == TCP_TIME_WAIT) { |
| if (!sk->dead) |
| sk->state_change(sk); |
| if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) { |
| flag |= 1; |
| sk->state = TCP_CLOSE; |
| sk->shutdown = SHUTDOWN_MASK; |
| } |
| } |
| |
| if (sk->state == TCP_LAST_ACK || sk->state == TCP_FIN_WAIT2) { |
| if (!sk->dead) sk->state_change(sk); |
| if (sk->rcv_ack_seq == sk->write_seq) { |
| flag |= 1; |
| if (sk->acked_seq != sk->fin_seq) { |
| tcp_time_wait(sk); |
| } else { |
| DPRINTF((DBG_TCP, "tcp_ack closing socket - %X\n", sk)); |
| tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, |
| th, sk->daddr); |
| sk->shutdown = SHUTDOWN_MASK; |
| sk->state = TCP_CLOSE; |
| } |
| } |
| } |
| |
| /* |
| * I make no guarantees about the first clause in the following |
| * test, i.e. "(!flag) || (flag&4)". I'm not entirely sure under |
| * what conditions "!flag" would be true. However I think the rest |
| * of the conditions would prevent that from causing any |
| * unnecessary retransmission. |
| * Clearly if the first packet has expired it should be |
| * retransmitted. The other alternative, "flag&2 && retransmits", is |
| * harder to explain: You have to look carefully at how and when the |
| * timer is set and with what timeout. The most recent transmission always |
| * sets the timer. So in general if the most recent thing has timed |
| * out, everything before it has as well. So we want to go ahead and |
| * retransmit some more. If we didn't explicitly test for this |
| * condition with "flag&2 && retransmits", chances are "when + rto < jiffies" |
| * would not be true. If you look at the pattern of timing, you can |
| * show that rto is increased fast enough that the next packet would |
| * almost never be retransmitted immediately. Then you'd end up |
| * waiting for a timeout to send each packet on the retranmission |
| * queue. With my implementation of the Karn sampling algorithm, |
| * the timeout would double each time. The net result is that it would |
| * take a hideous amount of time to recover from a single dropped packet. |
| * It's possible that there should also be a test for TIME_WRITE, but |
| * I think as long as "send_head != NULL" and "retransmit" is on, we've |
| * got to be in real retransmission mode. |
| * Note that ip_do_retransmit is called with all==1. Setting cong_window |
| * back to 1 at the timeout will cause us to send 1, then 2, etc. packets. |
| * As long as no further losses occur, this seems reasonable. |
| */ |
| |
| if (((!flag) || (flag&4)) && sk->send_head != NULL && |
| (((flag&2) && sk->retransmits) || |
| (sk->send_head->when + sk->rto < jiffies))) { |
| ip_do_retransmit(sk, 1); |
| reset_timer(sk, TIME_WRITE, sk->rto); |
| } |
| |
| DPRINTF((DBG_TCP, "leaving tcp_ack\n")); |
| return(1); |
| } |
| |
| |
| /* |
| * This routine handles the data. If there is room in the buffer, |
| * it will be have already been moved into it. If there is no |
| * room, then we will just have to discard the packet. |
| */ |
| static int |
| tcp_data(struct sk_buff *skb, struct sock *sk, |
| unsigned long saddr, unsigned short len) |
| { |
| struct sk_buff *skb1, *skb2; |
| struct tcphdr *th; |
| int dup_dumped=0; |
| |
| th = skb->h.th; |
| print_th(th); |
| skb->len = len -(th->doff*4); |
| |
| DPRINTF((DBG_TCP, "tcp_data len = %d sk = %X:\n", skb->len, sk)); |
| |
| sk->bytes_rcv += skb->len; |
| if (skb->len == 0 && !th->fin && !th->urg && !th->psh) { |
| /* Don't want to keep passing ack's back and forth. */ |
| if (!th->ack) tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr); |
| kfree_skb(skb, FREE_READ); |
| return(0); |
| } |
| |
| if (sk->shutdown & RCV_SHUTDOWN) { |
| sk->acked_seq = th->seq + skb->len + th->syn + th->fin; |
| tcp_reset(sk->saddr, sk->daddr, skb->h.th, |
| sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl); |
| sk->state = TCP_CLOSE; |
| sk->err = EPIPE; |
| sk->shutdown = SHUTDOWN_MASK; |
| DPRINTF((DBG_TCP, "tcp_data: closing socket - %X\n", sk)); |
| kfree_skb(skb, FREE_READ); |
| if (!sk->dead) sk->state_change(sk); |
| return(0); |
| } |
| |
| /* |
| * Now we have to walk the chain, and figure out where this one |
| * goes into it. This is set up so that the last packet we received |
| * will be the first one we look at, that way if everything comes |
| * in order, there will be no performance loss, and if they come |
| * out of order we will be able to fit things in nicely. |
| */ |
| |
| /* This should start at the last one, and then go around forwards. */ |
| if (sk->rqueue == NULL) { |
| DPRINTF((DBG_TCP, "tcp_data: skb = %X:\n", skb)); |
| #ifdef OLDWAY |
| sk->rqueue = skb; |
| skb->next = skb; |
| skb->prev = skb; |
| skb->list = &sk->rqueue; |
| #else |
| skb_queue_head(&sk->rqueue,skb); |
| #endif |
| skb1= NULL; |
| } else { |
| DPRINTF((DBG_TCP, "tcp_data adding to chain sk = %X:\n", sk)); |
| for(skb1=sk->rqueue->prev; ; skb1 =(struct sk_buff *)skb1->prev) { |
| if(sk->debug) |
| { |
| printk("skb1=%p :", skb1); |
| printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq); |
| printk("skb->h.th->seq = %ld\n",skb->h.th->seq); |
| printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq, |
| sk->acked_seq); |
| } |
| #ifdef OLD |
| if (after(th->seq+1, skb1->h.th->seq)) { |
| skb->prev = skb1; |
| skb->next = skb1->next; |
| skb->next->prev = skb; |
| skb1->next = skb; |
| if (skb1 == sk->rqueue) sk->rqueue = skb; |
| break; |
| } |
| if (skb1->prev == sk->rqueue) { |
| skb->next= skb1; |
| skb->prev = skb1->prev; |
| skb->prev->next = skb; |
| skb1->prev = skb; |
| skb1 = NULL; /* so we know we might be able |
| to ack stuff. */ |
| break; |
| } |
| #else |
| if (th->seq==skb1->h.th->seq && skb->len>= skb1->len) |
| { |
| skb_append(skb1,skb); |
| skb_unlink(skb1); |
| kfree_skb(skb1,FREE_READ); |
| dup_dumped=1; |
| skb1=NULL; |
| break; |
| } |
| if (after(th->seq+1, skb1->h.th->seq)) |
| { |
| skb_append(skb1,skb); |
| break; |
| } |
| if (skb1 == sk->rqueue) |
| { |
| skb_queue_head(&sk->rqueue, skb); |
| break; |
| } |
| #endif |
| } |
| DPRINTF((DBG_TCP, "skb = %X:\n", skb)); |
| } |
| |
| th->ack_seq = th->seq + skb->len; |
| if (th->syn) th->ack_seq++; |
| if (th->fin) th->ack_seq++; |
| |
| if (before(sk->acked_seq, sk->copied_seq)) { |
| printk("*** tcp.c:tcp_data bug acked < copied\n"); |
| sk->acked_seq = sk->copied_seq; |
| } |
| |
| /* Now figure out if we can ack anything. */ |
| if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) { |
| if (before(th->seq, sk->acked_seq+1)) { |
| int newwindow; |
| |
| if (after(th->ack_seq, sk->acked_seq)) { |
| newwindow = sk->window - |
| (th->ack_seq - sk->acked_seq); |
| if (newwindow < 0) |
| newwindow = 0; |
| sk->window = newwindow; |
| sk->acked_seq = th->ack_seq; |
| } |
| skb->acked = 1; |
| |
| /* When we ack the fin, we turn on the RCV_SHUTDOWN flag. */ |
| if (skb->h.th->fin) { |
| if (!sk->dead) sk->state_change(sk); |
| sk->shutdown |= RCV_SHUTDOWN; |
| } |
| |
| for(skb2 = (struct sk_buff *)skb->next; |
| skb2 !=(struct sk_buff *) sk->rqueue; |
| skb2 = (struct sk_buff *)skb2->next) { |
| if (before(skb2->h.th->seq, sk->acked_seq+1)) { |
| if (after(skb2->h.th->ack_seq, sk->acked_seq)) |
| { |
| newwindow = sk->window - |
| (skb2->h.th->ack_seq - sk->acked_seq); |
| if (newwindow < 0) |
| newwindow = 0; |
| sk->window = newwindow; |
| sk->acked_seq = skb2->h.th->ack_seq; |
| } |
| skb2->acked = 1; |
| |
| /* |
| * When we ack the fin, we turn on |
| * the RCV_SHUTDOWN flag. |
| */ |
| if (skb2->h.th->fin) { |
| sk->shutdown |= RCV_SHUTDOWN; |
| if (!sk->dead) sk->state_change(sk); |
| } |
| |
| /* Force an immediate ack. */ |
| sk->ack_backlog = sk->max_ack_backlog; |
| } else { |
| break; |
| } |
| } |
| |
| /* |
| * This also takes care of updating the window. |
| * This if statement needs to be simplified. |
| */ |
| if (!sk->delay_acks || |
| sk->ack_backlog >= sk->max_ack_backlog || |
| sk->bytes_rcv > sk->max_unacked || th->fin) { |
| /* tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */ |
| } else { |
| sk->ack_backlog++; |
| if(sk->debug) |
| printk("Ack queued.\n"); |
| reset_timer(sk, TIME_WRITE, TCP_ACK_TIME); |
| } |
| } |
| } |
| |
| /* |
| * If we've missed a packet, send an ack. |
| * Also start a timer to send another. |
| */ |
| if (!skb->acked) { |
| /* |
| * This is important. If we don't have much room left, |
| * we need to throw out a few packets so we have a good |
| * window. Note that mtu is used, not mss, because mss is really |
| * for the send side. He could be sending us stuff as large as mtu. |
| */ |
| while (sk->prot->rspace(sk) < sk->mtu) { |
| skb1 = skb_peek(&sk->rqueue); |
| if (skb1 == NULL) { |
| printk("INET: tcp.c:tcp_data memory leak detected.\n"); |
| break; |
| } |
| |
| /* Don't throw out something that has been acked. */ |
| if (skb1->acked) { |
| break; |
| } |
| |
| skb_unlink(skb1); |
| #ifdef OLDWAY |
| if (skb1->prev == skb1) { |
| sk->rqueue = NULL; |
| } else { |
| sk->rqueue = (struct sk_buff *)skb1->prev; |
| skb1->next->prev = skb1->prev; |
| skb1->prev->next = skb1->next; |
| } |
| #endif |
| kfree_skb(skb1, FREE_READ); |
| } |
| tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr); |
| sk->ack_backlog++; |
| reset_timer(sk, TIME_WRITE, TCP_ACK_TIME); |
| } else { |
| /* We missed a packet. Send an ack to try to resync things. */ |
| tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr); |
| } |
| |
| /* Now tell the user we may have some data. */ |
| if (!sk->dead) { |
| if(sk->debug) |
| printk("Data wakeup.\n"); |
| sk->data_ready(sk,0); |
| } else { |
| DPRINTF((DBG_TCP, "data received on dead socket.\n")); |
| } |
| |
| if (sk->state == TCP_FIN_WAIT2 && |
| sk->acked_seq == sk->fin_seq && sk->rcv_ack_seq == sk->write_seq) { |
| DPRINTF((DBG_TCP, "tcp_data: entering last_ack state sk = %X\n", sk)); |
| |
| /* tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr); */ |
| sk->shutdown = SHUTDOWN_MASK; |
| sk->state = TCP_LAST_ACK; |
| if (!sk->dead) sk->state_change(sk); |
| } |
| |
| return(0); |
| } |
| |
| |
| static void tcp_check_urg(struct sock * sk, struct tcphdr * th) |
| { |
| unsigned long ptr = ntohs(th->urg_ptr); |
| |
| if (ptr) |
| ptr--; |
| ptr += th->seq; |
| |
| /* ignore urgent data that we've already seen and read */ |
| if (after(sk->copied_seq+1, ptr)) |
| return; |
| |
| /* do we already have a newer (or duplicate) urgent pointer? */ |
| if (sk->urg_data && !after(ptr, sk->urg_seq)) |
| return; |
| |
| /* tell the world about our new urgent pointer */ |
| if (sk->proc != 0) { |
| if (sk->proc > 0) { |
| kill_proc(sk->proc, SIGURG, 1); |
| } else { |
| kill_pg(-sk->proc, SIGURG, 1); |
| } |
| } |
| sk->urg_data = URG_NOTYET; |
| sk->urg_seq = ptr; |
| } |
| |
| static inline int tcp_urg(struct sock *sk, struct tcphdr *th, |
| unsigned long saddr, unsigned long len) |
| { |
| unsigned long ptr; |
| |
| /* check if we get a new urgent pointer */ |
| if (th->urg) |
| tcp_check_urg(sk,th); |
| |
| /* do we wait for any urgent data? */ |
| if (sk->urg_data != URG_NOTYET) |
| return 0; |
| |
| /* is the urgent pointer pointing into this packet? */ |
| ptr = sk->urg_seq - th->seq + th->doff*4; |
| if (ptr >= len) |
| return 0; |
| |
| /* ok, got the correct packet, update info */ |
| sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th); |
| if (!sk->dead) |
| wake_up_interruptible(sk->sleep); |
| return 0; |
| } |
| |
| |
| /* This deals with incoming fins. 'Linus at 9 O'clock' 8-) */ |
| static int |
| tcp_fin(struct sock *sk, struct tcphdr *th, |
| unsigned long saddr, struct device *dev) |
| { |
| DPRINTF((DBG_TCP, "tcp_fin(sk=%X, th=%X, saddr=%X, dev=%X)\n", |
| sk, th, saddr, dev)); |
| |
| if (!sk->dead) { |
| sk->state_change(sk); |
| } |
| |
| switch(sk->state) { |
| case TCP_SYN_RECV: |
| case TCP_SYN_SENT: |
| case TCP_ESTABLISHED: |
| /* Contains the one that needs to be acked */ |
| sk->fin_seq = th->seq+1; |
| sk->state = TCP_CLOSE_WAIT; |
| if (th->rst) sk->shutdown = SHUTDOWN_MASK; |
| break; |
| |
| case TCP_CLOSE_WAIT: |
| case TCP_FIN_WAIT2: |
| break; /* we got a retransmit of the fin. */ |
| |
| case TCP_FIN_WAIT1: |
| /* Contains the one that needs to be acked */ |
| sk->fin_seq = th->seq+1; |
| sk->state = TCP_FIN_WAIT2; |
| break; |
| |
| default: |
| case TCP_TIME_WAIT: |
| sk->state = TCP_LAST_ACK; |
| |
| /* Start the timers. */ |
| reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); |
| return(0); |
| } |
| sk->ack_backlog++; |
| |
| return(0); |
| } |
| |
| |
| /* This will accept the next outstanding connection. */ |
| static struct sock * |
| tcp_accept(struct sock *sk, int flags) |
| { |
| struct sock *newsk; |
| struct sk_buff *skb; |
| |
| DPRINTF((DBG_TCP, "tcp_accept(sk=%X, flags=%X, addr=%s)\n", |
| sk, flags, in_ntoa(sk->saddr))); |
| |
| /* |
| * We need to make sure that this socket is listening, |
| * and that it has something pending. |
| */ |
| if (sk->state != TCP_LISTEN) { |
| sk->err = EINVAL; |
| return(NULL); |
| } |
| |
| /* avoid the race. */ |
| cli(); |
| sk->inuse = 1; |
| while((skb = get_firstr(sk)) == NULL) { |
| if (flags & O_NONBLOCK) { |
| sti(); |
| release_sock(sk); |
| sk->err = EAGAIN; |
| return(NULL); |
| } |
| |
| release_sock(sk); |
| interruptible_sleep_on(sk->sleep); |
| if (current->signal & ~current->blocked) { |
| sti(); |
| sk->err = ERESTARTSYS; |
| return(NULL); |
| } |
| sk->inuse = 1; |
| } |
| sti(); |
| |
| /* Now all we need to do is return skb->sk. */ |
| newsk = skb->sk; |
| |
| kfree_skb(skb, FREE_READ); |
| sk->ack_backlog--; |
| release_sock(sk); |
| return(newsk); |
| } |
| |
| |
| /* This will initiate an outgoing connection. */ |
| static int |
| tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len) |
| { |
| struct sk_buff *buff; |
| struct sockaddr_in sin; |
| struct device *dev=NULL; |
| unsigned char *ptr; |
| int tmp; |
| struct tcphdr *t1; |
| int err; |
| |
| if (sk->state != TCP_CLOSE) return(-EISCONN); |
| if (addr_len < 8) return(-EINVAL); |
| |
| err=verify_area(VERIFY_READ, usin, addr_len); |
| if(err) |
| return err; |
| |
| memcpy_fromfs(&sin,usin, min(sizeof(sin), addr_len)); |
| |
| if (sin.sin_family && sin.sin_family != AF_INET) return(-EAFNOSUPPORT); |
| |
| DPRINTF((DBG_TCP, "TCP connect daddr=%s\n", in_ntoa(sin.sin_addr.s_addr))); |
| |
| /* Don't want a TCP connection going to a broadcast address */ |
| if (chk_addr(sin.sin_addr.s_addr) == IS_BROADCAST) { |
| DPRINTF((DBG_TCP, "TCP connection to broadcast address not allowed\n")); |
| return(-ENETUNREACH); |
| } |
| |
| /* Connect back to the same socket: Blows up so disallow it */ |
| if(sk->saddr == sin.sin_addr.s_addr && sk->num==ntohs(sin.sin_port)) |
| return -EBUSY; |
| |
| sk->inuse = 1; |
| sk->daddr = sin.sin_addr.s_addr; |
| sk->write_seq = jiffies * SEQ_TICK - seq_offset; |
| sk->window_seq = sk->write_seq; |
| sk->rcv_ack_seq = sk->write_seq -1; |
| sk->err = 0; |
| sk->dummy_th.dest = sin.sin_port; |
| release_sock(sk); |
| |
| buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL); |
| if (buff == NULL) { |
| return(-ENOMEM); |
| } |
| sk->inuse = 1; |
| buff->mem_addr = buff; |
| buff->mem_len = MAX_SYN_SIZE; |
| buff->len = 24; |
| buff->sk = sk; |
| buff->free = 1; |
| t1 = (struct tcphdr *) buff->data; |
| |
| /* Put in the IP header and routing stuff. */ |
| /* We need to build the routing stuff fromt the things saved in skb. */ |
| tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev, |
| IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl); |
| if (tmp < 0) { |
| sk->prot->wfree(sk, buff->mem_addr, buff->mem_len); |
| release_sock(sk); |
| return(-ENETUNREACH); |
| } |
| buff->len += tmp; |
| t1 = (struct tcphdr *)((char *)t1 +tmp); |
| |
| memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1)); |
| t1->seq = ntohl(sk->write_seq++); |
| sk->sent_seq = sk->write_seq; |
| buff->h.seq = sk->write_seq; |
| t1->ack = 0; |
| t1->window = 2; |
| t1->res1=0; |
| t1->res2=0; |
| t1->rst = 0; |
| t1->urg = 0; |
| t1->psh = 0; |
| t1->syn = 1; |
| t1->urg_ptr = 0; |
| t1->doff = 6; |
| |
| /* use 512 or whatever user asked for */ |
| if (sk->user_mss) |
| sk->mtu = sk->user_mss; |
| else { |
| #ifdef SUBNETSARELOCAL |
| if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr)) |
| #else |
| if ((sk->saddr ^ sk->daddr) & dev->pa_mask) |
| #endif |
| sk->mtu = 576 - HEADER_SIZE; |
| else |
| sk->mtu = MAX_WINDOW; |
| } |
| /* but not bigger than device MTU */ |
| sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE); |
| |
| /* Put in the TCP options to say MTU. */ |
| ptr = (unsigned char *)(t1+1); |
| ptr[0] = 2; |
| ptr[1] = 4; |
| ptr[2] = (sk->mtu) >> 8; |
| ptr[3] = (sk->mtu) & 0xff; |
| tcp_send_check(t1, sk->saddr, sk->daddr, |
| sizeof(struct tcphdr) + 4, sk); |
| |
| /* This must go first otherwise a really quick response will get reset. */ |
| sk->state = TCP_SYN_SENT; |
| sk->rtt = TCP_CONNECT_TIME; |
| reset_timer(sk, TIME_WRITE, TCP_CONNECT_TIME); /* Timer for repeating the SYN until an answer */ |
| sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES; |
| |
| sk->prot->queue_xmit(sk, dev, buff, 0); |
| |
| release_sock(sk); |
| return(0); |
| } |
| |
| |
| /* This functions checks to see if the tcp header is actually acceptable. */ |
| static int |
| tcp_sequence(struct sock *sk, struct tcphdr *th, short len, |
| struct options *opt, unsigned long saddr, struct device *dev) |
| { |
| unsigned long next_seq; |
| |
| next_seq = len - 4*th->doff; |
| if (th->fin) |
| next_seq++; |
| /* if we have a zero window, we can't have any data in the packet.. */ |
| if (next_seq && !sk->window) |
| goto ignore_it; |
| next_seq += th->seq; |
| |
| /* |
| * This isn't quite right. sk->acked_seq could be more recent |
| * than sk->window. This is however close enough. We will accept |
| * slightly more packets than we should, but it should not cause |
| * problems unless someone is trying to forge packets. |
| */ |
| |
| /* have we already seen all of this packet? */ |
| if (!after(next_seq+1, sk->acked_seq)) |
| goto ignore_it; |
| /* or does it start beyond the window? */ |
| if (!before(th->seq, sk->acked_seq + sk->window + 1)) |
| goto ignore_it; |
| |
| /* ok, at least part of this packet would seem interesting.. */ |
| return 1; |
| |
| ignore_it: |
| DPRINTF((DBG_TCP, "tcp_sequence: rejecting packet.\n")); |
| |
| /* |
| * Send a reset if we get something not ours and we are |
| * unsynchronized. Note: We don't do anything to our end. We |
| * are just killing the bogus remote connection then we will |
| * connect again and it will work (with luck). |
| */ |
| |
| if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) { |
| tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl); |
| return 1; |
| } |
| |
| if (th->rst) |
| return 0; |
| |
| /* Try to resync things. */ |
| tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr); |
| return 0; |
| } |
| |
| |
| int |
| tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, |
| unsigned long daddr, unsigned short len, |
| unsigned long saddr, int redo, struct inet_protocol * protocol) |
| { |
| struct tcphdr *th; |
| struct sock *sk; |
| |
| if (!skb) { |
| DPRINTF((DBG_TCP, "tcp.c: tcp_rcv skb = NULL\n")); |
| return(0); |
| } |
| #if 0 /* FIXME: it's ok for protocol to be NULL */ |
| if (!protocol) { |
| DPRINTF((DBG_TCP, "tcp.c: tcp_rcv protocol = NULL\n")); |
| return(0); |
| } |
| |
| if (!opt) { /* FIXME: it's ok for opt to be NULL */ |
| DPRINTF((DBG_TCP, "tcp.c: tcp_rcv opt = NULL\n")); |
| } |
| #endif |
| if (!dev) { |
| DPRINTF((DBG_TCP, "tcp.c: tcp_rcv dev = NULL\n")); |
| return(0); |
| } |
| th = skb->h.th; |
| |
| /* Find the socket. */ |
| sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr); |
| DPRINTF((DBG_TCP, "<<\n")); |
| DPRINTF((DBG_TCP, "len = %d, redo = %d, skb=%X\n", len, redo, skb)); |
| |
| /* If this socket has got a reset its to all intents and purposes |
| really dead */ |
| if (sk!=NULL && sk->zapped) |
| sk=NULL; |
| |
| if (sk) { |
| DPRINTF((DBG_TCP, "sk = %X:\n", sk)); |
| } |
| |
| if (!redo) { |
| if (tcp_check(th, len, saddr, daddr )) { |
| skb->sk = NULL; |
| DPRINTF((DBG_TCP, "packet dropped with bad checksum.\n")); |
| if (inet_debug == DBG_SLIP) printk("\rtcp_rcv: bad checksum\n"); |
| kfree_skb(skb,FREE_READ); |
| /* |
| * We don't release the socket because it was |
| * never marked in use. |
| */ |
| return(0); |
| } |
| |
| th->seq = ntohl(th->seq); |
| |
| /* See if we know about the socket. */ |
| if (sk == NULL) { |
| if (!th->rst) |
| tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255); |
| skb->sk = NULL; |
| kfree_skb(skb, FREE_READ); |
| return(0); |
| } |
| |
| skb->len = len; |
| skb->sk = sk; |
| skb->acked = 0; |
| skb->used = 0; |
| skb->free = 0; |
| skb->saddr = daddr; |
| skb->daddr = saddr; |
| |
| /* We may need to add it to the backlog here. */ |
| cli(); |
| if (sk->inuse) { |
| if (sk->back_log == NULL) { |
| sk->back_log = skb; |
| skb->next = skb; |
| skb->prev = skb; |
| } else { |
| skb->next = sk->back_log; |
| skb->prev = sk->back_log->prev; |
| skb->prev->next = skb; |
| skb->next->prev = skb; |
| } |
| sti(); |
| return(0); |
| } |
| sk->inuse = 1; |
| sti(); |
| } else { |
| if (!sk) { |
| DPRINTF((DBG_TCP, "tcp.c: tcp_rcv bug sk=NULL redo = 1\n")); |
| return(0); |
| } |
| } |
| |
| if (!sk->prot) { |
| DPRINTF((DBG_TCP, "tcp.c: tcp_rcv sk->prot = NULL \n")); |
| return(0); |
| } |
| |
| /* Charge the memory to the socket. */ |
| if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) { |
| skb->sk = NULL; |
| DPRINTF((DBG_TCP, "dropping packet due to lack of buffer space.\n")); |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| } |
| sk->rmem_alloc += skb->mem_len; |
| |
| DPRINTF((DBG_TCP, "About to do switch.\n")); |
| |
| /* Now deal with it. */ |
| switch(sk->state) { |
| /* |
| * This should close the system down if it's waiting |
| * for an ack that is never going to be sent. |
| */ |
| case TCP_LAST_ACK: |
| if (th->rst) { |
| sk->zapped=1; |
| sk->err = ECONNRESET; |
| sk->state = TCP_CLOSE; |
| sk->shutdown = SHUTDOWN_MASK; |
| if (!sk->dead) { |
| sk->state_change(sk); |
| } |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| } |
| |
| case TCP_ESTABLISHED: |
| case TCP_CLOSE_WAIT: |
| case TCP_FIN_WAIT1: |
| case TCP_FIN_WAIT2: |
| case TCP_TIME_WAIT: |
| if (!tcp_sequence(sk, th, len, opt, saddr,dev)) { |
| if (inet_debug == DBG_SLIP) printk("\rtcp_rcv: not in seq\n"); |
| #ifdef undef |
| /* nice idea, but tcp_sequence already does this. Maybe it shouldn't?? */ |
| if(!th->rst) |
| tcp_send_ack(sk->sent_seq, sk->acked_seq, |
| sk, th, saddr); |
| #endif |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| } |
| |
| if (th->rst) { |
| sk->zapped=1; |
| /* This means the thing should really be closed. */ |
| sk->err = ECONNRESET; |
| |
| if (sk->state == TCP_CLOSE_WAIT) { |
| sk->err = EPIPE; |
| } |
| |
| /* |
| * A reset with a fin just means that |
| * the data was not all read. |
| */ |
| sk->state = TCP_CLOSE; |
| sk->shutdown = SHUTDOWN_MASK; |
| if (!sk->dead) { |
| sk->state_change(sk); |
| } |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| } |
| if ( |
| #if 0 |
| if ((opt && (opt->security != 0 || |
| opt->compartment != 0)) || |
| #endif |
| th->syn) { |
| sk->err = ECONNRESET; |
| sk->state = TCP_CLOSE; |
| sk->shutdown = SHUTDOWN_MASK; |
| tcp_reset(daddr, saddr, th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl); |
| if (!sk->dead) { |
| sk->state_change(sk); |
| } |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| } |
| |
| if (th->ack && !tcp_ack(sk, th, saddr, len)) { |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| } |
| |
| if (tcp_urg(sk, th, saddr, len)) { |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| } |
| |
| if (tcp_data(skb, sk, saddr, len)) { |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| } |
| |
| /* Moved: you must do data then fin bit */ |
| if (th->fin && tcp_fin(sk, th, saddr, dev)) { |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| } |
| |
| release_sock(sk); |
| return(0); |
| |
| case TCP_CLOSE: |
| if (sk->dead || sk->daddr) { |
| DPRINTF((DBG_TCP, "packet received for closed,dead socket\n")); |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| } |
| |
| if (!th->rst) { |
| if (!th->ack) |
| th->ack_seq = 0; |
| tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl); |
| } |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| |
| case TCP_LISTEN: |
| if (th->rst) { |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| } |
| if (th->ack) { |
| tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl); |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| } |
| |
| if (th->syn) { |
| #if 0 |
| if (opt->security != 0 || opt->compartment != 0) { |
| tcp_reset(daddr, saddr, th, prot, opt,dev); |
| release_sock(sk); |
| return(0); |
| } |
| #endif |
| |
| /* |
| * Now we just put the whole thing including |
| * the header and saddr, and protocol pointer |
| * into the buffer. We can't respond until the |
| * user tells us to accept the connection. |
| */ |
| tcp_conn_request(sk, skb, daddr, saddr, opt, dev); |
| release_sock(sk); |
| return(0); |
| } |
| |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| |
| default: |
| if (!tcp_sequence(sk, th, len, opt, saddr,dev)) { |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| } |
| |
| case TCP_SYN_SENT: |
| if (th->rst) { |
| sk->err = ECONNREFUSED; |
| sk->state = TCP_CLOSE; |
| sk->shutdown = SHUTDOWN_MASK; |
| sk->zapped = 1; |
| if (!sk->dead) { |
| sk->state_change(sk); |
| } |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| } |
| #if 0 |
| if (opt->security != 0 || opt->compartment != 0) { |
| sk->err = ECONNRESET; |
| sk->state = TCP_CLOSE; |
| sk->shutdown = SHUTDOWN_MASK; |
| tcp_reset(daddr, saddr, th, sk->prot, opt, dev); |
| if (!sk->dead) { |
| wake_up_interruptible(sk->sleep); |
| } |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| } |
| #endif |
| if (!th->ack) { |
| if (th->syn) { |
| sk->state = TCP_SYN_RECV; |
| } |
| |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| } |
| |
| switch(sk->state) { |
| case TCP_SYN_SENT: |
| if (!tcp_ack(sk, th, saddr, len)) { |
| tcp_reset(daddr, saddr, th, |
| sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl); |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| } |
| |
| /* |
| * If the syn bit is also set, switch to |
| * tcp_syn_recv, and then to established. |
| */ |
| if (!th->syn) { |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| } |
| |
| /* Ack the syn and fall through. */ |
| sk->acked_seq = th->seq+1; |
| sk->fin_seq = th->seq; |
| tcp_send_ack(sk->sent_seq, th->seq+1, |
| sk, th, sk->daddr); |
| |
| case TCP_SYN_RECV: |
| if (!tcp_ack(sk, th, saddr, len)) { |
| tcp_reset(daddr, saddr, th, |
| sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl); |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| } |
| sk->state = TCP_ESTABLISHED; |
| |
| /* |
| * Now we need to finish filling out |
| * some of the tcp header. |
| */ |
| /* We need to check for mtu info. */ |
| tcp_options(sk, th); |
| sk->dummy_th.dest = th->source; |
| sk->copied_seq = sk->acked_seq-1; |
| if (!sk->dead) { |
| sk->state_change(sk); |
| } |
| |
| /* |
| * We've already processed his first |
| * ack. In just about all cases that |
| * will have set max_window. This is |
| * to protect us against the possibility |
| * that the initial window he sent was 0. |
| * This must occur after tcp_options, which |
| * sets sk->mtu. |
| */ |
| if (sk->max_window == 0) { |
| sk->max_window = 32; |
| sk->mss = min(sk->max_window, sk->mtu); |
| } |
| |
| /* |
| * Now process the rest like we were |
| * already in the established state. |
| */ |
| if (th->urg) { |
| if (tcp_urg(sk, th, saddr, len)) { |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| } |
| } |
| if (tcp_data(skb, sk, saddr, len)) |
| kfree_skb(skb, FREE_READ); |
| |
| if (th->fin) tcp_fin(sk, th, saddr, dev); |
| release_sock(sk); |
| return(0); |
| } |
| |
| if (th->urg) { |
| if (tcp_urg(sk, th, saddr, len)) { |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| } |
| } |
| |
| if (tcp_data(skb, sk, saddr, len)) { |
| kfree_skb(skb, FREE_READ); |
| release_sock(sk); |
| return(0); |
| } |
| |
| if (!th->fin) { |
| release_sock(sk); |
| return(0); |
| } |
| tcp_fin(sk, th, saddr, dev); |
| release_sock(sk); |
| return(0); |
| } |
| } |
| |
| |
| /* |
| * This routine sends a packet with an out of date sequence |
| * number. It assumes the other end will try to ack it. |
| */ |
| static void |
| tcp_write_wakeup(struct sock *sk) |
| { |
| struct sk_buff *buff; |
| struct tcphdr *t1; |
| struct device *dev=NULL; |
| int tmp; |
| |
| if (sk->zapped) |
| return; /* Afer a valid reset we can send no more */ |
| |
| if (sk -> state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT && |
| sk -> state != TCP_FIN_WAIT1 && sk->state != TCP_FIN_WAIT2) |
| return; |
| |
| buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC); |
| if (buff == NULL) return; |
| |
| buff->mem_addr = buff; |
| buff->mem_len = MAX_ACK_SIZE; |
| buff->len = sizeof(struct tcphdr); |
| buff->free = 1; |
| buff->sk = sk; |
| DPRINTF((DBG_TCP, "in tcp_write_wakeup\n")); |
| t1 = (struct tcphdr *) buff->data; |
| |
| /* Put in the IP header and routing stuff. */ |
| tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev, |
| IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl); |
| if (tmp < 0) { |
| sk->prot->wfree(sk, buff->mem_addr, buff->mem_len); |
| return; |
| } |
| |
| buff->len += tmp; |
| t1 = (struct tcphdr *)((char *)t1 +tmp); |
| |
| memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1)); |
| |
| /* |
| * Use a previous sequence. |
| * This should cause the other end to send an ack. |
| */ |
| t1->seq = htonl(sk->sent_seq-1); |
| t1->ack = 1; |
| t1->res1= 0; |
| t1->res2= 0; |
| t1->rst = 0; |
| t1->urg = 0; |
| t1->psh = 0; |
| t1->fin = 0; |
| t1->syn = 0; |
| t1->ack_seq = ntohl(sk->acked_seq); |
| t1->window = ntohs(tcp_select_window(sk)/*sk->prot->rspace(sk)*/); |
| t1->doff = sizeof(*t1)/4; |
| tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk); |
| |
| /* Send it and free it. |
| * This will prevent the timer from automatically being restarted. |
| */ |
| sk->prot->queue_xmit(sk, dev, buff, 1); |
| } |
| |
| void |
| tcp_send_probe0(struct sock *sk) |
| { |
| if (sk->zapped) |
| return; /* Afer a valid reset we can send no more */ |
| |
| tcp_write_wakeup(sk); |
| |
| sk->backoff++; |
| sk->rto = min(sk->rto << 1, 120*HZ); |
| reset_timer (sk, TIME_PROBE0, sk->rto); |
| sk->retransmits++; |
| sk->prot->retransmits ++; |
| } |
| |
| |
| /* |
| * Socket option code for TCP. |
| */ |
| int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) |
| { |
| int val,err; |
| |
| if(level!=SOL_TCP) |
| return ip_setsockopt(sk,level,optname,optval,optlen); |
| |
| if (optval == NULL) |
| return(-EINVAL); |
| |
| err=verify_area(VERIFY_READ, optval, sizeof(int)); |
| if(err) |
| return err; |
| |
| val = get_fs_long((unsigned long *)optval); |
| |
| switch(optname) |
| { |
| case TCP_MAXSEG: |
| /* if(val<200||val>2048 || val>sk->mtu) */ |
| /* |
| * values greater than interface MTU won't take effect. however at |
| * the point when this call is done we typically don't yet know |
| * which interface is going to be used |
| */ |
| if(val<1||val>MAX_WINDOW) |
| return -EINVAL; |
| sk->user_mss=val; |
| return 0; |
| case TCP_NODELAY: |
| sk->nonagle=(val==0)?0:1; |
| return 0; |
| default: |
| return(-ENOPROTOOPT); |
| } |
| } |
| |
| int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen) |
| { |
| int val,err; |
| |
| if(level!=SOL_TCP) |
| return ip_getsockopt(sk,level,optname,optval,optlen); |
| |
| switch(optname) |
| { |
| case TCP_MAXSEG: |
| val=sk->user_mss; |
| break; |
| case TCP_NODELAY: |
| val=sk->nonagle; /* Until Johannes stuff is in */ |
| break; |
| default: |
| return(-ENOPROTOOPT); |
| } |
| err=verify_area(VERIFY_WRITE, optlen, sizeof(int)); |
| if(err) |
| return err; |
| put_fs_long(sizeof(int),(unsigned long *) optlen); |
| |
| err=verify_area(VERIFY_WRITE, optval, sizeof(int)); |
| if(err) |
| return err; |
| put_fs_long(val,(unsigned long *)optval); |
| |
| return(0); |
| } |
| |
| |
| struct proto tcp_prot = { |
| sock_wmalloc, |
| sock_rmalloc, |
| sock_wfree, |
| sock_rfree, |
| sock_rspace, |
| sock_wspace, |
| tcp_close, |
| tcp_read, |
| tcp_write, |
| tcp_sendto, |
| tcp_recvfrom, |
| ip_build_header, |
| tcp_connect, |
| tcp_accept, |
| ip_queue_xmit, |
| tcp_retransmit, |
| tcp_write_wakeup, |
| tcp_read_wakeup, |
| tcp_rcv, |
| tcp_select, |
| tcp_ioctl, |
| NULL, |
| tcp_shutdown, |
| tcp_setsockopt, |
| tcp_getsockopt, |
| 128, |
| 0, |
| {NULL,}, |
| "TCP" |
| }; |