1: /* 2: * Copyright (c) 1982, 1986 Regents of the University of California. 3: * All rights reserved. 4: * 5: * Redistribution and use in source and binary forms are permitted 6: * provided that this notice is preserved and that due credit is given 7: * to the University of California at Berkeley. The name of the University 8: * may not be used to endorse or promote products derived from this 9: * software without specific prior written permission. This software 10: * is provided ``as is'' without express or implied warranty. 11: * 12: * @(#)tcp_output.c 7.13.1.4 (Berkeley) 1995/10/10 13: */ 14: 15: #include "param.h" 16: #include "systm.h" 17: #include "mbuf.h" 18: #include "protosw.h" 19: #include "socket.h" 20: #include "socketvar.h" 21: #include "errno.h" 22: 23: #include "../net/route.h" 24: 25: #include "domain.h" 26: #include "in.h" 27: #include "in_pcb.h" 28: #include "in_systm.h" 29: #include "ip.h" 30: #include "ip_var.h" 31: #include "tcp.h" 32: #define TCPOUTFLAGS 33: #include "tcp_fsm.h" 34: #include "tcp_seq.h" 35: #include "tcp_timer.h" 36: #include "tcp_var.h" 37: #include "tcpip.h" 38: #include "tcp_debug.h" 39: 40: /* 41: * Initial options. 42: */ 43: u_char tcp_initopt[4] = { TCPOPT_MAXSEG, 4, 0x0, 0x0, }; 44: 45: /* 46: * Tcp output routine: figure out what should be sent and send it. 47: */ 48: tcp_output(tp) 49: register struct tcpcb *tp; 50: { 51: register struct socket *so = tp->t_inpcb->inp_socket; 52: register int len, win; 53: struct mbuf *m0; 54: int off, flags, error; 55: register struct mbuf *m; 56: register struct tcpiphdr *ti; 57: u_char *opt; 58: unsigned optlen = 0; 59: int idle, sendalot; 60: 61: /* 62: * Determine length of data that should be transmitted, 63: * and flags that will be used. 64: * If there is some data or critical controls (SYN, RST) 65: * to send, then transmit; otherwise, investigate further. 66: */ 67: idle = (tp->snd_max == tp->snd_una); 68: again: 69: sendalot = 0; 70: off = tp->snd_nxt - tp->snd_una; 71: win = MIN(tp->snd_wnd, tp->snd_cwnd); 72: 73: /* 74: * If in persist timeout with window of 0, send 1 byte. 75: * Otherwise, if window is small but nonzero 76: * and timer expired, we will send what we can 77: * and go to transmit state. 78: */ 79: if (tp->t_force) { 80: if (win == 0) 81: win = 1; 82: else { 83: tp->t_timer[TCPT_PERSIST] = 0; 84: tp->t_rxtshift = 0; 85: } 86: } 87: 88: len = MIN(so->so_snd.sb_cc, win) - off; 89: flags = tcp_outflags[tp->t_state]; 90: 91: if (len < 0) { 92: /* 93: * If FIN has been sent but not acked, 94: * but we haven't been called to retransmit, 95: * len will be -1. Otherwise, window shrank 96: * after we sent into it. If window shrank to 0, 97: * cancel pending retransmit and pull snd_nxt 98: * back to (closed) window. We will enter persist 99: * state below. If the window didn't close completely, 100: * just wait for an ACK. 101: */ 102: len = 0; 103: if (win == 0) { 104: tp->t_timer[TCPT_REXMT] = 0; 105: tp->snd_nxt = tp->snd_una; 106: } 107: } 108: if (len > tp->t_maxseg) { 109: len = tp->t_maxseg; 110: sendalot = 1; 111: } 112: if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) 113: flags &= ~TH_FIN; 114: win = sbspace(&so->so_rcv); 115: 116: 117: /* 118: * If our state indicates that FIN should be sent 119: * and we have not yet done so, or we're retransmitting the FIN, 120: * then we need to send. 121: */ 122: if (flags & TH_FIN && 123: ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una)) 124: goto send; 125: /* 126: * Send if we owe peer an ACK. 127: */ 128: if (tp->t_flags & TF_ACKNOW) 129: goto send; 130: if (flags & (TH_SYN|TH_RST)) 131: goto send; 132: if (SEQ_GT(tp->snd_up, tp->snd_una)) 133: goto send; 134: 135: /* 136: * Sender silly window avoidance. If connection is idle 137: * and can send all data, a maximum segment, 138: * at least a maximum default-size segment do it, 139: * or are forced, do it; otherwise don't bother. 140: * If peer's buffer is tiny, then send 141: * when window is at least half open. 142: * If retransmitting (possibly after persist timer forced us 143: * to send into a small window), then must resend. 144: */ 145: if (len) { 146: if (len == tp->t_maxseg) 147: goto send; 148: if ((idle || tp->t_flags & TF_NODELAY) && 149: len + off >= so->so_snd.sb_cc) 150: goto send; 151: if (tp->t_force) 152: goto send; 153: if (len >= tp->max_sndwnd / 2) 154: goto send; 155: if (SEQ_LT(tp->snd_nxt, tp->snd_max)) 156: goto send; 157: } 158: 159: /* 160: * Compare available window to amount of window 161: * known to peer (as advertised window less 162: * next expected input). If the difference is at least two 163: * max size segments or at least 35% of the maximum possible 164: * window, then want to send a window update to peer. 165: */ 166: if (win > 0) { 167: int adv = win - (tp->rcv_adv - tp->rcv_nxt); 168: 169: if (so->so_rcv.sb_cc == 0 && adv >= 2 * tp->t_maxseg) 170: goto send; 171: if (100 * adv / so->so_rcv.sb_hiwat >= 35) 172: goto send; 173: } 174: 175: /* 176: * TCP window updates are not reliable, rather a polling protocol 177: * using ``persist'' packets is used to insure receipt of window 178: * updates. The three ``states'' for the output side are: 179: * idle not doing retransmits or persists 180: * persisting to move a small or zero window 181: * (re)transmitting and thereby not persisting 182: * 183: * tp->t_timer[TCPT_PERSIST] 184: * is set when we are in persist state. 185: * tp->t_force 186: * is set when we are called to send a persist packet. 187: * tp->t_timer[TCPT_REXMT] 188: * is set when we are retransmitting 189: * The output side is idle when both timers are zero. 190: * 191: * If send window is too small, there is data to transmit, and no 192: * retransmit or persist is pending, then go to persist state. 193: * If nothing happens soon, send when timer expires: 194: * if window is nonzero, transmit what we can, 195: * otherwise force out a byte. 196: */ 197: if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 && 198: tp->t_timer[TCPT_PERSIST] == 0) { 199: tp->t_rxtshift = 0; 200: tcp_setpersist(tp); 201: } 202: 203: /* 204: * No reason to send a segment, just return. 205: */ 206: return (0); 207: 208: send: 209: /* 210: * Grab a header mbuf, attaching a copy of data to 211: * be transmitted, and initialize the header from 212: * the template for sends on this connection. 213: */ 214: MGET(m, M_DONTWAIT, MT_HEADER); 215: if (m == NULL) 216: return (ENOBUFS); 217: m->m_off = MMAXOFF - sizeof (struct tcpiphdr); 218: m->m_len = sizeof (struct tcpiphdr); 219: if (len) { 220: if (tp->t_force && len == 1) 221: tcpstat.tcps_sndprobe++; 222: else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) { 223: tcpstat.tcps_sndrexmitpack++; 224: tcpstat.tcps_sndrexmitbyte += len; 225: } else { 226: tcpstat.tcps_sndpack++; 227: tcpstat.tcps_sndbyte += len; 228: } 229: m->m_next = m_copy(so->so_snd.sb_mb, off, len); 230: if (m->m_next == 0) 231: len = 0; 232: } else if (tp->t_flags & TF_ACKNOW) 233: tcpstat.tcps_sndacks++; 234: else if (flags & (TH_SYN|TH_FIN|TH_RST)) 235: tcpstat.tcps_sndctrl++; 236: else if (SEQ_GT(tp->snd_up, tp->snd_una)) 237: tcpstat.tcps_sndurg++; 238: else 239: tcpstat.tcps_sndwinup++; 240: 241: ti = mtod(m, struct tcpiphdr *); 242: if (tp->t_template == 0) 243: panic("tcp_output"); 244: bcopy((caddr_t)tp->t_template, (caddr_t)ti, sizeof (struct tcpiphdr)); 245: 246: /* 247: * Fill in fields, remembering maximum advertised 248: * window for use in delaying messages about window sizes. 249: * If resending a FIN, be sure not to use a new sequence number. 250: */ 251: if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && 252: tp->snd_nxt == tp->snd_max) 253: tp->snd_nxt--; 254: ti->ti_seq = htonl(tp->snd_nxt); 255: ti->ti_ack = htonl(tp->rcv_nxt); 256: /* 257: * Before ESTABLISHED, force sending of initial options 258: * unless TCP set to not do any options. 259: */ 260: opt = NULL; 261: if (flags & TH_SYN && (tp->t_flags & TF_NOOPT) == 0) { 262: u_short mss; 263: 264: mss = MIN(so->so_rcv.sb_hiwat / 2, tcp_mss(tp)); 265: if (mss > IP_MSS - sizeof(struct tcpiphdr)) { 266: opt = tcp_initopt; 267: optlen = sizeof (tcp_initopt); 268: *(u_short *)(opt + 2) = htons(mss); 269: } 270: } 271: if (opt) { 272: m0 = m->m_next; 273: m->m_next = m_get(M_DONTWAIT, MT_DATA); 274: if (m->m_next == 0) { 275: (void) m_free(m); 276: m_freem(m0); 277: return (ENOBUFS); 278: } 279: m->m_next->m_next = m0; 280: m0 = m->m_next; 281: m0->m_len = optlen; 282: bcopy((caddr_t)opt, mtod(m0, caddr_t), optlen); 283: opt = (u_char *)(mtod(m0, caddr_t) + optlen); 284: while (m0->m_len & 0x3) { 285: *opt++ = TCPOPT_EOL; 286: m0->m_len++; 287: } 288: optlen = m0->m_len; 289: ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2; 290: } 291: ti->ti_flags = flags; 292: /* 293: * Calculate receive window. Don't shrink window, 294: * but avoid silly window syndrome. 295: */ 296: if (win < (so->so_rcv.sb_hiwat / 4) && win < tp->t_maxseg) 297: win = 0; 298: if (win < (int)(tp->rcv_adv - tp->rcv_nxt)) 299: win = (int)(tp->rcv_adv - tp->rcv_nxt); 300: if (win > IP_MAXPACKET) 301: win = IP_MAXPACKET; 302: ti->ti_win = htons((u_short)win); 303: if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { 304: ti->ti_urp = htons((u_short)(tp->snd_up - tp->snd_nxt)); 305: ti->ti_flags |= TH_URG; 306: } else 307: /* 308: * If no urgent pointer to send, then we pull 309: * the urgent pointer to the left edge of the send window 310: * so that it doesn't drift into the send window on sequence 311: * number wraparound. 312: */ 313: tp->snd_up = tp->snd_una; /* drag it along */ 314: /* 315: * If anything to send and we can send it all, set PUSH. 316: * (This will keep happy those implementations which only 317: * give data to the user when a buffer fills or a PUSH comes in.) 318: */ 319: if (len && off+len == so->so_snd.sb_cc) 320: ti->ti_flags |= TH_PUSH; 321: 322: /* 323: * Put TCP length in extended header, and then 324: * checksum extended header and data. 325: */ 326: if (len + optlen) 327: ti->ti_len = htons((u_short)(sizeof(struct tcphdr) + 328: optlen + len)); 329: ti->ti_sum = in_cksum(m, sizeof (struct tcpiphdr) + (int)optlen + len); 330: 331: /* 332: * In transmit state, time the transmission and arrange for 333: * the retransmit. In persist state, just set snd_max. 334: */ 335: if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) { 336: tcp_seq startseq = tp->snd_nxt; 337: 338: /* 339: * Advance snd_nxt over sequence space of this segment. 340: */ 341: if (flags & TH_SYN) 342: tp->snd_nxt++; 343: if (flags & TH_FIN) { 344: tp->snd_nxt++; 345: tp->t_flags |= TF_SENTFIN; 346: } 347: tp->snd_nxt += len; 348: if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { 349: tp->snd_max = tp->snd_nxt; 350: /* 351: * Time this transmission if not a retransmission and 352: * not currently timing anything. 353: */ 354: if (tp->t_rtt == 0) { 355: tp->t_rtt = 1; 356: tp->t_rtseq = startseq; 357: tcpstat.tcps_segstimed++; 358: } 359: } 360: 361: /* 362: * Set retransmit timer if not currently set, 363: * and not doing an ack or a keep-alive probe. 364: * Initial value for retransmit timer is smoothed 365: * round-trip time + 2 * round-trip time variance. 366: * Initialize shift counter which is used for backoff 367: * of retransmit time. 368: */ 369: if (tp->t_timer[TCPT_REXMT] == 0 && 370: tp->snd_nxt != tp->snd_una) { 371: tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; 372: if (tp->t_timer[TCPT_PERSIST]) { 373: tp->t_timer[TCPT_PERSIST] = 0; 374: tp->t_rxtshift = 0; 375: } 376: } 377: } else 378: if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) 379: tp->snd_max = tp->snd_nxt + len; 380: 381: /* 382: * Trace. 383: */ 384: if (so->so_options & SO_DEBUG) 385: tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0); 386: 387: /* 388: * Fill in IP length and desired time to live and 389: * send to IP level. 390: */ 391: ((struct ip *)ti)->ip_len = sizeof (struct tcpiphdr) + optlen + len; 392: ((struct ip *)ti)->ip_ttl = ip_defttl; /* XXX */ 393: #if BSD>=43 394: error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route, 395: so->so_options & SO_DONTROUTE); 396: #else 397: error = ip_output(m, (struct mbuf *)0, &tp->t_inpcb->inp_route, 398: so->so_options & SO_DONTROUTE); 399: #endif 400: if (error) { 401: if (error == ENOBUFS) { 402: tcp_quench(tp->t_inpcb); 403: return (0); 404: } 405: return (error); 406: } 407: tcpstat.tcps_sndtotal++; 408: 409: /* 410: * Data sent (as far as we can tell). 411: * If this advertises a larger window than any other segment, 412: * then remember the size of the advertised window. 413: * Any pending ACK has now been sent. 414: */ 415: if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv)) 416: tp->rcv_adv = tp->rcv_nxt + win; 417: tp->t_flags &= ~(TF_ACKNOW|TF_DELACK); 418: if (sendalot) 419: goto again; 420: return (0); 421: } 422: 423: tcp_setpersist(tp) 424: register struct tcpcb *tp; 425: { 426: register t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; 427: 428: if (tp->t_timer[TCPT_REXMT]) 429: panic("tcp_output REXMT"); 430: /* 431: * Start/restart persistance timer. 432: */ 433: TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], 434: t * tcp_backoff[tp->t_rxtshift], 435: TCPTV_PERSMIN, TCPTV_PERSMAX); 436: if (tp->t_rxtshift < TCP_MAXRXTSHIFT) 437: tp->t_rxtshift++; 438: }