ns2--tcp-full.cc
1 /* -*- Mode:C++; c-basic-offset:8; tab-8; indent-tabs-mode:t -*- */ 2 3 /* 4 * Copyright (c) Intel Corporation 2001. All rights reserved. 5 * 6 * Licensed under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 /* 19 * Copyright (c) 1997, 1998 The Regents of the University of California. 20 * All rights reserved. 21 * 22 * Redistribution and use in source and binary forms, with or without 23 * modification, are permitted provided that the following conditions 24 * are met: 25 * 1. Redistributions of source code must retain the above copyright 26 * notice, this list of conditions and the following disclaimer. 27 * 2. Redistributions in binary form must reproduce the above copyright 28 * notice, this list of conditions and the following disclaimer in the 29 * documentation and/or other materials provided with the distribution. 30 * 3. All advertising materials mentioning features or use of this software 31 * must display the following acknowledgement: 32 * This product includes software developed by the Network Research 33 * Group at Lawrence Berkeley National Laboratory. 34 * 4. Neither the name of the University nor of the Laboratory may be used 35 * to endorse or promote products derived from this software without 36 * specific prior written permission. 
37 * 38 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 39 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 41 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 42 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 43 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 44 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 45 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 46 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 47 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 48 * SUCH DAMAGE. 49 */ 50 51 /* 52 * 53 * Full-TCP : A two-way TCP very similar to the 4.4BSD version of Reno TCP. 54 * This version also includes variants Tahoe, NewReno, and SACK. 55 * 56 * This code below has received a fairly major restructuring (Aug. 2001). 57 * The ReassemblyQueue structure is now removed to a separate module and 58 * entirely re-written. 59 * Also, the SACK functionality has been re-written (almost) entirely. 60 * -KF [kfall@intel.com] 61 * 62 * This code below was motivated in part by code contributed by 63 * Kathie Nichols (nichols@baynetworks.com). The code below is based primarily 64 * on the 4.4BSD TCP implementation. -KF [kfall@ee.lbl.gov] 65 * 66 * Kathie Nichols and Van Jacobson have contributed significant bug fixes, 67 * especially with respect to the handling of sequence numbers during 68 * connection establishment/clearing. Additional fixes have followed 69 * theirs. 
70 * 71 * Fixes for gensack() and ReassemblyQueue::add() contributed by Richard 72 * Mortier <Richard.Mortier@cl.cam.ac.uk> 73 * 74 * Some warnings and comments: 75 * this version of TCP will not work correctly if the sequence number 76 * goes above 2147483648 due to sequence number wrap 77 * 78 * this version of TCP by default sends data at the beginning of a 79 * connection in the "typical" way... That is, 80 * A ------> SYN ------> B 81 * A <----- SYN+ACK ---- B 82 * A ------> ACK ------> B 83 * A ------> data -----> B 84 * 85 * there is no dynamic receiver's advertised window. The advertised 86 * window is simulated by simply telling the sender a bound on the window 87 * size (wnd_). 88 * 89 * in real TCP, a user process performing a read (via PRU_RCVD) 90 * calls tcp_output each time to (possibly) send a window 91 * update. Here we don't have a user process, so we simulate 92 * a user process always ready to consume all the receive buffer 93 * 94 * Notes: 95 * wnd_, wnd_init_, cwnd_, ssthresh_ are in segment units 96 * sequence and ack numbers are in byte units 97 * 98 * Futures: 99 * there are different existing TCPs with respect to how 100 * ack's are handled on connection startup. Some delay 101 * the ack for the first segment, which can cause connections 102 * to take longer to start up than if we be sure to ack it quickly. 
103 * 104 * some TCPs arrange for immediate ACK generation if the incoming segment 105 * contains the PUSH bit 106 * 107 * 108 */ 109 110 #ifndef lint 111 static const char rcsid[] = 112 "@(#) $Header: /cvsroot/nsnam/ns-2/tcp/tcp-full.cc,v 1.130 2010/03/08 05:54:54 tom_henderson Exp $ (LBL)"; 113 #endif 114 115 #include "ip.h" 116 #include "tcp-full.h" 117 #include "flags.h" 118 #include "random.h" 119 #include "template.h" 120 121 #ifndef TRUE 122 #define TRUE 1 123 #endif 124 125 #ifndef FALSE 126 #define FALSE 0 127 #endif 128 129 /* 130 * Tcl Linkage for the following: 131 * Agent/TCP/FullTcp, Agent/TCP/FullTcp/Tahoe, 132 * Agent/TCP/FullTcp/Newreno, Agent/TCP/FullTcp/Sack 133 * 134 * See tcl/lib/ns-default.tcl for init methods for 135 * Tahoe, Newreno, and Sack 136 */ 137 138 static class FullTcpClass : public TclClass { 139 public: 140 FullTcpClass() : TclClass("Agent/TCP/FullTcp") {} 141 TclObject* create(int, const char*const*) { 142 return (new FullTcpAgent()); 143 } 144 } class_full; 145 146 static class TahoeFullTcpClass : public TclClass { 147 public: 148 TahoeFullTcpClass() : TclClass("Agent/TCP/FullTcp/Tahoe") {} 149 TclObject* create(int, const char*const*) { 150 // ns-default sets reno_fastrecov_ to false 151 return (new TahoeFullTcpAgent()); 152 } 153 } class_tahoe_full; 154 155 static class NewRenoFullTcpClass : public TclClass { 156 public: 157 NewRenoFullTcpClass() : TclClass("Agent/TCP/FullTcp/Newreno") {} 158 TclObject* create(int, const char*const*) { 159 // ns-default sets open_cwnd_on_pack_ to false 160 return (new NewRenoFullTcpAgent()); 161 } 162 } class_newreno_full; 163 164 static class SackFullTcpClass : public TclClass { 165 public: 166 SackFullTcpClass() : TclClass("Agent/TCP/FullTcp/Sack") {} 167 TclObject* create(int, const char*const*) { 168 // ns-default sets reno_fastrecov_ to false 169 // ns-default sets open_cwnd_on_pack_ to false 170 return (new SackFullTcpAgent()); 171 } 172 } class_sack_full; 173 174 /* 175 * 
Delayed-binding variable linkage 176 */ 177 178 void 179 FullTcpAgent::delay_bind_init_all() 180 { 181 delay_bind_init_one("segsperack_"); 182 delay_bind_init_one("segsize_"); 183 delay_bind_init_one("tcprexmtthresh_"); 184 delay_bind_init_one("iss_"); 185 delay_bind_init_one("nodelay_"); 186 delay_bind_init_one("data_on_syn_"); 187 delay_bind_init_one("dupseg_fix_"); 188 delay_bind_init_one("dupack_reset_"); 189 delay_bind_init_one("close_on_empty_"); 190 delay_bind_init_one("signal_on_empty_"); 191 delay_bind_init_one("interval_"); 192 delay_bind_init_one("ts_option_size_"); 193 delay_bind_init_one("reno_fastrecov_"); 194 delay_bind_init_one("pipectrl_"); 195 delay_bind_init_one("open_cwnd_on_pack_"); 196 delay_bind_init_one("halfclose_"); 197 delay_bind_init_one("nopredict_"); 198 delay_bind_init_one("ecn_syn_"); 199 delay_bind_init_one("ecn_syn_wait_"); 200 delay_bind_init_one("debug_"); 201 delay_bind_init_one("spa_thresh_"); 202 203 TcpAgent::delay_bind_init_all(); 204 205 reset(); 206 } 207 208 int 209 FullTcpAgent::delay_bind_dispatch(const char *varName, const char *localName, TclObject *tracer) 210 { 211 if (delay_bind(varName, localName, "segsperack_", &segs_per_ack_, tracer)) return TCL_OK; 212 if (delay_bind(varName, localName, "segsize_", &maxseg_, tracer)) return TCL_OK; 213 if (delay_bind(varName, localName, "tcprexmtthresh_", &tcprexmtthresh_, tracer)) return TCL_OK; 214 if (delay_bind(varName, localName, "iss_", &iss_, tracer)) return TCL_OK; 215 if (delay_bind(varName, localName, "spa_thresh_", &spa_thresh_, tracer)) return TCL_OK; 216 if (delay_bind_bool(varName, localName, "nodelay_", &nodelay_, tracer)) return TCL_OK; 217 if (delay_bind_bool(varName, localName, "data_on_syn_", &data_on_syn_, tracer)) return TCL_OK; 218 if (delay_bind_bool(varName, localName, "dupseg_fix_", &dupseg_fix_, tracer)) return TCL_OK; 219 if (delay_bind_bool(varName, localName, "dupack_reset_", &dupack_reset_, tracer)) return TCL_OK; 220 if (delay_bind_bool(varName, 
localName, "close_on_empty_", &close_on_empty_, tracer)) return TCL_OK; 221 if (delay_bind_bool(varName, localName, "signal_on_empty_", &signal_on_empty_, tracer)) return TCL_OK; 222 if (delay_bind_time(varName, localName, "interval_", &delack_interval_, tracer)) return TCL_OK; 223 if (delay_bind(varName, localName, "ts_option_size_", &ts_option_size_, tracer)) return TCL_OK; 224 if (delay_bind_bool(varName, localName, "reno_fastrecov_", &reno_fastrecov_, tracer)) return TCL_OK; 225 if (delay_bind_bool(varName, localName, "pipectrl_", &pipectrl_, tracer)) return TCL_OK; 226 if (delay_bind_bool(varName, localName, "open_cwnd_on_pack_", &open_cwnd_on_pack_, tracer)) return TCL_OK; 227 if (delay_bind_bool(varName, localName, "halfclose_", &halfclose_, tracer)) return TCL_OK; 228 if (delay_bind_bool(varName, localName, "nopredict_", &nopredict_, tracer)) return TCL_OK; 229 if (delay_bind_bool(varName, localName, "ecn_syn_", &ecn_syn_, tracer)) return TCL_OK; 230 if (delay_bind(varName, localName, "ecn_syn_wait_", &ecn_syn_wait_, tracer)) return TCL_OK; 231 if (delay_bind_bool(varName, localName, "debug_", &debug_, tracer)) return TCL_OK; 232 233 return TcpAgent::delay_bind_dispatch(varName, localName, tracer); 234 } 235 236 void 237 SackFullTcpAgent::delay_bind_init_all() 238 { 239 delay_bind_init_one("clear_on_timeout_"); 240 delay_bind_init_one("sack_rtx_cthresh_"); 241 delay_bind_init_one("sack_rtx_bthresh_"); 242 delay_bind_init_one("sack_block_size_"); 243 delay_bind_init_one("sack_option_size_"); 244 delay_bind_init_one("max_sack_blocks_"); 245 delay_bind_init_one("sack_rtx_threshmode_"); 246 FullTcpAgent::delay_bind_init_all(); 247 } 248 249 int 250 SackFullTcpAgent::delay_bind_dispatch(const char *varName, const char *localName, TclObject *tracer) 251 { 252 if (delay_bind_bool(varName, localName, "clear_on_timeout_", &clear_on_timeout_, tracer)) return TCL_OK; 253 if (delay_bind(varName, localName, "sack_rtx_cthresh_", &sack_rtx_cthresh_, tracer)) return 
TCL_OK; 254 if (delay_bind(varName, localName, "sack_rtx_bthresh_", &sack_rtx_bthresh_, tracer)) return TCL_OK; 255 if (delay_bind(varName, localName, "sack_rtx_threshmode_", &sack_rtx_threshmode_, tracer)) return TCL_OK; 256 if (delay_bind(varName, localName, "sack_block_size_", &sack_block_size_, tracer)) return TCL_OK; 257 if (delay_bind(varName, localName, "sack_option_size_", &sack_option_size_, tracer)) return TCL_OK; 258 if (delay_bind(varName, localName, "max_sack_blocks_", &max_sack_blocks_, tracer)) return TCL_OK; 259 return FullTcpAgent::delay_bind_dispatch(varName, localName, tracer); 260 } 261 262 int 263 FullTcpAgent::command(int argc, const char*const* argv) 264 { 265 // would like to have some "connect" primitive 266 // here, but the problem is that we get called before 267 // the simulation is running and we want to send a SYN. 268 // Because no routing exists yet, this fails. 269 // Instead, see code in advance(). 270 // 271 // listen can happen any time because it just changes state_ 272 // 273 // close is designed to happen at some point after the 274 // simulation is running (using an ns 'at' command) 275 276 if (argc == 2) { 277 if (strcmp(argv[1], "listen") == 0) { 278 // just a state transition 279 listen(); 280 return (TCL_OK); 281 } 282 if (strcmp(argv[1], "close") == 0) { 283 usrclosed(); 284 return (TCL_OK); 285 } 286 } 287 if (argc == 3) { 288 if (strcmp(argv[1], "advance") == 0) { 289 advanceby(atoi(argv[2])); 290 return (TCL_OK); 291 } 292 if (strcmp(argv[1], "advanceby") == 0) { 293 advanceby(atoi(argv[2])); 294 return (TCL_OK); 295 } 296 if (strcmp(argv[1], "advance-bytes") == 0) { 297 advance_bytes(atoi(argv[2])); 298 return (TCL_OK); 299 } 300 } 301 if (argc == 4) { 302 if (strcmp(argv[1], "sendmsg") == 0) { 303 sendmsg(atoi(argv[2]), argv[3]); 304 return (TCL_OK); 305 } 306 } 307 return (TcpAgent::command(argc, argv)); 308 } 309 310 /* 311 * "User Interface" Functions for Full TCP 312 * advanceby(number of packets) 313 * 
advance_bytes(number of bytes) 314 * sendmsg(int bytes, char* buf) 315 * listen 316 * close 317 */ 318 319 /* 320 * the 'advance' interface to the regular tcp is in packet 321 * units. Here we scale this to bytes for full tcp. 322 * 323 * 'advance' is normally called by an "application" (i.e. data source) 324 * to signal that there is something to send 325 * 326 * 'curseq_' is the sequence number of the last byte provided 327 * by the application. In the case where no data has been supplied 328 * by the application, curseq_ is the iss_. 329 */ 330 void 331 FullTcpAgent::advanceby(int np) 332 { 333 // XXX hack: 334 // because np is in packets and a data source 335 // may pass a *huge* number as a way to tell us 336 // to go forever, just look for the huge number 337 // and if it's there, pre-divide it 338 if (np >= 0x10000000) 339 np /= maxseg_; 340 341 advance_bytes(np * maxseg_); 342 return; 343 } 344 345 /* 346 * the byte-oriented interface: advance_bytes(int nbytes) 347 */ 348 349 void 350 FullTcpAgent::advance_bytes(int nb) 351 { 352 353 // 354 // state-specific operations: 355 // if CLOSED or LISTEN, reset and try a new active open/connect 356 // if ESTABLISHED, queue and try to send more 357 // if SYN_SENT or SYN_RCVD, just queue 358 // if above ESTABLISHED, we are closing, so don't allow 359 // 360 361 switch (state_) { 362 363 case TCPS_CLOSED: 364 case TCPS_LISTEN: 365 reset(); 366 curseq_ = iss_ + nb; 367 connect(); // initiate new connection 368 break; 369 370 case TCPS_ESTABLISHED: 371 case TCPS_SYN_SENT: 372 case TCPS_SYN_RECEIVED: 373 if (curseq_ < iss_) 374 curseq_ = iss_; 375 curseq_ += nb; 376 break; 377 378 default: 379 if (debug_) 380 fprintf(stderr, "%f: FullTcpAgent::advance(%s): cannot advance while in state %s ", 381 now(), name(), statestr(state_)); 382 383 } 384 385 if (state_ == TCPS_ESTABLISHED) 386 send_much(0, REASON_NORMAL, maxburst_); 387 388 return; 389 } 390 391 /* 392 * If MSG_EOF is set, by setting close_on_empty_ to TRUE, we 
ensure that 393 * a FIN will be sent when the send buffer emptys. 394 * If DAT_EOF is set, the callback function done_data is called 395 * when the send buffer empty 396 * 397 * When (in the future?) FullTcpAgent implements T/TCP, avoidance of 3-way 398 * handshake can be handled in this function. 399 */ 400 void 401 FullTcpAgent::sendmsg(int nbytes, const char *flags) 402 { 403 if (flags && strcmp(flags, "MSG_EOF") == 0) 404 close_on_empty_ = TRUE; 405 if (flags && strcmp(flags, "DAT_EOF") == 0) 406 signal_on_empty_ = TRUE; 407 408 if (nbytes == -1) { 409 infinite_send_ = TRUE; 410 advance_bytes(0); 411 } else 412 advance_bytes(nbytes); 413 } 414 415 /* 416 * do an active open 417 * (in real TCP, see tcp_usrreq, case PRU_CONNECT) 418 */ 419 void 420 FullTcpAgent::connect() 421 { 422 newstate(TCPS_SYN_SENT); // sending a SYN now 423 sent(iss_, foutput(iss_, REASON_NORMAL)); 424 return; 425 } 426 427 /* 428 * be a passive opener 429 * (in real TCP, see tcp_usrreq, case PRU_LISTEN) 430 * (for simulation, make this peer's ptype ACKs) 431 */ 432 void 433 FullTcpAgent::listen() 434 { 435 newstate(TCPS_LISTEN); 436 type_ = PT_ACK; // instead of PT_TCP 437 } 438 439 440 /* 441 * This function is invoked when the sender buffer is empty. It in turn 442 * invokes the Tcl done_data procedure that was registered with TCP. 
443 */ 444 445 void 446 FullTcpAgent::bufferempty() 447 { 448 signal_on_empty_=FALSE; 449 Tcl::instance().evalf("%s done_data", this->name()); 450 } 451 452 453 /* 454 * called when user/application performs 'close' 455 */ 456 457 void 458 FullTcpAgent::usrclosed() 459 { 460 curseq_ = maxseq_ - 1; // now, no more data 461 infinite_send_ = FALSE; // stop infinite send 462 463 switch (state_) { 464 case TCPS_CLOSED: 465 case TCPS_LISTEN: 466 cancel_timers(); 467 newstate(TCPS_CLOSED); 468 finish(); 469 break; 470 case TCPS_SYN_SENT: 471 newstate(TCPS_CLOSED); 472 /* fall through */ 473 case TCPS_LAST_ACK: 474 flags_ |= TF_NEEDFIN; 475 send_much(1, REASON_NORMAL, maxburst_); 476 break; 477 case TCPS_SYN_RECEIVED: 478 case TCPS_ESTABLISHED: 479 newstate(TCPS_FIN_WAIT_1); 480 flags_ |= TF_NEEDFIN; 481 send_much(1, REASON_NORMAL, maxburst_); 482 break; 483 case TCPS_CLOSE_WAIT: 484 newstate(TCPS_LAST_ACK); 485 flags_ |= TF_NEEDFIN; 486 send_much(1, REASON_NORMAL, maxburst_); 487 break; 488 case TCPS_FIN_WAIT_1: 489 case TCPS_FIN_WAIT_2: 490 case TCPS_CLOSING: 491 /* usr asked for a close more than once [?] 
*/ 492 if (debug_) 493 fprintf(stderr, 494 "%f FullTcpAgent(%s): app close in bad state %s ", 495 now(), name(), statestr(state_)); 496 break; 497 default: 498 if (debug_) 499 fprintf(stderr, 500 "%f FullTcpAgent(%s): app close in unknown state %s ", 501 now(), name(), statestr(state_)); 502 } 503 504 return; 505 } 506 507 /* 508 * Utility type functions 509 */ 510 511 void 512 FullTcpAgent::cancel_timers() 513 { 514 515 // cancel: rtx, burstsend, delsnd 516 TcpAgent::cancel_timers(); 517 // cancel: delack 518 delack_timer_.force_cancel(); 519 } 520 521 void 522 FullTcpAgent::newstate(int state) 523 { 524 //printf("%f(%s): state changed from %s to %s ", 525 //now(), name(), statestr(state_), statestr(state)); 526 527 state_ = state; 528 } 529 530 void 531 FullTcpAgent::prpkt(Packet *pkt) 532 { 533 hdr_tcp *tcph = hdr_tcp::access(pkt); // TCP header 534 hdr_cmn *th = hdr_cmn::access(pkt); // common header (size, etc) 535 //hdr_flags *fh = hdr_flags::access(pkt); // flags (CWR, CE, bits) 536 hdr_ip* iph = hdr_ip::access(pkt); 537 int datalen = th->size() - tcph->hlen(); // # payload bytes 538 539 fprintf(stdout, " [%d:%d.%d>%d.%d] (hlen:%d, dlen:%d, seq:%d, ack:%d, flags:0x%x (%s), salen:%d, reason:0x%x) ", 540 th->uid(), 541 iph->saddr(), iph->sport(), 542 iph->daddr(), iph->dport(), 543 tcph->hlen(), 544 datalen, 545 tcph->seqno(), 546 tcph->ackno(), 547 tcph->flags(), flagstr(tcph->flags()), 548 tcph->sa_length(), 549 tcph->reason()); 550 } 551 552 char * 553 FullTcpAgent::flagstr(int hflags) 554 { 555 // update this if tcp header flags change 556 static char *flagstrs[28] = { 557 "<null>", "<FIN>", "<SYN>", "<SYN,FIN>", // 0-3 558 "<?>", "<?,FIN>", "<?,SYN>", "<?,SYN,FIN>", // 4-7 559 "<PSH>", "<PSH,FIN>", "<PSH,SYN>", "<PSH,SYN,FIN>", // 0x08-0x0b 560 /* do not use <??, in next line because that's an ANSI trigraph */ 561 "<?>", "<?,FIN>", "<?,SYN>", "<?,SYN,FIN>", // 0x0c-0x0f 562 "<ACK>", "<ACK,FIN>", "<ACK,SYN>", "<ACK,SYN,FIN>", // 0x10-0x13 563 "<ACK>", 
"<ACK,FIN>", "<ACK,SYN>", "<ACK,SYN,FIN>", // 0x14-0x17 564 "<PSH,ACK>", "<PSH,ACK,FIN>", "<PSH,ACK,SYN>", "<PSH,ACK,SYN,FIN>", // 0x18-0x1b 565 }; 566 if (hflags < 0 || (hflags > 28)) { 567 /* Added strings for CWR and ECE -M. Weigle 6/27/02 */ 568 if (hflags == 72) 569 return ("<ECE,PSH>"); 570 else if (hflags == 80) 571 return ("<ECE,ACK>"); 572 else if (hflags == 88) 573 return ("<ECE,PSH,ACK>"); 574 else if (hflags == 152) 575 return ("<CWR,PSH,ACK>"); 576 else if (hflags == 153) 577 return ("<CWR,PSH,ACK,FIN>"); 578 else 579 return ("<invalid>"); 580 } 581 return (flagstrs[hflags]); 582 } 583 584 char * 585 FullTcpAgent::statestr(int state) 586 { 587 static char *statestrs[TCP_NSTATES] = { 588 "CLOSED", "LISTEN", "SYN_SENT", "SYN_RCVD", 589 "ESTABLISHED", "CLOSE_WAIT", "FIN_WAIT_1", "CLOSING", 590 "LAST_ACK", "FIN_WAIT_2" 591 }; 592 if (state < 0 || (state >= TCP_NSTATES)) 593 return ("INVALID"); 594 return (statestrs[state]); 595 } 596 597 void 598 DelAckTimer::expire(Event *) { 599 a_->timeout(TCP_TIMER_DELACK); 600 } 601 602 /* 603 * reset to starting point, don't set state_ here, 604 * because our starting point might be LISTEN rather 605 * than CLOSED if we're a passive opener 606 */ 607 void 608 FullTcpAgent::reset() 609 { 610 cancel_timers(); // cancel timers first 611 TcpAgent::reset(); // resets most variables 612 rq_.clear(); // clear reassembly queue 613 rtt_init(); // zero rtt, srtt, backoff 614 615 last_ack_sent_ = -1; 616 rcv_nxt_ = -1; 617 pipe_ = 0; 618 rtxbytes_ = 0; 619 flags_ = 0; 620 t_seqno_ = iss_; 621 maxseq_ = -1; 622 irs_ = -1; 623 last_send_time_ = -1.0; 624 if (ts_option_) 625 recent_ = recent_age_ = 0.0; 626 else 627 recent_ = recent_age_ = -1.0; 628 629 fastrecov_ = FALSE; 630 631 closed_ = 0; 632 close_on_empty_ = FALSE; 633 634 if (ecn_syn_) 635 ecn_syn_next_ = 1; 636 else 637 ecn_syn_next_ = 0; 638 639 } 640 641 /* 642 * This function is invoked when the connection is done. 
It in turn 643 * invokes the Tcl finish procedure that was registered with TCP. 644 * This function mimics tcp_close() 645 */ 646 647 void 648 FullTcpAgent::finish() 649 { 650 Tcl::instance().evalf("%s done", this->name()); 651 } 652 /* 653 * headersize: 654 * how big is an IP+TCP header in bytes; include options such as ts 655 * this function should be virtual so others (e.g. SACK) can override 656 */ 657 int 658 FullTcpAgent::headersize() 659 { 660 int total = tcpip_base_hdr_size_; 661 if (total < 1) { 662 fprintf(stderr, 663 "%f: FullTcpAgent(%s): warning: tcpip hdr size is only %d bytes ", 664 now(), name(), tcpip_base_hdr_size_); 665 } 666 667 if (ts_option_) 668 total += ts_option_size_; 669 670 return (total); 671 } 672 673 /* 674 * flags that are completely dependent on the tcp state 675 * these are used for the next outgoing packet in foutput() 676 * (in real TCP, see tcp_fsm.h, the "tcp_outflags" array) 677 */ 678 int 679 FullTcpAgent::outflags() 680 { 681 // in real TCP an RST is added in the CLOSED state 682 static int tcp_outflags[TCP_NSTATES] = { 683 TH_ACK, /* 0, CLOSED */ 684 0, /* 1, LISTEN */ 685 TH_SYN, /* 2, SYN_SENT */ 686 TH_SYN|TH_ACK, /* 3, SYN_RECEIVED */ 687 TH_ACK, /* 4, ESTABLISHED */ 688 TH_ACK, /* 5, CLOSE_WAIT */ 689 TH_FIN|TH_ACK, /* 6, FIN_WAIT_1 */ 690 TH_FIN|TH_ACK, /* 7, CLOSING */ 691 TH_FIN|TH_ACK, /* 8, LAST_ACK */ 692 TH_ACK, /* 9, FIN_WAIT_2 */ 693 /* 10, TIME_WAIT --- not used in simulator */ 694 }; 695 696 if (state_ < 0 || (state_ >= TCP_NSTATES)) { 697 fprintf(stderr, "%f FullTcpAgent(%s): invalid state %d ", 698 now(), name(), state_); 699 return (0x0); 700 } 701 702 return (tcp_outflags[state_]); 703 } 704 705 /* 706 * reaass() -- extract the appropriate fields from the packet 707 * and pass this info the ReassemblyQueue add routine 708 * 709 * returns the TCP header flags representing the "or" of 710 * the flags contained in the adjacent sequence # blocks 711 */ 712 713 int 714 FullTcpAgent::reass(Packet* pkt) 715 { 
716 hdr_tcp *tcph = hdr_tcp::access(pkt); 717 hdr_cmn *th = hdr_cmn::access(pkt); 718 719 int start = tcph->seqno(); 720 int end = start + th->size() - tcph->hlen(); 721 int tiflags = tcph->flags(); 722 int fillshole = (start == rcv_nxt_); 723 int flags; 724 725 // end contains the seq of the last byte of 726 // in the packet plus one 727 728 if (start == end && (tiflags & TH_FIN) == 0) { 729 fprintf(stderr, "%f: FullTcpAgent(%s)::reass() -- bad condition - adding non-FIN zero-len seg ", 730 now(), name()); 731 abort(); 732 } 733 734 flags = rq_.add(start, end, tiflags, 0); 735 736 //present: 737 // 738 // If we've never received a SYN (unlikely) 739 // or this is an out of order addition, no reason to coalesce 740 // 741 742 if (TCPS_HAVERCVDSYN(state_) == 0 || !fillshole) { 743 return (0x00); 744 } 745 // 746 // If we get some data in SYN_RECVD, no need to present to user yet 747 // 748 if (state_ == TCPS_SYN_RECEIVED && (end > start)) 749 return (0x00); 750 751 // clear out data that has been passed, up to rcv_nxt_, 752 // collects flags 753 754 flags |= rq_.cleartonxt(); 755 756 return (flags); 757 } 758 759 /* 760 * utility function to set rcv_next_ during inital exchange of seq #s 761 */ 762 763 int 764 FullTcpAgent::rcvseqinit(int seq, int dlen) 765 { 766 return (seq + dlen + 1); 767 } 768 769 /* 770 * build a header with the timestamp option if asked 771 */ 772 int 773 FullTcpAgent::build_options(hdr_tcp* tcph) 774 { 775 int total = 0; 776 if (ts_option_) { 777 tcph->ts() = now(); 778 tcph->ts_echo() = recent_; 779 total += ts_option_size_; 780 } else { 781 tcph->ts() = tcph->ts_echo() = -1.0; 782 } 783 return (total); 784 } 785 786 /* 787 * pack() -- is the ACK a partial ACK? (not past recover_) 788 */ 789 790 int 791 FullTcpAgent::pack(Packet *pkt) 792 { 793 hdr_tcp *tcph = hdr_tcp::access(pkt); 794 /* Added check for fast recovery. -M. 
Weigle 5/2/02 */ 795 return (fastrecov_ && tcph->ackno() >= highest_ack_ && 796 tcph->ackno() < recover_); 797 } 798 799 /* 800 * baseline reno TCP exists fast recovery on a partial ACK 801 */ 802 803 void 804 FullTcpAgent::pack_action(Packet*) 805 { 806 if (reno_fastrecov_ && fastrecov_ && cwnd_ > double(ssthresh_)) { 807 cwnd_ = double(ssthresh_); // retract window if inflated 808 } 809 fastrecov_ = FALSE; 810 //printf("%f: EXITED FAST RECOVERY ", now()); 811 dupacks_ = 0; 812 } 813 814 /* 815 * ack_action -- same as partial ACK action for base Reno TCP 816 */ 817 818 void 819 FullTcpAgent::ack_action(Packet* p) 820 { 821 FullTcpAgent::pack_action(p); 822 } 823 824 825 /* 826 * sendpacket: 827 * allocate a packet, fill in header fields, and send 828 * also keeps stats on # of data pkts, acks, re-xmits, etc 829 * 830 * fill in packet fields. Agent::allocpkt() fills 831 * in most of the network layer fields for us. 832 * So fill in tcp hdr and adjust the packet size. 833 * 834 * Also, set the size of the tcp header. 835 */ 836 void 837 FullTcpAgent::sendpacket(int seqno, int ackno, int pflags, int datalen, int reason, Packet *p) 838 { 839 if (!p) p = allocpkt(); 840 hdr_tcp *tcph = hdr_tcp::access(p); 841 hdr_flags *fh = hdr_flags::access(p); 842 843 /* build basic header w/options */ 844 845 tcph->seqno() = seqno; 846 tcph->ackno() = ackno; 847 tcph->flags() = pflags; 848 tcph->reason() |= reason; // make tcph->reason look like ns1 pkt->flags? 
849 tcph->sa_length() = 0; // may be increased by build_options() 850 tcph->hlen() = tcpip_base_hdr_size_; 851 tcph->hlen() += build_options(tcph); 852 853 /* 854 * Explicit Congestion Notification (ECN) related: 855 * Bits in header: 856 * ECT (EC Capable Transport), 857 * ECNECHO (ECHO of ECN Notification generated at router), 858 * CWR (Congestion Window Reduced from RFC 2481) 859 * States in TCP: 860 * ecn_: I am supposed to do ECN if my peer does 861 * ect_: I am doing ECN (ecn_ should be T and peer does ECN) 862 */ 863 864 if (datalen > 0 && ecn_ ){ 865 // set ect on data packets 866 fh->ect() = ect_; // on after mutual agreement on ECT 867 } else if (ecn_ && ecn_syn_ && ecn_syn_next_ && (pflags & TH_SYN) && (pflags & TH_ACK)) { 868 // set ect on syn/ack packet, if syn packet was negotiating ECT 869 fh->ect() = ect_; 870 } else { 871 /* Set ect() to 0. -M. Weigle 1/19/05 */ 872 fh->ect() = 0; 873 } 874 if (ecn_ && ect_ && recent_ce_ ) { 875 // This is needed here for the ACK in a SYN, SYN/ACK, ACK 876 // sequence. 877 pflags |= TH_ECE; 878 } 879 // fill in CWR and ECE bits which don't actually sit in 880 // the tcp_flags but in hdr_flags 881 if ( pflags & TH_ECE) { 882 fh->ecnecho() = 1; 883 } else { 884 fh->ecnecho() = 0; 885 } 886 if ( pflags & TH_CWR ) { 887 fh->cong_action() = 1; 888 } 889 else { 890 /* Set cong_action() to 0 -M. 
Weigle 1/19/05 */ 891 fh->cong_action() = 0; 892 } 893 894 /* actual size is data length plus header length */ 895 896 hdr_cmn *ch = hdr_cmn::access(p); 897 ch->size() = datalen + tcph->hlen(); 898 899 if (datalen <= 0) 900 ++nackpack_; 901 else { 902 ++ndatapack_; 903 ndatabytes_ += datalen; 904 last_send_time_ = now(); // time of last data 905 } 906 if (reason == REASON_TIMEOUT || reason == REASON_DUPACK || reason == REASON_SACK) { 907 ++nrexmitpack_; 908 nrexmitbytes_ += datalen; 909 } 910 911 last_ack_sent_ = ackno; 912 913 //if (state_ != TCPS_ESTABLISHED) { 914 //printf("%f(%s)[state:%s]: sending pkt ", now(), name(), statestr(state_)); 915 //prpkt(p); 916 //} 917 918 send(p, 0); 919 920 return; 921 } 922 923 // 924 // reset_rtx_timer: called during a retransmission timeout 925 // to perform exponential backoff. Also, note that because 926 // we have performed a retransmission, our rtt timer is now 927 // invalidated (indicate this by setting rtt_active_ false) 928 // 929 void 930 FullTcpAgent::reset_rtx_timer(int /* mild */) 931 { 932 // cancel old timer, set a new one 933 /* if there is no outstanding data, don't back off rtx timer * 934 * (Fix from T. Kelly.) 
*/
	if (!(highest_ack_ == maxseq_ && restart_bugfix_)) {
		rtt_backoff();		// double current timeout
	}
	set_rtx_timer();		// set new timer
	rtt_active_ = FALSE;		// no timing during this window
}

/*
 * foutput: see if we should send a segment, and if so, send it
 *	(may be ACK or data)
 *
 * Parameters:
 *	seqno	sequence number of the first byte the segment would
 *		carry (the next seq# we're going to send)
 *	reason	handed unchanged to sendpacket()
 *
 * Returns the number of sequence numbers consumed by the segment:
 * data bytes sent, counting a SYN or FIN as 1 each.  Returns 0 both
 * when nothing was sent and when only a pure ACK was sent.
 *
 * At most one packet is sent per call (a single sendpacket() call).
 * When a packet is sent this routine clears TF_ACKNOW/TF_DELACK and
 * may also update maxseq_, cong_action_, the RTT-timing variables,
 * the retransmit timer and (when TF_NEEDCLOSE is pending) the
 * connection state.
 *
 * simulator var, desc (name in real TCP)
 * --------------------------------------
 * maxseq_, largest seq# we've sent plus one (snd_max)
 * flags_, flags regarding our internal state (t_state)
 * pflags, a local used to build up the tcp header flags (flags)
 * curseq_, is the highest sequence number given to us by "application"
 * highest_ack_, the highest ACK we've seen for our data (snd_una-1)
 * seqno, the next seq# we're going to send (snd_nxt)
 */
int
FullTcpAgent::foutput(int seqno, int reason)
{
	// if maxseg_ not set, set it appropriately
	// Q: how can this happen?
	// (otherwise size_ is re-derived from maxseg_ so the two agree)

	if (maxseg_ == 0)
		maxseg_ = size_ - headersize();
	else
		size_ = maxseg_ + headersize();

	int is_retransmit = (seqno < maxseq_);	// below what was already sent
	int quiet = (highest_ack_ == maxseq_);	// everything sent is ACKed
	int pflags = outflags();
	int syn = (seqno == iss_);		// segment starts at our ISS
	int emptying_buffer = FALSE;
	int buffered_bytes = (infinite_send_) ? TCP_MAXSEQ :
				curseq_ - highest_ack_ + 1;

	int win = window() * maxseg_;	// window (in bytes)
	int off = seqno - highest_ack_;	// offset of seg in window
	int datalen;
	//int amtsent = 0;

	// be careful if we have not received any ACK yet
	// (offsets are then measured from iss_ rather than highest_ack_)
	if (highest_ack_ < 0) {
		if (!infinite_send_)
			buffered_bytes = curseq_ - iss_;;
		off = seqno - iss_;
	}

	// amount of data we could send starting at seqno: none on a SYN
	// unless data_on_syn_; not limited by win when pipectrl_ is set
	if (syn && !data_on_syn_)
		datalen = 0;
	else if (pipectrl_)
		datalen = buffered_bytes - off;
	else
		datalen = min(buffered_bytes, win) - off;

	if ((signal_on_empty_) && (!buffered_bytes) && (!syn))
		bufferempty();

	//
	// in real TCP datalen (len) could be < 0 if there was window
	// shrinkage, or if a FIN has been sent and neither ACKd nor
	// retransmitted.  Only this 2nd case concerns us here...
	// (datalen is also capped at one segment, maxseg_)
	//
	if (datalen < 0) {
		datalen = 0;
	} else if (datalen > maxseg_) {
		datalen = maxseg_;
	}

	//
	// this is an option that causes us to slow-start if we've
	// been idle for a "long" time, where long means a rto or longer
	// the slow-start is a sort that does not set ssthresh
	//

	if (slow_start_restart_ && quiet && datalen > 0) {
		if (idle_restart()) {
			slowdown(CLOSE_CWND_INIT);
		}
	}

	//
	// see if sending this packet will empty the send buffer
	// a dataless SYN packet counts also
	//

	if (!infinite_send_ && ((seqno + datalen) > curseq_ ||
	    (syn && datalen == 0))) {
		emptying_buffer = TRUE;
		//
		// if not a retransmission, notify application that
		// everything has been sent out at least once.
		//
		// NOTE(review): the guard below tests !syn, not
		// !is_retransmit, so idle() is also reached for
		// retransmitted non-SYN segments -- confirm intent.
		//
		if (!syn) {
			idle();
			if (close_on_empty_ && quiet) {
				flags_ |= TF_NEEDCLOSE;
			}
		}
		pflags |= TH_PUSH;
		//
		// if close_on_empty set, we are finished
		// with this connection; close it
		// (TF_NEEDCLOSE is turned into a FIN after the send: label)
		//
	} else {
		/* not emptying buffer, so can't be FIN */
		pflags &= ~TH_FIN;
	}
	if (infinite_send_ && (syn && datalen == 0))
		pflags |= TH_PUSH;	// set PUSH for dataless SYN

	/* sender SWS avoidance (Nagle) */

	if (datalen > 0) {
		// if full-sized segment, ok
		if (datalen == maxseg_)
			goto send;
		// if Nagle disabled and buffer clearing, ok
		// (also ok when nothing is outstanding, i.e. quiet)
		if ((quiet || nodelay_) && emptying_buffer)
			goto send;
		// if a retransmission
		if (is_retransmit)
			goto send;
		// if big "enough", ok...
		//	(this is not a likely case, and would
		//	only happen for tiny windows)
		if (datalen >= ((wnd_ * maxseg_) / 2.0))
			goto send;
	}

	if (need_send())
		goto send;

	/*
	 * send now if a control packet or we owe peer an ACK
	 * TF_ACKNOW can be set during connection establishment and
	 * to generate acks for out-of-order data
	 */

	if ((flags_ & (TF_ACKNOW|TF_NEEDCLOSE)) ||
	    (pflags & (TH_SYN|TH_FIN))) {
		goto send;
	}

	/*
	 * No reason to send a segment, just return.
	 */
	return 0;

send:

	// is a syn or fin?
	// (from here on syn/fin reflect the header flags actually sent,
	// not the seqno == iss_ test used above)

	syn = (pflags & TH_SYN) ? 1 : 0;
	int fin = (pflags & TH_FIN) ? 1 : 0;

	/* setup ECN syn and ECN SYN+ACK packet headers */
	if (ecn_ && syn && !(pflags & TH_ACK)){
		pflags |= TH_ECE;
		pflags |= TH_CWR;
	}
	/*
	 * NOTE(review): the "else if" below pairs with this second "if"
	 * only, so it is also evaluated for a plain SYN handled by the
	 * first "if" above.
	 */
	if (ecn_ && syn && (pflags & TH_ACK)){
		pflags |= TH_ECE;
		pflags &= ~TH_CWR;
	}
	else if (ecn_ && ect_ && cong_action_ &&
		     (!is_retransmit || SetCWRonRetransmit_)) {
		/*
		 * Don't set CWR for a retransmitted SYN+ACK (has ecn_
		 * and cong_action_ set).
		 * -M. Weigle 6/19/02
		 *
		 * SetCWRonRetransmit_ was changed to true,
		 * allowing CWR on retransmitted data packets.
		 * See test ecn_burstyEcn_reno_full
		 *    in test-suite-ecn-full.tcl.
		 * - Sally Floyd, 6/5/08.
		 */
		/* set CWR if necessary */
		pflags |= TH_CWR;
		/* Turn cong_action_ off: Added 6/5/08, Sally Floyd. */
		cong_action_ = FALSE;
	}

	/* moved from sendpacket()  -M. Weigle 6/19/02 */
	//
	// although CWR bit is ordinarily associated with ECN,
	// it has utility within the simulator for traces. Thus, set
	// it even if we aren't doing ECN
	//
	if (datalen > 0 && cong_action_ && !is_retransmit) {
		pflags |= TH_CWR;
	}

	/* set ECE if necessary */
	if (ecn_ && ect_ && recent_ce_ ) {
		pflags |= TH_ECE;
	}

	/*
	 * Tack on the FIN flag to the data segment if close_on_empty_
	 * was previously set-- avoids sending a separate FIN
	 */
	if (flags_ & TF_NEEDCLOSE) {
		flags_ &= ~TF_NEEDCLOSE;
		if (state_ <= TCPS_ESTABLISHED && state_ != TCPS_CLOSED)
		{
		    pflags |=TH_FIN;
		    fin = 1;  /* FIN consumes sequence number */
		    newstate(TCPS_FIN_WAIT_1);
		}
	}
	sendpacket(seqno, rcv_nxt_, pflags, datalen, reason);

	/*
	 * Data sent (as far as we can tell).
	 * Any pending ACK has now been sent.
	 */
	flags_ &= ~(TF_ACKNOW|TF_DELACK);

	/*
	 * if we have reacted to congestion recently, the
	 * slowdown() procedure will have set cong_action_ and
	 * sendpacket will have copied that to the outgoing pkt
	 * CWR field. If that packet contains data, then
	 * it will be reliably delivered, so we are free to turn off the
	 * cong_action_ state now.  If only a pure ACK, we keep the state
	 * around until we actually send a segment
	 */

	int reliable = datalen + syn + fin; // seq #'s reliably sent
	/*
	 * Don't reset cong_action_ until we send new data.
	 * -M. Weigle 6/19/02
	 */
	if (cong_action_ && reliable > 0 && !is_retransmit)
		cong_action_ = FALSE;

	// highest: greatest sequence number sent + 1
	//	and adjusted for SYNs and FINs which use up one number

	int highest = seqno + reliable;
	if (highest > maxseq_) {
		maxseq_ = highest;
		//
		// if we are using conventional RTT estimation,
		// establish timing on this segment
		// (only when no other segment is currently being timed)
		//
		if (!ts_option_ && rtt_active_ == FALSE) {
			rtt_active_ = TRUE;	// set timer
			rtt_seq_ = seqno;	// timed seq #
			rtt_ts_ = now();	// when set
		}
	}

	/*
	 * Set retransmit timer if not currently set,
	 * and not doing an ack or a keep-alive probe.
	 * Initial value for retransmit timer is smoothed
	 * round-trip time + 2 * round-trip time variance.
	 * Future values are rtt + 4 * rttvar.
	 */
	if (rtx_timer_.status() != TIMER_PENDING && reliable) {
		set_rtx_timer();  // no timer pending, schedule one
	}

	return (reliable);
}

/*
 *
 * send_much: send as much data as we are allowed to.  This is
 * controlled by the "pipectrl_" variable.  If pipectrl_ is set
 * to FALSE, then we are working as a normal window-based TCP and
 * we are allowed to send whatever the window allows.
 * If pipectrl_ is set to TRUE, then we are allowed to send whatever
 * pipe_ allows us to send.  One tricky part is to make sure we
 * do not overshoot the receiver's advertised window if we are
 * in (pipectrl_ == TRUE) mode.
1218 */ 1219 1220 void 1221 FullTcpAgent::send_much(int force, int reason, int maxburst) 1222 { 1223 int npackets = 0; // sent so far 1224 1225 //if ((int(t_seqno_)) > 1) 1226 //printf("%f: send_much(f:%d, win:%d, pipectrl:%d, pipe:%d, t_seqno:%d, topwin:%d, maxseq_:%d ", 1227 //now(), force, win, pipectrl_, pipe_, int(t_seqno_), topwin, int(maxseq_)); 1228 1229 if (!force && (delsnd_timer_.status() == TIMER_PENDING)) 1230 return; 1231 1232 while (1) { 1233 1234 /* 1235 * note that if output decides to not actually send 1236 * (e.g. because of Nagle), then if we don't break out 1237 * of this loop, we can loop forever at the same 1238 * simulated time instant 1239 */ 1240 int amt; 1241 int seq = nxt_tseq(); 1242 if (!force && !send_allowed(seq)) 1243 break; 1244 // Q: does this need to be here too? 1245 if (!force && overhead_ != 0 && 1246 (delsnd_timer_.status() != TIMER_PENDING)) { 1247 delsnd_timer_.resched(Random::uniform(overhead_)); 1248 return; 1249 } 1250 if ((amt = foutput(seq, reason)) <= 0) 1251 break; 1252 if ((outflags() & TH_FIN)) 1253 --amt; // don't count FINs 1254 sent(seq, amt); 1255 force = 0; 1256 1257 if ((outflags() & (TH_SYN|TH_FIN)) || 1258 (maxburst && ++npackets >= maxburst)) 1259 break; 1260 } 1261 return; 1262 } 1263 1264 /* 1265 * base TCP: we are allowed to send a sequence number if it 1266 * is in the window 1267 */ 1268 int 1269 FullTcpAgent::send_allowed(int seq) 1270 { 1271 int win = window() * maxseg_; 1272 int topwin = curseq_; // 1 seq number past the last byte we can send 1273 1274 if ((topwin > highest_ack_ + win) || infinite_send_) 1275 topwin = highest_ack_ + win; 1276 1277 return (seq < topwin); 1278 } 1279 /* 1280 * Process an ACK 1281 * this version of the routine doesn't necessarily 1282 * require the ack to be one which advances the ack number 1283 * 1284 * if this ACKs a rtt estimate 1285 * indicate we are not timing 1286 * reset the exponential timer backoff (gamma) 1287 * update rtt estimate 1288 * cancel retrans 
timer if everything is sent and ACK'd, else set it 1289 * advance the ack number if appropriate 1290 * update segment to send next if appropriate 1291 */ 1292 void 1293 FullTcpAgent::newack(Packet* pkt) 1294 { 1295 hdr_tcp *tcph = hdr_tcp::access(pkt); 1296 1297 register int ackno = tcph->ackno(); 1298 int progress = (ackno > highest_ack_); 1299 1300 if (ackno == maxseq_) { 1301 cancel_rtx_timer(); // all data ACKd 1302 } else if (progress) { 1303 set_rtx_timer(); 1304 } 1305 1306 // advance the ack number if this is for new data 1307 if (progress) 1308 highest_ack_ = ackno; 1309 1310 // if we have suffered a retransmit timeout, t_seqno_ 1311 // will have been reset to highest_ ack. If the 1312 // receiver has cached some data above t_seqno_, the 1313 // new-ack value could (should) jump forward. We must 1314 // update t_seqno_ here, otherwise we would be doing 1315 // go-back-n. 1316 1317 if (t_seqno_ < highest_ack_) 1318 t_seqno_ = highest_ack_; // seq# to send next 1319 1320 /* 1321 * Update RTT only if it's OK to do so from info in the flags header. 1322 * This is needed for protocols in which intermediate agents 1323 1324 * in the network intersperse acks (e.g., ack-reconstructors) for 1325 * various reasons (without violating e2e semantics). 1326 */ 1327 hdr_flags *fh = hdr_flags::access(pkt); 1328 1329 if (!fh->no_ts_) { 1330 if (ts_option_) { 1331 recent_age_ = now(); 1332 recent_ = tcph->ts(); 1333 rtt_update(now() - tcph->ts_echo()); 1334 if (ts_resetRTO_ && (!ect_ || !ecn_backoff_ || 1335 !hdr_flags::access(pkt)->ecnecho())) { 1336 // From Andrei Gurtov 1337 // 1338 // Don't end backoff if still in ECN-Echo with 1339 // a congestion window of 1 packet. 1340 t_backoff_ = 1; 1341 } 1342 } else if (rtt_active_ && ackno > rtt_seq_) { 1343 // got an RTT sample, record it 1344 // "t_backoff_ = 1;" deleted by T. Kelly. 
1345 rtt_active_ = FALSE; 1346 rtt_update(now() - rtt_ts_); 1347 } 1348 if (!ect_ || !ecn_backoff_ || 1349 !hdr_flags::access(pkt)->ecnecho()) { 1350 /* 1351 * Don't end backoff if still in ECN-Echo with 1352 * a congestion window of 1 packet. 1353 * Fix from T. Kelly. 1354 */ 1355 t_backoff_ = 1; 1356 ecn_backoff_ = 0; 1357 } 1358 1359 } 1360 return; 1361 } 1362 1363 /* 1364 * this is the simulated form of the header prediction 1365 * predicate. While not really necessary for a simulation, it 1366 * follows the code base more closely and can sometimes help to reveal 1367 * odd behavior caused by the implementation structure.. 1368 * 1369 * Here's the comment from the real TCP: 1370 * 1371 * Header prediction: check for the two common cases 1372 * of a uni-directional data xfer. If the packet has 1373 * no control flags, is in-sequence, the window didn't 1374 * change and we're not retransmitting, it's a 1375 * candidate. If the length is zero and the ack moved 1376 * forward, we're the sender side of the xfer. Just 1377 * free the data acked & wake any higher level process 1378 * that was blocked waiting for space. If the length 1379 * is non-zero and the ack didn't move, we're the 1380 * receiver side. If we're getting packets in-order 1381 * (the reassembly queue is empty), add the data to 1382 * the socket buffer and note that we need a delayed ack. 1383 * Make sure that the hidden state-flags are also off. 1384 * Since we check for TCPS_ESTABLISHED above, it can only 1385 * be TF_NEEDSYN. 
1386 */ 1387 1388 int 1389 FullTcpAgent::predict_ok(Packet* pkt) 1390 { 1391 hdr_tcp *tcph = hdr_tcp::access(pkt); 1392 hdr_flags *fh = hdr_flags::access(pkt); 1393 1394 /* not the fastest way to do this, but perhaps clearest */ 1395 1396 int p1 = (state_ == TCPS_ESTABLISHED); // ready 1397 int p2 = ((tcph->flags() & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK); // ACK 1398 int p3 = ((flags_ & TF_NEEDFIN) == 0); // don't need fin 1399 int p4 = (!ts_option_ || fh->no_ts_ || (tcph->ts() >= recent_)); // tsok 1400 int p5 = (tcph->seqno() == rcv_nxt_); // in-order data 1401 int p6 = (t_seqno_ == maxseq_); // not re-xmit 1402 int p7 = (!ecn_ || fh->ecnecho() == 0); // no ECN 1403 int p8 = (tcph->sa_length() == 0); // no SACK info 1404 1405 return (p1 && p2 && p3 && p4 && p5 && p6 && p7 && p8); 1406 } 1407 1408 /* 1409 * fast_retransmit using the given seqno 1410 * perform fast RTX, set recover_, set last_cwnd_action 1411 */ 1412 1413 int 1414 FullTcpAgent::fast_retransmit(int seq) 1415 { 1416 // we are now going to fast-retransmit and willtrace that event 1417 trace_event("FAST_RETX"); 1418 1419 recover_ = maxseq_; // recovery target 1420 last_cwnd_action_ = CWND_ACTION_DUPACK; 1421 return(foutput(seq, REASON_DUPACK)); // send one pkt 1422 } 1423 1424 /* 1425 * real tcp determines if the remote 1426 * side should receive a window update/ACK from us, and often 1427 * results in sending an update every 2 segments, thereby 1428 * giving the familiar 2-packets-per-ack behavior of TCP. 1429 * Here, we don't advertise any windows, so we just see if 1430 * there's at least 'segs_per_ack_' pkts not yet acked 1431 * 1432 * also, provide for a segs-per-ack "threshold" where 1433 * we generate 1-ack-per-seg until enough stuff 1434 * (spa_thresh_ bytes) has been received from the other side 1435 * This idea came from vj/kmn in BayTcp. Added 8/21/01. 
1436 */ 1437 1438 int 1439 FullTcpAgent::need_send() 1440 { 1441 if (flags_ & TF_ACKNOW) 1442 return TRUE; 1443 1444 int spa = (spa_thresh_ > 0 && ((rcv_nxt_ - irs_) < spa_thresh_)) ? 1445 1 : segs_per_ack_; 1446 1447 return ((rcv_nxt_ - last_ack_sent_) >= (spa * maxseg_)); 1448 } 1449 1450 /* 1451 * determine whether enough time has elapsed in order to 1452 * conclude a "restart" is necessary (e.g. a slow-start) 1453 * 1454 * for now, keep track of this similarly to how rtt_update() does 1455 */ 1456 1457 int 1458 FullTcpAgent::idle_restart() 1459 { 1460 if (last_send_time_ < 0.0) { 1461 // last_send_time_ isn't set up yet, we shouldn't 1462 // do the idle_restart 1463 return (0); 1464 } 1465 1466 double tao = now() - last_send_time_; 1467 if (!ts_option_) { 1468 double tickoff = fmod(last_send_time_ + boot_time_, 1469 tcp_tick_); 1470 tao = int((tao + tickoff) / tcp_tick_) * tcp_tick_; 1471 } 1472 1473 return (tao > t_rtxcur_); // verify this CHECKME 1474 } 1475 1476 /* 1477 * tcp-full's version of set_initial_window()... over-rides 1478 * the one in tcp.cc 1479 */ 1480 void 1481 FullTcpAgent::set_initial_window() 1482 { 1483 syn_ = TRUE; // full-tcp always models SYN exchange 1484 TcpAgent::set_initial_window(); 1485 } 1486 1487 /* 1488 * main reception path - 1489 * called from the agent that handles the data path below in its muxing mode 1490 * advance() is called when connection is established with size sent from 1491 * user/application agent 1492 * 1493 * This is a fairly complex function. 
It operates generally as follows: 1494 * do header prediction for simple cases (pure ACKS or data) 1495 * if in LISTEN and we get a SYN, begin initializing connection 1496 * if in SYN_SENT and we get an ACK, complete connection init 1497 * trim any redundant data from received dataful segment 1498 * deal with ACKS: 1499 * if in SYN_RCVD, complete connection init then go on 1500 * see if ACK is old or at the current highest_ack 1501 * if at current high, is the threshold reached or not 1502 * if so, maybe do fast rtx... otherwise drop or inflate win 1503 * deal with incoming data 1504 * deal with FIN bit on in arriving packet 1505 */ 1506 void 1507 FullTcpAgent::recv(Packet *pkt, Handler*) 1508 { 1509 hdr_tcp *tcph = hdr_tcp::access(pkt); // TCP header 1510 hdr_cmn *th = hdr_cmn::access(pkt); // common header (size, etc) 1511 hdr_flags *fh = hdr_flags::access(pkt); // flags (CWR, CE, bits) 1512 1513 int needoutput = FALSE; 1514 int ourfinisacked = FALSE; 1515 int dupseg = FALSE; // recv'd dup data segment 1516 int todrop = 0; // duplicate DATA cnt in seg 1517 1518 last_state_ = state_; 1519 1520 int datalen = th->size() - tcph->hlen(); // # payload bytes 1521 int ackno = tcph->ackno(); // ack # from packet 1522 int tiflags = tcph->flags() ; // tcp flags from packet 1523 1524 //if (state_ != TCPS_ESTABLISHED || (tiflags&(TH_SYN|TH_FIN))) { 1525 //fprintf(stdout, "%f(%s)in state %s recv'd this packet: ", now(), name(), statestr(state_)); 1526 //prpkt(pkt); 1527 //} 1528 1529 /* 1530 * Acknowledge FIN from passive closer even in TCPS_CLOSED state 1531 * (since we lack TIME_WAIT state and RST packets, 1532 * the loss of the FIN packet from the passive closer will make that 1533 * endpoint retransmit the FIN forever) 1534 * -F. 
Hernandez-Campos 8/6/00 1535 */ 1536 if ( (state_ == TCPS_CLOSED) && (tiflags & TH_FIN) ) { 1537 goto dropafterack; 1538 } 1539 1540 /* 1541 * Don't expect to see anything while closed 1542 */ 1543 1544 if (state_ == TCPS_CLOSED) { 1545 if (debug_) { 1546 fprintf(stderr, "%f: FullTcp(%s): recv'd pkt in CLOSED state: ", 1547 now(), name()); 1548 prpkt(pkt); 1549 } 1550 goto drop; 1551 } 1552 1553 /* 1554 * Process options if not in LISTEN state, 1555 * else do it below 1556 */ 1557 if (state_ != TCPS_LISTEN) 1558 dooptions(pkt); 1559 1560 /* 1561 * if we are using delayed-ACK timers and 1562 * no delayed-ACK timer is set, set one. 1563 * They are set to fire every 'interval_' secs, starting 1564 * at time t0 = (0.0 + k * interval_) for some k such 1565 * that t0 > now 1566 */ 1567 if (delack_interval_ > 0.0 && 1568 (delack_timer_.status() != TIMER_PENDING)) { 1569 int last = int(now() / delack_interval_); 1570 delack_timer_.resched(delack_interval_ * (last + 1.0) - now()); 1571 } 1572 1573 /* 1574 * Try header prediction: in seq data or in seq pure ACK 1575 * with no funny business 1576 */ 1577 if (!nopredict_ && predict_ok(pkt)) { 1578 /* 1579 * If last ACK falls within this segment's sequence numbers, 1580 * record the timestamp. 1581 * See RFC1323 (now RFC1323 bis) 1582 */ 1583 if (ts_option_ && !fh->no_ts_ && 1584 tcph->seqno() <= last_ack_sent_) { 1585 /* 1586 * this is the case where the ts value is newer than 1587 * the last one we've seen, and the seq # is the one 1588 * we expect [seqno == last_ack_sent_] or older 1589 */ 1590 recent_age_ = now(); 1591 recent_ = tcph->ts(); 1592 } 1593 1594 // 1595 // generate a stream of ecnecho bits until we see a true 1596 // cong_action bit 1597 // 1598 1599 if (ecn_) { 1600 if (fh->ce() && fh->ect()) { 1601 // no CWR from peer yet... arrange to 1602 // keep sending ECNECHO 1603 recent_ce_ = TRUE; 1604 } else if (fh->cwr()) { 1605 // got CWR response from peer.. 
stop 1606 // sending ECNECHO bits 1607 recent_ce_ = FALSE; 1608 } 1609 } 1610 1611 // Header predication basically looks to see 1612 // if the incoming packet is an expected pure ACK 1613 // or an expected data segment 1614 1615 if (datalen == 0) { 1616 // check for a received pure ACK in the correct range.. 1617 // also checks to see if we are wnd_ limited 1618 // (we don't change cwnd at all below), plus 1619 // not being in fast recovery and not a partial ack. 1620 // If we are in fast 1621 // recovery, go below so we can remember to deflate 1622 // the window if we need to 1623 if (ackno > highest_ack_ && ackno < maxseq_ && 1624 cwnd_ >= wnd_ && !fastrecov_) { 1625 newack(pkt); // update timers, highest_ack_ 1626 send_much(0, REASON_NORMAL, maxburst_); 1627 Packet::free(pkt); 1628 return; 1629 } 1630 } else if (ackno == highest_ack_ && rq_.empty()) { 1631 // check for pure incoming segment 1632 // the next data segment we're awaiting, and 1633 // that there's nothing sitting in the reassem- 1634 // bly queue 1635 // give to "application" here 1636 // note: DELACK is inspected only by 1637 // tcp_fasttimo() in real tcp. Every 200 ms 1638 // this routine scans all tcpcb's looking for 1639 // DELACK segments and when it finds them 1640 // changes DELACK to ACKNOW and calls tcp_output() 1641 rcv_nxt_ += datalen; 1642 flags_ |= TF_DELACK; 1643 recvBytes(datalen); // notify application of "delivery" 1644 // 1645 // special code here to simulate the operation 1646 // of a receiver who always consumes data, 1647 // resulting in a call to tcp_output 1648 Packet::free(pkt); 1649 if (need_send()) 1650 send_much(1, REASON_NORMAL, maxburst_); 1651 return; 1652 } 1653 } /* header prediction */ 1654 1655 1656 // 1657 // header prediction failed 1658 // (e.g. pure ACK out of valid range, SACK present, etc)... 
1659 // do slow path processing 1660 1661 // 1662 // the following switch does special things for these states: 1663 // TCPS_LISTEN, TCPS_SYN_SENT 1664 // 1665 1666 switch (state_) { 1667 1668 /* 1669 * If the segment contains an ACK then it is bad and do reset. 1670 * If it does not contain a SYN then it is not interesting; drop it. 1671 * Otherwise initialize tp->rcv_nxt, and tp->irs, iss is already 1672 * selected, and send a segment: 1673 * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK> 1674 * Initialize tp->snd_nxt to tp->iss. 1675 * Enter SYN_RECEIVED state, and process any other fields of this 1676 * segment in this state. 1677 */ 1678 1679 case TCPS_LISTEN: /* awaiting peer's SYN */ 1680 1681 if (tiflags & TH_ACK) { 1682 if (debug_) { 1683 fprintf(stderr, 1684 "%f: FullTcpAgent(%s): warning: recv'd ACK while in LISTEN: ", 1685 now(), name()); 1686 prpkt(pkt); 1687 } 1688 // don't want ACKs in LISTEN 1689 goto dropwithreset; 1690 } 1691 if ((tiflags & TH_SYN) == 0) { 1692 if (debug_) { 1693 fprintf(stderr, "%f: FullTcpAgent(%s): warning: recv'd NON-SYN while in LISTEN ", 1694 now(), name()); 1695 prpkt(pkt); 1696 } 1697 // any non-SYN is discarded 1698 goto drop; 1699 } 1700 1701 /* 1702 * must by a SYN (no ACK) at this point... 1703 * in real tcp we would bump the iss counter here also 1704 */ 1705 dooptions(pkt); 1706 irs_ = tcph->seqno(); 1707 t_seqno_ = iss_; /* tcp_sendseqinit() macro in real tcp */ 1708 rcv_nxt_ = rcvseqinit(irs_, datalen); 1709 flags_ |= TF_ACKNOW; 1710 1711 // check for a ECN-SYN with ECE|CWR 1712 if (ecn_ && fh->ecnecho() && fh->cong_action()) { 1713 ect_ = TRUE; 1714 } 1715 1716 1717 if (fid_ == 0) { 1718 // XXX: sort of hack... 
If we do not 1719 // have a special flow ID, pick up that 1720 // of the sender (active opener) 1721 hdr_ip* iph = hdr_ip::access(pkt); 1722 fid_ = iph->flowid(); 1723 } 1724 1725 newstate(TCPS_SYN_RECEIVED); 1726 goto trimthenstep6; 1727 1728 /* 1729 * If the state is SYN_SENT: 1730 * if seg contains an ACK, but not for our SYN, drop the input. 1731 * if seg does not contain SYN, then drop it. 1732 * Otherwise this is an acceptable SYN segment 1733 * initialize tp->rcv_nxt and tp->irs 1734 * if seg contains ack then advance tp->snd_una 1735 * if SYN has been acked change to ESTABLISHED else SYN_RCVD state 1736 * arrange for segment to be acked (eventually) 1737 * continue processing rest of data/controls, beginning with URG 1738 */ 1739 1740 case TCPS_SYN_SENT: /* we sent SYN, expecting SYN+ACK (or SYN) */ 1741 1742 /* drop if it's a SYN+ACK and the ack field is bad */ 1743 if ((tiflags & TH_ACK) && 1744 ((ackno <= iss_) || (ackno > maxseq_))) { 1745 // not an ACK for our SYN, discard 1746 if (debug_) { 1747 fprintf(stderr, "%f: FullTcpAgent::recv(%s): bad ACK for our SYN: ", 1748 now(), name()); 1749 prpkt(pkt); 1750 } 1751 goto dropwithreset; 1752 } 1753 1754 if ((tiflags & TH_SYN) == 0) { 1755 if (debug_) { 1756 fprintf(stderr, "%f: FullTcpAgent::recv(%s): no SYN for our SYN: ", 1757 now(), name()); 1758 prpkt(pkt); 1759 } 1760 goto drop; 1761 } 1762 1763 /* looks like an ok SYN or SYN+ACK */ 1764 // If ecn_syn_wait is set to 2: 1765 // Check if CE-marked SYN/ACK packet, then just send an ACK 1766 // packet with ECE set, and drop the SYN/ACK packet. 1767 // Don't update TCP state. 
1768 if (tiflags & TH_ACK) 1769 { 1770 if (ecn_ && fh->ecnecho() && !fh->cong_action() && ecn_syn_wait_ == 2) 1771 // if SYN/ACK packet and ecn_syn_wait_ == 2 1772 { 1773 if ( fh->ce() ) 1774 // If SYN/ACK packet is CE-marked 1775 { 1776 //cancel_rtx_timer(); 1777 //newack(pkt); 1778 set_rtx_timer(); 1779 sendpacket(t_seqno_, rcv_nxt_, TH_ACK|TH_ECE, 0, 0); 1780 goto drop; 1781 } 1782 } 1783 } 1784 1785 1786 #ifdef notdef 1787 cancel_rtx_timer(); // cancel timer on our 1st SYN [does this belong!?] 1788 #endif 1789 irs_ = tcph->seqno(); // get initial recv'd seq # 1790 rcv_nxt_ = rcvseqinit(irs_, datalen); 1791 1792 if (tiflags & TH_ACK) { 1793 // SYN+ACK (our SYN was acked) 1794 if (ecn_ && fh->ecnecho() && !fh->cong_action()) { 1795 ect_ = TRUE; 1796 if ( fh->ce() ) 1797 recent_ce_ = TRUE; 1798 } 1799 highest_ack_ = ackno; 1800 cwnd_ = initial_window(); 1801 1802 #ifdef notdef 1803 /* 1804 * if we didn't have to retransmit the SYN, 1805 * use its rtt as our initial srtt & rtt var. 1806 */ 1807 if (t_rtt_) { 1808 double tao = now() - tcph->ts(); 1809 rtt_update(tao); 1810 } 1811 #endif 1812 1813 /* 1814 * if there's data, delay ACK; if there's also a FIN 1815 * ACKNOW will be turned on later. 1816 */ 1817 if (datalen > 0) { 1818 flags_ |= TF_DELACK; // data there: wait 1819 } else { 1820 flags_ |= TF_ACKNOW; // ACK peer's SYN 1821 } 1822 1823 /* 1824 * Received <SYN,ACK> in SYN_SENT[*] state. 1825 * Transitions: 1826 * SYN_SENT --> ESTABLISHED 1827 * SYN_SENT* --> FIN_WAIT_1 1828 */ 1829 1830 if (flags_ & TF_NEEDFIN) { 1831 newstate(TCPS_FIN_WAIT_1); 1832 flags_ &= ~TF_NEEDFIN; 1833 tiflags &= ~TH_SYN; 1834 } else { 1835 newstate(TCPS_ESTABLISHED); 1836 } 1837 1838 // special to ns: 1839 // generate pure ACK here. 1840 // this simulates the ordinary connection establishment 1841 // where the ACK of the peer's SYN+ACK contains 1842 // no data. 
This is typically caused by the way 1843 // the connect() socket call works in which the 1844 // entire 3-way handshake occurs prior to the app 1845 // being able to issue a write() [which actually 1846 // causes the segment to be sent]. 1847 sendpacket(t_seqno_, rcv_nxt_, TH_ACK, 0, 0); 1848 } else { 1849 // Check ECN-SYN packet 1850 if (ecn_ && fh->ecnecho() && fh->cong_action()) 1851 ect_ = TRUE; 1852 1853 // SYN (no ACK) (simultaneous active opens) 1854 flags_ |= TF_ACKNOW; 1855 cancel_rtx_timer(); 1856 newstate(TCPS_SYN_RECEIVED); 1857 /* 1858 * decrement t_seqno_: we are sending a 1859 * 2nd SYN (this time in the form of a 1860 * SYN+ACK, so t_seqno_ will have been 1861 * advanced to 2... reduce this 1862 */ 1863 t_seqno_--; // CHECKME 1864 } 1865 1866 trimthenstep6: 1867 /* 1868 * advance the seq# to correspond to first data byte 1869 */ 1870 tcph->seqno()++; 1871 1872 if (tiflags & TH_ACK) 1873 goto process_ACK; 1874 1875 goto step6; 1876 1877 case TCPS_LAST_ACK: 1878 /* 1879 * The only way we're in LAST_ACK is if we've already 1880 * received a FIN, so ignore all retranmitted FINS. 1881 * -M. Weigle 7/23/02 1882 */ 1883 if (tiflags & TH_FIN) { 1884 goto drop; 1885 } 1886 break; 1887 case TCPS_CLOSING: 1888 break; 1889 } /* end switch(state_) */ 1890 1891 /* 1892 * States other than LISTEN or SYN_SENT. 1893 * First check timestamp, if present. 1894 * Then check that at least some bytes of segment are within 1895 * receive window. If segment begins before rcv_nxt, 1896 * drop leading data (and SYN); if nothing left, just ack. 1897 * 1898 * RFC 1323 PAWS: If we have a timestamp reply on this segment 1899 * and it's less than ts_recent, drop it. 1900 */ 1901 1902 if (ts_option_ && !fh->no_ts_ && recent_ && tcph->ts() < recent_) { 1903 if ((now() - recent_age_) > TCP_PAWS_IDLE) { 1904 /* 1905 * this is basically impossible in the simulator, 1906 * but here it is... 1907 */ 1908 /* 1909 * Invalidate ts_recent. 
If this segment updates 1910 * ts_recent, the age will be reset later and ts_recent 1911 * will get a valid value. If it does not, setting 1912 * ts_recent to zero will at least satisfy the 1913 * requirement that zero be placed in the timestamp 1914 * echo reply when ts_recent isn't valid. The 1915 * age isn't reset until we get a valid ts_recent 1916 * because we don't want out-of-order segments to be 1917 * dropped when ts_recent is old. 1918 */ 1919 recent_ = 0.0; 1920 } else { 1921 fprintf(stderr, "%f: FullTcpAgent(%s): dropped pkt due to bad ts ", 1922 now(), name()); 1923 goto dropafterack; 1924 } 1925 } 1926 1927 // check for redundant data at head/tail of segment 1928 // note that the 4.4bsd [Net/3] code has 1929 // a bug here which can cause us to ignore the 1930 // perfectly good ACKs on duplicate segments. The 1931 // fix is described in (Stevens, Vol2, p. 959-960). 1932 // This code is based on that correction. 1933 // 1934 // In addition, it has a modification so that duplicate segments 1935 // with dup acks don't trigger a fast retransmit when dupseg_fix_ 1936 // is enabled. 1937 // 1938 // Yet one more modification: make sure that if the received 1939 // segment had datalen=0 and wasn't a SYN or FIN that 1940 // we don't turn on the ACKNOW status bit. If we were to 1941 // allow ACKNOW to be turned on, normal pure ACKs that happen 1942 // to have seq #s below rcv_nxt can trigger an ACK war by 1943 // forcing us to ACK the pure ACKs 1944 // 1945 // Update: if we have a dataless FIN, don't really want to 1946 // do anything with it. In particular, would like to 1947 // avoid ACKing an incoming FIN+ACK while in CLOSING 1948 // 1949 todrop = rcv_nxt_ - tcph->seqno(); // how much overlap? 
1950 1951 if (todrop > 0 && ((tiflags & (TH_SYN)) || datalen > 0)) { 1952 //printf("%f(%s): trim 1..todrop:%d, dlen:%d ",now(), name(), todrop, datalen); 1953 if (tiflags & TH_SYN) { 1954 tiflags &= ~TH_SYN; 1955 tcph->seqno()++; 1956 th->size()--; // XXX Must decrease packet size too!! 1957 // Q: Why?.. this is only a SYN 1958 todrop--; 1959 } 1960 // 1961 // see Stevens, vol 2, p. 960 for this check; 1962 // this check is to see if we are dropping 1963 // more than this segment (i.e. the whole pkt + a FIN), 1964 // or just the whole packet (no FIN) 1965 // 1966 if ((todrop > datalen) || 1967 (todrop == datalen && ((tiflags & TH_FIN) == 0))) { 1968 //printf("%f(%s): trim 2..todrop:%d, dlen:%d ",now(), name(), todrop, datalen); 1969 /* 1970 * Any valid FIN must be to the left of the window. 1971 * At this point the FIN must be a duplicate or out 1972 * of sequence; drop it. 1973 */ 1974 1975 tiflags &= ~TH_FIN; 1976 1977 /* 1978 * Send an ACK to resynchronize and drop any data. 1979 * But keep on processing for RST or ACK. 1980 */ 1981 1982 flags_ |= TF_ACKNOW; 1983 todrop = datalen; 1984 dupseg = TRUE; // *completely* duplicate 1985 1986 } 1987 1988 /* 1989 * Trim duplicate data from the front of the packet 1990 */ 1991 1992 tcph->seqno() += todrop; 1993 th->size() -= todrop; // XXX Must decrease size too!! 1994 // why? [kf]..prob when put in RQ 1995 datalen -= todrop; 1996 1997 } /* data trim */ 1998 1999 /* 2000 * If we are doing timstamps and this packet has one, and 2001 * If last ACK falls within this segment's sequence numbers, 2002 * record the timestamp. 
2003 * See RFC1323 (now RFC1323 bis) 2004 */ 2005 if (ts_option_ && !fh->no_ts_ && tcph->seqno() <= last_ack_sent_) { 2006 /* 2007 * this is the case where the ts value is newer than 2008 * the last one we've seen, and the seq # is the one we expect 2009 * [seqno == last_ack_sent_] or older 2010 */ 2011 recent_age_ = now(); 2012 recent_ = tcph->ts(); 2013 } 2014 2015 if (tiflags & TH_SYN) { 2016 if (debug_) { 2017 fprintf(stderr, "%f: FullTcpAgent::recv(%s) received unexpected SYN (state:%d): ", 2018 now(), name(), state_); 2019 prpkt(pkt); 2020 } 2021 goto dropwithreset; 2022 } 2023 2024 if ((tiflags & TH_ACK) == 0) { 2025 /* 2026 * Added check for state != SYN_RECEIVED. We will receive a 2027 * duplicate SYN in SYN_RECEIVED when our SYN/ACK was dropped. 2028 * We should just ignore the duplicate SYN (our timeout for 2029 * resending the SYN/ACK is about the same as the client's 2030 * timeout for resending the SYN), but give no error message. 2031 * -M. Weigle 07/24/01 2032 */ 2033 if (state_ != TCPS_SYN_RECEIVED) { 2034 if (debug_) { 2035 fprintf(stderr, "%f: FullTcpAgent::recv(%s) got packet lacking ACK (state:%d): ", 2036 now(), name(), state_); 2037 prpkt(pkt); 2038 } 2039 } 2040 goto drop; 2041 } 2042 2043 /* 2044 * Ack processing. 2045 */ 2046 2047 switch (state_) { 2048 case TCPS_SYN_RECEIVED: /* want ACK for our SYN+ACK */ 2049 if (ackno < highest_ack_ || ackno > maxseq_) { 2050 // not in useful range 2051 if (debug_) { 2052 fprintf(stderr, "%f: FullTcpAgent(%s): ack(%d) not in range while in SYN_RECEIVED: ", 2053 now(), name(), ackno); 2054 prpkt(pkt); 2055 } 2056 goto dropwithreset; 2057 } 2058 2059 if (ecn_ && ect_ && ecn_syn_ && fh->ecnecho() && ecn_syn_wait_ == 2) 2060 { 2061 // The SYN/ACK packet was ECN-marked. 2062 // Reset the rtx timer, send another SYN/ACK packet 2063 // immediately, and drop the ACK packet. 2064 // Do not move to TCPS_ESTB state or update TCP variables. 
2065 cancel_rtx_timer(); 2066 ecn_syn_next_ = 0; 2067 foutput(iss_, REASON_NORMAL); 2068 wnd_init_option_ = 1; 2069 wnd_init_ = 1; 2070 goto drop; 2071 } 2072 if (ecn_ && ect_ && ecn_syn_ && fh->ecnecho() && ecn_syn_wait_ < 2) { 2073 // The SYN/ACK packet was ECN-marked. 2074 if (ecn_syn_wait_ == 1) { 2075 // A timer will be called in ecn(). 2076 cwnd_ = 1; 2077 use_rtt_ = 1; //KK, wait for timeout() period 2078 } else { 2079 // Congestion window will be halved in ecn(). 2080 cwnd_ = 2; 2081 } 2082 } else { 2083 cwnd_ = initial_window(); 2084 } 2085 2086 /* 2087 * Make transitions: 2088 * SYN-RECEIVED -> ESTABLISHED 2089 * SYN-RECEIVED* -> FIN-WAIT-1 2090 */ 2091 if (flags_ & TF_NEEDFIN) { 2092 newstate(TCPS_FIN_WAIT_1); 2093 flags_ &= ~TF_NEEDFIN; 2094 } else { 2095 newstate(TCPS_ESTABLISHED); 2096 } 2097 2098 /* fall into ... */ 2099 2100 2101 /* 2102 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range 2103 * ACKs. If the ack is in the range 2104 * tp->snd_una < ti->ti_ack <= tp->snd_max 2105 * then advance tp->snd_una to ti->ti_ack and drop 2106 * data from the retransmission queue. 2107 * 2108 * note that state TIME_WAIT isn't used 2109 * in the simulator 2110 */ 2111 2112 case TCPS_ESTABLISHED: 2113 case TCPS_FIN_WAIT_1: 2114 case TCPS_FIN_WAIT_2: 2115 case TCPS_CLOSE_WAIT: 2116 case TCPS_CLOSING: 2117 case TCPS_LAST_ACK: 2118 2119 // 2120 // look for ECNs in ACKs, react as necessary 2121 // 2122 2123 if (fh->ecnecho() && (!ecn_ || !ect_)) { 2124 fprintf(stderr, 2125 "%f: FullTcp(%s): warning, recvd ecnecho but I am not ECN capable! 
", 2126 now(), name()); 2127 } 2128 2129 // 2130 // generate a stream of ecnecho bits until we see a true 2131 // cong_action bit 2132 // 2133 if (ecn_) { 2134 if (fh->ce() && fh->ect()) 2135 recent_ce_ = TRUE; 2136 else if (fh->cwr()) 2137 recent_ce_ = FALSE; 2138 } 2139 2140 // 2141 // If ESTABLISHED or starting to close, process SACKS 2142 // 2143 2144 if (state_ >= TCPS_ESTABLISHED && tcph->sa_length() > 0) { 2145 process_sack(tcph); 2146 } 2147 2148 // 2149 // ACK indicates packet left the network 2150 // try not to be fooled by data 2151 // 2152 2153 if (fastrecov_ && (datalen == 0 || ackno > highest_ack_)) 2154 pipe_ -= maxseg_; 2155 2156 // look for dup ACKs (dup ack numbers, no data) 2157 // 2158 // do fast retransmit/recovery if at/past thresh 2159 if (ackno <= highest_ack_) { 2160 // a pure ACK which doesn't advance highest_ack_ 2161 if (datalen == 0 && (!dupseg_fix_ || !dupseg)) { 2162 2163 /* 2164 * If we have outstanding data 2165 * this is a completely 2166 * duplicate ack, 2167 * the ack is the biggest we've 2168 * seen and we've seen exactly our rexmt 2169 * threshhold of them, assume a packet 2170 * has been dropped and retransmit it. 2171 * 2172 * We know we're losing at the current 2173 * window size so do congestion avoidance. 2174 * 2175 * Dup acks mean that packets have left the 2176 * network (they're now cached at the receiver) 2177 * so bump cwnd by the amount in the receiver 2178 * to keep a constant cwnd packets in the 2179 * network. 2180 */ 2181 2182 if ((rtx_timer_.status() != TIMER_PENDING) || 2183 ackno < highest_ack_) { 2184 // Q: significance of timer not pending? 
2185 // ACK below highest_ack_ 2186 oldack(); 2187 } else if (++dupacks_ == tcprexmtthresh_) { 2188 // ACK at highest_ack_ AND meets threshold 2189 //trace_event("FAST_RECOVERY"); 2190 dupack_action(); // maybe fast rexmt 2191 goto drop; 2192 2193 } else if (dupacks_ > tcprexmtthresh_) { 2194 // ACK at highest_ack_ AND above threshole 2195 //trace_event("FAST_RECOVERY"); 2196 extra_ack(); 2197 2198 // send whatever window allows 2199 send_much(0, REASON_DUPACK, maxburst_); 2200 goto drop; 2201 } 2202 } else { 2203 // non zero-length [dataful] segment 2204 // with a dup ack (normal for dataful segs) 2205 // (or window changed in real TCP). 2206 if (dupack_reset_) { 2207 dupacks_ = 0; 2208 fastrecov_ = FALSE; 2209 } 2210 } 2211 break; /* take us to "step6" */ 2212 } /* end of dup/old acks */ 2213 2214 /* 2215 * we've finished the fast retransmit/recovery period 2216 * (i.e. received an ACK which advances highest_ack_) 2217 * The ACK may be "good" or "partial" 2218 */ 2219 2220 process_ACK: 2221 2222 if (ackno > maxseq_) { 2223 // ack more than we sent(!?) 2224 if (debug_) { 2225 fprintf(stderr, "%f: FullTcpAgent::recv(%s) too-big ACK (maxseq:%d): ", 2226 now(), name(), int(maxseq_)); 2227 prpkt(pkt); 2228 } 2229 goto dropafterack; 2230 } 2231 2232 /* 2233 * If we have a timestamp reply, update smoothed 2234 * round trip time. If no timestamp is present but 2235 * transmit timer is running and timed sequence 2236 * number was acked, update smoothed round trip time. 2237 * Since we now have an rtt measurement, cancel the 2238 * timer backoff (cf., Phil Karn's retransmit alg.). 2239 * Recompute the initial retransmit timer. 2240 * 2241 * If all outstanding data is acked, stop retransmit 2242 * If there is more data to be acked, restart retransmit 2243 * timer, using current (possibly backed-off) value. 
2244 */ 2245 newack(pkt); // handle timers, update highest_ack_ 2246 2247 /* 2248 * if this is a partial ACK, invoke whatever we should 2249 * note that newack() must be called before the action 2250 * functions, as some of them depend on side-effects 2251 * of newack() 2252 */ 2253 2254 int partial = pack(pkt); 2255 2256 if (partial) 2257 pack_action(pkt); 2258 else 2259 ack_action(pkt); 2260 2261 /* 2262 * if this is an ACK with an ECN indication, handle this 2263 * but not if it is a syn packet 2264 */ 2265 if (fh->ecnecho() && !(tiflags&TH_SYN) ) 2266 if (fh->ecnecho()) { 2267 // Note from Sally: In one-way TCP, 2268 // ecn() is called before newack()... 2269 ecn(highest_ack_); // updated by newack(), above 2270 // "set_rtx_timer();" from T. Kelly. 2271 if (cwnd_ < 1) 2272 set_rtx_timer(); 2273 } 2274 // CHECKME: handling of rtx timer 2275 if (ackno == maxseq_) { 2276 needoutput = TRUE; 2277 } 2278 2279 /* 2280 * If no data (only SYN) was ACK'd, 2281 * skip rest of ACK processing. 2282 */ 2283 if (ackno == (highest_ack_ + 1)) 2284 goto step6; 2285 2286 // if we are delaying initial cwnd growth (probably due to 2287 // large initial windows), then only open cwnd if data has 2288 // been received 2289 // Q: check when this happens 2290 /* 2291 * When new data is acked, open the congestion window. 2292 * If the window gives us less than ssthresh packets 2293 * in flight, open exponentially (maxseg per packet). 2294 * Otherwise open about linearly: maxseg per window 2295 * (maxseg^2 / cwnd per packet). 
2296 */ 2297 if ((!delay_growth_ || (rcv_nxt_ > 0)) && 2298 last_state_ == TCPS_ESTABLISHED) { 2299 if (!partial || open_cwnd_on_pack_) { 2300 if (!ect_ || !hdr_flags::access(pkt)->ecnecho()) 2301 opencwnd(); 2302 } 2303 } 2304 2305 if ((state_ >= TCPS_FIN_WAIT_1) && (ackno == maxseq_)) { 2306 ourfinisacked = TRUE; 2307 } 2308 2309 // 2310 // special additional processing when our state 2311 // is one of the closing states: 2312 // FIN_WAIT_1, CLOSING, LAST_ACK 2313 2314 switch (state_) { 2315 /* 2316 * In FIN_WAIT_1 STATE in addition to the processing 2317 * for the ESTABLISHED state if our FIN is now acknowledged 2318 * then enter FIN_WAIT_2. 2319 */ 2320 case TCPS_FIN_WAIT_1: /* doing active close */ 2321 if (ourfinisacked) { 2322 // got the ACK, now await incoming FIN 2323 newstate(TCPS_FIN_WAIT_2); 2324 cancel_timers(); 2325 needoutput = FALSE; 2326 } 2327 break; 2328 2329 /* 2330 * In CLOSING STATE in addition to the processing for 2331 * the ESTABLISHED state if the ACK acknowledges our FIN 2332 * then enter the TIME-WAIT state, otherwise ignore 2333 * the segment. 2334 */ 2335 case TCPS_CLOSING: /* simultaneous active close */; 2336 if (ourfinisacked) { 2337 newstate(TCPS_CLOSED); 2338 cancel_timers(); 2339 } 2340 break; 2341 /* 2342 * In LAST_ACK, we may still be waiting for data to drain 2343 * and/or to be acked, as well as for the ack of our FIN. 2344 * If our FIN is now acknowledged, 2345 * enter the closed state and return. 
2346 */ 2347 case TCPS_LAST_ACK: /* passive close */ 2348 // K: added state change here 2349 if (ourfinisacked) { 2350 newstate(TCPS_CLOSED); 2351 finish(); // cancels timers, erc 2352 reset(); // for connection re-use (bug fix from ns-users list) 2353 goto drop; 2354 } else { 2355 // should be a FIN we've seen 2356 if (debug_) { 2357 fprintf(stderr, "%f: FullTcpAgent(%s)::received non-ACK (state:%d): ", 2358 now(), name(), state_); 2359 prpkt(pkt); 2360 } 2361 } 2362 break; 2363 2364 /* no case for TIME_WAIT in simulator */ 2365 } // inner state_ switch (closing states) 2366 } // outer state_ switch (ack processing) 2367 2368 step6: 2369 2370 /* 2371 * Processing of incoming DATAful segments. 2372 * Code above has already trimmed redundant data. 2373 * 2374 * real TCP handles window updates and URG data here also 2375 */ 2376 2377 /* dodata: this label is in the "real" code.. here only for reference */ 2378 2379 if ((datalen > 0 || (tiflags & TH_FIN)) && 2380 TCPS_HAVERCVDFIN(state_) == 0) { 2381 2382 // 2383 // the following 'if' implements the "real" TCP 2384 // TCP_REASS macro 2385 // 2386 2387 if (tcph->seqno() == rcv_nxt_ && rq_.empty()) { 2388 // got the in-order packet we were looking 2389 // for, nobody is in the reassembly queue, 2390 // so this is the common case... 2391 // note: in "real" TCP we must also be in 2392 // ESTABLISHED state to come here, because 2393 // data arriving before ESTABLISHED is 2394 // queued in the reassembly queue. Since we 2395 // don't really have a process anyhow, just 2396 // accept the data here as-is (i.e. don't 2397 // require being in ESTABLISHED state) 2398 flags_ |= TF_DELACK; 2399 rcv_nxt_ += datalen; 2400 tiflags = tcph->flags() & TH_FIN; 2401 2402 // give to "application" here 2403 // in "real" TCP, this is sbappend() + sorwakeup() 2404 if (datalen) 2405 recvBytes(datalen); // notify app. 
of "delivery" 2406 needoutput = need_send(); 2407 } else { 2408 // see the "tcp_reass" function: 2409 // not the one we want next (or it 2410 // is but there's stuff on the reass queue); 2411 // do whatever we need to do for out-of-order 2412 // segments or hole-fills. Also, 2413 // send an ACK (or SACK) to the other side right now. 2414 // Note that we may have just a FIN here (datalen = 0) 2415 int rcv_nxt_old_ = rcv_nxt_; // notify app. if changes 2416 tiflags = reass(pkt); 2417 if (rcv_nxt_ > rcv_nxt_old_) { 2418 // if rcv_nxt_ has advanced, must have 2419 // been a hole fill. In this case, there 2420 // is something to give to application 2421 recvBytes(rcv_nxt_ - rcv_nxt_old_); 2422 } 2423 flags_ |= TF_ACKNOW; 2424 2425 if (tiflags & TH_PUSH) { 2426 // 2427 // ???: does this belong here 2428 // K: APPLICATION recv 2429 needoutput = need_send(); 2430 } 2431 } 2432 } else { 2433 /* 2434 * we're closing down or this is a pure ACK that 2435 * wasn't handled by the header prediction part above 2436 * (e.g. because cwnd < wnd) 2437 */ 2438 // K: this is deleted 2439 tiflags &= ~TH_FIN; 2440 } 2441 2442 /* 2443 * if FIN is received, ACK the FIN 2444 * (let user know if we could do so) 2445 */ 2446 2447 if (tiflags & TH_FIN) { 2448 if (TCPS_HAVERCVDFIN(state_) == 0) { 2449 flags_ |= TF_ACKNOW; 2450 rcv_nxt_++; 2451 } 2452 switch (state_) { 2453 /* 2454 * In SYN_RECEIVED and ESTABLISHED STATES 2455 * enter the CLOSE_WAIT state. 2456 * (passive close) 2457 */ 2458 case TCPS_SYN_RECEIVED: 2459 case TCPS_ESTABLISHED: 2460 newstate(TCPS_CLOSE_WAIT); 2461 break; 2462 2463 /* 2464 * If still in FIN_WAIT_1 STATE FIN has not been acked so 2465 * enter the CLOSING state. 2466 * (simultaneous close) 2467 */ 2468 case TCPS_FIN_WAIT_1: 2469 newstate(TCPS_CLOSING); 2470 break; 2471 /* 2472 * In FIN_WAIT_2 state enter the TIME_WAIT state, 2473 * starting the time-wait timer, turning off the other 2474 * standard timers. 
2475 * (in the simulator, just go to CLOSED) 2476 * (completion of active close) 2477 */ 2478 case TCPS_FIN_WAIT_2: 2479 newstate(TCPS_CLOSED); 2480 cancel_timers(); 2481 break; 2482 } 2483 } /* end of if FIN bit on */ 2484 2485 if (needoutput || (flags_ & TF_ACKNOW)) 2486 send_much(1, REASON_NORMAL, maxburst_); 2487 else if (curseq_ >= highest_ack_ || infinite_send_) 2488 send_much(0, REASON_NORMAL, maxburst_); 2489 // K: which state to return to when nothing left? 2490 2491 if (!halfclose_ && state_ == TCPS_CLOSE_WAIT && highest_ack_ == maxseq_) 2492 usrclosed(); 2493 2494 Packet::free(pkt); 2495 2496 // haoboy: Is here the place for done{} of active close? 2497 // It cannot be put in the switch above because we might need to do 2498 // send_much() (an ACK) 2499 if (state_ == TCPS_CLOSED) 2500 Tcl::instance().evalf("%s done", this->name()); 2501 2502 return; 2503 2504 // 2505 // various ways of dropping (some also ACK, some also RST) 2506 // 2507 2508 dropafterack: 2509 flags_ |= TF_ACKNOW; 2510 send_much(1, REASON_NORMAL, maxburst_); 2511 goto drop; 2512 2513 dropwithreset: 2514 /* we should be sending an RST here, but can't in simulator */ 2515 if (tiflags & TH_ACK) { 2516 sendpacket(ackno, 0, 0x0, 0, REASON_NORMAL); 2517 } else { 2518 int ack = tcph->seqno() + datalen; 2519 if (tiflags & TH_SYN) 2520 ack--; 2521 sendpacket(0, ack, TH_ACK, 0, REASON_NORMAL); 2522 } 2523 drop: 2524 Packet::free(pkt); 2525 return; 2526 } 2527 2528 /* 2529 * Dupack-action: what to do on a DUP ACK. 
After the initial check 2530 * of 'recover' below, this function implements the following truth 2531 * table: 2532 * 2533 * bugfix ecn last-cwnd == ecn action 2534 * 2535 * 0 0 0 full_reno_action 2536 * 0 0 1 full_reno_action [impossible] 2537 * 0 1 0 full_reno_action 2538 * 0 1 1 1/2 window, return 2539 * 1 0 0 nothing 2540 * 1 0 1 nothing [impossible] 2541 * 1 1 0 nothing 2542 * 1 1 1 1/2 window, return 2543 */ 2544 2545 void 2546 FullTcpAgent::dupack_action() 2547 { 2548 2549 int recovered = (highest_ack_ > recover_); 2550 2551 fastrecov_ = TRUE; 2552 rtxbytes_ = 0; 2553 2554 if (recovered || (!bug_fix_ && !ecn_) 2555 || (last_cwnd_action_ == CWND_ACTION_DUPACK) 2556 || ( highest_ack_ == 0)) { 2557 goto full_reno_action; 2558 } 2559 2560 if (ecn_ && last_cwnd_action_ == CWND_ACTION_ECN) { 2561 slowdown(CLOSE_CWND_HALF); 2562 cancel_rtx_timer(); 2563 rtt_active_ = FALSE; 2564 (void)fast_retransmit(highest_ack_); 2565 return; 2566 } 2567 2568 if (bug_fix_) { 2569 /* 2570 * The line below, for "bug_fix_" true, avoids 2571 * problems with multiple fast retransmits in one 2572 * window of data. 
2573 */ 2574 return; 2575 } 2576 2577 full_reno_action: 2578 slowdown(CLOSE_SSTHRESH_HALF|CLOSE_CWND_HALF); 2579 cancel_rtx_timer(); 2580 rtt_active_ = FALSE; 2581 recover_ = maxseq_; 2582 (void)fast_retransmit(highest_ack_); 2583 // we measure cwnd in packets, 2584 // so don't scale by maxseg_ 2585 // as real TCP does 2586 cwnd_ = double(ssthresh_) + double(dupacks_); 2587 return; 2588 } 2589 2590 void 2591 FullTcpAgent::timeout_action() 2592 { 2593 recover_ = maxseq_; 2594 2595 if (cwnd_ < 1.0) { 2596 if (debug_) { 2597 fprintf(stderr, "%f: FullTcpAgent(%s):: resetting cwnd from %f to 1 ", 2598 now(), name(), double(cwnd_)); 2599 } 2600 cwnd_ = 1.0; 2601 } 2602 2603 if (last_cwnd_action_ == CWND_ACTION_ECN) { 2604 slowdown(CLOSE_CWND_ONE); 2605 } else { 2606 slowdown(CLOSE_SSTHRESH_HALF|CLOSE_CWND_RESTART); 2607 last_cwnd_action_ = CWND_ACTION_TIMEOUT; 2608 } 2609 reset_rtx_timer(1); 2610 t_seqno_ = (highest_ack_ < 0) ? iss_ : int(highest_ack_); 2611 fastrecov_ = FALSE; 2612 dupacks_ = 0; 2613 } 2614 /* 2615 * deal with timers going off. 2616 * 2 types for now: 2617 * retransmission timer (rtx_timer_) 2618 * delayed ack timer (delack_timer_) 2619 * delayed send (randomization) timer (delsnd_timer_) 2620 * 2621 * real TCP initializes the RTO as 6 sec 2622 * (A + 2D, where A=0, D=3), [Stevens p. 305] 2623 * and thereafter uses 2624 * (A + 4D, where A and D are dynamic estimates) 2625 * 2626 * note that in the simulator t_srtt_, t_rttvar_ and t_rtt_ 2627 * are all measured in 'tcp_tick_'-second units 2628 */ 2629 2630 void 2631 FullTcpAgent::timeout(int tno) 2632 { 2633 2634 /* 2635 * Due to F. Hernandez-Campos' fix in recv(), we may send an ACK 2636 * while in the CLOSED state. -M. 
Weigle 7/24/01 2637 */ 2638 if (state_ == TCPS_LISTEN) { 2639 // shouldn't be getting timeouts here 2640 if (debug_) { 2641 fprintf(stderr, "%f: FullTcpAgent(%s): unexpected timeout %d in state %s ", 2642 now(), name(), tno, statestr(state_)); 2643 } 2644 return; 2645 } 2646 2647 switch (tno) { 2648 2649 case TCP_TIMER_RTX: 2650 /* retransmit timer */ 2651 ++nrexmit_; 2652 timeout_action(); 2653 /* fall thru */ 2654 case TCP_TIMER_DELSND: 2655 /* for phase effects */ 2656 send_much(1, PF_TIMEOUT, maxburst_); 2657 break; 2658 2659 case TCP_TIMER_DELACK: 2660 if (flags_ & TF_DELACK) { 2661 flags_ &= ~TF_DELACK; 2662 flags_ |= TF_ACKNOW; 2663 send_much(1, REASON_NORMAL, 0); 2664 } 2665 delack_timer_.resched(delack_interval_); 2666 break; 2667 default: 2668 fprintf(stderr, "%f: FullTcpAgent(%s) Unknown Timeout type %d ", 2669 now(), name(), tno); 2670 } 2671 return; 2672 } 2673 2674 void 2675 FullTcpAgent::dooptions(Packet* pkt) 2676 { 2677 // interesting options: timestamps (here), 2678 // CC, CCNEW, CCECHO (future work perhaps?) 2679 2680 hdr_flags *fh = hdr_flags::access(pkt); 2681 hdr_tcp *tcph = hdr_tcp::access(pkt); 2682 2683 if (ts_option_ && !fh->no_ts_) { 2684 if (tcph->ts() < 0.0) { 2685 fprintf(stderr, 2686 "%f: FullTcpAgent(%s) warning: ts_option enabled in this TCP, but appears to be disabled in peer ", 2687 now(), name()); 2688 } else if (tcph->flags() & TH_SYN) { 2689 flags_ |= TF_RCVD_TSTMP; 2690 recent_ = tcph->ts(); 2691 recent_age_ = now(); 2692 } 2693 } 2694 2695 return; 2696 } 2697 2698 // 2699 // this shouldn't ever happen 2700 // 2701 void 2702 FullTcpAgent::process_sack(hdr_tcp*) 2703 { 2704 fprintf(stderr, "%f: FullTcpAgent(%s) Non-SACK capable FullTcpAgent received a SACK ", 2705 now(), name()); 2706 return; 2707 } 2708 2709 2710 /* 2711 * ****** Tahoe ****** 2712 * 2713 * for TCP Tahoe, we force a slow-start as the dup ack 2714 * action. Also, no window inflation due to multiple dup 2715 * acks. 
The latter is arranged by setting reno_fastrecov_ 2716 * false [which is performed by the Tcl init function for Tahoe in 2717 * ns-default.tcl]. 2718 */ 2719 2720 /* 2721 * Tahoe 2722 * Dupack-action: what to do on a DUP ACK. After the initial check 2723 * of 'recover' below, this function implements the following truth 2724 * table: 2725 * 2726 * bugfix ecn last-cwnd == ecn action 2727 * 2728 * 0 0 0 full_tahoe_action 2729 * 0 0 1 full_tahoe_action [impossible] 2730 * 0 1 0 full_tahoe_action 2731 * 0 1 1 1/2 window, return 2732 * 1 0 0 nothing 2733 * 1 0 1 nothing [impossible] 2734 * 1 1 0 nothing 2735 * 1 1 1 1/2 window, return 2736 */ 2737 2738 void 2739 TahoeFullTcpAgent::dupack_action() 2740 { 2741 int recovered = (highest_ack_ > recover_); 2742 2743 fastrecov_ = TRUE; 2744 rtxbytes_ = 0; 2745 2746 if (recovered || (!bug_fix_ && !ecn_) || highest_ack_ == 0) { 2747 goto full_tahoe_action; 2748 } 2749 2750 if (ecn_ && last_cwnd_action_ == CWND_ACTION_ECN) { 2751 // slow start on ECN 2752 last_cwnd_action_ = CWND_ACTION_DUPACK; 2753 slowdown(CLOSE_CWND_ONE); 2754 set_rtx_timer(); 2755 rtt_active_ = FALSE; 2756 t_seqno_ = highest_ack_; 2757 return; 2758 } 2759 2760 if (bug_fix_) { 2761 /* 2762 * The line below, for "bug_fix_" true, avoids 2763 * problems with multiple fast retransmits in one 2764 * window of data. 2765 */ 2766 return; 2767 } 2768 2769 full_tahoe_action: 2770 // slow-start and reset ssthresh 2771 trace_event("FAST_RETX"); 2772 recover_ = maxseq_; 2773 last_cwnd_action_ = CWND_ACTION_DUPACK; 2774 slowdown(CLOSE_SSTHRESH_HALF|CLOSE_CWND_ONE); // cwnd->1 2775 set_rtx_timer(); 2776 rtt_active_ = FALSE; 2777 t_seqno_ = highest_ack_; 2778 send_much(0, REASON_NORMAL, 0); 2779 return; 2780 } 2781 2782 /* 2783 * ****** Newreno ****** 2784 * 2785 * for NewReno, a partial ACK does not exit fast recovery, 2786 * and does not reset the dup ACK counter (which might trigger fast 2787 * retransmits we don't want). 
In addition, the number of packets 2788 * sent in response to an ACK is limited to recov_maxburst_ during 2789 * recovery periods. 2790 */ 2791 2792 NewRenoFullTcpAgent::NewRenoFullTcpAgent() : save_maxburst_(-1) 2793 { 2794 bind("recov_maxburst_", &recov_maxburst_); 2795 } 2796 2797 void 2798 NewRenoFullTcpAgent::pack_action(Packet*) 2799 { 2800 (void)fast_retransmit(highest_ack_); 2801 cwnd_ = double(ssthresh_); 2802 if (save_maxburst_ < 0) { 2803 save_maxburst_ = maxburst_; 2804 maxburst_ = recov_maxburst_; 2805 } 2806 return; 2807 } 2808 2809 void 2810 NewRenoFullTcpAgent::ack_action(Packet* p) 2811 { 2812 if (save_maxburst_ >= 0) { 2813 maxburst_ = save_maxburst_; 2814 save_maxburst_ = -1; 2815 } 2816 FullTcpAgent::ack_action(p); 2817 return; 2818 } 2819 2820 /* 2821 * 2822 * ****** SACK ****** 2823 * 2824 * for Sack, receiver part must report SACK data 2825 * sender part maintains a 'scoreboard' (sq_) that 2826 * records what it hears from receiver 2827 * sender fills holes during recovery and obeys 2828 * "pipe" style control until recovery is complete 2829 */ 2830 2831 void 2832 SackFullTcpAgent::reset() 2833 { 2834 sq_.clear(); // no SACK blocks 2835 /* Fixed typo. -M. 
Weigle 6/17/02 */ 2836 sack_min_ = h_seqno_ = -1; // no left edge of SACK blocks 2837 FullTcpAgent::reset(); 2838 } 2839 2840 2841 int 2842 SackFullTcpAgent::hdrsize(int nsackblocks) 2843 { 2844 int total = FullTcpAgent::headersize(); 2845 // use base header size plus SACK option size 2846 if (nsackblocks > 0) { 2847 total += ((nsackblocks * sack_block_size_) 2848 + sack_option_size_); 2849 } 2850 return (total); 2851 } 2852 2853 void 2854 SackFullTcpAgent::dupack_action() 2855 { 2856 2857 int recovered = (highest_ack_ > recover_); 2858 2859 fastrecov_ = TRUE; 2860 rtxbytes_ = 0; 2861 pipe_ = maxseq_ - highest_ack_ - sq_.total(); 2862 2863 //printf("%f: SACK DUPACK-ACTION:pipe_:%d, sq-total:%d, bugfix:%d, cwnd:%d, highest_ack:%d, recover_:%d ", 2864 //now(), pipe_, sq_.total(), bug_fix_, int(cwnd_), int(highest_ack_), recover_); 2865 2866 if (recovered || (!bug_fix_ && !ecn_)) { 2867 goto full_sack_action; 2868 } 2869 2870 if (ecn_ && last_cwnd_action_ == CWND_ACTION_ECN) { 2871 /* 2872 * Received ECN notification and 3 DUPACKs in same 2873 * window. Don't cut cwnd again, but retransmit lost 2874 * packet. -M. Weigle 6/19/02 2875 */ 2876 last_cwnd_action_ = CWND_ACTION_DUPACK; 2877 cancel_rtx_timer(); 2878 rtt_active_ = FALSE; 2879 int amt = fast_retransmit(highest_ack_); 2880 pipectrl_ = TRUE; 2881 h_seqno_ = highest_ack_ + amt; 2882 send_much(0, REASON_DUPACK, maxburst_); 2883 return; 2884 } 2885 2886 if (bug_fix_) { 2887 /* 2888 * The line below, for "bug_fix_" true, avoids 2889 * problems with multiple fast retransmits in one 2890 * window of data. 
2891 */ 2892 2893 //printf("%f: SACK DUPACK-ACTION BUGFIX RETURN:pipe_:%d, sq-total:%d, bugfix:%d, cwnd:%d ", 2894 //now(), pipe_, sq_.total(), bug_fix_, int(cwnd_)); 2895 return; 2896 } 2897 2898 full_sack_action: 2899 trace_event("FAST_RECOVERY"); 2900 slowdown(CLOSE_SSTHRESH_HALF|CLOSE_CWND_HALF); 2901 cancel_rtx_timer(); 2902 rtt_active_ = FALSE; 2903 2904 // these initiate SACK-style "pipe" recovery 2905 pipectrl_ = TRUE; 2906 recover_ = maxseq_; // where I am when recovery starts 2907 2908 int amt = fast_retransmit(highest_ack_); 2909 h_seqno_ = highest_ack_ + amt; 2910 2911 //printf("%f: FAST-RTX seq:%d, h_seqno_ is now:%d, pipe:%d, cwnd:%d, recover:%d ", 2912 //now(), int(highest_ack_), h_seqno_, pipe_, int(cwnd_), recover_); 2913 2914 send_much(0, REASON_DUPACK, maxburst_); 2915 2916 return; 2917 } 2918 2919 void 2920 SackFullTcpAgent::pack_action(Packet*) 2921 { 2922 if (!sq_.empty() && sack_min_ < highest_ack_) { 2923 sack_min_ = highest_ack_; 2924 sq_.cleartonxt(); 2925 } 2926 pipe_ -= maxseg_; // see comment in tcp-sack1.cc 2927 if (h_seqno_ < highest_ack_) 2928 h_seqno_ = highest_ack_; 2929 } 2930 2931 void 2932 SackFullTcpAgent::ack_action(Packet*) 2933 { 2934 //printf("%f: EXITING fast recovery, recover:%d ", 2935 //now(), recover_); 2936 fastrecov_ = pipectrl_ = FALSE; 2937 if (!sq_.empty() && sack_min_ < highest_ack_) { 2938 sack_min_ = highest_ack_; 2939 sq_.cleartonxt(); 2940 } 2941 dupacks_ = 0; 2942 2943 /* 2944 * Update h_seqno_ on new ACK (same as for partial ACKS) 2945 * -M. 
Weigle 6/3/05 2946 */ 2947 if (h_seqno_ < highest_ack_) 2948 h_seqno_ = highest_ack_; 2949 } 2950 2951 // 2952 // receiver side: if there are things in the reassembly queue, 2953 // build the appropriate SACK blocks to carry in the SACK 2954 // 2955 int 2956 SackFullTcpAgent::build_options(hdr_tcp* tcph) 2957 { 2958 int total = FullTcpAgent::build_options(tcph); 2959 2960 if (!rq_.empty()) { 2961 int nblk = rq_.gensack(&tcph->sa_left(0), max_sack_blocks_); 2962 tcph->sa_length() = nblk; 2963 total += (nblk * sack_block_size_) + sack_option_size_; 2964 } else { 2965 tcph->sa_length() = 0; 2966 } 2967 return (total); 2968 } 2969 2970 void 2971 SackFullTcpAgent::timeout_action() 2972 { 2973 FullTcpAgent::timeout_action(); 2974 2975 // 2976 // original SACK spec says the sender is 2977 // supposed to clear out its knowledge of what 2978 // the receiver has in the case of a timeout 2979 // (on the chance the receiver has renig'd). 2980 // Here, this happens when clear_on_timeout_ is 2981 // enabled. 2982 // 2983 2984 if (clear_on_timeout_) { 2985 sq_.clear(); 2986 sack_min_ = highest_ack_; 2987 } 2988 2989 return; 2990 } 2991 2992 void 2993 SackFullTcpAgent::process_sack(hdr_tcp* tcph) 2994 { 2995 // 2996 // Figure out how many sack blocks are 2997 // in the pkt. Insert each block range 2998 // into the scoreboard 2999 // 3000 3001 if (max_sack_blocks_ <= 0) { 3002 fprintf(stderr, 3003 "%f: FullTcpAgent(%s) warning: received SACK block but I am not SACK enabled ", 3004 now(), name()); 3005 return; 3006 } 3007 3008 int slen = tcph->sa_length(), i; 3009 for (i = 0; i < slen; ++i) { 3010 /* Added check for FIN -M. 
Weigle 5/21/02 */ 3011 if (((tcph->flags() & TH_FIN) == 0) && 3012 tcph->sa_left(i) >= tcph->sa_right(i)) { 3013 fprintf(stderr, 3014 "%f: FullTcpAgent(%s) warning: received illegal SACK block [%d,%d] ", 3015 now(), name(), tcph->sa_left(i), tcph->sa_right(i)); 3016 continue; 3017 } 3018 sq_.add(tcph->sa_left(i), tcph->sa_right(i), 0); 3019 } 3020 3021 return; 3022 } 3023 3024 int 3025 SackFullTcpAgent::send_allowed(int seq) 3026 { 3027 // not in pipe control, so use regular control 3028 if (!pipectrl_) 3029 return (FullTcpAgent::send_allowed(seq)); 3030 3031 // don't overshoot receiver's advertised window 3032 int topawin = highest_ack_ + int(wnd_) * maxseg_; 3033 if (seq >= topawin) { 3034 //printf("%f: SEND(%d) NOT ALLOWED DUE TO AWIN:%d, pipe:%d, cwnd:%d ", 3035 //now(), seq, topawin, pipe_, int(cwnd_)); 3036 return FALSE; 3037 } 3038 3039 /* 3040 * If not in ESTABLISHED, don't send anything we don't have 3041 * -M. Weigle 7/18/02 3042 */ 3043 if (state_ != TCPS_ESTABLISHED && seq > curseq_) 3044 return FALSE; 3045 3046 // don't overshoot cwnd_ 3047 int cwin = int(cwnd_) * maxseg_; 3048 return (pipe_ < cwin); 3049 } 3050 3051 3052 // 3053 // Calculate the next seq# to send by send_much. 
If we are recovering and 3054 // we have learned about data cached at the receiver via a SACK, 3055 // we may want something other than new data (t_seqno) 3056 // 3057 3058 int 3059 SackFullTcpAgent::nxt_tseq() 3060 { 3061 3062 int in_recovery = (highest_ack_ < recover_); 3063 int seq = h_seqno_; 3064 3065 if (!in_recovery) { 3066 //if (int(t_seqno_) > 1) 3067 //printf("%f: non-recovery nxt_tseq called w/t_seqno:%d ", 3068 //now(), int(t_seqno_)); 3069 //sq_.dumplist(); 3070 return (t_seqno_); 3071 } 3072 3073 int fcnt; // following count-- the 3074 // count field in the block 3075 // after the seq# we are about 3076 // to send 3077 int fbytes; // fcnt in bytes 3078 3079 //if (int(t_seqno_) > 1) 3080 //printf("%f: recovery nxt_tseq called w/t_seqno:%d, seq:%d, mode:%d ", 3081 //now(), int(t_seqno_), seq, sack_rtx_threshmode_); 3082 //sq_.dumplist(); 3083 3084 while ((seq = sq_.nexthole(seq, fcnt, fbytes)) > 0) { 3085 // if we have a following block 3086 // with a large enough count 3087 // we should use the seq# we get 3088 // from nexthole() 3089 if (sack_rtx_threshmode_ == 0 || 3090 (sack_rtx_threshmode_ == 1 && fcnt >= sack_rtx_cthresh_) || 3091 (sack_rtx_threshmode_ == 2 && fbytes >= sack_rtx_bthresh_) || 3092 (sack_rtx_threshmode_ == 3 && (fcnt >= sack_rtx_cthresh_ || fbytes >= sack_rtx_bthresh_)) || 3093 (sack_rtx_threshmode_ == 4 && (fcnt >= sack_rtx_cthresh_ && fbytes >= sack_rtx_bthresh_))) { 3094 3095 //if (int(t_seqno_) > 1) 3096 //printf("%f: nxt_tseq<hole> returning %d ", 3097 //now(), int(seq)); 3098 // adjust h_seqno, as we may have 3099 // been "jumped ahead" by learning 3100 // about a filled hole 3101 if (seq > h_seqno_) 3102 h_seqno_ = seq; 3103 return (seq); 3104 } else if (fcnt <= 0) 3105 break; 3106 else { 3107 seq += maxseg_; 3108 } 3109 } 3110 //if (int(t_seqno_) > 1) 3111 //printf("%f: nxt_tseq<top> returning %d ", 3112 //now(), int(t_seqno_)); 3113 return (t_seqno_); 3114 }