From 2900f1f9952cd901156de62e66c74743da97c8bc Mon Sep 17 00:00:00 2001
From: Joseph Henry <josephjah@gmail.com>
Date: Mon, 9 Oct 2017 17:56:40 -0700
Subject: [PATCH 01/11] Windows Support

---
 Makefile                                      |  19 +-
 examples/ztproxy/ztproxy.cpp                  |  31 +-
 examples/ztproxy/ztproxy.hpp                  |   5 +-
 .../ports/unix/include}/cc.h                  |   0
 .../ports/unix/include}/perf.h                |   0
 .../ports/unix/include}/sys_arch.h            |   0
 .../ports/win32/include/arch/cc.h             |   2 +-
 .../include/{lwipopts.h => lwipopts.h.bak}    |   0
 .../ports/win32/{pcapif.c => pcapif.c.bak}    |   0
 .../ports/win32/{pcapif.h => pcapif.h.bak}    |   0
 .../{pcapif_helper.c => pcapif_helper.c.bak}  |   0
 .../ports/win32/{sio.c => sio.c.bak}          |   0
 .../ports/win32/{test.c => test.c.bak}        |   0
 ext/lwip/src/api/err.c                        |   2 +-
 ext/lwip/src/api/sockets.c                    |  13 +-
 .../src/include/lwip/{errno.h => errno.h.bak} | 386 +++++++++---------
 include/Debug.hpp                             |  10 +-
 include/Defs.h                                |   2 +
 include/Utilities.h                           |  13 +-
 include/libzt.h                               |   7 +-
 include/lwIP.hpp                              |   6 +-
 make-liblwip.mk                               |  37 +-
 src/Platform.cpp                              |   3 +-
 src/Utilities.cpp                             | 167 ++++++++
 src/VirtualSocket.cpp                         |   3 +
 src/VirtualTap.cpp                            |   4 +-
 src/libzt.cpp                                 |   6 +-
 src/lwIP.cpp                                  |  11 +-
 28 files changed, 486 insertions(+), 241 deletions(-)
 rename ext/{lwip/src/include/arch => lwip-contrib/ports/unix/include}/cc.h (100%)
 rename ext/{lwip/src/include/arch => lwip-contrib/ports/unix/include}/perf.h (100%)
 rename ext/{lwip/src/include/arch => lwip-contrib/ports/unix/include}/sys_arch.h (100%)
 rename ext/lwip-contrib/ports/win32/include/{lwipopts.h => lwipopts.h.bak} (100%)
 rename ext/lwip-contrib/ports/win32/{pcapif.c => pcapif.c.bak} (100%)
 rename ext/lwip-contrib/ports/win32/{pcapif.h => pcapif.h.bak} (100%)
 rename ext/lwip-contrib/ports/win32/{pcapif_helper.c => pcapif_helper.c.bak} (100%)
 rename ext/lwip-contrib/ports/win32/{sio.c => sio.c.bak} (100%)
 rename ext/lwip-contrib/ports/win32/{test.c => test.c.bak} (100%)
 rename ext/lwip/src/include/lwip/{errno.h => errno.h.bak} (97%)
 mode change 100755 => 100644

diff --git a/Makefile b/Makefile
index eec7e33..75f074d 100644
--- a/Makefile
+++ b/Makefile
@@ -36,6 +36,18 @@ endif
 OSTYPE=$(shell uname -s | tr '[A-Z]' '[a-z]')
 BUILD=build/$(OSTYPE)
 
+# Windows
+ifeq ($(OSTYPE),mingw32_nt-6.2)
+ARTOOL=ar
+ARFLAGS=rcs
+CC=gcc
+CXX=g++
+CXXFLAGS+=-Wno-unknown-pragmas -Wno-pointer-arith -Wno-deprecated-declarations -Wno-conversion-null
+WINDEFS=-lws2_32 -lshlwapi -liphlpapi -static -static-libgcc -static-libstdc++
+CONTRIBDIR=ext/lwip-contrib
+LWIPARCH=$(CONTRIBDIR)/ports/win32
+LWIPARCHINCLUDE=$(LWIPARCH)/include
+endif
 # Darwin
 ifeq ($(OSTYPE),darwin)
 ARTOOL=libtool
@@ -147,7 +159,7 @@ ifeq ($(SDK_JNI), 1)
 	LIBZT_DEFS+=-DSDK_JNI
 endif
 
-CXXFLAGS=$(CFLAGS) -Wno-format -fno-rtti -std=c++11
+CXXFLAGS+=$(CFLAGS) -Wno-format -fno-rtti -std=c++11
 ZT_DEFS+=-DZT_SDK -DZT_SOFTWARE_UPDATE_DEFAULT="\"disable\""
 LIBZT_FILES:=src/VirtualTap.cpp src/libzt.cpp src/Utilities.cpp
 STATIC_LIB=$(BUILD)/libzt.a
@@ -211,9 +223,8 @@ endif
 STACK_DRIVER_DEFS+=-DLWIP_DONT_PROVIDE_BYTEORDER_FUNCTIONS
 STACK_DRIVER_DEFS+=-DSTACK_LWIP
 STACK_DRIVER_FILES:=src/lwIP.cpp
-LWIPARCH=$(CONTRIBDIR)/ports/unix
 LWIPDIR=ext/lwip/src
-STACK_INCLUDES+=-Iext/lwip/src/include/lwip \
+STACK_INCLUDES+=$(addprefix -I,$(LWIPARCHINCLUDE)) -Iext/lwip/src/include/lwip \
 	-I$(LWIPDIR)/include \
 	-I$(LWIPARCH)/include \
 	-I$(LWIPDIR)/include/ipv4 \
@@ -349,7 +360,7 @@ nativetest:
 	@date +"Build script finished on %F %T"
 ztproxy:
 	$(CXX) $(CXXFLAGS) $(SANFLAGS) $(LIBZT_INCLUDES) $(LIBZT_DEFS) $(ZT_INCLUDES) \
-		examples/ztproxy/ztproxy.cpp -o $(BUILD)/ztproxy $< -L$(BUILD) -lzt
+		examples/ztproxy/ztproxy.cpp -o $(BUILD)/ztproxy $< -L$(BUILD) -lzt $(WINDEFS)
 	@./check.sh $(BUILD)/ztproxy
 	@date +"Build script finished on %F %T"
 intercept:
diff --git a/examples/ztproxy/ztproxy.cpp b/examples/ztproxy/ztproxy.cpp
index 2556f04..d5a913c 100644
--- a/examples/ztproxy/ztproxy.cpp
+++ b/examples/ztproxy/ztproxy.cpp
@@ -26,12 +26,17 @@
 
 #include <unistd.h>
 #include <string.h>
-#include <netdb.h>
+
+#if defined(__linux__) || defined(__APPLE__)
+ #include <netdb.h>
+#endif
+
+//#include "Winsock2.h"
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string>
 #include <fcntl.h>
-#include <regex.h>
 
 #include <vector>
 #include <algorithm>
@@ -39,6 +44,7 @@
 
 #include "RingBuffer.hpp"
 #include "ztproxy.hpp"
+#include "Utilities.h"
 #include "libzt.h"
 
 namespace ZeroTier {
@@ -69,7 +75,11 @@ namespace ZeroTier {
 		in4.sin_addr.s_addr = Utils::hton((uint32_t)(0x7f000001)); // listen for TCP @127.0.0.1
 		in4.sin_port = Utils::hton((uint16_t)proxy_listen_port);
 		_tcpListenSocket = _phy.tcpListen((const struct sockaddr *)&in4,this);
+		if (!_tcpListenSocket) {
+			DEBUG_ERROR("Error binding on port %d for IPv4 HTTP listen socket", proxy_listen_port);
+		}
 		// IPv6
+		/*
 		struct sockaddr_in6 in6;
 		memset((void *)&in6,0,sizeof(in6));
 		in6.sin6_family = AF_INET6;
@@ -77,13 +87,12 @@ namespace ZeroTier {
 		in6.sin6_addr.s6_addr[15] = 1; // IPv6 localhost == ::1
 		in6.sin6_port = Utils::hton((uint16_t)proxy_listen_port);
 		_tcpListenSocket6 = _phy.tcpListen((const struct sockaddr *)&in6,this);
-
-		if (!_tcpListenSocket) {
-			DEBUG_ERROR("Error binding on port %d for IPv4 HTTP listen socket", proxy_listen_port);
-		}
+		*/
+		/*
 		if (!_tcpListenSocket6) {
 			DEBUG_ERROR("Error binding on port %d for IPv6 HTTP listen socket", proxy_listen_port);
 		}
+		*/
 		_thread = Thread::start(this);
 	} 
 
@@ -117,7 +126,7 @@ namespace ZeroTier {
   		// Moves data between client application socket and libzt VirtualSocket 
 		while(_run) {
 
-			_phy.poll(5);
+			_phy.poll(1);
 
 			conn_m.lock();
 			// build fd_sets to select upon
@@ -212,7 +221,6 @@ namespace ZeroTier {
 			exit(0);
 		}
 		if (conn->zfd < 0) { // no connection yet
-			DEBUG_INFO("no connection yet, will establish...");
 			if (host == "") {
 				DEBUG_ERROR("invalid hostname or address (empty)");
 				return;
@@ -256,6 +264,7 @@ namespace ZeroTier {
 			}
 			if (ipv == 6) {
 				//DEBUG_INFO("attempting to proxy [0.0.0.0:%d -> %s:%d]", _proxy_listen_port, host.c_str(), dest_port);
+				/*
 				struct sockaddr_in6 in6;
 				memset(&in6,0,sizeof(in6));
 				in6.sin6_family = AF_INET;
@@ -265,6 +274,7 @@ namespace ZeroTier {
 				in6.sin6_port = Utils::hton(dest_port);			
 				zfd = zts_socket(AF_INET, SOCK_STREAM, 0);
 				err = zts_connect(zfd, (const struct sockaddr *)&in6, sizeof(in6));
+				*/
 			}
 			if (zfd < 0 || err < 0) {
 				// now release TX buffer contents we previously saved, since we can't connect
@@ -287,9 +297,6 @@ namespace ZeroTier {
 			zmap[zfd] = conn;
 			conn_m.unlock();			
 		}
-		else {
-			DEBUG_INFO("connection already established, reusing...");
-		}
 		// Write data coming from client TCP connection to its TX buffer, later emptied into libzt by threadMain I/O loop
 		conn->tx_m.lock();
 		if ((wr = conn->TXbuf->write((const unsigned char *)data, len)) < 0) {
@@ -371,7 +378,7 @@ int main(int argc, char **argv)
 	std::string nwid          = argv[3];
 	std::string internal_addr = argv[4];
 	int internal_port         = atoi(argv[5]);
-	std::string dns_nameserver= argv[6];
+	std::string dns_nameserver= "";//argv[6];
 
 	ZeroTier::ZTProxy *proxy = new ZeroTier::ZTProxy(proxy_listen_port, nwid, path, internal_addr, internal_port, dns_nameserver);
 	
diff --git a/examples/ztproxy/ztproxy.hpp b/examples/ztproxy/ztproxy.hpp
index 304ec53..91f49ab 100644
--- a/examples/ztproxy/ztproxy.hpp
+++ b/examples/ztproxy/ztproxy.hpp
@@ -34,10 +34,13 @@
 #include "Phy.hpp"
 #include "OSUtils.hpp"
 
+#if defined(__linux__) || defined(__APPLE__)
+ #include <sys/select.h>
+#endif
+
 #include <queue>
 #include <vector>
 #include <stdio.h>
-#include <sys/select.h>
 
 #define BUF_SZ 1024*1024
 
diff --git a/ext/lwip/src/include/arch/cc.h b/ext/lwip-contrib/ports/unix/include/cc.h
similarity index 100%
rename from ext/lwip/src/include/arch/cc.h
rename to ext/lwip-contrib/ports/unix/include/cc.h
diff --git a/ext/lwip/src/include/arch/perf.h b/ext/lwip-contrib/ports/unix/include/perf.h
similarity index 100%
rename from ext/lwip/src/include/arch/perf.h
rename to ext/lwip-contrib/ports/unix/include/perf.h
diff --git a/ext/lwip/src/include/arch/sys_arch.h b/ext/lwip-contrib/ports/unix/include/sys_arch.h
similarity index 100%
rename from ext/lwip/src/include/arch/sys_arch.h
rename to ext/lwip-contrib/ports/unix/include/sys_arch.h
diff --git a/ext/lwip-contrib/ports/win32/include/arch/cc.h b/ext/lwip-contrib/ports/win32/include/arch/cc.h
index a1a2a70..56e6ebc 100644
--- a/ext/lwip-contrib/ports/win32/include/arch/cc.h
+++ b/ext/lwip-contrib/ports/win32/include/arch/cc.h
@@ -39,7 +39,7 @@
 #pragma warning (disable: 4820) /* 'x' bytes padding added after data member 'y' */
 #endif
 
-#define LWIP_PROVIDE_ERRNO
+#define LWIP_PROVIDE_ERRNO 0
 
 /* Define platform endianness (might already be defined) */
 #ifndef BYTE_ORDER
diff --git a/ext/lwip-contrib/ports/win32/include/lwipopts.h b/ext/lwip-contrib/ports/win32/include/lwipopts.h.bak
similarity index 100%
rename from ext/lwip-contrib/ports/win32/include/lwipopts.h
rename to ext/lwip-contrib/ports/win32/include/lwipopts.h.bak
diff --git a/ext/lwip-contrib/ports/win32/pcapif.c b/ext/lwip-contrib/ports/win32/pcapif.c.bak
similarity index 100%
rename from ext/lwip-contrib/ports/win32/pcapif.c
rename to ext/lwip-contrib/ports/win32/pcapif.c.bak
diff --git a/ext/lwip-contrib/ports/win32/pcapif.h b/ext/lwip-contrib/ports/win32/pcapif.h.bak
similarity index 100%
rename from ext/lwip-contrib/ports/win32/pcapif.h
rename to ext/lwip-contrib/ports/win32/pcapif.h.bak
diff --git a/ext/lwip-contrib/ports/win32/pcapif_helper.c b/ext/lwip-contrib/ports/win32/pcapif_helper.c.bak
similarity index 100%
rename from ext/lwip-contrib/ports/win32/pcapif_helper.c
rename to ext/lwip-contrib/ports/win32/pcapif_helper.c.bak
diff --git a/ext/lwip-contrib/ports/win32/sio.c b/ext/lwip-contrib/ports/win32/sio.c.bak
similarity index 100%
rename from ext/lwip-contrib/ports/win32/sio.c
rename to ext/lwip-contrib/ports/win32/sio.c.bak
diff --git a/ext/lwip-contrib/ports/win32/test.c b/ext/lwip-contrib/ports/win32/test.c.bak
similarity index 100%
rename from ext/lwip-contrib/ports/win32/test.c
rename to ext/lwip-contrib/ports/win32/test.c.bak
diff --git a/ext/lwip/src/api/err.c b/ext/lwip/src/api/err.c
index 6e9ab76..5c795a4 100755
--- a/ext/lwip/src/api/err.c
+++ b/ext/lwip/src/api/err.c
@@ -40,7 +40,7 @@
 #include "lwip/def.h"
 #include "lwip/sys.h"
 
-#include "lwip/errno.h"
+#include "errno.h"
 
 #if !NO_SYS
 /** Table to quickly map an lwIP error (err_t) to a socket error
diff --git a/ext/lwip/src/api/sockets.c b/ext/lwip/src/api/sockets.c
index d72724f..387e035 100755
--- a/ext/lwip/src/api/sockets.c
+++ b/ext/lwip/src/api/sockets.c
@@ -658,10 +658,19 @@ lwip_connect(int s, const struct sockaddr *name, socklen_t namelen)
   }
 
   LWIP_UNUSED_ARG(namelen);
-  if (name->sa_family == AF_UNSPEC) {
+
+  if (
+#ifdef __MINGW32__
+        0 /* MinGW: AF_UNSPEC disconnect branch is never taken; literal 0 so this C file does not depend on <stdbool.h> */
+#else
+    name->sa_family == AF_UNSPEC
+#endif
+    )
+  {
     LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_connect(%d, AF_UNSPEC)\n", s));
     err = netconn_disconnect(sock->conn);
-  } else {
+  } 
+  else {
     ip_addr_t remote_addr;
     u16_t remote_port;
 
diff --git a/ext/lwip/src/include/lwip/errno.h b/ext/lwip/src/include/lwip/errno.h.bak
old mode 100755
new mode 100644
similarity index 97%
rename from ext/lwip/src/include/lwip/errno.h
rename to ext/lwip/src/include/lwip/errno.h.bak
index 47a4ff2..1c770df
--- a/ext/lwip/src/include/lwip/errno.h
+++ b/ext/lwip/src/include/lwip/errno.h.bak
@@ -1,193 +1,193 @@
-/**
- * @file
- * Posix Errno defines
- */
-
-/*
- * Copyright (c) 2001-2004 Swedish Institute of Computer Science.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- *    this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- *    this list of conditions and the following disclaimer in the documentation
- *    and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- *    derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
- * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
- * OF SUCH DAMAGE.
- *
- * This file is part of the lwIP TCP/IP stack.
- *
- * Author: Adam Dunkels <adam@sics.se>
- *
- */
-#ifndef LWIP_HDR_ERRNO_H
-#define LWIP_HDR_ERRNO_H
-
-#include "lwip/opt.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef LWIP_PROVIDE_ERRNO
-
-#define  EPERM            1  /* Operation not permitted */
-#define  ENOENT           2  /* No such file or directory */
-#define  ESRCH            3  /* No such process */
-#define  EINTR            4  /* Interrupted system call */
-#define  EIO              5  /* I/O error */
-#define  ENXIO            6  /* No such device or address */
-#define  E2BIG            7  /* Arg list too long */
-#define  ENOEXEC          8  /* Exec format error */
-#define  EBADF            9  /* Bad file number */
-#define  ECHILD          10  /* No child processes */
-#define  EAGAIN          11  /* Try again */
-#define  ENOMEM          12  /* Out of memory */
-#define  EACCES          13  /* Permission denied */
-#define  EFAULT          14  /* Bad address */
-#define  ENOTBLK         15  /* Block device required */
-#define  EBUSY           16  /* Device or resource busy */
-#define  EEXIST          17  /* File exists */
-#define  EXDEV           18  /* Cross-device link */
-#define  ENODEV          19  /* No such device */
-#define  ENOTDIR         20  /* Not a directory */
-#define  EISDIR          21  /* Is a directory */
-#define  EINVAL          22  /* Invalid argument */
-#define  ENFILE          23  /* File table overflow */
-#define  EMFILE          24  /* Too many open files */
-#define  ENOTTY          25  /* Not a typewriter */
-#define  ETXTBSY         26  /* Text file busy */
-#define  EFBIG           27  /* File too large */
-#define  ENOSPC          28  /* No space left on device */
-#define  ESPIPE          29  /* Illegal seek */
-#define  EROFS           30  /* Read-only file system */
-#define  EMLINK          31  /* Too many links */
-#define  EPIPE           32  /* Broken pipe */
-#define  EDOM            33  /* Math argument out of domain of func */
-#define  ERANGE          34  /* Math result not representable */
-#define  EDEADLK         35  /* Resource deadlock would occur */
-#define  ENAMETOOLONG    36  /* File name too long */
-#define  ENOLCK          37  /* No record locks available */
-#define  ENOSYS          38  /* Function not implemented */
-#define  ENOTEMPTY       39  /* Directory not empty */
-#define  ELOOP           40  /* Too many symbolic links encountered */
-#define  EWOULDBLOCK     EAGAIN  /* Operation would block */
-#define  ENOMSG          42  /* No message of desired type */
-#define  EIDRM           43  /* Identifier removed */
-#define  ECHRNG          44  /* Channel number out of range */
-#define  EL2NSYNC        45  /* Level 2 not synchronized */
-#define  EL3HLT          46  /* Level 3 halted */
-#define  EL3RST          47  /* Level 3 reset */
-#define  ELNRNG          48  /* Link number out of range */
-#define  EUNATCH         49  /* Protocol driver not attached */
-#define  ENOCSI          50  /* No CSI structure available */
-#define  EL2HLT          51  /* Level 2 halted */
-#define  EBADE           52  /* Invalid exchange */
-#define  EBADR           53  /* Invalid request descriptor */
-#define  EXFULL          54  /* Exchange full */
-#define  ENOANO          55  /* No anode */
-#define  EBADRQC         56  /* Invalid request code */
-#define  EBADSLT         57  /* Invalid slot */
-
-#define  EDEADLOCK       EDEADLK
-
-#define  EBFONT          59  /* Bad font file format */
-#define  ENOSTR          60  /* Device not a stream */
-#define  ENODATA         61  /* No data available */
-#define  ETIME           62  /* Timer expired */
-#define  ENOSR           63  /* Out of streams resources */
-#define  ENONET          64  /* Machine is not on the network */
-#define  ENOPKG          65  /* Package not installed */
-#define  EREMOTE         66  /* Object is remote */
-#define  ENOLINK         67  /* Link has been severed */
-#define  EADV            68  /* Advertise error */
-#define  ESRMNT          69  /* Srmount error */
-#define  ECOMM           70  /* Communication error on send */
-#define  EPROTO          71  /* Protocol error */
-#define  EMULTIHOP       72  /* Multihop attempted */
-#define  EDOTDOT         73  /* RFS specific error */
-#define  EBADMSG         74  /* Not a data message */
-#define  EOVERFLOW       75  /* Value too large for defined data type */
-#define  ENOTUNIQ        76  /* Name not unique on network */
-#define  EBADFD          77  /* File descriptor in bad state */
-#define  EREMCHG         78  /* Remote address changed */
-#define  ELIBACC         79  /* Can not access a needed shared library */
-#define  ELIBBAD         80  /* Accessing a corrupted shared library */
-#define  ELIBSCN         81  /* .lib section in a.out corrupted */
-#define  ELIBMAX         82  /* Attempting to link in too many shared libraries */
-#define  ELIBEXEC        83  /* Cannot exec a shared library directly */
-#define  EILSEQ          84  /* Illegal byte sequence */
-#define  ERESTART        85  /* Interrupted system call should be restarted */
-#define  ESTRPIPE        86  /* Streams pipe error */
-#define  EUSERS          87  /* Too many users */
-#define  ENOTSOCK        88  /* Socket operation on non-socket */
-#define  EDESTADDRREQ    89  /* Destination address required */
-#define  EMSGSIZE        90  /* Message too long */
-#define  EPROTOTYPE      91  /* Protocol wrong type for socket */
-#define  ENOPROTOOPT     92  /* Protocol not available */
-#define  EPROTONOSUPPORT 93  /* Protocol not supported */
-#define  ESOCKTNOSUPPORT 94  /* Socket type not supported */
-#define  EOPNOTSUPP      95  /* Operation not supported on transport endpoint */
-#define  EPFNOSUPPORT    96  /* Protocol family not supported */
-#define  EAFNOSUPPORT    97  /* Address family not supported by protocol */
-#define  EADDRINUSE      98  /* Address already in use */
-#define  EADDRNOTAVAIL   99  /* Cannot assign requested address */
-#define  ENETDOWN       100  /* Network is down */
-#define  ENETUNREACH    101  /* Network is unreachable */
-#define  ENETRESET      102  /* Network dropped connection because of reset */
-#define  ECONNABORTED   103  /* Software caused connection abort */
-#define  ECONNRESET     104  /* Connection reset by peer */
-#define  ENOBUFS        105  /* No buffer space available */
-#define  EISCONN        106  /* Transport endpoint is already connected */
-#define  ENOTCONN       107  /* Transport endpoint is not connected */
-#define  ESHUTDOWN      108  /* Cannot send after transport endpoint shutdown */
-#define  ETOOMANYREFS   109  /* Too many references: cannot splice */
-#define  ETIMEDOUT      110  /* Connection timed out */
-#define  ECONNREFUSED   111  /* Connection refused */
-#define  EHOSTDOWN      112  /* Host is down */
-#define  EHOSTUNREACH   113  /* No route to host */
-#define  EALREADY       114  /* Operation already in progress */
-#define  EINPROGRESS    115  /* Operation now in progress */
-#define  ESTALE         116  /* Stale NFS file handle */
-#define  EUCLEAN        117  /* Structure needs cleaning */
-#define  ENOTNAM        118  /* Not a XENIX named type file */
-#define  ENAVAIL        119  /* No XENIX semaphores available */
-#define  EISNAM         120  /* Is a named type file */
-#define  EREMOTEIO      121  /* Remote I/O error */
-#define  EDQUOT         122  /* Quota exceeded */
-
-#define  ENOMEDIUM      123  /* No medium found */
-#define  EMEDIUMTYPE    124  /* Wrong medium type */
-
-#ifndef errno
-extern int errno;
-#endif
-
-#else /* LWIP_PROVIDE_ERRNO */
-
-/* Define LWIP_ERRNO_INCLUDE to <errno.h> to include the error defines here */
-#ifdef LWIP_ERRNO_INCLUDE
-//#include 
-#endif /* LWIP_ERRNO_INCLUDE */
-
-#endif /* LWIP_PROVIDE_ERRNO */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* LWIP_HDR_ERRNO_H */
+/**
+ * @file
+ * Posix Errno defines
+ */
+
+/*
+ * Copyright (c) 2001-2004 Swedish Institute of Computer Science.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
+ *
+ * This file is part of the lwIP TCP/IP stack.
+ *
+ * Author: Adam Dunkels <adam@sics.se>
+ *
+ */
+#ifndef LWIP_HDR_ERRNO_H
+#define LWIP_HDR_ERRNO_H
+
+#include "lwip/opt.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef LWIP_PROVIDE_ERRNO
+
+#define  EPERM            1  /* Operation not permitted */
+#define  ENOENT           2  /* No such file or directory */
+#define  ESRCH            3  /* No such process */
+#define  EINTR            4  /* Interrupted system call */
+#define  EIO              5  /* I/O error */
+#define  ENXIO            6  /* No such device or address */
+#define  E2BIG            7  /* Arg list too long */
+#define  ENOEXEC          8  /* Exec format error */
+#define  EBADF            9  /* Bad file number */
+#define  ECHILD          10  /* No child processes */
+#define  EAGAIN          11  /* Try again */
+#define  ENOMEM          12  /* Out of memory */
+#define  EACCES          13  /* Permission denied */
+#define  EFAULT          14  /* Bad address */
+#define  ENOTBLK         15  /* Block device required */
+#define  EBUSY           16  /* Device or resource busy */
+#define  EEXIST          17  /* File exists */
+#define  EXDEV           18  /* Cross-device link */
+#define  ENODEV          19  /* No such device */
+#define  ENOTDIR         20  /* Not a directory */
+#define  EISDIR          21  /* Is a directory */
+#define  EINVAL          22  /* Invalid argument */
+#define  ENFILE          23  /* File table overflow */
+#define  EMFILE          24  /* Too many open files */
+#define  ENOTTY          25  /* Not a typewriter */
+#define  ETXTBSY         26  /* Text file busy */
+#define  EFBIG           27  /* File too large */
+#define  ENOSPC          28  /* No space left on device */
+#define  ESPIPE          29  /* Illegal seek */
+#define  EROFS           30  /* Read-only file system */
+#define  EMLINK          31  /* Too many links */
+#define  EPIPE           32  /* Broken pipe */
+#define  EDOM            33  /* Math argument out of domain of func */
+#define  ERANGE          34  /* Math result not representable */
+#define  EDEADLK         35  /* Resource deadlock would occur */
+#define  ENAMETOOLONG    36  /* File name too long */
+#define  ENOLCK          37  /* No record locks available */
+#define  ENOSYS          38  /* Function not implemented */
+#define  ENOTEMPTY       39  /* Directory not empty */
+#define  ELOOP           40  /* Too many symbolic links encountered */
+#define  EWOULDBLOCK     EAGAIN  /* Operation would block */
+#define  ENOMSG          42  /* No message of desired type */
+#define  EIDRM           43  /* Identifier removed */
+#define  ECHRNG          44  /* Channel number out of range */
+#define  EL2NSYNC        45  /* Level 2 not synchronized */
+#define  EL3HLT          46  /* Level 3 halted */
+#define  EL3RST          47  /* Level 3 reset */
+#define  ELNRNG          48  /* Link number out of range */
+#define  EUNATCH         49  /* Protocol driver not attached */
+#define  ENOCSI          50  /* No CSI structure available */
+#define  EL2HLT          51  /* Level 2 halted */
+#define  EBADE           52  /* Invalid exchange */
+#define  EBADR           53  /* Invalid request descriptor */
+#define  EXFULL          54  /* Exchange full */
+#define  ENOANO          55  /* No anode */
+#define  EBADRQC         56  /* Invalid request code */
+#define  EBADSLT         57  /* Invalid slot */
+
+#define  EDEADLOCK       EDEADLK
+
+#define  EBFONT          59  /* Bad font file format */
+#define  ENOSTR          60  /* Device not a stream */
+#define  ENODATA         61  /* No data available */
+#define  ETIME           62  /* Timer expired */
+#define  ENOSR           63  /* Out of streams resources */
+#define  ENONET          64  /* Machine is not on the network */
+#define  ENOPKG          65  /* Package not installed */
+#define  EREMOTE         66  /* Object is remote */
+#define  ENOLINK         67  /* Link has been severed */
+#define  EADV            68  /* Advertise error */
+#define  ESRMNT          69  /* Srmount error */
+#define  ECOMM           70  /* Communication error on send */
+#define  EPROTO          71  /* Protocol error */
+#define  EMULTIHOP       72  /* Multihop attempted */
+#define  EDOTDOT         73  /* RFS specific error */
+#define  EBADMSG         74  /* Not a data message */
+#define  EOVERFLOW       75  /* Value too large for defined data type */
+#define  ENOTUNIQ        76  /* Name not unique on network */
+#define  EBADFD          77  /* File descriptor in bad state */
+#define  EREMCHG         78  /* Remote address changed */
+#define  ELIBACC         79  /* Can not access a needed shared library */
+#define  ELIBBAD         80  /* Accessing a corrupted shared library */
+#define  ELIBSCN         81  /* .lib section in a.out corrupted */
+#define  ELIBMAX         82  /* Attempting to link in too many shared libraries */
+#define  ELIBEXEC        83  /* Cannot exec a shared library directly */
+#define  EILSEQ          84  /* Illegal byte sequence */
+#define  ERESTART        85  /* Interrupted system call should be restarted */
+#define  ESTRPIPE        86  /* Streams pipe error */
+#define  EUSERS          87  /* Too many users */
+#define  ENOTSOCK        88  /* Socket operation on non-socket */
+#define  EDESTADDRREQ    89  /* Destination address required */
+#define  EMSGSIZE        90  /* Message too long */
+#define  EPROTOTYPE      91  /* Protocol wrong type for socket */
+#define  ENOPROTOOPT     92  /* Protocol not available */
+#define  EPROTONOSUPPORT 93  /* Protocol not supported */
+#define  ESOCKTNOSUPPORT 94  /* Socket type not supported */
+#define  EOPNOTSUPP      95  /* Operation not supported on transport endpoint */
+#define  EPFNOSUPPORT    96  /* Protocol family not supported */
+#define  EAFNOSUPPORT    97  /* Address family not supported by protocol */
+#define  EADDRINUSE      98  /* Address already in use */
+#define  EADDRNOTAVAIL   99  /* Cannot assign requested address */
+#define  ENETDOWN       100  /* Network is down */
+#define  ENETUNREACH    101  /* Network is unreachable */
+#define  ENETRESET      102  /* Network dropped connection because of reset */
+#define  ECONNABORTED   103  /* Software caused connection abort */
+#define  ECONNRESET     104  /* Connection reset by peer */
+#define  ENOBUFS        105  /* No buffer space available */
+#define  EISCONN        106  /* Transport endpoint is already connected */
+#define  ENOTCONN       107  /* Transport endpoint is not connected */
+#define  ESHUTDOWN      108  /* Cannot send after transport endpoint shutdown */
+#define  ETOOMANYREFS   109  /* Too many references: cannot splice */
+#define  ETIMEDOUT      110  /* Connection timed out */
+#define  ECONNREFUSED   111  /* Connection refused */
+#define  EHOSTDOWN      112  /* Host is down */
+#define  EHOSTUNREACH   113  /* No route to host */
+#define  EALREADY       114  /* Operation already in progress */
+#define  EINPROGRESS    115  /* Operation now in progress */
+#define  ESTALE         116  /* Stale NFS file handle */
+#define  EUCLEAN        117  /* Structure needs cleaning */
+#define  ENOTNAM        118  /* Not a XENIX named type file */
+#define  ENAVAIL        119  /* No XENIX semaphores available */
+#define  EISNAM         120  /* Is a named type file */
+#define  EREMOTEIO      121  /* Remote I/O error */
+#define  EDQUOT         122  /* Quota exceeded */
+
+#define  ENOMEDIUM      123  /* No medium found */
+#define  EMEDIUMTYPE    124  /* Wrong medium type */
+
+#ifndef errno
+extern int errno;
+#endif
+
+#else /* LWIP_PROVIDE_ERRNO */
+
+/* Define LWIP_ERRNO_INCLUDE to <errno.h> to include the error defines here */
+#ifdef LWIP_ERRNO_INCLUDE
+//#include 
+#endif /* LWIP_ERRNO_INCLUDE */
+
+#endif /* LWIP_PROVIDE_ERRNO */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LWIP_HDR_ERRNO_H */
diff --git a/include/Debug.hpp b/include/Debug.hpp
index b306d1e..d807cbb 100644
--- a/include/Debug.hpp
+++ b/include/Debug.hpp
@@ -34,11 +34,14 @@
 #define LIBZT_DEBUG_HPP
 
 #include <pthread.h>
-#include <sys/syscall.h>
 #include <sys/types.h>
 #include <unistd.h>
 #include <cstring>
 
+#if defined(__linux__)
+#include <sys/syscall.h>
+#endif
+
 #include "Platform.h"
 
 #define ZT_MSG_ERROR       true // Errors
@@ -53,7 +56,7 @@
 #if defined(__APPLE__)
 		#include "TargetConditionals.h"
 #endif
-#if defined(ZT_COLOR) && !defined(__ANDROID__) && !defined(TARGET_OS_IPHONE) && !defined(TARGET_IPHONE_SIMULATOR) && !defined(__APP_FRAMEWORK__)
+#if defined(ZT_COLOR) && !defined(__MINGW32__) && !defined(__ANDROID__) && !defined(TARGET_OS_IPHONE) && !defined(TARGET_IPHONE_SIMULATOR) && !defined(__APP_FRAMEWORK__)
 	#define ZT_RED   "\x1B[31m"
 	#define ZT_GRN   "\x1B[32m"
 	#define ZT_YEL   "\x1B[33m"
@@ -88,6 +91,9 @@ extern unsigned int gettid(); // defined in libzt.cpp
 #elif __APPLE__
   #define ZT_THREAD_ID (long)0//(long)gettid()
 #endif
+#ifdef __MINGW32__
+  #define ZT_THREAD_ID (long)0
+#endif
 
 #if defined(__JNI_LIB__)
 		#include <jni.h>
diff --git a/include/Defs.h b/include/Defs.h
index 02a7936..8c2776c 100644
--- a/include/Defs.h
+++ b/include/Defs.h
@@ -48,7 +48,9 @@
  */
 #define ZTO_ID_LEN                  16
 
+#if !defined(__MINGW32__)
 typedef uint32_t socklen_t;
+#endif
 
 /****************************************************************************/
 /* For SOCK_RAW support, it will initially be modeled after linux's API, so */
diff --git a/include/Utilities.h b/include/Utilities.h
index 625c6db..ae1b6a1 100644
--- a/include/Utilities.h
+++ b/include/Utilities.h
@@ -35,6 +35,17 @@
 
 #include "InetAddress.hpp"
 
+#if defined(__MINGW32__)
+/* Address-parsing helpers built only for MinGW; the definitions are in src/Utilities.cpp. */
+#define NS_INADDRSZ  4 /* bytes in a binary IPv4 address */
+#define NS_IN6ADDRSZ 16 /* bytes in a binary IPv6 address */
+#define NS_INT16SZ   2 /* bytes in one 16-bit IPv6 group */
+
+int inet_pton4(const char *src, void *dst); /* IPv4 text -> 4 bytes; 1 on success, 0 if malformed */
+int inet_pton6(const char *src, void *dst); /* IPv6 text -> 16 bytes; 1 on success, 0 if malformed */
+int inet_pton(int af, const char *src, void *dst); /* dispatches on af; -1 for other families */
+#endif /* __MINGW32__ */
+
 /**
  * @brief Returns masked address for subnet comparisons
  *
@@ -62,7 +73,7 @@ char *beautify_eth_proto_nums(int proto);
  * @param inet
  * @return
  */
-void sockaddr2inet(int socket_family, const struct sockaddr *addr, ZeroTier::InetAddress *inet);
+//void sockaddr2inet(int socket_family, const struct sockaddr *addr, ZeroTier::InetAddress *inet);
 
 /**
  * @brief Convert a raw MAC address byte array into a human-readable string
diff --git a/include/libzt.h b/include/libzt.h
index 714a603..5fa678b 100644
--- a/include/libzt.h
+++ b/include/libzt.h
@@ -33,11 +33,14 @@
 #ifndef LIBZT_H
 #define LIBZT_H
 
-#include <poll.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <vector>
 
+#if defined(__linux__)
+ #include <poll.h>
+#endif
+
 #include "Debug.hpp"
 #include "Defs.h"
 
@@ -440,7 +443,9 @@ int zts_close(int fd);
  * @param timeout
  * @return
  */
+#if defined(__linux__)
 int zts_poll(struct pollfd *fds, nfds_t nfds, int timeout);
+#endif
 
 /**
  * @brief Monitor multiple file descriptors, waiting until one or more of the file descriptors become "ready"
diff --git a/include/lwIP.hpp b/include/lwIP.hpp
index 236d1df..1c05742 100644
--- a/include/lwIP.hpp
+++ b/include/lwIP.hpp
@@ -60,12 +60,12 @@ void lwip_dns_init();
  * @usage lwip_driver_init()
  * @return
  */
-void lwip_start_dhcp(struct netif *interface);
+void lwip_start_dhcp(void *netif);
 
-void general_lwip_init_interface(void *tapref, struct netif *interface, const char *name, const ZeroTier::MAC &mac,
+void general_lwip_init_interface(void *tapref, void *netif, const char *name, const ZeroTier::MAC &mac,
 	const ZeroTier::InetAddress &addr, const ZeroTier::InetAddress &nm, const ZeroTier::InetAddress &gw);
 
-void general_turn_on_interface(struct netif *interface);
+void general_turn_on_interface(void *netif);
 
 /**
  * @brief Set up an interface in the network stack for the VirtualTap.
diff --git a/make-liblwip.mk b/make-liblwip.mk
index 04638af..3c738af 100644
--- a/make-liblwip.mk
+++ b/make-liblwip.mk
@@ -32,21 +32,38 @@
 #
 
 CONTRIBDIR=ext/lwip-contrib
-LWIPARCH=$(CONTRIBDIR)/ports/unix
-
-#Set this to where you have the lwip core module checked out from CVS
-#default assumes it's a dir named lwip at the same level as the contrib module
 LWIPDIR=ext/lwip/src
-
 CCDEP=clang++
+
 # Automagically pick clang or gcc, with preference for clang
 # This is only done if we have not overridden these with an environment or CLI variable
-ifeq ($(origin CCX),default)
-	CCX=$(shell if [ -e /usr/bin/clang++ ]; then echo clang++; else echo g++; fi)
+ifeq ($(origin CXX),default)
+	CXX=$(shell if [ -e /usr/bin/clang++ ]; then echo clang++; else echo g++; fi)
 endif
 
-LWIPINCLUDES:=-I$(LWIPDIR)/include -I$(LWIPARCH) -I$(LWIPDIR) -I. -Iext -Iinclude
-CFLAGS=-Wno-format -Wno-deprecated -O3 -g -Wall -fPIC
+OSTYPE=$(shell uname -s | tr '[A-Z]' '[a-z]')
+BUILD=build/$(OSTYPE)
+
+CCX=clang++
+
+# Windows
+ifeq ($(OSTYPE),mingw32_nt-6.2)
+CCX=g++
+WINDEFS=-Wno-c++11-compat -std=c++98
+LWIPARCH=$(CONTRIBDIR)/ports/win32
+endif
+ifeq ($(OSTYPE),linux)
+LWIPARCH=$(CONTRIBDIR)/ports/unix
+endif
+ifeq ($(OSTYPE),darwin)
+LWIPARCH=$(CONTRIBDIR)/ports/unix
+endif
+ifeq ($(OSTYPE),freebsd)
+LWIPARCH=$(CONTRIBDIR)/ports/unix
+endif
+
+LWIPINCLUDES:=-I$(LWIPDIR)/include -I$(LWIPARCH) -I$(LWIPARCH)/include -I$(LWIPDIR) -I. -Iext -Iinclude
+CFLAGS= $(WINDEFS) -Wno-format -Wno-deprecated -O3 -g -Wall -fPIC
 CFLAGS+=-DLWIP_IPV4 -DLWIP_IPV6=0 -DIPv4 -DLWIP_DEBUG=1 $(LWIPINCLUDES)
 
 UNIXLIB=liblwip.a
@@ -63,8 +80,6 @@ LWIPNOAPPSFILES+=$(ARCHFILES)
 LWIPNOAPPSFILESW=$(wildcard $(LWIPNOAPPSFILES))
 LWIPNOAPPSOBJS=$(notdir $(LWIPNOAPPSFILESW:.c=.o))
 
-CCX=clang++
-
 %.o:
 	$(CCX) $(CFLAGS) -c $(<:.o=.c)
 
diff --git a/src/Platform.cpp b/src/Platform.cpp
index 51b2b4f..5cbc1ae 100644
--- a/src/Platform.cpp
+++ b/src/Platform.cpp
@@ -53,7 +53,8 @@ void handle_general_failure() {
 inline unsigned int gettid()
 {
 #ifdef _WIN32
-		return GetCurrentThreadId();
+		//return GetCurrentThreadId();
+		return 0;
 #elif defined(__linux__)
 		return static_cast<unsigned int>(syscall(__NR_gettid));
 #elif defined(__APPLE__)
diff --git a/src/Utilities.cpp b/src/Utilities.cpp
index 7c1d1bb..d0010e7 100644
--- a/src/Utilities.cpp
+++ b/src/Utilities.cpp
@@ -32,6 +32,171 @@
 
 #include "InetAddress.hpp"
 #include "Debug.hpp"
+#include "Utilities.h"
+
+#if defined(__MINGW32__)
+/* Parse dotted-decimal IPv4 text at src into NS_INADDRSZ bytes at dst; returns 1 on success, 0 if malformed. */
+int inet_pton4(const char *src, void *dst)
+{
+    uint8_t tmp[NS_INADDRSZ], *tp;
+    /* Octets are assembled in tmp; dst is written only after the whole string validates. */
+    int saw_digit = 0;
+    int octets = 0;
+    *(tp = tmp) = 0;
+
+    int ch;
+    while ((ch = *src++) != '\0')
+    {
+        if (ch >= '0' && ch <= '9')
+        {
+            uint32_t n = *tp * 10 + (ch - '0');
+            /* A further digit while the octet is still 0 means a leading zero: reject. */
+            if (saw_digit && *tp == 0)
+                return 0;
+            /* Each octet must fit in a single byte. */
+            if (n > 255)
+                return 0;
+
+            *tp = n;
+            if (!saw_digit) /* first digit of a new octet */
+            {
+                if (++octets > 4)
+                    return 0;
+                saw_digit = 1;
+            }
+        }
+        else if (ch == '.' && saw_digit)
+        {
+            if (octets == 4) /* a '.' after the fourth octet */
+                return 0;
+            *++tp = 0;
+            saw_digit = 0;
+        }
+        else /* any other character, or a '.' with no digit before it */
+            return 0;
+    }
+    if (octets < 4)
+        return 0;
+
+    memcpy(dst, tmp, NS_INADDRSZ);
+
+    return 1;
+}
+/* Parse IPv6 text at src (optional "::" and dotted IPv4 tail) into NS_IN6ADDRSZ bytes at dst; returns 1 on success, 0 if malformed. */
+int inet_pton6(const char *src, void *dst)
+{
+    static const char xdigits[] = "0123456789abcdef";
+    uint8_t tmp[NS_IN6ADDRSZ];
+    /* Bytes are assembled in tmp; dst is written only after the whole string validates. */
+    uint8_t *tp = (uint8_t*) memset(tmp, '\0', NS_IN6ADDRSZ);
+    uint8_t *endp = tp + NS_IN6ADDRSZ;
+    uint8_t *colonp = NULL; /* position in tmp where "::" was seen, if any */
+
+    /* Leading :: requires some special handling. */
+    if (*src == ':')
+    {
+        if (*++src != ':')
+            return 0;
+    }
+
+    const char *curtok = src; /* start of the current group, handed to inet_pton4() for an IPv4 tail */
+    int saw_xdigit = 0;
+    uint32_t val = 0;
+    int ch; /* read via unsigned char: tolower() is undefined for negative char values */
+    while ((ch = tolower((unsigned char) *src++)) != '\0')
+    {
+        const char *pch = strchr(xdigits, ch);
+        if (pch != NULL)
+        {
+            val <<= 4;
+            val |= (pch - xdigits);
+            if (val > 0xffff) /* group does not fit in 16 bits */
+                return 0;
+            saw_xdigit = 1;
+            continue;
+        }
+        if (ch == ':')
+        {
+            curtok = src;
+            if (!saw_xdigit) /* no digits since the previous ':', i.e. "::" */
+            {
+                if (colonp) /* a second "::" is not allowed */
+                    return 0;
+                colonp = tp;
+                continue;
+            }
+            else if (*src == '\0') /* string ends in a single ':' */
+            {
+                return 0;
+            }
+            if (tp + NS_INT16SZ > endp)
+                return 0;
+            *tp++ = (uint8_t) (val >> 8) & 0xff; /* high byte first */
+            *tp++ = (uint8_t) val & 0xff;
+            saw_xdigit = 0;
+            val = 0;
+            continue;
+        }
+        if (ch == '.' && ((tp + NS_INADDRSZ) <= endp) &&
+                inet_pton4(curtok, (char*) tp) > 0)
+        {
+            tp += NS_INADDRSZ;
+            saw_xdigit = 0;
+            break; /* '\0' was seen by inet_pton4(). */
+        }
+        return 0;
+    }
+    if (saw_xdigit) /* flush the final group */
+    {
+        if (tp + NS_INT16SZ > endp)
+            return 0;
+        *tp++ = (uint8_t) (val >> 8) & 0xff;
+        *tp++ = (uint8_t) val & 0xff;
+    }
+    if (colonp != NULL)
+    {
+        /*
+         * Since some memmove()'s erroneously fail to handle
+         * overlapping regions, we'll do the shift by hand.
+         */
+        const int n = tp - colonp;
+        /* "::" needs room to expand into; fail if all 16 bytes are already filled. */
+        if (tp == endp)
+            return 0;
+        /* Move the n bytes written after "::" to the end of tmp, zeroing their old slots. */
+        for (int i = 1; i <= n; i++)
+        {
+            endp[-i] = colonp[n - i];
+            colonp[n - i] = 0;
+        }
+        tp = endp;
+    }
+    if (tp != endp) /* too few groups and no "::" to pad them */
+        return 0;
+
+    memcpy(dst, tmp, NS_IN6ADDRSZ);
+
+    return 1;
+}
+/* Dispatch on address family: AF_INET -> inet_pton4(), AF_INET6 -> inet_pton6(); any other af returns -1 (errno is not set here). */
+int inet_pton(int af, const char *src, void *dst)
+{
+    switch (af)
+    {
+    case AF_INET:
+        return inet_pton4(src, dst);
+    case AF_INET6:
+        return inet_pton6(src, dst);
+    default:
+        return -1;
+    }
+}
+
+#endif
+
+
+
+
 
 char *beautify_eth_proto_nums(int proto)
 {
@@ -120,6 +285,7 @@ bool ipv6_in_subnet(ZeroTier::InetAddress *subnet, ZeroTier::InetAddress *addr)
 	return !strcmp(r.toIpString(b0), b.toIpString(b1));
 }
 
+/*
 void sockaddr2inet(int socket_family, const struct sockaddr *addr, ZeroTier::InetAddress *inet)
 {
 	char ipstr[INET6_ADDRSTRLEN];
@@ -137,6 +303,7 @@ void sockaddr2inet(int socket_family, const struct sockaddr *addr, ZeroTier::Ine
 		inet->fromString(addrstr);
 	}
 }
+*/
 
 void mac2str(char *macbuf, int len, unsigned char* addr)
 {
diff --git a/src/VirtualSocket.cpp b/src/VirtualSocket.cpp
index 9bb0edc..6cc872a 100644
--- a/src/VirtualSocket.cpp
+++ b/src/VirtualSocket.cpp
@@ -34,7 +34,10 @@
 #define ZT_VIRTUALSOCKET_HPP
 
 #include <ctime>
+
+#if defined(__linux__) || #defined(__APPLE__)
 #include <sys/socket.h>
+#endif
 
 #include "VirtualSocket.h"
 #include "VirtualBindingPair.h"
diff --git a/src/VirtualTap.cpp b/src/VirtualTap.cpp
index 85cb9b0..c33a0c0 100644
--- a/src/VirtualTap.cpp
+++ b/src/VirtualTap.cpp
@@ -421,7 +421,7 @@ namespace ZeroTier {
 					target_addr = managed_routes->at(i).target;
 					via_addr = managed_routes->at(i).via;
 					nm = target_addr.netmask();
-					for (int j=0; j<routes.size(); j++) {
+					for (size_t j=0; j<routes.size(); j++) {
 						if (via_addr.ipsEqual(null_addr) || target_addr.ipsEqual(null_addr)) {
 							found=true;
 							continue;
@@ -439,7 +439,7 @@ namespace ZeroTier {
 					}
 				}
 				// check if route exists in tap but not in pushed routes (remove)
-				for (int i=0; i<routes.size(); i++) {
+				for (size_t i=0; i<routes.size(); i++) {
 					found = false;
 					for (int j=0; j<ZT_MAX_NETWORK_ROUTES; j++) {
 						target_addr = managed_routes->at(j).target;
diff --git a/src/libzt.cpp b/src/libzt.cpp
index 662ba2b..2775851 100644
--- a/src/libzt.cpp
+++ b/src/libzt.cpp
@@ -66,7 +66,7 @@ void sys2lwip(int fd, const struct sockaddr *orig, struct sockaddr *modified)
 	}
 #if defined(LIBZT_IPV4)
 	if (ss.ss_family == AF_INET) {
-#if defined(__linux__)
+#if defined(__linux__) || defined(__MINGW32__)
 		struct sockaddr_in *p4 = (struct sockaddr_in *)modified;
 		struct sockaddr_in *addr4 = (struct sockaddr_in*)orig;
 		p4->sin_len = sizeof(struct sockaddr_in);
@@ -80,7 +80,7 @@ void sys2lwip(int fd, const struct sockaddr *orig, struct sockaddr *modified)
 
 #if defined(LIBZT_IPV6)
 	if (ss.ss_family == AF_INET6) {
-#if defined(__linux__)
+#if defined(__linux__) || defined(__MINGW32__)
 		struct sockaddr_in6 *p6 = (struct sockaddr_in6 *)modified;
 		struct sockaddr_in6 *addr6 = (struct sockaddr_in6*)orig;
 		p6->sin6_len = sizeof(struct sockaddr_in6);
@@ -311,6 +311,7 @@ int zts_close(int fd)
 	return err;
 }
 
+#if defined(__linux__)
 int zts_poll(struct pollfd *fds, nfds_t nfds, int timeout)
 {
 	int err = -1;
@@ -324,6 +325,7 @@ int zts_poll(struct pollfd *fds, nfds_t nfds, int timeout)
 #endif
 	return err;
 }
+#endif
 
 int zts_select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, 
 	struct timeval *timeout)
diff --git a/src/lwIP.cpp b/src/lwIP.cpp
index 429335c..0e3c309 100644
--- a/src/lwIP.cpp
+++ b/src/lwIP.cpp
@@ -115,6 +115,9 @@ void lwip_driver_init()
 	if (lwip_driver_initialized == true) {
 		return;
 	}
+#if defined(__MINGW32__)
+	sys_init(); // required for win32 initialization of critical sections
+#endif
 	sys_thread_new("main_network_stack_thread", main_network_stack_thread,
 		NULL, DEFAULT_THREAD_STACKSIZE, DEFAULT_THREAD_PRIO);
 }
@@ -160,7 +163,7 @@ err_t lwip_eth_tx(struct netif *netif, struct pbuf *p)
 	return ERR_OK;
 }
 
-void general_lwip_init_interface(void *tapref, struct netif *interface, const char *name, const ZeroTier::MAC &mac, const ZeroTier::InetAddress &addr, const ZeroTier::InetAddress &nm, const ZeroTier::InetAddress &gw)
+void general_lwip_init_interface(void *tapref, void *netif, const char *name, const ZeroTier::MAC &mac, const ZeroTier::InetAddress &addr, const ZeroTier::InetAddress &nm, const ZeroTier::InetAddress &gw)
 {
 #if defined(LIBZT_IPV4)
 	char ipbuf[INET6_ADDRSTRLEN], nmbuf[INET6_ADDRSTRLEN], gwbuf[INET6_ADDRSTRLEN];
@@ -192,7 +195,7 @@ void general_lwip_init_interface(void *tapref, struct netif *interface, const ch
 #endif
 }
 
-void general_turn_on_interface(struct netif *interface)
+void general_turn_on_interface(void *netif)
 {
 	//netif_set_up(&n1);
 	//netif_set_default(&n1);
@@ -206,9 +209,9 @@ void lwip_dns_init()
 	dns_init();
 }
 
-void lwip_start_dhcp(struct netif *interface)
+void lwip_start_dhcp(void *netif)
 {
-	netifapi_dhcp_start(interface);
+	netifapi_dhcp_start((struct netif *)netif);
 }
 
 void lwip_init_interface(void *tapref, const ZeroTier::MAC &mac, const ZeroTier::InetAddress &ip)

From 29e31dd60a608bdd9e1c735e395bb19d2f3f274b Mon Sep 17 00:00:00 2001
From: Joseph Henry <josephjah@gmail.com>
Date: Tue, 10 Oct 2017 11:40:14 -0700
Subject: [PATCH 02/11] Fixes for Unix-like builds after introduction of
 Windows code

---
 Makefile                                           | 11 +++++++----
 .../ports/unix/include/{ => arch}/cc.h             |  0
 .../ports/unix/include/{ => arch}/perf.h           |  0
 .../ports/unix/include/{ => arch}/sys_arch.h       |  0
 src/VirtualSocket.cpp                              |  2 +-
 src/VirtualTap.hpp                                 | 14 ++++++++++++++
 6 files changed, 22 insertions(+), 5 deletions(-)
 rename ext/lwip-contrib/ports/unix/include/{ => arch}/cc.h (100%)
 rename ext/lwip-contrib/ports/unix/include/{ => arch}/perf.h (100%)
 rename ext/lwip-contrib/ports/unix/include/{ => arch}/sys_arch.h (100%)

diff --git a/Makefile b/Makefile
index 75f074d..5b97fdd 100644
--- a/Makefile
+++ b/Makefile
@@ -35,6 +35,7 @@ endif
 
 OSTYPE=$(shell uname -s | tr '[A-Z]' '[a-z]')
 BUILD=build/$(OSTYPE)
+LWIPCONTRIBDIR=ext/lwip-contrib
 
 # Windows
 ifeq ($(OSTYPE),mingw32_nt-6.2)
@@ -44,29 +45,31 @@ CC=gcc
 CXX=g++
 CXXFLAGS+=-Wno-unknown-pragmas -Wno-pointer-arith -Wno-deprecated-declarations -Wno-conversion-null
 WINDEFS=-lws2_32 -lshlwapi -liphlpapi -static -static-libgcc -static-libstdc++
-CONTRIBDIR=ext/lwip-contrib
-LWIPARCH=$(CONTRIBDIR)/ports/win32
-LWIPARCHINCLUDE=$(LWIPARCH)/include
+LWIPARCHINCLUDE=$(LWIPCONTRIBDIR)/ports/win32/include
 endif
 # Darwin
 ifeq ($(OSTYPE),darwin)
 ARTOOL=libtool
 ARFLAGS=-static
+LWIPARCHINCLUDE=$(LWIPCONTRIBDIR)/ports/unix/include
 endif
 # Linux
 ifeq ($(OSTYPE),linux)
 ARTOOL=ar
 ARFLAGS=rcs
+LWIPARCHINCLUDE=$(LWIPCONTRIBDIR)/ports/unix/include
 endif
 # FreeBSD
 ifeq ($(OSTYPE),freebsd)
 ARTOOL=ar
 ARFLAGS=rcs
+LWIPARCHINCLUDE=$(LWIPCONTRIBDIR)/ports/unix/include
 endif
 # OpenBSD
 ifeq ($(OSTYPE),openbsd)
 ARTOOL=ar
 ARFLAGS=rcs
+LWIPARCHINCLUDE=$(LWIPCONTRIBDIR)/ports/unix/include
 endif
 
 ##############################################################################
@@ -224,7 +227,7 @@ STACK_DRIVER_DEFS+=-DLWIP_DONT_PROVIDE_BYTEORDER_FUNCTIONS
 STACK_DRIVER_DEFS+=-DSTACK_LWIP
 STACK_DRIVER_FILES:=src/lwIP.cpp
 LWIPDIR=ext/lwip/src
-STACK_INCLUDES+=$(LWIPARCHINCLUDE) -Iext/lwip/src/include/lwip \
+STACK_INCLUDES+=-I$(LWIPARCHINCLUDE) -Iext/lwip/src/include/lwip \
 	-I$(LWIPDIR)/include \
 	-I$(LWIPARCH)/include \
 	-I$(LWIPDIR)/include/ipv4 \
diff --git a/ext/lwip-contrib/ports/unix/include/cc.h b/ext/lwip-contrib/ports/unix/include/arch/cc.h
similarity index 100%
rename from ext/lwip-contrib/ports/unix/include/cc.h
rename to ext/lwip-contrib/ports/unix/include/arch/cc.h
diff --git a/ext/lwip-contrib/ports/unix/include/perf.h b/ext/lwip-contrib/ports/unix/include/arch/perf.h
similarity index 100%
rename from ext/lwip-contrib/ports/unix/include/perf.h
rename to ext/lwip-contrib/ports/unix/include/arch/perf.h
diff --git a/ext/lwip-contrib/ports/unix/include/sys_arch.h b/ext/lwip-contrib/ports/unix/include/arch/sys_arch.h
similarity index 100%
rename from ext/lwip-contrib/ports/unix/include/sys_arch.h
rename to ext/lwip-contrib/ports/unix/include/arch/sys_arch.h
diff --git a/src/VirtualSocket.cpp b/src/VirtualSocket.cpp
index 6cc872a..f19edc4 100644
--- a/src/VirtualSocket.cpp
+++ b/src/VirtualSocket.cpp
@@ -35,7 +35,7 @@
 
 #include <ctime>
 
-#if defined(__linux__) || #defined(__APPLE__)
+#if defined(__linux__) || defined(__APPLE__)
 #include <sys/socket.h>
 #endif
 
diff --git a/src/VirtualTap.hpp b/src/VirtualTap.hpp
index 9fc3457..b1e5c30 100644
--- a/src/VirtualTap.hpp
+++ b/src/VirtualTap.hpp
@@ -119,6 +119,20 @@ namespace ZeroTier {
 		void threadMain()
 			throw();
 
+#if defined(__MINGW32__) 
+		/* The following is merely to make ZeroTier's OneService happy while building on Windows.
+			we won't use these in libzt */
+		NET_LUID _deviceLuid;
+		std::string _deviceInstanceId;
+		
+		/**
+		 * Returns whether the VirtualTap interface has been initialized
+		 */
+		bool isInitialized() const { return _initialized; };
+
+		inline const NET_LUID &luid() const { return _deviceLuid; }
+		inline const std::string &instanceId() const { return _deviceInstanceId; }
+#endif
 		/**
 		 * For moving data onto the ZeroTier virtual wire
 		 */

From 35aa1820eff0758a5a2b86f45b0959512b4af665 Mon Sep 17 00:00:00 2001
From: Joseph Henry <josephjah@gmail.com>
Date: Tue, 10 Oct 2017 12:15:10 -0700
Subject: [PATCH 03/11] Fixed signed comparison warnings when compiling under
 Windows

---
 src/ZT1Service.cpp | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/ZT1Service.cpp b/src/ZT1Service.cpp
index 18dc10f..bff5ad5 100644
--- a/src/ZT1Service.cpp
+++ b/src/ZT1Service.cpp
@@ -69,7 +69,7 @@ ZeroTier::VirtualTap *getTapByNWID(uint64_t nwid)
 {
 	ZeroTier::_vtaps_lock.lock();
 	ZeroTier::VirtualTap *s, *tap = nullptr;
-	for (int i=0; i<ZeroTier::vtaps.size(); i++) {
+	for (size_t i=0; i<ZeroTier::vtaps.size(); i++) {
 		s = (ZeroTier::VirtualTap*)ZeroTier::vtaps[i];
 		if (s->_nwid == nwid) { tap = s; }
 	}
@@ -82,7 +82,7 @@ ZeroTier::VirtualTap *getTapByAddr(ZeroTier::InetAddress *addr)
 	ZeroTier::_vtaps_lock.lock();
 	ZeroTier::VirtualTap *s, *tap = nullptr;
 	//char ipbuf[64], ipbuf2[64], ipbuf3[64];
-	for (int i=0; i<ZeroTier::vtaps.size(); i++) {
+	for (size_t i=0; i<ZeroTier::vtaps.size(); i++) {
 		s = (ZeroTier::VirtualTap*)ZeroTier::vtaps[i];
 		// check address schemes
 		for (int j=0; j<s->_ips.size(); j++) {
@@ -104,7 +104,7 @@ ZeroTier::VirtualTap *getTapByAddr(ZeroTier::InetAddress *addr)
 		if (tap == NULL) {
 			std::vector<ZT_VirtualNetworkRoute> *managed_routes = ZeroTier::zt1Service->getRoutes(s->_nwid);
 			ZeroTier::InetAddress target, nm, via;
-			for (int i=0; i<managed_routes->size(); i++) {
+			for (size_t i=0; i<managed_routes->size(); i++) {
 				target = managed_routes->at(i).target;
 				nm = target.netmask();
 				via = managed_routes->at(i).via;
@@ -124,7 +124,7 @@ ZeroTier::VirtualTap *getTapByName(char *ifname)
 {
 	ZeroTier::_vtaps_lock.lock();
 	ZeroTier::VirtualTap *s, *tap = nullptr;
-	for (int i=0; i<ZeroTier::vtaps.size(); i++) {
+	for (size_t i=0; i<ZeroTier::vtaps.size(); i++) {
 		s = (ZeroTier::VirtualTap*)ZeroTier::vtaps[i];
 		if (strcmp(s->_dev.c_str(), ifname) == false) {
 			tap = s;
@@ -134,11 +134,11 @@ ZeroTier::VirtualTap *getTapByName(char *ifname)
 	return tap;
 }
 
-ZeroTier::VirtualTap *getTapByIndex(int index)
+ZeroTier::VirtualTap *getTapByIndex(size_t index)
 {
 	ZeroTier::_vtaps_lock.lock();
 	ZeroTier::VirtualTap *s, *tap = nullptr;
-	for (int i=0; i<ZeroTier::vtaps.size(); i++) {
+	for (size_t i=0; i<ZeroTier::vtaps.size(); i++) {
 		s = (ZeroTier::VirtualTap*)ZeroTier::vtaps[i];
 		if (s->ifindex == index) {
 			tap = s;
@@ -247,7 +247,7 @@ void *zts_start_service(void *thread_id)
 void disableTaps()
 {
 	ZeroTier::_vtaps_lock.lock();
-	for (int i=0; i<ZeroTier::vtaps.size(); i++) {
+	for (size_t i=0; i<ZeroTier::vtaps.size(); i++) {
 		DEBUG_EXTRA("vt=%p", ZeroTier::vtaps[i]);
 		((ZeroTier::VirtualTap*)ZeroTier::vtaps[i])->_enabled = false;
 	}
@@ -260,7 +260,7 @@ void zts_get_ipv4_address(const char *nwid, char *addrstr, const int addrlen)
 		uint64_t nwid_int = strtoull(nwid, NULL, 16);
 		ZeroTier::VirtualTap *tap = getTapByNWID(nwid_int);
 		if (tap && tap->_ips.size()) {
-			for (int i=0; i<tap->_ips.size(); i++) {
+			for (size_t i=0; i<tap->_ips.size(); i++) {
 				if (tap->_ips[i].isV4()) {
 					char ipbuf[INET_ADDRSTRLEN];
 					std::string addr = tap->_ips[i].toString(ipbuf);
@@ -282,7 +282,7 @@ void zts_get_ipv6_address(const char *nwid, char *addrstr, const int addrlen)
 		uint64_t nwid_int = strtoull(nwid, NULL, 16);
 		ZeroTier::VirtualTap *tap = getTapByNWID(nwid_int);
 		if (tap && tap->_ips.size()) {
-			for (int i=0; i<tap->_ips.size(); i++) {
+			for (size_t i=0; i<tap->_ips.size(); i++) {
 				if (tap->_ips[i].isV6()) {
 					char ipbuf[INET6_ADDRSTRLEN];
 					std::string addr = tap->_ips[i].toString(ipbuf);
@@ -351,7 +351,7 @@ void zts_join(const char * nwid) {
 	}
 	// provide ZTO service reference to virtual taps
 	// TODO: This might prove to be unreliable, but it works for now
-	for (int i=0;i<ZeroTier::vtaps.size(); i++) {
+	for (size_t i=0;i<ZeroTier::vtaps.size(); i++) {
 		ZeroTier::VirtualTap *s = (ZeroTier::VirtualTap*)ZeroTier::vtaps[i];
 		s->zt1ServiceRef=(void*)ZeroTier::zt1Service;
 	}
@@ -470,7 +470,7 @@ int zts_get_peer_address(char *peer, const char *devID) {
 	if (ZeroTier::zt1Service) {
 		ZT_PeerList *pl = ZeroTier::zt1Service->getNode()->peers();
 		// uint64_t addr;
-		for (int i=0; i<pl->peerCount; i++) {
+		for (size_t i=0; i<pl->peerCount; i++) {
 			// ZT_Peer *p = &(pl->peers[i]);
 			// DEBUG_INFO("peer[%d] = %lx", i, p->address);
 		}

From f9754d82162d0e1a25fcab77dcf5ce3809b97cb4 Mon Sep 17 00:00:00 2001
From: Joseph Henry <josephjah@gmail.com>
Date: Tue, 10 Oct 2017 12:22:57 -0700
Subject: [PATCH 04/11] zts_start initialization order fix for ztproxy. Was
 creating Phy instance before calling WSAStartup on Windows

---
 examples/ztproxy/ztproxy.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/examples/ztproxy/ztproxy.cpp b/examples/ztproxy/ztproxy.cpp
index d5a913c..07f5fbb 100644
--- a/examples/ztproxy/ztproxy.cpp
+++ b/examples/ztproxy/ztproxy.cpp
@@ -63,10 +63,6 @@ namespace ZeroTier {
 			_internal_addr(internal_addr),
 			_phy(this,false,true)
 	{
-		// Start ZeroTier Node
-		// Join Network which contains resources we need to proxy
-		DEBUG_INFO("waiting for libzt to come online");
-		zts_simple_start(path.c_str(), nwid.c_str());
 		// Set up TCP listen sockets
 		// IPv4
 		struct sockaddr_in in4;
@@ -380,6 +376,11 @@ int main(int argc, char **argv)
 	int internal_port         = atoi(argv[5]);
 	std::string dns_nameserver= "";//argv[6];
 
+	// Start ZeroTier Node
+	// Join Network which contains resources we need to proxy
+	DEBUG_INFO("waiting for libzt to come online");
+	zts_simple_start(path.c_str(), nwid.c_str());
+		
 	ZeroTier::ZTProxy *proxy = new ZeroTier::ZTProxy(proxy_listen_port, nwid, path, internal_addr, internal_port, dns_nameserver);
 	
 	if (proxy) {

From 27dddb2f459ab5ff37c1e8eb4644e8ceda8ecdcc Mon Sep 17 00:00:00 2001
From: Joseph Henry <josephjah@gmail.com>
Date: Tue, 10 Oct 2017 12:23:42 -0700
Subject: [PATCH 05/11] Added WSAStartup() and WSACleanup() calls to
 zts_start() and zts_stop()

---
 src/ZT1Service.cpp | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/ZT1Service.cpp b/src/ZT1Service.cpp
index bff5ad5..d995a7a 100644
--- a/src/ZT1Service.cpp
+++ b/src/ZT1Service.cpp
@@ -55,6 +55,10 @@ namespace ZeroTier {
 	ZeroTier::Mutex _multiplexer_lock;
 }
 
+#if defined(__MINGW32__) || defined(__MINGW64__)
+WSADATA wsaData;
+#endif
+
 /****************************************************************************/
 /* ZeroTier Core helper functions for libzt - DON'T CALL THESE DIRECTLY     */
 /****************************************************************************/
@@ -395,6 +399,9 @@ void zts_start(const char *path)
 	if (path) {
 		ZeroTier::homeDir = path;
 	}
+#if defined(__MINGW32__) || defined(__MINGW64__)
+		WSAStartup(MAKEWORD(2, 2), &wsaData) // initialize WinSock. Used in Phy for loopback pipe
+#endif
 	pthread_t service_thread;
 	pthread_create(&service_thread, NULL, zts_start_service, NULL);
 }
@@ -426,6 +433,9 @@ void zts_stop() {
 		ZeroTier::zt1Service->terminate();
 		disableTaps();
 	}
+#if defined(__MINGW32__) || defined(__MINGW64__)
+	WSACleanup(); // clean up WinSock
+#endif
 }
 
 void zts_get_homepath(char *homePath, int len) {

From 2f59773b2691391da9236e6a9b5d32ad96887870 Mon Sep 17 00:00:00 2001
From: Joseph Henry <josephjah@gmail.com>
Date: Tue, 10 Oct 2017 12:30:44 -0700
Subject: [PATCH 06/11] Updated ZTO submodule to f2c69ede for
 CancelSynchronousIo omission in Thread.hpp

---
 zto | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/zto b/zto
index 59b7cbb..f2c69ed 160000
--- a/zto
+++ b/zto
@@ -1 +1 @@
-Subproject commit 59b7cbb591b8f9ed4abfc25773619d6b1bebc4d2
+Subproject commit f2c69ede9604f52da4aa885ae8d4d16a7f3fdabf

From 427f87db913d7b8b6a3817f3293126892fdf8288 Mon Sep 17 00:00:00 2001
From: Joseph Henry <josephjah@gmail.com>
Date: Tue, 10 Oct 2017 12:41:36 -0700
Subject: [PATCH 07/11] More fixes for Windows support

---
 src/VirtualTap.cpp | 1 +
 src/VirtualTap.hpp | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/VirtualTap.cpp b/src/VirtualTap.cpp
index c33a0c0..1ac169b 100644
--- a/src/VirtualTap.cpp
+++ b/src/VirtualTap.cpp
@@ -84,6 +84,7 @@ namespace ZeroTier {
 			_handler(handler),
 			_homePath(homePath),
 			_arg(arg),
+			_initialized(false),
 			_enabled(true),
 			_run(true),
 			_mac(mac),
diff --git a/src/VirtualTap.hpp b/src/VirtualTap.hpp
index b1e5c30..58fb788 100644
--- a/src/VirtualTap.hpp
+++ b/src/VirtualTap.hpp
@@ -206,6 +206,7 @@ namespace ZeroTier {
 
 		std::string _homePath;
 		void *_arg;
+		volatile bool _initialized;
 		volatile bool _enabled;
 		volatile bool _run;
 		MAC _mac;

From 99dcce6d8b4cc30f5079b300282576818a171d27 Mon Sep 17 00:00:00 2001
From: Joseph Henry <josephjah@gmail.com>
Date: Tue, 10 Oct 2017 12:48:37 -0700
Subject: [PATCH 08/11] Fixed signed comparison warnings in ztproxy when
 compiling under Windows

---
 examples/ztproxy/ztproxy.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/ztproxy/ztproxy.cpp b/examples/ztproxy/ztproxy.cpp
index 07f5fbb..f7ca154 100644
--- a/examples/ztproxy/ztproxy.cpp
+++ b/examples/ztproxy/ztproxy.cpp
@@ -129,7 +129,7 @@ namespace ZeroTier {
 			FD_ZERO(&read_set);
   			FD_ZERO(&write_set);
 			nfds = 0;
-  			for (int i=0; i<clist.size(); i++) {
+  			for (size_t i=0; i<clist.size(); i++) {
   				FD_SET(clist[i]->zfd, &read_set);
   				FD_SET(clist[i]->zfd, &write_set);
   				nfds = clist[i]->zfd > nfds ? clist[i]->zfd : nfds;
@@ -324,7 +324,7 @@ namespace ZeroTier {
 				zts_close(conn->zfd);
 			}
 			cmap.erase(sock);
-			for (int i=0; i<clist.size(); i++) {
+			for (size_t i=0; i<clist.size(); i++) {
 				if (conn == clist[i]) {
 					clist.erase(clist.begin()+i);
 					break;
@@ -380,7 +380,7 @@ int main(int argc, char **argv)
 	// Join Network which contains resources we need to proxy
 	DEBUG_INFO("waiting for libzt to come online");
 	zts_simple_start(path.c_str(), nwid.c_str());
-		
+
 	ZeroTier::ZTProxy *proxy = new ZeroTier::ZTProxy(proxy_listen_port, nwid, path, internal_addr, internal_port, dns_nameserver);
 	
 	if (proxy) {

From e3e22c35f1b2a6edae3bfa92f4812cd820bb5295 Mon Sep 17 00:00:00 2001
From: Joseph Henry <josephjah@gmail.com>
Date: Tue, 10 Oct 2017 13:02:21 -0700
Subject: [PATCH 09/11] Fixed typo

---
 src/ZT1Service.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ZT1Service.cpp b/src/ZT1Service.cpp
index d995a7a..085ba4a 100644
--- a/src/ZT1Service.cpp
+++ b/src/ZT1Service.cpp
@@ -400,7 +400,7 @@ void zts_start(const char *path)
 		ZeroTier::homeDir = path;
 	}
 #if defined(__MINGW32__) || defined(__MINGW64__)
-		WSAStartup(MAKEWORD(2, 2), &wsaData) // initialize WinSock. Used in Phy for loopback pipe
+		WSAStartup(MAKEWORD(2, 2), &wsaData); // initialize WinSock. Used in Phy for loopback pipe
 #endif
 	pthread_t service_thread;
 	pthread_create(&service_thread, NULL, zts_start_service, NULL);

From 9b9aa108c22242689013c8d4f54cfd2fdee5541a Mon Sep 17 00:00:00 2001
From: Joseph Henry <josephjah@gmail.com>
Date: Tue, 10 Oct 2017 13:05:08 -0700
Subject: [PATCH 10/11] Added includes for various interface-related Windows
 defines

---
 src/VirtualTap.hpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/VirtualTap.hpp b/src/VirtualTap.hpp
index 58fb788..6ad6a7c 100644
--- a/src/VirtualTap.hpp
+++ b/src/VirtualTap.hpp
@@ -41,6 +41,13 @@
 #include "Thread.hpp"
 #include "Phy.hpp"
 
+#if defined(__MINGW32__) || defined(__MINGW64__)
+#include <WinSock2.h>
+#include <Windows.h>
+#include <IPHlpApi.h>
+#include <Ifdef.h>
+#endif
+
 namespace ZeroTier {
 
 	/**

From 08b7ccb921bcfff1f52a5e9895f417979ad085a7 Mon Sep 17 00:00:00 2001
From: Joseph Henry <josephjah@gmail.com>
Date: Tue, 10 Oct 2017 14:20:20 -0700
Subject: [PATCH 11/11] Added return values to zts_start() and
 zts_simple_start(). Also Fixed signed comparison warnings in ztproxy when
 compiling under Windows

---
 ext/picotcp/RFC/rfc0793.txt |  5247 +++++++++++++++++
 ext/picotcp/RFC/rfc1066.txt |  5043 ++++++++++++++++
 ext/picotcp/RFC/rfc1122.txt |  6844 +++++++++++++++++++++
 ext/picotcp/RFC/rfc1123.txt |  5782 ++++++++++++++++++
 ext/picotcp/RFC/rfc1323.txt |  2075 +++++++
 ext/picotcp/RFC/rfc1379.txt |  2131 +++++++
 ext/picotcp/RFC/rfc1470.txt | 10755 ++++++++++++++++++++++++++++++++++
 ext/picotcp/RFC/rfc1644.txt |  2131 +++++++
 ext/picotcp/RFC/rfc1661.txt |  2976 ++++++++++
 ext/picotcp/RFC/rfc1693.txt |  2019 +++++++
 ext/picotcp/RFC/rfc2026.txt |  2019 +++++++
 ext/picotcp/RFC/rfc2131.txt |  2523 ++++++++
 ext/picotcp/RFC/rfc2460.txt |  2187 +++++++
 ext/picotcp/RFC/rfc2525.txt |  3419 +++++++++++
 ext/picotcp/RFC/rfc2757.txt |  2579 ++++++++
 ext/picotcp/RFC/rfc2760.txt |  2579 ++++++++
 ext/picotcp/RFC/rfc3135.txt |  2523 ++++++++
 ext/picotcp/RFC/rfc3168.txt |  3531 +++++++++++
 ext/picotcp/RFC/rfc3449.txt |  2299 ++++++++
 ext/picotcp/RFC/rfc3493.txt |  2187 +++++++
 ext/picotcp/RFC/rfc3649.txt |  1907 ++++++
 ext/picotcp/RFC/rfc3819.txt |  3363 +++++++++++
 ext/picotcp/RFC/rfc3927.txt |  1851 ++++++
 ext/picotcp/RFC/rfc4614.txt |  1851 ++++++
 ext/picotcp/RFC/rfc6762.txt |  3923 +++++++++++++
 include/ZT1Service.h        |    16 +-
 include/libzt.h             |    11 +-
 src/VirtualTap.hpp          |     2 +-
 src/ZT1Service.cpp          |    19 +-
 29 files changed, 83768 insertions(+), 24 deletions(-)
 create mode 100644 ext/picotcp/RFC/rfc0793.txt
 create mode 100644 ext/picotcp/RFC/rfc1066.txt
 create mode 100644 ext/picotcp/RFC/rfc1122.txt
 create mode 100644 ext/picotcp/RFC/rfc1123.txt
 create mode 100644 ext/picotcp/RFC/rfc1323.txt
 create mode 100644 ext/picotcp/RFC/rfc1379.txt
 create mode 100644 ext/picotcp/RFC/rfc1470.txt
 create mode 100644 ext/picotcp/RFC/rfc1644.txt
 create mode 100644 ext/picotcp/RFC/rfc1661.txt
 create mode 100644 ext/picotcp/RFC/rfc1693.txt
 create mode 100644 ext/picotcp/RFC/rfc2026.txt
 create mode 100644 ext/picotcp/RFC/rfc2131.txt
 create mode 100644 ext/picotcp/RFC/rfc2460.txt
 create mode 100644 ext/picotcp/RFC/rfc2525.txt
 create mode 100644 ext/picotcp/RFC/rfc2757.txt
 create mode 100644 ext/picotcp/RFC/rfc2760.txt
 create mode 100644 ext/picotcp/RFC/rfc3135.txt
 create mode 100644 ext/picotcp/RFC/rfc3168.txt
 create mode 100644 ext/picotcp/RFC/rfc3449.txt
 create mode 100644 ext/picotcp/RFC/rfc3493.txt
 create mode 100644 ext/picotcp/RFC/rfc3649.txt
 create mode 100644 ext/picotcp/RFC/rfc3819.txt
 create mode 100644 ext/picotcp/RFC/rfc3927.txt
 create mode 100644 ext/picotcp/RFC/rfc4614.txt
 create mode 100644 ext/picotcp/RFC/rfc6762.txt

diff --git a/ext/picotcp/RFC/rfc0793.txt b/ext/picotcp/RFC/rfc0793.txt
new file mode 100644
index 0000000..603a78c
--- /dev/null
+++ b/ext/picotcp/RFC/rfc0793.txt
@@ -0,0 +1,5247 @@
+
+
+RFC: 793
+                                    
+                                    
+                                    
+                                    
+                                    
+                                    
+                                    
+                     TRANSMISSION CONTROL PROTOCOL
+                                    
+                                    
+                         DARPA INTERNET PROGRAM
+                                    
+                         PROTOCOL SPECIFICATION
+                                    
+                                    
+                                    
+                             September 1981
+
+
+
+
+
+
+
+
+
+
+
+
+
+                              prepared for
+                                    
+               Defense Advanced Research Projects Agency
+                Information Processing Techniques Office
+                         1400 Wilson Boulevard
+                       Arlington, Virginia  22209
+
+
+
+
+
+
+
+                                   by
+
+                     Information Sciences Institute
+                   University of Southern California
+                           4676 Admiralty Way
+                   Marina del Rey, California  90291
+
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+
+
+
+                           TABLE OF CONTENTS
+
+    PREFACE ........................................................ iii
+
+1.  INTRODUCTION ..................................................... 1
+
+  1.1  Motivation .................................................... 1
+  1.2  Scope ......................................................... 2
+  1.3  About This Document ........................................... 2
+  1.4  Interfaces .................................................... 3
+  1.5  Operation ..................................................... 3
+
+2.  PHILOSOPHY ....................................................... 7
+
+  2.1  Elements of the Internetwork System ........................... 7
+  2.2  Model of Operation ............................................ 7
+  2.3  The Host Environment .......................................... 8
+  2.4  Interfaces .................................................... 9
+  2.5  Relation to Other Protocols ................................... 9
+  2.6  Reliable Communication ........................................ 9
+  2.7  Connection Establishment and Clearing ........................ 10
+  2.8  Data Communication ........................................... 12
+  2.9  Precedence and Security ...................................... 13
+  2.10 Robustness Principle ......................................... 13
+
+3.  FUNCTIONAL SPECIFICATION ........................................ 15
+
+  3.1  Header Format ................................................ 15
+  3.2  Terminology .................................................. 19
+  3.3  Sequence Numbers ............................................. 24
+  3.4  Establishing a connection .................................... 30
+  3.5  Closing a Connection ......................................... 37
+  3.6  Precedence and Security ...................................... 40
+  3.7  Data Communication ........................................... 40
+  3.8  Interfaces ................................................... 44
+  3.9  Event Processing ............................................. 52
+
+GLOSSARY ............................................................ 79
+
+REFERENCES .......................................................... 85
+
+
+
+
+
+
+
+
+
+
+
+                                                                [Page i]
+
+
+                                                          September 1981
+Transmission Control Protocol
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+[Page ii]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+
+
+
+                                PREFACE
+
+
+
+This document describes the DoD Standard Transmission Control Protocol
+(TCP).  There have been nine earlier editions of the ARPA TCP
+specification on which this standard is based, and the present text
+draws heavily from them.  There have been many contributors to this work
+both in terms of concepts and in terms of text.  This edition clarifies
+several details and removes the end-of-letter buffer-size adjustments,
+and redescribes the letter mechanism as a push function.
+
+                                                           Jon Postel
+
+                                                           Editor
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+                                                              [Page iii]
+
+
+
+
+RFC:  793
+Replaces: RFC 761
+IENs:  129, 124, 112, 81,
+55, 44, 40, 27, 21, 5
+
+                     TRANSMISSION CONTROL PROTOCOL
+
+                         DARPA INTERNET PROGRAM
+                         PROTOCOL SPECIFICATION
+
+
+
+                            1.  INTRODUCTION
+
+The Transmission Control Protocol (TCP) is intended for use as a highly
+reliable host-to-host protocol between hosts in packet-switched computer
+communication networks, and in interconnected systems of such networks.
+
+This document describes the functions to be performed by the
+Transmission Control Protocol, the program that implements it, and its
+interface to programs or users that require its services.
+
+1.1.  Motivation
+
+  Computer communication systems are playing an increasingly important
+  role in military, government, and civilian environments.  This
+  document focuses its attention primarily on military computer
+  communication requirements, especially robustness in the presence of
+  communication unreliability and availability in the presence of
+  congestion, but many of these problems are found in the civilian and
+  government sector as well.
+
+  As strategic and tactical computer communication networks are
+  developed and deployed, it is essential to provide means of
+  interconnecting them and to provide standard interprocess
+  communication protocols which can support a broad range of
+  applications.  In anticipation of the need for such standards, the
+  Deputy Undersecretary of Defense for Research and Engineering has
+  declared the Transmission Control Protocol (TCP) described herein to
+  be a basis for DoD-wide inter-process communication protocol
+  standardization.
+
+  TCP is a connection-oriented, end-to-end reliable protocol designed to
+  fit into a layered hierarchy of protocols which support multi-network
+  applications.  The TCP provides for reliable inter-process
+  communication between pairs of processes in host computers attached to
+  distinct but interconnected computer communication networks.  Very few
+  assumptions are made as to the reliability of the communication
+  protocols below the TCP layer.  TCP assumes it can obtain a simple,
+  potentially unreliable datagram service from the lower level
+  protocols.  In principle, the TCP should be able to operate above a
+  wide spectrum of communication systems ranging from hard-wired
+  connections to packet-switched or circuit-switched networks.
+
+
+                                                                [Page 1]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Introduction
+
+
+
+  TCP is based on concepts first described by Cerf and Kahn in [1].  The
+  TCP fits into a layered protocol architecture just above a basic
+  Internet Protocol [2] which provides a way for the TCP to send and
+  receive variable-length segments of information enclosed in internet
+  datagram "envelopes".  The internet datagram provides a means for
+  addressing source and destination TCPs in different networks.  The
+  internet protocol also deals with any fragmentation or reassembly of
+  the TCP segments required to achieve transport and delivery through
+  multiple networks and interconnecting gateways.  The internet protocol
+  also carries information on the precedence, security classification
+  and compartmentation of the TCP segments, so this information can be
+  communicated end-to-end across multiple networks.
+
+                           Protocol Layering
+
+                        +---------------------+
+                        |     higher-level    |
+                        +---------------------+
+                        |        TCP          |
+                        +---------------------+
+                        |  internet protocol  |
+                        +---------------------+
+                        |communication network|
+                        +---------------------+
+
+                                Figure 1
+
+  Much of this document is written in the context of TCP implementations
+  which are co-resident with higher level protocols in the host
+  computer.  Some computer systems will be connected to networks via
+  front-end computers which house the TCP and internet protocol layers,
+  as well as network specific software.  The TCP specification describes
+  an interface to the higher level protocols which appears to be
+  implementable even for the front-end case, as long as a suitable
+  host-to-front end protocol is implemented.
+
+1.2.  Scope
+
+  The TCP is intended to provide a reliable process-to-process
+  communication service in a multinetwork environment.  The TCP is
+  intended to be a host-to-host protocol in common use in multiple
+  networks.
+
+1.3.  About this Document
+
+  This document represents a specification of the behavior required of
+  any TCP implementation, both in its interactions with higher level
+  protocols and in its interactions with other TCPs.  The rest of this
+
+
+[Page 2]                                                                
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                            Introduction
+
+
+
+  section offers a very brief view of the protocol interfaces and
+  operation.  Section 2 summarizes the philosophical basis for the TCP
+  design.  Section 3 offers both a detailed description of the actions
+  required of TCP when various events occur (arrival of new segments,
+  user calls, errors, etc.) and the details of the formats of TCP
+  segments.
+
+1.4.  Interfaces
+
+  The TCP interfaces on one side to user or application processes and on
+  the other side to a lower level protocol such as Internet Protocol.
+
+  The interface between an application process and the TCP is
+  illustrated in reasonable detail.  This interface consists of a set of
+  calls much like the calls an operating system provides to an
+  application process for manipulating files.  For example, there are
+  calls to open and close connections and to send and receive data on
+  established connections.  It is also expected that the TCP can
+  asynchronously communicate with application programs.  Although
+  considerable freedom is permitted to TCP implementors to design
+  interfaces which are appropriate to a particular operating system
+  environment, a minimum functionality is required at the TCP/user
+  interface for any valid implementation.
+
+  The interface between TCP and lower level protocol is essentially
+  unspecified except that it is assumed there is a mechanism whereby the
+  two levels can asynchronously pass information to each other.
+  Typically, one expects the lower level protocol to specify this
+  interface.  TCP is designed to work in a very general environment of
+  interconnected networks.  The lower level protocol which is assumed
+  throughout this document is the Internet Protocol [2].
+
+1.5.  Operation
+
+  As noted above, the primary purpose of the TCP is to provide reliable,
+  securable logical circuit or connection service between pairs of
+  processes.  To provide this service on top of a less reliable internet
+  communication system requires facilities in the following areas:
+
+    Basic Data Transfer
+    Reliability
+    Flow Control
+    Multiplexing
+    Connections
+    Precedence and Security
+
+  The basic operation of the TCP in each of these areas is described in
+  the following paragraphs.
+
+
+                                                                [Page 3]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Introduction
+
+
+
+  Basic Data Transfer:
+
+    The TCP is able to transfer a continuous stream of octets in each
+    direction between its users by packaging some number of octets into
+    segments for transmission through the internet system.  In general,
+    the TCPs decide when to block and forward data at their own
+    convenience.
+
+    Sometimes users need to be sure that all the data they have
+    submitted to the TCP has been transmitted.  For this purpose a push
+    function is defined.  To assure that data submitted to a TCP is
+    actually transmitted the sending user indicates that it should be
+    pushed through to the receiving user.  A push causes the TCPs to
+    promptly forward and deliver data up to that point to the receiver.
+    The exact push point might not be visible to the receiving user and
+    the push function does not supply a record boundary marker.
+
+  Reliability:
+
+    The TCP must recover from data that is damaged, lost, duplicated, or
+    delivered out of order by the internet communication system.  This
+    is achieved by assigning a sequence number to each octet
+    transmitted, and requiring a positive acknowledgment (ACK) from the
+    receiving TCP.  If the ACK is not received within a timeout
+    interval, the data is retransmitted.  At the receiver, the sequence
+    numbers are used to correctly order segments that may be received
+    out of order and to eliminate duplicates.  Damage is handled by
+    adding a checksum to each segment transmitted, checking it at the
+    receiver, and discarding damaged segments.
+
+    As long as the TCPs continue to function properly and the internet
+    system does not become completely partitioned, no transmission
+    errors will affect the correct delivery of data.  TCP recovers from
+    internet communication system errors.
+
+  Flow Control:
+
+    TCP provides a means for the receiver to govern the amount of data
+    sent by the sender.  This is achieved by returning a "window" with
+    every ACK indicating a range of acceptable sequence numbers beyond
+    the last segment successfully received.  The window indicates an
+    allowed number of octets that the sender may transmit before
+    receiving further permission.
+
+
+
+
+
+
+
+[Page 4]                                                                
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                            Introduction
+
+
+
+  Multiplexing:
+
+    To allow for many processes within a single Host to use TCP
+    communication facilities simultaneously, the TCP provides a set of
+    addresses or ports within each host.  Concatenated with the network
+    and host addresses from the internet communication layer, this forms
+    a socket.  A pair of sockets uniquely identifies each connection.
+    That is, a socket may be simultaneously used in multiple
+    connections.
+
+    The binding of ports to processes is handled independently by each
+    Host.  However, it proves useful to attach frequently used processes
+    (e.g., a "logger" or timesharing service) to fixed sockets which are
+    made known to the public.  These services can then be accessed
+    through the known addresses.  Establishing and learning the port
+    addresses of other processes may involve more dynamic mechanisms.
+
+  Connections:
+
+    The reliability and flow control mechanisms described above require
+    that TCPs initialize and maintain certain status information for
+    each data stream.  The combination of this information, including
+    sockets, sequence numbers, and window sizes, is called a connection.
+    Each connection is uniquely specified by a pair of sockets
+    identifying its two sides.
+
+    When two processes wish to communicate, their TCPs must first
+    establish a connection (initialize the status information on each
+    side).  When their communication is complete, the connection is
+    terminated or closed to free the resources for other uses.
+
+    Since connections must be established between unreliable hosts and
+    over the unreliable internet communication system, a handshake
+    mechanism with clock-based sequence numbers is used to avoid
+    erroneous initialization of connections.
+
+  Precedence and Security:
+
+    The users of TCP may indicate the security and precedence of their
+    communication.  Provision is made for default values to be used when
+    these features are not needed.
+
+    
+
+
+
+
+
+
+
+                                                                [Page 5]
+
+
+                                                          September 1981
+Transmission Control Protocol
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+[Page 6]                                                                
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+
+
+
+                             2.  PHILOSOPHY
+
+2.1.  Elements of the Internetwork System
+
+  The internetwork environment consists of hosts connected to networks
+  which are in turn interconnected via gateways.  It is assumed here
+  that the networks may be either local networks (e.g., the ETHERNET) or
+  large networks (e.g., the ARPANET), but in any case are based on
+  packet switching technology.  The active agents that produce and
+  consume messages are processes.  Various levels of protocols in the
+  networks, the gateways, and the hosts support an interprocess
+  communication system that provides two-way data flow on logical
+  connections between process ports.
+
+  The term packet is used generically here to mean the data of one
+  transaction between a host and its network.  The format of data blocks
+  exchanged within a network will generally not be of concern to us.
+
+  Hosts are computers attached to a network, and from the communication
+  network's point of view, are the sources and destinations of packets.
+  Processes are viewed as the active elements in host computers (in
+  accordance with the fairly common definition of a process as a program
+  in execution).  Even terminals and files or other I/O devices are
+  viewed as communicating with each other through the use of processes.
+  Thus, all communication is viewed as inter-process communication.
+
+  Since a process may need to distinguish among several communication
+  streams between itself and another process (or processes), we imagine
+  that each process may have a number of ports through which it
+  communicates with the ports of other processes.
+
+2.2.  Model of Operation
+
+  Processes transmit data by calling on the TCP and passing buffers of
+  data as arguments.  The TCP packages the data from these buffers into
+  segments and calls on the internet module to transmit each segment to
+  the destination TCP.  The receiving TCP places the data from a segment
+  into the receiving user's buffer and notifies the receiving user.  The
+  TCPs include control information in the segments which they use to
+  ensure reliable ordered data transmission.
+
+  The model of internet communication is that there is an internet
+  protocol module associated with each TCP which provides an interface
+  to the local network.  This internet module packages TCP segments
+  inside internet datagrams and routes these datagrams to a destination
+  internet module or intermediate gateway.  To transmit the datagram
+  through the local network, it is embedded in a local network packet.
+
+  The packet switches may perform further packaging, fragmentation, or
+
+
+                                                                [Page 7]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Philosophy
+
+
+
+  other operations to achieve the delivery of the local packet to the
+  destination internet module.
+
+  At a gateway between networks, the internet datagram is "unwrapped"
+  from its local packet and examined to determine through which network
+  the internet datagram should travel next.  The internet datagram is
+  then "wrapped" in a local packet suitable to the next network and
+  routed to the next gateway, or to the final destination.
+
+  A gateway is permitted to break up an internet datagram into smaller
+  internet datagram fragments if this is necessary for transmission
+  through the next network.  To do this, the gateway produces a set of
+  internet datagrams, each carrying a fragment.  Fragments may be
+  further broken into smaller fragments at subsequent gateways.  The
+  internet datagram fragment format is designed so that the destination
+  internet module can reassemble fragments into internet datagrams.
+
+  A destination internet module unwraps the segment from the datagram
+  (after reassembling the datagram, if necessary) and passes it to the
+  destination TCP.
+
+  This simple model of the operation glosses over many details.  One
+  important feature is the type of service.  This provides information
+  to the gateway (or internet module) to guide it in selecting the
+  service parameters to be used in traversing the next network.
+  Included in the type of service information is the precedence of the
+  datagram.  Datagrams may also carry security information to permit
+  hosts and gateways that operate in multilevel secure environments to
+  properly segregate datagrams for security considerations.
+
+2.3.  The Host Environment
+
+  The TCP is assumed to be a module in an operating system.  The users
+  access the TCP much like they would access the file system.  The TCP
+  may call on other operating system functions, for example, to manage
+  data structures.  The actual interface to the network is assumed to be
+  controlled by a device driver module.  The TCP does not call on the
+  network device driver directly, but rather calls on the internet
+  datagram protocol module which may in turn call on the device driver.
+
+  The mechanisms of TCP do not preclude implementation of the TCP in a
+  front-end processor.  However, in such an implementation, a
+  host-to-front-end protocol must provide the functionality to support
+  the type of TCP-user interface described in this document.
+
+
+
+
+
+
+[Page 8]                                                                
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                              Philosophy
+
+
+
+2.4.  Interfaces
+
+  The TCP/user interface provides for calls made by the user on the TCP
+  to OPEN or CLOSE a connection, to SEND or RECEIVE data, or to obtain
+  STATUS about a connection.  These calls are like other calls from user
+  programs on the operating system, for example, the calls to open, read
+  from, and close a file.
+
+  The TCP/internet interface provides calls to send and receive
+  datagrams addressed to TCP modules in hosts anywhere in the internet
+  system.  These calls have parameters for passing the address, type of
+  service, precedence, security, and other control information.
+
+2.5.  Relation to Other Protocols
+
+  The following diagram illustrates the place of the TCP in the protocol
+  hierarchy:
+
+                                    
+       +------+ +-----+ +-----+       +-----+                    
+       |Telnet| | FTP | |Voice|  ...  |     |  Application Level 
+       +------+ +-----+ +-----+       +-----+                    
+             |   |         |             |                       
+            +-----+     +-----+       +-----+                    
+            | TCP |     | RTP |  ...  |     |  Host Level        
+            +-----+     +-----+       +-----+                    
+               |           |             |                       
+            +-------------------------------+                    
+            |    Internet Protocol & ICMP   |  Gateway Level     
+            +-------------------------------+                    
+                           |                                     
+              +---------------------------+                      
+              |   Local Network Protocol  |    Network Level     
+              +---------------------------+                      
+
+                         Protocol Relationships
+
+                               Figure 2.
+
+  It is expected that the TCP will be able to support higher level
+  protocols efficiently.  It should be easy to interface higher level
+  protocols like the ARPANET Telnet or AUTODIN II THP to the TCP.
+
+2.6.  Reliable Communication
+
+  A stream of data sent on a TCP connection is delivered reliably and in
+  order at the destination.
+
+
+
+                                                                [Page 9]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Philosophy
+
+
+
+  Transmission is made reliable via the use of sequence numbers and
+  acknowledgments.  Conceptually, each octet of data is assigned a
+  sequence number.  The sequence number of the first octet of data in a
+  segment is transmitted with that segment and is called the segment
+  sequence number.  Segments also carry an acknowledgment number which
+  is the sequence number of the next expected data octet of
+  transmissions in the reverse direction.  When the TCP transmits a
+  segment containing data, it puts a copy on a retransmission queue and
+  starts a timer; when the acknowledgment for that data is received, the
+  segment is deleted from the queue.  If the acknowledgment is not
+  received before the timer runs out, the segment is retransmitted.
+
+  An acknowledgment by TCP does not guarantee that the data has been
+  delivered to the end user, but only that the receiving TCP has taken
+  the responsibility to do so.
+
+  To govern the flow of data between TCPs, a flow control mechanism is
+  employed.  The receiving TCP reports a "window" to the sending TCP.
+  This window specifies the number of octets, starting with the
+  acknowledgment number, that the receiving TCP is currently prepared to
+  receive.
+
+2.7.  Connection Establishment and Clearing
+
+  To identify the separate data streams that a TCP may handle, the TCP
+  provides a port identifier.  Since port identifiers are selected
+  independently by each TCP they might not be unique.  To provide for
+  unique addresses within each TCP, we concatenate an internet address
+  identifying the TCP with a port identifier to create a socket which
+  will be unique throughout all networks connected together.
+
+  A connection is fully specified by the pair of sockets at the ends.  A
+  local socket may participate in many connections to different foreign
+  sockets.  A connection can be used to carry data in both directions,
+  that is, it is "full duplex".
+
+  TCPs are free to associate ports with processes however they choose.
+  However, several basic concepts are necessary in any implementation.
+  There must be well-known sockets which the TCP associates only with
+  the "appropriate" processes by some means.  We envision that processes
+  may "own" ports, and that processes can initiate connections only on
+  the ports they own.  (Means for implementing ownership is a local
+  issue, but we envision a Request Port user command, or a method of
+  uniquely allocating a group of ports to a given process, e.g., by
+  associating the high order bits of a port name with a given process.)
+
+  A connection is specified in the OPEN call by the local port and
+  foreign socket arguments.  In return, the TCP supplies a (short) local
+
+
+[Page 10]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                              Philosophy
+
+
+
+  connection name by which the user refers to the connection in
+  subsequent calls.  There are several things that must be remembered
+  about a connection.  To store this information we imagine that there
+  is a data structure called a Transmission Control Block (TCB).  One
+  implementation strategy would have the local connection name be a
+  pointer to the TCB for this connection.  The OPEN call also specifies
+  whether the connection establishment is to be actively pursued, or to
+  be passively waited for.
+
+  A passive OPEN request means that the process wants to accept incoming
+  connection requests rather than attempting to initiate a connection.
+  Often the process requesting a passive OPEN will accept a connection
+  request from any caller.  In this case a foreign socket of all zeros
+  is used to denote an unspecified socket.  Unspecified foreign sockets
+  are allowed only on passive OPENs.
+
+  A service process that wished to provide services for unknown other
+  processes would issue a passive OPEN request with an unspecified
+  foreign socket.  Then a connection could be made with any process that
+  requested a connection to this local socket.  It would help if this
+  local socket were known to be associated with this service.
+
+  Well-known sockets are a convenient mechanism for a priori associating
+  a socket address with a standard service.  For instance, the
+  "Telnet-Server" process is permanently assigned to a particular
+  socket, and other sockets are reserved for File Transfer, Remote Job
+  Entry, Text Generator, Echoer, and Sink processes (the last three
+  being for test purposes).  A socket address might be reserved for
+  access to a "Look-Up" service which would return the specific socket
+  at which a newly created service would be provided.  The concept of a
+  well-known socket is part of the TCP specification, but the assignment
+  of sockets to services is outside this specification.  (See [4].)
+
+  Processes can issue passive OPENs and wait for matching active OPENs
+  from other processes and be informed by the TCP when connections have
+  been established.  Two processes which issue active OPENs to each
+  other at the same time will be correctly connected.  This flexibility
+  is critical for the support of distributed computing in which
+  components act asynchronously with respect to each other.
+
+  There are two principal cases for matching the sockets in the local
+  passive OPENs and a foreign active OPEN.  In the first case, the
+  local passive OPEN has fully specified the foreign socket.  In this
+  case, the match must be exact.  In the second case, the local passive
+  OPEN has left the foreign socket unspecified.  In this case, any
+  foreign socket is acceptable as long as the local sockets match.
+  Other possibilities include partially restricted matches.
+
+
+
+                                                               [Page 11]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Philosophy
+
+
+
+  If there are several pending passive OPENs (recorded in TCBs) with the
+  same local socket, a foreign active OPEN will be matched to a TCB
+  with the specific foreign socket in the foreign active OPEN, if such a
+  TCB exists, before selecting a TCB with an unspecified foreign socket.
+
+  The procedures to establish connections utilize the synchronize (SYN)
+  control flag and involve an exchange of three messages.  This
+  exchange has been termed a three-way hand shake [3].
+
+  A connection is initiated by the rendezvous of an arriving segment
+  containing a SYN and a waiting TCB entry each created by a user OPEN
+  command.  The matching of local and foreign sockets determines when a
+  connection has been initiated.  The connection becomes "established"
+  when sequence numbers have been synchronized in both directions.
+
+  The clearing of a connection also involves the exchange of segments,
+  in this case carrying the FIN control flag.
+
+2.8.  Data Communication
+
+  The data that flows on a connection may be thought of as a stream of
+  octets.  The sending user indicates in each SEND call whether the data
+  in that call (and any preceding calls) should be immediately pushed
+  through to the receiving user by the setting of the PUSH flag.
+
+  A sending TCP is allowed to collect data from the sending user and to
+  send that data in segments at its own convenience, until the push
+  function is signaled, then it must send all unsent data.  When a
+  receiving TCP sees the PUSH flag, it must not wait for more data from
+  the sending TCP before passing the data to the receiving process.
+
+  There is no necessary relationship between push functions and segment
+  boundaries.  The data in any particular segment may be the result of a
+  single SEND call, in whole or part, or of multiple SEND calls.
+
+  The purpose of push function and the PUSH flag is to push data through
+  from the sending user to the receiving user.  It does not provide a
+  record service.
+
+  There is a coupling between the push function and the use of buffers
+  of data that cross the TCP/user interface.  Each time a PUSH flag is
+  associated with data placed into the receiving user's buffer, the
+  buffer is returned to the user for processing even if the buffer is
+  not filled.  If data arrives that fills the user's buffer before a
+  PUSH is seen, the data is passed to the user in buffer size units.
+
+  TCP also provides a means to communicate to the receiver of data that
+  at some point further along in the data stream than the receiver is
+
+
+[Page 12]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                              Philosophy
+
+
+
+  currently reading there is urgent data.  TCP does not attempt to
+  define what the user specifically does upon being notified of pending
+  urgent data, but the general notion is that the receiving process will
+  take action to process the urgent data quickly.
+
+2.9.  Precedence and Security
+
+  The TCP makes use of the internet protocol type of service field and
+  security option to provide precedence and security on a per connection
+  basis to TCP users.  Not all TCP modules will necessarily function in
+  a multilevel secure environment; some may be limited to unclassified
+  use only, and others may operate at only one security level and
+  compartment.  Consequently, some TCP implementations and services to
+  users may be limited to a subset of the multilevel secure case.
+
+  TCP modules which operate in a multilevel secure environment must
+  properly mark outgoing segments with the security, compartment, and
+  precedence.  Such TCP modules must also provide to their users or
+  higher level protocols such as Telnet or THP an interface to allow
+  them to specify the desired security level, compartment, and
+  precedence of connections.
+
+2.10.  Robustness Principle
+
+  TCP implementations will follow a general principle of robustness:  be
+  conservative in what you do, be liberal in what you accept from
+  others.
+
+  
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+                                                               [Page 13]
+
+
+                                                          September 1981
+Transmission Control Protocol
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+[Page 14]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+
+
+
+                      3.  FUNCTIONAL SPECIFICATION
+
+3.1.  Header Format
+
+  TCP segments are sent as internet datagrams.  The Internet Protocol
+  header carries several information fields, including the source and
+  destination host addresses [2].  A TCP header follows the internet
+  header, supplying information specific to the TCP protocol.  This
+  division allows for the existence of host level protocols other than
+  TCP.
+
+  TCP Header Format
+
+                                    
+    0                   1                   2                   3   
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |          Source Port          |       Destination Port        |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                        Sequence Number                        |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                    Acknowledgment Number                      |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |  Data |           |U|A|P|R|S|F|                               |
+   | Offset| Reserved  |R|C|S|S|Y|I|            Window             |
+   |       |           |G|K|H|T|N|N|                               |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |           Checksum            |         Urgent Pointer        |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                    Options                    |    Padding    |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                             data                              |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+                            TCP Header Format
+
+          Note that one tick mark represents one bit position.
+
+                               Figure 3.
+
+  Source Port:  16 bits
+
+    The source port number.
+
+  Destination Port:  16 bits
+
+    The destination port number.
+
+
+
+
+                                                               [Page 15]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+
+
+
+  Sequence Number:  32 bits
+
+    The sequence number of the first data octet in this segment (except
+    when SYN is present). If SYN is present the sequence number is the
+    initial sequence number (ISN) and the first data octet is ISN+1.
+
+  Acknowledgment Number:  32 bits
+
+    If the ACK control bit is set this field contains the value of the
+    next sequence number the sender of the segment is expecting to
+    receive.  Once a connection is established this is always sent.
+
+  Data Offset:  4 bits
+
+    The number of 32 bit words in the TCP Header.  This indicates where
+    the data begins.  The TCP header (even one including options) is an
+    integral multiple of 32 bits long.
+
+  Reserved:  6 bits
+
+    Reserved for future use.  Must be zero.
+
+  Control Bits:  6 bits (from left to right):
+
+    URG:  Urgent Pointer field significant
+    ACK:  Acknowledgment field significant
+    PSH:  Push Function
+    RST:  Reset the connection
+    SYN:  Synchronize sequence numbers
+    FIN:  No more data from sender
+
+  Window:  16 bits
+
+    The number of data octets beginning with the one indicated in the
+    acknowledgment field which the sender of this segment is willing to
+    accept.
+
+  Checksum:  16 bits
+
+    The checksum field is the 16 bit one's complement of the one's
+    complement sum of all 16 bit words in the header and text.  If a
+    segment contains an odd number of header and text octets to be
+    checksummed, the last octet is padded on the right with zeros to
+    form a 16 bit word for checksum purposes.  The pad is not
+    transmitted as part of the segment.  While computing the checksum,
+    the checksum field itself is replaced with zeros.
+
+    The checksum also covers a 96 bit pseudo header conceptually
+
+
+[Page 16]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+
+
+
+    prefixed to the TCP header.  This pseudo header contains the Source
+    Address, the Destination Address, the Protocol, and TCP length.
+    This gives the TCP protection against misrouted segments.  This
+    information is carried in the Internet Protocol and is transferred
+    across the TCP/Network interface in the arguments or results of
+    calls by the TCP on the IP.
+
+                     +--------+--------+--------+--------+
+                     |           Source Address          |
+                     +--------+--------+--------+--------+
+                     |         Destination Address       |
+                     +--------+--------+--------+--------+
+                     |  zero  |  PTCL  |    TCP Length   |
+                     +--------+--------+--------+--------+
+
+      The TCP Length is the TCP header length plus the data length in
+      octets (this is not an explicitly transmitted quantity, but is
+      computed), and it does not count the 12 octets of the pseudo
+      header.
+
+  Urgent Pointer:  16 bits
+
+    This field communicates the current value of the urgent pointer as a
+    positive offset from the sequence number in this segment.  The
+    urgent pointer points to the sequence number of the octet following
+    the urgent data.  This field is only to be interpreted in segments with
+    the URG control bit set.
+
+  Options:  variable
+
+    Options may occupy space at the end of the TCP header and are a
+    multiple of 8 bits in length.  All options are included in the
+    checksum.  An option may begin on any octet boundary.  There are two
+    cases for the format of an option:
+
+      Case 1:  A single octet of option-kind.
+
+      Case 2:  An octet of option-kind, an octet of option-length, and
+               the actual option-data octets.
+
+    The option-length counts the two octets of option-kind and
+    option-length as well as the option-data octets.
+
+    Note that the list of options may be shorter than the data offset
+    field might imply.  The content of the header beyond the
+    End-of-Option option must be header padding (i.e., zero).
+
+    A TCP must implement all options.
+
+
+                                                               [Page 17]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+
+
+
+    Currently defined options include (kind indicated in octal):
+
+      Kind     Length    Meaning
+      ----     ------    -------
+       0         -       End of option list.
+       1         -       No-Operation.
+       2         4       Maximum Segment Size.
+      
+
+    Specific Option Definitions
+
+      End of Option List
+
+        +--------+
+        |00000000|
+        +--------+
+         Kind=0
+
+        This option code indicates the end of the option list.  This
+        might not coincide with the end of the TCP header according to
+        the Data Offset field.  This is used at the end of all options,
+        not the end of each option, and need only be used if the end of
+        the options would not otherwise coincide with the end of the TCP
+        header.
+
+      No-Operation
+
+        +--------+
+        |00000001|
+        +--------+
+         Kind=1
+
+        This option code may be used between options, for example, to
+        align the beginning of a subsequent option on a word boundary.
+        There is no guarantee that senders will use this option, so
+        receivers must be prepared to process options even if they do
+        not begin on a word boundary.
+
+      Maximum Segment Size
+
+        +--------+--------+---------+--------+
+        |00000010|00000100|   max seg size   |
+        +--------+--------+---------+--------+
+         Kind=2   Length=4
+
+
+
+
+
+
+[Page 18]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+
+
+
+        Maximum Segment Size Option Data:  16 bits
+
+          If this option is present, then it communicates the maximum
+          receive segment size at the TCP which sends this segment.
+          This field must only be sent in the initial connection request
+          (i.e., in segments with the SYN control bit set).  If this
+          option is not used, any segment size is allowed.
+
+  Padding:  variable
+
+    The TCP header padding is used to ensure that the TCP header ends
+    and data begins on a 32 bit boundary.  The padding is composed of
+    zeros.
+
+3.2.  Terminology
+
+  Before we can discuss very much about the operation of the TCP we need
+  to introduce some detailed terminology.  The maintenance of a TCP
+  connection requires the remembering of several variables.  We conceive
+  of these variables being stored in a connection record called a
+  Transmission Control Block or TCB.  Among the variables stored in the
+  TCB are the local and remote socket numbers, the security and
+  precedence of the connection, pointers to the user's send and receive
+  buffers, pointers to the retransmit queue and to the current segment.
+  In addition several variables relating to the send and receive
+  sequence numbers are stored in the TCB.
+
+    Send Sequence Variables
+
+      SND.UNA - send unacknowledged
+      SND.NXT - send next
+      SND.WND - send window
+      SND.UP  - send urgent pointer
+      SND.WL1 - segment sequence number used for last window update
+      SND.WL2 - segment acknowledgment number used for last window
+                update
+      ISS     - initial send sequence number
+
+    Receive Sequence Variables
+
+      RCV.NXT - receive next
+      RCV.WND - receive window
+      RCV.UP  - receive urgent pointer
+      IRS     - initial receive sequence number
+
+
+
+
+
+
+                                                               [Page 19]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+
+
+
+  The following diagrams may help to relate some of these variables to
+  the sequence space.
+
+  Send Sequence Space
+
+                   1         2          3          4      
+              ----------|----------|----------|---------- 
+                     SND.UNA    SND.NXT    SND.UNA        
+                                          +SND.WND        
+
+        1 - old sequence numbers which have been acknowledged  
+        2 - sequence numbers of unacknowledged data            
+        3 - sequence numbers allowed for new data transmission 
+        4 - future sequence numbers which are not yet allowed  
+
+                          Send Sequence Space
+
+                               Figure 4.
+    
+    
+
+  The send window is the portion of the sequence space labeled 3 in
+  figure 4.
+
+  Receive Sequence Space
+
+                       1          2          3      
+                   ----------|----------|---------- 
+                          RCV.NXT    RCV.NXT        
+                                    +RCV.WND        
+
+        1 - old sequence numbers which have been acknowledged  
+        2 - sequence numbers allowed for new reception         
+        3 - future sequence numbers which are not yet allowed  
+
+                         Receive Sequence Space
+
+                               Figure 5.
+    
+    
+
+  The receive window is the portion of the sequence space labeled 2 in
+  figure 5.
+
+  There are also some variables used frequently in the discussion that
+  take their values from the fields of the current segment.
+
+
+
+
+[Page 20]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+
+
+
+    Current Segment Variables
+
+      SEG.SEQ - segment sequence number
+      SEG.ACK - segment acknowledgment number
+      SEG.LEN - segment length
+      SEG.WND - segment window
+      SEG.UP  - segment urgent pointer
+      SEG.PRC - segment precedence value
+
+  A connection progresses through a series of states during its
+  lifetime.  The states are:  LISTEN, SYN-SENT, SYN-RECEIVED,
+  ESTABLISHED, FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK,
+  TIME-WAIT, and the fictional state CLOSED.  CLOSED is fictional
+  because it represents the state when there is no TCB, and therefore,
+  no connection.  Briefly the meanings of the states are:
+
+    LISTEN - represents waiting for a connection request from any remote
+    TCP and port.
+
+    SYN-SENT - represents waiting for a matching connection request
+    after having sent a connection request.
+
+    SYN-RECEIVED - represents waiting for a confirming connection
+    request acknowledgment after having both received and sent a
+    connection request.
+
+    ESTABLISHED - represents an open connection, data received can be
+    delivered to the user.  The normal state for the data transfer phase
+    of the connection.
+
+    FIN-WAIT-1 - represents waiting for a connection termination request
+    from the remote TCP, or an acknowledgment of the connection
+    termination request previously sent.
+
+    FIN-WAIT-2 - represents waiting for a connection termination request
+    from the remote TCP.
+
+    CLOSE-WAIT - represents waiting for a connection termination request
+    from the local user.
+
+    CLOSING - represents waiting for a connection termination request
+    acknowledgment from the remote TCP.
+
+    LAST-ACK - represents waiting for an acknowledgment of the
+    connection termination request previously sent to the remote TCP
+    (which includes an acknowledgment of its connection termination
+    request).
+
+
+
+                                                               [Page 21]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+
+
+
+    TIME-WAIT - represents waiting for enough time to pass to be sure
+    the remote TCP received the acknowledgment of its connection
+    termination request.
+
+    CLOSED - represents no connection state at all.
+
+  A TCP connection progresses from one state to another in response to
+  events.  The events are the user calls, OPEN, SEND, RECEIVE, CLOSE,
+  ABORT, and STATUS; the incoming segments, particularly those
+  containing the SYN, ACK, RST and FIN flags; and timeouts.
+
+  The state diagram in figure 6 illustrates only state changes, together
+  with the causing events and resulting actions, but addresses neither
+  error conditions nor actions which are not connected with state
+  changes.  In a later section, more detail is offered with respect to
+  the reaction of the TCP to events.
+
+  NOTA BENE:  this diagram is only a summary and must not be taken as
+  the total specification.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+[Page 22]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+
+
+
+                                    
+                              +---------+ ---------\      active OPEN  
+                              |  CLOSED |            \    -----------  
+                              +---------+<---------\   \   create TCB  
+                                |     ^              \   \  snd SYN    
+                   passive OPEN |     |   CLOSE        \   \           
+                   ------------ |     | ----------       \   \         
+                    create TCB  |     | delete TCB         \   \       
+                                V     |                      \   \     
+                              +---------+            CLOSE    |    \   
+                              |  LISTEN |          ---------- |     |  
+                              +---------+          delete TCB |     |  
+                   rcv SYN      |     |     SEND              |     |  
+                  -----------   |     |    -------            |     V  
+ +---------+      snd SYN,ACK  /       \   snd SYN          +---------+
+ |         |<-----------------           ------------------>|         |
+ |   SYN   |                    rcv SYN                     |   SYN   |
+ |   RCVD  |<-----------------------------------------------|   SENT  |
+ |         |                    snd ACK                     |         |
+ |         |------------------           -------------------|         |
+ +---------+   rcv ACK of SYN  \       /  rcv SYN,ACK       +---------+
+   |           --------------   |     |   -----------                  
+   |                  x         |     |     snd ACK                    
+   |                            V     V                                
+   |  CLOSE                   +---------+                              
+   | -------                  |  ESTAB  |                              
+   | snd FIN                  +---------+                              
+   |                   CLOSE    |     |    rcv FIN                     
+   V                  -------   |     |    -------                     
+ +---------+          snd FIN  /       \   snd ACK          +---------+
+ |  FIN    |<-----------------           ------------------>|  CLOSE  |
+ | WAIT-1  |------------------                              |   WAIT  |
+ +---------+          rcv FIN  \                            +---------+
+   | rcv ACK of FIN   -------   |                            CLOSE  |  
+   | --------------   snd ACK   |                           ------- |  
+   V        x                   V                           snd FIN V  
+ +---------+                  +---------+                   +---------+
+ |FINWAIT-2|                  | CLOSING |                   | LAST-ACK|
+ +---------+                  +---------+                   +---------+
+   |                rcv ACK of FIN |                 rcv ACK of FIN |  
+   |  rcv FIN       -------------- |    Timeout=2MSL -------------- |  
+   |  -------              x       V    ------------        x       V  
+    \ snd ACK                 +---------+delete TCB         +---------+
+     ------------------------>|TIME WAIT|------------------>| CLOSED  |
+                              +---------+                   +---------+
+
+                      TCP Connection State Diagram
+                               Figure 6.
+
+
+                                                               [Page 23]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+
+
+
+3.3.  Sequence Numbers
+
+  A fundamental notion in the design is that every octet of data sent
+  over a TCP connection has a sequence number.  Since every octet is
+  sequenced, each of them can be acknowledged.  The acknowledgment
+  mechanism employed is cumulative so that an acknowledgment of sequence
+  number X indicates that all octets up to but not including X have been
+  received.  This mechanism allows for straight-forward duplicate
+  detection in the presence of retransmission.  Numbering of octets
+  within a segment is that the first data octet immediately following
+  the header is the lowest numbered, and the following octets are
+  numbered consecutively.
+
+  It is essential to remember that the actual sequence number space is
+  finite, though very large.  This space ranges from 0 to 2**32 - 1.
+  Since the space is finite, all arithmetic dealing with sequence
+  numbers must be performed modulo 2**32.  This unsigned arithmetic
+  preserves the relationship of sequence numbers as they cycle from
+  2**32 - 1 to 0 again.  There are some subtleties to computer modulo
+  arithmetic, so great care should be taken in programming the
+  comparison of such values.  The symbol "=<" means "less than or equal"
+  (modulo 2**32).
+
+  The typical kinds of sequence number comparisons which the TCP must
+  perform include:
+
+    (a)  Determining that an acknowledgment refers to some sequence
+         number sent but not yet acknowledged.
+
+    (b)  Determining that all sequence numbers occupied by a segment
+         have been acknowledged (e.g., to remove the segment from a
+         retransmission queue).
+
+    (c)  Determining that an incoming segment contains sequence numbers
+         which are expected (i.e., that the segment "overlaps" the
+         receive window).
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+[Page 24]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+
+
+
+  In response to sending data the TCP will receive acknowledgments.  The
+  following comparisons are needed to process the acknowledgments.
+
+    SND.UNA = oldest unacknowledged sequence number
+
+    SND.NXT = next sequence number to be sent
+
+    SEG.ACK = acknowledgment from the receiving TCP (next sequence
+              number expected by the receiving TCP)
+
+    SEG.SEQ = first sequence number of a segment
+
+    SEG.LEN = the number of octets occupied by the data in the segment
+              (counting SYN and FIN)
+
+    SEG.SEQ+SEG.LEN-1 = last sequence number of a segment
+
+  A new acknowledgment (called an "acceptable ack"), is one for which
+  the inequality below holds:
+
+    SND.UNA < SEG.ACK =< SND.NXT
+
+  A segment on the retransmission queue is fully acknowledged if the sum
+  of its sequence number and length is less than or equal to the
+  acknowledgment value in the incoming segment.
+
+  When data is received the following comparisons are needed:
+
+    RCV.NXT = next sequence number expected on an incoming segment, and
+        is the left or lower edge of the receive window
+
+    RCV.NXT+RCV.WND-1 = last sequence number expected on an incoming
+        segment, and is the right or upper edge of the receive window
+
+    SEG.SEQ = first sequence number occupied by the incoming segment
+
+    SEG.SEQ+SEG.LEN-1 = last sequence number occupied by the incoming
+        segment
+
+  A segment is judged to occupy a portion of valid receive sequence
+  space if
+
+    RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND
+
+  or
+
+    RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND
+
+
+
+                                                               [Page 25]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+
+
+
+  The first part of this test checks to see if the beginning of the
+  segment falls in the window, the second part of the test checks to see
+  if the end of the segment falls in the window; if the segment passes
+  either part of the test it contains data in the window.
+
+  Actually, it is a little more complicated than this.  Due to zero
+  windows and zero length segments, we have four cases for the
+  acceptability of an incoming segment:
+
+    Segment Receive  Test
+    Length  Window
+    ------- -------  -------------------------------------------
+
+       0       0     SEG.SEQ = RCV.NXT
+
+       0      >0     RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND
+
+      >0       0     not acceptable
+
+      >0      >0     RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND
+                  or RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND
+
+  Note that when the receive window is zero no segments should be
+  acceptable except ACK segments.  Thus, it is possible for a TCP to
+  maintain a zero receive window while transmitting data and receiving
+  ACKs.  However, even when the receive window is zero, a TCP must
+  process the RST and URG fields of all incoming segments.
+
+  We have taken advantage of the numbering scheme to protect certain
+  control information as well.  This is achieved by implicitly including
+  some control flags in the sequence space so they can be retransmitted
+  and acknowledged without confusion (i.e., one and only one copy of the
+  control will be acted upon).  Control information is not physically
+  carried in the segment data space.  Consequently, we must adopt rules
+  for implicitly assigning sequence numbers to control.  The SYN and FIN
+  are the only controls requiring this protection, and these controls
+  are used only at connection opening and closing.  For sequence number
+  purposes, the SYN is considered to occur before the first actual data
+  octet of the segment in which it occurs, while the FIN is considered
+  to occur after the last actual data octet in a segment in which it
+  occurs.  The segment length (SEG.LEN) includes both data and sequence
+  space occupying controls.  When a SYN is present then SEG.SEQ is the
+  sequence number of the SYN.
+
+
+
+
+
+
+
+[Page 26]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+
+
+
+  Initial Sequence Number Selection
+
+  The protocol places no restriction on a particular connection being
+  used over and over again.  A connection is defined by a pair of
+  sockets.  New instances of a connection will be referred to as
+  incarnations of the connection.  The problem that arises from this is
+  -- "how does the TCP identify duplicate segments from previous
+  incarnations of the connection?"  This problem becomes apparent if the
+  connection is being opened and closed in quick succession, or if the
+  connection breaks with loss of memory and is then reestablished.
+
+  To avoid confusion we must prevent segments from one incarnation of a
+  connection from being used while the same sequence numbers may still
+  be present in the network from an earlier incarnation.  We want to
+  assure this, even if a TCP crashes and loses all knowledge of the
+  sequence numbers it has been using.  When new connections are created,
+  an initial sequence number (ISN) generator is employed which selects a
+  new 32 bit ISN.  The generator is bound to a (possibly fictitious) 32
+  bit clock whose low order bit is incremented roughly every 4
+  microseconds.  Thus, the ISN cycles approximately every 4.55 hours.
+  Since we assume that segments will stay in the network no more than
+  the Maximum Segment Lifetime (MSL) and that the MSL is less than 4.55
+  hours we can reasonably assume that ISN's will be unique.
+
+  For each connection there is a send sequence number and a receive
+  sequence number.  The initial send sequence number (ISS) is chosen by
+  the data sending TCP, and the initial receive sequence number (IRS) is
+  learned during the connection establishing procedure.
+
+  For a connection to be established or initialized, the two TCPs must
+  synchronize on each other's initial sequence numbers.  This is done in
+  an exchange of connection establishing segments carrying a control bit
+  called "SYN" (for synchronize) and the initial sequence numbers.  As a
+  shorthand, segments carrying the SYN bit are also called "SYNs".
+  Hence, the solution requires a suitable mechanism for picking an
+  initial sequence number and a slightly involved handshake to exchange
+  the ISN's.
+
+  The synchronization requires each side to send its own initial
+  sequence number and to receive a confirmation of it in acknowledgment
+  from the other side.  Each side must also receive the other side's
+  initial sequence number and send a confirming acknowledgment.
+
+    1) A --> B  SYN my sequence number is X
+    2) A <-- B  ACK your sequence number is X
+    3) A <-- B  SYN my sequence number is Y
+    4) A --> B  ACK your sequence number is Y
+
+
+
+                                                               [Page 27]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+
+
+
+  Because steps 2 and 3 can be combined in a single message this is
+  called the three way (or three message) handshake.
+
+  A three way handshake is necessary because sequence numbers are not
+  tied to a global clock in the network, and TCPs may have different
+  mechanisms for picking the ISN's.  The receiver of the first SYN has
+  no way of knowing whether the segment was an old delayed one or not,
+  unless it remembers the last sequence number used on the connection
+  (which is not always possible), and so it must ask the sender to
+  verify this SYN.  The three way handshake and the advantages of a
+  clock-driven scheme are discussed in [3].
+
+  Knowing When to Keep Quiet
+
+  To be sure that a TCP does not create a segment that carries a
+  sequence number which may be duplicated by an old segment remaining in
+  the network, the TCP must keep quiet for a maximum segment lifetime
+  (MSL) before assigning any sequence numbers upon starting up or
+  recovering from a crash in which memory of sequence numbers in use was
+  lost.  For this specification the MSL is taken to be 2 minutes.  This
+  is an engineering choice, and may be changed if experience indicates
+  it is desirable to do so.  Note that if a TCP is reinitialized in some
+  sense, yet retains its memory of sequence numbers in use, then it need
+  not wait at all; it must only be sure to use sequence numbers larger
+  than those recently used.
+
+  The TCP Quiet Time Concept
+
+    This specification provides that hosts which "crash" without
+    retaining any knowledge of the last sequence numbers transmitted on
+    each active (i.e., not closed) connection shall delay emitting any
+    TCP segments for at least the agreed Maximum Segment Lifetime (MSL)
+    in the internet system of which the host is a part.  In the
+    paragraphs below, an explanation for this specification is given.
+    TCP implementors may violate the "quiet time" restriction, but only
+    at the risk of causing some old data to be accepted as new or new
+    data rejected as old duplicated by some receivers in the internet
+    system.
+
+    TCPs consume sequence number space each time a segment is formed and
+    entered into the network output queue at a source host. The
+    duplicate detection and sequencing algorithm in the TCP protocol
+    relies on the unique binding of segment data to sequence space to
+    the extent that sequence numbers will not cycle through all 2**32
+    values before the segment data bound to those sequence numbers has
+    been delivered and acknowledged by the receiver and all duplicate
+    copies of the segments have "drained" from the internet.  Without
+    such an assumption, two distinct TCP segments could conceivably be
+
+
+[Page 28]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+
+
+
+    assigned the same or overlapping sequence numbers, causing confusion
+    at the receiver as to which data is new and which is old.  Remember
+    that each segment is bound to as many consecutive sequence numbers
+    as there are octets of data in the segment.
+
+    Under normal conditions, TCPs keep track of the next sequence number
+    to emit and the oldest awaiting acknowledgment so as to avoid
+    mistakenly using a sequence number over before its first use has
+    been acknowledged.  This alone does not guarantee that old duplicate
+    data is drained from the net, so the sequence space has been made
+    very large to reduce the probability that a wandering duplicate will
+    cause trouble upon arrival.  At 2 megabits/sec. it takes 4.5 hours
+    to use up 2**32 octets of sequence space.  Since the maximum segment
+    lifetime in the net is not likely to exceed a few tens of seconds,
+    this is deemed ample protection for foreseeable nets, even if data
+    rates escalate to 10's of megabits/sec.  At 100 megabits/sec, the
+    cycle time is 5.4 minutes which may be a little short, but still
+    within reason.
+
+    The basic duplicate detection and sequencing algorithm in TCP can be
+    defeated, however, if a source TCP does not have any memory of the
+    sequence numbers it last used on a given connection. For example, if
+    the TCP were to start all connections with sequence number 0, then
+    upon crashing and restarting, a TCP might re-form an earlier
+    connection (possibly after half-open connection resolution) and emit
+    packets with sequence numbers identical to or overlapping with
+    packets still in the network which were emitted on an earlier
+    incarnation of the same connection.  In the absence of knowledge
+    about the sequence numbers used on a particular connection, the TCP
+    specification recommends that the source delay for MSL seconds
+    before emitting segments on the connection, to allow time for
+    segments from the earlier connection incarnation to drain from the
+    system.
+
+    Even hosts which can remember the time of day and use it to select
+    initial sequence number values are not immune from this problem
+    (i.e., even if time of day is used to select an initial sequence
+    number for each new connection incarnation).
+
+    Suppose, for example, that a connection is opened starting with
+    sequence number S.  Suppose that this connection is not used much
+    and that eventually the initial sequence number function (ISN(t))
+    takes on a value equal to the sequence number, say S1, of the last
+    segment sent by this TCP on a particular connection.  Now suppose,
+    at this instant, the host crashes, recovers, and establishes a new
+    incarnation of the connection. The initial sequence number chosen is
+    S1 = ISN(t) -- last used sequence number on old incarnation of
+    connection!  If the recovery occurs quickly enough, any old
+
+
+                                                               [Page 29]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+
+
+
+    duplicates in the net bearing sequence numbers in the neighborhood
+    of S1 may arrive and be treated as new packets by the receiver of
+    the new incarnation of the connection.
+
+    The problem is that the recovering host may not know for how long it
+    crashed nor does it know whether there are still old duplicates in
+    the system from earlier connection incarnations.
+
+    One way to deal with this problem is to deliberately delay emitting
+    segments for one MSL after recovery from a crash; this is the "quiet
+    time" specification.  Hosts which prefer to avoid waiting and are
+    willing to risk possible confusion of old and new packets at a given
+    destination may choose not to wait for the "quiet time".
+    Implementors may provide TCP users with the ability to select on a
+    connection by connection basis whether to wait after a crash, or may
+    informally implement the "quiet time" for all connections.
+    Obviously, even where a user selects to "wait," this is not
+    necessary after the host has been "up" for at least MSL seconds.
+
+    To summarize: every segment emitted occupies one or more sequence
+    numbers in the sequence space, the numbers occupied by a segment are
+    "busy" or "in use" until MSL seconds have passed, upon crashing a
+    block of space-time is occupied by the octets of the last emitted
+    segment, if a new connection is started too soon and uses any of the
+    sequence numbers in the space-time footprint of the last segment of
+    the previous connection incarnation, there is a potential sequence
+    number overlap area which could cause confusion at the receiver.
+
+3.4.  Establishing a connection
+
+  The "three-way handshake" is the procedure used to establish a
+  connection.  This procedure normally is initiated by one TCP and
+  responded to by another TCP.  The procedure also works if two TCPs
+  simultaneously initiate the procedure.  When a simultaneous attempt
+  occurs, each TCP receives a "SYN" segment which carries no
+  acknowledgment after it has sent a "SYN".  Of course, the arrival of
+  an old duplicate "SYN" segment can potentially make it appear, to the
+  recipient, that a simultaneous connection initiation is in progress.
+  Proper use of "reset" segments can disambiguate these cases.
+
+  Several examples of connection initiation follow.  Although these
+  examples do not show connection synchronization using data-carrying
+  segments, this is perfectly legitimate, so long as the receiving TCP
+  doesn't deliver the data to the user until it is clear the data is
+  valid (i.e., the data must be buffered at the receiver until the
+  connection reaches the ESTABLISHED state).  The three-way handshake
+  reduces the possibility of false connections.  It is the
+
+
+
+[Page 30]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+
+
+
+  implementation of a trade-off between memory and messages to provide
+  information for this checking.
+
+  The simplest three-way handshake is shown in figure 7 below.  The
+  figures should be interpreted in the following way.  Each line is
+  numbered for reference purposes.  Right arrows (-->) indicate
+  departure of a TCP segment from TCP A to TCP B, or arrival of a
+  segment at B from A.  Left arrows (<--), indicate the reverse.
+  Ellipsis (...) indicates a segment which is still in the network
+  (delayed).  An "XXX" indicates a segment which is lost or rejected.
+  Comments appear in parentheses.  TCP states represent the state AFTER
+  the departure or arrival of the segment (whose contents are shown in
+  the center of each line).  Segment contents are shown in abbreviated
+  form, with sequence number, control flags, and ACK field.  Other
+  fields such as window, addresses, lengths, and text have been left out
+  in the interest of clarity.
+
+  
+
+      TCP A                                                TCP B
+
+  1.  CLOSED                                               LISTEN
+
+  2.  SYN-SENT    --> <SEQ=100><CTL=SYN>               --> SYN-RECEIVED
+
+  3.  ESTABLISHED <-- <SEQ=300><ACK=101><CTL=SYN,ACK>  <-- SYN-RECEIVED
+
+  4.  ESTABLISHED --> <SEQ=101><ACK=301><CTL=ACK>       --> ESTABLISHED
+
+  5.  ESTABLISHED --> <SEQ=101><ACK=301><CTL=ACK><DATA> --> ESTABLISHED
+
+          Basic 3-Way Handshake for Connection Synchronization
+
+                                Figure 7.
+
+  In line 2 of figure 7, TCP A begins by sending a SYN segment
+  indicating that it will use sequence numbers starting with sequence
+  number 100.  In line 3, TCP B sends a SYN and acknowledges the SYN it
+  received from TCP A.  Note that the acknowledgment field indicates TCP
+  B is now expecting to hear sequence 101, acknowledging the SYN which
+  occupied sequence 100.
+
+  At line 4, TCP A responds with an empty segment containing an ACK for
+  TCP B's SYN; and in line 5, TCP A sends some data.  Note that the
+  sequence number of the segment in line 5 is the same as in line 4
+  because the ACK does not occupy sequence number space (if it did, we
+  would wind up ACKing ACK's!).
+
+
+
+                                                               [Page 31]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+
+
+
+  Simultaneous initiation is only slightly more complex, as is shown in
+  figure 8.  Each TCP cycles from CLOSED to SYN-SENT to SYN-RECEIVED to
+  ESTABLISHED.
+
+  
+
+      TCP A                                            TCP B
+
+  1.  CLOSED                                           CLOSED
+
+  2.  SYN-SENT     --> <SEQ=100><CTL=SYN>              ...
+
+  3.  SYN-RECEIVED <-- <SEQ=300><CTL=SYN>              <-- SYN-SENT
+
+  4.               ... <SEQ=100><CTL=SYN>              --> SYN-RECEIVED
+
+  5.  SYN-RECEIVED --> <SEQ=100><ACK=301><CTL=SYN,ACK> ...
+
+  6.  ESTABLISHED  <-- <SEQ=300><ACK=101><CTL=SYN,ACK> <-- SYN-RECEIVED
+
+  7.               ... <SEQ=100><ACK=301><CTL=SYN,ACK> --> ESTABLISHED
+
+                Simultaneous Connection Synchronization
+
+                               Figure 8.
+
+  The principal reason for the three-way handshake is to prevent old
+  duplicate connection initiations from causing confusion.  To deal with
+  this, a special control message, reset, has been devised.  If the
+  receiving TCP is in a  non-synchronized state (i.e., SYN-SENT,
+  SYN-RECEIVED), it returns to LISTEN on receiving an acceptable reset.
+  If the TCP is in one of the synchronized states (ESTABLISHED,
+  FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT), it
+  aborts the connection and informs its user.  We discuss this latter
+  case under "half-open" connections below.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+[Page 32]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+
+
+
+  
+
+      TCP A                                                TCP B
+
+  1.  CLOSED                                               LISTEN
+
+  2.  SYN-SENT    --> <SEQ=100><CTL=SYN>               ...
+
+  3.  (duplicate) ... <SEQ=90><CTL=SYN>               --> SYN-RECEIVED
+
+  4.  SYN-SENT    <-- <SEQ=300><ACK=91><CTL=SYN,ACK>  <-- SYN-RECEIVED
+
+  5.  SYN-SENT    --> <SEQ=91><CTL=RST>               --> LISTEN
+  
+
+  6.              ... <SEQ=100><CTL=SYN>               --> SYN-RECEIVED
+
+  7.  SYN-SENT    <-- <SEQ=400><ACK=101><CTL=SYN,ACK>  <-- SYN-RECEIVED
+
+  8.  ESTABLISHED --> <SEQ=101><ACK=401><CTL=ACK>      --> ESTABLISHED
+
+                    Recovery from Old Duplicate SYN
+
+                               Figure 9.
+
+  As a simple example of recovery from old duplicates, consider
+  figure 9.  At line 3, an old duplicate SYN arrives at TCP B.  TCP B
+  cannot tell that this is an old duplicate, so it responds normally
+  (line 4).  TCP A detects that the ACK field is incorrect and returns a
+  RST (reset) with its SEQ field selected to make the segment
+  believable.  TCP B, on receiving the RST, returns to the LISTEN state.
+  When the original SYN (pun intended) finally arrives at line 6, the
+  synchronization proceeds normally.  If the SYN at line 6 had arrived
+  before the RST, a more complex exchange might have occurred with RST's
+  sent in both directions.
+
+  Half-Open Connections and Other Anomalies
+
+  An established connection is said to be  "half-open" if one of the
+  TCPs has closed or aborted the connection at its end without the
+  knowledge of the other, or if the two ends of the connection have
+  become desynchronized owing to a crash that resulted in loss of
+  memory.  Such connections will automatically become reset if an
+  attempt is made to send data in either direction.  However, half-open
+  connections are expected to be unusual, and the recovery procedure is
+  mildly involved.
+
+  If at site A the connection no longer exists, then an attempt by the
+
+
+                                                               [Page 33]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+
+
+
+  user at site B to send any data on it will result in the site B TCP
+  receiving a reset control message.  Such a message indicates to the
+  site B TCP that something is wrong, and it is expected to abort the
+  connection.
+
+  Assume that two user processes A and B are communicating with one
+  another when a crash occurs causing loss of memory to A's TCP.
+  Depending on the operating system supporting A's TCP, it is likely
+  that some error recovery mechanism exists.  When the TCP is up again,
+  A is likely to start again from the beginning or from a recovery
+  point.  As a result, A will probably try to OPEN the connection again
+  or try to SEND on the connection it believes open.  In the latter
+  case, it receives the error message "connection not open" from the
+  local (A's) TCP.  In an attempt to establish the connection, A's TCP
+  will send a segment containing SYN.  This scenario leads to the
+  example shown in figure 10.  After TCP A crashes, the user attempts to
+  re-open the connection.  TCP B, in the meantime, thinks the connection
+  is open.
+
+  
+
+      TCP A                                           TCP B
+
+  1.  (CRASH)                               (send 300,receive 100)
+
+  2.  CLOSED                                           ESTABLISHED
+
+  3.  SYN-SENT --> <SEQ=400><CTL=SYN>              --> (??)
+
+  4.  (!!)     <-- <SEQ=300><ACK=100><CTL=ACK>     <-- ESTABLISHED
+
+  5.  SYN-SENT --> <SEQ=100><CTL=RST>              --> (Abort!!)
+
+  6.  SYN-SENT                                         CLOSED
+
+  7.  SYN-SENT --> <SEQ=400><CTL=SYN>              -->
+
+                     Half-Open Connection Discovery
+
+                               Figure 10.
+
+  When the SYN arrives at line 3, TCP B, being in a synchronized state,
+  and the incoming segment outside the window, responds with an
+  acknowledgment indicating what sequence it next expects to hear (ACK
+  100).  TCP A sees that this segment does not acknowledge anything it
+  sent and, being unsynchronized, sends a reset (RST) because it has
+  detected a half-open connection.  TCP B aborts at line 5.  TCP A will
+
+
+
+[Page 34]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+
+
+
+  continue to try to establish the connection; the problem is now
+  reduced to the basic 3-way handshake of figure 7.
+
+  An interesting alternative case occurs when TCP A crashes and TCP B
+  tries to send data on what it thinks is a synchronized connection.
+  This is illustrated in figure 11.  In this case, the data arriving at
+  TCP A from TCP B (line 2) is unacceptable because no such connection
+  exists, so TCP A sends a RST.  The RST is acceptable so TCP B
+  processes it and aborts the connection.
+
+  
+
+        TCP A                                              TCP B
+
+  1.  (CRASH)                                   (send 300,receive 100)
+
+  2.  (??)    <-- <SEQ=300><ACK=100><DATA=10><CTL=ACK> <-- ESTABLISHED
+
+  3.          --> <SEQ=100><CTL=RST>                   --> (ABORT!!)
+
+           Active Side Causes Half-Open Connection Discovery
+
+                               Figure 11.
+
+  In figure 12, we find the two TCPs A and B with passive connections
+  waiting for SYN.  An old duplicate arriving at TCP B (line 2) stirs B
+  into action.  A SYN-ACK is returned (line 3) and causes TCP A to
+  generate a RST (the ACK in line 3 is not acceptable).  TCP B accepts
+  the reset and returns to its passive LISTEN state.
+
+  
+
+      TCP A                                         TCP B
+
+  1.  LISTEN                                        LISTEN
+
+  2.       ... <SEQ=Z><CTL=SYN>                -->  SYN-RECEIVED
+
+  3.  (??) <-- <SEQ=X><ACK=Z+1><CTL=SYN,ACK>   <--  SYN-RECEIVED
+
+  4.       --> <SEQ=Z+1><CTL=RST>              -->  (return to LISTEN!)
+
+  5.  LISTEN                                        LISTEN
+
+       Old Duplicate SYN Initiates a Reset on two Passive Sockets
+
+                               Figure 12.
+
+
+
+                                                               [Page 35]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+
+
+
+  A variety of other cases are possible, all of which are accounted for
+  by the following rules for RST generation and processing.
+
+  Reset Generation
+
+  As a general rule, reset (RST) must be sent whenever a segment arrives
+  which apparently is not intended for the current connection.  A reset
+  must not be sent if it is not clear that this is the case.
+
+  There are three groups of states:
+
+    1.  If the connection does not exist (CLOSED) then a reset is sent
+    in response to any incoming segment except another reset.  In
+    particular, SYNs addressed to a non-existent connection are rejected
+    by this means.
+
+    If the incoming segment has an ACK field, the reset takes its
+    sequence number from the ACK field of the segment, otherwise the
+    reset has sequence number zero and the ACK field is set to the sum
+    of the sequence number and segment length of the incoming segment.
+    The connection remains in the CLOSED state.
+
+    2.  If the connection is in any non-synchronized state (LISTEN,
+    SYN-SENT, SYN-RECEIVED), and the incoming segment acknowledges
+    something not yet sent (the segment carries an unacceptable ACK), or
+    if an incoming segment has a security level or compartment which
+    does not exactly match the level and compartment requested for the
+    connection, a reset is sent.
+
+    If our SYN has not been acknowledged and the precedence level of the
+    incoming segment is higher than the precedence level requested then
+    either raise the local precedence level (if allowed by the user and
+    the system) or send a reset; or if the precedence level of the
+    incoming segment is lower than the precedence level requested then
+    continue as if the precedence matched exactly (if the remote TCP
+    cannot raise the precedence level to match ours this will be
+    detected in the next segment it sends, and the connection will be
+    terminated then).  If our SYN has been acknowledged (perhaps in this
+    incoming segment) the precedence level of the incoming segment must
+    match the local precedence level exactly, if it does not a reset
+    must be sent.
+
+    If the incoming segment has an ACK field, the reset takes its
+    sequence number from the ACK field of the segment, otherwise the
+    reset has sequence number zero and the ACK field is set to the sum
+    of the sequence number and segment length of the incoming segment.
+    The connection remains in the same state.
+
+
+
+[Page 36]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+
+
+
+    3.  If the connection is in a synchronized state (ESTABLISHED,
+    FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT),
+    any unacceptable segment (out of window sequence number or
+    unacceptable acknowledgment number) must elicit only an empty
+    acknowledgment segment containing the current send-sequence number
+    and an acknowledgment indicating the next sequence number expected
+    to be received, and the connection remains in the same state.
+
+    If an incoming segment has a security level, or compartment, or
+    precedence which does not exactly match the level, and compartment,
+    and precedence requested for the connection, a reset is sent and the
+    connection goes to the CLOSED state.  The reset takes its sequence
+    number from the ACK field of the incoming segment.
+
+  Reset Processing
+
+  In all states except SYN-SENT, all reset (RST) segments are validated
+  by checking their SEQ-fields.  A reset is valid if its sequence number
+  is in the window.  In the SYN-SENT state (a RST received in response
+  to an initial SYN), the RST is acceptable if the ACK field
+  acknowledges the SYN.
+
+  The receiver of a RST first validates it, then changes state.  If the
+  receiver was in the LISTEN state, it ignores it.  If the receiver was
+  in SYN-RECEIVED state and had previously been in the LISTEN state,
+  then the receiver returns to the LISTEN state, otherwise the receiver
+  aborts the connection and goes to the CLOSED state.  If the receiver
+  was in any other state, it aborts the connection and advises the user
+  and goes to the CLOSED state.
+
+3.5.  Closing a Connection
+
+  CLOSE is an operation meaning "I have no more data to send."  The
+  notion of closing a full-duplex connection is subject to ambiguous
+  interpretation, of course, since it may not be obvious how to treat
+  the receiving side of the connection.  We have chosen to treat CLOSE
+  in a simplex fashion.  The user who CLOSEs may continue to RECEIVE
+  until he is told that the other side has CLOSED also.  Thus, a program
+  could initiate several SENDs followed by a CLOSE, and then continue to
+  RECEIVE until signaled that a RECEIVE failed because the other side
+  has CLOSED.  We assume that the TCP will signal a user, even if no
+  RECEIVEs are outstanding, that the other side has closed, so the user
+  can terminate his side gracefully.  A TCP will reliably deliver all
+  buffers SENT before the connection was CLOSED so a user who expects no
+  data in return need only wait to hear the connection was CLOSED
+  successfully to know that all his data was received at the destination
+  TCP.  Users must keep reading connections they close for sending until
+  the TCP says no more data.
+
+
+                                                               [Page 37]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+
+
+
+  There are essentially three cases:
+
+    1) The user initiates by telling the TCP to CLOSE the connection
+
+    2) The remote TCP initiates by sending a FIN control signal
+
+    3) Both users CLOSE simultaneously
+
+  Case 1:  Local user initiates the close
+
+    In this case, a FIN segment can be constructed and placed on the
+    outgoing segment queue.  No further SENDs from the user will be
+    accepted by the TCP, and it enters the FIN-WAIT-1 state.  RECEIVEs
+    are allowed in this state.  All segments preceding and including FIN
+    will be retransmitted until acknowledged.  When the other TCP has
+    both acknowledged the FIN and sent a FIN of its own, the first TCP
+    can ACK this FIN.  Note that a TCP receiving a FIN will ACK but not
+    send its own FIN until its user has CLOSED the connection also.
+
+  Case 2:  TCP receives a FIN from the network
+
+    If an unsolicited FIN arrives from the network, the receiving TCP
+    can ACK it and tell the user that the connection is closing.  The
+    user will respond with a CLOSE, upon which the TCP can send a FIN to
+    the other TCP after sending any remaining data.  The TCP then waits
+    until its own FIN is acknowledged whereupon it deletes the
+    connection.  If an ACK is not forthcoming, after the user timeout
+    the connection is aborted and the user is told.
+
+  Case 3:  both users close simultaneously
+
+    A simultaneous CLOSE by users at both ends of a connection causes
+    FIN segments to be exchanged.  When all segments preceding the FINs
+    have been processed and acknowledged, each TCP can ACK the FIN it
+    has received.  Both will, upon receiving these ACKs, delete the
+    connection.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+[Page 38]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+
+
+
+  
+
+      TCP A                                                TCP B
+
+  1.  ESTABLISHED                                          ESTABLISHED
+
+  2.  (Close)
+      FIN-WAIT-1  --> <SEQ=100><ACK=300><CTL=FIN,ACK>  --> CLOSE-WAIT
+
+  3.  FIN-WAIT-2  <-- <SEQ=300><ACK=101><CTL=ACK>      <-- CLOSE-WAIT
+
+  4.                                                       (Close)
+      TIME-WAIT   <-- <SEQ=300><ACK=101><CTL=FIN,ACK>  <-- LAST-ACK
+
+  5.  TIME-WAIT   --> <SEQ=101><ACK=301><CTL=ACK>      --> CLOSED
+
+  6.  (2 MSL)
+      CLOSED                                                      
+
+                         Normal Close Sequence
+
+                               Figure 13.
+
+  
+
+      TCP A                                                TCP B
+
+  1.  ESTABLISHED                                          ESTABLISHED
+
+  2.  (Close)                                              (Close)
+      FIN-WAIT-1  --> <SEQ=100><ACK=300><CTL=FIN,ACK>  ... FIN-WAIT-1
+                  <-- <SEQ=300><ACK=100><CTL=FIN,ACK>  <--
+                  ... <SEQ=100><ACK=300><CTL=FIN,ACK>  -->
+
+  3.  CLOSING     --> <SEQ=101><ACK=301><CTL=ACK>      ... CLOSING
+                  <-- <SEQ=301><ACK=101><CTL=ACK>      <--
+                  ... <SEQ=101><ACK=301><CTL=ACK>      -->
+
+  4.  TIME-WAIT                                            TIME-WAIT
+      (2 MSL)                                              (2 MSL)
+      CLOSED                                               CLOSED
+
+                      Simultaneous Close Sequence
+
+                               Figure 14.
+
+
+
+
+
+                                                               [Page 39]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+
+
+
+3.6.  Precedence and Security
+
+  The intent is that connection be allowed only between ports operating
+  with exactly the same security and compartment values and at the
+  higher of the precedence level requested by the two ports.
+
+  The precedence and security parameters used in TCP are exactly those
+  defined in the Internet Protocol (IP) [2].  Throughout this TCP
+  specification the term "security/compartment" is intended to indicate
+  the security parameters used in IP including security, compartment,
+  user group, and handling restriction.
+
+  A connection attempt with mismatched security/compartment values or a
+  lower precedence value must be rejected by sending a reset.  Rejecting
+  a connection due to too low a precedence only occurs after an
+  acknowledgment of the SYN has been received.
+
+  Note that TCP modules which operate only at the default value of
+  precedence will still have to check the precedence of incoming
+  segments and possibly raise the precedence level they use on the
+  connection.
+
+  The security parameters may be used even in a non-secure environment
+  (the values would indicate unclassified data), thus hosts in
+  non-secure environments must be prepared to receive the security
+  parameters, though they need not send them.
+
+3.7.  Data Communication
+
+  Once the connection is established data is communicated by the
+  exchange of segments.  Because segments may be lost due to errors
+  (checksum test failure), or network congestion, TCP uses
+  retransmission (after a timeout) to ensure delivery of every segment.
+  Duplicate segments may arrive due to network or TCP retransmission.
+  As discussed in the section on sequence numbers the TCP performs
+  certain tests on the sequence and acknowledgment numbers in the
+  segments to verify their acceptability.
+
+  The sender of data keeps track of the next sequence number to use in
+  the variable SND.NXT.  The receiver of data keeps track of the next
+  sequence number to expect in the variable RCV.NXT.  The sender of data
+  keeps track of the oldest unacknowledged sequence number in the
+  variable SND.UNA.  If the data flow is momentarily idle and all data
+  sent has been acknowledged then the three variables will be equal.
+
+  When the sender creates a segment and transmits it the sender advances
+  SND.NXT.  When the receiver accepts a segment it advances RCV.NXT and
+  sends an acknowledgment.  When the data sender receives an
+
+
+[Page 40]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+
+
+
+  acknowledgment it advances SND.UNA.  The extent to which the values of
+  these variables differ is a measure of the delay in the communication.
+  The amount by which the variables are advanced is the length of the
+  data in the segment.  Note that once in the ESTABLISHED state all
+  segments must carry current acknowledgment information.
+
+  The CLOSE user call implies a push function, as does the FIN control
+  flag in an incoming segment.
+
+  Retransmission Timeout
+
+  Because of the variability of the networks that compose an
+  internetwork system and the wide range of uses of TCP connections the
+  retransmission timeout must be dynamically determined.  One procedure
+  for determining a retransmission time out is given here as an
+  illustration.
+
+    An Example Retransmission Timeout Procedure
+
+      Measure the elapsed time between sending a data octet with a
+      particular sequence number and receiving an acknowledgment that
+      covers that sequence number (segments sent do not have to match
+      segments received).  This measured elapsed time is the Round Trip
+      Time (RTT).  Next compute a Smoothed Round Trip Time (SRTT) as:
+
+        SRTT = ( ALPHA * SRTT ) + ((1-ALPHA) * RTT)
+
+      and based on this, compute the retransmission timeout (RTO) as:
+
+        RTO = min[UBOUND,max[LBOUND,(BETA*SRTT)]]
+
+      where UBOUND is an upper bound on the timeout (e.g., 1 minute),
+      LBOUND is a lower bound on the timeout (e.g., 1 second), ALPHA is
+      a smoothing factor (e.g., .8 to .9), and BETA is a delay variance
+      factor (e.g., 1.3 to 2.0).
+
+  The Communication of Urgent Information
+
+  The objective of the TCP urgent mechanism is to allow the sending user
+  to stimulate the receiving user to accept some urgent data and to
+  permit the receiving TCP to indicate to the receiving user when all
+  the currently known urgent data has been received by the user.
+
+  This mechanism permits a point in the data stream to be designated as
+  the end of urgent information.  Whenever this point is in advance of
+  the receive sequence number (RCV.NXT) at the receiving TCP, that TCP
+  must tell the user to go into "urgent mode"; when the receive sequence
+  number catches up to the urgent pointer, the TCP must tell the user to go
+
+
+                                                               [Page 41]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+
+
+
+  into "normal mode".  If the urgent pointer is updated while the user
+  is in "urgent mode", the update will be invisible to the user.
+
+  The method employs an urgent field which is carried in all segments
+  transmitted.  The URG control flag indicates that the urgent field is
+  meaningful and must be added to the segment sequence number to yield
+  the urgent pointer.  The absence of this flag indicates that there is
+  no urgent data outstanding.
+
+  To send an urgent indication the user must also send at least one data
+  octet.  If the sending user also indicates a push, timely delivery of
+  the urgent information to the destination process is enhanced.
+
+  Managing the Window
+
+  The window sent in each segment indicates the range of sequence
+  numbers the sender of the window (the data receiver) is currently
+  prepared to accept.  There is an assumption that this is related to
+  the currently available data buffer space available for this
+  connection.
+
+  Indicating a large window encourages transmissions.  If more data
+  arrives than can be accepted, it will be discarded.  This will result
+  in excessive retransmissions, adding unnecessarily to the load on the
+  network and the TCPs.  Indicating a small window may restrict the
+  transmission of data to the point of introducing a round trip delay
+  between each new segment transmitted.
+
+  The mechanisms provided allow a TCP to advertise a large window and to
+  subsequently advertise a much smaller window without having accepted
+  that much data.  This, so called "shrinking the window," is strongly
+  discouraged.  The robustness principle dictates that TCPs will not
+  shrink the window themselves, but will be prepared for such behavior
+  on the part of other TCPs.
+
+  The sending TCP must be prepared to accept from the user and send at
+  least one octet of new data even if the send window is zero.  The
+  sending TCP must regularly retransmit to the receiving TCP even when
+  the window is zero.  Two minutes is recommended for the retransmission
+  interval when the window is zero.  This retransmission is essential to
+  guarantee that when either TCP has a zero window the re-opening of the
+  window will be reliably reported to the other.
+
+  When the receiving TCP has a zero window and a segment arrives it must
+  still send an acknowledgment showing its next expected sequence number
+  and current window (zero).
+
+  The sending TCP packages the data to be transmitted into segments
+
+
+[Page 42]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+
+
+
+  which fit the current window, and may repackage segments on the
+  retransmission queue.  Such repackaging is not required, but may be
+  helpful.
+
+  In a connection with a one-way data flow, the window information will
+  be carried in acknowledgment segments that all have the same sequence
+  number so there will be no way to reorder them if they arrive out of
+  order.  This is not a serious problem, but it will allow the window
+  information to be on occasion temporarily based on old reports from
+  the data receiver.  A refinement to avoid this problem is to act on
+  the window information from segments that carry the highest
+  acknowledgment number (that is segments with acknowledgment number
+  equal or greater than the highest previously received).
+
+  The window management procedure has significant influence on the
+  communication performance.  The following comments are suggestions to
+  implementers.
+
+    Window Management Suggestions
+
+      Allocating a very small window causes data to be transmitted in
+      many small segments when better performance is achieved using
+      fewer large segments.
+
+      One suggestion for avoiding small windows is for the receiver to
+      defer updating a window until the additional allocation is at
+      least X percent of the maximum allocation possible for the
+      connection (where X might be 20 to 40).
+
+      Another suggestion is for the sender to avoid sending small
+      segments by waiting until the window is large enough before
+      sending data.  If the user signals a push function then the
+      data must be sent even if it is a small segment.
+
+      Note that the acknowledgments should not be delayed or unnecessary
+      retransmissions will result.  One strategy would be to send an
+      acknowledgment when a small segment arrives (without updating the
+      window information), and then to send another acknowledgment with
+      new window information when the window is larger.
+
+      The segment sent to probe a zero window may also begin a break up
+      of transmitted data into smaller and smaller segments.  If a
+      segment containing a single data octet sent to probe a zero window
+      is accepted, it consumes one octet of the window now available.
+      If the sending TCP simply sends as much as it can whenever the
+      window is non zero, the transmitted data will be broken into
+      alternating big and small segments.  As time goes on, occasional
+      pauses in the receiver making window allocation available will
+
+
+                                                               [Page 43]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+
+
+
+      result in breaking the big segments into a small and not quite so
+      big pair. And after a while the data transmission will be in
+      mostly small segments.
+
+      The suggestion here is that the TCP implementations need to
+      actively attempt to combine small window allocations into larger
+      windows, since the mechanisms for managing the window tend to lead
+      to many small windows in the simplest minded implementations.
+
+3.8.  Interfaces
+
+  There are of course two interfaces of concern:  the user/TCP interface
+  and the TCP/lower-level interface.  We have a fairly elaborate model
+  of the user/TCP interface, but the interface to the lower level
+  protocol module is left unspecified here, since it will be specified
+  in detail by the specification of the lower level protocol.  For the
+  case that the lower level is IP we note some of the parameter values
+  that TCPs might use.
+
+  User/TCP Interface
+
+    The following functional description of user commands to the TCP is,
+    at best, fictional, since every operating system will have different
+    facilities.  Consequently, we must warn readers that different TCP
+    implementations may have different user interfaces.  However, all
+    TCPs must provide a certain minimum set of services to guarantee
+    that all TCP implementations can support the same protocol
+    hierarchy.  This section specifies the functional interfaces
+    required of all TCP implementations.
+
+    TCP User Commands
+
+      The following sections functionally characterize a USER/TCP
+      interface.  The notation used is similar to most procedure or
+      function calls in high level languages, but this usage is not
+      meant to rule out trap type service calls (e.g., SVCs, UUOs,
+      EMTs).
+
+      The user commands described below specify the basic functions the
+      TCP must perform to support interprocess communication.
+      Individual implementations must define their own exact format, and
+      may provide combinations or subsets of the basic functions in
+      single calls.  In particular, some implementations may wish to
+      automatically OPEN a connection on the first SEND or RECEIVE
+      issued by the user for a given connection.
+
+
+
+
+
+[Page 44]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+
+
+
+      In providing interprocess communication facilities, the TCP must
+      not only accept commands, but must also return information to the
+      processes it serves.  The latter consists of:
+
+        (a) general information about a connection (e.g., interrupts,
+        remote close, binding of unspecified foreign socket).
+
+        (b) replies to specific user commands indicating success or
+        various types of failure.
+
+      Open
+
+        Format:  OPEN (local port, foreign socket, active/passive
+        [, timeout] [, precedence] [, security/compartment] [, options])
+        -> local connection name
+
+        We assume that the local TCP is aware of the identity of the
+        processes it serves and will check the authority of the process
+        to use the connection specified.  Depending upon the
+        implementation of the TCP, the local network and TCP identifiers
+        for the source address will either be supplied by the TCP or the
+        lower level protocol (e.g., IP).  These considerations are the
+        result of concern about security, to the extent that no TCP be
+        able to masquerade as another one, and so on.  Similarly, no
+        process can masquerade as another without the collusion of the
+        TCP.
+
+        If the active/passive flag is set to passive, then this is a
+        call to LISTEN for an incoming connection.  A passive open may
+        have either a fully specified foreign socket to wait for a
+        particular connection or an unspecified foreign socket to wait
+        for any call.  A fully specified passive call can be made active
+        by the subsequent execution of a SEND.
+
+        A transmission control block (TCB) is created and partially
+        filled in with data from the OPEN command parameters.
+
+        On an active OPEN command, the TCP will begin the procedure to
+        synchronize (i.e., establish) the connection at once.
+
+        The timeout, if present, permits the caller to set up a timeout
+        for all data submitted to TCP.  If data is not successfully
+        delivered to the destination within the timeout period, the TCP
+        will abort the connection.  The present global default is five
+        minutes.
+
+        The TCP or some component of the operating system will verify
+        the user's authority to open a connection with the specified
+
+
+                                                               [Page 45]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+
+
+
+        precedence or security/compartment.  The absence of precedence
+        or security/compartment specification in the OPEN call indicates
+        the default values must be used.
+
+        TCP will accept incoming requests as matching only if the
+        security/compartment information is exactly the same and only if
+        the precedence is equal to or higher than the precedence
+        requested in the OPEN call.
+
+        The precedence for the connection is the higher of the values
+        requested in the OPEN call and received from the incoming
+        request, and fixed at that value for the life of the
+        connection.  Implementers may want to give the user control of
+        this precedence negotiation.  For example, the user might be
+        allowed to specify that the precedence must be exactly matched,
+        or that any attempt to raise the precedence be confirmed by the
+        user.
+
+        A local connection name will be returned to the user by the TCP.
+        The local connection name can then be used as a short hand term
+        for the connection defined by the <local socket, foreign socket>
+        pair.
+
+      Send
+
+        Format:  SEND (local connection name, buffer address, byte
+        count, PUSH flag, URGENT flag [,timeout])
+
+        This call causes the data contained in the indicated user buffer
+        to be sent on the indicated connection.  If the connection has
+        not been opened, the SEND is considered an error.  Some
+        implementations may allow users to SEND first; in which case, an
+        automatic OPEN would be done.  If the calling process is not
+        authorized to use this connection, an error is returned.
+
+        If the PUSH flag is set, the data must be transmitted promptly
+        to the receiver, and the PUSH bit will be set in the last TCP
+        segment created from the buffer.  If the PUSH flag is not set,
+        the data may be combined with data from subsequent SENDs for
+        transmission efficiency.
+
+        If the URGENT flag is set, segments sent to the destination TCP
+        will have the urgent pointer set.  The receiving TCP will signal
+        the urgent condition to the receiving process if the urgent
+        pointer indicates that data preceding the urgent pointer has not
+        been consumed by the receiving process.  The purpose of urgent
+        is to stimulate the receiver to process the urgent data and to
+        indicate to the receiver when all the currently known urgent
+
+
+[Page 46]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+
+
+
+        data has been received.  The number of times the sending user's
+        TCP signals urgent will not necessarily be equal to the number
+        of times the receiving user will be notified of the presence of
+        urgent data.
+
+        If no foreign socket was specified in the OPEN, but the
+        connection is established (e.g., because a LISTENing connection
+        has become specific due to a foreign segment arriving for the
+        local socket), then the designated buffer is sent to the implied
+        foreign socket.  Users who make use of OPEN with an unspecified
+        foreign socket can make use of SEND without ever explicitly
+        knowing the foreign socket address.
+
+        However, if a SEND is attempted before the foreign socket
+        becomes specified, an error will be returned.  Users can use the
+        STATUS call to determine the status of the connection.  In some
+        implementations the TCP may notify the user when an unspecified
+        socket is bound.
+
+        If a timeout is specified, the current user timeout for this
+        connection is changed to the new one.
+
+        In the simplest implementation, SEND would not return control to
+        the sending process until either the transmission was complete
+        or the timeout had been exceeded.  However, this simple method
+        is both subject to deadlocks (for example, both sides of the
+        connection might try to do SENDs before doing any RECEIVEs) and
+        offers poor performance, so it is not recommended.  A more
+        sophisticated implementation would return immediately to allow
+        the process to run concurrently with network I/O, and,
+        furthermore, to allow multiple SENDs to be in progress.
+        Multiple SENDs are served in first come, first served order, so
+        the TCP will queue those it cannot service immediately.
+
+        We have implicitly assumed an asynchronous user interface in
+        which a SEND later elicits some kind of SIGNAL or
+        pseudo-interrupt from the serving TCP.  An alternative is to
+        return a response immediately.  For instance, SENDs might return
+        immediate local acknowledgment, even if the segment sent had not
+        been acknowledged by the distant TCP.  We could optimistically
+        assume eventual success.  If we are wrong, the connection will
+        close anyway due to the timeout.  In implementations of this
+        kind (synchronous), there will still be some asynchronous
+        signals, but these will deal with the connection itself, and not
+        with specific segments or buffers.
+
+        In order for the process to distinguish among error or success
+        indications for different SENDs, it might be appropriate for the
+
+
+                                                               [Page 47]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+
+
+
+        buffer address to be returned along with the coded response to
+        the SEND request.  TCP-to-user signals are discussed below,
+        indicating the information which should be returned to the
+        calling process.
+
+      Receive
+
+        Format:  RECEIVE (local connection name, buffer address, byte
+        count) -> byte count, urgent flag, push flag
+
+        This command allocates a receiving buffer associated with the
+        specified connection.  If no OPEN precedes this command or the
+        calling process is not authorized to use this connection, an
+        error is returned.
+
+        In the simplest implementation, control would not return to the
+        calling program until either the buffer was filled, or some
+        error occurred, but this scheme is highly subject to deadlocks.
+        A more sophisticated implementation would permit several
+        RECEIVEs to be outstanding at once.  These would be filled as
+        segments arrive.  This strategy permits increased throughput at
+        the cost of a more elaborate scheme (possibly asynchronous) to
+        notify the calling program that a PUSH has been seen or a buffer
+        filled.
+
+        If enough data arrive to fill the buffer before a PUSH is seen,
+        the PUSH flag will not be set in the response to the RECEIVE.
+        The buffer will be filled with as much data as it can hold.  If
+        a PUSH is seen before the buffer is filled the buffer will be
+        returned partially filled and PUSH indicated.
+
+        If there is urgent data the user will have been informed as soon
+        as it arrived via a TCP-to-user signal.  The receiving user
+        should thus be in "urgent mode".  If the URGENT flag is on,
+        additional urgent data remains.  If the URGENT flag is off, this
+        call to RECEIVE has returned all the urgent data, and the user
+        may now leave "urgent mode".  Note that data following the
+        urgent pointer (non-urgent data) cannot be delivered to the user
+        in the same buffer with preceding urgent data unless the
+        boundary is clearly marked for the user.
+
+        To distinguish among several outstanding RECEIVEs and to take
+        care of the case that a buffer is not completely filled, the
+        return code is accompanied by both a buffer pointer and a byte
+        count indicating the actual length of the data received.
+
+        Alternative implementations of RECEIVE might have the TCP
+
+
+
+[Page 48]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+
+
+
+        allocate buffer storage, or the TCP might share a ring buffer
+        with the user.
+
+      Close
+
+        Format:  CLOSE (local connection name)
+
+        This command causes the connection specified to be closed.  If
+        the connection is not open or the calling process is not
+        authorized to use this connection, an error is returned.
+        Closing connections is intended to be a graceful operation in
+        the sense that outstanding SENDs will be transmitted (and
+        retransmitted), as flow control permits, until all have been
+        serviced.  Thus, it should be acceptable to make several SEND
+        calls, followed by a CLOSE, and expect all the data to be sent
+        to the destination.  It should also be clear that users should
+        continue to RECEIVE on CLOSING connections, since the other side
+        may be trying to transmit the last of its data.  Thus, CLOSE
+        means "I have no more to send" but does not mean "I will not
+        receive any more."  It may happen (if the user level protocol is
+        not well thought out) that the closing side is unable to get rid
+        of all its data before timing out.  In this event, CLOSE turns
+        into ABORT, and the closing TCP gives up.
+
+        The user may CLOSE the connection at any time on his own
+        initiative, or in response to various prompts from the TCP
+        (e.g., remote close executed, transmission timeout exceeded,
+        destination inaccessible).
+
+        Because closing a connection requires communication with the
+        foreign TCP, connections may remain in the closing state for a
+        short time.  Attempts to reopen the connection before the TCP
+        replies to the CLOSE command will result in error responses.
+
+        Close also implies push function.
+
+      Status
+
+        Format:  STATUS (local connection name) -> status data
+
+        This is an implementation dependent user command and could be
+        excluded without adverse effect.  Information returned would
+        typically come from the TCB associated with the connection.
+
+        This command returns a data block containing the following
+        information:
+
+          local socket,
+
+
+                                                               [Page 49]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+
+
+
+          foreign socket,
+          local connection name,
+          receive window,
+          send window,
+          connection state,
+          number of buffers awaiting acknowledgment,
+          number of buffers pending receipt,
+          urgent state,
+          precedence,
+          security/compartment,
+          and transmission timeout.
+
+        Depending on the state of the connection, or on the
+        implementation itself, some of this information may not be
+        available or meaningful.  If the calling process is not
+        authorized to use this connection, an error is returned.  This
+        prevents unauthorized processes from gaining information about a
+        connection.
+
+      Abort
+
+        Format:  ABORT (local connection name)
+
+        This command causes all pending SENDs and RECEIVEs to be
+        aborted, the TCB to be removed, and a special RESET message to
+        be sent to the TCP on the other side of the connection.
+        Depending on the implementation, users may receive abort
+        indications for each outstanding SEND or RECEIVE, or may simply
+        receive an ABORT-acknowledgment.
+
+    TCP-to-User Messages
+
+      It is assumed that the operating system environment provides a
+      means for the TCP to asynchronously signal the user program.  When
+      the TCP does signal a user program, certain information is passed
+      to the user.  Often in the specification the information will be
+      an error message.  In other cases there will be information
+      relating to the completion of processing a SEND or RECEIVE or
+      other user call.
+
+      The following information is provided:
+
+        Local Connection Name                    Always
+        Response String                          Always
+        Buffer Address                           Send & Receive
+        Byte count (counts bytes received)       Receive
+        Push flag                                Receive
+        Urgent flag                              Receive
+
+
+[Page 50]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+
+
+
+  TCP/Lower-Level Interface
+
+    The TCP calls on a lower level protocol module to actually send and
+    receive information over a network.  One case is that of the ARPA
+    internetwork system where the lower level module is the Internet
+    Protocol (IP) [2].
+
+    If the lower level protocol is IP it provides arguments for a type
+    of service and for a time to live.  TCP uses the following settings
+    for these parameters:
+
+      Type of Service = Precedence: routine, Delay: normal, Throughput:
+      normal, Reliability: normal; or 00000000.
+
+      Time to Live    = one minute, or 00111100.
+
+        Note that the assumed maximum segment lifetime is two minutes.
+        Here we explicitly ask that a segment be destroyed if it cannot
+        be delivered by the internet system within one minute.
+
+    If the lower level is IP (or other protocol that provides this
+    feature) and source routing is used, the interface must allow the
+    route information to be communicated.  This is especially important
+    so that the source and destination addresses used in the TCP
+    checksum be the originating source and ultimate destination. It is
+    also important to preserve the return route to answer connection
+    requests.
+
+    Any lower level protocol will have to provide the source address,
+    destination address, and protocol fields, and some way to determine
+    the "TCP length", both to provide the functional equivalent service
+    of IP and to be used in the TCP checksum.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+                                                               [Page 51]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+
+
+
+3.9.  Event Processing
+
+  The processing depicted in this section is an example of one possible
+  implementation.  Other implementations may have slightly different
+  processing sequences, but they should differ from those in this
+  section only in detail, not in substance.
+
+  The activity of the TCP can be characterized as responding to events.
+  The events that occur can be cast into three categories:  user calls,
+  arriving segments, and timeouts.  This section describes the
+  processing the TCP does in response to each of the events.  In many
+  cases the processing required depends on the state of the connection.
+
+    Events that occur:
+
+      User Calls
+
+        OPEN
+        SEND
+        RECEIVE
+        CLOSE
+        ABORT
+        STATUS
+
+      Arriving Segments
+
+        SEGMENT ARRIVES
+
+      Timeouts
+
+        USER TIMEOUT
+        RETRANSMISSION TIMEOUT
+        TIME-WAIT TIMEOUT
+
+  The model of the TCP/user interface is that user commands receive an
+  immediate return and possibly a delayed response via an event or
+  pseudo interrupt.  In the following descriptions, the term "signal"
+  means cause a delayed response.
+
+  Error responses are given as character strings.  For example, user
+  commands referencing connections that do not exist receive "error:
+  connection not open".
+
+  Please note in the following that all arithmetic on sequence numbers,
+  acknowledgment numbers, windows, et cetera, is modulo 2**32 the size
+  of the sequence number space.  Also note that "=<" means less than or
+  equal to (modulo 2**32).
+
+
+
+[Page 52]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+
+
+
+  A natural way to think about processing incoming segments is to
+  imagine that they are first tested for proper sequence number (i.e.,
+  that their contents lie in the range of the expected "receive window"
+  in the sequence number space) and then that they are generally queued
+  and processed in sequence number order.
+
+  When a segment overlaps other already received segments we reconstruct
+  the segment to contain just the new data, and adjust the header fields
+  to be consistent.
+
+  Note that if no state change is mentioned the TCP stays in the same
+  state.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+                                                               [Page 53]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+                                                               OPEN Call
+
+
+
+  OPEN Call
+
+    CLOSED STATE (i.e., TCB does not exist)
+
+      Create a new transmission control block (TCB) to hold connection
+      state information.  Fill in local socket identifier, foreign
+      socket, precedence, security/compartment, and user timeout
+      information.  Note that some parts of the foreign socket may be
+      unspecified in a passive OPEN and are to be filled in by the
+      parameters of the incoming SYN segment.  Verify the security and
+      precedence requested are allowed for this user, if not return
+      "error:  precedence not allowed" or "error:  security/compartment
+      not allowed."  If passive enter the LISTEN state and return.  If
+      active and the foreign socket is unspecified, return "error:
+      foreign socket unspecified"; if active and the foreign socket is
+      specified, issue a SYN segment.  An initial send sequence number
+      (ISS) is selected.  A SYN segment of the form <SEQ=ISS><CTL=SYN>
+      is sent.  Set SND.UNA to ISS, SND.NXT to ISS+1, enter SYN-SENT
+      state, and return.
+
+      If the caller does not have access to the local socket specified,
+      return "error:  connection illegal for this process".  If there is
+      no room to create a new connection, return "error:  insufficient
+      resources".
+
+    LISTEN STATE
+
+      If active and the foreign socket is specified, then change the
+      connection from passive to active, select an ISS.  Send a SYN
+      segment, set SND.UNA to ISS, SND.NXT to ISS+1.  Enter SYN-SENT
+      state.  Data associated with SEND may be sent with SYN segment or
+      queued for transmission after entering ESTABLISHED state.  The
+      urgent bit if requested in the command must be sent with the data
+      segments sent as a result of this command.  If there is no room to
+      queue the request, respond with "error:  insufficient resources".
+      If Foreign socket was not specified, then return "error:  foreign
+      socket unspecified".
+
+
+
+
+
+
+
+
+
+
+
+
+[Page 54]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+OPEN Call
+
+
+
+    SYN-SENT STATE
+    SYN-RECEIVED STATE
+    ESTABLISHED STATE
+    FIN-WAIT-1 STATE
+    FIN-WAIT-2 STATE
+    CLOSE-WAIT STATE
+    CLOSING STATE
+    LAST-ACK STATE
+    TIME-WAIT STATE
+
+      Return "error:  connection already exists".
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+                                                               [Page 55]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+                                                               SEND Call
+
+
+
+  SEND Call
+
+    CLOSED STATE (i.e., TCB does not exist)
+
+      If the user does not have access to such a connection, then return
+      "error:  connection illegal for this process".
+
+      Otherwise, return "error:  connection does not exist".
+
+    LISTEN STATE
+
+      If the foreign socket is specified, then change the connection
+      from passive to active, select an ISS.  Send a SYN segment, set
+      SND.UNA to ISS, SND.NXT to ISS+1.  Enter SYN-SENT state.  Data
+      associated with SEND may be sent with SYN segment or queued for
+      transmission after entering ESTABLISHED state.  The urgent bit if
+      requested in the command must be sent with the data segments sent
+      as a result of this command.  If there is no room to queue the
+      request, respond with "error:  insufficient resources".  If
+      Foreign socket was not specified, then return "error:  foreign
+      socket unspecified".
+
+    SYN-SENT STATE
+    SYN-RECEIVED STATE
+
+      Queue the data for transmission after entering ESTABLISHED state.
+      If no space to queue, respond with "error:  insufficient
+      resources".
+
+    ESTABLISHED STATE
+    CLOSE-WAIT STATE
+
+      Segmentize the buffer and send it with a piggybacked
+      acknowledgment (acknowledgment value = RCV.NXT).  If there is
+      insufficient space to remember this buffer, simply return "error:
+      insufficient resources".
+
+      If the urgent flag is set, then SND.UP <- SND.NXT-1 and set the
+      urgent pointer in the outgoing segments.
+
+
+
+
+
+
+
+
+
+
+[Page 56]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+SEND Call
+
+
+
+    FIN-WAIT-1 STATE
+    FIN-WAIT-2 STATE
+    CLOSING STATE
+    LAST-ACK STATE
+    TIME-WAIT STATE
+
+      Return "error:  connection closing" and do not service request.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+                                                               [Page 57]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+                                                            RECEIVE Call
+
+
+
+  RECEIVE Call
+
+    CLOSED STATE (i.e., TCB does not exist)
+
+      If the user does not have access to such a connection, return
+      "error:  connection illegal for this process".
+
+      Otherwise return "error:  connection does not exist".
+
+    LISTEN STATE
+    SYN-SENT STATE
+    SYN-RECEIVED STATE
+
+      Queue for processing after entering ESTABLISHED state.  If there
+      is no room to queue this request, respond with "error:
+      insufficient resources".
+
+    ESTABLISHED STATE
+    FIN-WAIT-1 STATE
+    FIN-WAIT-2 STATE
+
+      If insufficient incoming segments are queued to satisfy the
+      request, queue the request.  If there is no queue space to
+      remember the RECEIVE, respond with "error:  insufficient
+      resources".
+
+      Reassemble queued incoming segments into receive buffer and return
+      to user.  Mark "push seen" (PUSH) if this is the case.
+
+      If RCV.UP is in advance of the data currently being passed to the
+      user notify the user of the presence of urgent data.
+
+      When the TCP takes responsibility for delivering data to the user
+      that fact must be communicated to the sender via an
+      acknowledgment.  The formation of such an acknowledgment is
+      described below in the discussion of processing an incoming
+      segment.
+
+
+
+
+
+
+
+
+
+
+
+
+[Page 58]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+RECEIVE Call
+
+
+
+    CLOSE-WAIT STATE
+
+      Since the remote side has already sent FIN, RECEIVEs must be
+      satisfied by text already on hand, but not yet delivered to the
+      user.  If no text is awaiting delivery, the RECEIVE will get an
+      "error:  connection closing" response.  Otherwise, any remaining
+      text can be used to satisfy the RECEIVE.
+
+    CLOSING STATE
+    LAST-ACK STATE
+    TIME-WAIT STATE
+
+      Return "error:  connection closing".
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+                                                               [Page 59]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+                                                              CLOSE Call
+
+
+
+  CLOSE Call
+
+    CLOSED STATE (i.e., TCB does not exist)
+
+      If the user does not have access to such a connection, return
+      "error:  connection illegal for this process".
+
+      Otherwise, return "error:  connection does not exist".
+
+    LISTEN STATE
+
+      Any outstanding RECEIVEs are returned with "error:  closing"
+      responses.  Delete TCB, enter CLOSED state, and return.
+
+    SYN-SENT STATE
+
+      Delete the TCB and return "error:  closing" responses to any
+      queued SENDs, or RECEIVEs.
+
+    SYN-RECEIVED STATE
+
+      If no SENDs have been issued and there is no pending data to send,
+      then form a FIN segment and send it, and enter FIN-WAIT-1 state;
+      otherwise queue for processing after entering ESTABLISHED state.
+
+    ESTABLISHED STATE
+
+      Queue this until all preceding SENDs have been segmentized, then
+      form a FIN segment and send it.  In any case, enter FIN-WAIT-1
+      state.
+
+    FIN-WAIT-1 STATE
+    FIN-WAIT-2 STATE
+
+      Strictly speaking, this is an error and should receive an "error:
+      connection closing" response.  An "ok" response would be
+      acceptable, too, as long as a second FIN is not emitted (the first
+      FIN may be retransmitted though).
+
+
+
+
+
+
+
+
+
+
+
+[Page 60]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+CLOSE Call
+
+
+
+    CLOSE-WAIT STATE
+
+      Queue this request until all preceding SENDs have been
+      segmentized; then send a FIN segment, enter LAST-ACK state.
+
+    CLOSING STATE
+    LAST-ACK STATE
+    TIME-WAIT STATE
+
+      Respond with "error:  connection closing".
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+                                                               [Page 61]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+                                                              ABORT Call
+
+
+
+  ABORT Call
+
+    CLOSED STATE (i.e., TCB does not exist)
+
+      If the user should not have access to such a connection, return
+      "error:  connection illegal for this process".
+
+      Otherwise return "error:  connection does not exist".
+
+    LISTEN STATE
+
+      Any outstanding RECEIVEs should be returned with "error:
+      connection reset" responses.  Delete TCB, enter CLOSED state, and
+      return.
+
+    SYN-SENT STATE
+
+      All queued SENDs and RECEIVEs should be given "connection reset"
+      notification, delete the TCB, enter CLOSED state, and return.
+
+    SYN-RECEIVED STATE
+    ESTABLISHED STATE
+    FIN-WAIT-1 STATE
+    FIN-WAIT-2 STATE
+    CLOSE-WAIT STATE
+
+      Send a reset segment:
+
+        <SEQ=SND.NXT><CTL=RST>
+
+      All queued SENDs and RECEIVEs should be given "connection reset"
+      notification; all segments queued for transmission (except for the
+      RST formed above) or retransmission should be flushed, delete the
+      TCB, enter CLOSED state, and return.
+
+    CLOSING STATE
+    LAST-ACK STATE
+    TIME-WAIT STATE
+
+      Respond with "ok" and delete the TCB, enter CLOSED state, and
+      return.
+
+
+
+
+
+
+
+
+[Page 62]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+STATUS Call
+
+
+
+  STATUS Call
+
+    CLOSED STATE (i.e., TCB does not exist)
+
+      If the user should not have access to such a connection, return
+      "error:  connection illegal for this process".
+
+      Otherwise return "error:  connection does not exist".
+
+    LISTEN STATE
+
+      Return "state = LISTEN", and the TCB pointer.
+
+    SYN-SENT STATE
+
+      Return "state = SYN-SENT", and the TCB pointer.
+
+    SYN-RECEIVED STATE
+
+      Return "state = SYN-RECEIVED", and the TCB pointer.
+
+    ESTABLISHED STATE
+
+      Return "state = ESTABLISHED", and the TCB pointer.
+
+    FIN-WAIT-1 STATE
+
+      Return "state = FIN-WAIT-1", and the TCB pointer.
+
+    FIN-WAIT-2 STATE
+
+      Return "state = FIN-WAIT-2", and the TCB pointer.
+
+    CLOSE-WAIT STATE
+
+      Return "state = CLOSE-WAIT", and the TCB pointer.
+
+    CLOSING STATE
+
+      Return "state = CLOSING", and the TCB pointer.
+
+    LAST-ACK STATE
+
+      Return "state = LAST-ACK", and the TCB pointer.
+
+
+
+
+
+                                                               [Page 63]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+                                                             STATUS Call
+
+
+
+    TIME-WAIT STATE
+
+      Return "state = TIME-WAIT", and the TCB pointer.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+[Page 64]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+SEGMENT ARRIVES
+
+
+
+  SEGMENT ARRIVES
+
+    If the state is CLOSED (i.e., TCB does not exist) then
+
+      all data in the incoming segment is discarded.  An incoming
+      segment containing a RST is discarded.  An incoming segment not
+      containing a RST causes a RST to be sent in response.  The
+      acknowledgment and sequence field values are selected to make the
+      reset sequence acceptable to the TCP that sent the offending
+      segment.
+
+      If the ACK bit is off, sequence number zero is used,
+
+        <SEQ=0><ACK=SEG.SEQ+SEG.LEN><CTL=RST,ACK>
+
+      If the ACK bit is on,
+
+        <SEQ=SEG.ACK><CTL=RST>
+
+      Return.
+
+    If the state is LISTEN then
+
+      first check for an RST
+
+        An incoming RST should be ignored.  Return.
+
+      second check for an ACK
+
+        Any acknowledgment is bad if it arrives on a connection still in
+        the LISTEN state.  An acceptable reset segment should be formed
+        for any arriving ACK-bearing segment.  The RST should be
+        formatted as follows:
+
+          <SEQ=SEG.ACK><CTL=RST>
+
+        Return.
+
+      third check for a SYN
+
+        If the SYN bit is set, check the security.  If the
+        security/compartment on the incoming segment does not exactly
+        match the security/compartment in the TCB then send a reset and
+        return.
+
+          <SEQ=SEG.ACK><CTL=RST>
+
+
+
+                                                               [Page 65]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+                                                         SEGMENT ARRIVES
+
+
+
+        If the SEG.PRC is greater than the TCB.PRC then if allowed by
+        the user and the system set TCB.PRC<-SEG.PRC, if not allowed
+        send a reset and return.
+
+          <SEQ=SEG.ACK><CTL=RST>
+
+        If the SEG.PRC is less than the TCB.PRC then continue.
+
+        Set RCV.NXT to SEG.SEQ+1, IRS is set to SEG.SEQ and any other
+        control or text should be queued for processing later.  ISS
+        should be selected and a SYN segment sent of the form:
+
+          <SEQ=ISS><ACK=RCV.NXT><CTL=SYN,ACK>
+
+        SND.NXT is set to ISS+1 and SND.UNA to ISS.  The connection
+        state should be changed to SYN-RECEIVED.  Note that any other
+        incoming control or data (combined with SYN) will be processed
+        in the SYN-RECEIVED state, but processing of SYN and ACK should
+        not be repeated.  If the listen was not fully specified (i.e.,
+        the foreign socket was not fully specified), then the
+        unspecified fields should be filled in now.
+
+      fourth other text or control
+
+        Any other control or text-bearing segment (not containing SYN)
+        must have an ACK and thus would be discarded by the ACK
+        processing.  An incoming RST segment could not be valid, since
+        it could not have been sent in response to anything sent by this
+        incarnation of the connection.  So you are unlikely to get here,
+        but if you do, drop the segment, and return.
+
+    If the state is SYN-SENT then
+
+      first check the ACK bit
+
+        If the ACK bit is set
+
+          If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send a reset (unless
+          the RST bit is set, if so drop the segment and return)
+
+            <SEQ=SEG.ACK><CTL=RST>
+
+          and discard the segment.  Return.
+
+          If SND.UNA =< SEG.ACK =< SND.NXT then the ACK is acceptable.
+
+      second check the RST bit
+
+
+[Page 66]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+SEGMENT ARRIVES
+
+
+
+        If the RST bit is set
+
+          If the ACK was acceptable then signal the user "error:
+          connection reset", drop the segment, enter CLOSED state,
+          delete TCB, and return.  Otherwise (no ACK) drop the segment
+          and return.
+
+      third check the security and precedence
+
+        If the security/compartment in the segment does not exactly
+        match the security/compartment in the TCB, send a reset
+
+          If there is an ACK
+
+            <SEQ=SEG.ACK><CTL=RST>
+
+          Otherwise
+
+            <SEQ=0><ACK=SEG.SEQ+SEG.LEN><CTL=RST,ACK>
+
+        If there is an ACK
+
+          The precedence in the segment must match the precedence in the
+          TCB, if not, send a reset
+
+            <SEQ=SEG.ACK><CTL=RST>
+
+        If there is no ACK
+
+          If the precedence in the segment is higher than the precedence
+          in the TCB then if allowed by the user and the system raise
+          the precedence in the TCB to that in the segment, if not
+          allowed to raise the prec then send a reset.
+
+            <SEQ=0><ACK=SEG.SEQ+SEG.LEN><CTL=RST,ACK>
+
+          If the precedence in the segment is lower than the precedence
+          in the TCB continue.
+
+        If a reset was sent, discard the segment and return.
+
+      fourth check the SYN bit
+
+        This step should be reached only if the ACK is ok, or there is
+        no ACK, and the segment did not contain a RST.
+
+        If the SYN bit is on and the security/compartment and precedence
+
+
+                                                               [Page 67]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+                                                         SEGMENT ARRIVES
+
+
+
+        are acceptable then, RCV.NXT is set to SEG.SEQ+1, IRS is set to
+        SEG.SEQ.  SND.UNA should be advanced to equal SEG.ACK (if there
+        is an ACK), and any segments on the retransmission queue which
+        are thereby acknowledged should be removed.
+
+        If SND.UNA > ISS (our SYN has been ACKed), change the connection
+        state to ESTABLISHED, form an ACK segment
+
+          <SEQ=SND.NXT><ACK=RCV.NXT><CTL=ACK>
+
+        and send it.  Data or controls which were queued for
+        transmission may be included.  If there are other controls or
+        text in the segment then continue processing at the sixth step
+        below where the URG bit is checked, otherwise return.
+
+        Otherwise enter SYN-RECEIVED, form a SYN,ACK segment
+
+          <SEQ=ISS><ACK=RCV.NXT><CTL=SYN,ACK>
+
+        and send it.  If there are other controls or text in the
+        segment, queue them for processing after the ESTABLISHED state
+        has been reached, return.
+
+      fifth, if neither of the SYN or RST bits is set then drop the
+      segment and return.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+[Page 68]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+SEGMENT ARRIVES
+
+
+
+    Otherwise,
+
+    first check sequence number
+
+      SYN-RECEIVED STATE
+      ESTABLISHED STATE
+      FIN-WAIT-1 STATE
+      FIN-WAIT-2 STATE
+      CLOSE-WAIT STATE
+      CLOSING STATE
+      LAST-ACK STATE
+      TIME-WAIT STATE
+
+        Segments are processed in sequence.  Initial tests on arrival
+        are used to discard old duplicates, but further processing is
+        done in SEG.SEQ order.  If a segment's contents straddle the
+        boundary between old and new, only the new parts should be
+        processed.
+
+        There are four cases for the acceptability test for an incoming
+        segment:
+
+        Segment Receive  Test
+        Length  Window
+        ------- -------  -------------------------------------------
+
+           0       0     SEG.SEQ = RCV.NXT
+
+           0      >0     RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND
+
+          >0       0     not acceptable
+
+          >0      >0     RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND
+                      or RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND
+
+        If the RCV.WND is zero, no segments will be acceptable, but
+        special allowance should be made to accept valid ACKs, URGs and
+        RSTs.
+
+        If an incoming segment is not acceptable, an acknowledgment
+        should be sent in reply (unless the RST bit is set, if so drop
+        the segment and return):
+
+          <SEQ=SND.NXT><ACK=RCV.NXT><CTL=ACK>
+
+        After sending the acknowledgment, drop the unacceptable segment
+        and return.
+
+
+                                                               [Page 69]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+                                                         SEGMENT ARRIVES
+
+
+
+        In the following it is assumed that the segment is the idealized
+        segment that begins at RCV.NXT and does not exceed the window.
+        One could tailor actual segments to fit this assumption by
+        trimming off any portions that lie outside the window (including
+        SYN and FIN), and only processing further if the segment then
+        begins at RCV.NXT.  Segments with higher beginning sequence
+        numbers may be held for later processing.
+
+    second check the RST bit,
+
+      SYN-RECEIVED STATE
+
+        If the RST bit is set
+
+          If this connection was initiated with a passive OPEN (i.e.,
+          came from the LISTEN state), then return this connection to
+          LISTEN state and return.  The user need not be informed.  If
+          this connection was initiated with an active OPEN (i.e., came
+          from SYN-SENT state) then the connection was refused, signal
+          the user "connection refused".  In either case, all segments
+          on the retransmission queue should be removed.  And in the
+          active OPEN case, enter the CLOSED state and delete the TCB,
+          and return.
+
+      ESTABLISHED
+      FIN-WAIT-1
+      FIN-WAIT-2
+      CLOSE-WAIT
+
+        If the RST bit is set then, any outstanding RECEIVEs and SENDs
+        should receive "reset" responses.  All segment queues should be
+        flushed.  Users should also receive an unsolicited general
+        "connection reset" signal.  Enter the CLOSED state, delete the
+        TCB, and return.
+
+      CLOSING STATE
+      LAST-ACK STATE
+      TIME-WAIT
+
+        If the RST bit is set then, enter the CLOSED state, delete the
+        TCB, and return.
+
+
+
+
+
+
+
+
+[Page 70]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+SEGMENT ARRIVES
+
+
+
+    third check security and precedence
+
+      SYN-RECEIVED
+
+        If the security/compartment and precedence in the segment do not
+        exactly match the security/compartment and precedence in the TCB
+        then send a reset, and return.
+
+      ESTABLISHED STATE
+
+        If the security/compartment and precedence in the segment do not
+        exactly match the security/compartment and precedence in the TCB
+        then send a reset, any outstanding RECEIVEs and SENDs should
+        receive "reset" responses.  All segment queues should be
+        flushed.  Users should also receive an unsolicited general
+        "connection reset" signal.  Enter the CLOSED state, delete the
+        TCB, and return.
+
+      Note this check is placed following the sequence check to prevent
+      a segment from an old connection between these ports with a
+      different security or precedence from causing an abort of the
+      current connection.
+
+    fourth, check the SYN bit,
+
+      SYN-RECEIVED
+      ESTABLISHED STATE
+      FIN-WAIT-1 STATE
+      FIN-WAIT-2 STATE
+      CLOSE-WAIT STATE
+      CLOSING STATE
+      LAST-ACK STATE
+      TIME-WAIT STATE
+
+        If the SYN is in the window it is an error, send a reset, any
+        outstanding RECEIVEs and SENDs should receive "reset" responses,
+        all segment queues should be flushed, the user should also
+        receive an unsolicited general "connection reset" signal, enter
+        the CLOSED state, delete the TCB, and return.
+
+        If the SYN is not in the window this step would not be reached
+        and an ack would have been sent in the first step (sequence
+        number check).
+
+
+
+
+
+
+                                                               [Page 71]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+                                                         SEGMENT ARRIVES
+
+
+
+    fifth check the ACK field,
+
+      if the ACK bit is off drop the segment and return
+
+      if the ACK bit is on
+
+        SYN-RECEIVED STATE
+
+          If SND.UNA =< SEG.ACK =< SND.NXT then enter ESTABLISHED state
+          and continue processing.
+
+            If the segment acknowledgment is not acceptable, form a
+            reset segment,
+
+              <SEQ=SEG.ACK><CTL=RST>
+
+            and send it.
+
+        ESTABLISHED STATE
+
+          If SND.UNA < SEG.ACK =< SND.NXT then, set SND.UNA <- SEG.ACK.
+          Any segments on the retransmission queue which are thereby
+          entirely acknowledged are removed.  Users should receive
+          positive acknowledgments for buffers which have been SENT and
+          fully acknowledged (i.e., SEND buffer should be returned with
+          "ok" response).  If the ACK is a duplicate
+          (SEG.ACK < SND.UNA), it can be ignored.  If the ACK acks
+          something not yet sent (SEG.ACK > SND.NXT) then send an ACK,
+          drop the segment, and return.
+
+          If SND.UNA < SEG.ACK =< SND.NXT, the send window should be
+          updated.  If (SND.WL1 < SEG.SEQ or (SND.WL1 = SEG.SEQ and
+          SND.WL2 =< SEG.ACK)), set SND.WND <- SEG.WND, set
+          SND.WL1 <- SEG.SEQ, and set SND.WL2 <- SEG.ACK.
+
+          Note that SND.WND is an offset from SND.UNA, that SND.WL1
+          records the sequence number of the last segment used to update
+          SND.WND, and that SND.WL2 records the acknowledgment number of
+          the last segment used to update SND.WND.  The check here
+          prevents using old segments to update the window.
+
+
+
+
+
+
+
+
+
+[Page 72]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+SEGMENT ARRIVES
+
+
+
+        FIN-WAIT-1 STATE
+
+          In addition to the processing for the ESTABLISHED state, if
+          our FIN is now acknowledged then enter FIN-WAIT-2 and continue
+          processing in that state.
+
+        FIN-WAIT-2 STATE
+
+          In addition to the processing for the ESTABLISHED state, if
+          the retransmission queue is empty, the user's CLOSE can be
+          acknowledged ("ok") but do not delete the TCB.
+
+        CLOSE-WAIT STATE
+
+          Do the same processing as for the ESTABLISHED state.
+
+        CLOSING STATE
+
+          In addition to the processing for the ESTABLISHED state, if
+          the ACK acknowledges our FIN then enter the TIME-WAIT state,
+          otherwise ignore the segment.
+
+        LAST-ACK STATE
+
+          The only thing that can arrive in this state is an
+          acknowledgment of our FIN.  If our FIN is now acknowledged,
+          delete the TCB, enter the CLOSED state, and return.
+
+        TIME-WAIT STATE
+
+          The only thing that can arrive in this state is a
+          retransmission of the remote FIN.  Acknowledge it, and restart
+          the 2 MSL timeout.
+
+    sixth, check the URG bit,
+
+      ESTABLISHED STATE
+      FIN-WAIT-1 STATE
+      FIN-WAIT-2 STATE
+
+        If the URG bit is set, RCV.UP <- max(RCV.UP,SEG.UP), and signal
+        the user that the remote side has urgent data if the urgent
+        pointer (RCV.UP) is in advance of the data consumed.  If the
+        user has already been signaled (or is still in the "urgent
+        mode") for this continuous sequence of urgent data, do not
+        signal the user again.
+
+
+
+                                                               [Page 73]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+                                                         SEGMENT ARRIVES
+
+
+
+      CLOSE-WAIT STATE
+      CLOSING STATE
+      LAST-ACK STATE
+      TIME-WAIT STATE
+
+        This should not occur, since a FIN has been received from the
+        remote side.  Ignore the URG.
+
+    seventh, process the segment text,
+
+      ESTABLISHED STATE
+      FIN-WAIT-1 STATE
+      FIN-WAIT-2 STATE
+
+        Once in the ESTABLISHED state, it is possible to deliver segment
+        text to user RECEIVE buffers.  Text from segments can be moved
+        into buffers until either the buffer is full or the segment is
+        empty.  If the segment empties and carries a PUSH flag, then
+        the user is informed, when the buffer is returned, that a PUSH
+        has been received.
+
+        When the TCP takes responsibility for delivering the data to the
+        user it must also acknowledge the receipt of the data.
+
+        Once the TCP takes responsibility for the data it advances
+        RCV.NXT over the data accepted, and adjusts RCV.WND as
+        appropriate to the current buffer availability.  The total of
+        RCV.NXT and RCV.WND should not be reduced.
+
+        Please note the window management suggestions in section 3.7.
+
+        Send an acknowledgment of the form:
+
+          <SEQ=SND.NXT><ACK=RCV.NXT><CTL=ACK>
+
+        This acknowledgment should be piggybacked on a segment being
+        transmitted if possible without incurring undue delay.
+
+
+
+
+
+
+
+
+
+
+
+
+[Page 74]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+SEGMENT ARRIVES
+
+
+
+      CLOSE-WAIT STATE
+      CLOSING STATE
+      LAST-ACK STATE
+      TIME-WAIT STATE
+
+        This should not occur, since a FIN has been received from the
+        remote side.  Ignore the segment text.
+
+    eighth, check the FIN bit,
+
+      Do not process the FIN if the state is CLOSED, LISTEN or SYN-SENT
+      since the SEG.SEQ cannot be validated; drop the segment and
+      return.
+
+      If the FIN bit is set, signal the user "connection closing" and
+      return any pending RECEIVEs with same message, advance RCV.NXT
+      over the FIN, and send an acknowledgment for the FIN.  Note that
+      FIN implies PUSH for any segment text not yet delivered to the
+      user.
+
+        SYN-RECEIVED STATE
+        ESTABLISHED STATE
+
+          Enter the CLOSE-WAIT state.
+
+        FIN-WAIT-1 STATE
+
+          If our FIN has been ACKed (perhaps in this segment), then
+          enter TIME-WAIT, start the time-wait timer, turn off the other
+          timers; otherwise enter the CLOSING state.
+
+        FIN-WAIT-2 STATE
+
+          Enter the TIME-WAIT state.  Start the time-wait timer, turn
+          off the other timers.
+
+        CLOSE-WAIT STATE
+
+          Remain in the CLOSE-WAIT state.
+
+        CLOSING STATE
+
+          Remain in the CLOSING state.
+
+        LAST-ACK STATE
+
+          Remain in the LAST-ACK state.
+
+
+                                                               [Page 75]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Functional Specification
+                                                         SEGMENT ARRIVES
+
+
+
+        TIME-WAIT STATE
+
+          Remain in the TIME-WAIT state.  Restart the 2 MSL time-wait
+          timeout.
+
+    and return.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+[Page 76]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                Functional Specification
+USER TIMEOUT
+
+
+
+  USER TIMEOUT
+
+    For any state if the user timeout expires, flush all queues, signal
+    the user "error:  connection aborted due to user timeout" in general
+    and for any outstanding calls, delete the TCB, enter the CLOSED
+    state and return.
+
+  RETRANSMISSION TIMEOUT
+
+    For any state if the retransmission timeout expires on a segment in
+    the retransmission queue, send the segment at the front of the
+    retransmission queue again, reinitialize the retransmission timer,
+    and return.
+
+  TIME-WAIT TIMEOUT
+
+    If the time-wait timeout expires on a connection delete the TCB,
+    enter the CLOSED state and return.
+
+   
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+                                                               [Page 77]
+
+
+                                                          September 1981
+Transmission Control Protocol
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+[Page 78]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+
+
+
+                                GLOSSARY
+
+
+
+1822
+          BBN Report 1822, "The Specification of the Interconnection of
+          a Host and an IMP".  The specification of interface between a
+          host and the ARPANET.
+
+ACK
+          A control bit (acknowledge) occupying no sequence space, which
+          indicates that the acknowledgment field of this segment
+          specifies the next sequence number the sender of this segment
+          is expecting to receive, hence acknowledging receipt of all
+          previous sequence numbers.
+
+ARPANET message
+          The unit of transmission between a host and an IMP in the
+          ARPANET.  The maximum size is about 1012 octets (8096 bits).
+
+ARPANET packet
+          A unit of transmission used internally in the ARPANET between
+          IMPs.  The maximum size is about 126 octets (1008 bits).
+
+connection
+          A logical communication path identified by a pair of sockets.
+
+datagram
+          A message sent in a packet switched computer communications
+          network.
+
+Destination Address
+          The destination address, usually the network and host
+          identifiers.
+
+FIN
+          A control bit (finis) occupying one sequence number, which
+          indicates that the sender will send no more data or control
+          occupying sequence space.
+
+fragment
+          A portion of a logical unit of data, in particular an internet
+          fragment is a portion of an internet datagram.
+
+FTP
+          A file transfer protocol.
+
+
+
+
+
+                                                               [Page 79]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Glossary
+
+
+
+header
+          Control information at the beginning of a message, segment,
+          fragment, packet or block of data.
+
+host
+          A computer.  In particular a source or destination of messages
+          from the point of view of the communication network.
+
+Identification
+          An Internet Protocol field.  This identifying value assigned
+          by the sender aids in assembling the fragments of a datagram.
+
+IMP
+          The Interface Message Processor, the packet switch of the
+          ARPANET.
+
+internet address
+          A source or destination address specific to the host level.
+
+internet datagram
+          The unit of data exchanged between an internet module and the
+          higher level protocol together with the internet header.
+
+internet fragment
+          A portion of the data of an internet datagram with an internet
+          header.
+
+IP
+          Internet Protocol.
+
+IRS
+          The Initial Receive Sequence number.  The first sequence
+          number used by the sender on a connection.
+
+ISN
+          The Initial Sequence Number.  The first sequence number used
+          on a connection, (either ISS or IRS).  Selected on a clock
+          based procedure.
+
+ISS
+          The Initial Send Sequence number.  The first sequence number
+          used by the sender on a connection.
+
+leader
+          Control information at the beginning of a message or block of
+          data.  In particular, in the ARPANET, the control information
+          on an ARPANET message at the host-IMP interface.
+
+
+
+[Page 80]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                                Glossary
+
+
+
+left sequence
+          This is the next sequence number to be acknowledged by the
+          data receiving TCP (or the lowest currently unacknowledged
+          sequence number) and is sometimes referred to as the left edge
+          of the send window.
+
+local packet
+          The unit of transmission within a local network.
+
+module
+          An implementation, usually in software, of a protocol or other
+          procedure.
+
+MSL
+          Maximum Segment Lifetime, the time a TCP segment can exist in
+          the internetwork system.  Arbitrarily defined to be 2 minutes.
+
+octet
+          An eight bit byte.
+
+Options
+          An Option field may contain several options, and each option
+          may be several octets in length.  The options are used
+          primarily in testing situations; for example, to carry
+          timestamps.  Both the Internet Protocol and TCP provide for
+          options fields.
+
+packet
+          A package of data with a header which may or may not be
+          logically complete.  More often a physical packaging than a
+          logical packaging of data.
+
+port
+          The portion of a socket that specifies which logical input or
+          output channel of a process is associated with the data.
+
+process
+          A program in execution.  A source or destination of data from
+          the point of view of the TCP or other host-to-host protocol.
+
+PUSH
+          A control bit occupying no sequence space, indicating that
+          this segment contains data that must be pushed through to the
+          receiving user.
+
+RCV.NXT
+          receive next sequence number
+
+
+
+                                                               [Page 81]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Glossary
+
+
+
+RCV.UP
+          receive urgent pointer
+
+RCV.WND
+          receive window
+
+receive next sequence number
+          This is the next sequence number the local TCP is expecting to
+          receive.
+
+receive window
+          This represents the sequence numbers the local (receiving) TCP
+          is willing to receive.  Thus, the local TCP considers that
+          segments overlapping the range RCV.NXT to
+          RCV.NXT + RCV.WND - 1 carry acceptable data or control.
+          Segments containing sequence numbers entirely outside of this
+          range are considered duplicates and discarded.
+
+RST
+          A control bit (reset), occupying no sequence space, indicating
+          that the receiver should delete the connection without further
+          interaction.  The receiver can determine, based on the
+          sequence number and acknowledgment fields of the incoming
+          segment, whether it should honor the reset command or ignore
+          it.  In no case does receipt of a segment containing RST give
+          rise to a RST in response.
+
+RTP
+          Real Time Protocol:  A host-to-host protocol for communication
+          of time critical information.
+
+SEG.ACK
+          segment acknowledgment
+
+SEG.LEN
+          segment length
+
+SEG.PRC
+          segment precedence value
+
+SEG.SEQ
+          segment sequence
+
+SEG.UP
+          segment urgent pointer field
+
+
+
+
+
+[Page 82]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+                                                                Glossary
+
+
+
+SEG.WND
+          segment window field
+
+segment
+          A logical unit of data, in particular a TCP segment is the
+          unit of data transferred between a pair of TCP modules.
+
+segment acknowledgment
+          The sequence number in the acknowledgment field of the
+          arriving segment.
+
+segment length
+          The amount of sequence number space occupied by a segment,
+          including any controls which occupy sequence space.
+
+segment sequence
+          The number in the sequence field of the arriving segment.
+
+send sequence
+          This is the next sequence number the local (sending) TCP will
+          use on the connection.  It is initially selected from an
+          initial sequence number curve (ISN) and is incremented for
+          each octet of data or sequenced control transmitted.
+
+send window
+          This represents the sequence numbers which the remote
+          (receiving) TCP is willing to receive.  It is the value of the
+          window field specified in segments from the remote (data
+          receiving) TCP.  The range of new sequence numbers which may
+          be emitted by a TCP lies between SND.NXT and
+          SND.UNA + SND.WND - 1. (Retransmissions of sequence numbers
+          between SND.UNA and SND.NXT are expected, of course.)
+
+SND.NXT
+          send sequence
+
+SND.UNA
+          left sequence
+
+SND.UP
+          send urgent pointer
+
+SND.WL1
+          segment sequence number at last window update
+
+SND.WL2
+          segment acknowledgment number at last window update
+
+
+
+                                                               [Page 83]
+
+
+                                                          September 1981
+Transmission Control Protocol
+Glossary
+
+
+
+SND.WND
+          send window
+
+socket
+          An address which specifically includes a port identifier, that
+          is, the concatenation of an Internet Address with a TCP port.
+
+Source Address
+          The source address, usually the network and host identifiers.
+
+SYN
+          A control bit in the incoming segment, occupying one sequence
+          number, used at the initiation of a connection, to indicate
+          where the sequence numbering will start.
+
+TCB
+          Transmission control block, the data structure that records
+          the state of a connection.
+
+TCB.PRC
+          The precedence of the connection.
+
+TCP
+          Transmission Control Protocol:  A host-to-host protocol for
+          reliable communication in internetwork environments.
+
+TOS
+          Type of Service, an Internet Protocol field.
+
+Type of Service
+          An Internet Protocol field which indicates the type of service
+          for this internet fragment.
+
+URG
+          A control bit (urgent), occupying no sequence space, used to
+          indicate that the receiving user should be notified to do
+          urgent processing as long as there is data to be consumed with
+          sequence numbers less than the value indicated in the urgent
+          pointer.
+
+urgent pointer
+          A control field meaningful only when the URG bit is on.  This
+          field communicates the value of the urgent pointer which
+          indicates the data octet associated with the sending user's
+          urgent call.
+
+          
+
+
+
+[Page 84]                                                               
+
+
+September 1981                                                          
+                                           Transmission Control Protocol
+
+
+
+                               REFERENCES
+
+
+
+[1]  Cerf, V., and R. Kahn, "A Protocol for Packet Network
+     Intercommunication", IEEE Transactions on Communications,
+     Vol. COM-22, No. 5, pp 637-648, May 1974.
+
+[2]  Postel, J. (ed.), "Internet Protocol - DARPA Internet Program
+     Protocol Specification", RFC 791, USC/Information Sciences
+     Institute, September 1981.
+
+[3]  Dalal, Y. and C. Sunshine, "Connection Management in Transport
+     Protocols", Computer Networks, Vol. 2, No. 6, pp. 454-473,
+     December 1978.
+
+[4]  Postel, J., "Assigned Numbers", RFC 790, USC/Information Sciences
+     Institute, September 1981.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+                                                               [Page 85]
+
diff --git a/ext/picotcp/RFC/rfc1066.txt b/ext/picotcp/RFC/rfc1066.txt
new file mode 100644
index 0000000..66aae55
--- /dev/null
+++ b/ext/picotcp/RFC/rfc1066.txt
@@ -0,0 +1,5043 @@
+
+
+
+
+
+
+Network Working Group                                     K. McCloghrie
+Request For Comments: 1066                                      M. Rose
+                                                                    TWG
+                                                            August 1988
+
+
+           Management Information Base for Network Management
+                       of TCP/IP-based internets
+
+                           Table of Contents
+
+   1. Status of this Memo ...................................   1
+   2. IAB POLICY STATEMENT ..................................   2
+   3. Introduction ..........................................   2
+   4. Objects ...............................................   5
+   4.1 Object Groups ........................................   5
+   4.2 Format of Definitions ................................   6
+   5. Object Definitions ....................................   7
+   5.1 The System Group .....................................   8
+   5.2 The Interfaces Group .................................  10
+   5.2.1 The Interfaces Table ...............................  10
+   5.3 The Address Translation Group ........................  22
+   5.4 The IP Group .........................................  25
+   5.4.1 The IP Address Table ...............................  33
+   5.4.2 The IP Routing Table ...............................  35
+   5.5 The ICMP Group .......................................  42
+   5.6 The TCP Group ........................................  52
+   5.7 The UDP Group ........................................  61
+   5.8 The EGP Group ........................................  63
+   5.8.1 The EGP Neighbor Table .............................  64
+   6. Definitions ...........................................  67
+   7. Acknowledgements ......................................  88
+   8. References ............................................  89
+
+1.  Status of this Memo
+
+   This memo provides the initial version of the Management Information
+   Base (MIB) for use with network management protocols in TCP/IP-based
+   internets in the short-term.  In particular, together with its
+   companion memos which describe the structure of management
+   information along with the initial network management protocol, these
+   documents provide a simple, workable architecture and system for
+   managing TCP/IP-based internets and in particular the Internet.
+
+
+
+
+
+
+
+
+McCloghrie & Rose                                               [Page 1]
+
+RFC 1066                          MIB                        August 1988
+
+
+   This memo specifies a draft standard for the Internet community.
+   TCP/IP implementations in the Internet which are network manageable
+   are expected to adopt and implement this specification.
+
+   Distribution of this memo is unlimited.
+
+2.  IAB POLICY STATEMENT
+
+   This MIB specification is the first edition of an evolving document
+   defining variables needed for monitoring and control of various
+   components of the Internet.  Not all groups of defined variables are
+   mandatory for all Internet components.
+
+   For example, the EGP group is mandatory for gateways using EGP but
+   not for hosts which should not be running EGP.  Similarly, the TCP
+   group is mandatory for hosts running TCP but not for gateways which
+   aren't running it.  What IS mandatory, however, is that all variables
+   of a group be supported if any element of the group is supported.
+
+   It is expected that additional MIB groups and variables will be
+   defined over time to accommodate the monitoring and control needs of
+   new or changing components of the Internet.  The MIB working group
+   will continue to refine this specification and projects a revision
+   incorporating new requirements in early 1989.
+
+3.  Introduction
+
+   As reported in RFC 1052, IAB Recommendations for the Development of
+   Internet Network Management Standards [1], the Internet Activities
+   Board has directed the Internet Engineering Task Force (IETF) to
+   create two new working groups in the area of network management.  One
+   group is charged with the further specification and definition of
+   elements to be included in the Management Information Base.  The
+   other is charged with defining the modifications to the Simple
+   Network Management Protocol (SNMP) to accommodate the short-term
+   needs of the network vendor and operator communities.  The long-term
+   needs of the Internet community are to be met using the ISO CMIS/CMIP
+   [2,3] framework as a basis.  An existing IETF working group, the
+   "NETMAN" group, is already engaged in defining the use of CMIS/CMIP
+   in a TCP/IP network, and will continue with responsibility for
+   addressing the longer-term requirements.
+
+   The output of the MIB working group is to be provided to both the
+   SNMP working group and the NETMAN group, so as to ensure
+   compatibility of monitored items for both network management
+   frameworks.
+
+   The MIB working group has produced this memo and a companion.  The
+
+
+
+McCloghrie & Rose                                               [Page 2]
+
+RFC 1066                          MIB                        August 1988
+
+
+   companion memo [4] defines a Structure for Management Information
+   (SMI) for use by the managed objects contained in the MIB.  This memo
+   defines the list of managed objects.
+
+   The IAB also urged the working groups to be "extremely sensitive to
+   the need to keep SNMP simple," and recommends that the MIB working
+   group take as its starting inputs the MIB definitions found in the
+   High-Level Entity Management Systems (HEMS) RFC 1024 [5], the initial
+   SNMP specification [6], and the CMIS/CMIP memos [7,8].
+
+   Thus, the list of managed objects defined here has been derived by
+   taking only those elements which are considered essential.  Since
+   such elements are essential, there is no need to allow the
+   implementation of individual objects to be optional.  Rather, all
+   compliant implementations will contain all applicable (see below)
+   objects defined in this memo.
+
+   This approach of taking only the essential objects is NOT
+   restrictive, since the SMI defined in the companion memo provides
+   three extensibility mechanisms: one, the addition of new standard
+   objects through the definitions of new versions of the MIB; two, the
+   addition of widely-available but non-standard objects through the
+   multilateral subtree; and three, the addition of private objects
+   through the enterprises subtree. Such additional objects can not only
+   be used for vendor-specific elements, but also for experimentation as
+   required to further the knowledge of which other objects are
+   essential.
+
+   The primary criterion for being considered essential was for an
+   object to be contained in all of the above referenced MIB
+   definitions.  A few other objects have been included, but only if the
+   MIB working group believed they are truly essential.  The detailed
+   list of criteria against which potential inclusions in this (initial)
+   MIB were considered, was:
+
+      1) An object needed to be essential for either fault or
+         configuration management.
+
+      2) Only weak control objects were permitted (by weak, it
+         is meant that tampering with them can do only limited
+         damage).  This criterion reflects the fact that the
+         current management protocols are not sufficiently secure
+         to do more powerful control operations.
+
+      3) Evidence of current use and utility was required.
+
+      4) An attempt was made to limit the number of objects to
+         about 100 to make it easier for vendors to fully
+
+
+
+McCloghrie & Rose                                               [Page 3]
+
+RFC 1066                          MIB                        August 1988
+
+
+         instrument their software.
+
+      5) To avoid redundant variables, it was required that no
+         object be included that can be derived from others in the
+         MIB.
+
+      6) Implementation specific objects (e.g., for BSD UNIX)
+         were excluded.
+
+      7) It was agreed to avoid heavily instrumenting critical
+         sections of code.  The general guideline was one counter
+         per critical section per layer.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+McCloghrie & Rose                                               [Page 4]
+
+RFC 1066                          MIB                        August 1988
+
+
+4.  Objects
+
+   Managed objects are accessed via a virtual information store, termed
+   the Management Information Base or MIB.  Objects in the MIB are
+   defined using Abstract Syntax Notation One (ASN.1) [9].
+
+   The mechanisms used for describing these objects are specified in the
+   companion memo.  In particular, each object has a name, a syntax, and
+   an encoding.  The name is an object identifier, an administratively
+   assigned name, which specifies an object type.  The object type
+   together with an object instance serves to uniquely identify a
+   specific instantiation of the object.  For human convenience, we
+   often use a textual string, termed the OBJECT DESCRIPTOR, to also
+   refer to the object type.
+
+   The syntax of an object type defines the abstract data structure
+   corresponding to that object type.  The ASN.1 language is used for
+   this purpose.  However, the companion memo purposely restricts the
+   ASN.1 constructs which may be used.  These restrictions are
+   explicitly made for simplicity.
+
+   The encoding of an object type is simply how that object type is
+   represented using the object type's syntax.  Implicitly tied to the
+   notion of an object type's syntax and encoding is how the object type
+   is represented when being transmitted on the network.  This memo
+   specifies the use of the basic encoding rules of ASN.1 [10].
+
+4.1.  Object Groups
+
+   Since this list of managed objects contains only the essential
+   elements, there is no need to allow individual objects to be
+   optional.  Rather, the objects are arranged into the following
+   groups:
+
+                  - System
+                  - Interfaces
+                  - Address Translation
+                  - IP
+                  - ICMP
+                  - TCP
+                  - UDP
+                  - EGP
+
+   There are two reasons for defining these groups: one, to provide a
+   means of assigning object identifiers; two, to provide a method for
+   implementations of managed agents to know which objects they must
+   implement.  This method is as follows: if the semantics of a group is
+   applicable to an implementation, then it must implement all objects
+
+
+
+McCloghrie & Rose                                               [Page 5]
+
+RFC 1066                          MIB                        August 1988
+
+
+   in that group.  For example, an implementation must implement the EGP
+   group if and only if it implements the EGP protocol.
+
+4.2.  Format of Definitions
+
+   The next section contains the specification of all object types
+   contained in the MIB. Following the conventions of the companion
+   memo, the object types are defined using the following fields:
+
+          OBJECT:
+          -------
+               A textual name, termed the OBJECT DESCRIPTOR, for the
+               object type, along with its corresponding OBJECT
+               IDENTIFIER.
+
+          Syntax:
+               The abstract syntax for the object type, presented using
+               ASN.1.  This must resolve to an instance of the ASN.1
+               type ObjectSyntax defined in the SMI.
+
+          Definition:
+               A textual description of the semantics of the object
+               type.  Implementations should ensure that their
+               interpretation of the object type fulfills this
+               definition since this MIB is intended for use in multi-
+               vendor environments.  As such it is vital that object
+               types have consistent meaning across all machines.
+
+          Access:
+               One of read-only, read-write, write-only, or
+               not-accessible.
+
+          Status:
+               One of mandatory, optional, or obsolete.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+McCloghrie & Rose                                               [Page 6]
+
+RFC 1066                          MIB                        August 1988
+
+
+5.  Object Definitions
+
+               RFC1066-MIB { iso org(3) dod(6) internet(1) mgmt(2) 1 }
+
+               DEFINITIONS ::= BEGIN
+
+               IMPORTS
+                       mgmt, OBJECT-TYPE, NetworkAddress, IpAddress,
+                       Counter, Gauge, TimeTicks
+                           FROM RFC1065-SMI;
+
+               mib        OBJECT IDENTIFIER ::= { mgmt 1 }
+
+               system     OBJECT IDENTIFIER ::= { mib 1 }
+               interfaces OBJECT IDENTIFIER ::= { mib 2 }
+               at         OBJECT IDENTIFIER ::= { mib 3 }
+               ip         OBJECT IDENTIFIER ::= { mib 4 }
+               icmp       OBJECT IDENTIFIER ::= { mib 5 }
+               tcp        OBJECT IDENTIFIER ::= { mib 6 }
+               udp        OBJECT IDENTIFIER ::= { mib 7 }
+               egp        OBJECT IDENTIFIER ::= { mib 8 }
+
+               END
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+McCloghrie & Rose                                               [Page 7]
+
+RFC 1066                          MIB                        August 1988
+
+
+5.1.  The System Group
+
+          Implementation of the System group is mandatory for all
+          systems.
+
+          OBJECT:
+          -------
+               sysDescr { system 1 }
+
+          Syntax:
+               OCTET STRING
+
+          Definition:
+               A textual description of the entity.  This value should
+               include the full name and version identification of the
+               system's hardware type, software operating-system, and
+               networking software.  It is mandatory that this only
+               contain printable ASCII characters.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               sysObjectID { system 2 }
+
+          Syntax:
+               OBJECT IDENTIFIER
+
+          Definition:
+               The vendor's authoritative identification of the network
+               management subsystem contained in the entity.  This value
+               is allocated within the SMI enterprises subtree
+               (1.3.6.1.4.1) and provides an easy and unambiguous means
+               for determining "what kind of box" is being managed.  For
+               example, if vendor "Flintstones, Inc." was assigned the
+               subtree 1.3.6.1.4.1.42, it could assign the identifier
+               1.3.6.1.4.1.42.1.1 to its "Fred Router".
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+
+McCloghrie & Rose                                               [Page 8]
+
+RFC 1066                          MIB                        August 1988
+
+
+          OBJECT:
+          -------
+               sysUpTime { system 3 }
+
+          Syntax:
+               TimeTicks
+
+          Definition:
+               The time (in hundredths of a second) since the network
+               management portion of the system was last re-initialized.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+McCloghrie & Rose                                               [Page 9]
+
+RFC 1066                          MIB                        August 1988
+
+
+5.2.  The Interfaces Group
+
+          Implementation of the Interfaces group is mandatory for all
+          systems.
+
+          OBJECT:
+          -------
+               ifNumber { interfaces 1 }
+
+          Syntax:
+               INTEGER
+
+          Definition:
+               The number of network interfaces (regardless of their
+               current state) on which this system can send/receive IP
+               datagrams.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+5.2.1.  The Interfaces Table
+
+          OBJECT:
+          -------
+               ifTable { interfaces 2 }
+
+          Syntax:
+               SEQUENCE OF IfEntry
+
+          Definition:
+               A list of interface entries.  The number of entries is
+               given by the value of ifNumber.
+
+          Access:
+               read-write.
+
+          Status:
+               mandatory.
+
+          OBJECT:
+          -------
+               ifEntry { ifTable 1 }
+
+          Syntax:
+               IfEntry ::= SEQUENCE {
+
+
+
+McCloghrie & Rose                                              [Page 10]
+
+RFC 1066                          MIB                        August 1988
+
+
+                    ifIndex
+                        INTEGER,
+                    ifDescr
+                        OCTET STRING,
+                    ifType
+                        INTEGER,
+                    ifMtu
+                        INTEGER,
+                    ifSpeed
+                        Gauge,
+                    ifPhysAddress
+                        OCTET STRING,
+                    ifAdminStatus
+                        INTEGER,
+                    ifOperStatus
+                        INTEGER,
+                    ifLastChange
+                        TimeTicks,
+                    ifInOctets
+                        Counter,
+                    ifInUcastPkts
+                        Counter,
+                    ifInNUcastPkts
+                        Counter,
+                    ifInDiscards
+                        Counter,
+                    ifInErrors
+                        Counter,
+                    ifInUnknownProtos
+                        Counter,
+                    ifOutOctets
+                        Counter,
+                    ifOutUcastPkts
+                        Counter,
+                    ifOutNUcastPkts
+                        Counter,
+                    ifOutDiscards
+                        Counter,
+                    ifOutErrors
+                        Counter,
+                    ifOutQLen
+                        Gauge
+               }
+
+          Definition:
+               An interface entry containing objects at the subnetwork
+               layer and below for a particular interface.
+
+
+
+
+McCloghrie & Rose                                              [Page 11]
+
+RFC 1066                          MIB                        August 1988
+
+
+          Access:
+               read-write.
+
+          Status:
+               mandatory.
+
+
+          We now consider the individual components of each interface
+          entry:
+
+
+          OBJECT:
+          -------
+               ifIndex { ifEntry 1 }
+
+          Syntax:
+               INTEGER
+
+          Definition:
+               A unique value for each interface.  Its value ranges
+               between 1 and the value of ifNumber.  The value for each
+               interface must remain constant at least from one re-
+               initialization of the entity's network management system
+               to the next re-initialization.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ifDescr { ifEntry 2 }
+
+          Syntax:
+               OCTET STRING
+
+          Definition:
+               A text string containing information about the interface.
+               This string should include the name of the manufacturer,
+               the product name and the version of the hardware
+               interface.  The string is intended for presentation to a
+               human; it must not contain anything but printable ASCII
+               characters.
+
+
+
+
+
+McCloghrie & Rose                                              [Page 12]
+
+RFC 1066                          MIB                        August 1988
+
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ifType { ifEntry 3 }
+
+          Syntax:
+               INTEGER {
+                    other(1),          -- none of the following
+                    regular1822(2),
+                    hdh1822(3),
+                    ddn-x25(4),
+                    rfc877-x25(5),
+                    ethernet-csmacd(6),
+                    iso88023-csmacd(7),
+                    iso88024-tokenBus(8),
+                    iso88025-tokenRing(9),
+                    iso88026-man(10),
+                    starLan(11),
+                    proteon-10MBit(12),
+                    proteon-80MBit(13),
+                    hyperchannel(14),
+                    fddi(15),
+                    lapb(16),
+                    sdlc(17),
+                    t1-carrier(18),
+                    cept(19),          -- european equivalent of T-1
+                    basicIsdn(20),
+                    primaryIsdn(21),
+                                        -- proprietary serial
+                    propPointToPointSerial(22)
+               }
+
+          Definition:
+               The type of interface, distinguished according to the
+               physical/link/network protocol(s) immediately "below" IP
+               in the protocol stack.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+
+McCloghrie & Rose                                              [Page 13]
+
+RFC 1066                          MIB                        August 1988
+
+
+          OBJECT:
+          -------
+               ifMtu { ifEntry 4 }
+
+          Syntax:
+               INTEGER
+
+          Definition:
+               The size of the largest IP datagram which can be
+               sent/received on the interface, specified in octets.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ifSpeed { ifEntry 5 }
+
+          Syntax:
+               Gauge
+
+          Definition:
+               An estimate of the interface's current bandwidth in bits
+               per second.  For interfaces which do not vary in
+               bandwidth or for those where no accurate estimation can
+               be made, this object should contain the nominal
+               bandwidth.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ifPhysAddress { ifEntry 6 }
+
+          Syntax:
+               OCTET STRING
+
+          Definition:
+               The interface's address at the protocol layer immediately
+
+
+
+McCloghrie & Rose                                              [Page 14]
+
+RFC 1066                          MIB                        August 1988
+
+
+               "below" IP in the protocol stack.  For interfaces which
+               do not have such an address (e.g., a serial line), this
+               object should contain an octet string of zero length.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ifAdminStatus { ifEntry 7 }
+
+          Syntax:
+               INTEGER {
+                    up(1),       -- ready to pass packets
+                    down(2),
+                    testing(3)   -- in some test mode
+               }
+
+          Definition:
+               The desired state of the interface.  The testing(3) state
+               indicates that no operational packets can be passed.
+
+          Access:
+               read-write.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ifOperStatus { ifEntry 8 }
+
+          Syntax:
+               INTEGER {
+                    up(1),       -- ready to pass packets
+                    down(2),
+                    testing(3)   -- in some test mode
+               }
+
+          Definition:
+               The current operational state of the interface.  The
+               testing(3) state indicates that no operational packets
+               can be passed.
+
+
+
+McCloghrie & Rose                                              [Page 15]
+
+RFC 1066                          MIB                        August 1988
+
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ifLastChange { ifEntry 9 }
+
+          Syntax:
+               TimeTicks
+
+          Definition:
+               The value of sysUpTime at the time the interface entered
+               its current operational state.  If the current state was
+               entered prior to the last re-initialization of the local
+               network management subsystem, then this object contains a
+               zero value.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ifInOctets { ifEntry 10 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The total number of octets received on the interface,
+               including framing characters.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+
+
+
+
+
+McCloghrie & Rose                                              [Page 16]
+
+RFC 1066                          MIB                        August 1988
+
+
+          OBJECT:
+          -------
+               ifInUcastPkts  { ifEntry 11 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of (subnet) unicast packets delivered to a
+               higher-layer protocol.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ifInNUcastPkts { ifEntry 12 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of non-unicast (i.e., subnet broadcast or
+               subnet multicast) packets delivered to a higher-layer
+               protocol.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ifInDiscards { ifEntry 13 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of inbound packets which were chosen to be
+               discarded even though no errors had been detected to
+               prevent their being deliverable to a higher-layer
+
+
+
+McCloghrie & Rose                                              [Page 17]
+
+RFC 1066                          MIB                        August 1988
+
+
+               protocol.  One possible reason for discarding such a
+               packet could be to free up buffer space.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ifInErrors { ifEntry 14 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of inbound packets that contained errors
+               preventing them from being deliverable to a higher-layer
+               protocol.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ifInUnknownProtos { ifEntry 15 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of packets received via the interface which
+               were discarded because of an unknown or unsupported
+               protocol.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+
+
+
+McCloghrie & Rose                                              [Page 18]
+
+RFC 1066                          MIB                        August 1988
+
+
+          OBJECT:
+          -------
+               ifOutOctets { ifEntry 16 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The total number of octets transmitted out of the
+               interface, including framing characters.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ifOutUcastPkts { ifEntry 17 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The total number of packets that higher-level protocols
+               requested be transmitted to a subnet-unicast address,
+               including those that were discarded or not sent.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ifOutNUcastPkts { ifEntry 18 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The total number of packets that higher-level protocols
+               requested be transmitted to a non-unicast (i.e., a subnet
+               broadcast or subnet multicast) address, including those
+
+
+
+McCloghrie & Rose                                              [Page 19]
+
+RFC 1066                          MIB                        August 1988
+
+
+               that were discarded or not sent.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ifOutDiscards { ifEntry 19 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of outbound packets which were chosen to be
+               discarded even though no errors had been detected to
+               prevent their being transmitted.  One possible reason for
+               discarding such a packet could be to free up buffer
+               space.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ifOutErrors { ifEntry 20 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of outbound packets that could not be
+               transmitted because of errors.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+
+
+
+McCloghrie & Rose                                              [Page 20]
+
+RFC 1066                          MIB                        August 1988
+
+
+          OBJECT:
+          -------
+               ifOutQLen { ifEntry 21 }
+
+          Syntax:
+               Gauge
+
+          Definition:
+               The length of the output packet queue (in packets).
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+McCloghrie & Rose                                              [Page 21]
+
+RFC 1066                          MIB                        August 1988
+
+
+5.3.  The Address Translation Group
+
+   Implementation of the Address Translation group is mandatory
+   for all systems.
+
+   The Address Translation group contains one table which is the
+   union across all interfaces of the translation tables for
+   converting a NetworkAddress (e.g., an IP address) into a
+   subnetwork-specific address.  For lack of a better term, this
+   document refers to such a subnetwork-specific address as a
+   "physical" address.
+
+   Examples of such translation tables are: for broadcast media
+   where ARP is in use, the translation table is equivalent to
+   the ARP cache; or, on an X.25 network where non-algorithmic
+   translation to X.121 addresses is required, the translation
+   table contains the NetworkAddress to X.121 address
+   equivalences.
+
+          OBJECT:
+          -------
+               atTable { at 1 }
+
+          Syntax:
+               SEQUENCE OF AtEntry
+
+          Definition:
+               The Address Translation tables contain the NetworkAddress
+               to "physical" address equivalences.  Some interfaces do
+               not use translation tables for determining address
+               equivalences (e.g., DDN-X.25 has an algorithmic method);
+               if all interfaces are of this type, then the Address
+               Translation table is empty, i.e., has zero entries.
+
+          Access:
+               read-write.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               atEntry { atTable 1 }
+
+          Syntax:
+               AtEntry ::= SEQUENCE {
+                    atIfIndex
+
+
+
+McCloghrie & Rose                                              [Page 22]
+
+RFC 1066                          MIB                        August 1988
+
+
+                        INTEGER,
+                    atPhysAddress
+                        OCTET STRING,
+                    atNetAddress
+                        NetworkAddress
+               }
+
+          Definition:
+               Each entry contains one NetworkAddress to "physical"
+               address equivalence.
+
+          Access:
+               read-write.
+
+          Status:
+               mandatory.
+
+          We now consider the individual components of each Address
+          Translation table entry:
+
+
+          OBJECT:
+          -------
+               atIfIndex { atEntry 1 }
+
+          Syntax:
+               INTEGER
+
+          Definition:
+               The interface on which this entry's equivalence is
+               effective.  The interface identified by a particular
+               value of this index is the same interface as identified
+               by the same value of ifIndex.
+
+          Access:
+               read-write.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               atPhysAddress { atEntry 2 }
+
+          Syntax:
+               OCTET STRING
+
+
+
+
+McCloghrie & Rose                                              [Page 23]
+
+RFC 1066                          MIB                        August 1988
+
+
+          Definition:
+               The media-dependent "physical" address.
+
+          Access:
+               read-write.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               atNetAddress { atEntry 3 }
+
+          Syntax:
+               NetworkAddress
+
+          Definition:
+               The NetworkAddress (e.g., the IP address) corresponding to
+               the media-dependent "physical" address.
+
+          Access:
+               read-write.
+
+          Status:
+               mandatory.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+McCloghrie & Rose                                              [Page 24]
+
+RFC 1066                          MIB                        August 1988
+
+
+5.4.  The IP Group
+
+   Implementation of the IP group is mandatory for all systems.
+
+
+          OBJECT:
+          -------
+               ipForwarding { ip 1 }
+
+          Syntax:
+               INTEGER {
+                    gateway(1),   -- entity forwards datagrams
+                    host(2)       -- entity does NOT forward datagrams
+               }
+
+          Definition:
+               The indication of whether this entity is acting as an IP
+               gateway in respect to the forwarding of datagrams
+               received by, but not addressed to, this entity.  IP
+               gateways forward datagrams; Hosts do not (except those
+               Source-Routed via the host).
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipDefaultTTL { ip 2 }
+
+          Syntax:
+               INTEGER
+
+          Definition:
+               The default value inserted into the Time-To-Live field of
+               the IP header of datagrams originated at this entity,
+               whenever a TTL value is not supplied by the transport
+               layer protocol.
+
+          Access:
+               read-write.
+
+          Status:
+               mandatory.
+
+
+
+
+McCloghrie & Rose                                              [Page 25]
+
+RFC 1066                          MIB                        August 1988
+
+
+          OBJECT:
+          -------
+               ipInReceives { ip 3 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The total number of input datagrams received from
+               interfaces, including those received in error.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipInHdrErrors { ip 4 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of input datagrams discarded due to errors in
+               their IP headers, including bad checksums, version number
+               mismatch, other format errors, time-to-live exceeded,
+               errors discovered in processing their IP options, etc.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipInAddrErrors { ip 5 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of input datagrams discarded because the IP
+               address in their IP header's destination field was not a
+
+
+
+McCloghrie & Rose                                              [Page 26]
+
+RFC 1066                          MIB                        August 1988
+
+
+               valid address to be received at this entity.  This count
+               includes invalid addresses (e.g., 0.0.0.0) and addresses
+               of unsupported Classes (e.g., Class E).  For entities
+               which are not IP Gateways and therefore do not forward
+               datagrams, this counter includes datagrams discarded
+               because the destination address was not a local address.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipForwDatagrams { ip 6 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of input datagrams for which this entity was
+               not their final IP destination, as a result of which an
+               attempt was made to find a route to forward them to that
+               final destination.  In entities which do not act as IP
+               Gateways, this counter will include only those packets
+               which were Source-Routed via this entity, and the
+               Source-Route option processing was successful.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipInUnknownProtos { ip 7 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of locally-addressed datagrams received
+               successfully but discarded because of an unknown or
+               unsupported protocol.
+
+
+
+McCloghrie & Rose                                              [Page 27]
+
+RFC 1066                          MIB                        August 1988
+
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipInDiscards { ip 8 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of input IP datagrams for which no problems
+               were encountered to prevent their continued processing,
+               but which were discarded (e.g., for lack of buffer
+               space).  Note that this counter does not include any
+               datagrams discarded while awaiting re-assembly.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipInDelivers { ip 9 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The total number of input datagrams successfully
+               delivered to IP user-protocols (including ICMP).
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+          OBJECT:
+          -------
+               ipOutRequests { ip 10 }
+
+
+
+McCloghrie & Rose                                              [Page 28]
+
+RFC 1066                          MIB                        August 1988
+
+
+          Syntax:
+               Counter
+
+          Definition:
+               The total number of IP datagrams which local IP user-
+               protocols (including ICMP) supplied to IP in requests for
+               transmission.  Note that this counter does not include
+               any datagrams counted in ipForwDatagrams.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipOutDiscards { ip 11 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of output IP datagrams for which no problem
+               was encountered to prevent their transmission to their
+               destination, but which were discarded (e.g., for lack of
+               buffer space).  Note that this counter would include
+               datagrams counted in ipForwDatagrams if any such packets
+               met this (discretionary) discard criterion.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipOutNoRoutes { ip 12 }
+
+          Syntax:
+               Counter
+
+
+
+
+
+
+
+McCloghrie & Rose                                              [Page 29]
+
+RFC 1066                          MIB                        August 1988
+
+
+          Definition:
+               The number of IP datagrams discarded because no route
+               could be found to transmit them to their destination.
+               Note that this counter includes any packets counted in
+               ipForwDatagrams which meet this "no-route" criterion.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipReasmTimeout { ip 13 }
+
+          Syntax:
+               INTEGER
+
+          Definition:
+               The maximum number of seconds which received fragments
+               are held while they are awaiting reassembly at this
+               entity.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipReasmReqds { ip 14 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of IP fragments received which needed to be
+               reassembled at this entity.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+
+McCloghrie & Rose                                              [Page 30]
+
+RFC 1066                          MIB                        August 1988
+
+
+          OBJECT:
+          -------
+               ipReasmOKs { ip 15 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of IP datagrams successfully re-assembled.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipReasmFails { ip 16 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of failures detected by the IP re-assembly
+               algorithm (for whatever reason: timed out, errors, etc.).
+
+               Note that this is not necessarily a count of discarded IP
+               fragments since some algorithms (notably RFC 815's) can
+               lose track of the number of fragments by combining them
+               as they are received.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipFragOKs { ip 17 }
+
+          Syntax:
+               Counter
+
+
+
+
+
+McCloghrie & Rose                                              [Page 31]
+
+RFC 1066                          MIB                        August 1988
+
+
+          Definition:
+               The number of IP datagrams that have been successfully
+               fragmented at this entity.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipFragFails { ip 18 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of IP datagrams that have been discarded
+               because they needed to be fragmented at this entity but
+               could not be, e.g., because their "Don't Fragment" flag
+               was set.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipFragCreates { ip 19 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of IP datagram fragments that have been
+               generated as a result of fragmentation at this entity.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+
+
+McCloghrie & Rose                                              [Page 32]
+
+RFC 1066                          MIB                        August 1988
+
+
+5.4.1.  The IP Address Table
+
+   The IP Address Table contains this entity's IP addressing
+   information.
+
+
+          OBJECT:
+          -------
+               ipAddrTable { ip 20 }
+
+          Syntax:
+               SEQUENCE OF IpAddrEntry
+
+          Definition:
+               The table of addressing information relevant to this
+               entity's IP addresses.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipAddrEntry { ipAddrTable 1 }
+
+          Syntax:
+               IpAddrEntry ::= SEQUENCE {
+                    ipAdEntAddr
+                        IpAddress,
+                    ipAdEntIfIndex
+                        INTEGER,
+                    ipAdEntNetMask
+                        IpAddress,
+                    ipAdEntBcastAddr
+                        INTEGER
+               }
+
+          Definition:
+               The addressing information for one of this entity's IP
+               addresses.
+
+          Access:
+               read-only.
+
+
+
+
+
+McCloghrie & Rose                                              [Page 33]
+
+RFC 1066                          MIB                        August 1988
+
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipAdEntAddr  { ipAddrEntry 1 }
+
+          Syntax:
+               IpAddress
+
+          Definition:
+               The IP address to which this entry's addressing
+               information pertains.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipAdEntIfIndex  { ipAddrEntry 2 }
+
+          Syntax:
+               INTEGER
+
+          Definition:
+               The index value which uniquely identifies the interface
+               to which this entry is applicable.  The interface
+               identified by a particular value of this index is the
+               same interface as identified by the same value of
+               ifIndex.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipAdEntNetMask  { ipAddrEntry 3 }
+
+
+
+
+
+McCloghrie & Rose                                              [Page 34]
+
+RFC 1066                          MIB                        August 1988
+
+
+          Syntax:
+               IpAddress
+
+          Definition:
+               The subnet mask associated with the IP address of this
+               entry.  The value of the mask is an IP address with all
+               the network bits set to 1 and all the hosts bits set to
+               0.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipAdEntBcastAddr { ipAddrEntry 4 }
+
+          Syntax:
+               INTEGER
+
+          Definition:
+               The value of the least-significant bit in the IP
+               broadcast address used for sending datagrams on the
+               (logical) interface associated with the IP address of
+               this entry.  For example, when the Internet standard
+               all-ones broadcast address is used, the value will be 1.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+5.4.2.  The IP Routing Table
+
+   The IP Routing Table contains an entry for each route
+   presently known to this entity.  Note that the action to be
+   taken in response to a request to read a non-existent entry
+   is specific to the network management protocol being used.
+
+
+          OBJECT:
+          -------
+               ipRoutingTable { ip 21 }
+
+
+
+
+McCloghrie & Rose                                              [Page 35]
+
+RFC 1066                          MIB                        August 1988
+
+
+          Syntax:
+               SEQUENCE OF IpRouteEntry
+
+          Definition:
+               This entity's IP Routing table.
+
+          Access:
+               read-write.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipRouteEntry { ipRoutingTable 1 }
+
+          Syntax:
+               IpRouteEntry ::= SEQUENCE {
+                    ipRouteDest
+                        IpAddress,
+                    ipRouteIfIndex
+                        INTEGER,
+                    ipRouteMetric1
+                        INTEGER,
+                    ipRouteMetric2
+                        INTEGER,
+                    ipRouteMetric3
+                        INTEGER,
+                    ipRouteMetric4
+                        INTEGER,
+                    ipRouteNextHop
+                        IpAddress,
+                    ipRouteType
+                        INTEGER,
+                    ipRouteProto
+                        INTEGER,
+                    ipRouteAge
+                        INTEGER
+               }
+
+          Definition:
+               A route to a particular destination.
+
+          Access:
+               read-write.
+
+
+
+
+
+McCloghrie & Rose                                              [Page 36]
+
+RFC 1066                          MIB                        August 1988
+
+
+          Status:
+               mandatory.
+
+          We now consider the individual components of each route in the
+          IP Routing Table:
+
+
+          OBJECT:
+          -------
+               ipRouteDest { ipRouteEntry 1 }
+
+          Syntax:
+               IpAddress
+
+          Definition:
+               The destination IP address of this route.  An entry with
+               a value of 0.0.0.0 is considered a default route.
+               Multiple such default routes can appear in the table, but
+               access to such multiple entries is dependent on the
+               table-access mechanisms defined by the network management
+               protocol in use.
+
+          Access:
+               read-write.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipRouteIfIndex  { ipRouteEntry 2 }
+
+          Syntax:
+               INTEGER
+
+          Definition:
+               The index value which uniquely identifies the local
+               interface through which the next hop of this route should
+               be reached.  The interface identified by a particular
+               value of this index is the same interface as identified
+               by the same value of ifIndex.
+
+          Access:
+               read-write.
+
+          Status:
+               mandatory.
+
+
+
+McCloghrie & Rose                                              [Page 37]
+
+RFC 1066                          MIB                        August 1988
+
+
+          OBJECT:
+          -------
+               ipRouteMetric1 { ipRouteEntry 3 }
+
+          Syntax:
+               INTEGER
+
+          Definition:
+               The primary routing metric for this route.  The semantics
+               of this metric are determined by the routing-protocol
+               specified in the route's ipRouteProto value.  If this
+               metric is not used, its value should be set to -1.
+
+          Access:
+               read-write.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipRouteMetric2 { ipRouteEntry 4 }
+
+          Syntax:
+               INTEGER
+
+          Definition:
+               An alternate routing metric for this route.  The
+               semantics of this metric are determined by the routing-
+               protocol specified in the route's ipRouteProto value.  If
+               this metric is not used, its value should be set to -1.
+
+          Access:
+               read-write.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipRouteMetric3 { ipRouteEntry 5 }
+
+          Syntax:
+               INTEGER
+
+
+
+
+
+McCloghrie & Rose                                              [Page 38]
+
+RFC 1066                          MIB                        August 1988
+
+
+          Definition:
+               An alternate routing metric for this route.  The
+               semantics of this metric are determined by the routing-
+               protocol specified in the route's ipRouteProto value.  If
+               this metric is not used, its value should be set to -1.
+
+          Access:
+               read-write.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipRouteMetric4 { ipRouteEntry 6 }
+
+          Syntax:
+               INTEGER
+
+          Definition:
+               An alternate routing metric for this route.  The
+               semantics of this metric are determined by the routing-
+               protocol specified in the route's ipRouteProto value.  If
+               this metric is not used, its value should be set to -1.
+
+          Access:
+               read-write.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipRouteNextHop { ipRouteEntry 7 }
+
+          Syntax:
+               IpAddress
+
+          Definition:
+               The IP address of the next hop of this route.
+
+          Access:
+               read-write.
+
+          Status:
+               mandatory.
+
+
+
+McCloghrie & Rose                                              [Page 39]
+
+RFC 1066                          MIB                        August 1988
+
+
+          OBJECT:
+          -------
+               ipRouteType { ipRouteEntry 8 }
+
+          Syntax:
+               INTEGER {
+                    other(1),        -- none of the following
+
+                    invalid(2),      -- an invalidated route
+
+                                     -- route to directly
+                    direct(3),       -- connected (sub-)network
+
+                                     -- route to a non-local
+                    remote(4)        -- host/network/sub-network
+               }
+
+          Definition:
+               The type of route.
+
+          Access:
+               read-write.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipRouteProto { ipRouteEntry 9 }
+
+          Syntax:
+               INTEGER {
+                    other(1),       -- none of the following
+
+                                    -- non-protocol information,
+                                    -- e.g., manually configured
+                    local(2),       -- entries
+
+                                    -- set via a network management
+                    netmgmt(3),     -- protocol
+
+                                    -- obtained via ICMP,
+                    icmp(4),        -- e.g., Redirect
+
+                                    -- the remaining values are
+                                    -- all gateway routing protocols
+                    egp(5),
+
+
+
+McCloghrie & Rose                                              [Page 40]
+
+RFC 1066                          MIB                        August 1988
+
+
+                    ggp(6),
+                    hello(7),
+                    rip(8),
+                    is-is(9),
+                    es-is(10),
+                    ciscoIgrp(11),
+                    bbnSpfIgp(12),
+                    oigp(13)
+               }
+
+          Definition:
+               The routing mechanism via which this route was learned.
+               Inclusion of values for gateway routing protocols is not
+               intended to imply that hosts should support those
+               protocols.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               ipRouteAge { ipRouteEntry 10 }
+
+          Syntax:
+               INTEGER
+
+          Definition:
+               The number of seconds since this route was last updated
+               or otherwise determined to be correct.  Note that no
+               semantics of "too old" can be implied except through
+               knowledge of the routing protocol by which the route was
+               learned.
+
+          Access:
+               read-write.
+
+          Status:
+               mandatory.
+
+
+
+
+
+
+
+
+
+McCloghrie & Rose                                              [Page 41]
+
+RFC 1066                          MIB                        August 1988
+
+
+5.5.  The ICMP Group
+
+   Implementation of the ICMP group is mandatory for all systems.
+
+   The ICMP group contains the ICMP input and output statistics.
+
+   Note that individual counters for ICMP message (sub-)codes have been
+   omitted from this (version of the) MIB for simplicity.
+
+
+          OBJECT:
+          -------
+               icmpInMsgs { icmp 1 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The total number of ICMP messages which the entity
+               received.  Note that this counter includes all those
+               counted by icmpInErrors.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpInErrors { icmp 2 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of ICMP messages which the entity received but
+               determined as having errors (bad ICMP checksums, bad
+               length, etc.).
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+
+
+
+McCloghrie & Rose                                              [Page 42]
+
+RFC 1066                          MIB                        August 1988
+
+
+          OBJECT:
+          -------
+               icmpInDestUnreachs { icmp 3 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of ICMP Destination Unreachable messages
+               received.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpInTimeExcds { icmp 4 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of ICMP Time Exceeded messages received.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpInParmProbs { icmp 5 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of ICMP Parameter Problem messages received.
+
+          Access:
+               read-only.
+
+
+
+
+McCloghrie & Rose                                              [Page 43]
+
+RFC 1066                          MIB                        August 1988
+
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpInSrcQuenchs { icmp 6 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of ICMP Source Quench messages received.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpInRedirects { icmp 7 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of ICMP Redirect messages received.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpInEchos { icmp 8 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of ICMP Echo (request) messages received.
+
+
+
+
+McCloghrie & Rose                                              [Page 44]
+
+RFC 1066                          MIB                        August 1988
+
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpInEchoReps { icmp 9 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of ICMP Echo Reply messages received.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpInTimestamps { icmp 10 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of ICMP Timestamp (request) messages received.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpInTimestampReps { icmp 11 }
+
+          Syntax:
+               Counter
+
+
+
+
+McCloghrie & Rose                                              [Page 45]
+
+RFC 1066                          MIB                        August 1988
+
+
+          Definition:
+               The number of ICMP Timestamp Reply messages received.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpInAddrMasks { icmp 12 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of ICMP Address Mask Request messages
+               received.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpInAddrMaskReps { icmp 13 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of ICMP Address Mask Reply messages received.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpOutMsgs { icmp 14 }
+
+
+
+McCloghrie & Rose                                              [Page 46]
+
+RFC 1066                          MIB                        August 1988
+
+
+          Syntax:
+               Counter
+
+          Definition:
+               The total number of ICMP messages which this entity
+               attempted to send.  Note that this counter includes all
+               those counted by icmpOutErrors.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpOutErrors { icmp 15 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of ICMP messages which this entity did not
+               send due to problems discovered within ICMP such as a
+               lack of buffers.  This value should not include errors
+               discovered outside the ICMP layer such as the inability
+               of IP to route the resultant datagram.  In some
+               implementations there may be no types of error which
+               contribute to this counter's value.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpOutDestUnreachs { icmp 16 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of ICMP Destination Unreachable messages sent.
+
+
+
+
+McCloghrie & Rose                                              [Page 47]
+
+RFC 1066                          MIB                        August 1988
+
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpOutTimeExcds { icmp 17 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of ICMP Time Exceeded messages sent.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpOutParmProbs { icmp 18 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of ICMP Parameter Problem messages sent.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpOutSrcQuenchs { icmp 19 }
+
+          Syntax:
+               Counter
+
+
+
+
+McCloghrie & Rose                                              [Page 48]
+
+RFC 1066                          MIB                        August 1988
+
+
+          Definition:
+               The number of ICMP Source Quench messages sent.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpOutRedirects { icmp 20 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of ICMP Redirect messages sent.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpOutEchos { icmp 21 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of ICMP Echo (request) messages sent.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpOutEchoReps { icmp 22 }
+
+
+
+
+McCloghrie & Rose                                              [Page 49]
+
+RFC 1066                          MIB                        August 1988
+
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of ICMP Echo Reply messages sent.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpOutTimestamps { icmp 23 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of ICMP Timestamp (request) messages sent.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpOutTimestampReps { icmp 24 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of ICMP Timestamp Reply messages sent.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+
+
+
+
+McCloghrie & Rose                                              [Page 50]
+
+RFC 1066                          MIB                        August 1988
+
+
+          OBJECT:
+          -------
+               icmpOutAddrMasks { icmp 25 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of ICMP Address Mask Request messages sent.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+          OBJECT:
+          -------
+               icmpOutAddrMaskReps { icmp 26 }
+
+          Syntax:
+               Counter
+
+          Definition:
+               The number of ICMP Address Mask Reply messages sent.
+
+          Access:
+               read-only.
+
+          Status:
+               mandatory.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+McCloghrie & Rose                                              [Page 51]
+
+RFC 1066                          MIB                        August 1988
+
+
+5.6.  The TCP Group
+
+   Implementation of the TCP group is mandatory for all systems
+   that implement the TCP protocol.
+
+   Note that instances of object types that represent information
+   about a particular TCP connection are transient; they persist
+   only as long as the connection in question.
+
+             OBJECT:
+             -------
+                  tcpRtoAlgorithm { tcp 1 }
+
+             Syntax:
+                  INTEGER {
+                       other(1),    -- none of the following
+                       constant(2), -- a constant rto
+                       rsre(3),     -- MIL-STD-1778, Appendix B
+                       vanj(4)      -- Van Jacobson's algorithm [11]
+                  }
+
+             Definition:
+                  The algorithm used to determine the timeout value used
+                  for retransmitting unacknowledged octets.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  tcpRtoMin { tcp 2 }
+
+             Syntax:
+                  INTEGER
+
+             Definition:
+                  The minimum value permitted by a TCP implementation
+                  for the retransmission timeout, measured in
+                  milliseconds.  More refined semantics for objects
+                  of this type depend upon the algorithm used to
+                  determine the retransmission timeout.  In particular,
+                  when the timeout algorithm is rsre(3), an object
+                  of this type has the semantics of the LBOUND
+                  quantity described in RFC 793.
+
+
+
+McCloghrie & Rose                                              [Page 52]
+
+RFC 1066                          MIB                        August 1988
+
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  tcpRtoMax { tcp 3 }
+
+             Syntax:
+                  INTEGER
+
+             Definition:
+                  The maximum value permitted by a TCP implementation
+                  for the retransmission timeout, measured
+                  in milliseconds.  More refined semantics for objects
+                  of this type depend upon the algorithm used to
+                  determine the retransmission timeout.  In particular,
+                  when the timeout algorithm is rsre(3), an object of
+                  this type has the semantics of the UBOUND quantity
+                  described in RFC 793.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  tcpMaxConn { tcp 4 }
+
+             Syntax:
+                  INTEGER
+
+             Definition:
+                  The limit on the total number of TCP connections the
+                  entity can support.  In entities where the maximum
+                  number of connections is dynamic, this object should
+                  contain the value "-1".
+
+             Access:
+                  read-only.
+
+
+
+
+
+McCloghrie & Rose                                              [Page 53]
+
+RFC 1066                          MIB                        August 1988
+
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  tcpActiveOpens { tcp 5 }
+
+             Syntax:
+                  Counter
+
+             Definition:
+                  The number of times TCP connections have made a direct
+                  transition to the SYN-SENT state from the CLOSED
+                  state.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  tcpPassiveOpens { tcp 6 }
+
+             Syntax:
+                  Counter
+
+             Definition:
+                  The number of times TCP connections have made a direct
+                  transition to the SYN-RCVD state from the LISTEN
+                  state.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  tcpAttemptFails { tcp 7 }
+
+             Syntax:
+                  Counter
+
+
+
+McCloghrie & Rose                                              [Page 54]
+
+RFC 1066                          MIB                        August 1988
+
+
+             Definition:
+                  The number of times TCP connections have made a direct
+                  transition to the CLOSED state from either the
+                  SYN-SENT state or the SYN-RCVD state, plus the number
+                  of times TCP connections have made a direct transition
+                  to the LISTEN state from the SYN-RCVD state.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  tcpEstabResets { tcp 8 }
+
+             Syntax:
+                  Counter
+
+             Definition:
+                  The number of times TCP connections have made a direct
+                  transition to the CLOSED state from either the
+                  ESTABLISHED state or the CLOSE-WAIT state.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  tcpCurrEstab { tcp 9 }
+
+             Syntax:
+                  Gauge
+
+             Definition:
+                  The number of TCP connections for which the current
+                  state is either ESTABLISHED or CLOSE-WAIT.
+
+             Access:
+                  read-only.
+
+
+
+
+
+McCloghrie & Rose                                              [Page 55]
+
+RFC 1066                          MIB                        August 1988
+
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  tcpInSegs { tcp 10 }
+
+             Syntax:
+                  Counter
+
+             Definition:
+                  The total number of segments received, including those
+                  received in error.  This count includes segments
+                  received on currently established connections.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  tcpOutSegs { tcp 11 }
+
+             Syntax:
+                  Counter
+
+             Definition:
+                  The total number of segments sent, including those on
+                  current connections but excluding those containing
+                  only retransmitted octets.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  tcpRetransSegs { tcp 12 }
+
+             Syntax:
+                  Counter
+
+
+
+McCloghrie & Rose                                              [Page 56]
+
+RFC 1066                          MIB                        August 1988
+
+
+             Definition:
+                  The total number of segments retransmitted - that is,
+                  the number of TCP segments transmitted containing one
+                  or more previously transmitted octets.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  tcpConnTable { tcp 13 }
+
+             Syntax:
+                  SEQUENCE OF TcpConnEntry
+
+             Definition:
+                  A table containing TCP connection-specific
+                  information.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  tcpConnEntry { tcpConnTable 1 }
+
+             Syntax:
+                  TcpConnEntry ::= SEQUENCE {
+                       tcpConnState
+                           INTEGER,
+                       tcpConnLocalAddress
+                           IpAddress,
+                       tcpConnLocalPort
+                           INTEGER (0..65535),
+                       tcpConnRemAddress
+                           IpAddress,
+                       tcpConnRemPort
+                           INTEGER (0..65535)
+                  }
+
+
+
+
+McCloghrie & Rose                                              [Page 57]
+
+RFC 1066                          MIB                        August 1988
+
+
+             Definition:
+                  Information about a particular current TCP connection.
+                  An object of this type is transient, in that it ceases
+                  to exist when (or soon after) the connection makes the
+                  transition to the CLOSED state.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  tcpConnState { tcpConnEntry 1 }
+
+             Syntax:
+                  INTEGER {
+                       closed(1),
+                       listen(2),
+                       synSent(3),
+                       synReceived(4),
+                       established(5),
+                       finWait1(6),
+                       finWait2(7),
+                       closeWait(8),
+                       lastAck(9),
+                       closing(10),
+                       timeWait(11)
+                  }
+
+             Definition:
+                  The state of this TCP connection.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  tcpConnLocalAddress { tcpConnEntry 2 }
+
+             Syntax:
+                  IpAddress
+
+
+
+McCloghrie & Rose                                              [Page 58]
+
+RFC 1066                          MIB                        August 1988
+
+
+             Definition:
+                  The local IP address for this TCP connection.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  tcpConnLocalPort { tcpConnEntry 3 }
+
+             Syntax:
+                  INTEGER (0..65535)
+
+             Definition:
+                  The local port number for this TCP connection.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  tcpConnRemAddress { tcpConnEntry 4 }
+
+             Syntax:
+                  IpAddress
+
+             Definition:
+                  The remote IP address for this TCP connection.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  tcpConnRemPort { tcpConnEntry 5 }
+
+
+
+
+McCloghrie & Rose                                              [Page 59]
+
+RFC 1066                          MIB                        August 1988
+
+
+             Syntax:
+                  INTEGER (0..65535)
+
+             Definition:
+                  The remote port number for this TCP connection.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+McCloghrie & Rose                                              [Page 60]
+
+RFC 1066                          MIB                        August 1988
+
+
+5.7.  The UDP Group
+
+   Implementation of the UDP group is mandatory for all systems
+   which implement the UDP protocol.
+
+             OBJECT:
+             -------
+                  udpInDatagrams { udp 1 }
+
+             Syntax:
+                  Counter
+
+             Definition:
+                  The total number of UDP datagrams delivered to UDP
+                  users.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  udpNoPorts { udp 2 }
+
+             Syntax:
+                  Counter
+
+             Definition:
+                  The total number of received UDP datagrams for which
+                  there was no application at the destination port.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  udpInErrors { udp 3 }
+
+             Syntax:
+                  Counter
+
+
+
+
+McCloghrie & Rose                                              [Page 61]
+
+RFC 1066                          MIB                        August 1988
+
+
+             Definition:
+                  The number of received UDP datagrams that could not be
+                  delivered for reasons other than the lack of an
+                  application at the destination port.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  udpOutDatagrams { udp 4 }
+
+             Syntax:
+                  Counter
+
+             Definition:
+                  The total number of UDP datagrams sent from this
+                  entity.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+McCloghrie & Rose                                              [Page 62]
+
+RFC 1066                          MIB                        August 1988
+
+
+5.8.  The EGP Group
+
+   Implementation of the EGP group is mandatory for all systems
+   which implement the EGP protocol.
+
+             OBJECT:
+             -------
+                  egpInMsgs { egp 1 }
+
+             Syntax:
+                  Counter
+
+             Definition:
+                  The number of EGP messages received without error.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  egpInErrors { egp 2 }
+
+             Syntax:
+                  Counter
+
+             Definition:
+                  The number of EGP messages received that proved to be
+                  in error.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  egpOutMsgs { egp 3 }
+
+             Syntax:
+                  Counter
+
+
+
+
+
+McCloghrie & Rose                                              [Page 63]
+
+RFC 1066                          MIB                        August 1988
+
+
+             Definition:
+                  The total number of locally generated EGP messages.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  egpOutErrors { egp 4 }
+
+             Syntax:
+                  Counter
+
+             Definition:
+                  The number of locally generated EGP messages not sent
+                  due to resource limitations within an EGP entity.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+5.8.1.  The EGP Neighbor Table
+
+   The EGP Neighbor table contains information about this entity's EGP
+   neighbors.
+
+
+             OBJECT:
+             -------
+                  egpNeighTable { egp 5 }
+
+             Syntax:
+                  SEQUENCE OF EgpNeighEntry
+
+             Definition:
+                  The EGP neighbor table.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+
+McCloghrie & Rose                                              [Page 64]
+
+RFC 1066                          MIB                        August 1988
+
+
+             OBJECT:
+             -------
+                  egpNeighEntry { egpNeighTable 1 }
+
+             Syntax:
+                  EgpNeighEntry ::= SEQUENCE {
+                       egpNeighState
+                           INTEGER,
+                       egpNeighAddr
+                           IpAddress
+                  }
+
+             Definition:
+                  Information about this entity's relationship with a
+                  particular EGP neighbor.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+             We now consider the individual components of each EGP
+             neighbor entry:
+
+
+             OBJECT:
+             -------
+                  egpNeighState { egpNeighEntry 1 }
+
+             Syntax:
+                  INTEGER {
+                       idle(1),
+                       acquisition(2),
+                       down(3),
+                       up(4),
+                       cease(5)
+                  }
+
+             Definition:
+                  The EGP state of the local system with respect to this
+                  entry's EGP neighbor.  Each EGP state is represented
+                  by a value that is one greater than the numerical
+                  value associated with said state in RFC 904.
+
+             Access:
+                  read-only.
+
+
+
+McCloghrie & Rose                                              [Page 65]
+
+RFC 1066                          MIB                        August 1988
+
+
+             Status:
+                  mandatory.
+
+
+             OBJECT:
+             -------
+                  egpNeighAddr { egpNeighEntry 2 }
+
+             Syntax:
+                  IpAddress
+
+             Definition:
+                  The IP address of this entry's EGP neighbor.
+
+             Access:
+                  read-only.
+
+             Status:
+                  mandatory.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+McCloghrie & Rose                                              [Page 66]
+
+RFC 1066                          MIB                        August 1988
+
+
+6.  Definitions
+
+                RFC1066-MIB { iso org(3) dod(6) internet(1) mgmt(2) 1 }
+
+                DEFINITIONS ::= BEGIN
+
+                IMPORTS
+                        mgmt, OBJECT-TYPE, NetworkAddress, IpAddress,
+                        Counter, Gauge, TimeTicks
+                            FROM RFC1065-SMI;
+
+                  mib        OBJECT IDENTIFIER ::= { mgmt 1 }
+
+                  system     OBJECT IDENTIFIER ::= { mib 1 }
+                  interfaces OBJECT IDENTIFIER ::= { mib 2 }
+                  at         OBJECT IDENTIFIER ::= { mib 3 }
+                  ip         OBJECT IDENTIFIER ::= { mib 4 }
+                  icmp       OBJECT IDENTIFIER ::= { mib 5 }
+                  tcp        OBJECT IDENTIFIER ::= { mib 6 }
+                  udp        OBJECT IDENTIFIER ::= { mib 7 }
+                  egp        OBJECT IDENTIFIER ::= { mib 8 }
+
+                  -- object types
+
+                  -- the System group
+
+                  sysDescr OBJECT-TYPE
+                          SYNTAX  OCTET STRING
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { system 1 }
+
+                  sysObjectID OBJECT-TYPE
+                          SYNTAX  OBJECT IDENTIFIER
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { system 2 }
+
+                  sysUpTime OBJECT-TYPE
+                          SYNTAX  TimeTicks
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { system 3 }
+
+                  -- the Interfaces group
+
+                  ifNumber OBJECT-TYPE
+                          SYNTAX  INTEGER
+
+
+
+McCloghrie & Rose                                              [Page 67]
+
+RFC 1066                          MIB                        August 1988
+
+
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { interfaces 1 }
+
+                  -- the Interfaces table
+
+                  ifTable OBJECT-TYPE
+                          SYNTAX  SEQUENCE OF IfEntry
+                          ACCESS  read-write
+                          STATUS  mandatory
+                          ::= { interfaces 2 }
+
+                  ifEntry OBJECT-TYPE
+                          SYNTAX  IfEntry
+                          ACCESS  read-write
+                          STATUS  mandatory
+                          ::= { ifTable 1 }
+
+                  IfEntry ::= SEQUENCE {
+                      ifIndex
+                          INTEGER,
+                      ifDescr
+                          OCTET STRING,
+                      ifType
+                          INTEGER,
+                      ifMtu
+                          INTEGER,
+                      ifSpeed
+                          Gauge,
+                      ifPhysAddress
+                          OCTET STRING,
+                      ifAdminStatus
+                          INTEGER,
+                      ifOperStatus
+                          INTEGER,
+                      ifLastChange
+                          TimeTicks,
+                      ifInOctets
+                          Counter,
+                      ifInUcastPkts
+                          Counter,
+                      ifInNUcastPkts
+                          Counter,
+                      ifInDiscards
+                          Counter,
+                      ifInErrors
+                          Counter,
+                      ifInUnknownProtos
+
+
+
+McCloghrie & Rose                                              [Page 68]
+
+RFC 1066                          MIB                        August 1988
+
+
+                          Counter,
+                      ifOutOctets
+                          Counter,
+                      ifOutUcastPkts
+                          Counter,
+                      ifOutNUcastPkts
+                          Counter,
+                      ifOutDiscards
+                          Counter,
+                      ifOutErrors
+                          Counter,
+                      ifOutQLen
+                          Gauge
+                  }
+
+                  ifIndex OBJECT-TYPE
+                          SYNTAX  INTEGER
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ifEntry 1 }
+
+                  ifDescr OBJECT-TYPE
+                          SYNTAX  OCTET STRING
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ifEntry 2 }
+
+                  ifType OBJECT-TYPE
+                          SYNTAX  INTEGER {
+                                  other(1),     -- none of the following
+                                  regular1822(2),
+                                  hdh1822(3),
+                                  ddn-x25(4),
+                                  rfc877-x25(5),
+                                  ethernet-csmacd(6),
+                                  iso88023-csmacd(7),
+                                  iso88024-tokenBus(8),
+                                  iso88025-tokenRing(9),
+                                  iso88026-man(10),
+                                  starLan(11),
+                                  proteon-10MBit(12),
+                                  proteon-80MBit(13),
+                                  hyperchannel(14),
+                                  fddi(15),
+                                  lapb(16),
+                                  sdlc(17),
+                                  t1-carrier(18),
+                                  cept(19),
+
+
+
+McCloghrie & Rose                                              [Page 69]
+
+RFC 1066                          MIB                        August 1988
+
+
+                                  basicIsdn(20),
+                                  primaryIsdn(21),
+                                                   -- proprietary serial
+                                  propPointToPointSerial(22)
+                              }
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ifEntry 3 }
+
+                  ifMtu OBJECT-TYPE
+                          SYNTAX  INTEGER
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ifEntry 4 }
+
+                  ifSpeed OBJECT-TYPE
+                          SYNTAX  Gauge
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ifEntry 5 }
+
+                  ifPhysAddress OBJECT-TYPE
+                          SYNTAX  OCTET STRING
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ifEntry 6 }
+
+                  ifAdminStatus OBJECT-TYPE
+                          SYNTAX  INTEGER {
+                                  up(1),        -- ready to pass packets
+                                  down(2),
+                                  testing(3)    -- in some test mode
+                                  }
+                          ACCESS  read-write
+                          STATUS  mandatory
+                          ::= { ifEntry 7 }
+
+                  ifOperStatus OBJECT-TYPE
+                          SYNTAX  INTEGER {
+                                  up(1),        -- ready to pass packets
+                                  down(2),
+                                  testing(3)    -- in some test mode
+                                  }
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ifEntry 8 }
+
+                  ifLastChange OBJECT-TYPE
+
+
+
+McCloghrie & Rose                                              [Page 70]
+
+RFC 1066                          MIB                        August 1988
+
+
+                          SYNTAX  TimeTicks
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ifEntry 9 }
+
+                  ifInOctets OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ifEntry 10 }
+
+                  ifInUcastPkts OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::=  { ifEntry 11 }
+
+                  ifInNUcastPkts OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ifEntry 12 }
+
+                  ifInDiscards OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ifEntry 13 }
+
+                  ifInErrors OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ifEntry 14 }
+
+                  ifInUnknownProtos OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ifEntry 15 }
+
+                  ifOutOctets OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ifEntry 16 }
+
+                  ifOutUcastPkts OBJECT-TYPE
+
+
+
+McCloghrie & Rose                                              [Page 71]
+
+RFC 1066                          MIB                        August 1988
+
+
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ifEntry 17 }
+
+                  ifOutNUcastPkts OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ifEntry 18 }
+
+                  ifOutDiscards OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ifEntry 19 }
+
+                  ifOutErrors OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ifEntry 20 }
+
+                  ifOutQLen OBJECT-TYPE
+                          SYNTAX  Gauge
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ifEntry 21 }
+
+                  -- the Address Translation group
+
+                  atTable OBJECT-TYPE
+                          SYNTAX  SEQUENCE OF AtEntry
+                          ACCESS  read-write
+                          STATUS  mandatory
+                          ::= { at 1 }
+
+                  atEntry OBJECT-TYPE
+                          SYNTAX  AtEntry
+                          ACCESS  read-write
+                          STATUS  mandatory
+                          ::= { atTable 1 }
+
+                  AtEntry ::= SEQUENCE {
+                      atIfIndex
+                          INTEGER,
+                      atPhysAddress
+                          OCTET STRING,
+
+
+
+McCloghrie & Rose                                              [Page 72]
+
+RFC 1066                          MIB                        August 1988
+
+
+                      atNetAddress
+                          NetworkAddress
+                  }
+
+                  atIfIndex OBJECT-TYPE
+                          SYNTAX  INTEGER
+                          ACCESS  read-write
+                          STATUS  mandatory
+                          ::= { atEntry 1 }
+
+                  atPhysAddress OBJECT-TYPE
+                          SYNTAX  OCTET STRING
+                          ACCESS  read-write
+                          STATUS  mandatory
+                          ::= { atEntry 2 }
+
+                  atNetAddress OBJECT-TYPE
+                          SYNTAX  NetworkAddress
+                          ACCESS  read-write
+                          STATUS  mandatory
+                          ::= { atEntry 3 }
+
+                  -- the IP group
+
+                  ipForwarding OBJECT-TYPE
+                          SYNTAX  INTEGER {
+                        gateway(1), -- entity forwards datagrams
+                        host(2)     -- entity does NOT forward datagrams
+                                  }
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ip 1 }
+
+                  ipDefaultTTL OBJECT-TYPE
+                          SYNTAX  INTEGER
+                          ACCESS  read-write
+                          STATUS  mandatory
+                          ::= { ip 2 }
+
+                  ipInReceives OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ip 3 }
+
+                  ipInHdrErrors OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+
+
+
+McCloghrie & Rose                                              [Page 73]
+
+RFC 1066                          MIB                        August 1988
+
+
+                          STATUS  mandatory
+                          ::= { ip 4 }
+
+                  ipInAddrErrors OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ip 5 }
+
+                  ipForwDatagrams OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ip 6 }
+
+                  ipInUnknownProtos OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ip 7 }
+
+                  ipInDiscards OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ip 8 }
+
+                  ipInDelivers OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ip 9 }
+
+                  ipOutRequests OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ip 10 }
+
+                  ipOutDiscards OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ip 11 }
+
+                  ipOutNoRoutes OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+
+
+
+McCloghrie & Rose                                              [Page 74]
+
+RFC 1066                          MIB                        August 1988
+
+
+                          STATUS  mandatory
+                          ::= { ip 12 }
+
+                  ipReasmTimeout OBJECT-TYPE
+                          SYNTAX  INTEGER
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ip 13 }
+
+                  ipReasmReqds OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ip 14 }
+
+                  ipReasmOKs OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ip 15 }
+
+                  ipReasmFails OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ip 16 }
+
+                  ipFragOKs OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ip 17 }
+
+                  ipFragFails OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ip 18 }
+
+                  ipFragCreates OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ip 19 }
+
+                  -- the IP Interface table
+
+                  ipAddrTable OBJECT-TYPE
+
+
+
+McCloghrie & Rose                                              [Page 75]
+
+RFC 1066                          MIB                        August 1988
+
+
+                          SYNTAX  SEQUENCE OF IpAddrEntry
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ip 20 }
+
+                  ipAddrEntry OBJECT-TYPE
+                          SYNTAX  IpAddrEntry
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ipAddrTable 1 }
+
+                  IpAddrEntry ::= SEQUENCE {
+                      ipAdEntAddr
+                          IpAddress,
+                      ipAdEntIfIndex
+                          INTEGER,
+                      ipAdEntNetMask
+                          IpAddress,
+                      ipAdEntBcastAddr
+                          INTEGER
+                  }
+
+                  ipAdEntAddr OBJECT-TYPE
+                          SYNTAX  IpAddress
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::=  { ipAddrEntry 1 }
+
+                  ipAdEntIfIndex OBJECT-TYPE
+                          SYNTAX  INTEGER
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::=  { ipAddrEntry 2 }
+
+                  ipAdEntNetMask OBJECT-TYPE
+                          SYNTAX  IpAddress
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::=  { ipAddrEntry 3 }
+
+                  ipAdEntBcastAddr OBJECT-TYPE
+                          SYNTAX  INTEGER
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ipAddrEntry 4 }
+
+                  -- the IP Routing table
+
+
+
+
+McCloghrie & Rose                                              [Page 76]
+
+RFC 1066                          MIB                        August 1988
+
+
+                  ipRoutingTable OBJECT-TYPE
+                          SYNTAX  SEQUENCE OF IpRouteEntry
+                          ACCESS  read-write
+                          STATUS  mandatory
+                          ::= { ip 21 }
+
+                  ipRouteEntry OBJECT-TYPE
+                          SYNTAX  IpRouteEntry
+                          ACCESS  read-write
+                          STATUS  mandatory
+                          ::= { ipRoutingTable 1 }
+
+                  IpRouteEntry ::= SEQUENCE {
+                      ipRouteDest
+                          IpAddress,
+                      ipRouteIfIndex
+                          INTEGER,
+                      ipRouteMetric1
+                          INTEGER,
+                      ipRouteMetric2
+                          INTEGER,
+                      ipRouteMetric3
+                          INTEGER,
+                      ipRouteMetric4
+                          INTEGER,
+                      ipRouteNextHop
+                          IpAddress,
+                      ipRouteType
+                          INTEGER,
+                      ipRouteProto
+                          INTEGER,
+                      ipRouteAge
+                          INTEGER
+                  }
+
+                  ipRouteDest OBJECT-TYPE
+                          SYNTAX  IpAddress
+                          ACCESS  read-write
+                          STATUS  mandatory
+                          ::= { ipRouteEntry 1 }
+
+                  ipRouteIfIndex  OBJECT-TYPE
+                          SYNTAX  INTEGER
+                          ACCESS  read-write
+                          STATUS  mandatory
+                          ::= { ipRouteEntry 2 }
+
+                  ipRouteMetric1 OBJECT-TYPE
+
+
+
+McCloghrie & Rose                                              [Page 77]
+
+RFC 1066                          MIB                        August 1988
+
+
+                          SYNTAX  INTEGER
+                          ACCESS  read-write
+                          STATUS  mandatory
+                          ::= { ipRouteEntry 3 }
+
+                  ipRouteMetric2 OBJECT-TYPE
+                          SYNTAX  INTEGER
+                          ACCESS  read-write
+                          STATUS  mandatory
+                          ::= { ipRouteEntry 4 }
+
+                  ipRouteMetric3 OBJECT-TYPE
+                          SYNTAX  INTEGER
+                          ACCESS  read-write
+                          STATUS  mandatory
+                          ::= { ipRouteEntry 5 }
+
+                  ipRouteMetric4 OBJECT-TYPE
+                          SYNTAX  INTEGER
+                          ACCESS  read-write
+                          STATUS  mandatory
+                          ::= { ipRouteEntry 6 }
+
+                  ipRouteNextHop OBJECT-TYPE
+                          SYNTAX  IpAddress
+                          ACCESS  read-write
+                          STATUS  mandatory
+                          ::= { ipRouteEntry 7 }
+
+                  ipRouteType OBJECT-TYPE
+                          SYNTAX  INTEGER {
+                            other(1),      -- none of the following
+
+                            invalid(2),    -- an invalidated route
+
+                                           -- route to directly
+                            direct(3),     -- connected (sub-)network
+
+                                           -- route to a non-local
+                            remote(4)      -- host/network/sub-network
+                              }
+                          ACCESS  read-write
+                          STATUS  mandatory
+                          ::= { ipRouteEntry 8 }
+
+                  ipRouteProto OBJECT-TYPE
+                          SYNTAX  INTEGER {
+                            other(1),     -- none of the following
+
+
+
+McCloghrie & Rose                                              [Page 78]
+
+RFC 1066                          MIB                        August 1988
+
+
+                                          -- non-protocol information
+                                          --   e.g., manually
+                            local(2),     --   configured entries
+
+                                          -- set via a network
+                            netmgmt(3),   --   management protocol
+
+                                          -- obtained via ICMP,
+                            icmp(4),      --   e.g., Redirect
+
+                                          -- the following are
+                                          -- gateway routing protocols
+                            egp(5),
+                            ggp(6),
+                            hello(7),
+                            rip(8),
+                            is-is(9),
+                            es-is(10),
+                            ciscoIgrp(11),
+                            bbnSpfIgp(12),
+                            oigp(13)
+                              }
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { ipRouteEntry 9 }
+
+                  ipRouteAge OBJECT-TYPE
+                          SYNTAX  INTEGER
+                          ACCESS  read-write
+                          STATUS  mandatory
+                          ::= { ipRouteEntry 10 }
+
+                  -- the ICMP group
+
+                  icmpInMsgs OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 1 }
+
+                  icmpInErrors OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 2 }
+
+                  icmpInDestUnreachs OBJECT-TYPE
+                          SYNTAX  Counter
+
+
+
+McCloghrie & Rose                                              [Page 79]
+
+RFC 1066                          MIB                        August 1988
+
+
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 3 }
+
+                  icmpInTimeExcds OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 4 }
+
+                  icmpInParmProbs OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 5 }
+
+                  icmpInSrcQuenchs OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 6 }
+
+                  icmpInRedirects OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 7 }
+
+                  icmpInEchos OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 8 }
+
+                  icmpInEchoReps OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 9 }
+
+                  icmpInTimestamps OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 10 }
+
+                  icmpInTimestampReps OBJECT-TYPE
+                          SYNTAX  Counter
+
+
+
+McCloghrie & Rose                                              [Page 80]
+
+RFC 1066                          MIB                        August 1988
+
+
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 11 }
+
+                  icmpInAddrMasks OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 12 }
+
+                  icmpInAddrMaskReps OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 13 }
+
+                  icmpOutMsgs OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 14 }
+
+                  icmpOutErrors OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 15 }
+
+                  icmpOutDestUnreachs OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 16 }
+
+                  icmpOutTimeExcds OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 17 }
+
+                  icmpOutParmProbs OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 18 }
+
+                  icmpOutSrcQuenchs OBJECT-TYPE
+                          SYNTAX  Counter
+
+
+
+McCloghrie & Rose                                              [Page 81]
+
+RFC 1066                          MIB                        August 1988
+
+
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 19 }
+
+                  icmpOutRedirects OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 20 }
+
+                  icmpOutEchos OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 21 }
+
+                  icmpOutEchoReps OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 22 }
+
+                  icmpOutTimestamps OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 23 }
+
+                  icmpOutTimestampReps OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 24 }
+
+                  icmpOutAddrMasks OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 25 }
+
+                  icmpOutAddrMaskReps OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { icmp 26 }
+
+                  -- the TCP group
+
+
+
+
+McCloghrie & Rose                                              [Page 82]
+
+RFC 1066                          MIB                        August 1988
+
+
+                  tcpRtoAlgorithm OBJECT-TYPE
+                          SYNTAX  INTEGER {
+                          other(1),    -- none of the following
+                          constant(2), -- a constant rto
+                          rsre(3),     -- MIL-STD-1778, Appendix B
+                          vanj(4)      -- Van Jacobson's algorithm [11]
+                                  }
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { tcp 1 }
+
+                  tcpRtoMin OBJECT-TYPE
+                          SYNTAX  INTEGER
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { tcp 2 }
+
+                  tcpRtoMax OBJECT-TYPE
+                          SYNTAX  INTEGER
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { tcp 3 }
+
+                  tcpMaxConn OBJECT-TYPE
+                          SYNTAX  INTEGER
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { tcp 4 }
+
+                  tcpActiveOpens OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { tcp 5 }
+
+                  tcpPassiveOpens OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { tcp 6 }
+
+                  tcpAttemptFails OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { tcp 7 }
+
+                  tcpEstabResets OBJECT-TYPE
+
+
+
+McCloghrie & Rose                                              [Page 83]
+
+RFC 1066                          MIB                        August 1988
+
+
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { tcp 8 }
+
+                  tcpCurrEstab OBJECT-TYPE
+                          SYNTAX  Gauge
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { tcp 9 }
+
+                  tcpInSegs OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { tcp 10 }
+
+                  tcpOutSegs OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { tcp 11 }
+
+                  tcpRetransSegs OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { tcp 12 }
+
+                  -- the TCP connections table
+
+                  tcpConnTable OBJECT-TYPE
+                          SYNTAX  SEQUENCE OF TcpConnEntry
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { tcp 13 }
+
+                  tcpConnEntry OBJECT-TYPE
+                          SYNTAX  TcpConnEntry
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { tcpConnTable 1 }
+
+                  TcpConnEntry ::= SEQUENCE {
+                      tcpConnState
+                          INTEGER,
+                      tcpConnLocalAddress
+                          IpAddress,
+
+
+
+McCloghrie & Rose                                              [Page 84]
+
+RFC 1066                          MIB                        August 1988
+
+
+                      tcpConnLocalPort
+                          INTEGER (0..65535),
+                      tcpConnRemAddress
+                          IpAddress,
+                      tcpConnRemPort
+                          INTEGER (0..65535)
+                  }
+
+                  tcpConnState OBJECT-TYPE
+                          SYNTAX  INTEGER {
+                                      closed(1),
+                                      listen(2),
+                                      synSent(3),
+                                      synReceived(4),
+                                      established(5),
+                                      finWait1(6),
+                                      finWait2(7),
+                                      closeWait(8),
+                                      lastAck(9),
+                                      closing(10),
+                                      timeWait(11)
+                                  }
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { tcpConnEntry 1 }
+
+                  tcpConnLocalAddress OBJECT-TYPE
+                          SYNTAX  IpAddress
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { tcpConnEntry 2 }
+
+                  tcpConnLocalPort OBJECT-TYPE
+                          SYNTAX  INTEGER (0..65535)
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { tcpConnEntry 3 }
+
+                  tcpConnRemAddress OBJECT-TYPE
+                          SYNTAX  IpAddress
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { tcpConnEntry 4 }
+
+                  tcpConnRemPort OBJECT-TYPE
+                          SYNTAX  INTEGER (0..65535)
+                          ACCESS  read-only
+                          STATUS  mandatory
+
+
+
+McCloghrie & Rose                                              [Page 85]
+
+RFC 1066                          MIB                        August 1988
+
+
+                          ::= { tcpConnEntry 5 }
+
+                  -- the UDP group
+
+                  udpInDatagrams OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { udp 1 }
+
+                  udpNoPorts OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { udp 2 }
+
+                  udpInErrors OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { udp 3 }
+
+                  udpOutDatagrams OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { udp 4 }
+
+                  -- the EGP group
+
+                  egpInMsgs OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { egp 1 }
+
+                  egpInErrors OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { egp 2 }
+
+                  egpOutMsgs OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { egp 3 }
+
+
+
+
+McCloghrie & Rose                                              [Page 86]
+
+RFC 1066                          MIB                        August 1988
+
+
+                  egpOutErrors OBJECT-TYPE
+                          SYNTAX  Counter
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { egp 4 }
+
+                  -- the EGP Neighbor table
+
+                  egpNeighTable OBJECT-TYPE
+                          SYNTAX  SEQUENCE OF EgpNeighEntry
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { egp 5 }
+
+                  egpNeighEntry OBJECT-TYPE
+                          SYNTAX  EgpNeighEntry
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { egpNeighTable 1 }
+
+                  EgpNeighEntry ::= SEQUENCE {
+                      egpNeighState
+                          INTEGER,
+                      egpNeighAddr
+                          IpAddress
+                  }
+
+                  egpNeighState OBJECT-TYPE
+                          SYNTAX  INTEGER {
+                                      idle(1),
+                                      acquisition(2),
+                                      down(3),
+                                      up(4),
+                                      cease(5)
+                                  }
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { egpNeighEntry 1 }
+
+                  egpNeighAddr OBJECT-TYPE
+                          SYNTAX  IpAddress
+                          ACCESS  read-only
+                          STATUS  mandatory
+                          ::= { egpNeighEntry 2 }
+
+                  END
+
+
+
+
+
+McCloghrie & Rose                                              [Page 87]
+
+RFC 1066                          MIB                        August 1988
+
+
+7.  Acknowledgements
+
+   The initial draft of this memo was heavily influenced by the HEMS
+   [5] and SNMP [6] MIBs.
+
+   Its final form is the result of the suggestions, the discussions, and
+   the compromises reached by the members of the IETF MIB working group:
+
+         Karl Auerbach, Epilogue Technology
+         K. Ramesh Babu, Excelan
+         Lawrence Besaw, Hewlett-Packard
+         Jeffrey D. Case, University of Tennessee at Knoxville
+         James R. Davin, Proteon
+         Mark S. Fedor, NYSERNet
+         Robb Foster, BBN
+         Phill Gross, The MITRE Corporation
+         Bent Torp Jensen, Convergent Technology
+         Lee Labarre, The MITRE Corporation
+         Dan Lynch, Advanced Computing Environments
+         Keith McCloghrie, The Wollongong Group
+         Dave Mackie, 3Com/Bridge
+         Craig Partridge, BBN (chair)
+         Jim Robertson, 3Com/Bridge
+         Marshall T. Rose, The Wollongong Group
+         Greg Satz, cisco
+         Martin Lee Schoffstall, Rensselaer Polytechnic Institute
+         Lou Steinberg, IBM
+         Dean Throop, Data General
+         Unni Warrier, Unisys
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+McCloghrie & Rose                                              [Page 88]
+
+RFC 1066                          MIB                        August 1988
+
+
+8.  References
+
+   [1]  Cerf, V., "IAB Recommendations for the Development of Internet
+        Network Management Standards", RFC 1052, IAB, April 1988.
+
+   [2]  Information processing systems - Open Systems Interconnection,
+        "Management Information Services Definition", International
+        Organization for Standardization, Draft Proposal 9595/2,
+        December 1987.
+
+   [3]  Information processing systems - Open Systems Interconnection,
+        "Management Information Protocol Specification", International
+        Organization for Standardization, Draft Proposal 9596/2,
+        December 1987.
+
+   [4]  Rose M., and K. McCloghrie, "Structure and Identification of
+        Management Information for TCP/IP-based internets", RFC 1065,
+        TWG, August 1988.
+
+   [5]  Partridge C., and G. Trewitt, "The High-Level Entity Management
+        System (HEMS)", RFCs 1021-1024, BBN and Stanford, October 1987.
+
+   [6]  Case, J., M. Fedor, M. Schoffstall, and J. Davin, "A Simple
+        Network Management Protocol", RFC 1067, University of Tennessee
+        at Knoxville, NYSERNet, Rensselaer Polytechnic, Proteon, August
+        1988.
+
+   [7]  LaBarre, L., "Structure and Identification of Management
+        Information for the Internet", Internet Engineering Task Force
+        working note, Network Information Center, SRI International,
+        Menlo Park, California, April 1988.
+
+   [8]  LaBarre, L., "Transport Layer Management Information: TCP",
+        Internet Engineering Task Force working note in preparation.
+        Network Information Center, SRI International, Menlo Park,
+        California, (unpublished).
+
+   [9]  Information processing systems - Open Systems Interconnection,
+        "Specification of Abstract Syntax Notation One (ASN.1)",
+        International Organization for Standardization, International
+        Standard 8824, December 1987.
+
+   [10] Information processing systems - Open Systems Interconnection,
+        "Specification of Basic Encoding Rules for Abstract Notation One
+        (ASN.1)", International Organization for Standardization,
+        International Standard 8825, December 1987.
+
+   [11] Jacobson, V., "Congestion Avoidance and Control", SIGCOMM, 1988,
+
+
+
+McCloghrie & Rose                                              [Page 89]
+
+RFC 1066                          MIB                        August 1988
+
+
+        Stanford, California.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+McCloghrie & Rose                                              [Page 90]
+
\ No newline at end of file
diff --git a/ext/picotcp/RFC/rfc1122.txt b/ext/picotcp/RFC/rfc1122.txt
new file mode 100644
index 0000000..c14f2e5
--- /dev/null
+++ b/ext/picotcp/RFC/rfc1122.txt
@@ -0,0 +1,6844 @@
+
+
+
+
+
+
+Network Working Group                    Internet Engineering Task Force
+Request for Comments: 1122                             R. Braden, Editor
+                                                            October 1989
+
+
+        Requirements for Internet Hosts -- Communication Layers
+
+
+Status of This Memo
+
+   This RFC is an official specification for the Internet community.  It
+   incorporates by reference, amends, corrects, and supplements the
+   primary protocol standards documents relating to hosts.  Distribution
+   of this document is unlimited.
+
+Summary
+
+   This is one RFC of a pair that defines and discusses the requirements
+   for Internet host software.  This RFC covers the communications
+   protocol layers: link layer, IP layer, and transport layer; its
+   companion RFC-1123 covers the application and support protocols.
+
+
+
+                           Table of Contents
+
+
+
+
+   1.  INTRODUCTION ...............................................    5
+      1.1  The Internet Architecture ..............................    6
+         1.1.1  Internet Hosts ....................................    6
+         1.1.2  Architectural Assumptions .........................    7
+         1.1.3  Internet Protocol Suite ...........................    8
+         1.1.4  Embedded Gateway Code .............................   10
+      1.2  General Considerations .................................   12
+         1.2.1  Continuing Internet Evolution .....................   12
+         1.2.2  Robustness Principle ..............................   12
+         1.2.3  Error Logging .....................................   13
+         1.2.4  Configuration .....................................   14
+      1.3  Reading this Document ..................................   15
+         1.3.1  Organization ......................................   15
+         1.3.2  Requirements ......................................   16
+         1.3.3  Terminology .......................................   17
+      1.4  Acknowledgments ........................................   20
+
+   2. LINK LAYER ..................................................   21
+      2.1  INTRODUCTION ...........................................   21
+
+
+
+Internet Engineering Task Force                                 [Page 1]
+
+
+
+
+RFC1122                       INTRODUCTION                  October 1989
+
+
+      2.2  PROTOCOL WALK-THROUGH ..................................   21
+      2.3  SPECIFIC ISSUES ........................................   21
+         2.3.1  Trailer Protocol Negotiation ......................   21
+         2.3.2  Address Resolution Protocol -- ARP ................   22
+            2.3.2.1  ARP Cache Validation .........................   22
+            2.3.2.2  ARP Packet Queue .............................   24
+         2.3.3  Ethernet and IEEE 802 Encapsulation ...............   24
+      2.4  LINK/INTERNET LAYER INTERFACE ..........................   25
+      2.5  LINK LAYER REQUIREMENTS SUMMARY ........................   26
+
+   3. INTERNET LAYER PROTOCOLS ....................................   27
+      3.1 INTRODUCTION ............................................   27
+      3.2  PROTOCOL WALK-THROUGH ..................................   29
+         3.2.1 Internet Protocol -- IP ............................   29
+            3.2.1.1  Version Number ...............................   29
+            3.2.1.2  Checksum .....................................   29
+            3.2.1.3  Addressing ...................................   29
+            3.2.1.4  Fragmentation and Reassembly .................   32
+            3.2.1.5  Identification ...............................   32
+            3.2.1.6  Type-of-Service ..............................   33
+            3.2.1.7  Time-to-Live .................................   34
+            3.2.1.8  Options ......................................   35
+         3.2.2 Internet Control Message Protocol -- ICMP ..........   38
+            3.2.2.1  Destination Unreachable ......................   39
+            3.2.2.2  Redirect .....................................   40
+            3.2.2.3  Source Quench ................................   41
+            3.2.2.4  Time Exceeded ................................   41
+            3.2.2.5  Parameter Problem ............................   42
+            3.2.2.6  Echo Request/Reply ...........................   42
+            3.2.2.7  Information Request/Reply ....................   43
+            3.2.2.8  Timestamp and Timestamp Reply ................   43
+            3.2.2.9  Address Mask Request/Reply ...................   45
+         3.2.3  Internet Group Management Protocol IGMP ...........   47
+      3.3  SPECIFIC ISSUES ........................................   47
+         3.3.1  Routing Outbound Datagrams ........................   47
+            3.3.1.1  Local/Remote Decision ........................   47
+            3.3.1.2  Gateway Selection ............................   48
+            3.3.1.3  Route Cache ..................................   49
+            3.3.1.4  Dead Gateway Detection .......................   51
+            3.3.1.5  New Gateway Selection ........................   55
+            3.3.1.6  Initialization ...............................   56
+         3.3.2  Reassembly ........................................   56
+         3.3.3  Fragmentation .....................................   58
+         3.3.4  Local Multihoming .................................   60
+            3.3.4.1  Introduction .................................   60
+            3.3.4.2  Multihoming Requirements .....................   61
+            3.3.4.3  Choosing a Source Address ....................   64
+         3.3.5  Source Route Forwarding ...........................   65
+
+
+
+Internet Engineering Task Force                                 [Page 2]
+
+
+
+
+RFC1122                       INTRODUCTION                  October 1989
+
+
+         3.3.6  Broadcasts ........................................   66
+         3.3.7  IP Multicasting ...................................   67
+         3.3.8  Error Reporting ...................................   69
+      3.4  INTERNET/TRANSPORT LAYER INTERFACE .....................   69
+      3.5  INTERNET LAYER REQUIREMENTS SUMMARY ....................   72
+
+   4. TRANSPORT PROTOCOLS .........................................   77
+      4.1  USER DATAGRAM PROTOCOL -- UDP ..........................   77
+         4.1.1  INTRODUCTION ......................................   77
+         4.1.2  PROTOCOL WALK-THROUGH .............................   77
+         4.1.3  SPECIFIC ISSUES ...................................   77
+            4.1.3.1  Ports ........................................   77
+            4.1.3.2  IP Options ...................................   77
+            4.1.3.3  ICMP Messages ................................   78
+            4.1.3.4  UDP Checksums ................................   78
+            4.1.3.5  UDP Multihoming ..............................   79
+            4.1.3.6  Invalid Addresses ............................   79
+         4.1.4  UDP/APPLICATION LAYER INTERFACE ...................   79
+         4.1.5  UDP REQUIREMENTS SUMMARY ..........................   80
+      4.2  TRANSMISSION CONTROL PROTOCOL -- TCP ...................   82
+         4.2.1  INTRODUCTION ......................................   82
+         4.2.2  PROTOCOL WALK-THROUGH .............................   82
+            4.2.2.1  Well-Known Ports .............................   82
+            4.2.2.2  Use of Push ..................................   82
+            4.2.2.3  Window Size ..................................   83
+            4.2.2.4  Urgent Pointer ...............................   84
+            4.2.2.5  TCP Options ..................................   85
+            4.2.2.6  Maximum Segment Size Option ..................   85
+            4.2.2.7  TCP Checksum .................................   86
+            4.2.2.8  TCP Connection State Diagram .................   86
+            4.2.2.9  Initial Sequence Number Selection ............   87
+            4.2.2.10  Simultaneous Open Attempts ..................   87
+            4.2.2.11  Recovery from Old Duplicate SYN .............   87
+            4.2.2.12  RST Segment .................................   87
+            4.2.2.13  Closing a Connection ........................   87
+            4.2.2.14  Data Communication ..........................   89
+            4.2.2.15  Retransmission Timeout ......................   90
+            4.2.2.16  Managing the Window .........................   91
+            4.2.2.17  Probing Zero Windows ........................   92
+            4.2.2.18  Passive OPEN Calls ..........................   92
+            4.2.2.19  Time to Live ................................   93
+            4.2.2.20  Event Processing ............................   93
+            4.2.2.21  Acknowledging Queued Segments ...............   94
+         4.2.3  SPECIFIC ISSUES ...................................   95
+            4.2.3.1  Retransmission Timeout Calculation ...........   95
+            4.2.3.2  When to Send an ACK Segment ..................   96
+            4.2.3.3  When to Send a Window Update .................   97
+            4.2.3.4  When to Send Data ............................   98
+
+
+
+Internet Engineering Task Force                                 [Page 3]
+
+
+
+
+RFC1122                       INTRODUCTION                  October 1989
+
+
+            4.2.3.5  TCP Connection Failures ......................  100
+            4.2.3.6  TCP Keep-Alives ..............................  101
+            4.2.3.7  TCP Multihoming ..............................  103
+            4.2.3.8  IP Options ...................................  103
+            4.2.3.9  ICMP Messages ................................  103
+            4.2.3.10  Remote Address Validation ...................  104
+            4.2.3.11  TCP Traffic Patterns ........................  104
+            4.2.3.12  Efficiency ..................................  105
+         4.2.4  TCP/APPLICATION LAYER INTERFACE ...................  106
+            4.2.4.1  Asynchronous Reports .........................  106
+            4.2.4.2  Type-of-Service ..............................  107
+            4.2.4.3  Flush Call ...................................  107
+            4.2.4.4  Multihoming ..................................  108
+         4.2.5  TCP REQUIREMENT SUMMARY ...........................  108
+
+   5.  REFERENCES .................................................  112
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Internet Engineering Task Force                                 [Page 4]
+
+
+
+
+RFC1122                       INTRODUCTION                  October 1989
+
+
+1.  INTRODUCTION
+
+   This document is one of a pair that defines and discusses the
+   requirements for host system implementations of the Internet protocol
+   suite.  This RFC covers the communication protocol layers:  link
+   layer, IP layer, and transport layer.  Its companion RFC,
+   "Requirements for Internet Hosts -- Application and Support"
+   [INTRO:1], covers the application layer protocols.  This document
+   should also be read in conjunction with "Requirements for Internet
+   Gateways" [INTRO:2].
+
+   These documents are intended to provide guidance for vendors,
+   implementors, and users of Internet communication software.  They
+   represent the consensus of a large body of technical experience and
+   wisdom, contributed by the members of the Internet research and
+   vendor communities.
+
+   This RFC enumerates standard protocols that a host connected to the
+   Internet must use, and it incorporates by reference the RFCs and
+   other documents describing the current specifications for these
+   protocols.  It corrects errors in the referenced documents and adds
+   additional discussion and guidance for an implementor.
+
+   For each protocol, this document also contains an explicit set of
+   requirements, recommendations, and options.  The reader must
+   understand that the list of requirements in this document is
+   incomplete by itself; the complete set of requirements for an
+   Internet host is primarily defined in the standard protocol
+   specification documents, with the corrections, amendments, and
+   supplements contained in this RFC.
+
+   A good-faith implementation of the protocols that was produced after
+   careful reading of the RFC's and with some interaction with the
+   Internet technical community, and that followed good communications
+   software engineering practices, should differ from the requirements
+   of this document in only minor ways.  Thus, in many cases, the
+   "requirements" in this RFC are already stated or implied in the
+   standard protocol documents, so that their inclusion here is, in a
+   sense, redundant.  However, they were included because some past
+   implementation has made the wrong choice, causing problems of
+   interoperability, performance, and/or robustness.
+
+   This document includes discussion and explanation of many of the
+   requirements and recommendations.  A simple list of requirements
+   would be dangerous, because:
+
+   o    Some required features are more important than others, and some
+        features are optional.
+
+
+
+Internet Engineering Task Force                                 [Page 5]
+
+
+
+
+RFC1122                       INTRODUCTION                  October 1989
+
+
+   o    There may be valid reasons why particular vendor products that
+        are designed for restricted contexts might choose to use
+        different specifications.
+
+   However, the specifications of this document must be followed to meet
+   the general goal of arbitrary host interoperation across the
+   diversity and complexity of the Internet system.  Although most
+   current implementations fail to meet these requirements in various
+   ways, some minor and some major, this specification is the ideal
+   towards which we need to move.
+
+   These requirements are based on the current level of Internet
+   architecture.  This document will be updated as required to provide
+   additional clarifications or to include additional information in
+   those areas in which specifications are still evolving.
+
+   This introductory section begins with a brief overview of the
+   Internet architecture as it relates to hosts, and then gives some
+   general advice to host software vendors.  Finally, there is some
+   guidance on reading the rest of the document and some terminology.
+
+   1.1  The Internet Architecture
+
+      General background and discussion on the Internet architecture and
+      supporting protocol suite can be found in the DDN Protocol
+      Handbook [INTRO:3]; for background see for example [INTRO:9],
+      [INTRO:10], and [INTRO:11].  Reference [INTRO:5] describes the
+      procedure for obtaining Internet protocol documents, while
+      [INTRO:6] contains a list of the numbers assigned within Internet
+      protocols.
+
+      1.1.1  Internet Hosts
+
+         A host computer, or simply "host," is the ultimate consumer of
+         communication services.  A host generally executes application
+         programs on behalf of user(s), employing network and/or
+         Internet communication services in support of this function.
+         An Internet host corresponds to the concept of an "End-System"
+         used in the OSI protocol suite [INTRO:13].
+
+         An Internet communication system consists of interconnected
+         packet networks supporting communication among host computers
+         using the Internet protocols.  The networks are interconnected
+         using packet-switching computers called "gateways" or "IP
+         routers" by the Internet community, and "Intermediate Systems"
+         by the OSI world [INTRO:13].  The RFC "Requirements for
+         Internet Gateways" [INTRO:2] contains the official
+         specifications for Internet gateways.  That RFC together with
+
+
+
+Internet Engineering Task Force                                 [Page 6]
+
+
+
+
+RFC1122                       INTRODUCTION                  October 1989
+
+
+         the present document and its companion [INTRO:1] define the
+         rules for the current realization of the Internet architecture.
+
+         Internet hosts span a wide range of size, speed, and function.
+         They range in size from small microprocessors through
+         workstations to mainframes and supercomputers.  In function,
+         they range from single-purpose hosts (such as terminal servers)
+         to full-service hosts that support a variety of online network
+         services, typically including remote login, file transfer, and
+         electronic mail.
+
+         A host is generally said to be multihomed if it has more than
+         one interface to the same or to different networks.  See
+         Section 1.3.3 on "Terminology".
+
+      1.1.2  Architectural Assumptions
+
+         The current Internet architecture is based on a set of
+         assumptions about the communication system.  The assumptions
+         most relevant to hosts are as follows:
+
+         (a)  The Internet is a network of networks.
+
+              Each host is directly connected to some particular
+              network(s); its connection to the Internet is only
+              conceptual.  Two hosts on the same network communicate
+              with each other using the same set of protocols that they
+              would use to communicate with hosts on distant networks.
+
+         (b)  Gateways don't keep connection state information.
+
+              To improve robustness of the communication system,
+              gateways are designed to be stateless, forwarding each IP
+              datagram independently of other datagrams.  As a result,
+              redundant paths can be exploited to provide robust service
+              in spite of failures of intervening gateways and networks.
+
+              All state information required for end-to-end flow control
+              and reliability is implemented in the hosts, in the
+              transport layer or in application programs.  All
+              connection control information is thus co-located with the
+              end points of the communication, so it will be lost only
+              if an end point fails.
+
+         (c)  Routing complexity should be in the gateways.
+
+              Routing is a complex and difficult problem, and ought to
+              be performed by the gateways, not the hosts.  An important
+
+
+
+Internet Engineering Task Force                                 [Page 7]
+
+
+
+
+RFC1122                       INTRODUCTION                  October 1989
+
+
+              objective is to insulate host software from changes caused
+              by the inevitable evolution of the Internet routing
+              architecture.
+
+         (d)  The System must tolerate wide network variation.
+
+              A basic objective of the Internet design is to tolerate a
+              wide range of network characteristics -- e.g., bandwidth,
+              delay, packet loss, packet reordering, and maximum packet
+              size.  Another objective is robustness against failure of
+              individual networks, gateways, and hosts, using whatever
+              bandwidth is still available.  Finally, the goal is full
+              "open system interconnection": an Internet host must be
+              able to interoperate robustly and effectively with any
+              other Internet host, across diverse Internet paths.
+
+              Sometimes host implementors have designed for less
+              ambitious goals.  For example, the LAN environment is
+              typically much more benign than the Internet as a whole;
+              LANs have low packet loss and delay and do not reorder
+              packets.  Some vendors have fielded host implementations
+              that are adequate for a simple LAN environment, but work
+              badly for general interoperation.  The vendor justifies
+              such a product as being economical within the restricted
+              LAN market.  However, isolated LANs seldom stay isolated
+              for long; they are soon gatewayed to each other, to
+              organization-wide internets, and eventually to the global
+              Internet system.  In the end, neither the customer nor the
+              vendor is served by incomplete or substandard Internet
+              host software.
+
+              The requirements spelled out in this document are designed
+              for a full-function Internet host, capable of full
+              interoperation over an arbitrary Internet path.
+
+
+      1.1.3  Internet Protocol Suite
+
+         To communicate using the Internet system, a host must implement
+         the layered set of protocols comprising the Internet protocol
+         suite.  A host typically must implement at least one protocol
+         from each layer.
+
+         The protocol layers used in the Internet architecture are as
+         follows [INTRO:4]:
+
+
+         o  Application Layer
+
+
+
+Internet Engineering Task Force                                 [Page 8]
+
+
+
+
+RFC1122                       INTRODUCTION                  October 1989
+
+
+              The application layer is the top layer of the Internet
+              protocol suite.  The Internet suite does not further
+              subdivide the application layer, although some of the
+              Internet application layer protocols do contain some
+              internal sub-layering.  The application layer of the
+              Internet suite essentially combines the functions of the
+              top two layers -- Presentation and Application -- of the
+              OSI reference model.
+
+              We distinguish two categories of application layer
+              protocols:  user protocols that provide service directly
+              to users, and support protocols that provide common system
+              functions.  Requirements for user and support protocols
+              will be found in the companion RFC [INTRO:1].
+
+              The most common Internet user protocols are:
+
+                o  Telnet (remote login)
+                o  FTP    (file transfer)
+                o  SMTP   (electronic mail delivery)
+
+              There are a number of other standardized user protocols
+              [INTRO:4] and many private user protocols.
+
+              Support protocols, used for host name mapping, booting,
+              and management, include SNMP, BOOTP, RARP, and the Domain
+              Name System (DNS) protocols.
+
+
+         o  Transport Layer
+
+              The transport layer provides end-to-end communication
+              services for applications.  There are two primary
+              transport layer protocols at present:
+
+                o Transmission Control Protocol (TCP)
+                o User Datagram Protocol (UDP)
+
+              TCP is a reliable connection-oriented transport service
+              that provides end-to-end reliability, resequencing, and
+              flow control.  UDP is a connectionless ("datagram")
+              transport service.
+
+              Other transport protocols have been developed by the
+              research community, and the set of official Internet
+              transport protocols may be expanded in the future.
+
+              Transport layer protocols are discussed in Chapter 4.
+
+
+
+Internet Engineering Task Force                                 [Page 9]
+
+
+
+
+RFC1122                       INTRODUCTION                  October 1989
+
+
+         o  Internet Layer
+
+              All Internet transport protocols use the Internet Protocol
+              (IP) to carry data from source host to destination host.
+              IP is a connectionless or datagram internetwork service,
+              providing no end-to-end delivery guarantees. Thus, IP
+              datagrams may arrive at the destination host damaged,
+              duplicated, out of order, or not at all.  The layers above
+              IP are responsible for reliable delivery service when it
+              is required.  The IP protocol includes provision for
+              addressing, type-of-service specification, fragmentation
+              and reassembly, and security information.
+
+              The datagram or connectionless nature of the IP protocol
+              is a fundamental and characteristic feature of the
+              Internet architecture.  Internet IP was the model for the
+              OSI Connectionless Network Protocol [INTRO:12].
+
+              ICMP is a control protocol that is considered to be an
+              integral part of IP, although it is architecturally
+              layered upon IP, i.e., it uses IP to carry its data end-
+              to-end just as a transport protocol like TCP or UDP does.
+              ICMP provides error reporting, congestion reporting, and
+              first-hop gateway redirection.
+
+              IGMP is an Internet layer protocol used for establishing
+              dynamic host groups for IP multicasting.
+
+              The Internet layer protocols IP, ICMP, and IGMP are
+              discussed in Chapter 3.
+
+
+         o  Link Layer
+
+              To communicate on its directly-connected network, a host
+              must implement the communication protocol used to
+              interface to that network.  We call this a link layer or
+              media-access layer protocol.
+
+              There is a wide variety of link layer protocols,
+              corresponding to the many different types of networks.
+              See Chapter 2.
+
+
+      1.1.4  Embedded Gateway Code
+
+         Some Internet host software includes embedded gateway
+         functionality, so that these hosts can forward packets as a
+
+
+
+Internet Engineering Task Force                                [Page 10]
+
+
+
+
+RFC1122                       INTRODUCTION                  October 1989
+
+
+         gateway would, while still performing the application layer
+         functions of a host.
+
+         Such dual-purpose systems must follow the Gateway Requirements
+         RFC [INTRO:2] with respect to their gateway functions, and
+         must follow the present document with respect to their host
+         functions.  In all overlapping cases, the two specifications
+         should be in agreement.
+
+         There are varying opinions in the Internet community about
+         embedded gateway functionality.  The main arguments are as
+         follows:
+
+         o    Pro: in a local network environment where networking is
+              informal, or in isolated internets, it may be convenient
+              and economical to use existing host systems as gateways.
+
+              There is also an architectural argument for embedded
+              gateway functionality: multihoming is much more common
+              than originally foreseen, and multihoming forces a host to
+              make routing decisions as if it were a gateway.  If the
+              multihomed host contains an embedded gateway, it will
+              have full routing knowledge and as a result will be able
+              to make more optimal routing decisions.
+
+         o    Con: Gateway algorithms and protocols are still changing,
+              and they will continue to change as the Internet system
+              grows larger.  Attempting to include a general gateway
+              function within the host IP layer will force host system
+              maintainers to track these (more frequent) changes.  Also,
+              a larger pool of gateway implementations will make
+              coordinating the changes more difficult.  Finally, the
+              complexity of a gateway IP layer is somewhat greater than
+              that of a host, making the implementation and operation
+              tasks more complex.
+
+              In addition, the style of operation of some hosts is not
+              appropriate for providing stable and robust gateway
+              service.
+
+         There is considerable merit in both of these viewpoints.  One
+         conclusion can be drawn: a host administrator must have
+         conscious control over whether or not a given host acts as a
+         gateway.  See Section 3.1 for the detailed requirements.
+
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 11]
+
+
+
+
+RFC1122                       INTRODUCTION                  October 1989
+
+
+   1.2  General Considerations
+
+      There are two important lessons that vendors of Internet host
+      software have learned and which a new vendor should consider
+      seriously.
+
+      1.2.1  Continuing Internet Evolution
+
+         The enormous growth of the Internet has revealed problems of
+         management and scaling in a large datagram-based packet
+         communication system.  These problems are being addressed, and
+         as a result there will be continuing evolution of the
+         specifications described in this document.  These changes will
+         be carefully planned and controlled, since there is extensive
+         participation in this planning by the vendors and by the
+         organizations responsible for operations of the networks.
+
+         Development, evolution, and revision are characteristic of
+         computer network protocols today, and this situation will
+         persist for some years.  A vendor who develops computer
+         communication software for the Internet protocol suite (or any
+         other protocol suite!) and then fails to maintain and update
+         that software for changing specifications is going to leave a
+         trail of unhappy customers.  The Internet is a large
+         communication network, and the users are in constant contact
+         through it.  Experience has shown that knowledge of
+         deficiencies in vendor software propagates quickly through the
+         Internet technical community.
+
+      1.2.2  Robustness Principle
+
+         At every layer of the protocols, there is a general rule whose
+         application can lead to enormous benefits in robustness and
+         interoperability [IP:1]:
+
+                "Be liberal in what you accept, and
+                 conservative in what you send"
+
+         Software should be written to deal with every conceivable
+         error, no matter how unlikely; sooner or later a packet will
+         come in with that particular combination of errors and
+         attributes, and unless the software is prepared, chaos can
+         ensue.  In general, it is best to assume that the network is
+         filled with malevolent entities that will send in packets
+         designed to have the worst possible effect.  This assumption
+         will lead to suitable protective design, although the most
+         serious problems in the Internet have been caused by
+         unenvisaged mechanisms triggered by low-probability events;
+
+
+
+Internet Engineering Task Force                                [Page 12]
+
+
+
+
+RFC1122                       INTRODUCTION                  October 1989
+
+
+         mere human malice would never have taken so devious a course!
+
+         Adaptability to change must be designed into all levels of
+         Internet host software.  As a simple example, consider a
+         protocol specification that contains an enumeration of values
+         for a particular header field -- e.g., a type field, a port
+         number, or an error code; this enumeration must be assumed to
+         be incomplete.  Thus, if a protocol specification defines four
+         possible error codes, the software must not break when a fifth
+         code shows up.  An undefined code might be logged (see below),
+         but it must not cause a failure.
+
+         The second part of the principle is almost as important:
+         software on other hosts may contain deficiencies that make it
+         unwise to exploit legal but obscure protocol features.  It is
+         unwise to stray far from the obvious and simple, lest untoward
+         effects result elsewhere.  A corollary of this is "watch out
+         for misbehaving hosts"; host software should be prepared, not
+         just to survive other misbehaving hosts, but also to cooperate
+         to limit the amount of disruption such hosts can cause to the
+         shared communication facility.
+
+      1.2.3  Error Logging
+
+         The Internet includes a great variety of host and gateway
+         systems, each implementing many protocols and protocol layers,
+         and some of these contain bugs and mis-features in their
+         Internet protocol software.  As a result of complexity,
+         diversity, and distribution of function, the diagnosis of
+         Internet problems is often very difficult.
+
+         Problem diagnosis will be aided if host implementations include
+         a carefully designed facility for logging erroneous or
+         "strange" protocol events.  It is important to include as much
+         diagnostic information as possible when an error is logged.  In
+         particular, it is often useful to record the header(s) of a
+         packet that caused an error.  However, care must be taken to
+         ensure that error logging does not consume prohibitive amounts
+         of resources or otherwise interfere with the operation of the
+         host.
+
+         There is a tendency for abnormal but harmless protocol events
+         to overflow error logging files; this can be avoided by using a
+         "circular" log, or by enabling logging only while diagnosing a
+         known failure.  It may be useful to filter and count duplicate
+         successive messages.  One strategy that seems to work well is:
+         (1) always count abnormalities and make such counts accessible
+         through the management protocol (see [INTRO:1]); and (2) allow
+
+
+
+Internet Engineering Task Force                                [Page 13]
+
+
+
+
+RFC1122                       INTRODUCTION                  October 1989
+
+
+         the logging of a great variety of events to be selectively
+         enabled.  For example, it might be useful to be able to "log
+         everything" or to "log everything for host X".
+
+         Note that different managements may have differing policies
+         about the amount of error logging that they want normally
+         enabled in a host.  Some will say, "if it doesn't hurt me, I
+         don't want to know about it", while others will want to take a
+         more watchful and aggressive attitude about detecting and
+         removing protocol abnormalities.
+
+      1.2.4  Configuration
+
+         It would be ideal if a host implementation of the Internet
+         protocol suite could be entirely self-configuring.  This would
+         allow the whole suite to be implemented in ROM or cast into
+         silicon, it would simplify diskless workstations, and it would
+         be an immense boon to harried LAN administrators as well as
+         system vendors.  We have not reached this ideal; in fact, we
+         are not even close.
+
+         At many points in this document, you will find a requirement
+         that a parameter be a configurable option.  There are several
+         different reasons behind such requirements.  In a few cases,
+         there is current uncertainty or disagreement about the best
+         value, and it may be necessary to update the recommended value
+         in the future.  In other cases, the value really depends on
+         external factors -- e.g., the size of the host and the
+         distribution of its communication load, or the speeds and
+         topology of nearby networks -- and self-tuning algorithms are
+         unavailable and may be insufficient.  In some cases,
+         configurability is needed because of administrative
+         requirements.
+
+         Finally, some configuration options are required to communicate
+         with obsolete or incorrect implementations of the protocols,
+         distributed without sources, that unfortunately persist in many
+         parts of the Internet.  To make correct systems coexist with
+         these faulty systems, administrators often have to "mis-
+         configure" the correct systems.  This problem will correct
+         itself gradually as the faulty systems are retired, but it
+         cannot be ignored by vendors.
+
+         When we say that a parameter must be configurable, we do not
+         intend to require that its value be explicitly read from a
+         configuration file at every boot time.  We recommend that
+         implementors set up a default for each parameter, so a
+         configuration file is only necessary to override those defaults
+
+
+
+Internet Engineering Task Force                                [Page 14]
+
+
+
+
+RFC1122                       INTRODUCTION                  October 1989
+
+
+         that are inappropriate in a particular installation.  Thus, the
+         configurability requirement is an assurance that it will be
+         POSSIBLE to override the default when necessary, even in a
+         binary-only or ROM-based product.
+
+         This document requires a particular value for such defaults in
+         some cases.  The choice of default is a sensitive issue when
+         the configuration item controls the accommodation to existing
+         faulty systems.  If the Internet is to converge successfully to
+         complete interoperability, the default values built into
+         implementations must implement the official protocol, not
+         "mis-configurations" to accommodate faulty implementations.
+         Although marketing considerations have led some vendors to
+         choose mis-configuration defaults, we urge vendors to choose
+         defaults that will conform to the standard.
+
+         Finally, we note that a vendor needs to provide adequate
+         documentation on all configuration parameters, their limits and
+         effects.
+
+
+   1.3  Reading this Document
+
+      1.3.1  Organization
+
+         Protocol layering, which is generally used as an organizing
+         principle in implementing network software, has also been used
+         to organize this document.  In describing the rules, we assume
+         that an implementation does strictly mirror the layering of the
+         protocols.  Thus, the following three major sections specify
+         the requirements for the link layer, the internet layer, and
+         the transport layer, respectively.  A companion RFC [INTRO:1]
+         covers application level software.  This layerist organization
+         was chosen for simplicity and clarity.
+
+         However, strict layering is an imperfect model, both for the
+         protocol suite and for recommended implementation approaches.
+         Protocols in different layers interact in complex and sometimes
+         subtle ways, and particular functions often involve multiple
+         layers.  There are many design choices in an implementation,
+         many of which involve creative "breaking" of strict layering.
+         Every implementor is urged to read references [INTRO:7] and
+         [INTRO:8].
+
+         This document describes the conceptual service interface
+         between layers using a functional ("procedure call") notation,
+         like that used in the TCP specification [TCP:1].  A host
+         implementation must support the logical information flow
+
+
+
+Internet Engineering Task Force                                [Page 15]
+
+
+
+
+RFC1122                       INTRODUCTION                  October 1989
+
+
+         implied by these calls, but need not literally implement the
+         calls themselves.  For example, many implementations reflect
+         the coupling between the transport layer and the IP layer by
+         giving them shared access to common data structures.  These
+         data structures, rather than explicit procedure calls, are then
+         the agency for passing much of the information that is
+         required.
+
+         In general, each major section of this document is organized
+         into the following subsections:
+
+         (1)  Introduction
+
+         (2)  Protocol Walk-Through -- considers the protocol
+              specification documents section-by-section, correcting
+              errors, stating requirements that may be ambiguous or
+              ill-defined, and providing further clarification or
+              explanation.
+
+         (3)  Specific Issues -- discusses protocol design and
+              implementation issues that were not included in the walk-
+              through.
+
+         (4)  Interfaces -- discusses the service interface to the next
+              higher layer.
+
+         (5)  Summary -- contains a summary of the requirements of the
+              section.
+
+
+         Under many of the individual topics in this document, there is
+         parenthetical material labeled "DISCUSSION" or
+         "IMPLEMENTATION". This material is intended to give
+         clarification and explanation of the preceding requirements
+         text.  It also includes some suggestions on possible future
+         directions or developments.  The implementation material
+         contains suggested approaches that an implementor may want to
+         consider.
+
+         The summary sections are intended to be guides and indexes to
+         the text, but are necessarily cryptic and incomplete.  The
+         summaries should never be used or referenced separately from
+         the complete RFC.
+
+      1.3.2  Requirements
+
+         In this document, the words that are used to define the
+         significance of each particular requirement are capitalized.
+
+
+
+Internet Engineering Task Force                                [Page 16]
+
+
+
+
+RFC1122                       INTRODUCTION                  October 1989
+
+
+         These words are:
+
+         *    "MUST"
+
+              This word or the adjective "REQUIRED" means that the item
+              is an absolute requirement of the specification.
+
+         *    "SHOULD"
+
+              This word or the adjective "RECOMMENDED" means that there
+              may exist valid reasons in particular circumstances to
+              ignore this item, but the full implications should be
+              understood and the case carefully weighed before choosing
+              a different course.
+
+         *    "MAY"
+
+              This word or the adjective "OPTIONAL" means that this item
+              is truly optional.  One vendor may choose to include the
+              item because a particular marketplace requires it or
+              because it enhances the product, for example; another
+              vendor may omit the same item.
+
+
+         An implementation is not compliant if it fails to satisfy one
+         or more of the MUST requirements for the protocols it
+         implements.  An implementation that satisfies all the MUST and
+         all the SHOULD requirements for its protocols is said to be
+         "unconditionally compliant"; one that satisfies all the MUST
+         requirements but not all the SHOULD requirements for its
+         protocols is said to be "conditionally compliant".
+
+      1.3.3  Terminology
+
+         This document uses the following technical terms:
+
+         Segment
+              A segment is the unit of end-to-end transmission in the
+              TCP protocol.  A segment consists of a TCP header followed
+              by application data.  A segment is transmitted by
+              encapsulation inside an IP datagram.
+
+         Message
+              In this description of the lower-layer protocols, a
+              message is the unit of transmission in a transport layer
+              protocol.  In particular, a TCP segment is a message.  A
+              message consists of a transport protocol header followed
+              by application protocol data.  To be transmitted end-to-
+
+
+
+Internet Engineering Task Force                                [Page 17]
+
+
+
+
+RFC1122                       INTRODUCTION                  October 1989
+
+
+              end through the Internet, a message must be encapsulated
+              inside a datagram.
+
+         IP Datagram
+              An IP datagram is the unit of end-to-end transmission in
+              the IP protocol.  An IP datagram consists of an IP header
+              followed by transport layer data, i.e., of an IP header
+              followed by a message.
+
+              In the description of the internet layer (Section 3), the
+              unqualified term "datagram" should be understood to refer
+              to an IP datagram.
+
+         Packet
+              A packet is the unit of data passed across the interface
+              between the internet layer and the link layer.  It
+              includes an IP header and data.  A packet may be a
+              complete IP datagram or a fragment of an IP datagram.
+
+         Frame
+              A frame is the unit of transmission in a link layer
+              protocol, and consists of a link-layer header followed by
+              a packet.
+
+         Connected Network
+              A network to which a host is interfaced is often known as
+              the "local network" or the "subnetwork" relative to that
+              host.  However, these terms can cause confusion, and
+              therefore we use the term "connected network" in this
+              document.
+
+         Multihomed
+              A host is said to be multihomed if it has multiple IP
+              addresses.  For a discussion of multihoming, see Section
+              3.3.4 below.
+
+         Physical network interface
+              This is a physical interface to a connected network and
+              has a (possibly unique) link-layer address.  Multiple
+              physical network interfaces on a single host may share the
+              same link-layer address, but the address must be unique
+              for different hosts on the same physical network.
+
+         Logical [network] interface
+              We define a logical [network] interface to be a logical
+              path, distinguished by a unique IP address, to a connected
+              network.  See Section 3.3.4.
+
+
+
+
+Internet Engineering Task Force                                [Page 18]
+
+
+
+
+RFC1122                       INTRODUCTION                  October 1989
+
+
+         Specific-destination address
+              This is the effective destination address of a datagram,
+              even if it is broadcast or multicast; see Section 3.2.1.3.
+
+         Path
+              At a given moment, all the IP datagrams from a particular
+              source host to a particular destination host will
+              typically traverse the same sequence of gateways.  We use
+              the term "path" for this sequence.  Note that a path is
+              uni-directional; it is not unusual to have different paths
+              in the two directions between a given host pair.
+
+         MTU
+              The maximum transmission unit, i.e., the size of the
+              largest packet that can be transmitted.
+
+
+         The terms frame, packet, datagram, message, and segment are
+         illustrated by the following schematic diagrams:
+
+         A. Transmission on connected network:
+           _______________________________________________
+          | LL hdr | IP hdr |         (data)              |
+          |________|________|_____________________________|
+
+           <---------- Frame ----------------------------->
+                    <----------Packet -------------------->
+
+
+         B. Before IP fragmentation or after IP reassembly:
+                    ______________________________________
+                   | IP hdr | transport| Application Data |
+                   |________|____hdr___|__________________|
+
+                    <--------  Datagram ------------------>
+                             <-------- Message ----------->
+           or, for TCP:
+                    ______________________________________
+                   | IP hdr |  TCP hdr | Application Data |
+                   |________|__________|__________________|
+
+                    <--------  Datagram ------------------>
+                             <-------- Segment ----------->
+
+
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 19]
+
+
+
+
+RFC1122                       INTRODUCTION                  October 1989
+
+
+   1.4  Acknowledgments
+
+      This document incorporates contributions and comments from a large
+      group of Internet protocol experts, including representatives of
+      university and research labs, vendors, and government agencies.
+      It was assembled primarily by the Host Requirements Working Group
+      of the Internet Engineering Task Force (IETF).
+
+      The Editor would especially like to acknowledge the tireless
+      dedication of the following people, who attended many long
+      meetings and generated 3 million bytes of electronic mail over the
+      past 18 months in pursuit of this document: Philip Almquist, Dave
+      Borman (Cray Research), Noel Chiappa, Dave Crocker (DEC), Steve
+      Deering (Stanford), Mike Karels (Berkeley), Phil Karn (Bellcore),
+      John Lekashman (NASA), Charles Lynn (BBN), Keith McCloghrie (TWG),
+      Paul Mockapetris (ISI), Thomas Narten (Purdue), Craig Partridge
+      (BBN), Drew Perkins (CMU), and James Van Bokkelen (FTP Software).
+
+      In addition, the following people made major contributions to the
+      effort: Bill Barns (Mitre), Steve Bellovin (AT&T), Mike Brescia
+      (BBN), Ed Cain (DCA), Annette DeSchon (ISI), Martin Gross (DCA),
+      Phill Gross (NRI), Charles Hedrick (Rutgers), Van Jacobson (LBL),
+      John Klensin (MIT), Mark Lottor (SRI), Milo Medin (NASA), Bill
+      Melohn (Sun Microsystems), Greg Minshall (Kinetics), Jeff Mogul
+      (DEC), John Mullen (CMC), Jon Postel (ISI), John Romkey (Epilogue
+      Technology), and Mike StJohns (DCA).  The following also made
+      significant contributions to particular areas: Eric Allman
+      (Berkeley), Rob Austein (MIT), Art Berggreen (ACC), Keith Bostic
+      (Berkeley), Vint Cerf (NRI), Wayne Hathaway (NASA), Matt Korn
+      (IBM), Erik Naggum (Naggum Software, Norway), Robert Ullmann
+      (Prime Computer), David Waitzman (BBN), Frank Wancho (USA), Arun
+      Welch (Ohio State), Bill Westfield (Cisco), and Rayan Zachariassen
+      (Toronto).
+
+      We are grateful to all, including any contributors who may have
+      been inadvertently omitted from this list.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 20]
+
+
+
+
+RFC1122                        LINK LAYER                   October 1989
+
+
+2. LINK LAYER
+
+   2.1  INTRODUCTION
+
+      All Internet systems, both hosts and gateways, have the same
+      requirements for link layer protocols.  These requirements are
+      given in Chapter 3 of "Requirements for Internet Gateways"
+      [INTRO:2], augmented with the material in this section.
+
+   2.2  PROTOCOL WALK-THROUGH
+
+      None.
+
+   2.3  SPECIFIC ISSUES
+
+      2.3.1  Trailer Protocol Negotiation
+
+         The trailer protocol [LINK:1] for link-layer encapsulation MAY
+         be used, but only when it has been verified that both systems
+         (host or gateway) involved in the link-layer communication
+         implement trailers.  If the system does not dynamically
+         negotiate use of the trailer protocol on a per-destination
+         basis, the default configuration MUST disable the protocol.
+
+         DISCUSSION:
+              The trailer protocol is a link-layer encapsulation
+              technique that rearranges the data contents of packets
+              sent on the physical network.  In some cases, trailers
+              improve the throughput of higher layer protocols by
+              reducing the amount of data copying within the operating
+              system.  Higher layer protocols are unaware of trailer
+              use, but both the sending and receiving host MUST
+              understand the protocol if it is used.
+
+              Improper use of trailers can result in very confusing
+              symptoms.  Only packets with specific size attributes are
+              encapsulated using trailers, and typically only a small
+              fraction of the packets being exchanged have these
+              attributes.  Thus, if a system using trailers exchanges
+              packets with a system that does not, some packets
+              disappear into a black hole while others are delivered
+              successfully.
+
+         IMPLEMENTATION:
+              On an Ethernet, packets encapsulated with trailers use a
+              distinct Ethernet type [LINK:1], and trailer negotiation
+              is performed at the time that ARP is used to discover the
+              link-layer address of a destination system.
+
+
+
+Internet Engineering Task Force                                [Page 21]
+
+
+
+
+RFC1122                        LINK LAYER                   October 1989
+
+
+              Specifically, the ARP exchange is completed in the usual
+              manner using the normal IP protocol type, but a host that
+              wants to speak trailers will send an additional "trailer
+              ARP reply" packet, i.e., an ARP reply that specifies the
+              trailer encapsulation protocol type but otherwise has the
+              format of a normal ARP reply.  If a host configured to use
+              trailers receives a trailer ARP reply message from a
+              remote machine, it can add that machine to the list of
+              machines that understand trailers, e.g., by marking the
+              corresponding entry in the ARP cache.
+
+              Hosts wishing to receive trailer encapsulations send
+              trailer ARP replies whenever they complete exchanges of
+              normal ARP messages for IP.  Thus, a host that received an
+              ARP request for its IP protocol address would send a
+              trailer ARP reply in addition to the normal IP ARP reply;
+              a host that sent the IP ARP request would send a trailer
+              ARP reply when it received the corresponding IP ARP reply.
+              In this way, either the requesting or responding host in
+              an IP ARP exchange may request that it receive trailer
+              encapsulations.
+
+              This scheme, using extra trailer ARP reply packets rather
+              than sending an ARP request for the trailer protocol type,
+              was designed to avoid a continuous exchange of ARP packets
+              with a misbehaving host that, contrary to any
+              specification or common sense, responded to an ARP reply
+              for trailers with another ARP reply for IP.  This problem
+              is avoided by sending a trailer ARP reply in response to
+              an IP ARP reply only when the IP ARP reply answers an
+              outstanding request; this is true when the hardware
+              address for the host is still unknown when the IP ARP
+              reply is received.  A trailer ARP reply may always be sent
+              along with an IP ARP reply responding to an IP ARP
+              request.
+
+      2.3.2  Address Resolution Protocol -- ARP
+
+         2.3.2.1  ARP Cache Validation
+
+            An implementation of the Address Resolution Protocol (ARP)
+            [LINK:2] MUST provide a mechanism to flush out-of-date cache
+            entries.  If this mechanism involves a timeout, it SHOULD be
+            possible to configure the timeout value.
+
+            A mechanism to prevent ARP flooding (repeatedly sending an
+            ARP Request for the same IP address, at a high rate) MUST be
+            included.  The recommended maximum rate is 1 per second per
+
+
+
+Internet Engineering Task Force                                [Page 22]
+
+
+
+
+RFC1122                        LINK LAYER                   October 1989
+
+
+            destination.
+
+            DISCUSSION:
+                 The ARP specification [LINK:2] suggests but does not
+                 require a timeout mechanism to invalidate cache entries
+                 when hosts change their Ethernet addresses.  The
+                 prevalence of proxy ARP (see Section 2.4 of [INTRO:2])
+                 has significantly increased the likelihood that cache
+                 entries in hosts will become invalid, and therefore
+                 some ARP-cache invalidation mechanism is now required
+                 for hosts.  Even in the absence of proxy ARP, a long-
+                 period cache timeout is useful in order to
+                 automatically correct any bad ARP data that might have
+                 been cached.
+
+            IMPLEMENTATION:
+                 Four mechanisms have been used, sometimes in
+                 combination, to flush out-of-date cache entries.
+
+                 (1)  Timeout -- Periodically time out cache entries,
+                      even if they are in use.  Note that this timeout
+                      should be restarted when the cache entry is
+                      "refreshed" (by observing the source fields,
+                      regardless of target address, of an ARP broadcast
+                      from the system in question).  For proxy ARP
+                      situations, the timeout needs to be on the order
+                      of a minute.
+
+                 (2)  Unicast Poll -- Actively poll the remote host by
+                      periodically sending a point-to-point ARP Request
+                      to it, and delete the entry if no ARP Reply is
+                      received from N successive polls.  Again, the
+                      timeout should be on the order of a minute, and
+                      typically N is 2.
+
+                 (3)  Link-Layer Advice -- If the link-layer driver
+                      detects a delivery problem, flush the
+                      corresponding ARP cache entry.
+
+                 (4)  Higher-layer Advice -- Provide a call from the
+                      Internet layer to the link layer to indicate a
+                      delivery problem.  The effect of this call would
+                      be to invalidate the corresponding cache entry.
+                      This call would be analogous to the
+                      "ADVISE_DELIVPROB()" call from the transport layer
+                      to the Internet layer (see Section 3.4), and in
+                      fact the ADVISE_DELIVPROB routine might in turn
+                      call the link-layer advice routine to invalidate
+
+
+
+Internet Engineering Task Force                                [Page 23]
+
+
+
+
+RFC1122                        LINK LAYER                   October 1989
+
+
+                      the ARP cache entry.
+
+                 Approaches (1) and (2) involve ARP cache timeouts on
+                 the order of a minute or less.  In the absence of proxy
+                 ARP, a timeout this short could create noticeable
+                 overhead traffic on a very large Ethernet.  Therefore,
+                 it may be necessary to configure a host to lengthen the
+                 ARP cache timeout.
+
+         2.3.2.2  ARP Packet Queue
+
+            The link layer SHOULD save (rather than discard) at least
+            one (the latest) packet of each set of packets destined to
+            the same unresolved IP address, and transmit the saved
+            packet when the address has been resolved.
+
+            DISCUSSION:
+                 Failure to follow this recommendation causes the first
+                 packet of every exchange to be lost.  Although higher-
+                 layer protocols can generally cope with packet loss by
+                 retransmission, packet loss does impact performance.
+                 For example, loss of a TCP open request causes the
+                 initial round-trip time estimate to be inflated.  UDP-
+                 based applications such as the Domain Name System are
+                 more seriously affected.
+
+      2.3.3  Ethernet and IEEE 802 Encapsulation
+
+         The IP encapsulation for Ethernets is described in RFC-894
+         [LINK:3], while RFC-1042 [LINK:4] describes the IP
+         encapsulation for IEEE 802 networks.  RFC-1042 elaborates and
+         replaces the discussion in Section 3.4 of [INTRO:2].
+
+         Every Internet host connected to a 10Mbps Ethernet cable:
+
+         o    MUST be able to send and receive packets using RFC-894
+              encapsulation;
+
+         o    SHOULD be able to receive RFC-1042 packets, intermixed
+              with RFC-894 packets; and
+
+         o    MAY be able to send packets using RFC-1042 encapsulation.
+
+
+         An Internet host that implements sending both the RFC-894 and
+         the RFC-1042 encapsulations MUST provide a configuration switch
+         to select which is sent, and this switch MUST default to RFC-
+         894.
+
+
+
+Internet Engineering Task Force                                [Page 24]
+
+
+
+
+RFC1122                        LINK LAYER                   October 1989
+
+
+         Note that the standard IP encapsulation in RFC-1042 does not
+         use the protocol id value (K1=6) that IEEE reserved for IP;
+         instead, it uses a value (K1=170) that implies an extension
+         (the "SNAP") which can be used to hold the Ether-Type field.
+         An Internet system MUST NOT send 802 packets using K1=6.
+
+         Address translation from Internet addresses to link-layer
+         addresses on Ethernet and IEEE 802 networks MUST be managed by
+         the Address Resolution Protocol (ARP).
+
+         The MTU for an Ethernet is 1500 and for 802.3 is 1492.
+
+         DISCUSSION:
+              The IEEE 802.3 specification provides for operation over a
+              10Mbps Ethernet cable, in which case Ethernet and IEEE
+              802.3 frames can be physically intermixed.  A receiver can
+              distinguish Ethernet and 802.3 frames by the value of the
+              802.3 Length field; this two-octet field coincides in the
+              header with the Ether-Type field of an Ethernet frame.  In
+              particular, the 802.3 Length field must be less than or
+              equal to 1500, while all valid Ether-Type values are
+              greater than 1500.
+
+              Another compatibility problem arises with link-layer
+              broadcasts.  A broadcast sent with one framing will not be
+              seen by hosts that can receive only the other framing.
+
+              The provisions of this section were designed to provide
+              direct interoperation between 894-capable and 1042-capable
+              systems on the same cable, to the maximum extent possible.
+              It is intended to support the present situation where
+              894-only systems predominate, while providing an easy
+              transition to a possible future in which 1042-capable
+              systems become common.
+
+              Note that 894-only systems cannot interoperate directly
+              with 1042-only systems.  If the two system types are set
+              up as two different logical networks on the same cable,
+              they can communicate only through an IP gateway.
+              Furthermore, it is not useful or even possible for a
+              dual-format host to discover automatically which format to
+              send, because of the problem of link-layer broadcasts.
+
+   2.4  LINK/INTERNET LAYER INTERFACE
+
+      The packet receive interface between the IP layer and the link
+      layer MUST include a flag to indicate whether the incoming packet
+      was addressed to a link-layer broadcast address.
+
+
+
+Internet Engineering Task Force                                [Page 25]
+
+
+
+
+RFC1122                        LINK LAYER                   October 1989
+
+
+      DISCUSSION:
+           Although the IP layer does not generally know link layer
+           addresses (since every different network medium typically has
+           a different address format), the broadcast address on a
+           broadcast-capable medium is an important special case.  See
+           Section 3.2.2, especially the DISCUSSION concerning broadcast
+           storms.
+
+      The packet send interface between the IP and link layers MUST
+      include the 5-bit TOS field (see Section 3.2.1.6).
+
+      The link layer MUST NOT report a Destination Unreachable error to
+      IP solely because there is no ARP cache entry for a destination.
+
+   2.5  LINK LAYER REQUIREMENTS SUMMARY
+
+                                                  |       | | | |S| |
+                                                  |       | | | |H| |F
+                                                  |       | | | |O|M|o
+                                                  |       | |S| |U|U|o
+                                                  |       | |H| |L|S|t
+                                                  |       |M|O| |D|T|n
+                                                  |       |U|U|M| | |o
+                                                  |       |S|L|A|N|N|t
+                                                  |       |T|D|Y|O|O|t
+FEATURE                                           |SECTION| | | |T|T|e
+--------------------------------------------------|-------|-|-|-|-|-|--
+                                                  |       | | | | | |
+Trailer encapsulation                             |2.3.1  | | |x| | |
+Send Trailers by default without negotiation      |2.3.1  | | | | |x|
+ARP                                               |2.3.2  | | | | | |
+  Flush out-of-date ARP cache entries             |2.3.2.1|x| | | | |
+  Prevent ARP floods                              |2.3.2.1|x| | | | |
+  Cache timeout configurable                      |2.3.2.1| |x| | | |
+  Save at least one (latest) unresolved pkt       |2.3.2.2| |x| | | |
+Ethernet and IEEE 802 Encapsulation               |2.3.3  | | | | | |
+  Host able to:                                   |2.3.3  | | | | | |
+    Send & receive RFC-894 encapsulation          |2.3.3  |x| | | | |
+    Receive RFC-1042 encapsulation                |2.3.3  | |x| | | |
+    Send RFC-1042 encapsulation                   |2.3.3  | | |x| | |
+      Then config. sw. to select, RFC-894 dflt    |2.3.3  |x| | | | |
+  Send K1=6 encapsulation                         |2.3.3  | | | | |x|
+  Use ARP on Ethernet and IEEE 802 nets           |2.3.3  |x| | | | |
+Link layer report b'casts to IP layer             |2.4    |x| | | | |
+IP layer pass TOS to link layer                   |2.4    |x| | | | |
+No ARP cache entry treated as Dest. Unreach.      |2.4    | | | | |x|
+
+
+
+
+
+Internet Engineering Task Force                                [Page 26]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+3. INTERNET LAYER PROTOCOLS
+
+   3.1 INTRODUCTION
+
+      The Robustness Principle: "Be liberal in what you accept, and
+      conservative in what you send" is particularly important in the
+      Internet layer, where one misbehaving host can deny Internet
+      service to many other hosts.
+
+      The protocol standards used in the Internet layer are:
+
+      o    RFC-791 [IP:1] defines the IP protocol and gives an
+           introduction to the architecture of the Internet.
+
+      o    RFC-792 [IP:2] defines ICMP, which provides routing,
+           diagnostic and error functionality for IP.  Although ICMP
+           messages are encapsulated within IP datagrams, ICMP
+           processing is considered to be (and is typically implemented
+           as) part of the IP layer.  See Section 3.2.2.
+
+      o    RFC-950 [IP:3] defines the mandatory subnet extension to the
+           addressing architecture.
+
+      o    RFC-1112 [IP:4] defines the Internet Group Management
+           Protocol IGMP, as part of a recommended extension to hosts
+           and to the host-gateway interface to support Internet-wide
+           multicasting at the IP level.  See Section 3.2.3.
+
+           The target of an IP multicast may be an arbitrary group of
+           Internet hosts.  IP multicasting is designed as a natural
+           extension of the link-layer multicasting facilities of some
+           networks, and it provides a standard means for local access
+           to such link-layer multicasting facilities.
+
+      Other important references are listed in Section 5 of this
+      document.
+
+      The Internet layer of host software MUST implement both IP and
+      ICMP.  See Section 3.3.7 for the requirements on support of IGMP.
+
+      The host IP layer has two basic functions:  (1) choose the "next
+      hop" gateway or host for outgoing IP datagrams and (2) reassemble
+      incoming IP datagrams.  The IP layer may also (3) implement
+      intentional fragmentation of outgoing datagrams.  Finally, the IP
+      layer must (4) provide diagnostic and error functionality.  We
+      expect that IP layer functions may increase somewhat in the
+      future, as further Internet control and management facilities are
+      developed.
+
+
+
+Internet Engineering Task Force                                [Page 27]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+      For normal datagrams, the processing is straightforward.  For
+      incoming datagrams, the IP layer:
+
+      (1)  verifies that the datagram is correctly formatted;
+
+      (2)  verifies that it is destined to the local host;
+
+      (3)  processes options;
+
+      (4)  reassembles the datagram if necessary; and
+
+      (5)  passes the encapsulated message to the appropriate
+           transport-layer protocol module.
+
+      For outgoing datagrams, the IP layer:
+
+      (1)  sets any fields not set by the transport layer;
+
+      (2)  selects the correct first hop on the connected network (a
+           process called "routing");
+
+      (3)  fragments the datagram if necessary and if intentional
+           fragmentation is implemented (see Section 3.3.3); and
+
+      (4)  passes the packet(s) to the appropriate link-layer driver.
+
+
+      A host is said to be multihomed if it has multiple IP addresses.
+      Multihoming introduces considerable confusion and complexity into
+      the protocol suite, and it is an area in which the Internet
+      architecture falls seriously short of solving all problems.  There
+      are two distinct problem areas in multihoming:
+
+      (1)  Local multihoming --  the host itself is multihomed; or
+
+      (2)  Remote multihoming -- the local host needs to communicate
+           with a remote multihomed host.
+
+      At present, remote multihoming MUST be handled at the application
+      layer, as discussed in the companion RFC [INTRO:1].  A host MAY
+      support local multihoming, which is discussed in this document,
+      and in particular in Section 3.3.4.
+
+      Any host that forwards datagrams generated by another host is
+      acting as a gateway and MUST also meet the specifications laid out
+      in the gateway requirements RFC [INTRO:2].  An Internet host that
+      includes embedded gateway code MUST have a configuration switch to
+      disable the gateway function, and this switch MUST default to the
+
+
+
+Internet Engineering Task Force                                [Page 28]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+      non-gateway mode.  In this mode, a datagram arriving through one
+      interface will not be forwarded to another host or gateway (unless
+      it is source-routed), regardless of whether the host is single-
+      homed or multihomed.  The host software MUST NOT automatically
+      move into gateway mode if the host has more than one interface, as
+      the operator of the machine may neither want to provide that
+      service nor be competent to do so.
+
+      In the following, the action specified in certain cases is to
+      "silently discard" a received datagram.  This means that the
+      datagram will be discarded without further processing and that the
+      host will not send any ICMP error message (see Section 3.2.2) as a
+      result.  However, for diagnosis of problems a host SHOULD provide
+      the capability of logging the error (see Section 1.2.3), including
+      the contents of the silently-discarded datagram, and SHOULD record
+      the event in a statistics counter.
+
+      DISCUSSION:
+           Silent discard of erroneous datagrams is generally intended
+           to prevent "broadcast storms".
+
+   3.2  PROTOCOL WALK-THROUGH
+
+      3.2.1 Internet Protocol -- IP
+
+         3.2.1.1  Version Number: RFC-791 Section 3.1
+
+            A datagram whose version number is not 4 MUST be silently
+            discarded.
+
+         3.2.1.2  Checksum: RFC-791 Section 3.1
+
+            A host MUST verify the IP header checksum on every received
+            datagram and silently discard every datagram that has a bad
+            checksum.
+
+         3.2.1.3  Addressing: RFC-791 Section 3.2
+
+            There are now five classes of IP addresses: Class A through
+            Class E.  Class D addresses are used for IP multicasting
+            [IP:4], while Class E addresses are reserved for
+            experimental use.
+
+            A multicast (Class D) address is a 28-bit logical address
+            that stands for a group of hosts, and may be either
+            permanent or transient.  Permanent multicast addresses are
+            allocated by the Internet Assigned Numbers Authority
+            [INTRO:6], while transient addresses may be allocated
+
+
+
+Internet Engineering Task Force                                [Page 29]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+            dynamically to transient groups.  Group membership is
+            determined dynamically using IGMP [IP:4].
+
+            We now summarize the important special cases for Class A, B,
+            and C IP addresses, using the following notation for an IP
+            address:
+
+                { <Network-number>, <Host-number> }
+
+            or
+                { <Network-number>, <Subnet-number>, <Host-number> }
+
+            and the notation "-1" for a field that contains all 1 bits.
+            This notation is not intended to imply that the 1-bits in an
+            address mask need be contiguous.
+
+            (a)  { 0, 0 }
+
+                 This host on this network.  MUST NOT be sent, except as
+                 a source address as part of an initialization procedure
+                 by which the host learns its own IP address.
+
+                 See also Section 3.3.6 for a non-standard use of {0,0}.
+
+            (b)  { 0, <Host-number> }
+
+                 Specified host on this network.  It MUST NOT be sent,
+                 except as a source address as part of an initialization
+                 procedure by which the host learns its full IP address.
+
+            (c)  { -1, -1 }
+
+                 Limited broadcast.  It MUST NOT be used as a source
+                 address.
+
+                 A datagram with this destination address will be
+                 received by every host on the connected physical
+                 network but will not be forwarded outside that network.
+
+            (d)  { <Network-number>, -1 }
+
+                 Directed broadcast to the specified network.  It MUST
+                 NOT be used as a source address.
+
+            (e)  { <Network-number>, <Subnet-number>, -1 }
+
+                 Directed broadcast to the specified subnet.  It MUST
+                 NOT be used as a source address.
+
+
+
+Internet Engineering Task Force                                [Page 30]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+            (f)  { <Network-number>, -1, -1 }
+
+                 Directed broadcast to all subnets of the specified
+                 subnetted network.  It MUST NOT be used as a source
+                 address.
+
+            (g)  { 127, <any> }
+
+                 Internal host loopback address.  Addresses of this form
+                 MUST NOT appear outside a host.
+
+            The <Network-number> is administratively assigned so that
+            its value will be unique in the entire world.
+
+            IP addresses are not permitted to have the value 0 or -1 for
+            any of the <Host-number>, <Network-number>, or <Subnet-
+            number> fields (except in the special cases listed above).
+            This implies that each of these fields will be at least two
+            bits long.
+
+            For further discussion of broadcast addresses, see Section
+            3.3.6.
+
+            A host MUST support the subnet extensions to IP [IP:3].  As
+            a result, there will be an address mask of the form:
+            {-1, -1, 0} associated with each of the host's local IP
+            addresses; see Sections 3.2.2.9 and 3.3.1.1.
+
+            When a host sends any datagram, the IP source address MUST
+            be one of its own IP addresses (but not a broadcast or
+            multicast address).
+
+            A host MUST silently discard an incoming datagram that is
+            not destined for the host.  An incoming datagram is destined
+            for the host if the datagram's destination address field is:
+
+            (1)  (one of) the host's IP address(es); or
+
+            (2)  an IP broadcast address valid for the connected
+                 network; or
+
+            (3)  the address for a multicast group of which the host is
+                 a member on the incoming physical interface.
+
+            For most purposes, a datagram addressed to a broadcast or
+            multicast destination is processed as if it had been
+            addressed to one of the host's IP addresses; we use the term
+            "specific-destination address" for the equivalent local IP
+
+
+
+Internet Engineering Task Force                                [Page 31]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+            address of the host.  The specific-destination address is
+            defined to be the destination address in the IP header
+            unless the header contains a broadcast or multicast address,
+            in which case the specific-destination is an IP address
+            assigned to the physical interface on which the datagram
+            arrived.
+
+            A host MUST silently discard an incoming datagram containing
+            an IP source address that is invalid by the rules of this
+            section.  This validation could be done in either the IP
+            layer or by each protocol in the transport layer.
+
+            DISCUSSION:
+                 A mis-addressed datagram might be caused by a link-
+                 layer broadcast of a unicast datagram or by a gateway
+                 or host that is confused or mis-configured.
+
+                 An architectural goal for Internet hosts was to allow
+                 IP addresses to be featureless 32-bit numbers, avoiding
+                 algorithms that required a knowledge of the IP address
+                 format.  Otherwise, any future change in the format or
+                 interpretation of IP addresses will require host
+                 software changes.  However, validation of broadcast and
+                 multicast addresses violates this goal; a few other
+                 violations are described elsewhere in this document.
+
+                 Implementers should be aware that applications
+                 depending upon the all-subnets directed broadcast
+                 address (f) may be unusable on some networks.  All-
+                 subnets broadcast is not widely implemented in vendor
+                 gateways at present, and even when it is implemented, a
+                 particular network administration may disable it in the
+                 gateway configuration.
+
+         3.2.1.4  Fragmentation and Reassembly: RFC-791 Section 3.2
+
+            The Internet model requires that every host support
+            reassembly.  See Sections 3.3.2 and 3.3.3 for the
+            requirements on fragmentation and reassembly.
+
+         3.2.1.5  Identification: RFC-791 Section 3.2
+
+            When sending an identical copy of an earlier datagram, a
+            host MAY optionally retain the same Identification field in
+            the copy.
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 32]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+            DISCUSSION:
+                 Some Internet protocol experts have maintained that
+                 when a host sends an identical copy of an earlier
+                 datagram, the new copy should contain the same
+                 Identification value as the original.  There are two
+                 suggested advantages:  (1) if the datagrams are
+                 fragmented and some of the fragments are lost, the
+                 receiver may be able to reconstruct a complete datagram
+                 from fragments of the original and the copies; (2) a
+                 congested gateway might use the IP Identification field
+                 (and Fragment Offset) to discard duplicate datagrams
+                 from the queue.
+
+                 However, the observed patterns of datagram loss in the
+                 Internet do not favor the probability of retransmitted
+                 fragments filling reassembly gaps, while other
+                 mechanisms (e.g., TCP repacketizing upon
+                 retransmission) tend to prevent retransmission of an
+                 identical datagram [IP:9].  Therefore, we believe that
+                 retransmitting the same Identification field is not
+                 useful.  Also, a connectionless transport protocol like
+                 UDP would require the cooperation of the application
+                 programs to retain the same Identification value in
+                 identical datagrams.
+
+         3.2.1.6  Type-of-Service: RFC-791 Section 3.2
+
+            The "Type-of-Service" byte in the IP header is divided into
+            two sections:  the Precedence field (high-order 3 bits), and
+            a field that is customarily called "Type-of-Service" or
+            "TOS" (low-order 5 bits).  In this document, all references
+            to "TOS" or the "TOS field" refer to the low-order 5 bits
+            only.
+
+            The Precedence field is intended for Department of Defense
+            applications of the Internet protocols.  The use of non-zero
+            values in this field is outside the scope of this document
+            and the IP standard specification.  Vendors should consult
+            the Defense Communications Agency (DCA) for guidance on the
+            IP Precedence field and its implications for other protocol
+            layers.  However, vendors should note that the use of
+            precedence will most likely require that its value be passed
+            between protocol layers in just the same way as the TOS
+            field is passed.
+
+            The IP layer MUST provide a means for the transport layer to
+            set the TOS field of every datagram that is sent; the
+            default is all zero bits.  The IP layer SHOULD pass received
+
+
+
+Internet Engineering Task Force                                [Page 33]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+            TOS values up to the transport layer.
+
+            The particular link-layer mappings of TOS contained in RFC-
+            795 SHOULD NOT be implemented.
+
+            DISCUSSION:
+                 While the TOS field has been little used in the past,
+                 it is expected to play an increasing role in the near
+                 future.  The TOS field is expected to be used to
+                 control two aspects of gateway operations: routing and
+                 queueing algorithms.  See Section 2 of [INTRO:1] for
+                 the requirements on application programs to specify TOS
+                 values.
+
+                 The TOS field may also be mapped into link-layer
+                 service selectors.  This has been applied to provide
+                 effective sharing of serial lines by different classes
+                 of TCP traffic, for example.  However, the mappings
+                 suggested in RFC-795 for networks that were included in
+                 the Internet as of 1981 are now obsolete.
+
+         3.2.1.7  Time-to-Live: RFC-791 Section 3.2
+
+            A host MUST NOT send a datagram with a Time-to-Live (TTL)
+            value of zero.
+
+            A host MUST NOT discard a datagram just because it was
+            received with TTL less than 2.
+
+            The IP layer MUST provide a means for the transport layer to
+            set the TTL field of every datagram that is sent.  When a
+            fixed TTL value is used, it MUST be configurable.  The
+            current suggested value will be published in the "Assigned
+            Numbers" RFC.
+
+            DISCUSSION:
+                 The TTL field has two functions: limit the lifetime of
+                 TCP segments (see RFC-793 [TCP:1], p. 28), and
+                 terminate Internet routing loops.  Although TTL is a
+                 time in seconds, it also has some attributes of a hop-
+                 count, since each gateway is required to reduce the TTL
+                 field by at least one.
+
+                 The intent is that TTL expiration will cause a datagram
+                 to be discarded by a gateway but not by the destination
+                 host; however, hosts that act as gateways by forwarding
+                 datagrams must follow the gateway rules for TTL.
+
+
+
+
+Internet Engineering Task Force                                [Page 34]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+                 A higher-layer protocol may want to set the TTL in
+                 order to implement an "expanding scope" search for some
+                 Internet resource.  This is used by some diagnostic
+                 tools, and is expected to be useful for locating the
+                 "nearest" server of a given class using IP
+                 multicasting, for example.  A particular transport
+                 protocol may also want to specify its own TTL bound on
+                 maximum datagram lifetime.
+
+                 A fixed value must be at least big enough for the
+                 Internet "diameter," i.e., the longest possible path.
+                 A reasonable value is about twice the diameter, to
+                 allow for continued Internet growth.
+
+         3.2.1.8  Options: RFC-791 Section 3.2
+
+            There MUST be a means for the transport layer to specify IP
+            options to be included in transmitted IP datagrams (see
+            Section 3.4).
+
+            All IP options (except NOP or END-OF-LIST) received in
+            datagrams MUST be passed to the transport layer (or to ICMP
+            processing when the datagram is an ICMP message).  The IP
+            and transport layer MUST each interpret those IP options
+            that they understand and silently ignore the others.
+
+            Later sections of this document discuss specific IP option
+            support required by each of ICMP, TCP, and UDP.
+
+            DISCUSSION:
+                 Passing all received IP options to the transport layer
+                 is a deliberate "violation of strict layering" that is
+                 designed to ease the introduction of new transport-
+                 relevant IP options in the future.  Each layer must
+                 pick out any options that are relevant to its own
+                 processing and ignore the rest.  For this purpose,
+                 every IP option except NOP and END-OF-LIST will include
+                 a specification of its own length.
+
+                 This document does not define the order in which a
+                 receiver must process multiple options in the same IP
+                 header.  Hosts sending multiple options must be aware
+                 that this introduces an ambiguity in the meaning of
+                 certain options when combined with a source-route
+                 option.
+
+            IMPLEMENTATION:
+                 The IP layer must not crash as the result of an option
+
+
+
+Internet Engineering Task Force                                [Page 35]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+                 length that is outside the possible range.  For
+                 example, erroneous option lengths have been observed to
+                 put some IP implementations into infinite loops.
+
+            Here are the requirements for specific IP options:
+
+
+            (a)  Security Option
+
+                 Some environments require the Security option in every
+                 datagram; such a requirement is outside the scope of
+                 this document and the IP standard specification.  Note,
+                 however, that the security options described in RFC-791
+                 and RFC-1038 are obsolete.  For DoD applications,
+                 vendors should consult [IP:8] for guidance.
+
+
+            (b)  Stream Identifier Option
+
+                 This option is obsolete; it SHOULD NOT be sent, and it
+                 MUST be silently ignored if received.
+
+
+            (c)  Source Route Options
+
+                 A host MUST support originating a source route and MUST
+                 be able to act as the final destination of a source
+                 route.
+
+                 If a host receives a datagram containing a completed
+                 source route (i.e., the pointer points beyond the last
+                 field), the datagram has reached its final destination;
+                 the option as received (the recorded route) MUST be
+                 passed up to the transport layer (or to ICMP message
+                 processing).  This recorded route will be reversed and
+                 used to form a return source route for reply datagrams
+                 (see discussion of IP Options in Section 4).  When a
+                 return source route is built, it MUST be correctly
+                 formed even if the recorded route included the source
+                 host (see case (B) in the discussion below).
+
+                 An IP header containing more than one Source Route
+                 option MUST NOT be sent; the effect on routing of
+                 multiple Source Route options is implementation-
+                 specific.
+
+                 Section 3.3.5 presents the rules for a host acting as
+                 an intermediate hop in a source route, i.e., forwarding
+
+
+
+Internet Engineering Task Force                                [Page 36]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+                 a source-routed datagram.
+
+                 DISCUSSION:
+                      If a source-routed datagram is fragmented, each
+                      fragment will contain a copy of the source route.
+                      Since the processing of IP options (including a
+                      source route) must precede reassembly, the
+                      original datagram will not be reassembled until
+                      the final destination is reached.
+
+                      Suppose a source routed datagram is to be routed
+                      from host S to host D via gateways G1, G2, ... Gn.
+                      There was an ambiguity in the specification over
+                      whether the source route option in a datagram sent
+                      out by S should be (A) or (B):
+
+                          (A):  {>>G2, G3, ... Gn, D}     <--- CORRECT
+
+                          (B):  {S, >>G2, G3, ... Gn, D}  <---- WRONG
+
+                      (where >> represents the pointer).  If (A) is
+                      sent, the datagram received at D will contain the
+                      option: {G1, G2, ... Gn >>}, with S and D as the
+                      IP source and destination addresses.  If (B) were
+                      sent, the datagram received at D would again
+                      contain S and D as the same IP source and
+                      destination addresses, but the option would be:
+                      {S, G1, ... Gn >>}; i.e., the originating host
+                      would be the first hop in the route.
+
+
+            (d)  Record Route Option
+
+                 Implementation of originating and processing the Record
+                 Route option is OPTIONAL.
+
+
+            (e)  Timestamp Option
+
+                 Implementation of originating and processing the
+                 Timestamp option is OPTIONAL.  If it is implemented,
+                 the following rules apply:
+
+                 o    The originating host MUST record a timestamp in a
+                      Timestamp option whose Internet address fields are
+                      not pre-specified or whose first pre-specified
+                      address is the host's interface address.
+
+
+
+
+Internet Engineering Task Force                                [Page 37]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+                 o    The destination host MUST (if possible) add the
+                      current timestamp to a Timestamp option before
+                      passing the option to the transport layer or to
+                      ICMP for processing.
+
+                 o    A timestamp value MUST follow the rules given in
+                      Section 3.2.2.8 for the ICMP Timestamp message.
+
+
+      3.2.2 Internet Control Message Protocol -- ICMP
+
+         ICMP messages are grouped into two classes.
+
+         *
+              ICMP error messages:
+
+               Destination Unreachable   (see Section 3.2.2.1)
+               Redirect                  (see Section 3.2.2.2)
+               Source Quench             (see Section 3.2.2.3)
+               Time Exceeded             (see Section 3.2.2.4)
+               Parameter Problem         (see Section 3.2.2.5)
+
+
+         *
+              ICMP query messages:
+
+                Echo                     (see Section 3.2.2.6)
+                Information              (see Section 3.2.2.7)
+                Timestamp                (see Section 3.2.2.8)
+                Address Mask             (see Section 3.2.2.9)
+
+
+         If an ICMP message of unknown type is received, it MUST be
+         silently discarded.
+
+         Every ICMP error message includes the Internet header and at
+         least the first 8 data octets of the datagram that triggered
+         the error; more than 8 octets MAY be sent; this header and data
+         MUST be unchanged from the received datagram.
+
+         In those cases where the Internet layer is required to pass an
+         ICMP error message to the transport layer, the IP protocol
+         number MUST be extracted from the original header and used to
+         select the appropriate transport protocol entity to handle the
+         error.
+
+         An ICMP error message SHOULD be sent with normal (i.e., zero)
+         TOS bits.
+
+
+
+Internet Engineering Task Force                                [Page 38]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+         An ICMP error message MUST NOT be sent as the result of
+         receiving:
+
+         *    an ICMP error message, or
+
+         *    a datagram destined to an IP broadcast or IP multicast
+              address, or
+
+         *    a datagram sent as a link-layer broadcast, or
+
+         *    a non-initial fragment, or
+
+         *    a datagram whose source address does not define a single
+              host -- e.g., a zero address, a loopback address, a
+              broadcast address, a multicast address, or a Class E
+              address.
+
+         NOTE: THESE RESTRICTIONS TAKE PRECEDENCE OVER ANY REQUIREMENT
+         ELSEWHERE IN THIS DOCUMENT FOR SENDING ICMP ERROR MESSAGES.
+
+         DISCUSSION:
+              These rules will prevent the "broadcast storms" that have
+              resulted from hosts returning ICMP error messages in
+              response to broadcast datagrams.  For example, a broadcast
+              UDP segment to a non-existent port could trigger a flood
+              of ICMP Destination Unreachable datagrams from all
+              machines that do not have a client for that destination
+              port.  On a large Ethernet, the resulting collisions can
+              render the network useless for a second or more.
+
+              Every datagram that is broadcast on the connected network
+              should have a valid IP broadcast address as its IP
+              destination (see Section 3.3.6).  However, some hosts
+              violate this rule.  To be certain to detect broadcast
+              datagrams, therefore, hosts are required to check for a
+              link-layer broadcast as well as an IP-layer broadcast
+              address.
+
+         IMPLEMENTATION:
+              This requires that the link layer inform the IP layer when
+              a link-layer broadcast datagram has been received; see
+              Section 2.4.
+
+         3.2.2.1  Destination Unreachable: RFC-792
+
+            The following additional codes are hereby defined:
+
+                    6 = destination network unknown
+
+
+
+Internet Engineering Task Force                                [Page 39]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+                    7 = destination host unknown
+
+                    8 = source host isolated
+
+                    9 = communication with destination network
+                            administratively prohibited
+
+                   10 = communication with destination host
+                            administratively prohibited
+
+                   11 = network unreachable for type of service
+
+                   12 = host unreachable for type of service
+
+            A host SHOULD generate Destination Unreachable messages with
+            code:
+
+            2    (Protocol Unreachable), when the designated transport
+                 protocol is not supported; or
+
+            3    (Port Unreachable), when the designated transport
+                 protocol (e.g., UDP) is unable to demultiplex the
+                 datagram but has no protocol mechanism to inform the
+                 sender.
+
+            A Destination Unreachable message that is received MUST be
+            reported to the transport layer.  The transport layer SHOULD
+            use the information appropriately; for example, see Sections
+            4.1.3.3, 4.2.3.9, and 4.2.4 below.  A transport protocol
+            that has its own mechanism for notifying the sender that a
+            port is unreachable (e.g., TCP, which sends RST segments)
+            MUST nevertheless accept an ICMP Port Unreachable for the
+            same purpose.
+
+            A Destination Unreachable message that is received with code
+            0 (Net), 1 (Host), or 5 (Bad Source Route) may result from a
+            routing transient and MUST therefore be interpreted as only
+            a hint, not proof, that the specified destination is
+            unreachable [IP:11].  For example, it MUST NOT be used as
+            proof of a dead gateway (see Section 3.3.1).
+
+         3.2.2.2  Redirect: RFC-792
+
+            A host SHOULD NOT send an ICMP Redirect message; Redirects
+            are to be sent only by gateways.
+
+            A host receiving a Redirect message MUST update its routing
+            information accordingly.  Every host MUST be prepared to
+
+
+
+Internet Engineering Task Force                                [Page 40]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+            accept both Host and Network Redirects and to process them
+            as described in Section 3.3.1.2 below.
+
+            A Redirect message SHOULD be silently discarded if the new
+            gateway address it specifies is not on the same connected
+            (sub-) net through which the Redirect arrived [INTRO:2,
+            Appendix A], or if the source of the Redirect is not the
+            current first-hop gateway for the specified destination (see
+            Section 3.3.1).
+
+         3.2.2.3  Source Quench: RFC-792
+
+            A host MAY send a Source Quench message if it is
+            approaching, or has reached, the point at which it is forced
+            to discard incoming datagrams due to a shortage of
+            reassembly buffers or other resources.  See Section 2.2.3 of
+            [INTRO:2] for suggestions on when to send Source Quench.
+
+            If a Source Quench message is received, the IP layer MUST
+            report it to the transport layer (or ICMP processing). In
+            general, the transport or application layer SHOULD implement
+            a mechanism to respond to Source Quench for any protocol
+            that can send a sequence of datagrams to the same
+            destination and which can reasonably be expected to maintain
+            enough state information to make this feasible.  See Section
+            4 for the handling of Source Quench by TCP and UDP.
+
+            DISCUSSION:
+                 A Source Quench may be generated by the target host or
+                 by some gateway in the path of a datagram.  The host
+                 receiving a Source Quench should throttle itself back
+                 for a period of time, then gradually increase the
+                 transmission rate again.  The mechanism to respond to
+                 Source Quench may be in the transport layer (for
+                 connection-oriented protocols like TCP) or in the
+                 application layer (for protocols that are built on top
+                 of UDP).
+
+                 A mechanism has been proposed [IP:14] to make the IP
+                 layer respond directly to Source Quench by controlling
+                 the rate at which datagrams are sent; however, this
+                 proposal is currently experimental and not currently
+                 recommended.
+
+         3.2.2.4  Time Exceeded: RFC-792
+
+            An incoming Time Exceeded message MUST be passed to the
+            transport layer.
+
+
+
+Internet Engineering Task Force                                [Page 41]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+            DISCUSSION:
+                 A gateway will send a Time Exceeded Code 0 (In Transit)
+                 message when it discards a datagram due to an expired
+                 TTL field.  This indicates either a gateway routing
+                 loop or too small an initial TTL value.
+
+                 A host may receive a Time Exceeded Code 1 (Reassembly
+                 Timeout) message from a destination host that has timed
+                 out and discarded an incomplete datagram; see Section
+                 3.3.2 below.  In the future, receipt of this message
+                 might be part of some "MTU discovery" procedure, to
+                 discover the maximum datagram size that can be sent on
+                 the path without fragmentation.
+
+         3.2.2.5  Parameter Problem: RFC-792
+
+            A host SHOULD generate Parameter Problem messages.  An
+            incoming Parameter Problem message MUST be passed to the
+            transport layer, and it MAY be reported to the user.
+
+            DISCUSSION:
+                 The ICMP Parameter Problem message is sent to the
+                 source host for any problem not specifically covered by
+                 another ICMP message.  Receipt of a Parameter Problem
+                 message generally indicates some local or remote
+                 implementation error.
+
+            A new variant on the Parameter Problem message is hereby
+            defined:
+              Code 1 = required option is missing.
+
+            DISCUSSION:
+                 This variant is currently in use in the military
+                 community for a missing security option.
+
+         3.2.2.6  Echo Request/Reply: RFC-792
+
+            Every host MUST implement an ICMP Echo server function that
+            receives Echo Requests and sends corresponding Echo Replies.
+            A host SHOULD also implement an application-layer interface
+            for sending an Echo Request and receiving an Echo Reply, for
+            diagnostic purposes.
+
+            An ICMP Echo Request destined to an IP broadcast or IP
+            multicast address MAY be silently discarded.
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 42]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+            DISCUSSION:
+                 This neutral provision results from a passionate debate
+                 between those who feel that ICMP Echo to a broadcast
+                 address provides a valuable diagnostic capability and
+                 those who feel that misuse of this feature can too
+                 easily create packet storms.
+
+            The IP source address in an ICMP Echo Reply MUST be the same
+            as the specific-destination address (defined in Section
+            3.2.1.3) of the corresponding ICMP Echo Request message.
+
+            Data received in an ICMP Echo Request MUST be entirely
+            included in the resulting Echo Reply.  However, if sending
+            the Echo Reply requires intentional fragmentation that is
+            not implemented, the datagram MUST be truncated to maximum
+            transmission size (see Section 3.3.3) and sent.
+
+            Echo Reply messages MUST be passed to the ICMP user
+            interface, unless the corresponding Echo Request originated
+            in the IP layer.
+
+            If a Record Route and/or Time Stamp option is received in an
+            ICMP Echo Request, this option (these options) SHOULD be
+            updated to include the current host and included in the IP
+            header of the Echo Reply message, without "truncation".
+            Thus, the recorded route will be for the entire round trip.
+
+            If a Source Route option is received in an ICMP Echo
+            Request, the return route MUST be reversed and used as a
+            Source Route option for the Echo Reply message.
+
+         3.2.2.7  Information Request/Reply: RFC-792
+
+            A host SHOULD NOT implement these messages.
+
+            DISCUSSION:
+                 The Information Request/Reply pair was intended to
+                 support self-configuring systems such as diskless
+                 workstations, to allow them to discover their IP
+                 network numbers at boot time.  However, the RARP and
+                 BOOTP protocols provide better mechanisms for a host to
+                 discover its own IP address.
+
+         3.2.2.8  Timestamp and Timestamp Reply: RFC-792
+
+            A host MAY implement Timestamp and Timestamp Reply.  If they
+            are implemented, the following rules MUST be followed.
+
+
+
+
+Internet Engineering Task Force                                [Page 43]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+            o    The ICMP Timestamp server function returns a Timestamp
+                 Reply to every Timestamp message that is received.  If
+                 this function is implemented, it SHOULD be designed for
+                 minimum variability in delay (e.g., implemented in the
+                 kernel to avoid delay in scheduling a user process).
+
+            The following cases for Timestamp are to be handled
+            according to the corresponding rules for ICMP Echo:
+
+            o    An ICMP Timestamp Request message to an IP broadcast or
+                 IP multicast address MAY be silently discarded.
+
+            o    The IP source address in an ICMP Timestamp Reply MUST
+                 be the same as the specific-destination address of the
+                 corresponding Timestamp Request message.
+
+            o    If a Source-route option is received in a Timestamp
+                 Request, the return route MUST be reversed and used as
+                 a Source Route option for the Timestamp Reply message.
+
+            o    If a Record Route and/or Timestamp option is received
+                 in a Timestamp Request, this (these) option(s) SHOULD
+                 be updated to include the current host and included in
+                 the IP header of the Timestamp Reply message.
+
+            o    Incoming Timestamp Reply messages MUST be passed up to
+                 the ICMP user interface.
+
+            The preferred form for a timestamp value (the "standard
+            value") is in units of milliseconds since midnight Universal
+            Time.  However, it may be difficult to provide this value
+            with millisecond resolution.  For example, many systems use
+            clocks that update only at line frequency, 50 or 60 times
+            per second.  Therefore, some latitude is allowed in a
+            "standard value":
+
+            (a)  A "standard value" MUST be updated at least 15 times
+                 per second (i.e., at most the six low-order bits of the
+                 value may be undefined).
+
+            (b)  The accuracy of a "standard value" MUST approximate
+                 that of operator-set CPU clocks, i.e., correct within a
+                 few minutes.
+
+
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 44]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+         3.2.2.9  Address Mask Request/Reply: RFC-950
+
+            A host MUST support the first, and MAY implement all three,
+            of the following methods for determining the address mask(s)
+            corresponding to its IP address(es):
+
+            (1)  static configuration information;
+
+            (2)  obtaining the address mask(s) dynamically as a side-
+                 effect of the system initialization process (see
+                 [INTRO:1]); and
+
+            (3)  sending ICMP Address Mask Request(s) and receiving ICMP
+                 Address Mask Reply(s).
+
+            The choice of method to be used in a particular host MUST be
+            configurable.
+
+            When method (3), the use of Address Mask messages, is
+            enabled, then:
+
+            (a)  When it initializes, the host MUST broadcast an Address
+                 Mask Request message on the connected network
+                 corresponding to the IP address.  It MUST retransmit
+                 this message a small number of times if it does not
+                 receive an immediate Address Mask Reply.
+
+            (b)  Until it has received an Address Mask Reply, the host
+                 SHOULD assume a mask appropriate for the address class
+                 of the IP address, i.e., assume that the connected
+                 network is not subnetted.
+
+            (c)  The first Address Mask Reply message received MUST be
+                 used to set the address mask corresponding to the
+                 particular local IP address.  This is true even if the
+                 first Address Mask Reply message is "unsolicited", in
+                 which case it will have been broadcast and may arrive
+                 after the host has ceased to retransmit Address Mask
+                 Requests.  Once the mask has been set by an Address
+                 Mask Reply, later Address Mask Reply messages MUST be
+                 (silently) ignored.
+
+            Conversely, if Address Mask messages are disabled, then no
+            ICMP Address Mask Requests will be sent, and any ICMP
+            Address Mask Replies received for that local IP address MUST
+            be (silently) ignored.
+
+            A host SHOULD make some reasonableness check on any address
+
+
+
+Internet Engineering Task Force                                [Page 45]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+            mask it installs; see IMPLEMENTATION section below.
+
+            A system MUST NOT send an Address Mask Reply unless it is an
+            authoritative agent for address masks.  An authoritative
+            agent may be a host or a gateway, but it MUST be explicitly
+            configured as an address mask agent.  Receiving an address
+            mask via an Address Mask Reply does not give the receiver
+            authority and MUST NOT be used as the basis for issuing
+            Address Mask Replies.
+
+            With a statically configured address mask, there SHOULD be
+            an additional configuration flag that determines whether the
+            host is to act as an authoritative agent for this mask,
+            i.e., whether it will answer Address Mask Request messages
+            using this mask.
+
+            If it is configured as an agent, the host MUST broadcast an
+            Address Mask Reply for the mask on the appropriate interface
+            when it initializes.
+
+            See "System Initialization" in [INTRO:1] for more
+            information about the use of Address Mask Request/Reply
+            messages.
+
+            DISCUSSION:
+                 Hosts that casually send Address Mask Replies with
+                 invalid address masks have often been a serious
+                 nuisance.  To prevent this, Address Mask Replies ought
+                 to be sent only by authoritative agents that have been
+                 selected by explicit administrative action.
+
+                 When an authoritative agent receives an Address Mask
+                 Request message, it will send a unicast Address Mask
+                 Reply to the source IP address.  If the network part of
+                 this address is zero (see (a) and (b) in 3.2.1.3), the
+                 Reply will be broadcast.
+
+                 Getting no reply to its Address Mask Request messages,
+                 a host will assume there is no agent and use an
+                 unsubnetted mask, but the agent may be only temporarily
+                 unreachable.  An agent will broadcast an unsolicited
+                 Address Mask Reply whenever it initializes, in order to
+                 update the masks of all hosts that have initialized in
+                 the meantime.
+
+            IMPLEMENTATION:
+                 The following reasonableness check on an address mask
+                 is suggested: the mask is not all 1 bits, and it is
+
+
+
+Internet Engineering Task Force                                [Page 46]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+                 either zero or else the 8 highest-order bits are on.
+
+      3.2.3  Internet Group Management Protocol IGMP
+
+         IGMP [IP:4] is a protocol used between hosts and gateways on a
+         single network to establish hosts' membership in particular
+         multicast groups.  The gateways use this information, in
+         conjunction with a multicast routing protocol, to support IP
+         multicasting across the Internet.
+
+         At this time, implementation of IGMP is OPTIONAL; see Section
+         3.3.7 for more information.  Without IGMP, a host can still
+         participate in multicasting local to its connected networks.
+
+   3.3  SPECIFIC ISSUES
+
+      3.3.1  Routing Outbound Datagrams
+
+         The IP layer chooses the correct next hop for each datagram it
+         sends.  If the destination is on a connected network, the
+         datagram is sent directly to the destination host; otherwise,
+         it has to be routed to a gateway on a connected network.
+
+         3.3.1.1  Local/Remote Decision
+
+            To decide if the destination is on a connected network, the
+            following algorithm MUST be used [see IP:3]:
+
+            (a)  The address mask (particular to a local IP address for
+                 a multihomed host) is a 32-bit mask that selects the
+                 network number and subnet number fields of the
+                 corresponding IP address.
+
+            (b)  If the IP destination address bits extracted by the
+                 address mask match the IP source address bits extracted
+                 by the same mask, then the destination is on the
+                 corresponding connected network, and the datagram is to
+                 be transmitted directly to the destination host.
+
+            (c)  If not, then the destination is accessible only through
+                 a gateway.  Selection of a gateway is described below
+                 (3.3.1.2).
+
+            A special-case destination address is handled as follows:
+
+            *    For a limited broadcast or a multicast address, simply
+                 pass the datagram to the link layer for the appropriate
+                 interface.
+
+
+
+Internet Engineering Task Force                                [Page 47]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+            *    For a (network or subnet) directed broadcast, the
+                 datagram can use the standard routing algorithms.
+
+            The host IP layer MUST operate correctly in a minimal
+            network environment, and in particular, when there are no
+            gateways.  For example, if the IP layer of a host insists on
+            finding at least one gateway to initialize, the host will be
+            unable to operate on a single isolated broadcast net.
+
+         3.3.1.2  Gateway Selection
+
+            To efficiently route a series of datagrams to the same
+            destination, the source host MUST keep a "route cache" of
+            mappings to next-hop gateways.  A host uses the following
+            basic algorithm on this cache to route a datagram; this
+            algorithm is designed to put the primary routing burden on
+            the gateways [IP:11].
+
+            (a)  If the route cache contains no information for a
+                 particular destination, the host chooses a "default"
+                 gateway and sends the datagram to it.  It also builds a
+                 corresponding Route Cache entry.
+
+            (b)  If that gateway is not the best next hop to the
+                 destination, the gateway will forward the datagram to
+                 the best next-hop gateway and return an ICMP Redirect
+                 message to the source host.
+
+            (c)  When it receives a Redirect, the host updates the
+                 next-hop gateway in the appropriate route cache entry,
+                 so later datagrams to the same destination will go
+                 directly to the best gateway.
+
+            Since the subnet mask appropriate to the destination address
+            is generally not known, a Network Redirect message SHOULD be
+            treated identically to a Host Redirect message; i.e., the
+            cache entry for the destination host (only) would be updated
+            (or created, if an entry for that host did not exist) for
+            the new gateway.
+
+            DISCUSSION:
+                 This recommendation is to protect against gateways that
+                 erroneously send Network Redirects for a subnetted
+                 network, in violation of the gateway requirements
+                 [INTRO:2].
+
+            When there is no route cache entry for the destination host
+            address (and the destination is not on the connected
+
+
+
+Internet Engineering Task Force                                [Page 48]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+            network), the IP layer MUST pick a gateway from its list of
+            "default" gateways.  The IP layer MUST support multiple
+            default gateways.
+
+            As an extra feature, a host IP layer MAY implement a table
+            of "static routes".  Each such static route MAY include a
+            flag specifying whether it may be overridden by ICMP
+            Redirects.
+
+            DISCUSSION:
+                 A host generally needs to know at least one default
+                 gateway to get started.  This information can be
+                 obtained from a configuration file or else from the
+                 host startup sequence, e.g., the BOOTP protocol (see
+                 [INTRO:1]).
+
+                 It has been suggested that a host can augment its list
+                 of default gateways by recording any new gateways it
+                 learns about.  For example, it can record every gateway
+                 to which it is ever redirected.  Such a feature, while
+                 possibly useful in some circumstances, may cause
+                 problems in other cases (e.g., gateways are not all
+                 equal), and it is not recommended.
+
+                 A static route is typically a particular preset mapping
+                 from destination host or network into a particular
+                 next-hop gateway; it might also depend on the Type-of-
+                 Service (see next section).  Static routes would be set
+                 up by system administrators to override the normal
+                 automatic routing mechanism, to handle exceptional
+                 situations.  However, any static routing information is
+                 a potential source of failure as configurations change
+                 or equipment fails.
+
+         3.3.1.3  Route Cache
+
+            Each route cache entry needs to include the following
+            fields:
+
+            (1)  Local IP address (for a multihomed host)
+
+            (2)  Destination IP address
+
+            (3)  Type(s)-of-Service
+
+            (4)  Next-hop gateway IP address
+
+            Field (2) MAY be the full IP address of the destination
+
+
+
+Internet Engineering Task Force                                [Page 49]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+            host, or only the destination network number.  Field (3),
+            the TOS, SHOULD be included.
+
+            See Section 3.3.4.2 for a discussion of the implications of
+            multihoming for the lookup procedure in this cache.
+
+            DISCUSSION:
+                 Including the Type-of-Service field in the route cache
+                 and considering it in the host route algorithm will
+                 provide the necessary mechanism for the future when
+                 Type-of-Service routing is commonly used in the
+                 Internet.  See Section 3.2.1.6.
+
+                 Each route cache entry defines the endpoints of an
+                 Internet path.  Although the connecting path may change
+                 dynamically in an arbitrary way, the transmission
+                 characteristics of the path tend to remain
+                 approximately constant over a time period longer than a
+                 single typical host-host transport connection.
+                 Therefore, a route cache entry is a natural place to
+                 cache data on the properties of the path.  Examples of
+                 such properties might be the maximum unfragmented
+                 datagram size (see Section 3.3.3), or the average
+                 round-trip delay measured by a transport protocol.
+                 This data will generally be both gathered and used by a
+                 higher layer protocol, e.g., by TCP, or by an
+                 application using UDP.  Experiments are currently in
+                 progress on caching path properties in this manner.
+
+                 There is no consensus on whether the route cache should
+                 be keyed on destination host addresses alone, or allow
+                 both host and network addresses.  Those who favor the
+                 use of only host addresses argue that:
+
+                 (1)  As required in Section 3.3.1.2, Redirect messages
+                      will generally result in entries keyed on
+                      destination host addresses; the simplest and most
+                      general scheme would be to use host addresses
+                      always.
+
+                 (2)  The IP layer may not always know the address mask
+                      for a network address in a complex subnetted
+                      environment.
+
+                 (3)  The use of only host addresses allows the
+                      destination address to be used as a pure 32-bit
+                      number, which may allow the Internet architecture
+                      to be more easily extended in the future without
+
+
+
+Internet Engineering Task Force                                [Page 50]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+                      any change to the hosts.
+
+                 The opposing view is that allowing a mixture of
+                 destination hosts and networks in the route cache:
+
+                 (1)  Saves memory space.
+
+                 (2)  Leads to a simpler data structure, easily
+                      combining the cache with the tables of default and
+                      static routes (see below).
+
+                 (3)  Provides a more useful place to cache path
+                      properties, as discussed earlier.
+
+
+            IMPLEMENTATION:
+                 The cache needs to be large enough to include entries
+                 for the maximum number of destination hosts that may be
+                 in use at one time.
+
+                 A route cache entry may also include control
+                 information used to choose an entry for replacement.
+                 This might take the form of a "recently used" bit, a
+                 use count, or a last-used timestamp, for example.  It
+                 is recommended that it include the time of last
+                 modification of the entry, for diagnostic purposes.
+
+                 An implementation may wish to reduce the overhead of
+                 scanning the route cache for every datagram to be
+                 transmitted.  This may be accomplished with a hash
+                 table to speed the lookup, or by giving a connection-
+                 oriented transport protocol a "hint" or temporary
+                 handle on the appropriate cache entry, to be passed to
+                 the IP layer with each subsequent datagram.
+
+                 Although we have described the route cache, the lists
+                 of default gateways, and a table of static routes as
+                 conceptually distinct, in practice they may be combined
+                 into a single "routing table" data structure.
+
+         3.3.1.4  Dead Gateway Detection
+
+            The IP layer MUST be able to detect the failure of a "next-
+            hop" gateway that is listed in its route cache and to choose
+            an alternate gateway (see Section 3.3.1.5).
+
+            Dead gateway detection is covered in some detail in RFC-816
+            [IP:11]. Experience to date has not produced a complete
+
+
+
+Internet Engineering Task Force                                [Page 51]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+            algorithm which is totally satisfactory, though it has
+            identified several forbidden paths and promising techniques.
+
+            *    A particular gateway SHOULD NOT be used indefinitely in
+                 the absence of positive indications that it is
+                 functioning.
+
+            *    Active probes such as "pinging" (i.e., using an ICMP
+                 Echo Request/Reply exchange) are expensive and scale
+                 poorly.  In particular, hosts MUST NOT actively check
+                 the status of a first-hop gateway by simply pinging the
+                 gateway continuously.
+
+            *    Even when it is the only effective way to verify a
+                 gateway's status, pinging MUST be used only when
+                 traffic is being sent to the gateway and when there is
+                 no other positive indication to suggest that the
+                 gateway is functioning.
+
+            *    To avoid pinging, the layers above and/or below the
+                 Internet layer SHOULD be able to give "advice" on the
+                 status of route cache entries when either positive
+                 (gateway OK) or negative (gateway dead) information is
+                 available.
+
+
+            DISCUSSION:
+                 If an implementation does not include an adequate
+                 mechanism for detecting a dead gateway and re-routing,
+                 a gateway failure may cause datagrams to apparently
+                 vanish into a "black hole".  This failure can be
+                 extremely confusing for users and difficult for network
+                 personnel to debug.
+
+                 The dead-gateway detection mechanism must not cause
+                 unacceptable load on the host, on connected networks,
+                 or on first-hop gateway(s).  The exact constraints on
+                 the timeliness of dead gateway detection and on
+                 acceptable load may vary somewhat depending on the
+                 nature of the host's mission, but a host generally
+                 needs to detect a failed first-hop gateway quickly
+                 enough that transport-layer connections will not break
+                 before an alternate gateway can be selected.
+
+                 Passing advice from other layers of the protocol stack
+                 complicates the interfaces between the layers, but it
+                 is the preferred approach to dead gateway detection.
+                 Advice can come from almost any part of the IP/TCP
+
+
+
+Internet Engineering Task Force                                [Page 52]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+                 architecture, but it is expected to come primarily from
+                 the transport and link layers.  Here are some possible
+                 sources for gateway advice:
+
+                 o    TCP or any connection-oriented transport protocol
+                      should be able to give negative advice, e.g.,
+                      triggered by excessive retransmissions.
+
+                 o    TCP may give positive advice when (new) data is
+                      acknowledged.  Even though the route may be
+                      asymmetric, an ACK for new data proves that the
+                      acknowledged data must have been transmitted
+                      successfully.
+
+                 o    An ICMP Redirect message from a particular gateway
+                      should be used as positive advice about that
+                      gateway.
+
+                 o    Link-layer information that reliably detects and
+                      reports host failures (e.g., ARPANET Destination
+                      Dead messages) should be used as negative advice.
+
+                 o    Failure to ARP or to re-validate ARP mappings may
+                      be used as negative advice for the corresponding
+                      IP address.
+
+                 o    Packets arriving from a particular link-layer
+                      address are evidence that the system at this
+                      address is alive.  However, turning this
+                      information into advice about gateways requires
+                      mapping the link-layer address into an IP address,
+                      and then checking that IP address against the
+                      gateways pointed to by the route cache.  This is
+                      probably prohibitively inefficient.
+
+                 Note that positive advice that is given for every
+                 datagram received may cause unacceptable overhead in
+                 the implementation.
+
+                 While advice might be passed using required arguments
+                 in all interfaces to the IP layer, some transport and
+                 application layer protocols cannot deduce the correct
+                 advice.  These interfaces must therefore allow a
+                 neutral value for advice, since either always-positive
+                 or always-negative advice leads to incorrect behavior.
+
+                 There is another technique for dead gateway detection
+                 that has been commonly used but is not recommended.
+
+
+
+Internet Engineering Task Force                                [Page 53]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+                 This technique depends upon the host passively
+                 receiving ("wiretapping") the Interior Gateway Protocol
+                 (IGP) datagrams that the gateways are broadcasting to
+                 each other.  This approach has the drawback that a host
+                 needs to recognize all the interior gateway protocols
+                 that gateways may use (see [INTRO:2]).  In addition, it
+                 only works on a broadcast network.
+
+                 At present, pinging (i.e., using ICMP Echo messages) is
+                 the mechanism for gateway probing when absolutely
+                 required.  A successful ping guarantees that the
+                 addressed interface and its associated machine are up,
+                 but it does not guarantee that the machine is a gateway
+                 as opposed to a host.  The normal inference is that if
+                 a Redirect or other evidence indicates that a machine
+                 was a gateway, successful pings will indicate that the
+                 machine is still up and hence still a gateway.
+                 However, since a host silently discards packets that a
+                 gateway would forward or redirect, this assumption
+                 could sometimes fail.  To avoid this problem, a new
+                 ICMP message under development will ask "are you a
+                 gateway?"
+
+            IMPLEMENTATION:
+                 The following specific algorithm has been suggested:
+
+                 o    Associate a "reroute timer" with each gateway
+                      pointed to by the route cache.  Initialize the
+                      timer to a value Tr, which must be small enough to
+                      allow detection of a dead gateway before transport
+                      connections time out.
+
+                 o    Positive advice would reset the reroute timer to
+                      Tr.  Negative advice would reduce or zero the
+                      reroute timer.
+
+                 o    Whenever the IP layer used a particular gateway to
+                      route a datagram, it would check the corresponding
+                      reroute timer.  If the timer had expired (reached
+                      zero), the IP layer would send a ping to the
+                      gateway, followed immediately by the datagram.
+
+                 o    The ping (ICMP Echo) would be sent again if
+                      necessary, up to N times.  If no ping reply was
+                      received in N tries, the gateway would be assumed
+                      to have failed, and a new first-hop gateway would
+                      be chosen for all cache entries pointing to the
+                      failed gateway.
+
+
+
+Internet Engineering Task Force                                [Page 54]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+                 Note that the size of Tr is inversely related to the
+                 amount of advice available.  Tr should be large enough
+                 to ensure that:
+
+                 *    Any pinging will be at a low level (e.g., <10%) of
+                      all packets sent to a gateway from the host, AND
+
+                 *    pinging is infrequent (e.g., every 3 minutes).
+
+                 Since the recommended algorithm is concerned with the
+                 gateways pointed to by route cache entries, rather than
+                 the cache entries themselves, a two level data
+                 structure (perhaps coordinated with ARP or similar
+                 caches) may be desirable for implementing a route
+                 cache.
+
+         3.3.1.5  New Gateway Selection
+
+            If the failed gateway is not the current default, the IP
+            layer can immediately switch to a default gateway.  If it is
+            the current default that failed, the IP layer MUST select a
+            different default gateway (assuming more than one default is
+            known) for the failed route and for establishing new routes.
+
+            DISCUSSION:
+                 When a gateway does fail, the other gateways on the
+                 connected network will learn of the failure through
+                 some inter-gateway routing protocol.  However, this
+                 will not happen instantaneously, since gateway routing
+                 protocols typically have a settling time of 30-60
+                 seconds.  If the host switches to an alternative
+                 gateway before the gateways have agreed on the failure,
+                 the new target gateway will probably forward the
+                 datagram to the failed gateway and send a Redirect back
+                 to the host pointing to the failed gateway (!).  The
+                 result is likely to be a rapid oscillation in the
+                 contents of the host's route cache during the gateway
+                 settling period.  It has been proposed that the dead-
+                 gateway logic should include some hysteresis mechanism
+                 to prevent such oscillations.  However, experience has
+                 not shown any harm from such oscillations, since
+                 service cannot be restored to the host until the
+                 gateways' routing information does settle down.
+
+            IMPLEMENTATION:
+                 One implementation technique for choosing a new default
+                 gateway is to simply round-robin among the default
+                 gateways in the host's list.  Another is to rank the
+
+
+
+Internet Engineering Task Force                                [Page 55]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+                 gateways in priority order, and when the current
+                 default gateway is not the highest priority one, to
+                 "ping" the higher-priority gateways slowly to detect
+                 when they return to service.  This pinging can be at a
+                 very low rate, e.g., 0.005 per second.
+
+         3.3.1.6  Initialization
+
+            The following information MUST be configurable:
+
+            (1)  IP address(es).
+
+            (2)  Address mask(s).
+
+            (3)  A list of default gateways, with a preference level.
+
+            A manual method of entering this configuration data MUST be
+            provided.  In addition, a variety of methods can be used to
+            determine this information dynamically; see the section on
+            "Host Initialization" in [INTRO:1].
+
+            DISCUSSION:
+                 Some host implementations use "wiretapping" of gateway
+                 protocols on a broadcast network to learn what gateways
+                 exist.  A standard method for default gateway discovery
+                 is under development.
+
+      3.3.2  Reassembly
+
+         The IP layer MUST implement reassembly of IP datagrams.
+
+         We designate the largest datagram size that can be reassembled
+         by EMTU_R ("Effective MTU to receive"); this is sometimes
+         called the "reassembly buffer size".  EMTU_R MUST be greater
+         than or equal to 576, SHOULD be either configurable or
+         indefinite, and SHOULD be greater than or equal to the MTU of
+         the connected network(s).
+
+         DISCUSSION:
+              A fixed EMTU_R limit should not be built into the code
+              because some application layer protocols require EMTU_R
+              values larger than 576.
+
+         IMPLEMENTATION:
+              An implementation may use a contiguous reassembly buffer
+              for each datagram, or it may use a more complex data
+              structure that places no definite limit on the reassembled
+              datagram size; in the latter case, EMTU_R is said to be
+
+
+
+Internet Engineering Task Force                                [Page 56]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+              "indefinite".
+
+              Logically, reassembly is performed by simply copying each
+              fragment into the packet buffer at the proper offset.
+              Note that fragments may overlap if successive
+              retransmissions use different packetizing but the same
+              reassembly Id.
+
+              The tricky part of reassembly is the bookkeeping to
+              determine when all bytes of the datagram have been
+              reassembled.  We recommend Clark's algorithm [IP:10] that
+              requires no additional data space for the bookkeeping.
+              However, note that, contrary to [IP:10], the first
+              fragment header needs to be saved for inclusion in a
+              possible ICMP Time Exceeded (Reassembly Timeout) message.
+
+         There MUST be a mechanism by which the transport layer can
+         learn MMS_R, the maximum message size that can be received and
+         reassembled in an IP datagram (see GET_MAXSIZES calls in
+         Section 3.4).  If EMTU_R is not indefinite, then the value of
+         MMS_R is given by:
+
+            MMS_R = EMTU_R - 20
+
+         since 20 is the minimum size of an IP header.
+
+         There MUST be a reassembly timeout.  The reassembly timeout
+         value SHOULD be a fixed value, not set from the remaining TTL.
+         It is recommended that the value lie between 60 seconds and 120
+         seconds.  If this timeout expires, the partially-reassembled
+         datagram MUST be discarded and an ICMP Time Exceeded message
+         sent to the source host (if fragment zero has been received).
+
+         DISCUSSION:
+              The IP specification says that the reassembly timeout
+              should be the remaining TTL from the IP header, but this
+              does not work well because gateways generally treat TTL as
+              a simple hop count rather than an elapsed time.  If the
+              reassembly timeout is too small, datagrams will be
+              discarded unnecessarily, and communication may fail.  The
+              timeout needs to be at least as large as the typical
+              maximum delay across the Internet.  A realistic minimum
+              reassembly timeout would be 60 seconds.
+
+              It has been suggested that a cache might be kept of
+              round-trip times measured by transport protocols for
+              various destinations, and that these values might be used
+              to dynamically determine a reasonable reassembly timeout
+
+
+
+Internet Engineering Task Force                                [Page 57]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+              value.  Further investigation of this approach is
+              required.
+
+              If the reassembly timeout is set too high, buffer
+              resources in the receiving host will be tied up too long,
+              and the MSL (Maximum Segment Lifetime) [TCP:1] will be
+              larger than necessary.  The MSL controls the maximum rate
+              at which fragmented datagrams can be sent using distinct
+              values of the 16-bit Ident field; a larger MSL lowers the
+              maximum rate.  The TCP specification [TCP:1] arbitrarily
+              assumes a value of 2 minutes for MSL.  This sets an upper
+              limit on a reasonable reassembly timeout value.
+
+      3.3.3  Fragmentation
+
+         Optionally, the IP layer MAY implement a mechanism to fragment
+         outgoing datagrams intentionally.
+
+         We designate by EMTU_S ("Effective MTU for sending") the
+         maximum IP datagram size that may be sent, for a particular
+         combination of IP source and destination addresses and perhaps
+         TOS.
+
+         A host MUST implement a mechanism to allow the transport layer
+         to learn MMS_S, the maximum transport-layer message size that
+         may be sent for a given {source, destination, TOS} triplet (see
+         GET_MAXSIZES call in Section 3.4).  If no local fragmentation
+         is performed, the value of MMS_S will be:
+
+            MMS_S = EMTU_S - <IP header size>
+
+         and EMTU_S must be less than or equal to the MTU of the network
+         interface corresponding to the source address of the datagram.
+         Note that <IP header size> in this equation will be 20, unless
+         the IP reserves space to insert IP options for its own purposes
+         in addition to any options inserted by the transport layer.
+
+         A host that does not implement local fragmentation MUST ensure
+         that the transport layer (for TCP) or the application layer
+         (for UDP) obtains MMS_S from the IP layer and does not send a
+         datagram exceeding MMS_S in size.
+
+         It is generally desirable to avoid local fragmentation and to
+         choose EMTU_S low enough to avoid fragmentation in any gateway
+         along the path.  In the absence of actual knowledge of the
+         minimum MTU along the path, the IP layer SHOULD use
+         EMTU_S <= 576 whenever the destination address is not on a
+         connected network, and otherwise use the connected network's
+
+
+
+Internet Engineering Task Force                                [Page 58]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+         MTU.
+
+         The MTU of each physical interface MUST be configurable.
+
+         A host IP layer implementation MAY have a configuration flag
+         "All-Subnets-MTU", indicating that the MTU of the connected
+         network is to be used for destinations on different subnets
+         within the same network, but not for other networks.  Thus,
+         this flag causes the network class mask, rather than the subnet
+         address mask, to be used to choose an EMTU_S.  For a multihomed
+         host, an "All-Subnets-MTU" flag is needed for each network
+         interface.
+
+         DISCUSSION:
+              Picking the correct datagram size to use when sending data
+              is a complex topic [IP:9].
+
+              (a)  In general, no host is required to accept an IP
+                   datagram larger than 576 bytes (including header and
+                   data), so a host must not send a larger datagram
+                   without explicit knowledge or prior arrangement with
+                   the destination host.  Thus, MMS_S is only an upper
+                   bound on the datagram size that a transport protocol
+                   may send; even when MMS_S exceeds 556, the transport
+                   layer must limit its messages to 556 bytes in the
+                   absence of other knowledge about the destination
+                   host.
+
+              (b)  Some transport protocols (e.g., TCP) provide a way to
+                   explicitly inform the sender about the largest
+                   datagram the other end can receive and reassemble
+                   [IP:7].  There is no corresponding mechanism in the
+                   IP layer.
+
+                   A transport protocol that assumes an EMTU_R larger
+                   than 576 (see Section 3.3.2), can send a datagram of
+                   this larger size to another host that implements the
+                   same protocol.
+
+              (c)  Hosts should ideally limit their EMTU_S for a given
+                   destination to the minimum MTU of all the networks
+                   along the path, to avoid any fragmentation.  IP
+                   fragmentation, while formally correct, can create a
+                   serious transport protocol performance problem,
+                   because loss of a single fragment means all the
+                   fragments in the segment must be retransmitted
+                   [IP:9].
+
+
+
+
+Internet Engineering Task Force                                [Page 59]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+              Since nearly all networks in the Internet currently
+              support an MTU of 576 or greater, we strongly recommend
+              the use of 576 for datagrams sent to non-local networks.
+
+              It has been suggested that a host could determine the MTU
+              over a given path by sending a zero-offset datagram
+              fragment and waiting for the receiver to time out the
+              reassembly (which cannot complete!) and return an ICMP
+              Time Exceeded message.  This message would include the
+              largest remaining fragment header in its body.  More
+              direct mechanisms are being experimented with, but have
+              not yet been adopted (see e.g., RFC-1063).
+
+      3.3.4  Local Multihoming
+
+         3.3.4.1  Introduction
+
+            A multihomed host has multiple IP addresses, which we may
+            think of as "logical interfaces".  These logical interfaces
+            may be associated with one or more physical interfaces, and
+            these physical interfaces may be connected to the same or
+            different networks.
+
+            Here are some important cases of multihoming:
+
+            (a)  Multiple Logical Networks
+
+                 The Internet architects envisioned that each physical
+                 network would have a single unique IP network (or
+                 subnet) number.  However, LAN administrators have
+                 sometimes found it useful to violate this assumption,
+                 operating a LAN with multiple logical networks per
+                 physical connected network.
+
+                 If a host connected to such a physical network is
+                 configured to handle traffic for each of N different
+                 logical networks, then the host will have N logical
+                 interfaces.  These could share a single physical
+                 interface, or might use N physical interfaces to the
+                 same network.
+
+            (b)  Multiple Logical Hosts
+
+                 When a host has multiple IP addresses that all have the
+                 same <Network-number> part (and the same <Subnet-
+                 number> part, if any), the logical interfaces are known
+                 as "logical hosts".  These logical interfaces might
+                 share a single physical interface or might use separate
+
+
+
+Internet Engineering Task Force                                [Page 60]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+                 physical interfaces to the same physical network.
+
+            (c)  Simple Multihoming
+
+                 In this case, each logical interface is mapped into a
+                 separate physical interface and each physical interface
+                 is connected to a different physical network.  The term
+                 "multihoming" was originally applied only to this case,
+                 but it is now applied more generally.
+
+                 A host with embedded gateway functionality will
+                 typically fall into the simple multihoming case.  Note,
+                 however, that a host may be simply multihomed without
+                 containing an embedded gateway, i.e., without
+                 forwarding datagrams from one connected network to
+                 another.
+
+                 This case presents the most difficult routing problems.
+                 The choice of interface (i.e., the choice of first-hop
+                 network) may significantly affect performance or even
+                 reachability of remote parts of the Internet.
+
+
+            Finally, we note another possibility that is NOT
+            multihoming:  one logical interface may be bound to multiple
+            physical interfaces, in order to increase the reliability or
+            throughput between directly connected machines by providing
+            alternative physical paths between them.  For instance, two
+            systems might be connected by multiple point-to-point links.
+            We call this "link-layer multiplexing".  With link-layer
+            multiplexing, the protocols above the link layer are unaware
+            that multiple physical interfaces are present; the link-
+            layer device driver is responsible for multiplexing and
+            routing packets across the physical interfaces.
+
+            In the Internet protocol architecture, a transport protocol
+            instance ("entity") has no address of its own, but instead
+            uses a single Internet Protocol (IP) address.  This has
+            implications for the IP, transport, and application layers,
+            and for the interfaces between them.  In particular, the
+            application software may have to be aware of the multiple IP
+            addresses of a multihomed host; in other cases, the choice
+            can be made within the network software.
+
+         3.3.4.2  Multihoming Requirements
+
+            The following general rules apply to the selection of an IP
+            source address for sending a datagram from a multihomed
+
+
+
+Internet Engineering Task Force                                [Page 61]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+            host.
+
+            (1)  If the datagram is sent in response to a received
+                 datagram, the source address for the response SHOULD be
+                 the specific-destination address of the request.  See
+                 Sections 4.1.3.5 and 4.2.3.7 and the "General Issues"
+                 section of [INTRO:1] for more specific requirements on
+                 higher layers.
+
+                 Otherwise, a source address must be selected.
+
+            (2)  An application MUST be able to explicitly specify the
+                 source address for initiating a connection or a
+                 request.
+
+            (3)  In the absence of such a specification, the networking
+                 software MUST choose a source address.  Rules for this
+                 choice are described below.
+
+
+            There are two key requirement issues related to multihoming:
+
+            (A)  A host MAY silently discard an incoming datagram whose
+                 destination address does not correspond to the physical
+                 interface through which it is received.
+
+            (B)  A host MAY restrict itself to sending (non-source-
+                 routed) IP datagrams only through the physical
+                 interface that corresponds to the IP source address of
+                 the datagrams.
+
+
+            DISCUSSION:
+                 Internet host implementors have used two different
+                 conceptual models for multihoming, briefly summarized
+                 in the following discussion.  This document takes no
+                 stand on which model is preferred; each seems to have a
+                 place.  This ambivalence is reflected in the issues (A)
+                 and (B) being optional.
+
+                 o    Strong ES Model
+
+                      The Strong ES (End System, i.e., host) model
+                      emphasizes the host/gateway (ES/IS) distinction,
+                      and would therefore substitute MUST for MAY in
+                      issues (A) and (B) above.  It tends to model a
+                      multihomed host as a set of logical hosts within
+                      the same physical host.
+
+
+
+Internet Engineering Task Force                                [Page 62]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+                      With respect to (A), proponents of the Strong ES
+                      model note that automatic Internet routing
+                      mechanisms could not route a datagram to a
+                      physical interface that did not correspond to the
+                      destination address.
+
+                      Under the Strong ES model, the route computation
+                      for an outgoing datagram is the mapping:
+
+                         route(src IP addr, dest IP addr, TOS)
+                                                        -> gateway
+
+                      Here the source address is included as a parameter
+                      in order to select a gateway that is directly
+                      reachable on the corresponding physical interface.
+                      Note that this model logically requires that in
+                      general there be at least one default gateway, and
+                      preferably multiple defaults, for each IP source
+                      address.
+
+                 o    Weak ES Model
+
+                      This view de-emphasizes the ES/IS distinction, and
+                      would therefore substitute MUST NOT for MAY in
+                      issues (A) and (B).  This model may be the more
+                      natural one for hosts that wiretap gateway routing
+                      protocols, and is necessary for hosts that have
+                      embedded gateway functionality.
+
+                      The Weak ES Model may cause the Redirect mechanism
+                      to fail.  If a datagram is sent out a physical
+                      interface that does not correspond to the
+                      destination address, the first-hop gateway will
+                      not realize when it needs to send a Redirect.  On
+                      the other hand, if the host has embedded gateway
+                      functionality, then it has routing information
+                      without listening to Redirects.
+
+                      In the Weak ES model, the route computation for an
+                      outgoing datagram is the mapping:
+
+                         route(dest IP addr, TOS) -> gateway, interface
+
+
+
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 63]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+         3.3.4.3  Choosing a Source Address
+
+            DISCUSSION:
+                 When it sends an initial connection request (e.g., a
+                 TCP "SYN" segment) or a datagram service request (e.g.,
+                 a UDP-based query), the transport layer on a multihomed
+                 host needs to know which source address to use.  If the
+                 application does not specify it, the transport layer
+                 must ask the IP layer to perform the conceptual
+                 mapping:
+
+                     GET_SRCADDR(remote IP addr, TOS)
+                                               -> local IP address
+
+                 Here TOS is the Type-of-Service value (see Section
+                 3.2.1.6), and the result is the desired source address.
+                 The following rules are suggested for implementing this
+                 mapping:
+
+                 (a)  If the remote Internet address lies on one of the
+                      (sub-) nets to which the host is directly
+                      connected, a corresponding source address may be
+                      chosen, unless the corresponding interface is
+                      known to be down.
+
+                 (b)  The route cache may be consulted, to see if there
+                      is an active route to the specified destination
+                      network through any network interface; if so, a
+                      local IP address corresponding to that interface
+                      may be chosen.
+
+                 (c)  The table of static routes, if any (see Section
+                      3.3.1.2) may be similarly consulted.
+
+                 (d)  The default gateways may be consulted.  If these
+                      gateways are assigned to different interfaces, the
+                      interface corresponding to the gateway with the
+                      highest preference may be chosen.
+
+                 In the future, there may be a defined way for a
+                 multihomed host to ask the gateways on all connected
+                 networks for advice about the best network to use for a
+                 given destination.
+
+            IMPLEMENTATION:
+                 It will be noted that this process is essentially the
+                 same as datagram routing (see Section 3.3.1), and
+                 therefore hosts may be able to combine the
+
+
+
+Internet Engineering Task Force                                [Page 64]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+                 implementation of the two functions.
+
+      3.3.5  Source Route Forwarding
+
+         Subject to restrictions given below, a host MAY be able to act
+         as an intermediate hop in a source route, forwarding a source-
+         routed datagram to the next specified hop.
+
+         However, in performing this gateway-like function, the host
+         MUST obey all the relevant rules for a gateway forwarding
+         source-routed datagrams [INTRO:2].  This includes the following
+         specific provisions, which override the corresponding host
+         provisions given earlier in this document:
+
+         (A)  TTL (ref. Section 3.2.1.7)
+
+              The TTL field MUST be decremented and the datagram perhaps
+              discarded as specified for a gateway in [INTRO:2].
+
+         (B)  ICMP Destination Unreachable (ref. Section 3.2.2.1)
+
+              A host MUST be able to generate Destination Unreachable
+              messages with the following codes:
+
+              4    (Fragmentation Required but DF Set) when a source-
+                   routed datagram cannot be fragmented to fit into the
+                   target network;
+
+              5    (Source Route Failed) when a source-routed datagram
+                   cannot be forwarded, e.g., because of a routing
+                   problem or because the next hop of a strict source
+                   route is not on a connected network.
+
+         (C)  IP Source Address (ref. Section 3.2.1.3)
+
+              A source-routed datagram being forwarded MAY (and normally
+              will) have a source address that is not one of the IP
+              addresses of the forwarding host.
+
+         (D)  Record Route Option (ref. Section 3.2.1.8d)
+
+              A host that is forwarding a source-routed datagram
+              containing a Record Route option MUST update that option,
+              if it has room.
+
+         (E)  Timestamp Option (ref. Section 3.2.1.8e)
+
+              A host that is forwarding a source-routed datagram
+
+
+
+Internet Engineering Task Force                                [Page 65]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+              containing a Timestamp Option MUST add the current
+              timestamp to that option, according to the rules for this
+              option.
+
+         To define the rules restricting host forwarding of source-
+         routed datagrams, we use the term "local source-routing" if the
+         next hop will be through the same physical interface through
+         which the datagram arrived; otherwise, it is "non-local
+         source-routing".
+
+         o    A host is permitted to perform local source-routing
+              without restriction.
+
+         o    A host that supports non-local source-routing MUST have a
+              configurable switch to disable forwarding, and this switch
+              MUST default to disabled.
+
+         o    The host MUST satisfy all gateway requirements for
+              configurable policy filters [INTRO:2] restricting non-
+              local forwarding.
+
+         If a host receives a datagram with an incomplete source route
+         but does not forward it for some reason, the host SHOULD return
+         an ICMP Destination Unreachable (code 5, Source Route Failed)
+         message, unless the datagram was itself an ICMP error message.
+
+      3.3.6  Broadcasts
+
+         Section 3.2.1.3 defined the four standard IP broadcast address
+         forms:
+
+           Limited Broadcast:  {-1, -1}
+
+           Directed Broadcast:  {<Network-number>,-1}
+
+           Subnet Directed Broadcast:
+                              {<Network-number>,<Subnet-number>,-1}
+
+           All-Subnets Directed Broadcast: {<Network-number>,-1,-1}
+
+         A host MUST recognize any of these forms in the destination
+         address of an incoming datagram.
+
+         There is a class of hosts* that use non-standard broadcast
+         address forms, substituting 0 for -1.  All hosts SHOULD
+_________________________
+*4.2BSD Unix and its derivatives, but not 4.3BSD.
+
+
+
+
+Internet Engineering Task Force                                [Page 66]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+         recognize and accept any of these non-standard broadcast
+         addresses as the destination address of an incoming datagram.
+         A host MAY optionally have a configuration option to choose the
+         0 or the -1 form of broadcast address, for each physical
+         interface, but this option SHOULD default to the standard (-1)
+         form.
+
+         When a host sends a datagram to a link-layer broadcast address,
+         the IP destination address MUST be a legal IP broadcast or IP
+         multicast address.
+
+         A host SHOULD silently discard a datagram that is received via
+         a link-layer broadcast (see Section 2.4) but does not specify
+         an IP multicast or broadcast destination address.
+
+         Hosts SHOULD use the Limited Broadcast address to broadcast to
+         a connected network.
+
+
+         DISCUSSION:
+              Using the Limited Broadcast address instead of a Directed
+              Broadcast address may improve system robustness.  Problems
+              are often caused by machines that do not understand the
+              plethora of broadcast addresses (see Section 3.2.1.3), or
+              that may have different ideas about which broadcast
+              addresses are in use.  The prime example of the latter is
+              machines that do not understand subnetting but are
+              attached to a subnetted net.  Sending a Subnet Broadcast
+              for the connected network will confuse those machines,
+              which will see it as a message to some other host.
+
+              There has been discussion on whether a datagram addressed
+              to the Limited Broadcast address ought to be sent from all
+              the interfaces of a multihomed host.  This specification
+              takes no stand on the issue.
+
+      3.3.7  IP Multicasting
+
+         A host SHOULD support local IP multicasting on all connected
+         networks for which a mapping from Class D IP addresses to
+         link-layer addresses has been specified (see below).  Support
+         for local IP multicasting includes sending multicast datagrams,
+         joining multicast groups and receiving multicast datagrams, and
+         leaving multicast groups.  This implies support for all of
+         [IP:4] except the IGMP protocol itself, which is OPTIONAL.
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 67]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+         DISCUSSION:
+              IGMP provides gateways that are capable of multicast
+              routing with the information required to support IP
+              multicasting across multiple networks.  At this time,
+              multicast-routing gateways are in the experimental stage
+              and are not widely available.  For hosts that are not
+              connected to networks with multicast-routing gateways or
+              that do not need to receive multicast datagrams
+              originating on other networks, IGMP serves no purpose and
+              is therefore optional for now.  However, the rest of
+              [IP:4] is currently recommended for the purpose of
+              providing IP-layer access to local network multicast
+              addressing, as a preferable alternative to local broadcast
+              addressing.  It is expected that IGMP will become
+              recommended at some future date, when multicast-routing
+              gateways have become more widely available.
+
+         If IGMP is not implemented, a host SHOULD still join the "all-
+         hosts" group (224.0.0.1) when the IP layer is initialized and
+         remain a member for as long as the IP layer is active.
+
+         DISCUSSION:
+              Joining the "all-hosts" group will support strictly local
+              uses of multicasting, e.g., a gateway discovery protocol,
+              even if IGMP is not implemented.
+
+         The mapping of IP Class D addresses to local addresses is
+         currently specified for the following types of networks:
+
+         o    Ethernet/IEEE 802.3, as defined in [IP:4].
+
+         o    Any network that supports broadcast but not multicast
+              addressing: all IP Class D addresses map to the local
+              broadcast address.
+
+         o    Any type of point-to-point link (e.g., SLIP or HDLC
+              links): no mapping required.  All IP multicast datagrams
+              are sent as-is, inside the local framing.
+
+         Mappings for other types of networks will be specified in the
+         future.
+
+         A host SHOULD provide a way for higher-layer protocols or
+         applications to determine which of the host's connected
+         network(s) support IP multicast addressing.
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 68]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+      3.3.8  Error Reporting
+
+         Wherever practical, hosts MUST return ICMP error datagrams on
+         detection of an error, except in those cases where returning an
+         ICMP error message is specifically prohibited.
+
+         DISCUSSION:
+              A common phenomenon in datagram networks is the "black
+              hole disease": datagrams are sent out, but nothing comes
+              back.  Without any error datagrams, it is difficult for
+              the user to figure out what the problem is.
+
+   3.4  INTERNET/TRANSPORT LAYER INTERFACE
+
+      The interface between the IP layer and the transport layer MUST
+      provide full access to all the mechanisms of the IP layer,
+      including options, Type-of-Service, and Time-to-Live.  The
+      transport layer MUST either have mechanisms to set these interface
+      parameters, or provide a path to pass them through from an
+      application, or both.
+
+      DISCUSSION:
+           Applications are urged to make use of these mechanisms where
+           applicable, even when the mechanisms are not currently
+           effective in the Internet (e.g., TOS).  This will allow these
+           mechanisms to be immediately useful when they do become
+           effective, without a large amount of retrofitting of host
+           software.
+
+      We now describe a conceptual interface between the transport layer
+      and the IP layer, as a set of procedure calls.  This is an
+      extension of the information in Section 3.3 of RFC-791 [IP:1].
+
+
+      *    Send Datagram
+
+                SEND(src, dst, prot, TOS, TTL, BufPTR, len, Id, DF, opt
+                     => result )
+
+           where the parameters are defined in RFC-791.  Passing an Id
+           parameter is optional; see Section 3.2.1.5.
+
+
+      *    Receive Datagram
+
+                RECV(BufPTR, prot
+                     => result, src, dst, SpecDest, TOS, len, opt)
+
+
+
+
+Internet Engineering Task Force                                [Page 69]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+           All the parameters are defined in RFC-791, except for:
+
+                SpecDest = specific-destination address of datagram
+                            (defined in Section 3.2.1.3)
+
+           The result parameter dst contains the datagram's destination
+           address.  Since this may be a broadcast or multicast address,
+           the SpecDest parameter (not shown in RFC-791) MUST be passed.
+           The parameter opt contains all the IP options received in the
+           datagram; these MUST also be passed to the transport layer.
+
+
+      *    Select Source Address
+
+                GET_SRCADDR(remote, TOS)  -> local
+
+                remote = remote IP address
+                TOS = Type-of-Service
+                local = local IP address
+
+           See Section 3.3.4.3.
+
+
+      *    Find Maximum Datagram Sizes
+
+                GET_MAXSIZES(local, remote, TOS) -> MMS_R, MMS_S
+
+                MMS_R = maximum receive transport-message size.
+                MMS_S = maximum send transport-message size.
+               (local, remote, TOS defined above)
+
+           See Sections 3.3.2 and 3.3.3.
+
+
+      *    Advice on Delivery Success
+
+                ADVISE_DELIVPROB(sense, local, remote, TOS)
+
+           Here the parameter sense is a 1-bit flag indicating whether
+           positive or negative advice is being given; see the
+           discussion in Section 3.3.1.4. The other parameters were
+           defined earlier.
+
+
+      *    Send ICMP Message
+
+                SEND_ICMP(src, dst, TOS, TTL, BufPTR, len, Id, DF, opt)
+                     -> result
+
+
+
+Internet Engineering Task Force                                [Page 70]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+                (Parameters defined in RFC-791).
+
+           Passing an Id parameter is optional; see Section 3.2.1.5.
+           The transport layer MUST be able to send certain ICMP
+           messages:  Port Unreachable or any of the query-type
+           messages.  This function could be considered to be a special
+           case of the SEND() call, of course; we describe it separately
+           for clarity.
+
+
+      *    Receive ICMP Message
+
+                RECV_ICMP(BufPTR ) -> result, src, dst, len, opt
+
+                (Parameters defined in RFC-791).
+
+           The IP layer MUST pass certain ICMP messages up to the
+           appropriate transport-layer routine.  This function could be
+           considered to be a special case of the RECV() call, of
+           course; we describe it separately for clarity.
+
+           For an ICMP error message, the data that is passed up MUST
+           include the original Internet header plus all the octets of
+           the original message that are included in the ICMP message.
+           This data will be used by the transport layer to locate the
+           connection state information, if any.
+
+           In particular, the following ICMP messages are to be passed
+           up:
+
+           o    Destination Unreachable
+
+           o    Source Quench
+
+           o    Echo Reply (to ICMP user interface, unless the Echo
+                Request originated in the IP layer)
+
+           o    Timestamp Reply (to ICMP user interface)
+
+           o    Time Exceeded
+
+
+      DISCUSSION:
+           In the future, there may be additions to this interface to
+           pass path data (see Section 3.3.1.3) between the IP and
+           transport layers.
+
+
+
+
+
+Internet Engineering Task Force                                [Page 71]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+   3.5  INTERNET LAYER REQUIREMENTS SUMMARY
+
+
+                                                 |        | | | |S| |
+                                                 |        | | | |H| |F
+                                                 |        | | | |O|M|o
+                                                 |        | |S| |U|U|o
+                                                 |        | |H| |L|S|t
+                                                 |        |M|O| |D|T|n
+                                                 |        |U|U|M| | |o
+                                                 |        |S|L|A|N|N|t
+                                                 |        |T|D|Y|O|O|t
+FEATURE                                          |SECTION | | | |T|T|e
+-------------------------------------------------|--------|-|-|-|-|-|--
+                                                 |        | | | | | |
+Implement IP and ICMP                            |3.1     |x| | | | |
+Handle remote multihoming in application layer   |3.1     |x| | | | |
+Support local multihoming                        |3.1     | | |x| | |
+Meet gateway specs if forward datagrams          |3.1     |x| | | | |
+Configuration switch for embedded gateway        |3.1     |x| | | | |1
+   Config switch default to non-gateway          |3.1     |x| | | | |1
+   Auto-config based on number of interfaces     |3.1     | | | | |x|1
+Able to log discarded datagrams                  |3.1     | |x| | | |
+   Record in counter                             |3.1     | |x| | | |
+                                                 |        | | | | | |
+Silently discard Version != 4                    |3.2.1.1 |x| | | | |
+Verify IP checksum, silently discard bad dgram   |3.2.1.2 |x| | | | |
+Addressing:                                      |        | | | | | |
+  Subnet addressing (RFC-950)                    |3.2.1.3 |x| | | | |
+  Src address must be host's own IP address      |3.2.1.3 |x| | | | |
+  Silently discard datagram with bad dest addr   |3.2.1.3 |x| | | | |
+  Silently discard datagram with bad src addr    |3.2.1.3 |x| | | | |
+Support reassembly                               |3.2.1.4 |x| | | | |
+Retain same Id field in identical datagram       |3.2.1.5 | | |x| | |
+                                                 |        | | | | | |
+TOS:                                             |        | | | | | |
+  Allow transport layer to set TOS               |3.2.1.6 |x| | | | |
+  Pass received TOS up to transport layer        |3.2.1.6 | |x| | | |
+  Use RFC-795 link-layer mappings for TOS        |3.2.1.6 | | | |x| |
+TTL:                                             |        | | | | | |
+  Send packet with TTL of 0                      |3.2.1.7 | | | | |x|
+  Discard received packets with TTL < 2          |3.2.1.7 | | | | |x|
+  Allow transport layer to set TTL               |3.2.1.7 |x| | | | |
+  Fixed TTL is configurable                      |3.2.1.7 |x| | | | |
+                                                 |        | | | | | |
+IP Options:                                      |        | | | | | |
+  Allow transport layer to send IP options       |3.2.1.8 |x| | | | |
+  Pass all IP options rcvd to higher layer       |3.2.1.8 |x| | | | |
+
+
+
+Internet Engineering Task Force                                [Page 72]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+  IP layer silently ignore unknown options       |3.2.1.8 |x| | | | |
+  Security option                                |3.2.1.8a| | |x| | |
+  Send Stream Identifier option                  |3.2.1.8b| | | |x| |
+  Silently ignore Stream Identifier option       |3.2.1.8b|x| | | | |
+  Record Route option                            |3.2.1.8d| | |x| | |
+  Timestamp option                               |3.2.1.8e| | |x| | |
+Source Route Option:                             |        | | | | | |
+  Originate & terminate Source Route options     |3.2.1.8c|x| | | | |
+  Datagram with completed SR passed up to TL     |3.2.1.8c|x| | | | |
+  Build correct (non-redundant) return route     |3.2.1.8c|x| | | | |
+  Send multiple SR options in one header         |3.2.1.8c| | | | |x|
+                                                 |        | | | | | |
+ICMP:                                            |        | | | | | |
+  Silently discard ICMP msg with unknown type    |3.2.2   |x| | | | |
+  Include more than 8 octets of orig datagram    |3.2.2   | | |x| | |
+      Included octets same as received           |3.2.2   |x| | | | |
+  Demux ICMP Error to transport protocol         |3.2.2   |x| | | | |
+  Send ICMP error message with TOS=0             |3.2.2   | |x| | | |
+  Send ICMP error message for:                   |        | | | | | |
+   - ICMP error msg                              |3.2.2   | | | | |x|
+   - IP b'cast or IP m'cast                      |3.2.2   | | | | |x|
+   - Link-layer b'cast                           |3.2.2   | | | | |x|
+   - Non-initial fragment                        |3.2.2   | | | | |x|
+   - Datagram with non-unique src address        |3.2.2   | | | | |x|
+  Return ICMP error msgs (when not prohibited)   |3.3.8   |x| | | | |
+                                                 |        | | | | | |
+  Dest Unreachable:                              |        | | | | | |
+    Generate Dest Unreachable (code 2/3)         |3.2.2.1 | |x| | | |
+    Pass ICMP Dest Unreachable to higher layer   |3.2.2.1 |x| | | | |
+    Higher layer act on Dest Unreach             |3.2.2.1 | |x| | | |
+      Interpret Dest Unreach as only hint        |3.2.2.1 |x| | | | |
+  Redirect:                                      |        | | | | | |
+    Host send Redirect                           |3.2.2.2 | | | |x| |
+    Update route cache when recv Redirect        |3.2.2.2 |x| | | | |
+    Handle both Host and Net Redirects           |3.2.2.2 |x| | | | |
+    Discard illegal Redirect                     |3.2.2.2 | |x| | | |
+  Source Quench:                                 |        | | | | | |
+    Send Source Quench if buffering exceeded     |3.2.2.3 | | |x| | |
+    Pass Source Quench to higher layer           |3.2.2.3 |x| | | | |
+    Higher layer act on Source Quench            |3.2.2.3 | |x| | | |
+  Time Exceeded: pass to higher layer            |3.2.2.4 |x| | | | |
+  Parameter Problem:                             |        | | | | | |
+    Send Parameter Problem messages              |3.2.2.5 | |x| | | |
+    Pass Parameter Problem to higher layer       |3.2.2.5 |x| | | | |
+    Report Parameter Problem to user             |3.2.2.5 | | |x| | |
+                                                 |        | | | | | |
+  ICMP Echo Request or Reply:                    |        | | | | | |
+    Echo server and Echo client                  |3.2.2.6 |x| | | | |
+
+
+
+Internet Engineering Task Force                                [Page 73]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+    Echo client                                  |3.2.2.6 | |x| | | |
+    Discard Echo Request to broadcast address    |3.2.2.6 | | |x| | |
+    Discard Echo Request to multicast address    |3.2.2.6 | | |x| | |
+    Use specific-dest addr as Echo Reply src     |3.2.2.6 |x| | | | |
+    Send same data in Echo Reply                 |3.2.2.6 |x| | | | |
+    Pass Echo Reply to higher layer              |3.2.2.6 |x| | | | |
+    Reflect Record Route, Time Stamp options     |3.2.2.6 | |x| | | |
+    Reverse and reflect Source Route option      |3.2.2.6 |x| | | | |
+                                                 |        | | | | | |
+  ICMP Information Request or Reply:             |3.2.2.7 | | | |x| |
+  ICMP Timestamp and Timestamp Reply:            |3.2.2.8 | | |x| | |
+    Minimize delay variability                   |3.2.2.8 | |x| | | |1
+    Silently discard b'cast Timestamp            |3.2.2.8 | | |x| | |1
+    Silently discard m'cast Timestamp            |3.2.2.8 | | |x| | |1
+    Use specific-dest addr as TS Reply src       |3.2.2.8 |x| | | | |1
+    Reflect Record Route, Time Stamp options     |3.2.2.8 | |x| | | |1
+    Reverse and reflect Source Route option      |3.2.2.8 |x| | | | |1
+    Pass Timestamp Reply to higher layer         |3.2.2.8 |x| | | | |1
+    Obey rules for "standard value"              |3.2.2.8 |x| | | | |1
+                                                 |        | | | | | |
+  ICMP Address Mask Request and Reply:           |        | | | | | |
+    Addr Mask source configurable                |3.2.2.9 |x| | | | |
+    Support static configuration of addr mask    |3.2.2.9 |x| | | | |
+    Get addr mask dynamically during booting     |3.2.2.9 | | |x| | |
+    Get addr via ICMP Addr Mask Request/Reply    |3.2.2.9 | | |x| | |
+      Retransmit Addr Mask Req if no Reply       |3.2.2.9 |x| | | | |3
+      Assume default mask if no Reply            |3.2.2.9 | |x| | | |3
+      Update address mask from first Reply only  |3.2.2.9 |x| | | | |3
+    Reasonableness check on Addr Mask            |3.2.2.9 | |x| | | |
+    Send unauthorized Addr Mask Reply msgs       |3.2.2.9 | | | | |x|
+      Explicitly configured to be agent          |3.2.2.9 |x| | | | |
+    Static config=> Addr-Mask-Authoritative flag |3.2.2.9 | |x| | | |
+      Broadcast Addr Mask Reply when init.       |3.2.2.9 |x| | | | |3
+                                                 |        | | | | | |
+ROUTING OUTBOUND DATAGRAMS:                      |        | | | | | |
+  Use address mask in local/remote decision      |3.3.1.1 |x| | | | |
+  Operate with no gateways on conn network       |3.3.1.1 |x| | | | |
+  Maintain "route cache" of next-hop gateways    |3.3.1.2 |x| | | | |
+  Treat Host and Net Redirect the same           |3.3.1.2 | |x| | | |
+  If no cache entry, use default gateway         |3.3.1.2 |x| | | | |
+    Support multiple default gateways            |3.3.1.2 |x| | | | |
+  Provide table of static routes                 |3.3.1.2 | | |x| | |
+    Flag: route overridable by Redirects         |3.3.1.2 | | |x| | |
+  Key route cache on host, not net address       |3.3.1.3 | | |x| | |
+  Include TOS in route cache                     |3.3.1.3 | |x| | | |
+                                                 |        | | | | | |
+  Able to detect failure of next-hop gateway     |3.3.1.4 |x| | | | |
+  Assume route is good forever                   |3.3.1.4 | | | |x| |
+
+
+
+Internet Engineering Task Force                                [Page 74]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+  Ping gateways continuously                     |3.3.1.4 | | | | |x|
+  Ping only when traffic being sent              |3.3.1.4 |x| | | | |
+  Ping only when no positive indication          |3.3.1.4 |x| | | | |
+  Higher and lower layers give advice            |3.3.1.4 | |x| | | |
+  Switch from failed default g'way to another    |3.3.1.5 |x| | | | |
+  Manual method of entering config info          |3.3.1.6 |x| | | | |
+                                                 |        | | | | | |
+REASSEMBLY and FRAGMENTATION:                    |        | | | | | |
+  Able to reassemble incoming datagrams          |3.3.2   |x| | | | |
+    At least 576 byte datagrams                  |3.3.2   |x| | | | |
+    EMTU_R configurable or indefinite            |3.3.2   | |x| | | |
+  Transport layer able to learn MMS_R            |3.3.2   |x| | | | |
+  Send ICMP Time Exceeded on reassembly timeout  |3.3.2   |x| | | | |
+    Fixed reassembly timeout value               |3.3.2   | |x| | | |
+                                                 |        | | | | | |
+  Pass MMS_S to higher layers                    |3.3.3   |x| | | | |
+  Local fragmentation of outgoing packets        |3.3.3   | | |x| | |
+     Else don't send bigger than MMS_S           |3.3.3   |x| | | | |
+  Send max 576 to off-net destination            |3.3.3   | |x| | | |
+  All-Subnets-MTU configuration flag             |3.3.3   | | |x| | |
+                                                 |        | | | | | |
+MULTIHOMING:                                     |        | | | | | |
+  Reply with same addr as spec-dest addr         |3.3.4.2 | |x| | | |
+  Allow application to choose local IP addr      |3.3.4.2 |x| | | | |
+  Silently discard d'gram in "wrong" interface   |3.3.4.2 | | |x| | |
+  Only send d'gram through "right" interface     |3.3.4.2 | | |x| | |4
+                                                 |        | | | | | |
+SOURCE-ROUTE FORWARDING:                         |        | | | | | |
+  Forward datagram with Source Route option      |3.3.5   | | |x| | |1
+    Obey corresponding gateway rules             |3.3.5   |x| | | | |1
+      Update TTL by gateway rules                |3.3.5   |x| | | | |1
+      Able to generate ICMP err code 4, 5        |3.3.5   |x| | | | |1
+      IP src addr not local host                 |3.3.5   | | |x| | |1
+      Update Timestamp, Record Route options     |3.3.5   |x| | | | |1
+    Configurable switch for non-local SRing      |3.3.5   |x| | | | |1
+      Defaults to OFF                            |3.3.5   |x| | | | |1
+    Satisfy gwy access rules for non-local SRing |3.3.5   |x| | | | |1
+    If not forward, send Dest Unreach (cd 5)     |3.3.5   | |x| | | |2
+                                                 |        | | | | | |
+BROADCAST:                                       |        | | | | | |
+  Broadcast addr as IP source addr               |3.2.1.3 | | | | |x|
+  Receive 0 or -1 broadcast formats OK           |3.3.6   | |x| | | |
+  Config'ble option to send 0 or -1 b'cast       |3.3.6   | | |x| | |
+    Default to -1 broadcast                      |3.3.6   | |x| | | |
+  Recognize all broadcast address formats        |3.3.6   |x| | | | |
+  Use IP b'cast/m'cast addr in link-layer b'cast |3.3.6   |x| | | | |
+  Silently discard link-layer-only b'cast dg's   |3.3.6   | |x| | | |
+  Use Limited Broadcast addr for connected net   |3.3.6   | |x| | | |
+
+
+
+Internet Engineering Task Force                                [Page 75]
+
+
+
+
+RFC1122                      INTERNET LAYER                 October 1989
+
+
+                                                 |        | | | | | |
+MULTICAST:                                       |        | | | | | |
+  Support local IP multicasting (RFC-1112)       |3.3.7   | |x| | | |
+  Support IGMP (RFC-1112)                        |3.3.7   | | |x| | |
+  Join all-hosts group at startup                |3.3.7   | |x| | | |
+  Higher layers learn i'face m'cast capability   |3.3.7   | |x| | | |
+                                                 |        | | | | | |
+INTERFACE:                                       |        | | | | | |
+  Allow transport layer to use all IP mechanisms |3.4     |x| | | | |
+  Pass interface ident up to transport layer     |3.4     |x| | | | |
+  Pass all IP options up to transport layer      |3.4     |x| | | | |
+  Transport layer can send certain ICMP messages |3.4     |x| | | | |
+  Pass spec'd ICMP messages up to transp. layer  |3.4     |x| | | | |
+     Include IP hdr+8 octets or more from orig.  |3.4     |x| | | | |
+  Able to leap tall buildings at a single bound  |3.5     | |x| | | |
+
+Footnotes:
+
+(1)  Only if feature is implemented.
+
+(2)  This requirement is overruled if datagram is an ICMP error message.
+
+(3)  Only if feature is implemented and is configured "on".
+
+(4)  Unless has embedded gateway functionality or is source routed.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 76]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- UDP             October 1989
+
+
+4. TRANSPORT PROTOCOLS
+
+   4.1  USER DATAGRAM PROTOCOL -- UDP
+
+      4.1.1  INTRODUCTION
+
+         The User Datagram Protocol UDP [UDP:1] offers only a minimal
+         transport service -- non-guaranteed datagram delivery -- and
+         gives applications direct access to the datagram service of the
+         IP layer.  UDP is used by applications that do not require the
+         level of service of TCP or that wish to use communications
+         services (e.g., multicast or broadcast delivery) not available
+         from TCP.
+
+         UDP is almost a null protocol; the only services it provides
+         over IP are checksumming of data and multiplexing by port
+         number.  Therefore, an application program running over UDP
+         must deal directly with end-to-end communication problems that
+         a connection-oriented protocol would have handled -- e.g.,
+         retransmission for reliable delivery, packetization and
+         reassembly, flow control, congestion avoidance, etc., when
+         these are required.  The fairly complex coupling between IP and
+         TCP will be mirrored in the coupling between UDP and many
+         applications using UDP.
+
+      4.1.2  PROTOCOL WALK-THROUGH
+
+         There are no known errors in the specification of UDP.
+
+      4.1.3  SPECIFIC ISSUES
+
+         4.1.3.1  Ports
+
+            UDP well-known ports follow the same rules as TCP well-known
+            ports; see Section 4.2.2.1 below.
+
+            If a datagram arrives addressed to a UDP port for which
+            there is no pending LISTEN call, UDP SHOULD send an ICMP
+            Port Unreachable message.
+
+         4.1.3.2  IP Options
+
+            UDP MUST pass any IP option that it receives from the IP
+            layer transparently to the application layer.
+
+            An application MUST be able to specify IP options to be sent
+            in its UDP datagrams, and UDP MUST pass these options to the
+            IP layer.
+
+
+
+Internet Engineering Task Force                                [Page 77]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- UDP             October 1989
+
+
+            DISCUSSION:
+                 At present, the only options that need be passed
+                 through UDP are Source Route, Record Route, and Time
+                 Stamp.  However, new options may be defined in the
+                 future, and UDP need not and should not make any
+                 assumptions about the format or content of options it
+                 passes to or from the application; an exception to this
+                 might be an IP-layer security option.
+
+                 An application based on UDP will need to obtain a
+                 source route from a request datagram and supply a
+                 reversed route for sending the corresponding reply.
+
+         4.1.3.3  ICMP Messages
+
+            UDP MUST pass to the application layer all ICMP error
+            messages that it receives from the IP layer.  Conceptually
+            at least, this may be accomplished with an upcall to the
+            ERROR_REPORT routine (see Section 4.2.4.1).
+
+            DISCUSSION:
+                 Note that ICMP error messages resulting from sending a
+                 UDP datagram are received asynchronously.  A UDP-based
+                 application that wants to receive ICMP error messages
+                 is responsible for maintaining the state necessary to
+                 demultiplex these messages when they arrive; for
+                 example, the application may keep a pending receive
+                 operation for this purpose.  The application is also
+                 responsible to avoid confusion from a delayed ICMP
+                 error message resulting from an earlier use of the same
+                 port(s).
+
+         4.1.3.4  UDP Checksums
+
+            A host MUST implement the facility to generate and validate
+            UDP checksums.  An application MAY optionally be able to
+            control whether a UDP checksum will be generated, but it
+            MUST default to checksumming on.
+
+            If a UDP datagram is received with a checksum that is non-
+            zero and invalid, UDP MUST silently discard the datagram.
+            An application MAY optionally be able to control whether UDP
+            datagrams without checksums should be discarded or passed to
+            the application.
+
+            DISCUSSION:
+                 Some applications that normally run only across local
+                 area networks have chosen to turn off UDP checksums for
+
+
+
+Internet Engineering Task Force                                [Page 78]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- UDP             October 1989
+
+
+                 efficiency.  As a result, numerous cases of undetected
+                 errors have been reported.  The advisability of ever
+                 turning off UDP checksumming is very controversial.
+
+            IMPLEMENTATION:
+                 There is a common implementation error in UDP
+                 checksums.  Unlike the TCP checksum, the UDP checksum
+                 is optional; the value zero is transmitted in the
+                 checksum field of a UDP header to indicate the absence
+                 of a checksum.  If the transmitter really calculates a
+                 UDP checksum of zero, it must transmit the checksum as
+                 all 1's (65535).  No special action is required at the
+                 receiver, since zero and 65535 are equivalent in 1's
+                 complement arithmetic.
+
+         4.1.3.5  UDP Multihoming
+
+            When a UDP datagram is received, its specific-destination
+            address MUST be passed up to the application layer.
+
+            An application program MUST be able to specify the IP source
+            address to be used for sending a UDP datagram or to leave it
+            unspecified (in which case the networking software will
+            choose an appropriate source address).  There SHOULD be a
+            way to communicate the chosen source address up to the
+            application layer (e.g., so that the application can later
+            receive a reply datagram only from the corresponding
+            interface).
+
+            DISCUSSION:
+                 A request/response application that uses UDP should use
+                 a source address for the response that is the same as
+                 the specific destination address of the request.  See
+                 the "General Issues" section of [INTRO:1].
+
+         4.1.3.6  Invalid Addresses
+
+            A UDP datagram received with an invalid IP source address
+            (e.g., a broadcast or multicast address) must be discarded
+            by UDP or by the IP layer (see Section 3.2.1.3).
+
+            When a host sends a UDP datagram, the source address MUST be
+            (one of) the IP address(es) of the host.
+
+      4.1.4  UDP/APPLICATION LAYER INTERFACE
+
+         The application interface to UDP MUST provide the full services
+         of the IP/transport interface described in Section 3.4 of this
+
+
+
+Internet Engineering Task Force                                [Page 79]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- UDP             October 1989
+
+
+         document.  Thus, an application using UDP needs the functions
+         of the GET_SRCADDR(), GET_MAXSIZES(), ADVISE_DELIVPROB(), and
+         RECV_ICMP() calls described in Section 3.4.  For example,
+         GET_MAXSIZES() can be used to learn the effective maximum UDP
+         datagram size for a particular {interface,remote
+         host,TOS} triplet.
+
+         An application-layer program MUST be able to set the TTL and
+         TOS values as well as IP options for sending a UDP datagram,
+         and these values must be passed transparently to the IP layer.
+         UDP MAY pass the received TOS up to the application layer.
+
+      4.1.5  UDP REQUIREMENTS SUMMARY
+
+
+                                                 |        | | | |S| |
+                                                 |        | | | |H| |F
+                                                 |        | | | |O|M|o
+                                                 |        | |S| |U|U|o
+                                                 |        | |H| |L|S|t
+                                                 |        |M|O| |D|T|n
+                                                 |        |U|U|M| | |o
+                                                 |        |S|L|A|N|N|t
+                                                 |        |T|D|Y|O|O|t
+FEATURE                                          |SECTION | | | |T|T|e
+-------------------------------------------------|--------|-|-|-|-|-|--
+                                                 |        | | | | | |
+    UDP                                          |        | | | | | |
+-------------------------------------------------|--------|-|-|-|-|-|--
+                                                 |        | | | | | |
+UDP send Port Unreachable                        |4.1.3.1 | |x| | | |
+                                                 |        | | | | | |
+IP Options in UDP                                |        | | | | | |
+ - Pass rcv'd IP options to applic layer         |4.1.3.2 |x| | | | |
+ - Applic layer can specify IP options in Send   |4.1.3.2 |x| | | | |
+ - UDP passes IP options down to IP layer        |4.1.3.2 |x| | | | |
+                                                 |        | | | | | |
+Pass ICMP msgs up to applic layer                |4.1.3.3 |x| | | | |
+                                                 |        | | | | | |
+UDP checksums:                                   |        | | | | | |
+ - Able to generate/check checksum               |4.1.3.4 |x| | | | |
+ - Silently discard bad checksum                 |4.1.3.4 |x| | | | |
+ - Sender Option to not generate checksum        |4.1.3.4 | | |x| | |
+   - Default is to checksum                      |4.1.3.4 |x| | | | |
+ - Receiver Option to require checksum           |4.1.3.4 | | |x| | |
+                                                 |        | | | | | |
+UDP Multihoming                                  |        | | | | | |
+ - Pass spec-dest addr to application            |4.1.3.5 |x| | | | |
+
+
+
+Internet Engineering Task Force                                [Page 80]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- UDP             October 1989
+
+
+ - Applic layer can specify Local IP addr        |4.1.3.5 |x| | | | |
+ - Applic layer specify wild Local IP addr       |4.1.3.5 |x| | | | |
+ - Applic layer notified of Local IP addr used   |4.1.3.5 | |x| | | |
+                                                 |        | | | | | |
+Bad IP src addr silently discarded by UDP/IP     |4.1.3.6 |x| | | | |
+Only send valid IP source address                |4.1.3.6 |x| | | | |
+UDP Application Interface Services               |        | | | | | |
+Full IP interface of 3.4 for application         |4.1.4   |x| | | | |
+ - Able to spec TTL, TOS, IP opts when send dg   |4.1.4   |x| | | | |
+ - Pass received TOS up to applic layer          |4.1.4   | | |x| | |
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 81]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+   4.2  TRANSMISSION CONTROL PROTOCOL -- TCP
+
+      4.2.1  INTRODUCTION
+
+         The Transmission Control Protocol TCP [TCP:1] is the primary
+         virtual-circuit transport protocol for the Internet suite.  TCP
+         provides reliable, in-sequence delivery of a full-duplex stream
+         of octets (8-bit bytes).  TCP is used by those applications
+         needing reliable, connection-oriented transport service, e.g.,
+         mail (SMTP), file transfer (FTP), and virtual terminal service
+         (Telnet); requirements for these application-layer protocols
+         are described in [INTRO:1].
+
+      4.2.2  PROTOCOL WALK-THROUGH
+
+         4.2.2.1  Well-Known Ports: RFC-793 Section 2.7
+
+            DISCUSSION:
+                 TCP reserves port numbers in the range 0-255 for
+                 "well-known" ports, used to access services that are
+                 standardized across the Internet.  The remainder of the
+                 port space can be freely allocated to application
+                 processes.  Current well-known port definitions are
+                 listed in the RFC entitled "Assigned Numbers"
+                 [INTRO:6].  A prerequisite for defining a new well-
+                 known port is an RFC documenting the proposed service
+                 in enough detail to allow new implementations.
+
+                 Some systems extend this notion by adding a third
+                 subdivision of the TCP port space: reserved ports,
+                 which are generally used for operating-system-specific
+                 services.  For example, reserved ports might fall
+                 between 256 and some system-dependent upper limit.
+                 Some systems further choose to protect well-known and
+                 reserved ports by permitting only privileged users to
+                 open TCP connections with those port values.  This is
+                 perfectly reasonable as long as the host does not
+                 assume that all hosts protect their low-numbered ports
+                 in this manner.
+
+         4.2.2.2  Use of Push: RFC-793 Section 2.8
+
+            When an application issues a series of SEND calls without
+            setting the PUSH flag, the TCP MAY aggregate the data
+            internally without sending it.  Similarly, when a series of
+            segments is received without the PSH bit, a TCP MAY queue
+            the data internally without passing it to the receiving
+            application.
+
+
+
+Internet Engineering Task Force                                [Page 82]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+            The PSH bit is not a record marker and is independent of
+            segment boundaries.  The transmitter SHOULD collapse
+            successive PSH bits when it packetizes data, to send the
+            largest possible segment.
+
+            A TCP MAY implement PUSH flags on SEND calls.  If PUSH flags
+            are not implemented, then the sending TCP: (1) must not
+            buffer data indefinitely, and (2) MUST set the PSH bit in
+            the last buffered segment (i.e., when there is no more
+            queued data to be sent).
+
+            The discussion in RFC-793 on pages 48, 50, and 74
+            erroneously implies that a received PSH flag must be passed
+            to the application layer.  Passing a received PSH flag to
+            the application layer is now OPTIONAL.
+
+            An application program is logically required to set the PUSH
+            flag in a SEND call whenever it needs to force delivery of
+            the data to avoid a communication deadlock.  However, a TCP
+            SHOULD send a maximum-sized segment whenever possible, to
+            improve performance (see Section 4.2.3.4).
+
+            DISCUSSION:
+                 When the PUSH flag is not implemented on SEND calls,
+                 i.e., when the application/TCP interface uses a pure
+                 streaming model, responsibility for aggregating any
+                 tiny data fragments to form reasonable sized segments
+                 is partially borne by the application layer.
+
+                 Generally, an interactive application protocol must set
+                 the PUSH flag at least in the last SEND call in each
+                 command or response sequence.  A bulk transfer protocol
+                 like FTP should set the PUSH flag on the last segment
+                 of a file or when necessary to prevent buffer deadlock.
+
+                 At the receiver, the PSH bit forces buffered data to be
+                 delivered to the application (even if less than a full
+                 buffer has been received). Conversely, the lack of a
+                 PSH bit can be used to avoid unnecessary wakeup calls
+                 to the application process; this can be an important
+                 performance optimization for large timesharing hosts.
+                 Passing the PSH bit to the receiving application allows
+                 an analogous optimization within the application.
+
+         4.2.2.3  Window Size: RFC-793 Section 3.1
+
+            The window size MUST be treated as an unsigned number, or
+            else large window sizes will appear like negative windows
+
+
+
+Internet Engineering Task Force                                [Page 83]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+            and TCP will not work.  It is RECOMMENDED that
+            implementations reserve 32-bit fields for the send and
+            receive window sizes in the connection record and do all
+            window computations with 32 bits.
+
+            DISCUSSION:
+                 It is known that the window field in the TCP header is
+                 too small for high-speed, long-delay paths.
+                 Experimental TCP options have been defined to extend
+                 the window size; see for example [TCP:11].  In
+                 anticipation of the adoption of such an extension, TCP
+                 implementors should treat windows as 32 bits.
+
+         4.2.2.4  Urgent Pointer: RFC-793 Section 3.1
+
+            The second sentence is in error: the urgent pointer points
+            to the sequence number of the LAST octet (not LAST+1) in a
+            sequence of urgent data.  The description on page 56 (last
+            sentence) is correct.
+
+            A TCP MUST support a sequence of urgent data of any length.
+
+            A TCP MUST inform the application layer asynchronously
+            whenever it receives an Urgent pointer and there was
+            previously no pending urgent data, or whenever the Urgent
+            pointer advances in the data stream.  There MUST be a way
+            for the application to learn how much urgent data remains to
+            be read from the connection, or at least to determine
+            whether or not more urgent data remains to be read.
+
+            DISCUSSION:
+                 Although the Urgent mechanism may be used for any
+                 application, it is normally used to send "interrupt"-
+                 type commands to a Telnet program (see "Using Telnet
+                 Synch Sequence" section in [INTRO:1]).
+
+                 The asynchronous or "out-of-band" notification will
+                 allow the application to go into "urgent mode", reading
+                 data from the TCP connection.  This allows control
+                 commands to be sent to an application whose normal
+                 input buffers are full of unprocessed data.
+
+            IMPLEMENTATION:
+                 The generic ERROR-REPORT() upcall described in Section
+                 4.2.4.1 is a possible mechanism for informing the
+                 application of the arrival of urgent data.
+
+
+
+
+
+Internet Engineering Task Force                                [Page 84]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+         4.2.2.5  TCP Options: RFC-793 Section 3.1
+
+            A TCP MUST be able to receive a TCP option in any segment.
+            A TCP MUST ignore without error any TCP option it does not
+            implement, assuming that the option has a length field (all
+            TCP options defined in the future will have length fields).
+            TCP MUST be prepared to handle an illegal option length
+            (e.g., zero) without crashing; a suggested procedure is to
+            reset the connection and log the reason.
+
+         4.2.2.6  Maximum Segment Size Option: RFC-793 Section 3.1
+
+            TCP MUST implement both sending and receiving the Maximum
+            Segment Size option [TCP:4].
+
+            TCP SHOULD send an MSS (Maximum Segment Size) option in
+            every SYN segment when its receive MSS differs from the
+            default 536, and MAY send it always.
+
+            If an MSS option is not received at connection setup, TCP
+            MUST assume a default send MSS of 536 (576-40) [TCP:4].
+
+            The maximum size of a segment that TCP really sends, the
+            "effective send MSS," MUST be the smaller of the send MSS
+            (which reflects the available reassembly buffer size at the
+            remote host) and the largest size permitted by the IP layer:
+
+               Eff.snd.MSS =
+
+                  min(SendMSS+20, MMS_S) - TCPhdrsize - IPoptionsize
+
+            where:
+
+            *    SendMSS is the MSS value received from the remote host,
+                 or the default 536 if no MSS option is received.
+
+            *    MMS_S is the maximum size for a transport-layer message
+                 that TCP may send.
+
+            *    TCPhdrsize is the size of the TCP header; this is
+                 normally 20, but may be larger if TCP options are to be
+                 sent.
+
+            *    IPoptionsize is the size of any IP options that TCP
+                 will pass to the IP layer with the current message.
+
+
+            The MSS value to be sent in an MSS option must be less than
+
+
+
+Internet Engineering Task Force                                [Page 85]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+            or equal to:
+
+               MMS_R - 20
+
+            where MMS_R is the maximum size for a transport-layer
+            message that can be received (and reassembled).  TCP obtains
+            MMS_R and MMS_S from the IP layer; see the generic call
+            GET_MAXSIZES in Section 3.4.
+
+            DISCUSSION:
+                 The choice of TCP segment size has a strong effect on
+                 performance.  Larger segments increase throughput by
+                 amortizing header size and per-datagram processing
+                 overhead over more data bytes; however, if the packet
+                 is so large that it causes IP fragmentation, efficiency
+                 drops sharply if any fragments are lost [IP:9].
+
+                 Some TCP implementations send an MSS option only if the
+                 destination host is on a non-connected network.
+                 However, in general the TCP layer may not have the
+                 appropriate information to make this decision, so it is
+                 preferable to leave to the IP layer the task of
+                 determining a suitable MTU for the Internet path.  We
+                 therefore recommend that TCP always send the option (if
+                 not 536) and that the IP layer determine MMS_R as
+                 specified in 3.3.3 and 3.4.  A proposed IP-layer
+                 mechanism to measure the MTU would then modify the IP
+                 layer without changing TCP.
+
+         4.2.2.7  TCP Checksum: RFC-793 Section 3.1
+
+            Unlike the UDP checksum (see Section 4.1.3.4), the TCP
+            checksum is never optional.  The sender MUST generate it and
+            the receiver MUST check it.
+
+         4.2.2.8  TCP Connection State Diagram: RFC-793 Section 3.2,
+            page 23
+
+            There are several problems with this diagram:
+
+            (a)  The arrow from SYN-SENT to SYN-RCVD should be labeled
+                 with "snd SYN,ACK", to agree with the text on page 68
+                 and with Figure 8.
+
+            (b)  There could be an arrow from SYN-RCVD state to LISTEN
+                 state, conditioned on receiving a RST after a passive
+                 open (see text page 70).
+
+
+
+
+Internet Engineering Task Force                                [Page 86]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+            (c)  It is possible to go directly from FIN-WAIT-1 to the
+                 TIME-WAIT state (see page 75 of the spec).
+
+
+         4.2.2.9  Initial Sequence Number Selection: RFC-793 Section
+            3.3, page 27
+
+            A TCP MUST use the specified clock-driven selection of
+            initial sequence numbers.
+
+         4.2.2.10  Simultaneous Open Attempts: RFC-793 Section 3.4, page
+            32
+
+            There is an error in Figure 8: the packet on line 7 should
+            be identical to the packet on line 5.
+
+            A TCP MUST support simultaneous open attempts.
+
+            DISCUSSION:
+                 It sometimes surprises implementors that if two
+                 applications attempt to simultaneously connect to each
+                 other, only one connection is generated instead of two.
+                 This was an intentional design decision; don't try to
+                 "fix" it.
+
+         4.2.2.11  Recovery from Old Duplicate SYN: RFC-793 Section 3.4,
+            page 33
+
+            Note that a TCP implementation MUST keep track of whether a
+            connection has reached SYN-RCVD state as the result of a
+            passive OPEN or an active OPEN.
+
+         4.2.2.12  RST Segment: RFC-793 Section 3.4
+
+            A TCP SHOULD allow a received RST segment to include data.
+
+            DISCUSSION:
+                 It has been suggested that a RST segment could contain
+                 ASCII text that encoded and explained the cause of the
+                 RST.  No standard has yet been established for such
+                 data.
+
+         4.2.2.13  Closing a Connection: RFC-793 Section 3.5
+
+            A TCP connection may terminate in two ways: (1) the normal
+            TCP close sequence using a FIN handshake, and (2) an "abort"
+            in which one or more RST segments are sent and the
+            connection state is immediately discarded.  If a TCP
+
+
+
+Internet Engineering Task Force                                [Page 87]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+            connection is closed by the remote site, the local
+            application MUST be informed whether it closed normally or
+            was aborted.
+
+            The normal TCP close sequence delivers buffered data
+            reliably in both directions.  Since the two directions of a
+            TCP connection are closed independently, it is possible for
+            a connection to be "half closed," i.e., closed in only one
+            direction, and a host is permitted to continue sending data
+            in the open direction on a half-closed connection.
+
+            A host MAY implement a "half-duplex" TCP close sequence, so
+            that an application that has called CLOSE cannot continue to
+            read data from the connection.  If such a host issues a
+            CLOSE call while received data is still pending in TCP, or
+            if new data is received after CLOSE is called, its TCP
+            SHOULD send a RST to show that data was lost.
+
+            When a connection is closed actively, it MUST linger in
+            TIME-WAIT state for a time 2xMSL (Maximum Segment Lifetime).
+            However, it MAY accept a new SYN from the remote TCP to
+            reopen the connection directly from TIME-WAIT state, if it:
+
+            (1)  assigns its initial sequence number for the new
+                 connection to be larger than the largest sequence
+                 number it used on the previous connection incarnation,
+                 and
+
+            (2)  returns to TIME-WAIT state if the SYN turns out to be
+                 an old duplicate.
+
+
+            DISCUSSION:
+                 TCP's full-duplex data-preserving close is a feature
+                 that is not included in the analogous ISO transport
+                 protocol TP4.
+
+                 Some systems have not implemented half-closed
+                 connections, presumably because they do not fit into
+                 the I/O model of their particular operating system.  On
+                 these systems, once an application has called CLOSE, it
+                 can no longer read input data from the connection; this
+                 is referred to as a "half-duplex" TCP close sequence.
+
+                 The graceful close algorithm of TCP requires that the
+                 connection state remain defined on (at least)  one end
+                 of the connection, for a timeout period of 2xMSL, i.e.,
+                 4 minutes.  During this period, the (remote socket,
+
+
+
+Internet Engineering Task Force                                [Page 88]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+                 local socket) pair that defines the connection is busy
+                 and cannot be reused.  To shorten the time that a given
+                 port pair is tied up, some TCPs allow a new SYN to be
+                 accepted in TIME-WAIT state.
+
+         4.2.2.14  Data Communication: RFC-793 Section 3.7, page 40
+
+            Since RFC-793 was written, there has been extensive work on
+            TCP algorithms to achieve efficient data communication.
+            Later sections of the present document describe required and
+            recommended TCP algorithms to determine when to send data
+            (Section 4.2.3.4), when to send an acknowledgment (Section
+            4.2.3.2), and when to update the window (Section 4.2.3.3).
+
+            DISCUSSION:
+                 One important performance issue is "Silly Window
+                 Syndrome" or "SWS" [TCP:5], a stable pattern of small
+                 incremental window movements resulting in extremely
+                 poor TCP performance.  Algorithms to avoid SWS are
+                 described below for both the sending side (Section
+                 4.2.3.4) and the receiving side (Section 4.2.3.3).
+
+                 In brief, SWS is caused by the receiver advancing the
+                 right window edge whenever it has any new buffer space
+                 available to receive data and by the sender using any
+                 incremental window, no matter how small, to send more
+                 data [TCP:5].  The result can be a stable pattern of
+                 sending tiny data segments, even though both sender and
+                 receiver have a large total buffer space for the
+                 connection.  SWS can only occur during the transmission
+                 of a large amount of data; if the connection goes
+                 quiescent, the problem will disappear.  It is caused by
+                 typical straightforward implementation of window
+                 management, but the sender and receiver algorithms
+                 given below will avoid it.
+
+                 Another important TCP performance issue is that some
+                 applications, especially remote login to character-at-
+                 a-time hosts, tend to send streams of one-octet data
+                 segments.  To avoid deadlocks, every TCP SEND call from
+                 such applications must be "pushed", either explicitly
+                 by the application or else implicitly by TCP.  The
+                 result may be a stream of TCP segments that contain one
+                 data octet each, which makes very inefficient use of
+                 the Internet and contributes to Internet congestion.
+                 The Nagle Algorithm described in Section 4.2.3.4
+                 provides a simple and effective solution to this
+                 problem.  It does have the effect of clumping
+
+
+
+Internet Engineering Task Force                                [Page 89]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+                 characters over Telnet connections; this may initially
+                 surprise users accustomed to single-character echo, but
+                 user acceptance has not been a problem.
+
+                 Note that the Nagle algorithm and the send SWS
+                 avoidance algorithm play complementary roles in
+                 improving performance.  The Nagle algorithm discourages
+                 sending tiny segments when the data to be sent
+                 increases in small increments, while the SWS avoidance
+                 algorithm discourages small segments resulting from the
+                 right window edge advancing in small increments.
+
+                 A careless implementation can send two or more
+                 acknowledgment segments per data segment received.  For
+                 example, suppose the receiver acknowledges every data
+                 segment immediately.  When the application program
+                 subsequently consumes the data and increases the
+                 available receive buffer space again, the receiver may
+                 send a second acknowledgment segment to update the
+                 window at the sender.  The extreme case occurs with
+                 single-character segments on TCP connections using the
+                 Telnet protocol for remote login service.  Some
+                 implementations have been observed in which each
+                 incoming 1-character segment generates three return
+                 segments: (1) the acknowledgment, (2) a one byte
+                 increase in the window, and (3) the echoed character,
+                 respectively.
+
+         4.2.2.15  Retransmission Timeout: RFC-793 Section 3.7, page 41
+
+            The algorithm suggested in RFC-793 for calculating the
+            retransmission timeout is now known to be inadequate; see
+            Section 4.2.3.1 below.
+
+            Recent work by Jacobson [TCP:7] on Internet congestion and
+            TCP retransmission stability has produced a transmission
+            algorithm combining "slow start" with "congestion
+            avoidance".  A TCP MUST implement this algorithm.
+
+            If a retransmitted packet is identical to the original
+            packet (which implies not only that the data boundaries have
+            not changed, but also that the window and acknowledgment
+            fields of the header have not changed), then the same IP
+            Identification field MAY be used (see Section 3.2.1.5).
+
+            IMPLEMENTATION:
+                 Some TCP implementors have chosen to "packetize" the
+                 data stream, i.e., to pick segment boundaries when
+
+
+
+Internet Engineering Task Force                                [Page 90]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+                 segments are originally sent and to queue these
+                 segments in a "retransmission queue" until they are
+                 acknowledged.  Another design (which may be simpler) is
+                 to defer packetizing until each time data is
+                 transmitted or retransmitted, so there will be no
+                 segment retransmission queue.
+
+                 In an implementation with a segment retransmission
+                 queue, TCP performance may be enhanced by repacketizing
+                 the segments awaiting acknowledgment when the first
+                 retransmission timeout occurs.  That is, the
+                 outstanding segments that fitted would be combined into
+                 one maximum-sized segment, with a new IP Identification
+                 value.  The TCP would then retain this combined segment
+                 in the retransmit queue until it was acknowledged.
+                 However, if the first two segments in the
+                 retransmission queue totalled more than one maximum-
+                 sized segment, the TCP would retransmit only the first
+                 segment using the original IP Identification field.
+
+         4.2.2.16  Managing the Window: RFC-793 Section 3.7, page 41
+
+            A TCP receiver SHOULD NOT shrink the window, i.e., move the
+            right window edge to the left.  However, a sending TCP MUST
+            be robust against window shrinking, which may cause the
+            "useable window" (see Section 4.2.3.4) to become negative.
+
+            If this happens, the sender SHOULD NOT send new data, but
+            SHOULD retransmit normally the old unacknowledged data
+            between SND.UNA and SND.UNA+SND.WND.  The sender MAY also
+            retransmit old data beyond SND.UNA+SND.WND, but SHOULD NOT
+            time out the connection if data beyond the right window edge
+            is not acknowledged.  If the window shrinks to zero, the TCP
+            MUST probe it in the standard way (see next Section).
+
+            DISCUSSION:
+                 Many TCP implementations become confused if the window
+                 shrinks from the right after data has been sent into a
+                 larger window.  Note that TCP has a heuristic to select
+                 the latest window update despite possible datagram
+                 reordering; as a result, it may ignore a window update
+                 with a smaller window than previously offered if
+                 neither the sequence number nor the acknowledgment
+                 number is increased.
+
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 91]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+         4.2.2.17  Probing Zero Windows: RFC-793 Section 3.7, page 42
+
+            Probing of zero (offered) windows MUST be supported.
+
+            A TCP MAY keep its offered receive window closed
+            indefinitely.  As long as the receiving TCP continues to
+            send acknowledgments in response to the probe segments, the
+            sending TCP MUST allow the connection to stay open.
+
+            DISCUSSION:
+                 It is extremely important to remember that ACK
+                 (acknowledgment) segments that contain no data are not
+                 reliably transmitted by TCP.  If zero window probing is
+                 not supported, a connection may hang forever when an
+                 ACK segment that re-opens the window is lost.
+
+                 The delay in opening a zero window generally occurs
+                 when the receiving application stops taking data from
+                 its TCP.  For example, consider a printer daemon
+                 application, stopped because the printer ran out of
+                 paper.
+
+            The transmitting host SHOULD send the first zero-window
+            probe when a zero window has existed for the retransmission
+            timeout period (see Section 4.2.2.15), and SHOULD increase
+            exponentially the interval between successive probes.
+
+            DISCUSSION:
+                 This procedure minimizes delay if the zero-window
+                 condition is due to a lost ACK segment containing a
+                 window-opening update.  Exponential backoff is
+                 recommended, possibly with some maximum interval not
+                 specified here.  This procedure is similar to that of
+                 the retransmission algorithm, and it may be possible to
+                 combine the two procedures in the implementation.
+
+         4.2.2.18  Passive OPEN Calls:  RFC-793 Section 3.8
+
+            Every passive OPEN call either creates a new connection
+            record in LISTEN state, or it returns an error; it MUST NOT
+            affect any previously created connection record.
+
+            A TCP that supports multiple concurrent users MUST provide
+            an OPEN call that will functionally allow an application to
+            LISTEN on a port while a connection block with the same
+            local port is in SYN-SENT or SYN-RECEIVED state.
+
+            DISCUSSION:
+
+
+
+Internet Engineering Task Force                                [Page 92]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+                 Some applications (e.g., SMTP servers) may need to
+                 handle multiple connection attempts at about the same
+                 time.  The probability of a connection attempt failing
+                 is reduced by giving the application some means of
+                 listening for a new connection at the same time that an
+                 earlier connection attempt is going through the three-
+                 way handshake.
+
+            IMPLEMENTATION:
+                 Acceptable implementations of concurrent opens may
+                 permit multiple passive OPEN calls, or they may allow
+                 "cloning" of LISTEN-state connections from a single
+                 passive OPEN call.
+
+         4.2.2.19  Time to Live: RFC-793 Section 3.9, page 52
+
+            RFC-793 specified that TCP was to request the IP layer to
+            send TCP segments with TTL = 60.  This is obsolete; the TTL
+            value used to send TCP segments MUST be configurable.  See
+            Section 3.2.1.7 for discussion.
+
+         4.2.2.20  Event Processing: RFC-793 Section 3.9
+
+            While it is not strictly required, a TCP SHOULD be capable
+            of queueing out-of-order TCP segments.  Change the "may" in
+            the last sentence of the first paragraph on page 70 to
+            "should".
+
+            DISCUSSION:
+                 Some small-host implementations have omitted segment
+                 queueing because of limited buffer space.  This
+                 omission may be expected to adversely affect TCP
+                 throughput, since loss of a single segment causes all
+                 later segments to appear to be "out of sequence".
+
+            In general, the processing of received segments MUST be
+            implemented to aggregate ACK segments whenever possible.
+            For example, if the TCP is processing a series of queued
+            segments, it MUST process them all before sending any ACK
+            segments.
+
+            Here are some detailed error corrections and notes on the
+            Event Processing section of RFC-793.
+
+            (a)  CLOSE Call, CLOSE-WAIT state, p. 61: enter LAST-ACK
+                 state, not CLOSING.
+
+            (b)  LISTEN state, check for SYN (pp. 65, 66): With a SYN
+
+
+
+Internet Engineering Task Force                                [Page 93]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+                 bit, if the security/compartment or the precedence is
+                 wrong for the segment, a reset is sent.  The wrong form
+                 of reset is shown in the text; it should be:
+
+                   <SEQ=0><ACK=SEG.SEQ+SEG.LEN><CTL=RST,ACK>
+
+
+            (c)  SYN-SENT state, Check for SYN, p. 68: When the
+                 connection enters ESTABLISHED state, the following
+                 variables must be set:
+                    SND.WND <- SEG.WND
+                    SND.WL1 <- SEG.SEQ
+                    SND.WL2 <- SEG.ACK
+
+
+            (d)  Check security and precedence, p. 71: The first heading
+                 "ESTABLISHED STATE" should really be a list of all
+                 states other than SYN-RECEIVED: ESTABLISHED, FIN-WAIT-
+                 1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, and
+                 TIME-WAIT.
+
+            (e)  Check SYN bit, p. 71:  "In SYN-RECEIVED state and if
+                 the connection was initiated with a passive OPEN, then
+                 return this connection to the LISTEN state and return.
+                 Otherwise...".
+
+            (f)  Check ACK field, SYN-RECEIVED state, p. 72: When the
+                 connection enters ESTABLISHED state, the variables
+                 listed in (c) must be set.
+
+            (g)  Check ACK field, ESTABLISHED state, p. 72: The ACK is a
+                 duplicate if SEG.ACK =< SND.UNA (the = was omitted).
+                 Similarly, the window should be updated if: SND.UNA =<
+                 SEG.ACK =< SND.NXT.
+
+            (h)  USER TIMEOUT, p. 77:
+
+                 It would be better to notify the application of the
+                 timeout rather than letting TCP force the connection
+                 closed.  However, see also Section 4.2.3.5.
+
+
+         4.2.2.21  Acknowledging Queued Segments: RFC-793 Section 3.9
+
+            A TCP MAY send an ACK segment acknowledging RCV.NXT when a
+            valid segment arrives that is in the window but not at the
+            left window edge.
+
+
+
+
+Internet Engineering Task Force                                [Page 94]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+            DISCUSSION:
+                 RFC-793 (see page 74) was ambiguous about whether or
+                 not an ACK segment should be sent when an out-of-order
+                 segment was received, i.e., when SEG.SEQ was unequal to
+                 RCV.NXT.
+
+                 One reason for ACKing out-of-order segments might be to
+                 support an experimental algorithm known as "fast
+                 retransmit".   With this algorithm, the sender uses the
+                 "redundant" ACK's to deduce that a segment has been
+                 lost before the retransmission timer has expired.  It
+                 counts the number of times an ACK has been received
+                 with the same value of SEG.ACK and with the same right
+                 window edge.  If more than a threshold number of such
+                 ACK's is received, then the segment containing the
+                 octets starting at SEG.ACK is assumed to have been lost
+                 and is retransmitted, without awaiting a timeout.  The
+                 threshold is chosen to compensate for the maximum
+                 likely segment reordering in the Internet.  There is
+                 not yet enough experience with the fast retransmit
+                 algorithm to determine how useful it is.
+
+      4.2.3  SPECIFIC ISSUES
+
+         4.2.3.1  Retransmission Timeout Calculation
+
+            A host TCP MUST implement Karn's algorithm and Jacobson's
+            algorithm for computing the retransmission timeout ("RTO").
+
+            o    Jacobson's algorithm for computing the smoothed round-
+                 trip ("RTT") time incorporates a simple measure of the
+                 variance [TCP:7].
+
+            o    Karn's algorithm for selecting RTT measurements ensures
+                 that ambiguous round-trip times will not corrupt the
+                 calculation of the smoothed round-trip time [TCP:6].
+
+            This implementation also MUST include "exponential backoff"
+            for successive RTO values for the same segment.
+            Retransmission of SYN segments SHOULD use the same algorithm
+            as data segments.
+
+            DISCUSSION:
+                 There were two known problems with the RTO calculations
+                 specified in RFC-793.  First, the accurate measurement
+                 of RTTs is difficult when there are retransmissions.
+                 Second, the algorithm to compute the smoothed round-
+                 trip time is inadequate [TCP:7], because it incorrectly
+
+
+
+Internet Engineering Task Force                                [Page 95]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+                 assumed that the variance in RTT values would be small
+                 and constant.  These problems were solved by Karn's and
+                 Jacobson's algorithms, respectively.
+
+                 The performance increase resulting from the use of
+                 these improvements varies from noticeable to dramatic.
+                 Jacobson's algorithm for incorporating the measured RTT
+                 variance is especially important on a low-speed link,
+                 where the natural variation of packet sizes causes a
+                 large variation in RTT.  One vendor found link
+                 utilization on a 9.6kb line went from 10% to 90% as a
+                 result of implementing Jacobson's variance algorithm in
+                 TCP.
+
+            The following values SHOULD be used to initialize the
+            estimation parameters for a new connection:
+
+            (a)  RTT = 0 seconds.
+
+            (b)  RTO = 3 seconds.  (The smoothed variance is to be
+                 initialized to the value that will result in this RTO).
+
+            The recommended upper and lower bounds on the RTO are known
+            to be inadequate on large internets.  The lower bound SHOULD
+            be measured in fractions of a second (to accommodate high
+            speed LANs) and the upper bound should be 2*MSL, i.e., 240
+            seconds.
+
+            DISCUSSION:
+                 Experience has shown that these initialization values
+                 are reasonable, and that in any case the Karn and
+                 Jacobson algorithms make TCP behavior reasonably
+                 insensitive to the initial parameter choices.
+
+         4.2.3.2  When to Send an ACK Segment
+
+            A host that is receiving a stream of TCP data segments can
+            increase efficiency in both the Internet and the hosts by
+            sending fewer than one ACK (acknowledgment) segment per data
+            segment received; this is known as a "delayed ACK" [TCP:5].
+
+            A TCP SHOULD implement a delayed ACK, but an ACK should not
+            be excessively delayed; in particular, the delay MUST be
+            less than 0.5 seconds, and in a stream of full-sized
+            segments there SHOULD be an ACK for at least every second
+            segment.
+
+            DISCUSSION:
+
+
+
+Internet Engineering Task Force                                [Page 96]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+                 A delayed ACK gives the application an opportunity to
+                 update the window and perhaps to send an immediate
+                 response.  In particular, in the case of character-mode
+                 remote login, a delayed ACK can reduce the number of
+                 segments sent by the server by a factor of 3 (ACK,
+                 window update, and echo character all combined in one
+                 segment).
+
+                 In addition, on some large multi-user hosts, a delayed
+                 ACK can substantially reduce protocol processing
+                 overhead by reducing the total number of packets to be
+                 processed [TCP:5].  However, excessive delays on ACK's
+                 can disturb the round-trip timing and packet "clocking"
+                 algorithms [TCP:7].
+
+         4.2.3.3  When to Send a Window Update
+
+            A TCP MUST include a SWS avoidance algorithm in the receiver
+            [TCP:5].
+
+            IMPLEMENTATION:
+                 The receiver's SWS avoidance algorithm determines when
+                 the right window edge may be advanced; this is
+                 customarily known as "updating the window".  This
+                 algorithm combines with the delayed ACK algorithm (see
+                 Section 4.2.3.2) to determine when an ACK segment
+                 containing the current window will really be sent to
+                 the receiver.  We use the notation of RFC-793; see
+                 Figures 4 and 5 in that document.
+
+                 The solution to receiver SWS is to avoid advancing the
+                 right window edge RCV.NXT+RCV.WND in small increments,
+                 even if data is received from the network in small
+                 segments.
+
+                 Suppose the total receive buffer space is RCV.BUFF.  At
+                 any given moment, RCV.USER octets of this total may be
+                 tied up with data that has been received and
+                 acknowledged but which the user process has not yet
+                 consumed.  When the connection is quiescent, RCV.WND =
+                 RCV.BUFF and RCV.USER = 0.
+
+                 Keeping the right window edge fixed as data arrives and
+                 is acknowledged requires that the receiver offer less
+                 than its full buffer space, i.e., the receiver must
+                 specify a RCV.WND that keeps RCV.NXT+RCV.WND constant
+                 as RCV.NXT increases.  Thus, the total buffer space
+                 RCV.BUFF is generally divided into three parts:
+
+
+
+Internet Engineering Task Force                                [Page 97]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+
+                 |<------- RCV.BUFF ---------------->|
+                      1             2            3
+             ----|---------|------------------|------|----
+                        RCV.NXT               ^
+                                           (Fixed)
+
+             1 - RCV.USER =  data received but not yet consumed;
+             2 - RCV.WND =   space advertised to sender;
+             3 - Reduction = space available but not yet
+                             advertised.
+
+
+                 The suggested SWS avoidance algorithm for the receiver
+                 is to keep RCV.NXT+RCV.WND fixed until the reduction
+                 satisfies:
+
+                      RCV.BUFF - RCV.USER - RCV.WND  >=
+
+                             min( Fr * RCV.BUFF, Eff.snd.MSS )
+
+                 where Fr is a fraction whose recommended value is 1/2,
+                 and Eff.snd.MSS is the effective send MSS for the
+                 connection (see Section 4.2.2.6).  When the inequality
+                 is satisfied, RCV.WND is set to RCV.BUFF-RCV.USER.
+
+                 Note that the general effect of this algorithm is to
+                 advance RCV.WND in increments of Eff.snd.MSS (for
+                 realistic receive buffers:  Eff.snd.MSS < RCV.BUFF/2).
+                 Note also that the receiver must use its own
+                 Eff.snd.MSS, assuming it is the same as the sender's.
+
+         4.2.3.4  When to Send Data
+
+            A TCP MUST include a SWS avoidance algorithm in the sender.
+
+            A TCP SHOULD implement the Nagle Algorithm [TCP:9] to
+            coalesce short segments.  However, there MUST be a way for
+            an application to disable the Nagle algorithm on an
+            individual connection.  In all cases, sending data is also
+            subject to the limitation imposed by the Slow Start
+            algorithm (Section 4.2.2.15).
+
+            DISCUSSION:
+                 The Nagle algorithm is generally as follows:
+
+                      If there is unacknowledged data (i.e., SND.NXT >
+                      SND.UNA), then the sending TCP buffers all user
+
+
+
+Internet Engineering Task Force                                [Page 98]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+                      data (regardless of the PSH bit), until the
+                      outstanding data has been acknowledged or until
+                      the TCP can send a full-sized segment (Eff.snd.MSS
+                      bytes; see Section 4.2.2.6).
+
+                 Some applications (e.g., real-time display window
+                 updates) require that the Nagle algorithm be turned
+                 off, so small data segments can be streamed out at the
+                 maximum rate.
+
+            IMPLEMENTATION:
+                 The sender's SWS avoidance algorithm is more difficult
+                 than the receiver's, because the sender does not know
+                 (directly) the receiver's total buffer space RCV.BUFF.
+                 An approach which has been found to work well is for
+                 the sender to calculate Max(SND.WND), the maximum send
+                 window it has seen so far on the connection, and to use
+                 this value as an estimate of RCV.BUFF.  Unfortunately,
+                 this can only be an estimate; the receiver may at any
+                 time reduce the size of RCV.BUFF.  To avoid a resulting
+                 deadlock, it is necessary to have a timeout to force
+                 transmission of data, overriding the SWS avoidance
+                 algorithm.  In practice, this timeout should seldom
+                 occur.
+
+                 The "useable window" [TCP:5] is:
+
+                      U = SND.UNA + SND.WND - SND.NXT
+
+                 i.e., the offered window less the amount of data sent
+                 but not acknowledged.  If D is the amount of data
+                 queued in the sending TCP but not yet sent, then the
+                 following set of rules is recommended.
+
+                 Send data:
+
+                 (1)  if a maximum-sized segment can be sent, i.e., if:
+
+                           min(D,U) >= Eff.snd.MSS;
+
+
+                 (2)  or if the data is pushed and all queued data can
+                      be sent now, i.e., if:
+
+                          [SND.NXT = SND.UNA and] PUSHED and D <= U
+
+                      (the bracketed condition is imposed by the Nagle
+                      algorithm);
+
+
+
+Internet Engineering Task Force                                [Page 99]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+                 (3)  or if at least a fraction Fs of the maximum window
+                      can be sent, i.e., if:
+
+                          [SND.NXT = SND.UNA and]
+
+                                  min(D,U) >= Fs * Max(SND.WND);
+
+
+                 (4)  or if data is PUSHed and the override timeout
+                      occurs.
+
+                 Here Fs is a fraction whose recommended value is 1/2.
+                 The override timeout should be in the range 0.1 - 1.0
+                 seconds.  It may be convenient to combine this timer
+                 with the timer used to probe zero windows (Section
+                 4.2.2.17).
+
+                 Finally, note that the SWS avoidance algorithm just
+                 specified is to be used instead of the sender-side
+                 algorithm contained in [TCP:5].
+
+         4.2.3.5  TCP Connection Failures
+
+            Excessive retransmission of the same segment by TCP
+            indicates some failure of the remote host or the Internet
+            path.  This failure may be of short or long duration.  The
+            following procedure MUST be used to handle excessive
+            retransmissions of data segments [IP:11]:
+
+            (a)  There are two thresholds R1 and R2 measuring the amount
+                 of retransmission that has occurred for the same
+                 segment.  R1 and R2 might be measured in time units or
+                 as a count of retransmissions.
+
+            (b)  When the number of transmissions of the same segment
+                 reaches or exceeds threshold R1, pass negative advice
+                 (see Section 3.3.1.4) to the IP layer, to trigger
+                 dead-gateway diagnosis.
+
+            (c)  When the number of transmissions of the same segment
+                 reaches a threshold R2 greater than R1, close the
+                 connection.
+
+            (d)  An application MUST be able to set the value for R2 for
+                 a particular connection.  For example, an interactive
+                 application might set R2 to "infinity," giving the user
+                 control over when to disconnect.
+
+
+
+
+Internet Engineering Task Force                               [Page 100]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+            (e)  TCP SHOULD inform the application of the delivery
+                 problem (unless such information has been disabled by
+                 the application; see Section 4.2.4.1), when R1 is
+                 reached and before R2.  This will allow a remote login
+                 (User Telnet) application program to inform the user,
+                 for example.
+
+            The value of R1 SHOULD correspond to at least 3
+            retransmissions, at the current RTO.  The value of R2 SHOULD
+            correspond to at least 100 seconds.
+
+            An attempt to open a TCP connection could fail with
+            excessive retransmissions of the SYN segment or by receipt
+            of a RST segment or an ICMP Port Unreachable.  SYN
+            retransmissions MUST be handled in the general way just
+            described for data retransmissions, including notification
+            of the application layer.
+
+            However, the values of R1 and R2 may be different for SYN
+            and data segments.  In particular, R2 for a SYN segment MUST
+            be set large enough to provide retransmission of the segment
+            for at least 3 minutes.  The application can close the
+            connection (i.e., give up on the open attempt) sooner, of
+            course.
+
+            DISCUSSION:
+                 Some Internet paths have significant setup times, and
+                 the number of such paths is likely to increase in the
+                 future.
+
+         4.2.3.6  TCP Keep-Alives
+
+            Implementors MAY include "keep-alives" in their TCP
+            implementations, although this practice is not universally
+            accepted.  If keep-alives are included, the application MUST
+            be able to turn them on or off for each TCP connection, and
+            they MUST default to off.
+
+            Keep-alive packets MUST only be sent when no data or
+            acknowledgement packets have been received for the
+            connection within an interval.  This interval MUST be
+            configurable and MUST default to no less than two hours.
+
+            It is extremely important to remember that ACK segments that
+            contain no data are not reliably transmitted by TCP.
+            Consequently, if a keep-alive mechanism is implemented it
+            MUST NOT interpret failure to respond to any specific probe
+            as a dead connection.
+
+
+
+Internet Engineering Task Force                               [Page 101]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+            An implementation SHOULD send a keep-alive segment with no
+            data; however, it MAY be configurable to send a keep-alive
+            segment containing one garbage octet, for compatibility with
+            erroneous TCP implementations.
+
+            DISCUSSION:
+                 A "keep-alive" mechanism periodically probes the other
+                 end of a connection when the connection is otherwise
+                 idle, even when there is no data to be sent.  The TCP
+                 specification does not include a keep-alive mechanism
+                 because it could:  (1) cause perfectly good connections
+                 to break during transient Internet failures; (2)
+                 consume unnecessary bandwidth ("if no one is using the
+                 connection, who cares if it is still good?"); and (3)
+                 cost money for an Internet path that charges for
+                 packets.
+
+                 Some TCP implementations, however, have included a
+                 keep-alive mechanism.  To confirm that an idle
+                 connection is still active, these implementations send
+                 a probe segment designed to elicit a response from the
+                 peer TCP.  Such a segment generally contains SEG.SEQ =
+                 SND.NXT-1 and may or may not contain one garbage octet
+                 of data.  Note that on a quiet connection SND.NXT =
+                 RCV.NXT, so that this SEG.SEQ will be outside the
+                 window.  Therefore, the probe causes the receiver to
+                 return an acknowledgment segment, confirming that the
+                 connection is still live.  If the peer has dropped the
+                 connection due to a network partition or a crash, it
+                 will respond with a RST instead of an acknowledgment
+                 segment.
+
+                 Unfortunately, some misbehaved TCP implementations fail
+                 to respond to a segment with SEG.SEQ = SND.NXT-1 unless
+                 the segment contains data.  Alternatively, an
+                 implementation could determine whether a peer responded
+                 correctly to keep-alive packets with no garbage data
+                 octet.
+
+                 A TCP keep-alive mechanism should only be invoked in
+                 server applications that might otherwise hang
+                 indefinitely and consume resources unnecessarily if a
+                 client crashes or aborts a connection during a network
+                 failure.
+
+
+
+
+
+
+
+Internet Engineering Task Force                               [Page 102]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+         4.2.3.7  TCP Multihoming
+
+            If an application on a multihomed host does not specify the
+            local IP address when actively opening a TCP connection,
+            then the TCP MUST ask the IP layer to select a local IP
+            address before sending the (first) SYN.  See the function
+            GET_SRCADDR() in Section 3.4.
+
+            At all other times, a previous segment has either been sent
+            or received on this connection, and TCP MUST use the same
+            local address that was used in those previous
+            segments.
+
+         4.2.3.8  IP Options
+
+            When received options are passed up to TCP from the IP
+            layer, TCP MUST ignore options that it does not understand.
+
+            A TCP MAY support the Time Stamp and Record Route options.
+
+            An application MUST be able to specify a source route when
+            it actively opens a TCP connection, and this MUST take
+            precedence over a source route received in a datagram.
+
+            When a TCP connection is OPENed passively and a packet
+            arrives with a completed IP Source Route option (containing
+            a return route), TCP MUST save the return route and use it
+            for all segments sent on this connection.  If a different
+            source route arrives in a later segment, the later
+            definition SHOULD override the earlier one.
+
+         4.2.3.9  ICMP Messages
+
+            TCP MUST act on an ICMP error message passed up from the IP
+            layer, directing it to the connection that created the
+            error.  The necessary demultiplexing information can be
+            found in the IP header contained within the ICMP message.
+
+            o    Source Quench
+
+                 TCP MUST react to a Source Quench by slowing
+                 transmission on the connection.  The RECOMMENDED
+                 procedure is for a Source Quench to trigger a "slow
+                 start," as if a retransmission timeout had occurred.
+
+            o    Destination Unreachable -- codes 0, 1, 5
+
+                 Since these Unreachable messages indicate soft error
+
+
+
+Internet Engineering Task Force                               [Page 103]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+                 conditions, TCP MUST NOT abort the connection, and it
+                 SHOULD make the information available to the
+                 application.
+
+                 DISCUSSION:
+                      TCP could report the soft error condition directly
+                      to the application layer with an upcall to the
+                      ERROR_REPORT routine, or it could merely note the
+                      message and report it to the application only when
+                      and if the TCP connection times out.
+
+            o    Destination Unreachable -- codes 2-4
+
+                 These are hard error conditions, so TCP SHOULD abort
+                 the connection.
+
+            o    Time Exceeded -- codes 0, 1
+
+                 This should be handled the same way as Destination
+                 Unreachable codes 0, 1, 5 (see above).
+
+            o    Parameter Problem
+
+                 This should be handled the same way as Destination
+                 Unreachable codes 0, 1, 5 (see above).
+
+
+         4.2.3.10  Remote Address Validation
+
+            A TCP implementation MUST reject as an error a local OPEN
+            call for an invalid remote IP address (e.g., a broadcast or
+            multicast address).
+
+            An incoming SYN with an invalid source address MUST be
+            ignored either by TCP or by the IP layer (see Section
+            3.2.1.3).
+
+            A TCP implementation MUST silently discard an incoming SYN
+            segment that is addressed to a broadcast or multicast
+            address.
+
+         4.2.3.11  TCP Traffic Patterns
+
+            IMPLEMENTATION:
+                 The TCP protocol specification [TCP:1] gives the
+                 implementor much freedom in designing the algorithms
+                 that control the message flow over the connection --
+                 packetizing, managing the window, sending
+
+
+
+Internet Engineering Task Force                               [Page 104]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+                 acknowledgments, etc.  These design decisions are
+                 difficult because a TCP must adapt to a wide range of
+                 traffic patterns.  Experience has shown that a TCP
+                 implementor needs to verify the design on two extreme
+                 traffic patterns:
+
+                 o    Single-character Segments
+
+                      Even if the sender is using the Nagle Algorithm,
+                      when a TCP connection carries remote login traffic
+                      across a low-delay LAN the receiver will generally
+                      get a stream of single-character segments.  If
+                      remote terminal echo mode is in effect, the
+                      receiver's system will generally echo each
+                      character as it is received.
+
+                 o    Bulk Transfer
+
+                      When TCP is used for bulk transfer, the data
+                      stream should be made up (almost) entirely of
+                      segments of the size of the effective MSS.
+                      Although TCP uses a sequence number space with
+                      byte (octet) granularity, in bulk-transfer mode
+                      its operation should be as if TCP used a sequence
+                      space that counted only segments.
+
+                 Experience has furthermore shown that a single TCP can
+                 effectively and efficiently handle these two extremes.
+
+                 The most important tool for verifying a new TCP
+                 implementation is a packet trace program.  There is a
+                 large volume of experience showing the importance of
+                 tracing a variety of traffic patterns with other TCP
+                 implementations and studying the results carefully.
+
+
+         4.2.3.12  Efficiency
+
+            IMPLEMENTATION:
+                 Extensive experience has led to the following
+                 suggestions for efficient implementation of TCP:
+
+                 (a)  Don't Copy Data
+
+                      In bulk data transfer, the primary CPU-intensive
+                      tasks are copying data from one place to another
+                      and checksumming the data.  It is vital to
+                      minimize the number of copies of TCP data.  Since
+
+
+
+Internet Engineering Task Force                               [Page 105]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+                      the ultimate speed limitation may be fetching data
+                      across the memory bus, it may be useful to combine
+                      the copy with checksumming, doing both with a
+                      single memory fetch.
+
+                 (b)  Hand-Craft the Checksum Routine
+
+                      A good TCP checksumming routine is typically two
+                      to five times faster than a simple and direct
+                      implementation of the definition.  Great care and
+                      clever coding are often required and advisable to
+                      make the checksumming code "blazing fast".  See
+                      [TCP:10].
+
+                 (c)  Code for the Common Case
+
+                      TCP protocol processing can be complicated, but
+                      for most segments there are only a few simple
+                      decisions to be made.  Per-segment processing will
+                      be greatly speeded up by coding the main line to
+                      minimize the number of decisions in the most
+                      common case.
+
+
+      4.2.4  TCP/APPLICATION LAYER INTERFACE
+
+         4.2.4.1  Asynchronous Reports
+
+            There MUST be a mechanism for reporting soft TCP error
+            conditions to the application.  Generically, we assume this
+            takes the form of an application-supplied ERROR_REPORT
+            routine that may be upcalled [INTRO:7] asynchronously from
+            the transport layer:
+
+               ERROR_REPORT(local connection name, reason, subreason)
+
+            The precise encoding of the reason and subreason parameters
+            is not specified here.  However, the conditions that are
+            reported asynchronously to the application MUST include:
+
+            *    ICMP error message arrived (see 4.2.3.9)
+
+            *    Excessive retransmissions (see 4.2.3.5)
+
+            *    Urgent pointer advance (see 4.2.2.4).
+
+            However, an application program that does not want to
+            receive such ERROR_REPORT calls SHOULD be able to
+
+
+
+Internet Engineering Task Force                               [Page 106]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+            effectively disable these calls.
+
+            DISCUSSION:
+                 These error reports generally reflect soft errors that
+                 can be ignored without harm by many applications.  It
+                 has been suggested that these error report calls should
+                 default to "disabled," but this is not required.
+
+         4.2.4.2  Type-of-Service
+
+            The application layer MUST be able to specify the Type-of-
+            Service (TOS) for segments that are sent on a connection.
+            It is not required, but the application SHOULD be able to
+            change the TOS during the connection lifetime.  TCP SHOULD
+            pass the current TOS value without change to the IP layer,
+            when it sends segments on the connection.
+
+            The TOS will be specified independently in each direction on
+            the connection, so that the receiver application will
+            specify the TOS used for ACK segments.
+
+            TCP MAY pass the most recently received TOS up to the
+            application.
+
+            DISCUSSION:
+                 Some applications (e.g., SMTP) change the nature of
+                 their communication during the lifetime of a
+                 connection, and therefore would like to change the TOS
+                 specification.
+
+                 Note also that the OPEN call specified in RFC-793
+                 includes a parameter ("options") in which the caller
+                 can specify IP options such as source route, record
+                 route, or timestamp.
+
+         4.2.4.3  Flush Call
+
+            Some TCP implementations have included a FLUSH call, which
+            will empty the TCP send queue of any data for which the user
+            has issued SEND calls but which is still to the right of the
+            current send window.  That is, it flushes as much queued
+            send data as possible without losing sequence number
+            synchronization.  This is useful for implementing the "abort
+            output" function of Telnet.
+
+
+
+
+
+
+
+Internet Engineering Task Force                               [Page 107]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+         4.2.4.4  Multihoming
+
+            The user interface outlined in sections 2.7 and 3.8 of RFC-
+            793 needs to be extended for multihoming.  The OPEN call
+            MUST have an optional parameter:
+
+                OPEN( ... [local IP address,] ... )
+
+            to allow the specification of the local IP address.
+
+            DISCUSSION:
+                 Some TCP-based applications need to specify the local
+                 IP address to be used to open a particular connection;
+                 FTP is an example.
+
+            IMPLEMENTATION:
+                 A passive OPEN call with a specified "local IP address"
+                 parameter will await an incoming connection request to
+                 that address.  If the parameter is unspecified, a
+                 passive OPEN will await an incoming connection request
+                 to any local IP address, and then bind the local IP
+                 address of the connection to the particular address
+                 that is used.
+
+                 For an active OPEN call, a specified "local IP address"
+                 parameter will be used for opening the connection.  If
+                 the parameter is unspecified, the networking software
+                 will choose an appropriate local IP address (see
+                 Section 3.3.4.2) for the connection.
+
+      4.2.5  TCP REQUIREMENT SUMMARY
+
+                                                 |        | | | |S| |
+                                                 |        | | | |H| |F
+                                                 |        | | | |O|M|o
+                                                 |        | |S| |U|U|o
+                                                 |        | |H| |L|S|t
+                                                 |        |M|O| |D|T|n
+                                                 |        |U|U|M| | |o
+                                                 |        |S|L|A|N|N|t
+                                                 |        |T|D|Y|O|O|t
+FEATURE                                          |SECTION | | | |T|T|e
+-------------------------------------------------|--------|-|-|-|-|-|--
+                                                 |        | | | | | |
+Push flag                                        |        | | | | | |
+  Aggregate or queue un-pushed data              |4.2.2.2 | | |x| | |
+  Sender collapse successive PSH flags           |4.2.2.2 | |x| | | |
+  SEND call can specify PUSH                     |4.2.2.2 | | |x| | |
+
+
+
+Internet Engineering Task Force                               [Page 108]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+    If cannot: sender buffer indefinitely        |4.2.2.2 | | | | |x|
+    If cannot: PSH last segment                  |4.2.2.2 |x| | | | |
+  Notify receiving ALP of PSH                    |4.2.2.2 | | |x| | |1
+  Send max size segment when possible            |4.2.2.2 | |x| | | |
+                                                 |        | | | | | |
+Window                                           |        | | | | | |
+  Treat as unsigned number                       |4.2.2.3 |x| | | | |
+  Handle as 32-bit number                        |4.2.2.3 | |x| | | |
+  Shrink window from right                       |4.2.2.16| | | |x| |
+  Robust against shrinking window                |4.2.2.16|x| | | | |
+  Receiver's window closed indefinitely          |4.2.2.17| | |x| | |
+  Sender probe zero window                       |4.2.2.17|x| | | | |
+    First probe after RTO                        |4.2.2.17| |x| | | |
+    Exponential backoff                          |4.2.2.17| |x| | | |
+  Allow window stay zero indefinitely            |4.2.2.17|x| | | | |
+  Sender timeout OK conn with zero wind          |4.2.2.17| | | | |x|
+                                                 |        | | | | | |
+Urgent Data                                      |        | | | | | |
+  Pointer points to last octet                   |4.2.2.4 |x| | | | |
+  Arbitrary length urgent data sequence          |4.2.2.4 |x| | | | |
+  Inform ALP asynchronously of urgent data       |4.2.2.4 |x| | | | |1
+  ALP can learn if/how much urgent data Q'd      |4.2.2.4 |x| | | | |1
+                                                 |        | | | | | |
+TCP Options                                      |        | | | | | |
+  Receive TCP option in any segment              |4.2.2.5 |x| | | | |
+  Ignore unsupported options                     |4.2.2.5 |x| | | | |
+  Cope with illegal option length                |4.2.2.5 |x| | | | |
+  Implement sending & receiving MSS option       |4.2.2.6 |x| | | | |
+  Send MSS option unless 536                     |4.2.2.6 | |x| | | |
+  Send MSS option always                         |4.2.2.6 | | |x| | |
+  Send-MSS default is 536                        |4.2.2.6 |x| | | | |
+  Calculate effective send seg size              |4.2.2.6 |x| | | | |
+                                                 |        | | | | | |
+TCP Checksums                                    |        | | | | | |
+  Sender compute checksum                        |4.2.2.7 |x| | | | |
+  Receiver check checksum                        |4.2.2.7 |x| | | | |
+                                                 |        | | | | | |
+Use clock-driven ISN selection                   |4.2.2.9 |x| | | | |
+                                                 |        | | | | | |
+Opening Connections                              |        | | | | | |
+  Support simultaneous open attempts             |4.2.2.10|x| | | | |
+  SYN-RCVD remembers last state                  |4.2.2.11|x| | | | |
+  Passive Open call interfere with others        |4.2.2.18| | | | |x|
+  Function: simultan. LISTENs for same port      |4.2.2.18|x| | | | |
+  Ask IP for src address for SYN if necc.        |4.2.3.7 |x| | | | |
+    Otherwise, use local addr of conn.           |4.2.3.7 |x| | | | |
+  OPEN to broadcast/multicast IP Address         |4.2.3.10| | | | |x|
+  Silently discard seg to bcast/mcast addr       |4.2.3.10|x| | | | |
+
+
+
+Internet Engineering Task Force                               [Page 109]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+                                                 |        | | | | | |
+Closing Connections                              |        | | | | | |
+  RST can contain data                           |4.2.2.12| |x| | | |
+  Inform application of aborted conn             |4.2.2.13|x| | | | |
+  Half-duplex close connections                  |4.2.2.13| | |x| | |
+    Send RST to indicate data lost               |4.2.2.13| |x| | | |
+  In TIME-WAIT state for 2xMSL seconds           |4.2.2.13|x| | | | |
+    Accept SYN from TIME-WAIT state              |4.2.2.13| | |x| | |
+                                                 |        | | | | | |
+Retransmissions                                  |        | | | | | |
+  Jacobson Slow Start algorithm                  |4.2.2.15|x| | | | |
+  Jacobson Congestion-Avoidance algorithm        |4.2.2.15|x| | | | |
+  Retransmit with same IP ident                  |4.2.2.15| | |x| | |
+  Karn's algorithm                               |4.2.3.1 |x| | | | |
+  Jacobson's RTO estimation alg.                 |4.2.3.1 |x| | | | |
+  Exponential backoff                            |4.2.3.1 |x| | | | |
+  SYN RTO calc same as data                      |4.2.3.1 | |x| | | |
+  Recommended initial values and bounds          |4.2.3.1 | |x| | | |
+                                                 |        | | | | | |
+Generating ACK's:                                |        | | | | | |
+  Queue out-of-order segments                    |4.2.2.20| |x| | | |
+  Process all Q'd before send ACK                |4.2.2.20|x| | | | |
+  Send ACK for out-of-order segment              |4.2.2.21| | |x| | |
+  Delayed ACK's                                  |4.2.3.2 | |x| | | |
+    Delay < 0.5 seconds                          |4.2.3.2 |x| | | | |
+    Every 2nd full-sized segment ACK'd           |4.2.3.2 |x| | | | |
+  Receiver SWS-Avoidance Algorithm               |4.2.3.3 |x| | | | |
+                                                 |        | | | | | |
+Sending data                                     |        | | | | | |
+  Configurable TTL                               |4.2.2.19|x| | | | |
+  Sender SWS-Avoidance Algorithm                 |4.2.3.4 |x| | | | |
+  Nagle algorithm                                |4.2.3.4 | |x| | | |
+    Application can disable Nagle algorithm      |4.2.3.4 |x| | | | |
+                                                 |        | | | | | |
+Connection Failures:                             |        | | | | | |
+  Negative advice to IP on R1 retxs              |4.2.3.5 |x| | | | |
+  Close connection on R2 retxs                   |4.2.3.5 |x| | | | |
+  ALP can set R2                                 |4.2.3.5 |x| | | | |1
+  Inform ALP of  R1<=retxs<R2                    |4.2.3.5 | |x| | | |1
+  Recommended values for R1, R2                  |4.2.3.5 | |x| | | |
+  Same mechanism for SYNs                        |4.2.3.5 |x| | | | |
+    R2 at least 3 minutes for SYN                |4.2.3.5 |x| | | | |
+                                                 |        | | | | | |
+Send Keep-alive Packets:                         |4.2.3.6 | | |x| | |
+  - Application can request                      |4.2.3.6 |x| | | | |
+  - Default is "off"                             |4.2.3.6 |x| | | | |
+  - Only send if idle for interval               |4.2.3.6 |x| | | | |
+  - Interval configurable                        |4.2.3.6 |x| | | | |
+
+
+
+Internet Engineering Task Force                               [Page 110]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+  - Default at least 2 hrs.                      |4.2.3.6 |x| | | | |
+  - Tolerant of lost ACK's                       |4.2.3.6 |x| | | | |
+                                                 |        | | | | | |
+IP Options                                       |        | | | | | |
+  Ignore options TCP doesn't understand          |4.2.3.8 |x| | | | |
+  Time Stamp support                             |4.2.3.8 | | |x| | |
+  Record Route support                           |4.2.3.8 | | |x| | |
+  Source Route:                                  |        | | | | | |
+    ALP can specify                              |4.2.3.8 |x| | | | |1
+      Overrides src rt in datagram               |4.2.3.8 |x| | | | |
+    Build return route from src rt               |4.2.3.8 |x| | | | |
+    Later src route overrides                    |4.2.3.8 | |x| | | |
+                                                 |        | | | | | |
+Receiving ICMP Messages from IP                  |4.2.3.9 |x| | | | |
+  Dest. Unreach (0,1,5) => inform ALP            |4.2.3.9 | |x| | | |
+  Dest. Unreach (0,1,5) => abort conn            |4.2.3.9 | | | | |x|
+  Dest. Unreach (2-4) => abort conn              |4.2.3.9 | |x| | | |
+  Source Quench => slow start                    |4.2.3.9 | |x| | | |
+  Time Exceeded => tell ALP, don't abort         |4.2.3.9 | |x| | | |
+  Param Problem => tell ALP, don't abort         |4.2.3.9 | |x| | | |
+                                                 |        | | | | | |
+Address Validation                               |        | | | | | |
+  Reject OPEN call to invalid IP address         |4.2.3.10|x| | | | |
+  Reject SYN from invalid IP address             |4.2.3.10|x| | | | |
+  Silently discard SYN to bcast/mcast addr       |4.2.3.10|x| | | | |
+                                                 |        | | | | | |
+TCP/ALP Interface Services                       |        | | | | | |
+  Error Report mechanism                         |4.2.4.1 |x| | | | |
+  ALP can disable Error Report Routine           |4.2.4.1 | |x| | | |
+  ALP can specify TOS for sending                |4.2.4.2 |x| | | | |
+    Passed unchanged to IP                       |4.2.4.2 | |x| | | |
+  ALP can change TOS during connection           |4.2.4.2 | |x| | | |
+  Pass received TOS up to ALP                    |4.2.4.2 | | |x| | |
+  FLUSH call                                     |4.2.4.3 | | |x| | |
+  Optional local IP addr parm. in OPEN           |4.2.4.4 |x| | | | |
+-------------------------------------------------|--------|-|-|-|-|-|--
+-------------------------------------------------|--------|-|-|-|-|-|--
+
+FOOTNOTES:
+
+(1)  "ALP" means Application-Layer program.
+
+
+
+
+
+
+
+
+
+
+Internet Engineering Task Force                               [Page 111]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+5.  REFERENCES
+
+INTRODUCTORY REFERENCES
+
+
+[INTRO:1] "Requirements for Internet Hosts -- Application and Support,"
+     IETF Host Requirements Working Group, R. Braden, Ed., RFC-1123,
+     October 1989.
+
+[INTRO:2]  "Requirements for Internet Gateways,"  R. Braden and J.
+     Postel, RFC-1009, June 1987.
+
+[INTRO:3]  "DDN Protocol Handbook," NIC-50004, NIC-50005, NIC-50006,
+     (three volumes), SRI International, December 1985.
+
+[INTRO:4]  "Official Internet Protocols," J. Reynolds and J. Postel,
+     RFC-1011, May 1987.
+
+     This document is republished periodically with new RFC numbers; the
+     latest version must be used.
+
+[INTRO:5]  "Protocol Document Order Information," O. Jacobsen and J.
+     Postel, RFC-980, March 1986.
+
+[INTRO:6]  "Assigned Numbers," J. Reynolds and J. Postel, RFC-1010, May
+     1987.
+
+     This document is republished periodically with new RFC numbers; the
+     latest version must be used.
+
+[INTRO:7] "Modularity and Efficiency in Protocol Implementations," D.
+     Clark, RFC-817, July 1982.
+
+[INTRO:8] "The Structuring of Systems Using Upcalls," D. Clark, 10th ACM
+     SOSP, Orcas Island, Washington, December 1985.
+
+
+Secondary References:
+
+
+[INTRO:9]  "A Protocol for Packet Network Intercommunication," V. Cerf
+     and R. Kahn, IEEE Transactions on Communication, May 1974.
+
+[INTRO:10]  "The ARPA Internet Protocol," J. Postel, C. Sunshine, and D.
+     Cohen, Computer Networks, Vol. 5, No. 4, July 1981.
+
+[INTRO:11]  "The DARPA Internet Protocol Suite," B. Leiner, J. Postel,
+     R. Cole and D. Mills, Proceedings INFOCOM 85, IEEE, Washington DC,
+
+
+
+Internet Engineering Task Force                               [Page 112]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+     March 1985.  Also in: IEEE Communications Magazine, March 1985.
+     Also available as ISI-RS-85-153.
+
+[INTRO:12] "Final Text of DIS8473, Protocol for Providing the
+     Connectionless Mode Network Service," ANSI, published as RFC-994,
+     March 1986.
+
+[INTRO:13] "End System to Intermediate System Routing Exchange
+     Protocol," ANSI X3S3.3, published as RFC-995, April 1986.
+
+
+LINK LAYER REFERENCES
+
+
+[LINK:1] "Trailer Encapsulations," S. Leffler and M. Karels, RFC-893,
+     April 1984.
+
+[LINK:2] "An Ethernet Address Resolution Protocol," D. Plummer, RFC-826,
+     November 1982.
+
+[LINK:3] "A Standard for the Transmission of IP Datagrams over Ethernet
+     Networks," C. Hornig, RFC-894, April 1984.
+
+[LINK:4] "A Standard for the Transmission of IP Datagrams over IEEE 802
+     Networks," J. Postel and J. Reynolds, RFC-1042, February 1988.
+
+     This RFC contains a great deal of information of importance to
+     Internet implementers planning to use IEEE 802 networks.
+
+
+IP LAYER REFERENCES
+
+
+[IP:1] "Internet Protocol (IP)," J. Postel, RFC-791, September 1981.
+
+[IP:2] "Internet Control Message Protocol (ICMP)," J. Postel, RFC-792,
+     September 1981.
+
+[IP:3] "Internet Standard Subnetting Procedure," J. Mogul and J. Postel,
+     RFC-950, August 1985.
+
+[IP:4]  "Host Extensions for IP Multicasting," S. Deering, RFC-1112,
+     August 1989.
+
+[IP:5] "Military Standard Internet Protocol," MIL-STD-1777, Department
+     of Defense, August 1983.
+
+     This specification, as amended by RFC-963, is intended to describe
+
+
+
+Internet Engineering Task Force                               [Page 113]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+     the Internet Protocol but has some serious omissions (e.g., the
+     mandatory subnet extension [IP:3] and the optional multicasting
+     extension [IP:4]).  It is also out of date.  If there is a
+     conflict, RFC-791, RFC-792, and RFC-950 must be taken as
+     authoritative, while the present document is authoritative over
+     all.
+
+[IP:6] "Some Problems with the Specification of the Military Standard
+     Internet Protocol," D. Sidhu, RFC-963, November 1985.
+
+[IP:7] "The TCP Maximum Segment Size and Related Topics," J. Postel,
+     RFC-879, November 1983.
+
+     Discusses and clarifies the relationship between the TCP Maximum
+     Segment Size option and the IP datagram size.
+
+[IP:8] "Internet Protocol Security Options,"  B. Schofield, RFC-1108,
+     October 1989.
+
+[IP:9] "Fragmentation Considered Harmful," C. Kent and J. Mogul, ACM
+     SIGCOMM-87, August 1987.  Published as ACM Comp Comm Review, Vol.
+     17, no. 5.
+
+     This useful paper discusses the problems created by Internet
+     fragmentation and presents alternative solutions.
+
+[IP:10] "IP Datagram Reassembly Algorithms," D. Clark, RFC-815, July
+     1982.
+
+     This and the following paper should be read by every implementor.
+
+[IP:11] "Fault Isolation and Recovery," D. Clark, RFC-816, July 1982.
+
+SECONDARY IP REFERENCES:
+
+
+[IP:12] "Broadcasting Internet Datagrams in the Presence of Subnets," J.
+     Mogul, RFC-922, October 1984.
+
+[IP:13] "Name, Addresses, Ports, and Routes," D. Clark, RFC-814, July
+     1982.
+
+[IP:14] "Something a Host Could Do with Source Quench: The Source Quench
+     Introduced Delay (SQUID)," W. Prue and J. Postel, RFC-1016, July
+     1987.
+
+     This RFC first described directed broadcast addresses.  However,
+     the bulk of the RFC is concerned with gateways, not hosts.
+
+
+
+Internet Engineering Task Force                               [Page 114]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+UDP REFERENCES:
+
+
+[UDP:1] "User Datagram Protocol," J. Postel, RFC-768, August 1980.
+
+
+TCP REFERENCES:
+
+
+[TCP:1] "Transmission Control Protocol," J. Postel, RFC-793, September
+     1981.
+
+
+[TCP:2] "Transmission Control Protocol," MIL-STD-1778, US Department of
+     Defense, August 1984.
+
+     This specification as amended by RFC-964 is intended to describe
+     the same protocol as RFC-793 [TCP:1].  If there is a conflict,
+     RFC-793 takes precedence, and the present document is authoritative
+     over both.
+
+
+[TCP:3] "Some Problems with the Specification of the Military Standard
+     Transmission Control Protocol," D. Sidhu and T. Blumer, RFC-964,
+     November 1985.
+
+
+[TCP:4] "The TCP Maximum Segment Size and Related Topics," J. Postel,
+     RFC-879, November 1983.
+
+
+[TCP:5] "Window and Acknowledgment Strategy in TCP," D. Clark, RFC-813,
+     July 1982.
+
+
+[TCP:6] "Round Trip Time Estimation," P. Karn & C. Partridge, ACM
+     SIGCOMM-87, August 1987.
+
+
+[TCP:7] "Congestion Avoidance and Control," V. Jacobson, ACM SIGCOMM-88,
+     August 1988.
+
+
+SECONDARY TCP REFERENCES:
+
+
+[TCP:8] "Modularity and Efficiency in Protocol Implementation," D.
+     Clark, RFC-817, July 1982.
+
+
+
+Internet Engineering Task Force                               [Page 115]
+
+
+
+
+RFC1122                  TRANSPORT LAYER -- TCP             October 1989
+
+
+[TCP:9] "Congestion Control in IP/TCP," J. Nagle, RFC-896, January 1984.
+
+
+[TCP:10] "Computing the Internet Checksum," R. Braden, D. Borman, and C.
+     Partridge, RFC-1071, September 1988.
+
+
+[TCP:11] "TCP Extensions for Long-Delay Paths," V. Jacobson & R. Braden,
+     RFC-1072, October 1988.
+
+
+Security Considerations
+
+   There are many security issues in the communication layers of host
+   software, but a full discussion is beyond the scope of this RFC.
+
+   The Internet architecture generally provides little protection
+   against spoofing of IP source addresses, so any security mechanism
+   that is based upon verifying the IP source address of a datagram
+   should be treated with suspicion.  However, in restricted
+   environments some source-address checking may be possible.  For
+   example, there might be a secure LAN whose gateway to the rest of the
+   Internet discarded any incoming datagram with a source address that
+   spoofed the LAN address.  In this case, a host on the LAN could use
+   the source address to test for local vs. remote source.  This problem
+   is complicated by source routing, and some have suggested that
+   source-routed datagram forwarding by hosts (see Section 3.3.5) should
+   be outlawed for security reasons.
+
+   Security-related issues are mentioned in sections concerning the IP
+   Security option (Section 3.2.1.8), the ICMP Parameter Problem message
+   (Section 3.2.2.5), IP options in UDP datagrams (Section 4.1.3.2), and
+   reserved TCP ports (Section 4.2.2.1).
+
+Author's Address
+
+   Robert Braden
+   USC/Information Sciences Institute
+   4676 Admiralty Way
+   Marina del Rey, CA 90292-6695
+
+   Phone: (213) 822 1511
+
+   EMail: Braden@ISI.EDU
+
+
+
+
+
+
+
+Internet Engineering Task Force                               [Page 116]
+
diff --git a/ext/picotcp/RFC/rfc1123.txt b/ext/picotcp/RFC/rfc1123.txt
new file mode 100644
index 0000000..51cdf83
--- /dev/null
+++ b/ext/picotcp/RFC/rfc1123.txt
@@ -0,0 +1,5782 @@
+
+
+
+
+
+
+Network Working Group                    Internet Engineering Task Force
+Request for Comments: 1123                             R. Braden, Editor
+                                                            October 1989
+
+
+       Requirements for Internet Hosts -- Application and Support
+
+Status of This Memo
+
+   This RFC is an official specification for the Internet community.  It
+   incorporates by reference, amends, corrects, and supplements the
+   primary protocol standards documents relating to hosts.  Distribution
+   of this document is unlimited.
+
+Summary
+
+   This RFC is one of a pair that defines and discusses the requirements
+   for Internet host software.  This RFC covers the application and
+   support protocols; its companion RFC-1122 covers the communication
+   protocol layers: link layer, IP layer, and transport layer.
+
+
+
+                           Table of Contents
+
+
+
+
+   1.  INTRODUCTION ...............................................    5
+      1.1  The Internet Architecture ..............................    6
+      1.2  General Considerations .................................    6
+         1.2.1  Continuing Internet Evolution .....................    6
+         1.2.2  Robustness Principle ..............................    7
+         1.2.3  Error Logging .....................................    8
+         1.2.4  Configuration .....................................    8
+      1.3  Reading this Document ..................................   10
+         1.3.1  Organization ......................................   10
+         1.3.2  Requirements ......................................   10
+         1.3.3  Terminology .......................................   11
+      1.4  Acknowledgments ........................................   12
+
+   2.  GENERAL ISSUES .............................................   13
+      2.1  Host Names and Numbers .................................   13
+      2.2  Using Domain Name Service ..............................   13
+      2.3  Applications on Multihomed hosts .......................   14
+      2.4  Type-of-Service ........................................   14
+      2.5  GENERAL APPLICATION REQUIREMENTS SUMMARY ...............   15
+
+
+
+
+Internet Engineering Task Force                                 [Page 1]
+
+
+
+
+RFC1123                       INTRODUCTION                  October 1989
+
+
+   3.  REMOTE LOGIN -- TELNET PROTOCOL ............................   16
+      3.1  INTRODUCTION ...........................................   16
+      3.2  PROTOCOL WALK-THROUGH ..................................   16
+         3.2.1  Option Negotiation ................................   16
+         3.2.2  Telnet Go-Ahead Function ..........................   16
+         3.2.3  Control Functions .................................   17
+         3.2.4  Telnet "Synch" Signal .............................   18
+         3.2.5  NVT Printer and Keyboard ..........................   19
+         3.2.6  Telnet Command Structure ..........................   20
+         3.2.7  Telnet Binary Option ..............................   20
+         3.2.8  Telnet Terminal-Type Option .......................   20
+      3.3  SPECIFIC ISSUES ........................................   21
+         3.3.1  Telnet End-of-Line Convention .....................   21
+         3.3.2  Data Entry Terminals ..............................   23
+         3.3.3  Option Requirements ...............................   24
+         3.3.4  Option Initiation .................................   24
+         3.3.5  Telnet Linemode Option ............................   25
+      3.4  TELNET/USER INTERFACE ..................................   25
+         3.4.1  Character Set Transparency ........................   25
+         3.4.2  Telnet Commands ...................................   26
+         3.4.3  TCP Connection Errors .............................   26
+         3.4.4  Non-Default Telnet Contact Port ...................   26
+         3.4.5  Flushing Output ...................................   26
+      3.5.  TELNET REQUIREMENTS SUMMARY ...........................   27
+
+   4.  FILE TRANSFER ..............................................   29
+      4.1  FILE TRANSFER PROTOCOL -- FTP ..........................   29
+         4.1.1  INTRODUCTION ......................................   29
+         4.1.2.  PROTOCOL WALK-THROUGH ............................   29
+            4.1.2.1  LOCAL Type ...................................   29
+            4.1.2.2  Telnet Format Control ........................   30
+            4.1.2.3  Page Structure ...............................   30
+            4.1.2.4  Data Structure Transformations ...............   30
+            4.1.2.5  Data Connection Management ...................   31
+            4.1.2.6  PASV Command .................................   31
+            4.1.2.7  LIST and NLST Commands .......................   31
+            4.1.2.8  SITE Command .................................   32
+            4.1.2.9  STOU Command .................................   32
+            4.1.2.10  Telnet End-of-line Code .....................   32
+            4.1.2.11  FTP Replies .................................   33
+            4.1.2.12  Connections .................................   34
+            4.1.2.13  Minimum Implementation; RFC-959 Section .....   34
+         4.1.3  SPECIFIC ISSUES ...................................   35
+            4.1.3.1  Non-standard Command Verbs ...................   35
+            4.1.3.2  Idle Timeout .................................   36
+            4.1.3.3  Concurrency of Data and Control ..............   36
+            4.1.3.4  FTP Restart Mechanism ........................   36
+         4.1.4  FTP/USER INTERFACE ................................   39
+
+
+
+Internet Engineering Task Force                                 [Page 2]
+
+
+
+
+RFC1123                       INTRODUCTION                  October 1989
+
+
+            4.1.4.1  Pathname Specification .......................   39
+            4.1.4.2  "QUOTE" Command ..............................   40
+            4.1.4.3  Displaying Replies to User ...................   40
+            4.1.4.4  Maintaining Synchronization ..................   40
+         4.1.5   FTP REQUIREMENTS SUMMARY .........................   41
+      4.2  TRIVIAL FILE TRANSFER PROTOCOL -- TFTP .................   44
+         4.2.1  INTRODUCTION ......................................   44
+         4.2.2  PROTOCOL WALK-THROUGH .............................   44
+            4.2.2.1  Transfer Modes ...............................   44
+            4.2.2.2  UDP Header ...................................   44
+         4.2.3  SPECIFIC ISSUES ...................................   44
+            4.2.3.1  Sorcerer's Apprentice Syndrome ...............   44
+            4.2.3.2  Timeout Algorithms ...........................   46
+            4.2.3.3  Extensions ...................................   46
+            4.2.3.4  Access Control ...............................   46
+            4.2.3.5  Broadcast Request ............................   46
+         4.2.4  TFTP REQUIREMENTS SUMMARY .........................   47
+
+   5.  ELECTRONIC MAIL -- SMTP and RFC-822 ........................   48
+      5.1  INTRODUCTION ...........................................   48
+      5.2  PROTOCOL WALK-THROUGH ..................................   48
+         5.2.1  The SMTP Model ....................................   48
+         5.2.2  Canonicalization ..................................   49
+         5.2.3  VRFY and EXPN Commands ............................   50
+         5.2.4  SEND, SOML, and SAML Commands .....................   50
+         5.2.5  HELO Command ......................................   50
+         5.2.6  Mail Relay ........................................   51
+         5.2.7  RCPT Command ......................................   52
+         5.2.8  DATA Command ......................................   53
+         5.2.9  Command Syntax ....................................   54
+         5.2.10  SMTP Replies .....................................   54
+         5.2.11  Transparency .....................................   55
+         5.2.12  WKS Use in MX Processing .........................   55
+         5.2.13  RFC-822 Message Specification ....................   55
+         5.2.14  RFC-822 Date and Time Specification ..............   55
+         5.2.15  RFC-822 Syntax Change ............................   56
+         5.2.16  RFC-822  Local-part ..............................   56
+         5.2.17  Domain Literals ..................................   57
+         5.2.18  Common Address Formatting Errors .................   58
+         5.2.19  Explicit Source Routes ...........................   58
+      5.3  SPECIFIC ISSUES ........................................   59
+         5.3.1  SMTP Queueing Strategies ..........................   59
+            5.3.1.1 Sending Strategy ..............................   59
+            5.3.1.2  Receiving strategy ...........................   61
+         5.3.2  Timeouts in SMTP ..................................   61
+         5.3.3  Reliable Mail Receipt .............................   63
+         5.3.4  Reliable Mail Transmission ........................   63
+         5.3.5  Domain Name Support ...............................   65
+
+
+
+Internet Engineering Task Force                                 [Page 3]
+
+
+
+
+RFC1123                       INTRODUCTION                  October 1989
+
+
+         5.3.6  Mailing Lists and Aliases .........................   65
+         5.3.7  Mail Gatewaying ...................................   66
+         5.3.8  Maximum Message Size ..............................   68
+      5.4  SMTP REQUIREMENTS SUMMARY ..............................   69
+
+   6. SUPPORT SERVICES ............................................   72
+      6.1 DOMAIN NAME TRANSLATION .................................   72
+         6.1.1 INTRODUCTION .......................................   72
+         6.1.2  PROTOCOL WALK-THROUGH .............................   72
+            6.1.2.1  Resource Records with Zero TTL ...............   73
+            6.1.2.2  QCLASS Values ................................   73
+            6.1.2.3  Unused Fields ................................   73
+            6.1.2.4  Compression ..................................   73
+            6.1.2.5  Misusing Configuration Info ..................   73
+         6.1.3  SPECIFIC ISSUES ...................................   74
+            6.1.3.1  Resolver Implementation ......................   74
+            6.1.3.2  Transport Protocols ..........................   75
+            6.1.3.3  Efficient Resource Usage .....................   77
+            6.1.3.4  Multihomed Hosts .............................   78
+            6.1.3.5  Extensibility ................................   79
+            6.1.3.6  Status of RR Types ...........................   79
+            6.1.3.7  Robustness ...................................   80
+            6.1.3.8  Local Host Table .............................   80
+         6.1.4  DNS USER INTERFACE ................................   81
+            6.1.4.1  DNS Administration ...........................   81
+            6.1.4.2  DNS User Interface ...........................   81
+            6.1.4.3 Interface Abbreviation Facilities .............   82
+         6.1.5  DOMAIN NAME SYSTEM REQUIREMENTS SUMMARY ...........   84
+      6.2  HOST INITIALIZATION ....................................   87
+         6.2.1  INTRODUCTION ......................................   87
+         6.2.2  REQUIREMENTS ......................................   87
+            6.2.2.1  Dynamic Configuration ........................   87
+            6.2.2.2  Loading Phase ................................   89
+      6.3  REMOTE MANAGEMENT ......................................   90
+         6.3.1  INTRODUCTION ......................................   90
+         6.3.2  PROTOCOL WALK-THROUGH .............................   90
+         6.3.3  MANAGEMENT REQUIREMENTS SUMMARY ...................   92
+
+   7.  REFERENCES .................................................   93
+
+
+
+
+
+
+
+
+
+
+
+
+Internet Engineering Task Force                                 [Page 4]
+
+
+
+
+RFC1123                       INTRODUCTION                  October 1989
+
+
+1.  INTRODUCTION
+
+   This document is one of a pair that defines and discusses the
+   requirements for host system implementations of the Internet protocol
+   suite.  This RFC covers the applications layer and support protocols.
+   Its companion RFC, "Requirements for Internet Hosts -- Communications
+   Layers" [INTRO:1] covers the lower layer protocols: transport layer,
+   IP layer, and link layer.
+
+   These documents are intended to provide guidance for vendors,
+   implementors, and users of Internet communication software.  They
+   represent the consensus of a large body of technical experience and
+   wisdom, contributed by members of the Internet research and vendor
+   communities.
+
+   This RFC enumerates standard protocols that a host connected to the
+   Internet must use, and it incorporates by reference the RFCs and
+   other documents describing the current specifications for these
+   protocols.  It corrects errors in the referenced documents and adds
+   additional discussion and guidance for an implementor.
+
+   For each protocol, this document also contains an explicit set of
+   requirements, recommendations, and options.  The reader must
+   understand that the list of requirements in this document is
+   incomplete by itself; the complete set of requirements for an
+   Internet host is primarily defined in the standard protocol
+   specification documents, with the corrections, amendments, and
+   supplements contained in this RFC.
+
+   A good-faith implementation of the protocols that was produced after
+   careful reading of the RFC's and with some interaction with the
+   Internet technical community, and that followed good communications
+   software engineering practices, should differ from the requirements
+   of this document in only minor ways.  Thus, in many cases, the
+   "requirements" in this RFC are already stated or implied in the
+   standard protocol documents, so that their inclusion here is, in a
+   sense, redundant.  However, they were included because some past
+   implementation has made the wrong choice, causing problems of
+   interoperability, performance, and/or robustness.
+
+   This document includes discussion and explanation of many of the
+   requirements and recommendations.  A simple list of requirements
+   would be dangerous, because:
+
+   o    Some required features are more important than others, and some
+        features are optional.
+
+   o    There may be valid reasons why particular vendor products that
+
+
+
+Internet Engineering Task Force                                 [Page 5]
+
+
+
+
+RFC1123                       INTRODUCTION                  October 1989
+
+
+        are designed for restricted contexts might choose to use
+        different specifications.
+
+   However, the specifications of this document must be followed to meet
+   the general goal of arbitrary host interoperation across the
+   diversity and complexity of the Internet system.  Although most
+   current implementations fail to meet these requirements in various
+   ways, some minor and some major, this specification is the ideal
+   towards which we need to move.
+
+   These requirements are based on the current level of Internet
+   architecture.  This document will be updated as required to provide
+   additional clarifications or to include additional information in
+   those areas in which specifications are still evolving.
+
+   This introductory section begins with general advice to host software
+   vendors, and then gives some guidance on reading the rest of the
+   document.  Section 2 contains general requirements that may be
+   applicable to all application and support protocols.  Sections 3, 4,
+   and 5 contain the requirements on protocols for the three major
+   applications: Telnet, file transfer, and electronic mail,
+   respectively. Section 6 covers the support applications: the domain
+   name system, system initialization, and management.  Finally, all
+   references will be found in Section 7.
+
+   1.1  The Internet Architecture
+
+      For a brief introduction to the Internet architecture from a host
+      viewpoint, see Section 1.1 of [INTRO:1].  That section also
+      contains recommended references for general background on the
+      Internet architecture.
+
+   1.2  General Considerations
+
+      There are two important lessons that vendors of Internet host
+      software have learned and which a new vendor should consider
+      seriously.
+
+      1.2.1  Continuing Internet Evolution
+
+         The enormous growth of the Internet has revealed problems of
+         management and scaling in a large datagram-based packet
+         communication system.  These problems are being addressed, and
+         as a result there will be continuing evolution of the
+         specifications described in this document.  These changes will
+         be carefully planned and controlled, since there is extensive
+         participation in this planning by the vendors and by the
+         organizations responsible for operations of the networks.
+
+
+
+Internet Engineering Task Force                                 [Page 6]
+
+
+
+
+RFC1123                       INTRODUCTION                  October 1989
+
+
+         Development, evolution, and revision are characteristic of
+         computer network protocols today, and this situation will
+         persist for some years.  A vendor who develops computer
+         communication software for the Internet protocol suite (or any
+         other protocol suite!) and then fails to maintain and update
+         that software for changing specifications is going to leave a
+         trail of unhappy customers.  The Internet is a large
+         communication network, and the users are in constant contact
+         through it.  Experience has shown that knowledge of
+         deficiencies in vendor software propagates quickly through the
+         Internet technical community.
+
+      1.2.2  Robustness Principle
+
+         At every layer of the protocols, there is a general rule whose
+         application can lead to enormous benefits in robustness and
+         interoperability:
+
+                "Be liberal in what you accept, and
+                 conservative in what you send"
+
+         Software should be written to deal with every conceivable
+         error, no matter how unlikely; sooner or later a packet will
+         come in with that particular combination of errors and
+         attributes, and unless the software is prepared, chaos can
+         ensue.  In general, it is best to assume that the network is
+         filled with malevolent entities that will send in packets
+         designed to have the worst possible effect.  This assumption
+         will lead to suitable protective design, although the most
+         serious problems in the Internet have been caused by
+         unenvisaged mechanisms triggered by low-probability events;
+         mere human malice would never have taken so devious a course!
+
+         Adaptability to change must be designed into all levels of
+         Internet host software.  As a simple example, consider a
+         protocol specification that contains an enumeration of values
+         for a particular header field -- e.g., a type field, a port
+         number, or an error code; this enumeration must be assumed to
+         be incomplete.  Thus, if a protocol specification defines four
+         possible error codes, the software must not break when a fifth
+         code shows up.  An undefined code might be logged (see below),
+         but it must not cause a failure.
+
+         The second part of the principle is almost as important:
+         software on other hosts may contain deficiencies that make it
+         unwise to exploit legal but obscure protocol features.  It is
+         unwise to stray far from the obvious and simple, lest untoward
+         effects result elsewhere.  A corollary of this is "watch out
+
+
+
+Internet Engineering Task Force                                 [Page 7]
+
+
+
+
+RFC1123                       INTRODUCTION                  October 1989
+
+
+         for misbehaving hosts"; host software should be prepared, not
+         just to survive other misbehaving hosts, but also to cooperate
+         to limit the amount of disruption such hosts can cause to the
+         shared communication facility.
+
+      1.2.3  Error Logging
+
+         The Internet includes a great variety of host and gateway
+         systems, each implementing many protocols and protocol layers,
+         and some of these contain bugs and mis-features in their
+         Internet protocol software.  As a result of complexity,
+         diversity, and distribution of function, the diagnosis of user
+         problems is often very difficult.
+
+         Problem diagnosis will be aided if host implementations include
+         a carefully designed facility for logging erroneous or
+         "strange" protocol events.  It is important to include as much
+         diagnostic information as possible when an error is logged.  In
+         particular, it is often useful to record the header(s) of a
+         packet that caused an error.  However, care must be taken to
+         ensure that error logging does not consume prohibitive amounts
+         of resources or otherwise interfere with the operation of the
+         host.
+
+         There is a tendency for abnormal but harmless protocol events
+         to overflow error logging files; this can be avoided by using a
+         "circular" log, or by enabling logging only while diagnosing a
+         known failure.  It may be useful to filter and count duplicate
+         successive messages.  One strategy that seems to work well is:
+         (1) always count abnormalities and make such counts accessible
+         through the management protocol (see Section 6.3); and (2)
+         allow the logging of a great variety of events to be
+         selectively enabled.  For example, it might be useful to be able
+         to "log everything" or to "log everything for host X".
+
+         Note that different managements may have differing policies
+         about the amount of error logging that they want normally
+         enabled in a host.  Some will say, "if it doesn't hurt me, I
+         don't want to know about it", while others will want to take a
+         more watchful and aggressive attitude about detecting and
+         removing protocol abnormalities.
+
+      1.2.4  Configuration
+
+         It would be ideal if a host implementation of the Internet
+         protocol suite could be entirely self-configuring.  This would
+         allow the whole suite to be implemented in ROM or cast into
+         silicon, it would simplify diskless workstations, and it would
+
+
+
+Internet Engineering Task Force                                 [Page 8]
+
+
+
+
+RFC1123                       INTRODUCTION                  October 1989
+
+
+         be an immense boon to harried LAN administrators as well as
+         system vendors.  We have not reached this ideal; in fact, we
+         are not even close.
+
+         At many points in this document, you will find a requirement
+         that a parameter be a configurable option.  There are several
+         different reasons behind such requirements.  In a few cases,
+         there is current uncertainty or disagreement about the best
+         value, and it may be necessary to update the recommended value
+         in the future.  In other cases, the value really depends on
+         external factors -- e.g., the size of the host and the
+         distribution of its communication load, or the speeds and
+         topology of nearby networks -- and self-tuning algorithms are
+         unavailable and may be insufficient.  In some cases,
+         configurability is needed because of administrative
+         requirements.
+
+         Finally, some configuration options are required to communicate
+         with obsolete or incorrect implementations of the protocols,
+         distributed without sources, that unfortunately persist in many
+         parts of the Internet.  To make correct systems coexist with
+         these faulty systems, administrators often have to "mis-
+         configure" the correct systems.  This problem will correct
+         itself gradually as the faulty systems are retired, but it
+         cannot be ignored by vendors.
+
+         When we say that a parameter must be configurable, we do not
+         intend to require that its value be explicitly read from a
+         configuration file at every boot time.  We recommend that
+         implementors set up a default for each parameter, so a
+         configuration file is only necessary to override those defaults
+         that are inappropriate in a particular installation.  Thus, the
+         configurability requirement is an assurance that it will be
+         POSSIBLE to override the default when necessary, even in a
+         binary-only or ROM-based product.
+
+         This document requires a particular value for such defaults in
+         some cases.  The choice of default is a sensitive issue when
+         the configuration item controls the accommodation to existing
+         faulty systems.  If the Internet is to converge successfully to
+         complete interoperability, the default values built into
+         implementations must implement the official protocol, not
+         "mis-configurations" to accommodate faulty implementations.
+         Although marketing considerations have led some vendors to
+         choose mis-configuration defaults, we urge vendors to choose
+         defaults that will conform to the standard.
+
+         Finally, we note that a vendor needs to provide adequate
+
+
+
+Internet Engineering Task Force                                 [Page 9]
+
+
+
+
+RFC1123                       INTRODUCTION                  October 1989
+
+
+         documentation on all configuration parameters, their limits and
+         effects.
+
+
+   1.3  Reading this Document
+
+      1.3.1  Organization
+
+         In general, each major section is organized into the following
+         subsections:
+
+         (1)  Introduction
+
+         (2)  Protocol Walk-Through -- considers the protocol
+              specification documents section-by-section, correcting
+              errors, stating requirements that may be ambiguous or
+              ill-defined, and providing further clarification or
+              explanation.
+
+         (3)  Specific Issues -- discusses protocol design and
+              implementation issues that were not included in the walk-
+              through.
+
+         (4)  Interfaces -- discusses the service interface to the next
+              higher layer.
+
+         (5)  Summary -- contains a summary of the requirements of the
+              section.
+
+         Under many of the individual topics in this document, there is
+         parenthetical material labeled "DISCUSSION" or
+         "IMPLEMENTATION".  This material is intended to give
+         clarification and explanation of the preceding requirements
+         text.  It also includes some suggestions on possible future
+         directions or developments.  The implementation material
+         contains suggested approaches that an implementor may want to
+         consider.
+
+         The summary sections are intended to be guides and indexes to
+         the text, but are necessarily cryptic and incomplete.  The
+         summaries should never be used or referenced separately from
+         the complete RFC.
+
+      1.3.2  Requirements
+
+         In this document, the words that are used to define the
+         significance of each particular requirement are capitalized.
+         These words are:
+
+
+
+Internet Engineering Task Force                                [Page 10]
+
+
+
+
+RFC1123                       INTRODUCTION                  October 1989
+
+
+         *    "MUST"
+
+              This word or the adjective "REQUIRED" means that the item
+              is an absolute requirement of the specification.
+
+         *    "SHOULD"
+
+              This word or the adjective "RECOMMENDED" means that there
+              may exist valid reasons in particular circumstances to
+              ignore this item, but the full implications should be
+              understood and the case carefully weighed before choosing
+              a different course.
+
+         *    "MAY"
+
+              This word or the adjective "OPTIONAL" means that this item
+              is truly optional.  One vendor may choose to include the
+              item because a particular marketplace requires it or
+              because it enhances the product, for example; another
+              vendor may omit the same item.
+
+
+         An implementation is not compliant if it fails to satisfy one
+         or more of the MUST requirements for the protocols it
+         implements.  An implementation that satisfies all the MUST and
+         all the SHOULD requirements for its protocols is said to be
+         "unconditionally compliant"; one that satisfies all the MUST
+         requirements but not all the SHOULD requirements for its
+         protocols is said to be "conditionally compliant".
+
+      1.3.3  Terminology
+
+         This document uses the following technical terms:
+
+         Segment
+              A segment is the unit of end-to-end transmission in the
+              TCP protocol.  A segment consists of a TCP header followed
+              by application data.  A segment is transmitted by
+              encapsulation in an IP datagram.
+
+         Message
+              This term is used by some application layer protocols
+              (particularly SMTP) for an application data unit.
+
+         Datagram
+              A [UDP] datagram is the unit of end-to-end transmission in
+              the UDP protocol.
+
+
+
+
+Internet Engineering Task Force                                [Page 11]
+
+
+
+
+RFC1123                       INTRODUCTION                  October 1989
+
+
+         Multihomed
+              A host is said to be multihomed if it has multiple IP
+              addresses to connected networks.
+
+
+
+   1.4  Acknowledgments
+
+      This document incorporates contributions and comments from a large
+      group of Internet protocol experts, including representatives of
+      university and research labs, vendors, and government agencies.
+      It was assembled primarily by the Host Requirements Working Group
+      of the Internet Engineering Task Force (IETF).
+
+      The Editor would especially like to acknowledge the tireless
+      dedication of the following people, who attended many long
+      meetings and generated 3 million bytes of electronic mail over the
+      past 18 months in pursuit of this document: Philip Almquist, Dave
+      Borman (Cray Research), Noel Chiappa, Dave Crocker (DEC), Steve
+      Deering (Stanford), Mike Karels (Berkeley), Phil Karn (Bellcore),
+      John Lekashman (NASA), Charles Lynn (BBN), Keith McCloghrie (TWG),
+      Paul Mockapetris (ISI), Thomas Narten (Purdue), Craig Partridge
+      (BBN), Drew Perkins (CMU), and James Van Bokkelen (FTP Software).
+
+      In addition, the following people made major contributions to the
+      effort: Bill Barns (Mitre), Steve Bellovin (AT&T), Mike Brescia
+      (BBN), Ed Cain (DCA), Annette DeSchon (ISI), Martin Gross (DCA),
+      Phill Gross (NRI), Charles Hedrick (Rutgers), Van Jacobson (LBL),
+      John Klensin (MIT), Mark Lottor (SRI), Milo Medin (NASA), Bill
+      Melohn (Sun Microsystems), Greg Minshall (Kinetics), Jeff Mogul
+      (DEC), John Mullen (CMC), Jon Postel (ISI), John Romkey (Epilogue
+      Technology), and Mike StJohns (DCA).  The following also made
+      significant contributions to particular areas: Eric Allman
+      (Berkeley), Rob Austein (MIT), Art Berggreen (ACC), Keith Bostic
+      (Berkeley), Vint Cerf (NRI), Wayne Hathaway (NASA), Matt Korn
+      (IBM), Erik Naggum (Naggum Software, Norway), Robert Ullmann
+      (Prime Computer), David Waitzman (BBN), Frank Wancho (USA), Arun
+      Welch (Ohio State), Bill Westfield (Cisco), and Rayan Zachariassen
+      (Toronto).
+
+      We are grateful to all, including any contributors who may have
+      been inadvertently omitted from this list.
+
+
+
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 12]
+
+
+
+
+RFC1123              APPLICATIONS LAYER -- GENERAL          October 1989
+
+
+2.  GENERAL ISSUES
+
+   This section contains general requirements that may be applicable to
+   all application-layer protocols.
+
+   2.1  Host Names and Numbers
+
+      The syntax of a legal Internet host name was specified in RFC-952
+      [DNS:4].  One aspect of host name syntax is hereby changed: the
+      restriction on the first character is relaxed to allow either a
+      letter or a digit.  Host software MUST support this more liberal
+      syntax.
+
+      Host software MUST handle host names of up to 63 characters and
+      SHOULD handle host names of up to 255 characters.
+
+      Whenever a user inputs the identity of an Internet host, it SHOULD
+      be possible to enter either (1) a host domain name or (2) an IP
+      address in dotted-decimal ("#.#.#.#") form.  The host SHOULD check
+      the string syntactically for a dotted-decimal number before
+      looking it up in the Domain Name System.
+
+      DISCUSSION:
+           This last requirement is not intended to specify the complete
+           syntactic form for entering a dotted-decimal host number;
+           that is considered to be a user-interface issue.  For
+           example, a dotted-decimal number must be enclosed within
+           "[ ]" brackets for SMTP mail (see Section 5.2.17).  This
+           notation could be made universal within a host system,
+           simplifying the syntactic checking for a dotted-decimal
+           number.
+
+           If a dotted-decimal number can be entered without such
+           identifying delimiters, then a full syntactic check must be
+           made, because a segment of a host domain name is now allowed
+           to begin with a digit and could legally be entirely numeric
+           (see Section 6.1.2.4).  However, a valid host name can never
+           have the dotted-decimal form #.#.#.#, since at least the
+           highest-level component label will be alphabetic.
+
+   2.2  Using Domain Name Service
+
+      Host domain names MUST be translated to IP addresses as described
+      in Section 6.1.
+
+      Applications using domain name services MUST be able to cope with
+      soft error conditions.  Applications MUST wait a reasonable
+      interval between successive retries due to a soft error, and MUST
+
+
+
+Internet Engineering Task Force                                [Page 13]
+
+
+
+
+RFC1123              APPLICATIONS LAYER -- GENERAL          October 1989
+
+
+      allow for the possibility that network problems may deny service
+      for hours or even days.
+
+      An application SHOULD NOT rely on the ability to locate a WKS
+      record containing an accurate listing of all services at a
+      particular host address, since the WKS RR type is not often used
+      by Internet sites.  To confirm that a service is present, simply
+      attempt to use it.
+
+   2.3  Applications on Multihomed hosts
+
+      When the remote host is multihomed, the name-to-address
+      translation will return a list of alternative IP addresses.  As
+      specified in Section 6.1.3.4, this list should be in order of
+      decreasing preference.  Application protocol implementations
+      SHOULD be prepared to try multiple addresses from the list until
+      success is obtained.  More specific requirements for SMTP are
+      given in Section 5.3.4.
+
+      When the local host is multihomed, a UDP-based request/response
+      application SHOULD send the response with an IP source address
+      that is the same as the specific destination address of the UDP
+      request datagram.  The "specific destination address" is defined
+      in the "IP Addressing" section of the companion RFC [INTRO:1].
+
+      Similarly, a server application that opens multiple TCP
+      connections to the same client SHOULD use the same local IP
+      address for all.
+
+   2.4  Type-of-Service
+
+      Applications MUST select appropriate TOS values when they invoke
+      transport layer services, and these values MUST be configurable.
+      Note that a TOS value contains 5 bits, of which only the most-
+      significant 3 bits are currently defined; the other two bits MUST
+      be zero.
+
+      DISCUSSION:
+           As gateway algorithms are developed to implement Type-of-
+           Service, the recommended values for various application
+           protocols may change.  In addition, it is likely that
+           particular combinations of users and Internet paths will want
+           non-standard TOS values.  For these reasons, the TOS values
+           must be configurable.
+
+           See the latest version of the "Assigned Numbers" RFC
+           [INTRO:5] for the recommended TOS values for the major
+           application protocols.
+
+
+
+Internet Engineering Task Force                                [Page 14]
+
+
+
+
+RFC1123              APPLICATIONS LAYER -- GENERAL          October 1989
+
+
+   2.5  GENERAL APPLICATION REQUIREMENTS SUMMARY
+
+                                               |          | | | |S| |
+                                               |          | | | |H| |F
+                                               |          | | | |O|M|o
+                                               |          | |S| |U|U|o
+                                               |          | |H| |L|S|t
+                                               |          |M|O| |D|T|n
+                                               |          |U|U|M| | |o
+                                               |          |S|L|A|N|N|t
+                                               |          |T|D|Y|O|O|t
+FEATURE                                        |SECTION   | | | |T|T|e
+-----------------------------------------------|----------|-|-|-|-|-|--
+                                               |          | | | | | |
+User interfaces:                               |          | | | | | |
+  Allow host name to begin with digit          |2.1       |x| | | | |
+  Host names of up to 63 characters            |2.1       |x| | | | |
+  Host names of up to 255 characters           |2.1       | |x| | | |
+  Support dotted-decimal host numbers          |2.1       | |x| | | |
+  Check syntactically for dotted-dec first     |2.1       | |x| | | |
+                                               |          | | | | | |
+Map domain names per Section 6.1               |2.2       |x| | | | |
+Cope with soft DNS errors                      |2.2       |x| | | | |
+   Reasonable interval between retries         |2.2       |x| | | | |
+   Allow for long outages                      |2.2       |x| | | | |
+Expect WKS records to be available             |2.2       | | | |x| |
+                                               |          | | | | | |
+Try multiple addr's for remote multihomed host |2.3       | |x| | | |
+UDP reply src addr is specific dest of request |2.3       | |x| | | |
+Use same IP addr for related TCP connections   |2.3       | |x| | | |
+Specify appropriate TOS values                 |2.4       |x| | | | |
+  TOS values configurable                      |2.4       |x| | | | |
+  Unused TOS bits zero                         |2.4       |x| | | | |
+                                               |          | | | | | |
+                                               |          | | | | | |
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 15]
+
+
+
+
+RFC1123                  REMOTE LOGIN -- TELNET             October 1989
+
+
+3.  REMOTE LOGIN -- TELNET PROTOCOL
+
+   3.1  INTRODUCTION
+
+      Telnet is the standard Internet application protocol for remote
+      login.  It provides the encoding rules to link a user's
+      keyboard/display on a client ("user") system with a command
+      interpreter on a remote server system.  A subset of the Telnet
+      protocol is also incorporated within other application protocols,
+      e.g., FTP and SMTP.
+
+      Telnet uses a single TCP connection, and its normal data stream
+      ("Network Virtual Terminal" or "NVT" mode) is 7-bit ASCII with
+      escape sequences to embed control functions.  Telnet also allows
+      the negotiation of many optional modes and functions.
+
+      The primary Telnet specification is to be found in RFC-854
+      [TELNET:1], while the options are defined in many other RFCs; see
+      Section 7 for references.
+
+   3.2  PROTOCOL WALK-THROUGH
+
+      3.2.1  Option Negotiation: RFC-854, pp. 2-3
+
+         Every Telnet implementation MUST include option negotiation and
+         subnegotiation machinery [TELNET:2].
+
+         A host MUST carefully follow the rules of RFC-854 to avoid
+         option-negotiation loops.  A host MUST refuse (i.e., reply
+         WONT/DONT to a DO/WILL) an unsupported option.  Option
+         negotiation SHOULD continue to function (even if all requests
+         are refused) throughout the lifetime of a Telnet connection.
+
+         If all option negotiations fail, a Telnet implementation MUST
+         default to, and support, an NVT.
+
+         DISCUSSION:
+              Even though more sophisticated "terminals" and supporting
+              option negotiations are becoming the norm, all
+              implementations must be prepared to support an NVT for any
+              user-server communication.
+
+      3.2.2  Telnet Go-Ahead Function: RFC-854, p. 5, and RFC-858
+
+         On a host that never sends the Telnet command Go Ahead (GA),
+         the Telnet Server MUST attempt to negotiate the Suppress Go
+         Ahead option (i.e., send "WILL Suppress Go Ahead").  A User or
+         Server Telnet MUST always accept negotiation of the Suppress Go
+
+
+
+Internet Engineering Task Force                                [Page 16]
+
+
+
+
+RFC1123                  REMOTE LOGIN -- TELNET             October 1989
+
+
+         Ahead option.
+
+         When it is driving a full-duplex terminal for which GA has no
+         meaning, a User Telnet implementation MAY ignore GA commands.
+
+         DISCUSSION:
+              Half-duplex ("locked-keyboard") line-at-a-time terminals
+              for which the Go-Ahead mechanism was designed have largely
+              disappeared from the scene.  It turned out to be difficult
+              to implement sending the Go-Ahead signal in many operating
+              systems, even some systems that support native half-duplex
+              terminals.  The difficulty is typically that the Telnet
+              server code does not have access to information about
+              whether the user process is blocked awaiting input from
+              the Telnet connection, i.e., it cannot reliably determine
+              when to send a GA command.  Therefore, most Telnet Server
+              hosts do not send GA commands.
+
+              The effect of the rules in this section is to allow either
+              end of a Telnet connection to veto the use of GA commands.
+
+              There is a class of half-duplex terminals that is still
+              commercially important: "data entry terminals," which
+              interact in a full-screen manner.  However, supporting
+              data entry terminals using the Telnet protocol does not
+              require the Go Ahead signal; see Section 3.3.2.
+
+      3.2.3  Control Functions: RFC-854, pp. 7-8
+
+         The list of Telnet commands has been extended to include EOR
+         (End-of-Record), with code 239 [TELNET:9].
+
+         Both User and Server Telnets MAY support the control functions
+         EOR, EC, EL, and Break, and MUST support AO, AYT, DM, IP, NOP,
+         SB, and SE.
+
+         A host MUST be able to receive and ignore any Telnet control
+         functions that it does not support.
+
+         DISCUSSION:
+              Note that a Server Telnet is required to support the
+              Telnet IP (Interrupt Process) function, even if the server
+              host has an equivalent in-stream function (e.g., Control-C
+              in many systems).  The Telnet IP function may be stronger
+              than an in-stream interrupt command, because of the out-
+              of-band effect of TCP urgent data.
+
+              The EOR control function may be used to delimit the
+
+
+
+Internet Engineering Task Force                                [Page 17]
+
+
+
+
+RFC1123                  REMOTE LOGIN -- TELNET             October 1989
+
+
+              stream.  An important application is data entry terminal
+              support (see Section 3.3.2).  There was concern that since
+              EOR had not been defined in RFC-854, a host that was not
+              prepared to correctly ignore unknown Telnet commands might
+              crash if it received an EOR.  To protect such hosts, the
+              End-of-Record option [TELNET:9] was introduced; however, a
+              properly implemented Telnet program will not require this
+              protection.
+
+      3.2.4  Telnet "Synch" Signal: RFC-854, pp. 8-10
+
+         When it receives "urgent" TCP data, a User or Server Telnet
+         MUST discard all data except Telnet commands until the DM (and
+         end of urgent) is reached.
+
+         When it sends Telnet IP (Interrupt Process), a User Telnet
+         SHOULD follow it by the Telnet "Synch" sequence, i.e., send as
+         TCP urgent data the sequence "IAC IP IAC DM".  The TCP urgent
+         pointer points to the DM octet.
+
+         When it receives a Telnet IP command, a Server Telnet MAY send
+         a Telnet "Synch" sequence back to the user, to flush the output
+         stream.  The choice ought to be consistent with the way the
+         server operating system behaves when a local user interrupts a
+         process.
+
+         When it receives a Telnet AO command, a Server Telnet MUST send
+         a Telnet "Synch" sequence back to the user, to flush the output
+         stream.
+
+         A User Telnet SHOULD have the capability of flushing output
+         when it sends a Telnet IP; see also Section 3.4.5.
+
+         DISCUSSION:
+              There are three possible ways for a User Telnet to flush
+              the stream of server output data:
+
+              (1)  Send AO after IP.
+
+                   This will cause the server host to send a "flush-
+                   buffered-output" signal to its operating system.
+                   However, the AO may not take effect locally, i.e.,
+                   stop terminal output at the User Telnet end, until
+                   the Server Telnet has received and processed the AO
+                   and has sent back a "Synch".
+
+              (2)  Send DO TIMING-MARK [TELNET:7] after IP, and discard
+                   all output locally until a WILL/WONT TIMING-MARK is
+
+
+
+Internet Engineering Task Force                                [Page 18]
+
+
+
+
+RFC1123                  REMOTE LOGIN -- TELNET             October 1989
+
+
+                   received from the Server Telnet.
+
+                   Since the DO TIMING-MARK will be processed after the
+                   IP at the server, the reply to it should be in the
+                   right place in the output data stream.  However, the
+                   TIMING-MARK will not send a "flush buffered output"
+                   signal to the server operating system.  Whether or
+                   not this is needed is dependent upon the server
+                   system.
+
+              (3)  Do both.
+
+              The best method is not entirely clear, since it must
+              accommodate a number of existing server hosts that do not
+              follow the Telnet standards in various ways.  The safest
+              approach is probably to provide a user-controllable option
+              to select (1), (2), or (3).
+
+      3.2.5  NVT Printer and Keyboard: RFC-854, p. 11
+
+         In NVT mode, a Telnet SHOULD NOT send characters with the
+         high-order bit 1, and MUST NOT send it as a parity bit.
+         Implementations that pass the high-order bit to applications
+         SHOULD negotiate binary mode (see Section 3.2.6).
+
+
+         DISCUSSION:
+              Implementors should be aware that a strict reading of
+              RFC-854 allows a client or server expecting NVT ASCII to
+              ignore characters with the high-order bit set.  In
+              general, binary mode is expected to be used for
+              transmission of an extended (beyond 7-bit) character set
+              with Telnet.
+
+              However, there exist applications that really need an 8-
+              bit NVT mode, which is currently not defined, and these
+              existing applications do set the high-order bit during
+              part or all of the life of a Telnet connection.  Note that
+              binary mode is not the same as 8-bit NVT mode, since
+              binary mode turns off end-of-line processing.  For this
+              reason, the requirements on the high-order bit are stated
+              as SHOULD, not MUST.
+
+              RFC-854 defines a minimal set of properties of a "network
+              virtual terminal" or NVT; this is not meant to preclude
+              additional features in a real terminal.  A Telnet
+              connection is fully transparent to all 7-bit ASCII
+              characters, including arbitrary ASCII control characters.
+
+
+
+Internet Engineering Task Force                                [Page 19]
+
+
+
+
+RFC1123                  REMOTE LOGIN -- TELNET             October 1989
+
+
+              For example, a terminal might support full-screen commands
+              coded as ASCII escape sequences; a Telnet implementation
+              would pass these sequences as uninterpreted data.  Thus,
+              an NVT should not be conceived as a terminal type of a
+              highly-restricted device.
+
+      3.2.6  Telnet Command Structure: RFC-854, p. 13
+
+         Since options may appear at any point in the data stream, a
+         Telnet escape character (known as IAC, with the value 255) to
+         be sent as data MUST be doubled.
+
+      3.2.7  Telnet Binary Option: RFC-856
+
+         When the Binary option has been successfully negotiated,
+         arbitrary 8-bit characters are allowed.  However, the data
+         stream MUST still be scanned for IAC characters, any embedded
+         Telnet commands MUST be obeyed, and data bytes equal to IAC
+         MUST be doubled.  Other character processing (e.g., replacing
+         CR by CR NUL or by CR LF) MUST NOT be done.  In particular,
+         there is no end-of-line convention (see Section 3.3.1) in
+         binary mode.
+
+         DISCUSSION:
+              The Binary option is normally negotiated in both
+              directions, to change the Telnet connection from NVT mode
+              to "binary mode".
+
+              The sequence IAC EOR can be used to delimit blocks of data
+              within a binary-mode Telnet stream.
+
+      3.2.8  Telnet Terminal-Type Option: RFC-1091
+
+         The Terminal-Type option MUST use the terminal type names
+         officially defined in the Assigned Numbers RFC [INTRO:5], when
+         they are available for the particular terminal.  However, the
+         receiver of a Terminal-Type option MUST accept any name.
+
+         DISCUSSION:
+              RFC-1091 [TELNET:10] updates an earlier version of the
+              Terminal-Type option defined in RFC-930.  The earlier
+              version allowed a server host capable of supporting
+              multiple terminal types to learn the type of a particular
+              client's terminal, assuming that each physical terminal
+              had an intrinsic type.  However, today a "terminal" is
+              often really a terminal emulator program running in a PC,
+              perhaps capable of emulating a range of terminal types.
+              Therefore, RFC-1091 extends the specification to allow a
+
+
+
+Internet Engineering Task Force                                [Page 20]
+
+
+
+
+RFC1123                  REMOTE LOGIN -- TELNET             October 1989
+
+
+              more general terminal-type negotiation between User and
+              Server Telnets.
+
+   3.3  SPECIFIC ISSUES
+
+      3.3.1  Telnet End-of-Line Convention
+
+         The Telnet protocol defines the sequence CR LF to mean "end-
+         of-line".  For terminal input, this corresponds to a command-
+         completion or "end-of-line" key being pressed on a user
+         terminal; on an ASCII terminal, this is the CR key, but it may
+         also be labelled "Return" or "Enter".
+
+         When a Server Telnet receives the Telnet end-of-line sequence
+         CR LF as input from a remote terminal, the effect MUST be the
+         same as if the user had pressed the "end-of-line" key on a
+         local terminal.  On server hosts that use ASCII, in particular,
+         receipt of the Telnet sequence CR LF must cause the same effect
+         as a local user pressing the CR key on a local terminal.  Thus,
+         CR LF and CR NUL MUST have the same effect on an ASCII server
+         host when received as input over a Telnet connection.
+
+         A User Telnet MUST be able to send any of the forms: CR LF, CR
+         NUL, and LF.  A User Telnet on an ASCII host SHOULD have a
+         user-controllable mode to send either CR LF or CR NUL when the
+         user presses the "end-of-line" key, and CR LF SHOULD be the
+         default.
+
+         The Telnet end-of-line sequence CR LF MUST be used to send
+         Telnet data that is not terminal-to-computer (e.g., for Server
+         Telnet sending output, or the Telnet protocol incorporated
+         in another application protocol).
+
+         DISCUSSION:
+              To allow interoperability between arbitrary Telnet clients
+              and servers, the Telnet protocol defined a standard
+              representation for a line terminator.  Since the ASCII
+              character set includes no explicit end-of-line character,
+              systems have chosen various representations, e.g., CR, LF,
+              and the sequence CR LF.  The Telnet protocol chose the CR
+              LF sequence as the standard for network transmission.
+
+              Unfortunately, the Telnet protocol specification in RFC-
+              854 [TELNET:1] has turned out to be somewhat ambiguous on
+              what character(s) should be sent from client to server for
+              the "end-of-line" key.  The result has been a massive and
+              continuing interoperability headache, made worse by
+              various faulty implementations of both User and Server
+
+
+
+Internet Engineering Task Force                                [Page 21]
+
+
+
+
+RFC1123                  REMOTE LOGIN -- TELNET             October 1989
+
+
+              Telnets.
+
+              Although the Telnet protocol is based on a perfectly
+              symmetric model, in a remote login session the role of the
+              user at a terminal differs from the role of the server
+              host.  For example, RFC-854 defines the meaning of CR, LF,
+              and CR LF as output from the server, but does not specify
+              what the User Telnet should send when the user presses the
+              "end-of-line" key on the terminal; this turns out to be
+              the point at issue.
+
+              When a user presses the "end-of-line" key, some User
+              Telnet implementations send CR LF, while others send CR
+              NUL (based on a different interpretation of the same
+              sentence in RFC-854).  These will be equivalent for a
+              correctly-implemented ASCII server host, as discussed
+              above.  For other servers, a mode in the User Telnet is
+              needed.
+
+              The existence of User Telnets that send only CR NUL when
+              CR is pressed creates a dilemma for non-ASCII hosts: they
+              can either treat CR NUL as equivalent to CR LF in input,
+              thus precluding the possibility of entering a "bare" CR,
+              or else lose complete interworking.
+
+              Suppose a user on host A uses Telnet to log into a server
+              host B, and then executes B's User Telnet program to log
+              into server host C.  It is desirable for the Server/User
+              Telnet combination on B to be as transparent as possible,
+              i.e., to appear as if A were connected directly to C.  In
+              particular, correct implementation will make B transparent
+              to Telnet end-of-line sequences, except that CR LF may be
+              translated to CR NUL or vice versa.
+
+         IMPLEMENTATION:
+              To understand Telnet end-of-line issues, one must have at
+              least a general model of the relationship of Telnet to the
+              local operating system.  The Server Telnet process is
+              typically coupled into the terminal driver software of the
+              operating system as a pseudo-terminal.  A Telnet end-of-
+              line sequence received by the Server Telnet must have the
+              same effect as pressing the end-of-line key on a real
+              locally-connected terminal.
+
+              Operating systems that support interactive character-at-
+              a-time applications (e.g., editors) typically have two
+              internal modes for their terminal I/O: a formatted mode,
+              in which local conventions for end-of-line and other
+
+
+
+Internet Engineering Task Force                                [Page 22]
+
+
+
+
+RFC1123                  REMOTE LOGIN -- TELNET             October 1989
+
+
+              formatting rules have been applied to the data stream, and
+              a "raw" mode, in which the application has direct access
+              to every character as it was entered.  A Server Telnet
+              must be implemented in such a way that these modes have
+              the same effect for remote as for local terminals.  For
+              example, suppose a CR LF or CR NUL is received by the
+              Server Telnet on an ASCII host.  In raw mode, a CR
+              character is passed to the application; in formatted mode,
+              the local system's end-of-line convention is used.
+
+      3.3.2  Data Entry Terminals
+
+         DISCUSSION:
+              In addition to the line-oriented and character-oriented
+              ASCII terminals for which Telnet was designed, there are
+              several families of video display terminals that are
+              sometimes known as "data entry terminals" or DETs.  The
+              IBM 3270 family is a well-known example.
+
+              Two Internet protocols have been designed to support
+              generic DETs: SUPDUP [TELNET:16, TELNET:17], and the DET
+              option [TELNET:18, TELNET:19].  The DET option drives a
+              data entry terminal over a Telnet connection using (sub-)
+              negotiation.  SUPDUP is a completely separate terminal
+              protocol, which can be entered from Telnet by negotiation.
+              Although both SUPDUP and the DET option have been used
+              successfully in particular environments, neither has
+              gained general acceptance or wide implementation.
+
+              A different approach to DET interaction has been developed
+              for supporting the IBM 3270 family through Telnet,
+              although the same approach would be applicable to any DET.
+              The idea is to enter a "native DET" mode, in which the
+              native DET input/output stream is sent as binary data.
+              The Telnet EOR command is used to delimit logical records
+              (e.g., "screens") within this binary stream.
+
+         IMPLEMENTATION:
+              The rules for entering and leaving native DET mode are as
+              follows:
+
+              o    The Server uses the Terminal-Type option [TELNET:10]
+                   to learn that the client is a DET.
+
+              o    It is conventional, but not required, that both ends
+                   negotiate the EOR option [TELNET:9].
+
+              o    Both ends negotiate the Binary option [TELNET:3] to
+
+
+
+Internet Engineering Task Force                                [Page 23]
+
+
+
+
+RFC1123                  REMOTE LOGIN -- TELNET             October 1989
+
+
+                   enter native DET mode.
+
+              o    When either end negotiates out of binary mode, the
+                   other end does too, and the mode then reverts to
+                   normal NVT.
+
+
+      3.3.3  Option Requirements
+
+         Every Telnet implementation MUST support the Binary option
+         [TELNET:3] and the Suppress Go Ahead option [TELNET:5], and
+         SHOULD support the Echo [TELNET:4], Status [TELNET:6], End-of-
+         Record [TELNET:9], and Extended Options List [TELNET:8]
+         options.
+
+         A User or Server Telnet SHOULD support the Window Size Option
+         [TELNET:12] if the local operating system provides the
+         corresponding capability.
+
+         DISCUSSION:
+              Note that the End-of-Record option only signifies that a
+              Telnet can receive a Telnet EOR without crashing;
+              therefore, every Telnet ought to be willing to accept
+              negotiation of the End-of-Record option.  See also the
+              discussion in Section 3.2.3.
+
+      3.3.4  Option Initiation
+
+         When the Telnet protocol is used in a client/server situation,
+         the server SHOULD initiate negotiation of the terminal
+         interaction mode it expects.
+
+         DISCUSSION:
+              The Telnet protocol was defined to be perfectly
+              symmetrical, but its application is generally asymmetric.
+              Remote login has been known to fail because NEITHER side
+              initiated negotiation of the required non-default terminal
+              modes.  It is generally the server that determines the
+              preferred mode, so the server needs to initiate the
+              negotiation; since the negotiation is symmetric, the user
+              can also initiate it.
+
+         A client (User Telnet) SHOULD provide a means for users to
+         enable and disable the initiation of option negotiation.
+
+         DISCUSSION:
+              A user sometimes needs to connect to an application
+              service (e.g., FTP or SMTP) that uses Telnet for its
+
+
+
+Internet Engineering Task Force                                [Page 24]
+
+
+
+
+RFC1123                  REMOTE LOGIN -- TELNET             October 1989
+
+
+              control stream but does not support Telnet options.  User
+              Telnet may be used for this purpose if initiation of
+              option negotiation is disabled.
+
+      3.3.5  Telnet Linemode Option
+
+         DISCUSSION:
+              An important new Telnet option, LINEMODE [TELNET:12], has
+              been proposed.  The LINEMODE option provides a standard
+              way for a User Telnet and a Server Telnet to agree that
+              the client rather than the server will perform terminal
+              character processing.  When the client has prepared a
+              complete line of text, it will send it to the server in
+              (usually) one TCP packet.  This option will greatly
+              decrease the packet cost of Telnet sessions and will also
+              give much better user response over congested or long-
+              delay networks.
+
+              The LINEMODE option allows dynamic switching between local
+              and remote character processing.  For example, the Telnet
+              connection will automatically negotiate into single-
+              character mode while a full screen editor is running, and
+              then return to linemode when the editor is finished.
+
+              We expect that when this RFC is released, hosts should
+              implement the client side of this option, and may
+              implement the server side of this option.  To properly
+              implement the server side, the server needs to be able to
+              tell the local system not to do any input character
+              processing, but to remember its current terminal state and
+              notify the Server Telnet process whenever the state
+              changes.  This will allow password echoing and full screen
+              editors to be handled properly, for example.
+
+   3.4  TELNET/USER INTERFACE
+
+      3.4.1  Character Set Transparency
+
+         User Telnet implementations SHOULD be able to send or receive
+         any 7-bit ASCII character.  Where possible, any special
+         character interpretations by the user host's operating system
+         SHOULD be bypassed so that these characters can conveniently be
+         sent and received on the connection.
+
+         Some character value MUST be reserved as "escape to command
+         mode"; conventionally, doubling this character allows it to be
+         entered as data.  The specific character used SHOULD be user
+         selectable.
+
+
+
+Internet Engineering Task Force                                [Page 25]
+
+
+
+
+RFC1123                  REMOTE LOGIN -- TELNET             October 1989
+
+
+         On binary-mode connections, a User Telnet program MAY provide
+         an escape mechanism for entering arbitrary 8-bit values, if the
+         host operating system doesn't allow them to be entered directly
+         from the keyboard.
+
+         IMPLEMENTATION:
+              The transparency issues are less pressing on servers, but
+              implementors should take care in dealing with issues like:
+              masking off parity bits (sent by an older, non-conforming
+              client) before they reach programs that expect only NVT
+              ASCII, and properly handling programs that request 8-bit
+              data streams.
+
+      3.4.2  Telnet Commands
+
+         A User Telnet program MUST provide a user the capability of
+         entering any of the Telnet control functions IP, AO, or AYT,
+         and SHOULD provide the capability of entering EC, EL, and
+         Break.
+
+      3.4.3  TCP Connection Errors
+
+         A User Telnet program SHOULD report to the user any TCP errors
+         that are reported by the transport layer (see "TCP/Application
+         Layer Interface" section in [INTRO:1]).
+
+      3.4.4  Non-Default Telnet Contact Port
+
+         A User Telnet program SHOULD allow the user to optionally
+         specify a non-standard contact port number at the Server Telnet
+         host.
+
+      3.4.5  Flushing Output
+
+         A User Telnet program SHOULD provide the user the ability to
+         specify whether or not output should be flushed when an IP is
+         sent; see Section 3.2.4.
+
+         For any output flushing scheme that causes the User Telnet to
+         flush output locally until a Telnet signal is received from the
+         Server, there SHOULD be a way for the user to manually restore
+         normal output, in case the Server fails to send the expected
+         signal.
+
+
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 26]
+
+
+
+
+RFC1123                  REMOTE LOGIN -- TELNET             October 1989
+
+
+   3.5.  TELNET REQUIREMENTS SUMMARY
+
+
+                                                 |        | | | |S| |
+                                                 |        | | | |H| |F
+                                                 |        | | | |O|M|o
+                                                 |        | |S| |U|U|o
+                                                 |        | |H| |L|S|t
+                                                 |        |M|O| |D|T|n
+                                                 |        |U|U|M| | |o
+                                                 |        |S|L|A|N|N|t
+                                                 |        |T|D|Y|O|O|t
+FEATURE                                          |SECTION | | | |T|T|e
+-------------------------------------------------|--------|-|-|-|-|-|--
+                                                 |        | | | | | |
+Option Negotiation                               |3.2.1   |x| | | | |
+  Avoid negotiation loops                        |3.2.1   |x| | | | |
+  Refuse unsupported options                     |3.2.1   |x| | | | |
+  Negotiation OK anytime on connection           |3.2.1   | |x| | | |
+  Default to NVT                                 |3.2.1   |x| | | | |
+  Send official name in Term-Type option         |3.2.8   |x| | | | |
+  Accept any name in Term-Type option            |3.2.8   |x| | | | |
+  Implement Binary, Suppress-GA options          |3.3.3   |x| | | | |
+  Echo, Status, EOR, Ext-Opt-List options        |3.3.3   | |x| | | |
+  Implement Window-Size option if appropriate    |3.3.3   | |x| | | |
+  Server initiate mode negotiations              |3.3.4   | |x| | | |
+  User can enable/disable init negotiations      |3.3.4   | |x| | | |
+                                                 |        | | | | | |
+Go-Aheads                                        |        | | | | | |
+  Non-GA server negotiate SUPPRESS-GA option     |3.2.2   |x| | | | |
+  User or Server accept SUPPRESS-GA option       |3.2.2   |x| | | | |
+  User Telnet ignore GA's                        |3.2.2   | | |x| | |
+                                                 |        | | | | | |
+Control Functions                                |        | | | | | |
+  Support SE NOP DM IP AO AYT SB                 |3.2.3   |x| | | | |
+  Support EOR EC EL Break                        |3.2.3   | | |x| | |
+  Ignore unsupported control functions           |3.2.3   |x| | | | |
+  User, Server discard urgent data up to DM      |3.2.4   |x| | | | |
+  User Telnet send "Synch" after IP, AO, AYT     |3.2.4   | |x| | | |
+  Server Telnet reply Synch to IP                |3.2.4   | | |x| | |
+  Server Telnet reply Synch to AO                |3.2.4   |x| | | | |
+  User Telnet can flush output when send IP      |3.2.4   | |x| | | |
+                                                 |        | | | | | |
+Encoding                                         |        | | | | | |
+  Send high-order bit in NVT mode                |3.2.5   | | | |x| |
+  Send high-order bit as parity bit              |3.2.5   | | | | |x|
+  Negot. BINARY if pass high-ord. bit to applic  |3.2.5   | |x| | | |
+  Always double IAC data byte                    |3.2.6   |x| | | | |
+
+
+
+Internet Engineering Task Force                                [Page 27]
+
+
+
+
+RFC1123                  REMOTE LOGIN -- TELNET             October 1989
+
+
+  Double IAC data byte in binary mode            |3.2.7   |x| | | | |
+  Obey Telnet cmds in binary mode                |3.2.7   |x| | | | |
+  End-of-line, CR NUL in binary mode             |3.2.7   | | | | |x|
+                                                 |        | | | | | |
+End-of-Line                                      |        | | | | | |
+  EOL at Server same as local end-of-line        |3.3.1   |x| | | | |
+  ASCII Server accept CR LF or CR NUL for EOL    |3.3.1   |x| | | | |
+  User Telnet able to send CR LF, CR NUL, or LF  |3.3.1   |x| | | | |
+    ASCII user able to select CR LF/CR NUL       |3.3.1   | |x| | | |
+    User Telnet default mode is CR LF            |3.3.1   | |x| | | |
+  Non-interactive uses CR LF for EOL             |3.3.1   |x| | | | |
+                                                 |        | | | | | |
+User Telnet interface                            |        | | | | | |
+  Input & output all 7-bit characters            |3.4.1   | |x| | | |
+  Bypass local op sys interpretation             |3.4.1   | |x| | | |
+  Escape character                               |3.4.1   |x| | | | |
+     User-settable escape character              |3.4.1   | |x| | | |
+  Escape to enter 8-bit values                   |3.4.1   | | |x| | |
+  Can input IP, AO, AYT                          |3.4.2   |x| | | | |
+  Can input EC, EL, Break                        |3.4.2   | |x| | | |
+  Report TCP connection errors to user           |3.4.3   | |x| | | |
+  Optional non-default contact port              |3.4.4   | |x| | | |
+  Can spec: output flushed when IP sent          |3.4.5   | |x| | | |
+  Can manually restore output mode               |3.4.5   | |x| | | |
+                                                 |        | | | | | |
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 28]
+
+
+
+
+RFC1123                   FILE TRANSFER -- FTP              October 1989
+
+
+4.  FILE TRANSFER
+
+   4.1  FILE TRANSFER PROTOCOL -- FTP
+
+      4.1.1  INTRODUCTION
+
+         The File Transfer Protocol FTP is the primary Internet standard
+         for file transfer.  The current specification is contained in
+         RFC-959 [FTP:1].
+
+         FTP uses separate simultaneous TCP connections for control and
+         for data transfer.  The FTP protocol includes many features,
+         some of which are not commonly implemented.  However, for every
+         feature in FTP, there exists at least one implementation.  The
+         minimum implementation defined in RFC-959 was too small, so a
+         somewhat larger minimum implementation is defined here.
+
+         Internet users have been unnecessarily burdened for years by
+         deficient FTP implementations.  Protocol implementors have
+         suffered from the erroneous opinion that implementing FTP ought
+         to be a small and trivial task.  This is wrong, because FTP has
+         a user interface, because it has to deal (correctly) with the
+         whole variety of communication and operating system errors that
+         may occur, and because it has to handle the great diversity of
+         real file systems in the world.
+
+      4.1.2.  PROTOCOL WALK-THROUGH
+
+         4.1.2.1  LOCAL Type: RFC-959 Section 3.1.1.4
+
+            An FTP program MUST support TYPE I ("IMAGE" or binary type)
+            as well as TYPE L 8 ("LOCAL" type with logical byte size 8).
+            A machine whose memory is organized into m-bit words, where
+            m is not a multiple of 8, MAY also support TYPE L m.
+
+            DISCUSSION:
+                 The command "TYPE L 8" is often required to transfer
+                 binary data between a machine whose memory is organized
+                 into (e.g.) 36-bit words and a machine with an 8-bit
+                 byte organization.  For an 8-bit byte machine, TYPE L 8
+                 is equivalent to IMAGE.
+
+                 "TYPE L m" is sometimes specified to the FTP programs
+                 on two m-bit word machines to ensure the correct
+                 transfer of a native-mode binary file from one machine
+                 to the other.  However, this command should have the
+                 same effect on these machines as "TYPE I".
+
+
+
+
+Internet Engineering Task Force                                [Page 29]
+
+
+
+
+RFC1123                   FILE TRANSFER -- FTP              October 1989
+
+
+         4.1.2.2  Telnet Format Control: RFC-959 Section 3.1.1.5.2
+
+            A host that makes no distinction between TYPE N and TYPE T
+            SHOULD implement TYPE T to be identical to TYPE N.
+
+            DISCUSSION:
+                 This provision should ease interoperation with hosts
+                 that do make this distinction.
+
+                 Many hosts represent text files internally as strings
+                 of ASCII characters, using the embedded ASCII format
+                 effector characters (LF, BS, FF, ...) to control the
+                 format when a file is printed.  For such hosts, there
+                 is no distinction between "print" files and other
+                 files.  However, systems that use record structured
+                 files typically need a special format for printable
+                 files (e.g., ASA carriage control).   For the latter
+                 hosts, FTP allows a choice of TYPE N or TYPE T.
+
+         4.1.2.3  Page Structure: RFC-959 Section 3.1.2.3 and Appendix I
+
+            Implementation of page structure is NOT RECOMMENDED in
+            general. However, if a host system does need to implement
+            FTP for "random access" or "holey" files, it MUST use the
+            defined page structure format rather than define a new
+            private FTP format.
+
+         4.1.2.4  Data Structure Transformations: RFC-959 Section 3.1.2
+
+            An FTP transformation between record-structure and file-
+            structure SHOULD be invertible, to the extent possible while
+            making the result useful on the target host.
+
+            DISCUSSION:
+                 RFC-959 required strict invertibility between record-
+                 structure and file-structure, but in practice,
+                 efficiency and convenience often preclude it.
+                 Therefore, the requirement is being relaxed.  There are
+                 two different objectives for transferring a file:
+                 processing it on the target host, or just storage.  For
+                 storage, strict invertibility is important.  For
+                 processing, the file created on the target host needs
+                 to be in the format expected by application programs on
+                 that host.
+
+                 As an example of the conflict, imagine a record-
+                 oriented operating system that requires some data files
+                 to have exactly 80 bytes in each record.  While STORing
+
+
+
+Internet Engineering Task Force                                [Page 30]
+
+
+
+
+RFC1123                   FILE TRANSFER -- FTP              October 1989
+
+
+                 a file on such a host, an FTP Server must be able to
+                 pad each line or record to 80 bytes; a later retrieval
+                 of such a file cannot be strictly invertible.
+
+         4.1.2.5  Data Connection Management: RFC-959 Section 3.3
+
+            A User-FTP that uses STREAM mode SHOULD send a PORT command
+            to assign a non-default data port before each transfer
+            command is issued.
+
+            DISCUSSION:
+                 This is required because of the long delay after a TCP
+                 connection is closed until its socket pair can be
+                 reused, to allow multiple transfers during a single FTP
+                 session.  Sending a port command can be avoided if a
+                 transfer mode other than stream is used, by leaving the
+                 data transfer connection open between transfers.
+
+         4.1.2.6  PASV Command: RFC-959 Section 4.1.2
+
+            A server-FTP MUST implement the PASV command.
+
+            If multiple third-party transfers are to be executed during
+            the same session, a new PASV command MUST be issued before
+            each transfer command, to obtain a unique port pair.
+
+            IMPLEMENTATION:
+                 The format of the 227 reply to a PASV command is not
+                 well standardized.  In particular, an FTP client cannot
+                 assume that the parentheses shown on page 40 of RFC-959
+                 will be present (and in fact, Figure 3 on page 43 omits
+                 them).  Therefore, a User-FTP program that interprets
+                 the PASV reply must scan the reply for the first digit
+                 of the host and port numbers.
+
+                 Note that the host number h1,h2,h3,h4 is the IP address
+                 of the server host that is sending the reply, and that
+                 p1,p2 is a non-default data transfer port that PASV has
+                 assigned.
+
+         4.1.2.7  LIST and NLST Commands: RFC-959 Section 4.1.3
+
+            The data returned by an NLST command MUST contain only a
+            simple list of legal pathnames, such that the server can use
+            them directly as the arguments of subsequent data transfer
+            commands for the individual files.
+
+            The data returned by a LIST or NLST command SHOULD use an
+
+
+
+Internet Engineering Task Force                                [Page 31]
+
+
+
+
+RFC1123                   FILE TRANSFER -- FTP              October 1989
+
+
+            implied TYPE AN, unless the current type is EBCDIC, in which
+            case an implied TYPE EN SHOULD be used.
+
+            DISCUSSION:
+                 Many FTP clients support macro-commands that will get
+                 or put files matching a wildcard specification, using
+                 NLST to obtain a list of pathnames.  The expansion of
+                 "multiple-put" is local to the client, but "multiple-
+                 get" requires cooperation by the server.
+
+                 The implied type for LIST and NLST is designed to
+                 provide compatibility with existing User-FTPs, and in
+                 particular with multiple-get commands.
+
+         4.1.2.8  SITE Command: RFC-959 Section 4.1.3
+
+            A Server-FTP SHOULD use the SITE command for non-standard
+            features, rather than invent new private commands or
+            unstandardized extensions to existing commands.
+
+         4.1.2.9  STOU Command: RFC-959 Section 4.1.3
+
+            The STOU command stores into a uniquely named file.  When it
+            receives an STOU command, a Server-FTP MUST return the
+            actual file name in the "125 Transfer Starting" or the "150
+            Opening Data Connection" message that precedes the transfer
+            (the 250 reply code mentioned in RFC-959 is incorrect).  The
+            exact format of these messages is hereby defined to be as
+            follows:
+
+                125 FILE: pppp
+                150 FILE: pppp
+
+            where pppp represents the unique pathname of the file that
+            will be written.
+
+         4.1.2.10  Telnet End-of-line Code: RFC-959, Page 34
+
+            Implementors MUST NOT assume any correspondence between READ
+            boundaries on the control connection and the Telnet EOL
+            sequences (CR LF).
+
+            DISCUSSION:
+                 Thus, a server-FTP (or User-FTP) must continue reading
+                 characters from the control connection until a complete
+                 Telnet EOL sequence is encountered, before processing
+                 the command (or response, respectively).  Conversely, a
+                 single READ from the control connection may include
+
+
+
+Internet Engineering Task Force                                [Page 32]
+
+
+
+
+RFC1123                   FILE TRANSFER -- FTP              October 1989
+
+
+                 more than one FTP command.
+
+         4.1.2.11  FTP Replies: RFC-959 Section 4.2, Page 35
+
+            A Server-FTP MUST send only correctly formatted replies on
+            the control connection.  Note that RFC-959 (unlike earlier
+            versions of the FTP spec) contains no provision for a
+            "spontaneous" reply message.
+
+            A Server-FTP SHOULD use the reply codes defined in RFC-959
+            whenever they apply.  However, a server-FTP MAY use a
+            different reply code when needed, as long as the general
+            rules of Section 4.2 are followed. When the implementor has
+            a choice between a 4xx and 5xx reply code, a Server-FTP
+            SHOULD send a 4xx (temporary failure) code when there is any
+            reasonable possibility that a failed FTP will succeed a few
+            hours later.
+
+            A User-FTP SHOULD generally use only the highest-order digit
+            of a 3-digit reply code for making a procedural decision, to
+            prevent difficulties when a Server-FTP uses non-standard
+            reply codes.
+
+            A User-FTP MUST be able to handle multi-line replies.  If
+            the implementation imposes a limit on the number of lines
+            and if this limit is exceeded, the User-FTP MUST recover,
+            e.g., by ignoring the excess lines until the end of the
+            multi-line reply is reached.
+
+            A User-FTP SHOULD NOT interpret a 421 reply code ("Service
+            not available, closing control connection") specially, but
+            SHOULD detect closing of the control connection by the
+            server.
+
+            DISCUSSION:
+                 Server implementations that fail to strictly follow the
+                 reply rules often cause FTP user programs to hang.
+                 Note that RFC-959 resolved ambiguities in the reply
+                 rules found in earlier FTP specifications and must be
+                 followed.
+
+                 It is important to choose FTP reply codes that properly
+                 distinguish between temporary and permanent failures,
+                 to allow the successful use of file transfer client
+                 daemons.  These programs depend on the reply codes to
+                 decide whether or not to retry a failed transfer; using
+                 a permanent failure code (5xx) for a temporary error
+                 will cause these programs to give up unnecessarily.
+
+
+
+Internet Engineering Task Force                                [Page 33]
+
+
+
+
+RFC1123                   FILE TRANSFER -- FTP              October 1989
+
+
+                 When the meaning of a reply matches exactly the text
+                 shown in RFC-959, uniformity will be enhanced by using
+                 the RFC-959 text verbatim.  However, a Server-FTP
+                 implementor is encouraged to choose reply text that
+                 conveys specific system-dependent information, when
+                 appropriate.
+
+         4.1.2.12  Connections: RFC-959 Section 5.2
+
+            The words "and the port used" in the second paragraph of
+            this section of RFC-959 are erroneous (historical), and they
+            should be ignored.
+
+            On a multihomed server host, the default data transfer port
+            (L-1) MUST be associated with the same local IP address as
+            the corresponding control connection to port L.
+
+            A user-FTP MUST NOT send any Telnet controls other than
+            SYNCH and IP on an FTP control connection. In particular, it
+            MUST NOT attempt to negotiate Telnet options on the control
+            connection.  However, a server-FTP MUST be capable of
+            accepting and refusing Telnet negotiations (i.e., sending
+            DONT/WONT).
+
+            DISCUSSION:
+                 Although the RFC says: "Server- and User- processes
+                 should follow the conventions for the Telnet
+                 protocol...[on the control connection]", it is not the
+                 intent that Telnet option negotiation is to be
+                 employed.
+
+         4.1.2.13  Minimum Implementation; RFC-959 Section 5.1
+
+            The following commands and options MUST be supported by
+            every server-FTP and user-FTP, except in cases where the
+            underlying file system or operating system does not allow or
+            support a particular command.
+
+                 Type: ASCII Non-print, IMAGE, LOCAL 8
+                 Mode: Stream
+                 Structure: File, Record*
+                 Commands:
+                    USER, PASS, ACCT,
+                    PORT, PASV,
+                    TYPE, MODE, STRU,
+                    RETR, STOR, APPE,
+                    RNFR, RNTO, DELE,
+                    CWD,  CDUP, RMD,  MKD,  PWD,
+
+
+
+Internet Engineering Task Force                                [Page 34]
+
+
+
+
+RFC1123                   FILE TRANSFER -- FTP              October 1989
+
+
+                    LIST, NLST,
+                    SYST, STAT,
+                    HELP, NOOP, QUIT.
+
+            *Record structure is REQUIRED only for hosts whose file
+            systems support record structure.
+
+            DISCUSSION:
+                 Vendors are encouraged to implement a larger subset of
+                 the protocol.  For example, there are important
+                 robustness features in the protocol (e.g., Restart,
+                 ABOR, block mode) that would be an aid to some Internet
+                 users but are not widely implemented.
+
+                 A host that does not have record structures in its file
+                 system may still accept files with STRU R, recording
+                 the byte stream literally.
+
+      4.1.3  SPECIFIC ISSUES
+
+         4.1.3.1  Non-standard Command Verbs
+
+            FTP allows "experimental" commands, whose names begin with
+            "X".  If these commands are subsequently adopted as
+            standards, there may still be existing implementations using
+            the "X" form.  At present, this is true for the directory
+            commands:
+
+                RFC-959   "Experimental"
+
+                  MKD        XMKD
+                  RMD        XRMD
+                  PWD        XPWD
+                  CDUP       XCUP
+                  CWD        XCWD
+
+            All FTP implementations SHOULD recognize both forms of these
+            commands, by simply equating them with extra entries in the
+            command lookup table.
+
+            IMPLEMENTATION:
+                 A User-FTP can access a server that supports only the
+                 "X" forms by implementing a mode switch, or
+                 automatically using the following procedure: if the
+                 RFC-959 form of one of the above commands is rejected
+                 with a 500 or 502 response code, then try the
+                 experimental form; any other response would be passed
+                 to the user.
+
+
+
+Internet Engineering Task Force                                [Page 35]
+
+
+
+
+RFC1123                   FILE TRANSFER -- FTP              October 1989
+
+
+         4.1.3.2  Idle Timeout
+
+            A Server-FTP process SHOULD have an idle timeout, which will
+            terminate the process and close the control connection if
+            the server is inactive (i.e., no command or data transfer in
+            progress) for a long period of time.  The idle timeout time
+            SHOULD be configurable, and the default should be at least 5
+            minutes.
+
+            A client FTP process ("User-PI" in RFC-959) will need
+            timeouts on responses only if it is invoked from a program.
+
+            DISCUSSION:
+                 Without a timeout, a Server-FTP process may be left
+                 pending indefinitely if the corresponding client
+                 crashes without closing the control connection.
+
+         4.1.3.3  Concurrency of Data and Control
+
+            DISCUSSION:
+                 The intent of the designers of FTP was that a user
+                 should be able to send a STAT command at any time while
+                 data transfer was in progress and that the server-FTP
+                 would reply immediately with status -- e.g., the number
+                 of bytes transferred so far.  Similarly, an ABOR
+                 command should be possible at any time during a data
+                 transfer.
+
+                 Unfortunately, some small-machine operating systems
+                 make such concurrent programming difficult, and some
+                 other implementers seek minimal solutions, so some FTP
+                 implementations do not allow concurrent use of the data
+                 and control connections.  Even such a minimal server
+                 must be prepared to accept and defer a STAT or ABOR
+                 command that arrives during data transfer.
+
+         4.1.3.4  FTP Restart Mechanism
+
+            The description of the 110 reply on pp. 40-41 of RFC-959 is
+            incorrect; the correct description is as follows.  A restart
+            reply message, sent over the control connection from the
+            receiving FTP to the User-FTP, has the format:
+
+                110 MARK ssss = rrrr
+
+            Here:
+
+            *    ssss is a text string that appeared in a Restart Marker
+
+
+
+Internet Engineering Task Force                                [Page 36]
+
+
+
+
+RFC1123                   FILE TRANSFER -- FTP              October 1989
+
+
+                 in the data stream and encodes a position in the
+                 sender's file system;
+
+            *    rrrr encodes the corresponding position in the
+                 receiver's file system.
+
+            The encoding, which is specific to a particular file system
+            and network implementation, is always generated and
+            interpreted by the same system, either sender or receiver.
+
+            When an FTP that implements restart receives a Restart
+            Marker in the data stream, it SHOULD force the data to that
+            point to be written to stable storage before encoding the
+            corresponding position rrrr.  An FTP sending Restart Markers
+            MUST NOT assume that 110 replies will be returned
+            synchronously with the data, i.e., it must not await a 110
+            reply before sending more data.
+
+            Two new reply codes are hereby defined for errors
+            encountered in restarting a transfer:
+
+              554 Requested action not taken: invalid REST parameter.
+
+                 A 554 reply may result from an FTP service command that
+                 follows a REST command.  The reply indicates that the
+                 existing file at the Server-FTP cannot be repositioned
+                 as specified in the REST.
+
+              555 Requested action not taken: type or stru mismatch.
+
+                 A 555 reply may result from an APPE command or from any
+                 FTP service command following a REST command.  The
+                 reply indicates that there is some mismatch between the
+                 current transfer parameters (type and stru) and the
+                 attributes of the existing file.
+
+            DISCUSSION:
+                 Note that the FTP Restart mechanism requires that Block
+                 or Compressed mode be used for data transfer, to allow
+                 the Restart Markers to be included within the data
+                 stream.  The frequency of Restart Markers can be low.
+
+                 Restart Markers mark a place in the data stream, but
+                 the receiver may be performing some transformation on
+                 the data as it is stored into stable storage.  In
+                 general, the receiver's encoding must include any state
+                 information necessary to restart this transformation at
+                 any point of the FTP data stream.  For example, in TYPE
+
+
+
+Internet Engineering Task Force                                [Page 37]
+
+
+
+
+RFC1123                   FILE TRANSFER -- FTP              October 1989
+
+
+                 A transfers, some receiver hosts transform CR LF
+                 sequences into a single LF character on disk.   If a
+                 Restart Marker happens to fall between CR and LF, the
+                 receiver must encode in rrrr that the transfer must be
+                 restarted in a "CR has been seen and discarded" state.
+
+                 Note that the Restart Marker is required to be encoded
+                 as a string of printable ASCII characters, regardless
+                 of the type of the data.
+
+                 RFC-959 says that restart information is to be returned
+                 "to the user".  This should not be taken literally.  In
+                 general, the User-FTP should save the restart
+                 information (ssss,rrrr) in stable storage, e.g., append
+                 it to a restart control file.  An empty restart control
+                 file should be created when the transfer first starts
+                 and deleted automatically when the transfer completes
+                 successfully.  It is suggested that this file have a
+                 name derived in an easily-identifiable manner from the
+                 name of the file being transferred and the remote host
+                 name; this is analogous to the means used by many text
+                 editors for naming "backup" files.
+
+                 There are three cases for FTP restart.
+
+                 (1)  User-to-Server Transfer
+
+                      The User-FTP puts Restart Markers <ssss> at
+                      convenient places in the data stream.  When the
+                      Server-FTP receives a Marker, it writes all prior
+                      data to disk, encodes its file system position and
+                      transformation state as rrrr, and returns a "110
+                      MARK ssss = rrrr" reply over the control
+                      connection.  The User-FTP appends the pair
+                      (ssss,rrrr) to its restart control file.
+
+                      To restart the transfer, the User-FTP fetches the
+                      last (ssss,rrrr) pair from the restart control
+                      file, repositions its local file system and
+                      transformation state using ssss, and sends the
+                      command "REST rrrr" to the Server-FTP.
+
+                 (2)  Server-to-User Transfer
+
+                      The Server-FTP puts Restart Markers <ssss> at
+                      convenient places in the data stream.  When the
+                      User-FTP receives a Marker, it writes all prior
+                      data to disk, encodes its file system position and
+
+
+
+Internet Engineering Task Force                                [Page 38]
+
+
+
+
+RFC1123                   FILE TRANSFER -- FTP              October 1989
+
+
+                      transformation state as rrrr, and appends the pair
+                      (rrrr,ssss) to its restart control file.
+
+                      To restart the transfer, the User-FTP fetches the
+                      last (rrrr,ssss) pair from the restart control
+                      file, repositions its local file system and
+                      transformation state using rrrr, and sends the
+                      command "REST ssss" to the Server-FTP.
+
+                 (3)  Server-to-Server ("Third-Party") Transfer
+
+                      The sending Server-FTP puts Restart Markers <ssss>
+                      at convenient places in the data stream.  When it
+                      receives a Marker, the receiving Server-FTP writes
+                      all prior data to disk, encodes its file system
+                      position and transformation state as rrrr, and
+                      sends a "110 MARK ssss = rrrr" reply over the
+                      control connection to the User.  The User-FTP
+                      appends the pair (ssss,rrrr) to its restart
+                      control file.
+
+                      To restart the transfer, the User-FTP fetches the
+                      last (ssss,rrrr) pair from the restart control
+                      file, sends "REST ssss" to the sending Server-FTP,
+                      and sends "REST rrrr" to the receiving Server-FTP.
+
+
+      4.1.4  FTP/USER INTERFACE
+
+         This section discusses the user interface for a User-FTP
+         program.
+
+         4.1.4.1  Pathname Specification
+
+            Since FTP is intended for use in a heterogeneous
+            environment, User-FTP implementations MUST support remote
+            pathnames as arbitrary character strings, so that their form
+            and content are not limited by the conventions of the local
+            operating system.
+
+            DISCUSSION:
+                 In particular, remote pathnames can be of arbitrary
+                 length, and all the printing ASCII characters as well
+                 as space (0x20) must be allowed.  RFC-959 allows a
+                 pathname to contain any 7-bit ASCII character except CR
+                 or LF.
+
+
+
+
+
+Internet Engineering Task Force                                [Page 39]
+
+
+
+
+RFC1123                   FILE TRANSFER -- FTP              October 1989
+
+
+         4.1.4.2  "QUOTE" Command
+
+            A User-FTP program MUST implement a "QUOTE" command that
+            will pass an arbitrary character string to the server and
+            display all resulting response messages to the user.
+
+            To make the "QUOTE" command useful, a User-FTP SHOULD send
+            transfer control commands to the server as the user enters
+            them, rather than saving all the commands and sending them
+            to the server only when a data transfer is started.
+
+            DISCUSSION:
+                 The "QUOTE" command is essential to allow the user to
+                 access servers that require system-specific commands
+                 (e.g., SITE or ALLO), or to invoke new or optional
+                 features that are not implemented by the User-FTP.  For
+                 example, "QUOTE" may be used to specify "TYPE A T" to
+                 send a print file to hosts that require the
+                 distinction, even if the User-FTP does not recognize
+                 that TYPE.
+
+         4.1.4.3  Displaying Replies to User
+
+            A User-FTP SHOULD display to the user the full text of all
+            error reply messages it receives.  It SHOULD have a
+            "verbose" mode in which all commands it sends and the full
+            text and reply codes it receives are displayed, for
+            diagnosis of problems.
+
+         4.1.4.4  Maintaining Synchronization
+
+            The state machine in a User-FTP SHOULD be forgiving of
+            missing and unexpected reply messages, in order to maintain
+            command synchronization with the server.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 40]
+
+
+
+
+RFC1123                   FILE TRANSFER -- FTP              October 1989
+
+
+      4.1.5   FTP REQUIREMENTS SUMMARY
+
+                                           |               | | | |S| |
+                                           |               | | | |H| |F
+                                           |               | | | |O|M|o
+                                           |               | |S| |U|U|o
+                                           |               | |H| |L|S|t
+                                           |               |M|O| |D|T|n
+                                           |               |U|U|M| | |o
+                                           |               |S|L|A|N|N|t
+                                           |               |T|D|Y|O|O|t
+FEATURE                                    |SECTION        | | | |T|T|e
+-------------------------------------------|---------------|-|-|-|-|-|--
+Implement TYPE T if same as TYPE N         |4.1.2.2        | |x| | | |
+File/Record transform invertible if poss.  |4.1.2.4        | |x| | | |
+User-FTP send PORT cmd for stream mode     |4.1.2.5        | |x| | | |
+Server-FTP implement PASV                  |4.1.2.6        |x| | | | |
+  PASV is per-transfer                     |4.1.2.6        |x| | | | |
+NLST reply usable in RETR cmds             |4.1.2.7        |x| | | | |
+Implied type for LIST and NLST             |4.1.2.7        | |x| | | |
+SITE cmd for non-standard features         |4.1.2.8        | |x| | | |
+STOU cmd return pathname as specified      |4.1.2.9        |x| | | | |
+Use TCP READ boundaries on control conn.   |4.1.2.10       | | | | |x|
+                                           |               | | | | | |
+Server-FTP send only correct reply format  |4.1.2.11       |x| | | | |
+Server-FTP use defined reply code if poss. |4.1.2.11       | |x| | | |
+  New reply code following Section 4.2     |4.1.2.11       | | |x| | |
+User-FTP use only high digit of reply      |4.1.2.11       | |x| | | |
+User-FTP handle multi-line reply lines     |4.1.2.11       |x| | | | |
+User-FTP handle 421 reply specially        |4.1.2.11       | | | |x| |
+                                           |               | | | | | |
+Default data port same IP addr as ctl conn |4.1.2.12       |x| | | | |
+User-FTP send Telnet cmds exc. SYNCH, IP   |4.1.2.12       | | | | |x|
+User-FTP negotiate Telnet options          |4.1.2.12       | | | | |x|
+Server-FTP handle Telnet options           |4.1.2.12       |x| | | | |
+Handle "Experimental" directory cmds       |4.1.3.1        | |x| | | |
+Idle timeout in server-FTP                 |4.1.3.2        | |x| | | |
+    Configurable idle timeout              |4.1.3.2        | |x| | | |
+Receiver checkpoint data at Restart Marker |4.1.3.4        | |x| | | |
+Sender assume 110 replies are synchronous  |4.1.3.4        | | | | |x|
+                                           |               | | | | | |
+Support TYPE:                              |               | | | | | |
+  ASCII - Non-Print (AN)                   |4.1.2.13       |x| | | | |
+  ASCII - Telnet (AT) -- if same as AN     |4.1.2.2        | |x| | | |
+  ASCII - Carriage Control (AC)            |959 3.1.1.5.2  | | |x| | |
+  EBCDIC - (any form)                      |959 3.1.1.2    | | |x| | |
+  IMAGE                                    |4.1.2.1        |x| | | | |
+  LOCAL 8                                  |4.1.2.1        |x| | | | |
+
+
+
+Internet Engineering Task Force                                [Page 41]
+
+
+
+
+RFC1123                   FILE TRANSFER -- FTP              October 1989
+
+
+  LOCAL m                                  |4.1.2.1        | | |x| | |2
+                                           |               | | | | | |
+Support MODE:                              |               | | | | | |
+  Stream                                   |4.1.2.13       |x| | | | |
+  Block                                    |959 3.4.2      | | |x| | |
+                                           |               | | | | | |
+Support STRUCTURE:                         |               | | | | | |
+  File                                     |4.1.2.13       |x| | | | |
+  Record                                   |4.1.2.13       |x| | | | |3
+  Page                                     |4.1.2.3        | | | |x| |
+                                           |               | | | | | |
+Support commands:                          |               | | | | | |
+  USER                                     |4.1.2.13       |x| | | | |
+  PASS                                     |4.1.2.13       |x| | | | |
+  ACCT                                     |4.1.2.13       |x| | | | |
+  CWD                                      |4.1.2.13       |x| | | | |
+  CDUP                                     |4.1.2.13       |x| | | | |
+  SMNT                                     |959 5.3.1      | | |x| | |
+  REIN                                     |959 5.3.1      | | |x| | |
+  QUIT                                     |4.1.2.13       |x| | | | |
+                                           |               | | | | | |
+  PORT                                     |4.1.2.13       |x| | | | |
+  PASV                                     |4.1.2.6        |x| | | | |
+  TYPE                                     |4.1.2.13       |x| | | | |1
+  STRU                                     |4.1.2.13       |x| | | | |1
+  MODE                                     |4.1.2.13       |x| | | | |1
+                                           |               | | | | | |
+  RETR                                     |4.1.2.13       |x| | | | |
+  STOR                                     |4.1.2.13       |x| | | | |
+  STOU                                     |959 5.3.1      | | |x| | |
+  APPE                                     |4.1.2.13       |x| | | | |
+  ALLO                                     |959 5.3.1      | | |x| | |
+  REST                                     |959 5.3.1      | | |x| | |
+  RNFR                                     |4.1.2.13       |x| | | | |
+  RNTO                                     |4.1.2.13       |x| | | | |
+  ABOR                                     |959 5.3.1      | | |x| | |
+  DELE                                     |4.1.2.13       |x| | | | |
+  RMD                                      |4.1.2.13       |x| | | | |
+  MKD                                      |4.1.2.13       |x| | | | |
+  PWD                                      |4.1.2.13       |x| | | | |
+  LIST                                     |4.1.2.13       |x| | | | |
+  NLST                                     |4.1.2.13       |x| | | | |
+  SITE                                     |4.1.2.8        | | |x| | |
+  STAT                                     |4.1.2.13       |x| | | | |
+  SYST                                     |4.1.2.13       |x| | | | |
+  HELP                                     |4.1.2.13       |x| | | | |
+  NOOP                                     |4.1.2.13       |x| | | | |
+                                           |               | | | | | |
+
+
+
+Internet Engineering Task Force                                [Page 42]
+
+
+
+
+RFC1123                   FILE TRANSFER -- FTP              October 1989
+
+
+User Interface:                            |               | | | | | |
+  Arbitrary pathnames                      |4.1.4.1        |x| | | | |
+  Implement "QUOTE" command                |4.1.4.2        |x| | | | |
+  Transfer control commands immediately    |4.1.4.2        | |x| | | |
+  Display error messages to user           |4.1.4.3        | |x| | | |
+    Verbose mode                           |4.1.4.3        | |x| | | |
+  Maintain synchronization with server     |4.1.4.4        | |x| | | |
+
+Footnotes:
+
+(1)  For the values shown earlier.
+
+(2)  Here m is number of bits in a memory word.
+
+(3)  Required for host with record-structured file system, optional
+     otherwise.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 43]
+
+
+
+
+RFC1123                  FILE TRANSFER -- TFTP              October 1989
+
+
+   4.2  TRIVIAL FILE TRANSFER PROTOCOL -- TFTP
+
+      4.2.1  INTRODUCTION
+
+         The Trivial File Transfer Protocol TFTP is defined in RFC-783
+         [TFTP:1].
+
+         TFTP provides its own reliable delivery with UDP as its
+         transport protocol, using a simple stop-and-wait acknowledgment
+         system.  Since TFTP has an effective window of only one 512
+         octet segment, it can provide good performance only over paths
+         that have a small delay*bandwidth product.  The TFTP file
+         interface is very simple, providing no access control or
+         security.
+
+         TFTP's most important application is bootstrapping a host over
+         a local network, since it is simple and small enough to be
+         easily implemented in EPROM [BOOT:1, BOOT:2].  Vendors are
+         urged to support TFTP for booting.
+
+      4.2.2  PROTOCOL WALK-THROUGH
+
+         The TFTP specification [TFTP:1] is written in an open style,
+         and does not fully specify many parts of the protocol.
+
+         4.2.2.1  Transfer Modes: RFC-783, Page 3
+
+            The transfer mode "mail" SHOULD NOT be supported.
+
+         4.2.2.2  UDP Header: RFC-783, Page 17
+
+            The Length field of a UDP header is incorrectly defined in
+            RFC-783; the field does include the UDP header length (8).
+
+      4.2.3  SPECIFIC ISSUES
+
+         4.2.3.1  Sorcerer's Apprentice Syndrome
+
+            There is a serious bug, known as the "Sorcerer's Apprentice
+            Syndrome," in the protocol specification.  While it does not
+            cause incorrect operation of the transfer (the file will
+            always be transferred correctly if the transfer completes),
+            this bug may cause excessive retransmission, which may cause
+            the transfer to time out.
+
+            Implementations MUST contain the fix for this problem: the
+            sender (i.e., the side originating the DATA packets) must
+            never resend the current DATA packet on receipt of a
+
+
+
+Internet Engineering Task Force                                [Page 44]
+
+
+
+
+RFC1123                  FILE TRANSFER -- TFTP              October 1989
+
+
+            duplicate ACK.
+
+            DISCUSSION:
+                 The bug is caused by the protocol rule that either
+                 side, on receiving an old duplicate datagram, may
+                 resend the current datagram.  If a packet is delayed in
+                 the network but later successfully delivered after
+                 either side has timed out and retransmitted a packet, a
+                 duplicate copy of the response may be generated.  If
+                 the other side responds to this duplicate with a
+                 duplicate of its own, then every datagram will be sent
+                 in duplicate for the remainder of the transfer (unless
+                 a datagram is lost, breaking the repetition).  Worse
+                 yet, since the delay is often caused by congestion,
+                 this duplicate transmission will usually cause more
+                 congestion, leading to more delayed packets, etc.
+
+                 The following example may help to clarify this problem.
+
+                     TFTP A                  TFTP B
+
+                 (1)  Receive ACK X-1
+                      Send DATA X
+                 (2)                          Receive DATA X
+                                              Send ACK X
+                        (ACK X is delayed in network,
+                         and  A times out):
+                 (3)  Retransmit DATA X
+
+                 (4)                          Receive DATA X again
+                                              Send ACK X again
+                 (5)  Receive (delayed) ACK X
+                      Send DATA X+1
+                 (6)                          Receive DATA X+1
+                                              Send ACK X+1
+                 (7)  Receive ACK X again
+                      Send DATA X+1 again
+                 (8)                          Receive DATA X+1 again
+                                              Send ACK X+1 again
+                 (9)  Receive ACK X+1
+                      Send DATA X+2
+                 (10)                         Receive DATA X+2
+                                              Send ACK X+2
+                 (11) Receive ACK X+1 again
+                      Send DATA X+2 again
+                 (12)                         Receive DATA X+2 again
+                                              Send ACK X+2 again
+
+
+
+
+Internet Engineering Task Force                                [Page 45]
+
+
+
+
+RFC1123                  FILE TRANSFER -- TFTP              October 1989
+
+
+                 Notice that once the delayed ACK arrives, the protocol
+                 settles down to duplicate all further packets
+                 (sequences 5-8 and 9-12).  The problem is caused not by
+                 either side timing out, but by both sides
+                 retransmitting the current packet when they receive a
+                 duplicate.
+
+                 The fix is to break the retransmission loop, as
+                 indicated above.  This is analogous to the behavior of
+                 TCP.  It is then possible to remove the retransmission
+                 timer on the receiver, since the resent ACK will never
+                 cause any action; this is a useful simplification where
+                 TFTP is used in a bootstrap program.  It is OK to allow
+                 the timer to remain, and it may be helpful if the
+                 retransmitted ACK replaces one that was genuinely lost
+                 in the network.  The sender still requires a retransmit
+                 timer, of course.
+
+         4.2.3.2  Timeout Algorithms
+
+            A TFTP implementation MUST use an adaptive timeout.
+
+            IMPLEMENTATION:
+                 TCP retransmission algorithms provide a useful base to
+                 work from.  At least an exponential backoff of
+                 retransmission timeout is necessary.
+
+         4.2.3.3  Extensions
+
+            A variety of non-standard extensions have been made to TFTP,
+            including additional transfer modes and a secure operation
+            mode (with passwords).  None of these have been
+            standardized.
+
+         4.2.3.4  Access Control
+
+            A server TFTP implementation SHOULD include some
+            configurable access control over what pathnames are allowed
+            in TFTP operations.
+
+         4.2.3.5  Broadcast Request
+
+            A TFTP request directed to a broadcast address SHOULD be
+            silently ignored.
+
+            DISCUSSION:
+                 Due to the weak access control capability of TFTP,
+                 directed broadcasts of TFTP requests to random networks
+
+
+
+Internet Engineering Task Force                                [Page 46]
+
+
+
+
+RFC1123                  FILE TRANSFER -- TFTP              October 1989
+
+
+                 could create a significant security hole.
+
+      4.2.4  TFTP REQUIREMENTS SUMMARY
+
+                                                 |        | | | |S| |
+                                                 |        | | | |H| |F
+                                                 |        | | | |O|M|o
+                                                 |        | |S| |U|U|o
+                                                 |        | |H| |L|S|t
+                                                 |        |M|O| |D|T|n
+                                                 |        |U|U|M| | |o
+                                                 |        |S|L|A|N|N|t
+                                                 |        |T|D|Y|O|O|t
+FEATURE                                          |SECTION | | | |T|T|e
+-------------------------------------------------|--------|-|-|-|-|-|--
+Fix Sorcerer's Apprentice Syndrome               |4.2.3.1 |x| | | | |
+Transfer modes:                                  |        | | | | | |
+  netascii                                       |RFC-783 |x| | | | |
+  octet                                          |RFC-783 |x| | | | |
+  mail                                           |4.2.2.1 | | | |x| |
+  extensions                                     |4.2.3.3 | | |x| | |
+Use adaptive timeout                             |4.2.3.2 |x| | | | |
+Configurable access control                      |4.2.3.4 | |x| | | |
+Silently ignore broadcast request                |4.2.3.5 | |x| | | |
+-------------------------------------------------|--------|-|-|-|-|-|--
+-------------------------------------------------|--------|-|-|-|-|-|--
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 47]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+5.  ELECTRONIC MAIL -- SMTP and RFC-822
+
+   5.1  INTRODUCTION
+
+      In the TCP/IP protocol suite, electronic mail in a format
+      specified in RFC-822 [SMTP:2] is transmitted using the Simple Mail
+      Transfer Protocol (SMTP) defined in RFC-821 [SMTP:1].
+
+      While SMTP has remained unchanged over the years, the Internet
+      community has made several changes in the way SMTP is used.  In
+      particular, the conversion to the Domain Name System (DNS) has
+      caused changes in address formats and in mail routing.  In this
+      section, we assume familiarity with the concepts and terminology
+      of the DNS, whose requirements are given in Section 6.1.
+
+      RFC-822 specifies the Internet standard format for electronic mail
+      messages.  RFC-822 supersedes an older standard, RFC-733, that may
+      still be in use in a few places, although it is obsolete.  The two
+      formats are sometimes referred to simply by number ("822" and
+      "733").
+
+      RFC-822 is used in some non-Internet mail environments with
+      different mail transfer protocols than SMTP, and SMTP has also
+      been adapted for use in some non-Internet environments.  Note that
+      this document presents the rules for the use of SMTP and RFC-822
+      for the Internet environment only; other mail environments that
+      use these protocols may be expected to have their own rules.
+
+   5.2  PROTOCOL WALK-THROUGH
+
+      This section covers both RFC-821 and RFC-822.
+
+      The SMTP specification in RFC-821 is clear and contains numerous
+      examples, so implementors should not find it difficult to
+      understand.  This section simply updates or annotates portions of
+      RFC-821 to conform with current usage.
+
+      RFC-822 is a long and dense document, defining a rich syntax.
+      Unfortunately, incomplete or defective implementations of RFC-822
+      are common.  In fact, nearly all of the many formats of RFC-822
+      are actually used, so an implementation generally needs to
+      recognize and correctly interpret all of the RFC-822 syntax.
+
+      5.2.1  The SMTP Model: RFC-821 Section 2
+
+         DISCUSSION:
+              Mail is sent by a series of request/response transactions
+              between a client, the "sender-SMTP," and a server, the
+
+
+
+Internet Engineering Task Force                                [Page 48]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+              "receiver-SMTP".  These transactions pass (1) the message
+              proper, which is composed of header and body, and (2) SMTP
+              source and destination addresses, referred to as the
+              "envelope".
+
+              The SMTP programs are analogous to Message Transfer Agents
+              (MTAs) of X.400.  There will be another level of protocol
+              software, closer to the end user, that is responsible for
+              composing and analyzing RFC-822 message headers; this
+              component is known as the "User Agent" in X.400, and we
+              use that term in this document.  There is a clear logical
+              distinction between the User Agent and the SMTP
+              implementation, since they operate on different levels of
+              protocol.  Note, however, that this distinction may not
+              be exactly reflected in the structure of typical
+              implementations of Internet mail.  Often there is a
+              program known as the "mailer" that implements SMTP and
+              also some of the User Agent functions; the rest of the
+              User Agent functions are included in a user interface used
+              for entering and reading mail.
+
+              The SMTP envelope is constructed at the originating site,
+              typically by the User Agent when the message is first
+              queued for the Sender-SMTP program.  The envelope
+              addresses may be derived from information in the message
+              header, supplied by the user interface (e.g., to implement
+              a bcc: request), or derived from local configuration
+              information (e.g., expansion of a mailing list).  The SMTP
+              envelope cannot in general be re-derived from the header
+              at a later stage in message delivery, so the envelope is
+              transmitted separately from the message itself using the
+              MAIL and RCPT commands of SMTP.
+
+              The text of RFC-821 suggests that mail is to be delivered
+              to an individual user at a host.  With the advent of the
+              domain system and of mail routing using mail-exchange (MX)
+              resource records, implementors should now think of
+              delivering mail to a user at a domain, which may or may
+              not be a particular host.  This DOES NOT change the fact
+              that SMTP is a host-to-host mail exchange protocol.
+
+      5.2.2  Canonicalization: RFC-821 Section 3.1
+
+         The domain names that a Sender-SMTP sends in MAIL and RCPT
+         commands MUST have been  "canonicalized," i.e., they must be
+         fully-qualified principal names or domain literals, not
+         nicknames or domain abbreviations.  A canonicalized name either
+         identifies a host directly or is an MX name; it cannot be a
+
+
+
+Internet Engineering Task Force                                [Page 49]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+         CNAME.
+
+      5.2.3  VRFY and EXPN Commands: RFC-821 Section 3.3
+
+         A receiver-SMTP MUST implement VRFY and SHOULD implement EXPN
+         (this requirement overrides RFC-821).  However, there MAY be
+         configuration information to disable VRFY and EXPN in a
+         particular installation; this might even allow EXPN to be
+         disabled for selected lists.
+
+         A new reply code is defined for the VRFY command:
+
+              252 Cannot VRFY user (e.g., info is not local), but will
+                  take message for this user and attempt delivery.
+
+         DISCUSSION:
+              SMTP users and administrators make regular use of these
+              commands for diagnosing mail delivery problems.  With the
+              increasing use of multi-level mailing list expansion
+              (sometimes more than two levels), EXPN has been
+              increasingly important for diagnosing inadvertent mail
+              loops.  On the other hand,  some feel that EXPN represents
+              a significant privacy, and perhaps even a security,
+              exposure.
+
+      5.2.4  SEND, SOML, and SAML Commands: RFC-821 Section 3.4
+
+         An SMTP MAY implement the commands to send a message to a
+         user's terminal: SEND, SOML, and SAML.
+
+         DISCUSSION:
+              It has been suggested that the use of mail relaying
+              through an MX record is inconsistent with the intent of
+              SEND to deliver a message immediately and directly to a
+              user's terminal.  However, an SMTP receiver that is unable
+              to write directly to the user terminal can return a "251
+              User Not Local" reply to the RCPT following a SEND, to
+              inform the originator of possibly deferred delivery.
+
+      5.2.5  HELO Command: RFC-821 Section 3.5
+
+         The sender-SMTP MUST ensure that the <domain> parameter in a
+         HELO command is a valid principal host domain name for the
+         client host.  As a result, the receiver-SMTP will not have to
+         perform MX resolution on this name in order to validate the
+         HELO parameter.
+
+         The HELO receiver MAY verify that the HELO parameter really
+
+
+
+Internet Engineering Task Force                                [Page 50]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+         corresponds to the IP address of the sender.  However, the
+         receiver MUST NOT refuse to accept a message, even if the
+         sender's HELO command fails verification.
+
+         DISCUSSION:
+              Verifying the HELO parameter requires a domain name lookup
+              and may therefore take considerable time.  An alternative
+              tool for tracking bogus mail sources is suggested below
+              (see "DATA Command").
+
+              Note also that the HELO argument is still required to have
+              valid <domain> syntax, since it will appear in a Received:
+              line; otherwise, a 501 error is to be sent.
+
+         IMPLEMENTATION:
+              When HELO parameter validation fails, a suggested
+              procedure is to insert a note about the unknown
+              authenticity of the sender into the message header (e.g.,
+              in the "Received:"  line).
+
+      5.2.6  Mail Relay: RFC-821 Section 3.6
+
+         We distinguish three types of mail (store-and-) forwarding:
+
+         (1)  A simple forwarder or "mail exchanger" forwards a message
+              using private knowledge about the recipient; see section
+              3.2 of RFC-821.
+
+         (2)  An SMTP mail "relay" forwards a message within an SMTP
+              mail environment as the result of an explicit source route
+              (as defined in section 3.6 of RFC-821).  The SMTP relay
+              function uses the "@...:" form of source route from RFC-
+              822 (see Section 5.2.19 below).
+
+         (3)  A mail "gateway" passes a message between different
+              environments.  The rules for mail gateways are discussed
+              below in Section 5.3.7.
+
+         An Internet host that is forwarding a message but is not a
+         gateway to a different mail environment (i.e., it falls under
+         (1) or (2)) SHOULD NOT alter any existing header fields,
+         although the host will add an appropriate Received: line as
+         required in Section 5.2.8.
+
+         A Sender-SMTP SHOULD NOT send a RCPT TO: command containing an
+         explicit source route using the "@...:" address form.  Thus,
+         the relay function defined in section  3.6 of RFC-821 should
+         not be used.
+
+
+
+Internet Engineering Task Force                                [Page 51]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+         DISCUSSION:
+              The intent is to discourage all source routing and to
+              abolish explicit source routing for mail delivery within
+              the Internet environment.  Source-routing is unnecessary;
+              the simple target address "user@domain" should always
+              suffice.  This is the result of an explicit architectural
+              decision to use universal naming rather than source
+              routing for mail.  Thus, SMTP provides end-to-end
+              connectivity, and the DNS provides globally-unique,
+              location-independent names.  MX records handle the major
+              case where source routing might otherwise be needed.
+
+         A receiver-SMTP MUST accept the explicit source route syntax in
+         the envelope, but it MAY implement the relay function as
+         defined in section 3.6 of RFC-821.  If it does not implement
+         the relay function, it SHOULD attempt to deliver the message
+         directly to the host to the right of the right-most "@" sign.
+
+         DISCUSSION:
+              For example, suppose a host that does not implement the
+              relay function receives a message with the SMTP command:
+              "RCPT TO:<@ALPHA,@BETA:joe@GAMMA>", where ALPHA, BETA, and
+              GAMMA represent domain names.  Rather than immediately
+              refusing the message with a 550 error reply as suggested
+              on page 20 of RFC-821, the host should try to forward the
+              message to GAMMA directly, using: "RCPT TO:<joe@GAMMA>".
+              Since this host does not support relaying, it is not
+              required to update the reverse path.
+
+              Some have suggested that source routing may be needed
+              occasionally for manually routing mail around failures;
+              however, the reality and importance of this need is
+              controversial.  The use of explicit SMTP mail relaying for
+              this purpose is discouraged, and in fact it may not be
+              successful, as many host systems do not support it.  Some
+              have used the "%-hack" (see Section 5.2.16) for this
+              purpose.
+
+      5.2.7  RCPT Command: RFC-821 Section 4.1.1
+
+         A host that supports a receiver-SMTP MUST support the reserved
+         mailbox "Postmaster".
+
+         The receiver-SMTP MAY verify RCPT parameters as they arrive;
+         however, RCPT responses MUST NOT be delayed beyond a reasonable
+         time (see Section 5.3.2).
+
+         Therefore, a "250 OK" response to a RCPT does not necessarily
+
+
+
+Internet Engineering Task Force                                [Page 52]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+         imply that the delivery address(es) are valid.  Errors found
+         after message acceptance will be reported by mailing a
+         notification message to an appropriate address (see Section
+         5.3.3).
+
+         DISCUSSION:
+              The set of conditions under which a RCPT parameter can be
+              validated immediately is an engineering design choice.
+              Reporting destination mailbox errors to the Sender-SMTP
+              before mail is transferred is generally desirable to save
+              time and network bandwidth, but this advantage is lost if
+              RCPT verification is lengthy.
+
+              For example, the receiver can verify immediately any
+              simple local reference, such as a single locally-
+              registered mailbox.  On the other hand, the "reasonable
+              time" limitation generally implies deferring verification
+              of a mailing list until after the message has been
+              transferred and accepted, since verifying a large mailing
+              list can take a very long time.  An implementation might
+              or might not choose to defer validation of addresses that
+              are non-local and therefore require a DNS lookup.  If a
+              DNS lookup is performed but a soft domain system error
+              (e.g., timeout) occurs, validity must be assumed.
+
+      5.2.8  DATA Command: RFC-821 Section 4.1.1
+
+         Every receiver-SMTP (not just one that "accepts a message for
+         relaying or for final delivery" [SMTP:1]) MUST insert a
+         "Received:" line at the beginning of a message.  In this line,
+         called a "time stamp line" in RFC-821:
+
+         *    The FROM field SHOULD contain both (1) the name of the
+              source host as presented in the HELO command and (2) a
+              domain literal containing the IP address of the source,
+              determined from the TCP connection.
+
+         *    The ID field MAY contain an "@" as suggested in RFC-822,
+              but this is not required.
+
+         *    The FOR field MAY contain a list of <path> entries when
+              multiple RCPT commands have been given.
+
+
+         An Internet mail program MUST NOT change a Received: line that
+         was previously added to the message header.
+
+
+
+
+
+Internet Engineering Task Force                                [Page 53]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+         DISCUSSION:
+              Including both the source host and the IP source address
+              in the Received: line may provide enough information for
+              tracking illicit mail sources and eliminate a need to
+              explicitly verify the HELO parameter.
+
+              Received: lines are primarily intended for humans tracing
+              mail routes, primarily for diagnosis of faults.  See also
+              the discussion under 5.3.7.
+
+         When the receiver-SMTP makes "final delivery" of a message,
+         then it MUST pass the MAIL FROM: address from the SMTP envelope
+         with the message, for use if an error notification message must
+         be sent later (see Section 5.3.3).  There is an analogous
+         requirement when gatewaying from the Internet into a different
+         mail environment; see Section 5.3.7.
+
+         DISCUSSION:
+              Note that the final reply to the DATA command depends only
+              upon the successful transfer and storage of the message.
+              Any problem with the destination address(es) must either
+              (1) have been reported in an SMTP error reply to the RCPT
+              command(s), or (2) be reported in a later error message
+              mailed to the originator.
+
+         IMPLEMENTATION:
+              The MAIL FROM: information may be passed as a parameter or
+              in a Return-Path: line inserted at the beginning of the
+              message.
+
+      5.2.9  Command Syntax: RFC-821 Section 4.1.2
+
+         The syntax shown in RFC-821 for the MAIL FROM: command omits
+         the case of an empty path:  "MAIL FROM: <>" (see RFC-821 Page
+         15).  An empty reverse path MUST be supported.
+
+      5.2.10  SMTP Replies:  RFC-821 Section 4.2
+
+         A receiver-SMTP SHOULD send only the reply codes listed in
+         section 4.2.2 of RFC-821 or in this document.  A receiver-SMTP
+         SHOULD use the text shown in examples in RFC-821 whenever
+         appropriate.
+
+         A sender-SMTP MUST determine its actions only by the reply
+         code, not by the text (except for 251 and 551 replies); any
+         text, including no text at all, must be acceptable.  The space
+         (blank) following the reply code is considered part of the
+         text.  Whenever possible, a sender-SMTP SHOULD test only the
+
+
+
+Internet Engineering Task Force                                [Page 54]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+         first digit of the reply code, as specified in Appendix E of
+         RFC-821.
+
+         DISCUSSION:
+              Interoperability problems have arisen with SMTP systems
+              using reply codes that are not listed explicitly in RFC-
+              821 Section 4.3 but are legal according to the theory of
+              reply codes explained in Appendix E.
+
+      5.2.11  Transparency: RFC-821 Section 4.5.2
+
+         Implementors MUST be sure that their mail systems always add
+         and delete periods to ensure message transparency.
+
+      5.2.12  WKS Use in MX Processing: RFC-974, p. 5
+
+         RFC-974 [SMTP:3] recommended that the domain system be queried
+         for WKS ("Well-Known Service") records, to verify that each
+         proposed mail target does support SMTP.  Later experience has
+         shown that WKS is not widely supported, so the WKS step in MX
+         processing SHOULD NOT be used.
+
+      The following are notes on RFC-822, organized by section of that
+      document.
+
+      5.2.13  RFC-822 Message Specification: RFC-822 Section 4
+
+         The syntax shown for the Return-path line omits the possibility
+         of a null return path, which is used to prevent looping of
+         error notifications (see Section 5.3.3).  The complete syntax
+         is:
+
+             return = "Return-path"  ":" route-addr
+                    / "Return-path"  ":" "<" ">"
+
+         The set of optional header fields is hereby expanded to include
+         the Content-Type field defined in RFC-1049 [SMTP:7].  This
+         field "allows mail reading systems to automatically identify
+         the type of a structured message body and to process it for
+         display accordingly".  [SMTP:7]  A User Agent MAY support this
+         field.
+
+      5.2.14  RFC-822 Date and Time Specification: RFC-822 Section 5
+
+         The syntax for the date is hereby changed to:
+
+            date = 1*2DIGIT month 2*4DIGIT
+
+
+
+
+Internet Engineering Task Force                                [Page 55]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+         All mail software SHOULD use 4-digit years in dates, to ease
+         the transition to the next century.
+
+         There is a strong trend towards the use of numeric timezone
+         indicators, and implementations SHOULD use numeric timezones
+         instead of timezone names.  However, all implementations MUST
+         accept either notation.  If timezone names are used, they MUST
+         be exactly as defined in RFC-822.
+
+         The military time zones are specified incorrectly in RFC-822:
+         they count the wrong way from UT (the signs are reversed).  As
+         a result, military time zones in RFC-822 headers carry no
+         information.
+
+         Finally, note that there is a typo in the definition of "zone"
+         in the syntax summary of appendix D; the correct definition
+         occurs in Section 3 of RFC-822.
+
+      5.2.15  RFC-822 Syntax Change: RFC-822 Section 6.1
+
+         The syntactic definition of "mailbox" in RFC-822 is hereby
+         changed to:
+
+            mailbox =  addr-spec            ; simple address
+                    / [phrase] route-addr   ; name & addr-spec
+
+         That is, the phrase preceding a route address is now OPTIONAL.
+         This change makes the following header field legal, for
+         example:
+
+             From: <craig@nnsc.nsf.net>
+
+      5.2.16  RFC-822  Local-part: RFC-822 Section 6.2
+
+         The basic mailbox address specification has the form: "local-
+         part@domain".  Here "local-part", sometimes called the "left-
+         hand side" of the address, is domain-dependent.
+
+         A host that is forwarding the message but is not the
+         destination host implied by the right-hand side "domain" MUST
+         NOT interpret or modify the "local-part" of the address.
+
+         When mail is to be gatewayed from the Internet mail environment
+         into a foreign mail environment (see Section 5.3.7), routing
+         information for that foreign environment MAY be embedded within
+         the "local-part" of the address.  The gateway will then
+         interpret this local part appropriately for the foreign mail
+         environment.
+
+
+
+Internet Engineering Task Force                                [Page 56]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+         DISCUSSION:
+              Although source routes are discouraged within the Internet
+              (see Section 5.2.6), there are non-Internet mail
+              environments whose delivery mechanisms do depend upon
+              source routes.  Source routes for extra-Internet
+              environments can generally be buried in the "local-part"
+              of the address (see Section 5.2.16) while mail traverses
+              the Internet.  When the mail reaches the appropriate
+              Internet mail gateway, the gateway will interpret the
+              local-part and build the necessary address or route for
+              the target mail environment.
+
+              For example, an Internet host might send mail to:
+              "a!b!c!user@gateway-domain".  The complex local part
+              "a!b!c!user" would be uninterpreted within the Internet
+              domain, but could be parsed and understood by the
+              specified mail gateway.
+
+              An embedded source route is sometimes encoded in the
+              "local-part" using "%" as a right-binding routing
+              operator.  For example, in:
+
+                 user%domain%relay3%relay2@relay1
+
+              the "%" convention implies that the mail is to be routed
+              from "relay1" through "relay2", "relay3", and finally to
+              "user" at "domain".  This is commonly known as the "%-
+              hack".  It is suggested that "%" have lower precedence
+              than any other routing operator (e.g., "!") hidden in the
+              local-part; for example, "a!b%c" would be interpreted as
+              "(a!b)%c".
+
+              Only the target host (in this case, "relay1") is permitted
+              to analyze the local-part "user%domain%relay3%relay2".
+
+      5.2.17  Domain Literals: RFC-822 Section 6.2.3
+
+         A mailer MUST be able to accept and parse an Internet domain
+         literal whose content ("dtext"; see RFC-822) is a dotted-
+         decimal host address.  This satisfies the requirement of
+         Section 2.1 for the case of mail.
+
+         An SMTP MUST accept and recognize a domain literal for any of
+         its own IP addresses.
+
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 57]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+      5.2.18  Common Address Formatting Errors: RFC-822 Section 6.1
+
+         Errors in formatting or parsing 822 addresses are unfortunately
+         common.  This section mentions only the most common errors.  A
+         User Agent MUST accept all valid RFC-822 address formats, and
+         MUST NOT generate illegal address syntax.
+
+         o    A common error is to leave out the semicolon after a group
+              identifier.
+
+         o    Some systems fail to fully-qualify domain names in
+              messages they generate.  The right-hand side of an "@"
+              sign in a header address field MUST be a fully-qualified
+              domain name.
+
+              For example, some systems fail to fully-qualify the From:
+              address; this prevents a "reply" command in the user
+              interface from automatically constructing a return
+              address.
+
+              DISCUSSION:
+                   Although RFC-822 allows the local use of abbreviated
+                   domain names within a domain, the application of
+                   RFC-822 in Internet mail does not allow this.  The
+                   intent is that an Internet host must not send an SMTP
+                   message header containing an abbreviated domain name
+                   in an address field.  This allows the address fields
+                   of the header to be passed without alteration across
+                   the Internet, as required in Section 5.2.6.
+
+         o    Some systems mis-parse multiple-hop explicit source routes
+              such as:
+
+                  @relay1,@relay2,@relay3:user@domain.
+
+
+         o    Some systems over-qualify domain names by adding a
+              trailing dot to some or all domain names in addresses or
+              message-ids.  This violates RFC-822 syntax.
+
+
+      5.2.19  Explicit Source Routes: RFC-822 Section 6.2.7
+
+         Internet host software SHOULD NOT create an RFC-822 header
+         containing an address with an explicit source route, but MUST
+         accept such headers for compatibility with earlier systems.
+
+         DISCUSSION:
+
+
+
+Internet Engineering Task Force                                [Page 58]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+              In an understatement, RFC-822 says "The use of explicit
+              source routing is discouraged".  Many hosts implemented
+              RFC-822 source routes incorrectly, so the syntax cannot be
+              used unambiguously in practice.  Many users feel the
+              syntax is ugly.  Explicit source routes are not needed in
+              the mail envelope for delivery; see Section 5.2.6.  For
+              all these reasons, explicit source routes using the RFC-
+              822 notations are not to be used in Internet mail headers.
+
+              As stated in Section 5.2.16, it is necessary to allow an
+              explicit source route to be buried in the local-part of an
+              address, e.g., using the "%-hack", in order to allow mail
+              to be gatewayed into another environment in which explicit
+              source routing is necessary.  The vigilant will observe
+              that there is no way for a User Agent to detect and
+              prevent the use of such implicit source routing when the
+              destination is within the Internet.  We can only
+              discourage source routing of any kind within the Internet,
+              as unnecessary and undesirable.
+
+   5.3  SPECIFIC ISSUES
+
+      5.3.1  SMTP Queueing Strategies
+
+         The common structure of a host SMTP implementation includes
+         user mailboxes, one or more areas for queueing messages in
+         transit, and one or more daemon processes for sending and
+         receiving mail.  The exact structure will vary depending on the
+         needs of the users on the host and the number and size of
+         mailing lists supported by the host.  We describe several
+         optimizations that have proved helpful, particularly for
+         mailers supporting high traffic levels.
+
+         Any queueing strategy MUST include:
+
+         o    Timeouts on all activities.  See Section 5.3.2.
+
+         o    Never sending error messages in response to error
+              messages.
+
+
+         5.3.1.1 Sending Strategy
+
+            The general model of a sender-SMTP is one or more processes
+            that periodically attempt to transmit outgoing mail.  In a
+            typical system, the program that composes a message has some
+            method for requesting immediate attention for a new piece of
+            outgoing mail, while mail that cannot be transmitted
+
+
+
+Internet Engineering Task Force                                [Page 59]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+            immediately MUST be queued and periodically retried by the
+            sender.  A mail queue entry will include not only the
+            message itself but also the envelope information.
+
+            The sender MUST delay retrying a particular destination
+            after one attempt has failed.  In general, the retry
+            interval SHOULD be at least 30 minutes; however, more
+            sophisticated and variable strategies will be beneficial
+            when the sender-SMTP can determine the reason for non-
+            delivery.
+
+            Retries continue until the message is transmitted or the
+            sender gives up; the give-up time generally needs to be at
+            least 4-5 days.  The parameters to the retry algorithm MUST
+            be configurable.
+
+            A sender SHOULD keep a list of hosts it cannot reach and
+            corresponding timeouts, rather than just retrying queued
+            mail items.
+
+            DISCUSSION:
+                 Experience suggests that failures are typically
+                 transient (the target system has crashed), favoring a
+                 policy of two connection attempts in the first hour the
+                 message is in the queue, and then backing off to once
+                 every two or three hours.
+
+                 The sender-SMTP can shorten the queueing delay by
+                 cooperation with the receiver-SMTP.  In particular, if
+                 mail is received from a particular address, it is good
+                 evidence that any mail queued for that host can now be
+                 sent.
+
+                 The strategy may be further modified as a result of
+                 multiple addresses per host (see Section 5.3.4), to
+                 optimize delivery time vs. resource usage.
+
+                 A sender-SMTP may have a large queue of messages for
+                 each unavailable destination host, and if it retried
+                 all these messages in every retry cycle, there would be
+                 excessive Internet overhead and the daemon would be
+                 blocked for a long period.  Note that an SMTP can
+                 generally determine that a delivery attempt has failed
+                 only after a timeout of a minute or more; a one minute
+                 timeout per connection will result in a very large
+                 delay if it is repeated for dozens or even hundreds of
+                 queued messages.
+
+
+
+
+Internet Engineering Task Force                                [Page 60]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+            When the same message is to be delivered to several users on
+            the same host, only one copy of the message SHOULD be
+            transmitted.  That is, the sender-SMTP should use the
+            command sequence: RCPT, RCPT,... RCPT, DATA instead of the
+            sequence: RCPT, DATA, RCPT, DATA,... RCPT, DATA.
+            Implementation of this efficiency feature is strongly urged.
+
+            Similarly, the sender-SMTP MAY support multiple concurrent
+            outgoing mail transactions to achieve timely delivery.
+            However, some limit SHOULD be imposed to protect the host
+            from devoting all its resources to mail.
+
+            The use of the different addresses of a multihomed host is
+            discussed below.
+
+         5.3.1.2  Receiving Strategy
+
+            The receiver-SMTP SHOULD attempt to keep a pending listen on
+            the SMTP port at all times.  This will require the support
+            of multiple incoming TCP connections for SMTP.  Some limit
+            MAY be imposed.
+
+            IMPLEMENTATION:
+                 When the receiver-SMTP receives mail from a particular
+                 host address, it could notify the sender-SMTP to retry
+                 any mail pending for that host address.
+
+      5.3.2  Timeouts in SMTP
+
+         There are two approaches to timeouts in the sender-SMTP:  (a)
+         limit the time for each SMTP command separately, or (b) limit
+         the time for the entire SMTP dialogue for a single mail
+         message.  A sender-SMTP SHOULD use option (a), per-command
+         timeouts.  Timeouts SHOULD be easily reconfigurable, preferably
+         without recompiling the SMTP code.
+
+         DISCUSSION:
+              Timeouts are an essential feature of an SMTP
+              implementation.  If the timeouts are too long (or worse,
+              there are no timeouts), Internet communication failures or
+              software bugs in receiver-SMTP programs can tie up SMTP
+              processes indefinitely.  If the timeouts are too short,
+              resources will be wasted with attempts that time out part
+              way through message delivery.
+
+              If option (b) is used, the timeout has to be very large,
+              e.g., an hour, to allow time to expand very large mailing
+              lists.  The timeout may also need to increase linearly
+
+
+
+Internet Engineering Task Force                                [Page 61]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+              with the size of the message, to account for the time to
+              transmit a very large message.  A large fixed timeout
+              leads to two problems:  a failure can still tie up the
+              sender for a very long time, and very large messages may
+              still spuriously time out (which is a wasteful failure!).
+
+              Using the recommended option (a), a timer is set for each
+              SMTP command and for each buffer of the data transfer.
+              The latter means that the overall timeout is inherently
+              proportional to the size of the message.
+
+         Based on extensive experience with busy mail-relay hosts, the
+         minimum per-command timeout values SHOULD be as follows:
+
+         o    Initial 220 Message: 5 minutes
+
+              A sender-SMTP process needs to distinguish between a
+              failed TCP connection and a delay in receiving the initial
+              220 greeting message.  Many receiver-SMTPs will accept a
+              TCP connection but delay delivery of the 220 message until
+              their system load will permit more mail to be processed.
+
+         o    MAIL Command: 5 minutes
+
+
+         o    RCPT Command: 5 minutes
+
+              A longer timeout would be required if processing of
+              mailing lists and aliases were not deferred until after
+              the message was accepted.
+
+         o    DATA Initiation: 2 minutes
+
+              This is while awaiting the "354 Start Input" reply to a
+              DATA command.
+
+         o    Data Block: 3 minutes
+
+              This is while awaiting the completion of each TCP SEND
+              call transmitting a chunk of data.
+
+         o    DATA Termination: 10 minutes
+
+              This is while awaiting the "250 OK" reply. When the
+              receiver gets the final period terminating the message
+              data, it typically performs processing to deliver the
+              message to a user mailbox.  A spurious timeout at this
+              point would be very wasteful, since the message has been
+
+
+
+Internet Engineering Task Force                                [Page 62]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+              successfully sent.
+
+         A receiver-SMTP SHOULD have a timeout of at least 5 minutes
+         while it is awaiting the next command from the sender.
+
+      5.3.3  Reliable Mail Receipt
+
+         When the receiver-SMTP accepts a piece of mail (by sending a
+         "250 OK" message in response to DATA), it is accepting
+         responsibility for delivering or relaying the message.  It must
+         take this responsibility seriously, i.e., it MUST NOT lose the
+         message for frivolous reasons, e.g., because the host later
+         crashes or because of a predictable resource shortage.
+
+         If there is a delivery failure after acceptance of a message,
+         the receiver-SMTP MUST formulate and mail a notification
+         message.  This notification MUST be sent using a null ("<>")
+         reverse path in the envelope; see Section 3.6 of RFC-821.  The
+         recipient of this notification SHOULD be the address from the
+         envelope return path (or the Return-Path: line).  However, if
+         this address is null ("<>"),  the receiver-SMTP MUST NOT send a
+         notification.  If the address is an explicit source route, it
+         SHOULD be stripped down to its final hop.
+
+         DISCUSSION:
+              For example, suppose that an error notification must be
+              sent for a message that arrived with:
+              "MAIL FROM:<@a,@b:user@d>".  The notification message
+              should be sent to: "RCPT TO:<user@d>".
+
+              Some delivery failures after the message is accepted by
+              SMTP will be unavoidable.  For example, it may be
+              impossible for the receiver-SMTP to validate all the
+              delivery addresses in RCPT command(s) due to a "soft"
+              domain system error or because the target is a mailing
+              list (see earlier discussion of RCPT).
+
+         To avoid receiving duplicate messages as the result of
+         timeouts, a receiver-SMTP MUST seek to minimize the time
+         required to respond to the final "." that ends a message
+         transfer.  See RFC-1047 [SMTP:4] for a discussion of this
+         problem.
+
+      5.3.4  Reliable Mail Transmission
+
+         To transmit a message, a sender-SMTP determines the IP address
+         of the target host from the destination address in the
+         envelope.  Specifically, it maps the string to the right of the
+
+
+
+Internet Engineering Task Force                                [Page 63]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+         "@" sign into an IP address.  This mapping or the transfer
+         itself may fail with a soft error, in which case the sender-
+         SMTP will requeue the outgoing mail for a later retry, as
+         required in Section 5.3.1.1.
+
+         When it succeeds, the mapping can result in a list of
+         alternative delivery addresses rather than a single address,
+         because of (a) multiple MX records, (b) multihoming, or both.
+         To provide reliable mail transmission, the sender-SMTP MUST be
+         able to try (and retry) each of the addresses in this list in
+         order, until a delivery attempt succeeds.  However, there MAY
+         also be a configurable limit on the number of alternate
+         addresses that can be tried.  In any case, a host SHOULD try at
+         least two addresses.
+
+         The following information is to be used to rank the host
+         addresses:
+
+         (1)  Multiple MX Records -- these contain a preference
+              indication that should be used in sorting.  If there are
+              multiple destinations with the same preference and there
+              is no clear reason to favor one (e.g., by address
+              preference), then the sender-SMTP SHOULD pick one at
+              random to spread the load across multiple mail exchanges
+              for a specific organization; note that this is a
+              refinement of the procedure in [DNS:3].
+
+         (2)  Multihomed host -- The destination host (perhaps taken
+              from the preferred MX record) may be multihomed, in which
+              case the domain name resolver will return a list of
+              alternative IP addresses.  It is the responsibility of the
+              domain name resolver interface (see Section 6.1.3.4 below)
+              to have ordered this list by decreasing preference, and
+              SMTP MUST try them in the order presented.
+
+         DISCUSSION:
+              Although the capability to try multiple alternative
+              addresses is required, there may be circumstances where
+              specific installations want to limit or disable the use of
+              alternative addresses.  The question of whether a sender
+              should attempt retries using the different addresses of a
+              multihomed host has been controversial.  The main argument
+              for using the multiple addresses is that it maximizes the
+              probability of timely delivery, and indeed sometimes the
+              probability of any delivery; the counter argument is that
+              it may result in unnecessary resource use.
+
+              Note that resource use is also strongly determined by the
+
+
+
+Internet Engineering Task Force                                [Page 64]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+              sending strategy discussed in Section 5.3.1.
+
+      5.3.5  Domain Name Support
+
+         SMTP implementations MUST use the mechanism defined in Section
+         6.1 for mapping between domain names and IP addresses.  This
+         means that every Internet SMTP MUST include support for the
+         Internet DNS.
+
+         In particular, a sender-SMTP MUST support the MX record scheme
+         [SMTP:3].  See also Section 7.4 of [DNS:2] for information on
+         domain name support for SMTP.
+
+      5.3.6  Mailing Lists and Aliases
+
+         An SMTP-capable host SHOULD support both the alias and the list
+         form of address expansion for multiple delivery.  When a
+         message is delivered or forwarded to each address of an
+         expanded list form, the return address in the envelope
+         ("MAIL FROM:") MUST be changed to be the address of a person
+         who administers the list, but the message header MUST be left
+         unchanged; in particular, the "From" field of the message is
+         unaffected.
+
+         DISCUSSION:
+              An important mail facility is a mechanism for multi-
+              destination delivery of a single message, by transforming
+              or "expanding" a pseudo-mailbox address into a list of
+              destination mailbox addresses.  When a message is sent to
+              such a pseudo-mailbox (sometimes called an "exploder"),
+              copies are forwarded or redistributed to each mailbox in
+              the expanded list.  We classify such a pseudo-mailbox as
+              an "alias" or a "list", depending upon the expansion
+              rules:
+
+              (a)  Alias
+
+                   To expand an alias, the recipient mailer simply
+                   replaces the pseudo-mailbox address in the envelope
+                   with each of the expanded addresses in turn; the rest
+                   of the envelope and the message body are left
+                   unchanged.  The message is then delivered or
+                   forwarded to each expanded address.
+
+              (b)  List
+
+                   A mailing list may be said to operate by
+                   "redistribution" rather than by "forwarding".  To
+
+
+
+Internet Engineering Task Force                                [Page 65]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+                   expand a list, the recipient mailer replaces the
+                   pseudo-mailbox address in the envelope with each of
+                   the expanded addresses in turn. The return address in
+                   the envelope is changed so that all error messages
+                   generated by the final deliveries will be returned to
+                   a list administrator, not to the message originator,
+                   who generally has no control over the contents of the
+                   list and will typically find error messages annoying.
+
+
+      5.3.7  Mail Gatewaying
+
+         Gatewaying mail between different mail environments, i.e.,
+         different mail formats and protocols, is complex and does not
+         easily yield to standardization.  See for example [SMTP:5a],
+         [SMTP:5b].  However, some general requirements may be given for
+         a gateway between the Internet and another mail environment.
+
+         (A)  Header fields MAY be rewritten when necessary as messages
+              are gatewayed across mail environment boundaries.
+
+              DISCUSSION:
+                   This may involve interpreting the local-part of the
+                   destination address, as suggested in Section 5.2.16.
+
+                   The other mail systems gatewayed to the Internet
+                   generally use a subset of RFC-822 headers, but some
+                   of them do not have an equivalent to the SMTP
+                   envelope.  Therefore, when a message leaves the
+                   Internet environment, it may be necessary to fold the
+                   SMTP envelope information into the message header.  A
+                   possible solution would be to create new header
+                   fields to carry the envelope information (e.g., "X-
+                   SMTP-MAIL:" and "X-SMTP-RCPT:"); however, this would
+                   require changes in mail programs in the foreign
+                   environment.
+
+         (B)  When forwarding a message into or out of the Internet
+              environment, a gateway MUST prepend a Received: line, but
+              it MUST NOT alter in any way a Received: line that is
+              already in the header.
+
+              DISCUSSION:
+                   This requirement is a subset of the general
+                   "Received:" line requirement of Section 5.2.8; it is
+                   restated here for emphasis.
+
+                   Received: fields of messages originating from other
+
+
+
+Internet Engineering Task Force                                [Page 66]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+                   environments may not conform exactly to RFC-822.
+                   However, the most important use of Received: lines is
+                   for debugging mail faults, and this debugging can be
+                   severely hampered by well-meaning gateways that try
+                   to "fix" a Received: line.
+
+                   The gateway is strongly encouraged to indicate the
+                   environment and protocol in the "via" clauses of
+                   Received field(s) that it supplies.
+
+         (C)  From the Internet side, the gateway SHOULD accept all
+              valid address formats in SMTP commands and in RFC-822
+              headers, and all valid RFC-822 messages.  Although a
+              gateway must accept an RFC-822 explicit source route
+              ("@...:" format) in either the RFC-822 header or in the
+              envelope, it MAY but need not act on the source route; see
+              Sections 5.2.6 and 5.2.19.
+
+              DISCUSSION:
+                   It is often tempting to restrict the range of
+                   addresses accepted at the mail gateway to simplify
+                   the translation into addresses for the remote
+                   environment.  This practice is based on the
+                   assumption that mail users have control over the
+                   addresses their mailers send to the mail gateway.  In
+                   practice, however, users have little control over the
+                   addresses that are finally sent; their mailers are
+                   free to change addresses into any legal RFC-822
+                   format.
+
+         (D)  The gateway MUST ensure that all header fields of a
+              message that it forwards into the Internet meet the
+              requirements for Internet mail.  In particular, all
+              addresses in "From:", "To:", "Cc:", etc., fields must be
+              transformed (if necessary) to satisfy RFC-822 syntax, and
+              they must be effective and useful for sending replies.
+
+
+         (E)  The translation algorithm used to convert mail from the
+              Internet protocols to another environment's protocol
+              SHOULD try to ensure that error messages from the foreign
+              mail environment are delivered to the return path from the
+              SMTP envelope, not to the sender listed in the "From:"
+              field of the RFC-822 message.
+
+              DISCUSSION:
+                   Internet mail lists usually place the address of the
+                   mail list maintainer in the envelope but leave the
+
+
+
+Internet Engineering Task Force                                [Page 67]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+                   original message header intact (with the "From:"
+                   field containing the original sender).  This yields
+                   the behavior the average recipient expects: a reply
+                   to the header gets sent to the original sender, not
+                   to a mail list maintainer; however, errors get sent
+                   to the maintainer (who can fix the problem) and not
+                   the sender (who probably cannot).
+
+         (F)  Similarly, when forwarding a message from another
+              environment into the Internet, the gateway SHOULD set the
+              envelope return path in accordance with an error message
+              return address, if any, supplied by the foreign
+              environment.
+
+
+      5.3.8  Maximum Message Size
+
+         Mailer software MUST be able to send and receive messages of at
+         least 64K bytes in length (including header), and a much larger
+         maximum size is highly desirable.
+
+         DISCUSSION:
+              Although SMTP does not define the maximum size of a
+              message, many systems impose implementation limits.
+
+              The current de facto minimum limit in the Internet is 64K
+              bytes.  However, electronic mail is used for a variety of
+              purposes that create much larger messages.  For example,
+              mail is often used instead of FTP for transmitting ASCII
+              files, and in particular to transmit entire documents.  As
+              a result, messages can be 1 megabyte or even larger.  We
+              note that the present document together with its lower-
+              layer companion contains 0.5 megabytes.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 68]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+   5.4  SMTP REQUIREMENTS SUMMARY
+
+                                               |          | | | |S| |
+                                               |          | | | |H| |F
+                                               |          | | | |O|M|o
+                                               |          | |S| |U|U|o
+                                               |          | |H| |L|S|t
+                                               |          |M|O| |D|T|n
+                                               |          |U|U|M| | |o
+                                               |          |S|L|A|N|N|t
+                                               |          |T|D|Y|O|O|t
+FEATURE                                        |SECTION   | | | |T|T|e
+-----------------------------------------------|----------|-|-|-|-|-|--
+                                               |          | | | | | |
+RECEIVER-SMTP:                                 |          | | | | | |
+  Implement VRFY                               |5.2.3     |x| | | | |
+  Implement EXPN                               |5.2.3     | |x| | | |
+    EXPN, VRFY configurable                    |5.2.3     | | |x| | |
+  Implement SEND, SOML, SAML                   |5.2.4     | | |x| | |
+  Verify HELO parameter                        |5.2.5     | | |x| | |
+    Refuse message with bad HELO               |5.2.5     | | | | |x|
+  Accept explicit src-route syntax in env.     |5.2.6     |x| | | | |
+  Support "postmaster"                         |5.2.7     |x| | | | |
+  Process RCPT when received (except lists)    |5.2.7     | | |x| | |
+      Long delay of RCPT responses             |5.2.7     | | | | |x|
+                                               |          | | | | | |
+  Add Received: line                           |5.2.8     |x| | | | |
+      Received: line include domain literal    |5.2.8     | |x| | | |
+  Change previous Received: line               |5.2.8     | | | | |x|
+  Pass Return-Path info (final deliv/gwy)      |5.2.8     |x| | | | |
+  Support empty reverse path                   |5.2.9     |x| | | | |
+  Send only official reply codes               |5.2.10    | |x| | | |
+  Send text from RFC-821 when appropriate      |5.2.10    | |x| | | |
+  Delete "." for transparency                  |5.2.11    |x| | | | |
+  Accept and recognize self domain literal(s)  |5.2.17    |x| | | | |
+                                               |          | | | | | |
+  Error message about error message            |5.3.1     | | | | |x|
+  Keep pending listen on SMTP port             |5.3.1.2   | |x| | | |
+  Provide limit on recv concurrency            |5.3.1.2   | | |x| | |
+  Wait at least 5 mins for next sender cmd     |5.3.2     | |x| | | |
+  Avoidable delivery failure after "250 OK"    |5.3.3     | | | | |x|
+  Send error notification msg after accept     |5.3.3     |x| | | | |
+    Send using null return path                |5.3.3     |x| | | | |
+    Send to envelope return path               |5.3.3     | |x| | | |
+    Send to null address                       |5.3.3     | | | | |x|
+    Strip off explicit src route               |5.3.3     | |x| | | |
+  Minimize acceptance delay (RFC-1047)         |5.3.3     |x| | | | |
+-----------------------------------------------|----------|-|-|-|-|-|--
+
+
+
+Internet Engineering Task Force                                [Page 69]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+                                               |          | | | | | |
+SENDER-SMTP:                                   |          | | | | | |
+  Canonicalized domain names in MAIL, RCPT     |5.2.2     |x| | | | |
+  Implement SEND, SOML, SAML                   |5.2.4     | | |x| | |
+  Send valid principal host name in HELO       |5.2.5     |x| | | | |
+  Send explicit source route in RCPT TO:       |5.2.6     | | | |x| |
+  Use only reply code to determine action      |5.2.10    |x| | | | |
+  Use only high digit of reply code when poss. |5.2.10    | |x| | | |
+  Add "." for transparency                     |5.2.11    |x| | | | |
+                                               |          | | | | | |
+  Retry messages after soft failure            |5.3.1.1   |x| | | | |
+    Delay before retry                         |5.3.1.1   |x| | | | |
+    Configurable retry parameters              |5.3.1.1   |x| | | | |
+    Retry once per each queued dest host       |5.3.1.1   | |x| | | |
+  Multiple RCPT's for same DATA                |5.3.1.1   | |x| | | |
+  Support multiple concurrent transactions     |5.3.1.1   | | |x| | |
+    Provide limit on concurrency               |5.3.1.1   | |x| | | |
+                                               |          | | | | | |
+  Timeouts on all activities                   |5.3.1     |x| | | | |
+    Per-command timeouts                       |5.3.2     | |x| | | |
+    Timeouts easily reconfigurable             |5.3.2     | |x| | | |
+    Recommended times                          |5.3.2     | |x| | | |
+  Try alternate addr's in order                |5.3.4     |x| | | | |
+    Configurable limit on alternate tries      |5.3.4     | | |x| | |
+    Try at least two alternates                |5.3.4     | |x| | | |
+  Load-split across equal MX alternates        |5.3.4     | |x| | | |
+  Use the Domain Name System                   |5.3.5     |x| | | | |
+    Support MX records                         |5.3.5     |x| | | | |
+    Use WKS records in MX processing           |5.2.12    | | | |x| |
+-----------------------------------------------|----------|-|-|-|-|-|--
+                                               |          | | | | | |
+MAIL FORWARDING:                               |          | | | | | |
+  Alter existing header field(s)               |5.2.6     | | | |x| |
+  Implement relay function: 821/section 3.6    |5.2.6     | | |x| | |
+    If not, deliver to RHS domain              |5.2.6     | |x| | | |
+  Interpret 'local-part' of addr               |5.2.16    | | | | |x|
+                                               |          | | | | | |
+MAILING LISTS AND ALIASES:                     |          | | | | | |
+  Support both                                 |5.3.6     | |x| | | |
+  Report mail list error to local admin.       |5.3.6     |x| | | | |
+                                               |          | | | | | |
+MAIL GATEWAYS:                                 |          | | | | | |
+  Embed foreign mail route in local-part       |5.2.16    | | |x| | |
+  Rewrite header fields when necessary         |5.3.7     | | |x| | |
+  Prepend Received: line                       |5.3.7     |x| | | | |
+  Change existing Received: line               |5.3.7     | | | | |x|
+  Accept full RFC-822 on Internet side         |5.3.7     | |x| | | |
+  Act on RFC-822 explicit source route         |5.3.7     | | |x| | |
+
+
+
+Internet Engineering Task Force                                [Page 70]
+
+
+
+
+RFC1123                  MAIL -- SMTP & RFC-822             October 1989
+
+
+  Send only valid RFC-822 on Internet side     |5.3.7     |x| | | | |
+  Deliver error msgs to envelope addr          |5.3.7     | |x| | | |
+  Set env return path from err return addr     |5.3.7     | |x| | | |
+                                               |          | | | | | |
+USER AGENT -- RFC-822                          |          | | | | | |
+  Allow user to enter <route> address          |5.2.6     | | | |x| |
+  Support RFC-1049 Content Type field          |5.2.13    | | |x| | |
+  Use 4-digit years                            |5.2.14    | |x| | | |
+  Generate numeric timezones                   |5.2.14    | |x| | | |
+  Accept all timezones                         |5.2.14    |x| | | | |
+  Use non-num timezones from RFC-822           |5.2.14    |x| | | | |
+  Omit phrase before route-addr                |5.2.15    | | |x| | |
+  Accept and parse dot.dec. domain literals    |5.2.17    |x| | | | |
+  Accept all RFC-822 address formats           |5.2.18    |x| | | | |
+  Generate invalid RFC-822 address format      |5.2.18    | | | | |x|
+  Fully-qualified domain names in header       |5.2.18    |x| | | | |
+  Create explicit src route in header          |5.2.19    | | | |x| |
+  Accept explicit src route in header          |5.2.19    |x| | | | |
+                                               |          | | | | | |
+Send/recv at least 64KB messages               |5.3.8     |x| | | | |
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 71]
+
+
+
+
+RFC1123               SUPPORT SERVICES -- DOMAINS           October 1989
+
+
+6. SUPPORT SERVICES
+
+   6.1 DOMAIN NAME TRANSLATION
+
+      6.1.1 INTRODUCTION
+
+         Every host MUST implement a resolver for the Domain Name System
+         (DNS), and it MUST implement a mechanism using this DNS
+         resolver to convert host names to IP addresses and vice-versa
+         [DNS:1, DNS:2].
+
+         In addition to the DNS, a host MAY also implement a host name
+         translation mechanism that searches a local Internet host
+         table.  See Section 6.1.3.8 for more information on this
+         option.
+
+         DISCUSSION:
+              Internet host name translation was originally performed by
+              searching local copies of a table of all hosts.  This
+              table became too large to update and distribute in a
+              timely manner and too large to fit into many hosts, so the
+              DNS was invented.
+
+              The DNS creates a distributed database used primarily for
+              the translation between host names and host addresses.
+              Implementation of DNS software is required.  The DNS
+              consists of two logically distinct parts: name servers and
+              resolvers (although implementations often combine these
+              two logical parts in the interest of efficiency) [DNS:2].
+
+              Domain name servers store authoritative data about certain
+              sections of the database and answer queries about the
+              data.  Domain resolvers query domain name servers for data
+              on behalf of user processes.  Every host therefore needs a
+              DNS resolver; some host machines will also need to run
+              domain name servers.  Since no name server has complete
+              information, in general it is necessary to obtain
+              information from more than one name server to resolve a
+              query.
+
+      6.1.2  PROTOCOL WALK-THROUGH
+
+         An implementor must study references [DNS:1] and [DNS:2]
+         carefully.  They provide a thorough description of the theory,
+         protocol, and implementation of the domain name system, and
+         reflect several years of experience.
+
+
+
+
+
+Internet Engineering Task Force                                [Page 72]
+
+
+
+
+RFC1123               SUPPORT SERVICES -- DOMAINS           October 1989
+
+
+         6.1.2.1  Resource Records with Zero TTL: RFC-1035 Section 3.2.1
+
+            All DNS name servers and resolvers MUST properly handle RRs
+            with a zero TTL: return the RR to the client but do not
+            cache it.
+
+            DISCUSSION:
+                 Zero TTL values are interpreted to mean that the RR can
+                 only be used for the transaction in progress, and
+                 should not be cached; they are useful for extremely
+                 volatile data.
+
+         6.1.2.2  QCLASS Values: RFC-1035 Section 3.2.5
+
+            A query with "QCLASS=*" SHOULD NOT be used unless the
+            requestor is seeking data from more than one class.  In
+            particular, if the requestor is only interested in Internet
+            data types, QCLASS=IN MUST be used.
+
+         6.1.2.3  Unused Fields: RFC-1035 Section 4.1.1
+
+            Unused fields in a query or response message MUST be zero.
+
+         6.1.2.4  Compression: RFC-1035 Section 4.1.4
+
+            Name servers MUST use compression in responses.
+
+            DISCUSSION:
+                 Compression is essential to avoid overflowing UDP
+                 datagrams; see Section 6.1.3.2.
+
+         6.1.2.5  Misusing Configuration Info: RFC-1035 Section 6.1.2
+
+            Recursive name servers and full-service resolvers generally
+            have some configuration information containing hints about
+            the location of root or local name servers.  An
+            implementation MUST NOT include any of these hints in a
+            response.
+
+            DISCUSSION:
+                 Many implementors have found it convenient to store
+                 these hints as if they were cached data, but some
+                 neglected to ensure that this "cached data" was not
+                 included in responses.  This has caused serious
+                 problems in the Internet when the hints were obsolete
+                 or incorrect.
+
+
+
+
+
+Internet Engineering Task Force                                [Page 73]
+
+
+
+
+RFC1123               SUPPORT SERVICES -- DOMAINS           October 1989
+
+
+      6.1.3  SPECIFIC ISSUES
+
+         6.1.3.1  Resolver Implementation
+
+            A name resolver SHOULD be able to multiplex concurrent
+            requests if the host supports concurrent processes.
+
+            In implementing a DNS resolver, one of two different models
+            MAY optionally be chosen: a full-service resolver, or a stub
+            resolver.
+
+
+            (A)  Full-Service Resolver
+
+                 A full-service resolver is a complete implementation of
+                 the resolver service, and is capable of dealing with
+                 communication failures, failure of individual name
+                 servers, location of the proper name server for a given
+                 name, etc.  It must satisfy the following requirements:
+
+                 o    The resolver MUST implement a local caching
+                      function to avoid repeated remote access for
+                      identical requests, and MUST time out information
+                      in the cache.
+
+                 o    The resolver SHOULD be configurable with start-up
+                      information pointing to multiple root name servers
+                      and multiple name servers for the local domain.
+                      This insures that the resolver will be able to
+                      access the whole name space in normal cases, and
+                      will be able to access local domain information
+                      should the local network become disconnected from
+                      the rest of the Internet.
+
+
+            (B)  Stub Resolver
+
+                 A "stub resolver" relies on the services of a recursive
+                 name server on the connected network or a "nearby"
+                 network.  This scheme allows the host to pass on the
+                 burden of the resolver function to a name server on
+                 another host.  This model is often essential for less
+                 capable hosts, such as PCs, and is also recommended
+                 when the host is one of several workstations on a local
+                 network, because it allows all of the workstations to
+                 share the cache of the recursive name server and hence
+                 reduce the number of domain requests exported by the
+                 local network.
+
+
+
+Internet Engineering Task Force                                [Page 74]
+
+
+
+
+RFC1123               SUPPORT SERVICES -- DOMAINS           October 1989
+
+
+                 At a minimum, the stub resolver MUST be capable of
+                 directing its requests to redundant recursive name
+                 servers.  Note that recursive name servers are allowed
+                 to restrict the sources of requests that they will
+                 honor, so the host administrator must verify that the
+                 service will be provided.  Stub resolvers MAY implement
+                 caching if they choose, but if so, MUST time out cached
+                 information.
+
+
+         6.1.3.2  Transport Protocols
+
+            DNS resolvers and recursive servers MUST support UDP, and
+            SHOULD support TCP, for sending (non-zone-transfer) queries.
+            Specifically, a DNS resolver or server that is sending a
+            non-zone-transfer query MUST send a UDP query first.  If the
+            Answer section of the response is truncated and if the
+            requester supports TCP, it SHOULD try the query again using
+            TCP.
+
+            DNS servers MUST be able to service UDP queries and SHOULD
+            be able to service TCP queries.  A name server MAY limit the
+            resources it devotes to TCP queries, but it SHOULD NOT
+            refuse to service a TCP query just because it would have
+            succeeded with UDP.
+
+            Truncated responses MUST NOT be saved (cached) and later
+            used in such a way that the fact that they are truncated is
+            lost.
+
+            DISCUSSION:
+                 UDP is preferred over TCP for queries because UDP
+                 queries have much lower overhead, both in packet count
+                 and in connection state.  The use of UDP is essential
+                 for heavily-loaded servers, especially the root
+                 servers.  UDP also offers additional robustness, since
+                 a resolver can attempt several UDP queries to different
+                 servers for the cost of a single TCP query.
+
+                 It is possible for a DNS response to be truncated,
+                 although this is a very rare occurrence in the present
+                 Internet DNS.  Practically speaking, truncation cannot
+                 be predicted, since it is data-dependent.  The
+                 dependencies include the number of RRs in the answer,
+                 the size of each RR, and the savings in space realized
+                 by the name compression algorithm.  As a rule of thumb,
+                 truncation in NS and MX lists should not occur for
+                 answers containing 15 or fewer RRs.
+
+
+
+Internet Engineering Task Force                                [Page 75]
+
+
+
+
+RFC1123               SUPPORT SERVICES -- DOMAINS           October 1989
+
+
+                 Whether it is possible to use a truncated answer
+                 depends on the application.  A mailer must not use a
+                 truncated MX response, since this could lead to mail
+                 loops.
+
+                 Responsible practices can make UDP suffice in the vast
+                 majority of cases.  Name servers must use compression
+                 in responses.  Resolvers must differentiate truncation
+                 of the Additional section of a response (which only
+                 loses extra information) from truncation of the Answer
+                 section (which for MX records renders the response
+                 unusable by mailers).  Database administrators should
+                 list only a reasonable number of primary names in lists
+                 of name servers, MX alternatives, etc.
+
+                 However, it is also clear that some new DNS record
+                 types defined in the future will contain information
+                 exceeding the 512 byte limit that applies to UDP, and
+                 hence will require TCP.  Thus, resolvers and name
+                 servers should implement TCP services as a backup to
+                 UDP today, with the knowledge that they will require
+                 the TCP service in the future.
+
+            By private agreement, name servers and resolvers MAY arrange
+            to use TCP for all traffic between themselves.  TCP MUST be
+            used for zone transfers.
+
+            A DNS server MUST have sufficient internal concurrency that
+            it can continue to process UDP queries while awaiting a
+            response or performing a zone transfer on an open TCP
+            connection [DNS:2].
+
+            A server MAY support a UDP query that is delivered using an
+            IP broadcast or multicast address.  However, the Recursion
+            Desired bit MUST NOT be set in a query that is multicast,
+            and MUST be ignored by name servers receiving queries via a
+            broadcast or multicast address.  A host that sends broadcast
+            or multicast DNS queries SHOULD send them only as occasional
+            probes, caching the IP address(es) it obtains from the
+            response(s) so it can normally send unicast queries.
+
+            DISCUSSION:
+                 Broadcast or (especially) IP multicast can provide a
+                 way to locate nearby name servers without knowing their
+                 IP addresses in advance.  However, general broadcasting
+                 of recursive queries can result in excessive and
+                 unnecessary load on both network and servers.
+
+
+
+
+Internet Engineering Task Force                                [Page 76]
+
+
+
+
+RFC1123               SUPPORT SERVICES -- DOMAINS           October 1989
+
+
+         6.1.3.3  Efficient Resource Usage
+
+            The following requirements on servers and resolvers are very
+            important to the health of the Internet as a whole,
+            particularly when DNS services are invoked repeatedly by
+            higher level automatic servers, such as mailers.
+
+            (1)  The resolver MUST implement retransmission controls to
+                 insure that it does not waste communication bandwidth,
+                 and MUST impose finite bounds on the resources consumed
+                 to respond to a single request.  See [DNS:2] pages 43-
+                 44 for specific recommendations.
+
+            (2)  After a query has been retransmitted several times
+                 without a response, an implementation MUST give up and
+                 return a soft error to the application.
+
+            (3)  All DNS name servers and resolvers SHOULD cache
+                 temporary failures, with a timeout period of the order
+                 of minutes.
+
+                 DISCUSSION:
+                      This will prevent applications that immediately
+                      retry soft failures (in violation of Section 2.2
+                      of this document) from generating excessive DNS
+                      traffic.
+
+            (4)  All DNS name servers and resolvers SHOULD cache
+                 negative responses that indicate the specified name, or
+                 data of the specified type, does not exist, as
+                 described in [DNS:2].
+
+            (5)  When a DNS server or resolver retries a UDP query, the
+                 retry interval SHOULD be constrained by an exponential
+                 backoff algorithm, and SHOULD also have upper and lower
+                 bounds.
+
+                 IMPLEMENTATION:
+                      A measured RTT and variance (if available) should
+                      be used to calculate an initial retransmission
+                      interval.  If this information is not available, a
+                      default of no less than 5 seconds should be used.
+                      Implementations may limit the retransmission
+                      interval, but this limit must exceed twice the
+                      Internet maximum segment lifetime plus service
+                      delay at the name server.
+
+            (6)  When a resolver or server receives a Source Quench for
+
+
+
+Internet Engineering Task Force                                [Page 77]
+
+
+
+
+RFC1123               SUPPORT SERVICES -- DOMAINS           October 1989
+
+
+                 a query it has issued, it SHOULD take steps to reduce
+                 the rate of querying that server in the near future.  A
+                 server MAY ignore a Source Quench that it receives as
+                 the result of sending a response datagram.
+
+                 IMPLEMENTATION:
+                      One recommended action to reduce the rate is to
+                      send the next query attempt to an alternate
+                      server, if there is one available.  Another is to
+                      back off the retry interval for the same server.
+
+
+         6.1.3.4  Multihomed Hosts
+
+            When the host name-to-address function encounters a host
+            with multiple addresses, it SHOULD rank or sort the
+            addresses using knowledge of the immediately connected
+            network number(s) and any other applicable performance or
+            history information.
+
+            DISCUSSION:
+                 The different addresses of a multihomed host generally
+                 imply different Internet paths, and some paths may be
+                 preferable to others in performance, reliability, or
+                 administrative restrictions.  There is no general way
+                 for the domain system to determine the best path.  A
+                 recommended approach is to base this decision on local
+                 configuration information set by the system
+                 administrator.
+
+            IMPLEMENTATION:
+                 The following scheme has been used successfully:
+
+                 (a)  Incorporate into the host configuration data a
+                      Network-Preference List, that is simply a list of
+                      networks in preferred order.  This list may be
+                      empty if there is no preference.
+
+                 (b)  When a host name is mapped into a list of IP
+                      addresses, these addresses should be sorted by
+                      network number, into the same order as the
+                      corresponding networks in the Network-Preference
+                      List.  IP addresses whose networks do not appear
+                      in the Network-Preference List should be placed at
+                      the end of the list.
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 78]
+
+
+
+
+RFC1123               SUPPORT SERVICES -- DOMAINS           October 1989
+
+
+         6.1.3.5  Extensibility
+
+            DNS software MUST support all well-known, class-independent
+            formats [DNS:2], and SHOULD be written to minimize the
+            trauma associated with the introduction of new well-known
+            types and local experimentation with non-standard types.
+
+            DISCUSSION:
+                 The data types and classes used by the DNS are
+                 extensible, and thus new types will be added and old
+                 types deleted or redefined.  Introduction of new data
+                 types ought to be dependent only upon the rules for
+                 compression of domain names inside DNS messages, and
+                 the translation between printable (i.e., master file)
+                 and internal formats for Resource Records (RRs).
+
+                 Compression relies on knowledge of the format of data
+                 inside a particular RR.  Hence compression must only be
+                 used for the contents of well-known, class-independent
+                 RRs, and must never be used for class-specific RRs or
+                 RR types that are not well-known.  The owner name of an
+                 RR is always eligible for compression.
+
+                 A name server may acquire, via zone transfer, RRs that
+                 the server doesn't know how to convert to printable
+                 format.  A resolver can receive similar information as
+                 the result of queries.  For proper operation, this data
+                 must be preserved, and hence the implication is that
+                 DNS software cannot use textual formats for internal
+                 storage.
+
+                 The DNS defines domain name syntax very generally -- a
+                 string of labels each containing up to 63 8-bit octets,
+                 separated by dots, and with a maximum total of 255
+                 octets.  Particular applications of the DNS are
+                 permitted to further constrain the syntax of the domain
+                 names they use, although the DNS deployment has led to
+                 some applications allowing more general names.  In
+                 particular, Section 2.1 of this document liberalizes
+                 slightly the syntax of a legal Internet host name that
+                 was defined in RFC-952 [DNS:4].
+
+         6.1.3.6  Status of RR Types
+
+            Name servers MUST be able to load all RR types except MD and
+            MF from configuration files.  The MD and MF types are
+            obsolete and MUST NOT be implemented; in particular, name
+            servers MUST NOT load these types from configuration files.
+
+
+
+Internet Engineering Task Force                                [Page 79]
+
+
+
+
+RFC1123               SUPPORT SERVICES -- DOMAINS           October 1989
+
+
+            DISCUSSION:
+                 The RR types MB, MG, MR, NULL, MINFO and RP are
+                 considered experimental, and applications that use the
+                 DNS cannot expect these RR types to be supported by
+                 most domains.  Furthermore these types are subject to
+                 redefinition.
+
+                 The TXT and WKS RR types have not been widely used by
+                 Internet sites; as a result, an application cannot rely
+                 on the existence of a TXT or WKS RR in most
+                 domains.
+
+         6.1.3.7  Robustness
+
+            DNS software may need to operate in environments where the
+            root servers or other servers are unavailable due to network
+            connectivity or other problems.  In this situation, DNS name
+            servers and resolvers MUST continue to provide service for
+            the reachable part of the name space, while giving temporary
+            failures for the rest.
+
+            DISCUSSION:
+                 Although the DNS is meant to be used primarily in the
+                 connected Internet, it should be possible to use the
+                 system in networks which are unconnected to the
+                 Internet.  Hence implementations must not depend on
+                 access to root servers before providing service for
+                 local names.
+
+         6.1.3.8  Local Host Table
+
+            DISCUSSION:
+                 A host may use a local host table as a backup or
+                 supplement to the DNS.  This raises the question of
+                 which takes precedence, the DNS or the host table; the
+                 most flexible approach would make this a configuration
+                 option.
+
+                 Typically, the contents of such a supplementary host
+                 table will be determined locally by the site.  However,
+                 a publicly-available table of Internet hosts is
+                 maintained by the DDN Network Information Center (DDN
+                 NIC), with a format documented in [DNS:4].  This table
+                 can be retrieved from the DDN NIC using a protocol
+                 described in [DNS:5].  It must be noted that this table
+                 contains only a small fraction of all Internet hosts.
+                 Hosts using this protocol to retrieve the DDN NIC host
+                 table should use the VERSION command to check if the
+
+
+
+Internet Engineering Task Force                                [Page 80]
+
+
+
+
+RFC1123               SUPPORT SERVICES -- DOMAINS           October 1989
+
+
+                 table has changed before requesting the entire table
+                 with the ALL command.  The VERSION identifier should be
+                 treated as an arbitrary string and tested only for
+                 equality; no numerical sequence may be assumed.
+
+                 The DDN NIC host table includes administrative
+                 information that is not needed for host operation and
+                 is therefore not currently included in the DNS
+                 database; examples include network and gateway entries.
+                 However, much of this additional information will be
+                 added to the DNS in the future.  Conversely, the DNS
+                 provides essential services (in particular, MX records)
+                 that are not available from the DDN NIC host table.
+
+      6.1.4  DNS USER INTERFACE
+
+         6.1.4.1  DNS Administration
+
+            This document is concerned with design and implementation
+            issues in host software, not with administrative or
+            operational issues.  However, administrative issues are of
+            particular importance in the DNS, since errors in particular
+            segments of this large distributed database can cause poor
+            or erroneous performance for many sites.  These issues are
+            discussed in [DNS:6] and [DNS:7].
+
+         6.1.4.2  DNS User Interface
+
+            Hosts MUST provide an interface to the DNS for all
+            application programs running on the host.  This interface
+            will typically direct requests to a system process to
+            perform the resolver function [DNS:1, 6.1:2].
+
+            At a minimum, the basic interface MUST support a request for
+            all information of a specific type and class associated with
+            a specific name, and it MUST return either all of the
+            requested information, a hard error code, or a soft error
+            indication.  When there is no error, the basic interface
+            returns the complete response information without
+            modification, deletion, or ordering, so that the basic
+            interface will not need to be changed to accommodate new
+            data types.
+
+            DISCUSSION:
+                 The soft error indication is an essential part of the
+                 interface, since it may not always be possible to
+                 access particular information from the DNS; see Section
+                 6.1.3.3.
+
+
+
+Internet Engineering Task Force                                [Page 81]
+
+
+
+
+RFC1123               SUPPORT SERVICES -- DOMAINS           October 1989
+
+
+            A host MAY provide other DNS interfaces tailored to
+            particular functions, transforming the raw domain data into
+            formats more suited to these functions.  In particular, a
+            host MUST provide a DNS interface to facilitate translation
+            between host addresses and host names.
+
+         6.1.4.3 Interface Abbreviation Facilities
+
+            User interfaces MAY provide a method for users to enter
+            abbreviations for commonly-used names.  Although the
+            definition of such methods is outside of the scope of the
+            DNS specification, certain rules are necessary to insure
+            that these methods allow access to the entire DNS name space
+            and to prevent excessive use of Internet resources.
+
+            If an abbreviation method is provided, then:
+
+            (a)  There MUST be some convention for denoting that a name
+                 is already complete, so that the abbreviation method(s)
+                 are suppressed.  A trailing dot is the usual method.
+
+            (b)  Abbreviation expansion MUST be done exactly once, and
+                 MUST be done in the context in which the name was
+                 entered.
+
+
+            DISCUSSION:
+                 For example, if an abbreviation is used in a mail
+                 program for a destination, the abbreviation should be
+                 expanded into a full domain name and stored in the
+                 queued message with an indication that it is already
+                 complete.  Otherwise, the abbreviation might be
+                 expanded with a mail system search list, not the
+                 user's, or a name could grow due to repeated
+                 canonicalization attempts interacting with wildcards.
+
+            The two most common abbreviation methods are:
+
+            (1)  Interface-level aliases
+
+                 Interface-level aliases are conceptually implemented as
+                 a list of alias/domain name pairs. The list can be
+                 per-user or per-host, and separate lists can be
+                 associated with different functions, e.g. one list for
+                 host name-to-address translation, and a different list
+                 for mail domains.  When the user enters a name, the
+                 interface attempts to match the name to the alias
+                 component of a list entry, and if a matching entry can
+
+
+
+Internet Engineering Task Force                                [Page 82]
+
+
+
+
+RFC1123               SUPPORT SERVICES -- DOMAINS           October 1989
+
+
+                 be found, the name is replaced by the domain name found
+                 in the pair.
+
+                 Note that interface-level aliases and CNAMEs are
+                 completely separate mechanisms; interface-level aliases
+                 are a local matter while CNAMEs are an Internet-wide
+                 aliasing mechanism which is a required part of any DNS
+                 implementation.
+
+            (2)  Search Lists
+
+                 A search list is conceptually implemented as an ordered
+                 list of domain names.  When the user enters a name, the
+                 domain names in the search list are used as suffixes to
+                 the user-supplied name, one by one, until a domain name
+                 with the desired associated data is found, or the
+                 search list is exhausted.  Search lists often contain
+                 the name of the local host's parent domain or other
+                 ancestor domains.  Search lists are often per-user or
+                 per-process.
+
+                 It SHOULD be possible for an administrator to disable a
+                 DNS search-list facility.  Administrative denial may be
+                 warranted in some cases, to prevent abuse of the DNS.
+
+                 There is danger that a search-list mechanism will
+                 generate excessive queries to the root servers while
+                 testing whether user input is a complete domain name,
+                 lacking a final period to mark it as complete.  A
+                 search-list mechanism MUST have one of, and SHOULD have
+                 both of, the following two provisions to prevent this:
+
+                 (a)  The local resolver/name server can implement
+                      caching of negative responses (see Section
+                      6.1.3.3).
+
+                 (b)  The search list expander can require two or more
+                      interior dots in a generated domain name before it
+                      tries using the name in a query to non-local
+                      domain servers, such as the root.
+
+                 DISCUSSION:
+                      The intent of this requirement is to avoid
+                      excessive delay for the user as the search list is
+                      tested, and more importantly to prevent excessive
+                      traffic to the root and other high-level servers.
+                      For example, if the user supplied a name "X" and
+                      the search list contained the root as a component,
+
+
+
+Internet Engineering Task Force                                [Page 83]
+
+
+
+
+RFC1123               SUPPORT SERVICES -- DOMAINS           October 1989
+
+
+                      a query would have to consult a root server before
+                      the next search list alternative could be tried.
+                      The resulting load seen by the root servers and
+                      gateways near the root would be multiplied by the
+                      number of hosts in the Internet.
+
+                      The negative caching alternative limits the effect
+                      to the first time a name is used.  The interior
+                      dot rule is simpler to implement but can prevent
+                      easy use of some top-level names.
+
+
+      6.1.5  DOMAIN NAME SYSTEM REQUIREMENTS SUMMARY
+
+                                               |           | | | |S| |
+                                               |           | | | |H| |F
+                                               |           | | | |O|M|o
+                                               |           | |S| |U|U|o
+                                               |           | |H| |L|S|t
+                                               |           |M|O| |D|T|n
+                                               |           |U|U|M| | |o
+                                               |           |S|L|A|N|N|t
+                                               |           |T|D|Y|O|O|t
+FEATURE                                        |SECTION    | | | |T|T|e
+-----------------------------------------------|-----------|-|-|-|-|-|--
+GENERAL ISSUES                                 |           | | | | | |
+                                               |           | | | | | |
+Implement DNS name-to-address conversion       |6.1.1      |x| | | | |
+Implement DNS address-to-name conversion       |6.1.1      |x| | | | |
+Support conversions using host table           |6.1.1      | | |x| | |
+Properly handle RR with zero TTL               |6.1.2.1    |x| | | | |
+Use QCLASS=* unnecessarily                     |6.1.2.2    | |x| | | |
+  Use QCLASS=IN for Internet class             |6.1.2.2    |x| | | | |
+Unused fields zero                             |6.1.2.3    |x| | | | |
+Use compression in responses                   |6.1.2.4    |x| | | | |
+                                               |           | | | | | |
+Include config info in responses               |6.1.2.5    | | | | |x|
+Support all well-known, class-indep. types     |6.1.3.5    |x| | | | |
+Easily expand type list                        |6.1.3.5    | |x| | | |
+Load all RR types (except MD and MF)           |6.1.3.6    |x| | | | |
+Load MD or MF type                             |6.1.3.6    | | | | |x|
+Operate when root servers, etc. unavailable    |6.1.3.7    |x| | | | |
+-----------------------------------------------|-----------|-|-|-|-|-|--
+RESOLVER ISSUES:                               |           | | | | | |
+                                               |           | | | | | |
+Resolver support multiple concurrent requests  |6.1.3.1    | |x| | | |
+Full-service resolver:                         |6.1.3.1    | | |x| | |
+  Local caching                                |6.1.3.1    |x| | | | |
+
+
+
+Internet Engineering Task Force                                [Page 84]
+
+
+
+
+RFC1123               SUPPORT SERVICES -- DOMAINS           October 1989
+
+
+  Information in local cache times out         |6.1.3.1    |x| | | | |
+  Configurable with starting info              |6.1.3.1    | |x| | | |
+Stub resolver:                                 |6.1.3.1    | | |x| | |
+  Use redundant recursive name servers         |6.1.3.1    |x| | | | |
+  Local caching                                |6.1.3.1    | | |x| | |
+  Information in local cache times out         |6.1.3.1    |x| | | | |
+Support for remote multi-homed hosts:          |           | | | | | |
+  Sort multiple addresses by preference list   |6.1.3.4    | |x| | | |
+                                               |           | | | | | |
+-----------------------------------------------|-----------|-|-|-|-|-|--
+TRANSPORT PROTOCOLS:                           |           | | | | | |
+                                               |           | | | | | |
+Support UDP queries                            |6.1.3.2    |x| | | | |
+Support TCP queries                            |6.1.3.2    | |x| | | |
+  Send query using UDP first                   |6.1.3.2    |x| | | | |1
+  Try TCP if UDP answers are truncated         |6.1.3.2    | |x| | | |
+Name server limit TCP query resources          |6.1.3.2    | | |x| | |
+  Punish unnecessary TCP query                 |6.1.3.2    | | | |x| |
+Use truncated data as if it were not           |6.1.3.2    | | | | |x|
+Private agreement to use only TCP              |6.1.3.2    | | |x| | |
+Use TCP for zone transfers                     |6.1.3.2    |x| | | | |
+TCP usage not block UDP queries                |6.1.3.2    |x| | | | |
+Support broadcast or multicast queries         |6.1.3.2    | | |x| | |
+  RD bit set in query                          |6.1.3.2    | | | | |x|
+  RD bit ignored by server if b'cast/m'cast    |6.1.3.2    |x| | | | |
+  Send only as occasional probe for addr's     |6.1.3.2    | |x| | | |
+-----------------------------------------------|-----------|-|-|-|-|-|--
+RESOURCE USAGE:                                |           | | | | | |
+                                               |           | | | | | |
+Transmission controls, per [DNS:2]             |6.1.3.3    |x| | | | |
+  Finite bounds per request                    |6.1.3.3    |x| | | | |
+Failure after retries => soft error            |6.1.3.3    |x| | | | |
+Cache temporary failures                       |6.1.3.3    | |x| | | |
+Cache negative responses                       |6.1.3.3    | |x| | | |
+Retries use exponential backoff                |6.1.3.3    | |x| | | |
+  Upper, lower bounds                          |6.1.3.3    | |x| | | |
+Client handle Source Quench                    |6.1.3.3    | |x| | | |
+Server ignore Source Quench                    |6.1.3.3    | | |x| | |
+-----------------------------------------------|-----------|-|-|-|-|-|--
+USER INTERFACE:                                |           | | | | | |
+                                               |           | | | | | |
+All programs have access to DNS interface      |6.1.4.2    |x| | | | |
+Able to request all info for given name        |6.1.4.2    |x| | | | |
+Returns complete info or error                 |6.1.4.2    |x| | | | |
+Special interfaces                             |6.1.4.2    | | |x| | |
+  Name<->Address translation                   |6.1.4.2    |x| | | | |
+                                               |           | | | | | |
+Abbreviation Facilities:                       |6.1.4.3    | | |x| | |
+
+
+
+Internet Engineering Task Force                                [Page 85]
+
+
+
+
+RFC1123               SUPPORT SERVICES -- DOMAINS           October 1989
+
+
+  Convention for complete names                |6.1.4.3    |x| | | | |
+  Conversion exactly once                      |6.1.4.3    |x| | | | |
+  Conversion in proper context                 |6.1.4.3    |x| | | | |
+  Search list:                                 |6.1.4.3    | | |x| | |
+    Administrator can disable                  |6.1.4.3    | |x| | | |
+    Prevention of excessive root queries       |6.1.4.3    |x| | | | |
+      Both methods                             |6.1.4.3    | |x| | | |
+-----------------------------------------------|-----------|-|-|-|-|-|--
+-----------------------------------------------|-----------|-|-|-|-|-|--
+
+1.   Unless there is private agreement between particular resolver and
+     particular server.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 86]
+
+
+
+
+RFC1123            SUPPORT SERVICES -- INITIALIZATION       October 1989
+
+
+   6.2  HOST INITIALIZATION
+
+      6.2.1  INTRODUCTION
+
+         This section discusses the initialization of host software
+         across a connected network, or more generally across an
+         Internet path.  This is necessary for a diskless host, and may
+         optionally be used for a host with disk drives.  For a diskless
+         host, the initialization process is called "network booting"
+         and is controlled by a bootstrap program located in a boot ROM.
+
+         To initialize a diskless host across the network, there are two
+         distinct phases:
+
+         (1)  Configure the IP layer.
+
+              Diskless machines often have no permanent storage in which
+              to store network configuration information, so that
+              sufficient configuration information must be obtained
+              dynamically to support the loading phase that follows.
+              This information must include at least the IP addresses of
+              the host and of the boot server.  To support booting
+              across a gateway, the address mask and a list of default
+              gateways are also required.
+
+         (2)  Load the host system code.
+
+              During the loading phase, an appropriate file transfer
+              protocol is used to copy the system code across the
+              network from the boot server.
+
+         A host with a disk may perform the first step, dynamic
+         configuration.  This is important for microcomputers, whose
+         floppy disks allow network configuration information to be
+         mistakenly duplicated on more than one host.  Also,
+         installation of new hosts is much simpler if they automatically
+         obtain their configuration information from a central server,
+         saving administrator time and decreasing the probability of
+         mistakes.
+
+      6.2.2  REQUIREMENTS
+
+         6.2.2.1  Dynamic Configuration
+
+            A number of protocol provisions have been made for dynamic
+            configuration.
+
+            o    ICMP Information Request/Reply messages
+
+
+
+Internet Engineering Task Force                                [Page 87]
+
+
+
+
+RFC1123            SUPPORT SERVICES -- INITIALIZATION       October 1989
+
+
+                 This obsolete message pair was designed to allow a host
+                 to find the number of the network it is on.
+                 Unfortunately, it was useful only if the host already
+                 knew the host number part of its IP address,
+                 information that hosts requiring dynamic configuration
+                 seldom had.
+
+            o    Reverse Address Resolution Protocol (RARP) [BOOT:4]
+
+                 RARP is a link-layer protocol for a broadcast medium
+                 that allows a host to find its IP address given its
+                 link layer address.  Unfortunately, RARP does not work
+                 across IP gateways and therefore requires a RARP server
+                 on every network.  In addition, RARP does not provide
+                 any other configuration information.
+
+            o    ICMP Address Mask Request/Reply messages
+
+                 These ICMP messages allow a host to learn the address
+                 mask for a particular network interface.
+
+            o    BOOTP Protocol [BOOT:2]
+
+                 This protocol allows a host to determine the IP
+                 addresses of the local host and the boot server, the
+                 name of an appropriate boot file, and optionally the
+                 address mask and list of default gateways.  To locate a
+                 BOOTP server, the host broadcasts a BOOTP request using
+                 UDP.  Ad hoc gateway extensions have been used to
+                 transmit the BOOTP broadcast through gateways, and in
+                 the future the IP Multicasting facility will provide a
+                 standard mechanism for this purpose.
+
+
+            The suggested approach to dynamic configuration is to use
+            the BOOTP protocol with the extensions defined in "BOOTP
+            Vendor Information Extensions" RFC-1084 [BOOT:3].  RFC-1084
+            defines some important general (not vendor-specific)
+            extensions.  In particular, these extensions allow the
+            address mask to be supplied in BOOTP; we RECOMMEND that the
+            address mask be supplied in this manner.
+
+            DISCUSSION:
+                 Historically, subnetting was defined long after IP, and
+                 so a separate mechanism (ICMP Address Mask messages)
+                 was designed to supply the address mask to a host.
+                 However, the IP address mask and the corresponding IP
+                 address conceptually form a pair, and for operational
+
+
+
+Internet Engineering Task Force                                [Page 88]
+
+
+
+
+RFC1123            SUPPORT SERVICES -- INITIALIZATION       October 1989
+
+
+                 simplicity they ought to be defined at the same time
+                 and by the same mechanism, whether a configuration file
+                 or a dynamic mechanism like BOOTP.
+
+                 Note that BOOTP is not sufficiently general to specify
+                 the configurations of all interfaces of a multihomed
+                 host.  A multihomed host must either use BOOTP
+                 separately for each interface, or configure one
+                 interface using BOOTP to perform the loading, and
+                 perform the complete initialization from a file later.
+
+                 Application layer configuration information is expected
+                 to be obtained from files after loading of the system
+                 code.
+
+         6.2.2.2  Loading Phase
+
+            A suggested approach for the loading phase is to use TFTP
+            [BOOT:1] between the IP addresses established by BOOTP.
+
+            TFTP to a broadcast address SHOULD NOT be used, for reasons
+            explained in Section 4.2.3.4.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 89]
+
+
+
+
+RFC1123              SUPPORT SERVICES -- MANAGEMENT         October 1989
+
+
+   6.3  REMOTE MANAGEMENT
+
+      6.3.1  INTRODUCTION
+
+         The Internet community has recently put considerable effort
+         into the development of network management protocols.  The
+         result has been a two-pronged approach [MGT:1, MGT:6]:  the
+         Simple Network Management Protocol (SNMP) [MGT:4] and the
+         Common Management Information Protocol over TCP (CMOT) [MGT:5].
+
+         In order to be managed using SNMP or CMOT, a host will need to
+         implement an appropriate management agent.  An Internet host
+         SHOULD include an agent for either SNMP or CMOT.
+
+         Both SNMP and CMOT operate on a Management Information Base
+         (MIB) that defines a collection of management values.  By
+         reading and setting these values, a remote application may
+         query and change the state of the managed system.
+
+         A standard MIB [MGT:3] has been defined for use by both
+         management protocols, using data types defined by the Structure
+         of Management Information (SMI) defined in [MGT:2].  Additional
+         MIB variables can be introduced under the "enterprises" and
+         "experimental" subtrees of the MIB naming space [MGT:2].
+
+         Every protocol module in the host SHOULD implement the relevant
+         MIB variables.  A host SHOULD implement the MIB variables as
+         defined in the most recent standard MIB, and MAY implement
+         other MIB variables when appropriate and useful.
+
+      6.3.2  PROTOCOL WALK-THROUGH
+
+         The MIB is intended to cover both hosts and gateways, although
+         there may be detailed differences in MIB application to the two
+         cases.  This section contains the appropriate interpretation of
+         the MIB for hosts.  It is likely that later versions of the MIB
+         will include more entries for host management.
+
+         A managed host must implement the following groups of MIB
+         object definitions: System, Interfaces, Address Translation,
+         IP, ICMP, TCP, and UDP.
+
+         The following specific interpretations apply to hosts:
+
+         o    ipInHdrErrors
+
+              Note that the error "time-to-live exceeded" can occur in a
+              host only when it is forwarding a source-routed datagram.
+
+
+
+Internet Engineering Task Force                                [Page 90]
+
+
+
+
+RFC1123              SUPPORT SERVICES -- MANAGEMENT         October 1989
+
+
+         o    ipOutNoRoutes
+
+              This object counts datagrams discarded because no route
+              can be found.  This may happen in a host if all the
+              default gateways in the host's configuration are down.
+
+         o    ipFragOKs, ipFragFails, ipFragCreates
+
+              A host that does not implement intentional fragmentation
+              (see "Fragmentation" section of [INTRO:1]) MUST return the
+              value zero for these three objects.
+
+         o    icmpOutRedirects
+
+              For a host, this object MUST always be zero, since hosts
+              do not send Redirects.
+
+         o    icmpOutAddrMaskReps
+
+              For a host, this object MUST always be zero, unless the
+              host is an authoritative source of address mask
+              information.
+
+         o    ipAddrTable
+
+              For a host, the "IP Address Table" object is effectively a
+              table of logical interfaces.
+
+         o    ipRoutingTable
+
+              For a host, the "IP Routing Table" object is effectively a
+              combination of the host's Routing Cache and the static
+              route table described in "Routing Outbound Datagrams"
+              section of [INTRO:1].
+
+              Within each ipRouteEntry, ipRouteMetric1...4 normally will
+              have no meaning for a host and SHOULD always be -1, while
+              ipRouteType will normally have the value "remote".
+
+              If destinations on the connected network do not appear in
+              the Route Cache (see "Routing Outbound Datagrams" section
+              of [INTRO:1]), there will be no entries with ipRouteType
+              of "direct".
+
+
+         DISCUSSION:
+              The current MIB does not include Type-of-Service in an
+              ipRouteEntry, but a future revision is expected to make
+
+
+
+Internet Engineering Task Force                                [Page 91]
+
+
+
+
+RFC1123              SUPPORT SERVICES -- MANAGEMENT         October 1989
+
+
+              this addition.
+
+              We also expect the MIB to be expanded to allow the remote
+              management of applications (e.g., the ability to partially
+              reconfigure mail systems).  Network service applications
+              such as mail systems should therefore be written with the
+              "hooks" for remote management.
+
+      6.3.3  MANAGEMENT REQUIREMENTS SUMMARY
+
+                                               |           | | | |S| |
+                                               |           | | | |H| |F
+                                               |           | | | |O|M|o
+                                               |           | |S| |U|U|o
+                                               |           | |H| |L|S|t
+                                               |           |M|O| |D|T|n
+                                               |           |U|U|M| | |o
+                                               |           |S|L|A|N|N|t
+                                               |           |T|D|Y|O|O|t
+FEATURE                                        |SECTION    | | | |T|T|e
+-----------------------------------------------|-----------|-|-|-|-|-|--
+Support SNMP or CMOT agent                     |6.3.1      | |x| | | |
+Implement specified objects in standard MIB    |6.3.1      | |x| | | |
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 92]
+
+
+
+
+RFC1123              SUPPORT SERVICES -- MANAGEMENT         October 1989
+
+
+7.  REFERENCES
+
+   This section lists the primary references with which every
+   implementer must be thoroughly familiar.  It also lists some
+   secondary references that are suggested additional reading.
+
+   INTRODUCTORY REFERENCES:
+
+
+   [INTRO:1] "Requirements for Internet Hosts -- Communication Layers,"
+        IETF Host Requirements Working Group, R. Braden, Ed., RFC-1122,
+        October 1989.
+
+   [INTRO:2]  "DDN Protocol Handbook," NIC-50004, NIC-50005, NIC-50006,
+        (three volumes), SRI International, December 1985.
+
+   [INTRO:3]  "Official Internet Protocols," J. Reynolds and J. Postel,
+        RFC-1011, May 1987.
+
+        This document is republished periodically with new RFC numbers;
+        the latest version must be used.
+
+   [INTRO:4]  "Protocol Document Order Information," O. Jacobsen and J.
+        Postel, RFC-980, March 1986.
+
+   [INTRO:5]  "Assigned Numbers," J. Reynolds and J. Postel, RFC-1010,
+        May 1987.
+
+        This document is republished periodically with new RFC numbers;
+        the latest version must be used.
+
+
+   TELNET REFERENCES:
+
+
+   [TELNET:1]  "Telnet Protocol Specification," J. Postel and J.
+        Reynolds, RFC-854, May 1983.
+
+   [TELNET:2]  "Telnet Option Specification," J. Postel and J. Reynolds,
+        RFC-855, May 1983.
+
+   [TELNET:3]  "Telnet Binary Transmission," J. Postel and J. Reynolds,
+        RFC-856, May 1983.
+
+   [TELNET:4]  "Telnet Echo Option," J. Postel and J. Reynolds, RFC-857,
+        May 1983.
+
+   [TELNET:5]  "Telnet Suppress Go Ahead Option," J. Postel and J.
+
+
+
+Internet Engineering Task Force                                [Page 93]
+
+
+
+
+RFC1123              SUPPORT SERVICES -- MANAGEMENT         October 1989
+
+
+        Reynolds, RFC-858, May 1983.
+
+   [TELNET:6]  "Telnet Status Option," J. Postel and J. Reynolds, RFC-
+        859, May 1983.
+
+   [TELNET:7]  "Telnet Timing Mark Option," J. Postel and J. Reynolds,
+        RFC-860, May 1983.
+
+   [TELNET:8]  "Telnet Extended Options List," J. Postel and J.
+        Reynolds, RFC-861, May 1983.
+
+   [TELNET:9]  "Telnet End-Of-Record Option," J. Postel, RFC-885,
+        December 1983.
+
+   [TELNET:10] "Telnet Terminal-Type Option," J. VanBokkelen, RFC-1091,
+        February 1989.
+
+        This document supersedes RFC-930.
+
+   [TELNET:11] "Telnet Window Size Option," D. Waitzman, RFC-1073,
+        October 1988.
+
+   [TELNET:12] "Telnet Linemode Option," D. Borman, RFC-1116, August
+        1989.
+
+   [TELNET:13] "Telnet Terminal Speed Option," C. Hedrick, RFC-1079,
+        December 1988.
+
+   [TELNET:14] "Telnet Remote Flow Control Option," C. Hedrick, RFC-
+        1080, November 1988.
+
+
+   SECONDARY TELNET REFERENCES:
+
+
+   [TELNET:15] "Telnet Protocol," MIL-STD-1782, U.S. Department of
+        Defense, May 1984.
+
+        This document is intended to describe the same protocol as RFC-
+        854.  In case of conflict, RFC-854 takes precedence, and the
+        present document takes precedence over both.
+
+   [TELNET:16] "SUPDUP Protocol," M. Crispin, RFC-734, October 1977.
+
+   [TELNET:17] "Telnet SUPDUP Option," M. Crispin, RFC-736, October
+        1977.
+
+   [TELNET:18] "Data Entry Terminal Option," J. Day, RFC-732, June 1977.
+
+
+
+Internet Engineering Task Force                                [Page 94]
+
+
+
+
+RFC1123              SUPPORT SERVICES -- MANAGEMENT         October 1989
+
+
+   [TELNET:19] "TELNET Data Entry Terminal option -- DODIIS
+        Implementation," A. Yasuda and T. Thompson, RFC-1043, February
+        1988.
+
+
+   FTP REFERENCES:
+
+
+   [FTP:1]  "File Transfer Protocol," J. Postel and J. Reynolds, RFC-
+        959, October 1985.
+
+   [FTP:2]  "Document File Format Standards," J. Postel, RFC-678,
+        December 1974.
+
+   [FTP:3]  "File Transfer Protocol," MIL-STD-1780, U.S. Department of
+        Defense, May 1984.
+
+        This document is based on an earlier version of the FTP
+        specification (RFC-765) and is obsolete.
+
+
+   TFTP REFERENCES:
+
+
+   [TFTP:1]  "The TFTP Protocol Revision 2," K. Sollins, RFC-783, June
+        1981.
+
+
+   MAIL REFERENCES:
+
+
+   [SMTP:1]  "Simple Mail Transfer Protocol," J. Postel, RFC-821, August
+        1982.
+
+   [SMTP:2]  "Standard For The Format of ARPA Internet Text Messages,"
+        D. Crocker, RFC-822, August 1982.
+
+        This document obsoleted an earlier specification, RFC-733.
+
+   [SMTP:3]  "Mail Routing and the Domain System," C. Partridge, RFC-
+        974, January 1986.
+
+        This RFC describes the use of MX records, a mandatory extension
+        to the mail delivery process.
+
+   [SMTP:4]  "Duplicate Messages and SMTP," C. Partridge, RFC-1047,
+        February 1988.
+
+
+
+
+Internet Engineering Task Force                                [Page 95]
+
+
+
+
+RFC1123              SUPPORT SERVICES -- MANAGEMENT         October 1989
+
+
+   [SMTP:5a]  "Mapping between X.400 and RFC 822," S. Kille, RFC-987,
+        June 1986.
+
+   [SMTP:5b]  "Addendum to RFC-987," S. Kille, RFC-1026, September 1987.
+
+        The two preceding RFC's define a proposed standard for
+        gatewaying mail between the Internet and the X.400 environments.
+
+   [SMTP:6]  "Simple Mail Transfer Protocol,"  MIL-STD-1781, U.S.
+        Department of Defense, May 1984.
+
+        This specification is intended to describe the same protocol as
+        does RFC-821.  However, MIL-STD-1781 is incomplete; in
+        particular, it does not include MX records [SMTP:3].
+
+   [SMTP:7]  "A Content-Type Field for Internet Messages," M. Sirbu,
+        RFC-1049, March 1988.
+
+
+   DOMAIN NAME SYSTEM REFERENCES:
+
+
+   [DNS:1]  "Domain Names - Concepts and Facilities," P. Mockapetris,
+        RFC-1034, November 1987.
+
+        This document and the following one obsolete RFC-882, RFC-883,
+        and RFC-973.
+
+   [DNS:2]  "Domain Names - Implementation and Specification," P.
+        Mockapetris, RFC-1035, November 1987.
+
+
+   [DNS:3]  "Mail Routing and the Domain System," C. Partridge, RFC-974,
+        January 1986.
+
+
+   [DNS:4]  "DoD Internet Host Table Specification," K. Harrenstein,
+        M. Stahl, E. Feinler, RFC-952, October 1985.
+
+   SECONDARY DNS REFERENCES:
+
+
+   [DNS:5]  "Hostname Server," K. Harrenstein, M. Stahl, E. Feinler,
+        RFC-953, October 1985.
+
+   [DNS:6]  "Domain Administrators Guide," M. Stahl, RFC-1032, November
+        1987.
+
+
+
+
+Internet Engineering Task Force                                [Page 96]
+
+
+
+
+RFC1123              SUPPORT SERVICES -- MANAGEMENT         October 1989
+
+
+   [DNS:7]  "Domain Administrators Operations Guide," M. Lottor, RFC-
+        1033, November 1987.
+
+   [DNS:8]  "The Domain Name System Handbook," Vol. 4 of Internet
+        Protocol Handbook, NIC 50007, SRI Network Information Center,
+        August 1989.
+
+
+   SYSTEM INITIALIZATION REFERENCES:
+
+
+   [BOOT:1] "Bootstrap Loading Using TFTP," R. Finlayson, RFC-906, June
+        1984.
+
+   [BOOT:2] "Bootstrap Protocol (BOOTP)," W. Croft and J. Gilmore, RFC-
+        951, September 1985.
+
+   [BOOT:3] "BOOTP Vendor Information Extensions," J. Reynolds, RFC-
+        1084, December 1988.
+
+        Note: this RFC revised and obsoleted RFC-1048.
+
+   [BOOT:4] "A Reverse Address Resolution Protocol," R. Finlayson, T.
+        Mann, J. Mogul, and M. Theimer, RFC-903, June 1984.
+
+
+   MANAGEMENT REFERENCES:
+
+
+   [MGT:1]  "IAB Recommendations for the Development of Internet Network
+        Management Standards," V. Cerf, RFC-1052, April 1988.
+
+   [MGT:2]  "Structure and Identification of Management Information for
+        TCP/IP-based internets," M. Rose and K. McCloghrie, RFC-1065,
+        August 1988.
+
+   [MGT:3]  "Management Information Base for Network Management of
+        TCP/IP-based internets," M. Rose and K. McCloghrie, RFC-1066,
+        August 1988.
+
+   [MGT:4]  "A Simple Network Management Protocol," J. Case, M. Fedor,
+        M. Schoffstall, and C. Davin, RFC-1098, April 1989.
+
+   [MGT:5]  "The Common Management Information Services and Protocol
+        over TCP/IP," U. Warrier and L. Besaw, RFC-1095, April 1989.
+
+   [MGT:6]  "Report of the Second Ad Hoc Network Management Review
+        Group," V. Cerf, RFC-1109, August 1989.
+
+
+
+Internet Engineering Task Force                                [Page 97]
+
+
+
+
+RFC1123              SUPPORT SERVICES -- MANAGEMENT         October 1989
+
+
+Security Considerations
+
+   There are many security issues in the application and support
+   programs of host software, but a full discussion is beyond the scope
+   of this RFC.  Security-related issues are mentioned in sections
+   concerning TFTP (Sections 4.2.1, 4.2.3.4, 4.2.3.5), the SMTP VRFY and
+   EXPN commands (Section 5.2.3), the SMTP HELO command (5.2.5), and the
+   SMTP DATA command (Section 5.2.8).
+
+Author's Address
+
+   Robert Braden
+   USC/Information Sciences Institute
+   4676 Admiralty Way
+   Marina del Rey, CA 90292-6695
+
+   Phone: (213) 822 1511
+
+   EMail: Braden@ISI.EDU
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Internet Engineering Task Force                                [Page 98]
+
diff --git a/ext/picotcp/RFC/rfc1323.txt b/ext/picotcp/RFC/rfc1323.txt
new file mode 100644
index 0000000..356eaa8
--- /dev/null
+++ b/ext/picotcp/RFC/rfc1323.txt
@@ -0,0 +1,2075 @@
+
+
+
+
+
+
+Network Working Group                                        V. Jacobson
+Request for Comments: 1323                                           LBL
+Obsoletes: RFC 1072, RFC 1185                                  R. Braden
+                                                                     ISI
+                                                               D. Borman
+                                                           Cray Research
+                                                                May 1992
+
+
+                  TCP Extensions for High Performance
+
+Status of This Memo
+
+   This RFC specifies an IAB standards track protocol for the Internet
+   community, and requests discussion and suggestions for improvements.
+   Please refer to the current edition of the "IAB Official Protocol
+   Standards" for the standardization state and status of this protocol.
+   Distribution of this memo is unlimited.
+
+Abstract
+
+   This memo presents a set of TCP extensions to improve performance
+   over large bandwidth*delay product paths and to provide reliable
+   operation over very high-speed paths.  It defines new TCP options for
+   scaled windows and timestamps, which are designed to provide
+   compatible interworking with TCP's that do not implement the
+   extensions.  The timestamps are used for two distinct mechanisms:
+   RTTM (Round Trip Time Measurement) and PAWS (Protect Against Wrapped
+   Sequences).  Selective acknowledgments are not included in this memo.
+
+   This memo combines and supersedes RFC-1072 and RFC-1185, adding
+   additional clarification and more detailed specification.  Appendix C
+   summarizes the changes from the earlier RFCs.
+
+TABLE OF CONTENTS
+
+   1.  Introduction .................................................  2
+   2.  TCP Window Scale Option ......................................  8
+   3.  RTTM -- Round-Trip Time Measurement .......................... 11
+   4.  PAWS -- Protect Against Wrapped Sequence Numbers ............. 17
+   5.  Conclusions and Acknowledgments .............................. 25
+   6.  References ................................................... 25
+   APPENDIX A: Implementation Suggestions ........................... 27
+   APPENDIX B: Duplicates from Earlier Connection Incarnations ...... 27
+   APPENDIX C: Changes from RFC-1072, RFC-1185 ...................... 30
+   APPENDIX D: Summary of Notation .................................. 31
+   APPENDIX E: Event Processing ..................................... 32
+   Security Considerations .......................................... 37
+
+
+
+Jacobson, Braden, & Borman                                      [Page 1]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+   Authors' Addresses ............................................... 37
+
+1. INTRODUCTION
+
+   The TCP protocol [Postel81] was designed to operate reliably over
+   almost any transmission medium regardless of transmission rate,
+   delay, corruption, duplication, or reordering of segments.
+   Production TCP implementations currently adapt to transfer rates in
+   the range of 100 bps to 10**7 bps and round-trip delays in the range
+   1 ms to 100 seconds.  Recent work on TCP performance has shown that
+   TCP can work well over a variety of Internet paths, ranging from 800
+   Mbit/sec I/O channels to 300 bit/sec dial-up modems [Jacobson88a].
+
+   The introduction of fiber optics is resulting in ever-higher
+   transmission speeds, and the fastest paths are moving out of the
+   domain for which TCP was originally engineered.  This memo defines a
+   set of modest extensions to TCP to extend the domain of its
+   application to match this increasing network capability.  It is based
+   upon and obsoletes RFC-1072 [Jacobson88b] and RFC-1185 [Jacobson90b].
+
+   There is no one-line answer to the question: "How fast can TCP go?".
+   There are two separate kinds of issues, performance and reliability,
+   and each depends upon different parameters.  We discuss each in turn.
+
+   1.1  TCP Performance
+
+      TCP performance depends not upon the transfer rate itself, but
+      rather upon the product of the transfer rate and the round-trip
+      delay.  This "bandwidth*delay product" measures the amount of data
+      that would "fill the pipe"; it is the buffer space required at
+      sender and receiver to obtain maximum throughput on the TCP
+      connection over the path, i.e., the amount of unacknowledged data
+      that TCP must handle in order to keep the pipeline full.  TCP
+      performance problems arise when the bandwidth*delay product is
+      large.  We refer to an Internet path operating in this region as a
+      "long, fat pipe", and a network containing this path as an "LFN"
+      (pronounced "elephan(t)").
+
+      High-capacity packet satellite channels (e.g., DARPA's Wideband
+      Net) are LFN's.  For example, a DS1-speed satellite channel has a
+      bandwidth*delay product of 10**6 bits or more; this corresponds to
+      100 outstanding TCP segments of 1200 bytes each.  Terrestrial
+      fiber-optical paths will also fall into the LFN class; for
+      example, a cross-country delay of 30 ms at a DS3 bandwidth
+      (45Mbps) also exceeds 10**6 bits.
+
+      There are three fundamental performance problems with the current
+      TCP over LFN paths:
+
+
+
+Jacobson, Braden, & Borman                                      [Page 2]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+      (1)  Window Size Limit
+
+           The TCP header uses a 16 bit field to report the receive
+           window size to the sender.  Therefore, the largest window
+           that can be used is 2**16 = 65K bytes.
+
+           To circumvent this problem, Section 2 of this memo defines a
+           new TCP option, "Window Scale", to allow windows larger than
+           2**16.  This option defines an implicit scale factor, which
+           is used to multiply the window size value found in a TCP
+           header to obtain the true window size.
+
+      (2)  Recovery from Losses
+
+           Packet losses in an LFN can have a catastrophic effect on
+           throughput.  Until recently, properly-operating TCP
+           implementations would cause the data pipeline to drain with
+           every packet loss, and require a slow-start action to
+           recover.  Recently, the Fast Retransmit and Fast Recovery
+           algorithms [Jacobson90c] have been introduced.  Their
+           combined effect is to recover from one packet loss per
+           window, without draining the pipeline.  However, more than
+           one packet loss per window typically results in a
+           retransmission timeout and the resulting pipeline drain and
+           slow start.
+
+           Expanding the window size to match the capacity of an LFN
+           results in a corresponding increase of the probability of
+           more than one packet per window being dropped.  This could
+           have a devastating effect upon the throughput of TCP over an
+           LFN.  In addition, if a congestion control mechanism based
+           upon some form of random dropping were introduced into
+           gateways, randomly spaced packet drops would become common,
+           possibly increasing the probability of dropping more than one
+           packet per window.
+
+           To generalize the Fast Retransmit/Fast Recovery mechanism to
+           handle multiple packets dropped per window, selective
+           acknowledgments are required.  Unlike the normal cumulative
+           acknowledgments of TCP, selective acknowledgments give the
+           sender a complete picture of which segments are queued at the
+           receiver and which have not yet arrived.  Some evidence in
+           favor of selective acknowledgments has been published
+           [NBS85], and selective acknowledgments have been included in
+           a number of experimental Internet protocols -- VMTP
+           [Cheriton88], NETBLT [Clark87], and RDP [Velten84], and
+           proposed for OSI TP4 [NBS85].  However, in the non-LFN
+           regime, selective acknowledgments reduce the number of
+
+
+
+Jacobson, Braden, & Borman                                      [Page 3]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+           packets retransmitted but do not otherwise improve
+           performance, making their complexity of questionable value.
+           However, selective acknowledgments are expected to become
+           much more important in the LFN regime.
+
+           RFC-1072 defined a new TCP "SACK" option to send a selective
+           acknowledgment.  However, there are important technical
+           issues to be worked out concerning both the format and
+           semantics of the SACK option.  Therefore, SACK has been
+           omitted from this package of extensions.  It is hoped that
+           SACK can "catch up" during the standardization process.
+
+      (3)  Round-Trip Measurement
+
+           TCP implements reliable data delivery by retransmitting
+           segments that are not acknowledged within some retransmission
+           timeout (RTO) interval.  Accurate dynamic determination of an
+           appropriate RTO is essential to TCP performance.  RTO is
+           determined by estimating the mean and variance of the
+           measured round-trip time (RTT), i.e., the time interval
+           between sending a segment and receiving an acknowledgment for
+           it [Jacobson88a].
+
+           Section 3 introduces a new TCP option, "Timestamps", and then
+           defines a mechanism using this option that allows nearly
+           every segment, including retransmissions, to be timed at
+           negligible computational cost.  We use the mnemonic RTTM
+           (Round Trip Time Measurement) for this mechanism, to
+           distinguish it from other uses of the Timestamps option.
+
+
+   1.2 TCP Reliability
+
+      Now we turn from performance to reliability.  High transfer rate
+      enters TCP performance through the bandwidth*delay product.
+      However, high transfer rate alone can threaten TCP reliability by
+      violating the assumptions behind the TCP mechanism for duplicate
+      detection and sequencing.
+
+      An especially serious kind of error may result from an accidental
+      reuse of TCP sequence numbers in data segments.  Suppose that an
+      "old duplicate segment", e.g., a duplicate data segment that was
+      delayed in Internet queues, is delivered to the receiver at the
+      wrong moment, so that its sequence numbers fall somewhere within
+      the current window.  There would be no checksum failure to warn of
+      the error, and the result could be an undetected corruption of the
+      data.  Reception of an old duplicate ACK segment at the
+      transmitter could be only slightly less serious: it is likely to
+
+
+
+Jacobson, Braden, & Borman                                      [Page 4]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+      lock up the connection so that no further progress can be made,
+      forcing an RST on the connection.
+
+      TCP reliability depends upon the existence of a bound on the
+      lifetime of a segment: the "Maximum Segment Lifetime" or MSL.  An
+      MSL is generally required by any reliable transport protocol,
+      since every sequence number field must be finite, and therefore
+      any sequence number may eventually be reused.  In the Internet
+      protocol suite, the MSL bound is enforced by an IP-layer
+      mechanism, the "Time-to-Live" or TTL field.
+
+      Duplication of sequence numbers might happen in either of two
+      ways:
+
+      (1)  Sequence number wrap-around on the current connection
+
+           A TCP sequence number contains 32 bits.  At a high enough
+           transfer rate, the 32-bit sequence space may be "wrapped"
+           (cycled) within the time that a segment is delayed in queues.
+
+      (2)  Earlier incarnation of the connection
+
+           Suppose that a connection terminates, either by a proper
+           close sequence or due to a host crash, and the same
+           connection (i.e., using the same pair of sockets) is
+           immediately reopened.  A delayed segment from the terminated
+           connection could fall within the current window for the new
+           incarnation and be accepted as valid.
+
+      Duplicates from earlier incarnations, Case (2), are avoided by
+      enforcing the current fixed MSL of the TCP spec, as explained in
+      Section 4.3 and Appendix B.   However, case (1), avoiding the
+      reuse of sequence numbers within the same connection, requires an
+      MSL bound that depends upon the transfer rate, and at high enough
+      rates, a new mechanism is required.
+
+      More specifically, if the maximum effective bandwidth at which TCP
+      is able to transmit over a particular path is B bytes per second,
+      then the following constraint must be satisfied for error-free
+      operation:
+
+          2**31 / B  > MSL (secs)                     [1]
+
+      The following table shows the value for Twrap = 2**31/B in
+      seconds, for some important values of the bandwidth B:
+
+
+
+
+
+
+Jacobson, Braden, & Borman                                      [Page 5]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+           Network       B*8          B         Twrap
+                      bits/sec   bytes/sec      secs
+           _______    _______      ______       ______
+
+           ARPANET       56kbps       7KBps    3*10**5 (~3.6 days)
+
+           DS1          1.5Mbps     190KBps    10**4 (~3 hours)
+
+           Ethernet      10Mbps    1.25MBps    1700 (~30 mins)
+
+           DS3           45Mbps     5.6MBps    380
+
+           FDDI         100Mbps    12.5MBps    170
+
+           Gigabit        1Gbps     125MBps    17
+
+
+      It is clear that wrap-around of the sequence space is not a
+      problem for 56kbps packet switching or even 10Mbps Ethernets.  On
+      the other hand, at DS3 and FDDI speeds, Twrap is comparable to the
+      2 minute MSL assumed by the TCP specification [Postel81].  Moving
+      towards gigabit speeds, Twrap becomes too small for reliable
+      enforcement by the Internet TTL mechanism.
+
+      The 16-bit window field of TCP limits the effective bandwidth B to
+      2**16/RTT, where RTT is the round-trip time in seconds
+      [McKenzie89].  If the RTT is large enough, this limits B to a
+      value that meets the constraint [1] for a large MSL value.  For
+      example, consider a transcontinental backbone with an RTT of 60ms
+      (set by the laws of physics).  With the bandwidth*delay product
+      limited to 64KB by the TCP window size, B is then limited to
+      1.1MBps, no matter how high the theoretical transfer rate of the
+      path.  This corresponds to cycling the sequence number space in
+      Twrap = 2000 secs, which is safe in today's Internet.
+
+      It is important to understand that the culprit is not the larger
+      window but rather the high bandwidth.  For example, consider a
+      (very large) FDDI LAN with a diameter of 10km.  Using the speed of
+      light, we can compute the RTT across the ring as
+      (2*10**4)/(3*10**8) = 67 microseconds, and the delay*bandwidth
+      product is then 833 bytes.  A TCP connection across this LAN using
+      a window of only 833 bytes will run at the full 100Mbps and can
+      wrap the sequence space in about 3 minutes, very close to the MSL
+      of TCP.  Thus, high speed alone can cause a reliability problem
+      with sequence number wrap-around, even without extended windows.
+
+      Watson's Delta-T protocol [Watson81] includes network-layer
+      mechanisms for precise enforcement of an MSL.  In contrast, the IP
+
+
+
+Jacobson, Braden, & Borman                                      [Page 6]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+      mechanism for MSL enforcement is loosely defined and even more
+      loosely implemented in the Internet.  Therefore, it is unwise to
+      depend upon active enforcement of MSL for TCP connections, and it
+      is unrealistic to imagine setting MSL's smaller than the current
+      values (e.g., 120 seconds specified for TCP).
+
+      A possible fix for the problem of cycling the sequence space would
+      be to increase the size of the TCP sequence number field.  For
+      example, the sequence number field (and also the acknowledgment
+      field) could be expanded to 64 bits.  This could be done either by
+      changing the TCP header or by means of an additional option.
+
+      Section 4 presents a different mechanism, which we call PAWS
+      (Protect Against Wrapped Sequence numbers), to extend TCP
+      reliability to transfer rates well beyond the foreseeable upper
+      limit of network bandwidths.  PAWS uses the TCP Timestamps option
+      defined in Section 3 to protect against old duplicates from the
+      same connection.
+
+   1.3 Using TCP options
+
+      The extensions defined in this memo all use new TCP options.  We
+      must address two possible issues concerning the use of TCP
+      options: (1) compatibility and (2) overhead.
+
+      We must pay careful attention to compatibility, i.e., to
+      interoperation with existing implementations.  The only TCP option
+      defined previously, MSS, may appear only on a SYN segment.  Every
+      implementation should (and we expect that most will) ignore
+      unknown options on SYN segments.  However, some buggy TCP
+      implementation might be crashed by the first appearance of an
+      option on a non-SYN segment.  Therefore, for each of the
+      extensions defined below, TCP options will be sent on non-SYN
+      segments only when an exchange of options on the SYN segments has
+      indicated that both sides understand the extension.  Furthermore,
+      an extension option will be sent in a <SYN,ACK> segment only if
+      the corresponding option was received in the initial <SYN>
+      segment.
+
+      A question may be raised about the bandwidth and processing
+      overhead for TCP options.  Those options that occur on SYN
+      segments are not likely to cause a performance concern.  Opening a
+      TCP connection requires execution of significant special-case
+      code, and the processing of options is unlikely to increase that
+      cost significantly.
+
+      On the other hand, a Timestamps option may appear in any data or
+      ACK segment, adding 12 bytes to the 20-byte TCP header.  We
+
+
+
+Jacobson, Braden, & Borman                                      [Page 7]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+      believe that the bandwidth saved by reducing unnecessary
+      retransmissions will more than pay for the extra header bandwidth.
+
+      There is also an issue about the processing overhead for parsing
+      the variable byte-aligned format of options, particularly with a
+      RISC-architecture CPU.  To meet this concern, Appendix A contains
+      a recommended layout of the options in TCP headers to achieve
+      reasonable data field alignment.  In the spirit of Header
+      Prediction, a TCP can quickly test for this layout and if it is
+      verified then use a fast path.  Hosts that use this canonical
+      layout will effectively use the options as a set of fixed-format
+      fields appended to the TCP header.  However, to retain the
+      philosophical and protocol framework of TCP options, a TCP must be
+      prepared to parse an arbitrary options field, albeit with less
+      efficiency.
+
+      Finally, we observe that most of the mechanisms defined in this
+      memo are important for LFN's and/or very high-speed networks.  For
+      low-speed networks, it might be a performance optimization to NOT
+      use these mechanisms.  A TCP vendor concerned about optimal
+      performance over low-speed paths might consider turning these
+      extensions off for low-speed paths, or allow a user or
+      installation manager to disable them.
+
+
+2. TCP WINDOW SCALE OPTION
+
+   2.1  Introduction
+
+      The window scale extension expands the definition of the TCP
+      window to 32 bits and then uses a scale factor to carry this 32-
+      bit value in the 16-bit Window field of the TCP header (SEG.WND in
+      RFC-793).  The scale factor is carried in a new TCP option, Window
+      Scale.  This option is sent only in a SYN segment (a segment with
+      the SYN bit on), hence the window scale is fixed in each direction
+      when a connection is opened.  (Another design choice would be to
+      specify the window scale in every TCP segment.  It would be
+      incorrect to send a window scale option only when the scale factor
+      changed, since a TCP option in an acknowledgement segment will not
+      be delivered reliably (unless the ACK happens to be piggy-backed
+      on data in the other direction).  Fixing the scale when the
+      connection is opened has the advantage of lower overhead but the
+      disadvantage that the scale factor cannot be changed during the
+      connection.)
+
+      The maximum receive window, and therefore the scale factor, is
+      determined by the maximum receive buffer space.  In a typical
+      modern implementation, this maximum buffer space is set by default
+
+
+
+Jacobson, Braden, & Borman                                      [Page 8]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+      but can be overridden by a user program before a TCP connection is
+      opened.  This determines the scale factor, and therefore no new
+      user interface is needed for window scaling.
+
+   2.2  Window Scale Option
+
+      The three-byte Window Scale option may be sent in a SYN segment by
+      a TCP.  It has two purposes: (1) indicate that the TCP is prepared
+      to do both send and receive window scaling, and (2) communicate a
+      scale factor to be applied to its receive window.  Thus, a TCP
+      that is prepared to scale windows should send the option, even if
+      its own scale factor is 1.  The scale factor is limited to a power
+      of two and encoded logarithmically, so it may be implemented by
+      binary shift operations.
+
+
+      TCP Window Scale Option (WSopt):
+
+         Kind: 3 Length: 3 bytes
+
+                +---------+---------+---------+
+                | Kind=3  |Length=3 |shift.cnt|
+                +---------+---------+---------+
+
+
+         This option is an offer, not a promise; both sides must send
+         Window Scale options in their SYN segments to enable window
+         scaling in either direction.  If window scaling is enabled,
+         then the TCP that sent this option will right-shift its true
+         receive-window values by 'shift.cnt' bits for transmission in
+         SEG.WND.  The value 'shift.cnt' may be zero (offering to scale,
+         while applying a scale factor of 1 to the receive window).
+
+         This option may be sent in an initial <SYN> segment (i.e., a
+         segment with the SYN bit on and the ACK bit off).  It may also
+         be sent in a <SYN,ACK> segment, but only if a Window Scale op-
+         tion was received in the initial <SYN> segment.  A Window Scale
+         option in a segment without a SYN bit should be ignored.
+
+         The Window field in a SYN (i.e., a <SYN> or <SYN,ACK>) segment
+         itself is never scaled.
+
+   2.3  Using the Window Scale Option
+
+      A model implementation of window scaling is as follows, using the
+      notation of RFC-793 [Postel81]:
+
+      *    All windows are treated as 32-bit quantities for storage in
+
+
+
+Jacobson, Braden, & Borman                                      [Page 9]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+           the connection control block and for local calculations.
+           This includes the send-window (SND.WND) and the receive-
+           window (RCV.WND) values, as well as the congestion window.
+
+      *    The connection state is augmented by two window shift counts,
+           Snd.Wind.Scale and Rcv.Wind.Scale, to be applied to the
+           incoming and outgoing window fields, respectively.
+
+      *    If a TCP receives a <SYN> segment containing a Window Scale
+           option, it sends its own Window Scale option in the <SYN,ACK>
+           segment.
+
+      *    The Window Scale option is sent with shift.cnt = R, where R
+           is the value that the TCP would like to use for its receive
+           window.
+
+      *    Upon receiving a SYN segment with a Window Scale option
+           containing shift.cnt = S, a TCP sets Snd.Wind.Scale to S and
+           sets Rcv.Wind.Scale to R; otherwise, it sets both
+           Snd.Wind.Scale and Rcv.Wind.Scale to zero.
+
+      *    The window field (SEG.WND) in the header of every incoming
+           segment, with the exception of SYN segments, is left-shifted
+           by Snd.Wind.Scale bits before updating SND.WND:
+
+              SND.WND = SEG.WND << Snd.Wind.Scale
+
+           (assuming the other conditions of RFC793 are met, and using
+           the "C" notation "<<" for left-shift).
+
+      *    The window field (SEG.WND) of every outgoing segment, with
+           the exception of SYN segments, is right-shifted by
+           Rcv.Wind.Scale bits:
+
+              SEG.WND = RCV.WND >> Rcv.Wind.Scale.
+
+
+      TCP determines if a data segment is "old" or "new" by testing
+      whether its sequence number is within 2**31 bytes of the left edge
+      of the window, and if it is not, discarding the data as "old".  To
+      insure that new data is never mistakenly considered old and vice-
+      versa, the left edge of the sender's window has to be at most
+      2**31 away from the right edge of the receiver's window.
+      Similarly with the sender's right edge and receiver's left edge.
+      Since the right and left edges of either the sender's or
+      receiver's window differ by the window size, and since the sender
+      and receiver windows can be out of phase by at most the window
+      size, the above constraints imply that 2 * the max window size
+
+
+
+Jacobson, Braden, & Borman                                     [Page 10]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+      must be less than 2**31, or
+
+           max window < 2**30
+
+      Since the max window is 2**S (where S is the scaling shift count)
+      times at most 2**16 - 1 (the maximum unscaled window), the maximum
+      window is guaranteed to be < 2**30 if S <= 14.  Thus, the shift
+      count must be limited to 14 (which allows windows of 2**30 = 1
+      Gbyte).  If a Window Scale option is received with a shift.cnt
+      value exceeding 14, the TCP should log the error but use 14
+      instead of the specified value.
+
+      The scale factor applies only to the Window field as transmitted
+      in the TCP header; each TCP using extended windows will maintain
+      the window values locally as 32-bit numbers.  For example, the
+      "congestion window" computed by Slow Start and Congestion
+      Avoidance is not affected by the scale factor, so window scaling
+      will not introduce quantization into the congestion window.
+
+3.  RTTM: ROUND-TRIP TIME MEASUREMENT
+
+   3.1  Introduction
+
+      Accurate and current RTT estimates are necessary to adapt to
+      changing traffic conditions and to avoid an instability known as
+      "congestion collapse" [Nagle84] in a busy network.  However,
+      accurate measurement of RTT may be difficult both in theory and in
+      implementation.
+
+      Many TCP implementations base their RTT measurements upon a sample
+      of only one packet per window.  While this yields an adequate
+      approximation to the RTT for small windows, it results in an
+      unacceptably poor RTT estimate for an LFN.  If we look at RTT
+      estimation as a signal processing problem (which it is), a data
+      signal at some frequency, the packet rate, is being sampled at a
+      lower frequency, the window rate.  This lower sampling frequency
+      violates Nyquist's criterion and may therefore introduce "aliasing"
+      artifacts into the estimated RTT [Hamming77].
+
+      A good RTT estimator with a conservative retransmission timeout
+      calculation can tolerate aliasing when the sampling frequency is
+      "close" to the data frequency.   For example, with a window of 8
+      packets, the sample rate is 1/8 the data frequency -- less than an
+      order of magnitude different.  However, when the window is tens or
+      hundreds of packets, the RTT estimator may be seriously in error,
+      resulting in spurious retransmissions.
+
+      If there are dropped packets, the problem becomes worse.  Zhang
+
+
+
+Jacobson, Braden, & Borman                                     [Page 11]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+      [Zhang86], Jain [Jain86] and Karn [Karn87] have shown that it is
+      not possible to accumulate reliable RTT estimates if retransmitted
+      segments are included in the estimate.  Since a full window of
+      data will have been transmitted prior to a retransmission, all of
+      the segments in that window will have to be ACKed before the next
+      RTT sample can be taken.  This means at least an additional
+      window's worth of time between RTT measurements and, as the error
+      rate approaches one per window of data (e.g., 10**-6 errors per
+      bit for the Wideband satellite network), it becomes effectively
+      impossible to obtain a valid RTT measurement.
+
+      A solution to these problems, which actually simplifies the sender
+      substantially, is as follows: using TCP options, the sender places
+      a timestamp in each data segment, and the receiver reflects these
+      timestamps back in ACK segments.  Then a single subtract gives the
+      sender an accurate RTT measurement for every ACK segment (which
+      will correspond to every other data segment, with a sensible
+      receiver).  We call this the RTTM (Round-Trip Time Measurement)
+      mechanism.
+
+      It is vitally important to use the RTTM mechanism with big
+      windows; otherwise, the door is opened to some dangerous
+      instabilities due to aliasing.  Furthermore, the option is
+      probably useful for all TCP's, since it simplifies the sender.
+
+   3.2  TCP Timestamps Option
+
+      TCP is a symmetric protocol, allowing data to be sent at any time
+      in either direction, and therefore timestamp echoing may occur in
+      either direction.  For simplicity and symmetry, we specify that
+      timestamps always be sent and echoed in both directions.  For
+      efficiency, we combine the timestamp and timestamp reply fields
+      into a single TCP Timestamps Option.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Jacobson, Braden, & Borman                                     [Page 12]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+      TCP Timestamps Option (TSopt):
+
+         Kind: 8
+
+         Length: 10 bytes
+
+          +-------+-------+---------------------+---------------------+
+          |Kind=8 |  10   |   TS Value (TSval)  |TS Echo Reply (TSecr)|
+          +-------+-------+---------------------+---------------------+
+              1       1              4                     4
+
+         The Timestamps option carries two four-byte timestamp fields.
+         The Timestamp Value field (TSval) contains the current value of
+         the timestamp clock of the TCP sending the option.
+
+         The Timestamp Echo Reply field (TSecr) is only valid if the ACK
+         bit is set in the TCP header; if it is valid, it echoes a times-
+         tamp value that was sent by the remote TCP in the TSval field
+         of a Timestamps option.  When TSecr is not valid, its value
+         must be zero.  The TSecr value will generally be from the most
+         recent Timestamp option that was received; however, there are
+         exceptions that are explained below.
+
+         A TCP may send the Timestamps option (TSopt) in an initial
+         <SYN> segment (i.e., segment containing a SYN bit and no ACK
+         bit), and may send a TSopt in other segments only if it re-
+         ceived a TSopt in the initial <SYN> segment for the connection.
+
+   3.3 The RTTM Mechanism
+
+      The timestamp value to be sent in TSval is to be obtained from a
+      (virtual) clock that we call the "timestamp clock".  Its values
+      must be at least approximately proportional to real time, in order
+      to measure actual RTT.
+
+      The following example illustrates a one-way data flow with
+      segments arriving in sequence without loss.  Here A, B, C...
+      represent data blocks occupying successive blocks of sequence
+      numbers, and ACK(A),...  represent the corresponding cumulative
+      acknowledgments.  The two timestamp fields of the Timestamps
+      option are shown symbolically as <TSval=x,TSecr=y>.  Each TSecr
+      field contains the value most recently received in a TSval field.
+
+
+
+
+
+
+
+
+
+Jacobson, Braden, & Borman                                     [Page 13]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+
+         TCP  A                                          TCP B
+
+                        <A,TSval=1,TSecr=120> ------>
+
+             <---- <ACK(A),TSval=127,TSecr=1>
+
+                        <B,TSval=5,TSecr=127> ------>
+
+             <---- <ACK(B),TSval=131,TSecr=5>
+
+             . . . . . . . . . . . . . . . . . . . . . .
+
+                        <C,TSval=65,TSecr=131> ------>
+
+             <---- <ACK(C),TSval=191,TSecr=65>
+
+                        (etc)
+
+
+      The dotted line marks a pause (60 time units long) in which A had
+      nothing to send.  Note that this pause inflates the RTT which B
+      could infer from receiving TSecr=131 in data segment C.  Thus, in
+      one-way data flows, RTTM in the reverse direction measures a value
+      that is inflated by gaps in sending data.  However, the following
+      rule prevents a resulting inflation of the measured RTT:
+
+           A TSecr value received in a segment is used to update the
+           averaged RTT measurement only if the segment acknowledges
+           some new data, i.e., only if it advances the left edge of the
+           send window.
+
+      Since TCP B is not sending data, the data segment C does not
+      acknowledge any new data when it arrives at B.  Thus, the inflated
+      RTTM measurement is not used to update B's RTTM measurement.
+
+   3.4  Which Timestamp to Echo
+
+      If more than one Timestamps option is received before a reply
+      segment is sent, the TCP must choose only one of the TSvals to
+      echo, ignoring the others.  To minimize the state kept in the
+      receiver (i.e., the number of unprocessed TSvals), the receiver
+      should be required to retain at most one timestamp in the
+      connection control block.
+
+
+
+
+
+
+
+Jacobson, Braden, & Borman                                     [Page 14]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+      There are three situations to consider:
+
+      (A)  Delayed ACKs.
+
+           Many TCP's acknowledge only every Kth segment out of a group
+           of segments arriving within a short time interval; this
+           policy is known generally as "delayed ACKs".  The data-sender
+           TCP must measure the effective RTT, including the additional
+           time due to delayed ACKs, or else it will retransmit
+           unnecessarily.  Thus, when delayed ACKs are in use, the
+           receiver should reply with the TSval field from the earliest
+           unacknowledged segment.
+
+      (B)  A hole in the sequence space (segment(s) have been lost).
+
+           The sender will continue sending until the window is filled,
+           and the receiver may be generating ACKs as these out-of-order
+           segments arrive (e.g., to aid "fast retransmit").
+
+           The lost segment is probably a sign of congestion, and in
+           that situation the sender should be conservative about
+           retransmission.  Furthermore, it is better to overestimate
+           than underestimate the RTT.  An ACK for an out-of-order
+           segment should therefore contain the timestamp from the most
+           recent segment that advanced the window.
+
+           The same situation occurs if segments are re-ordered by the
+           network.
+
+      (C)  A filled hole in the sequence space.
+
+           The segment that fills the hole represents the most recent
+           measurement of the network characteristics.  On the other
+           hand, an RTT computed from an earlier segment would probably
+           include the sender's retransmit time-out, badly biasing the
+           sender's average RTT estimate.  Thus, the timestamp from the
+           latest segment (which filled the hole) must be echoed.
+
+      An algorithm that covers all three cases is described in the
+      following rules for Timestamps option processing on a synchronized
+      connection:
+
+      (1)  The connection state is augmented with two 32-bit slots:
+           TS.Recent holds a timestamp to be echoed in TSecr whenever a
+           segment is sent, and Last.ACK.sent holds the ACK field from
+           the last segment sent.  Last.ACK.sent will equal RCV.NXT
+           except when ACKs have been delayed.
+
+
+
+
+Jacobson, Braden, & Borman                                     [Page 15]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+      (2)  If Last.ACK.sent falls within the range of sequence numbers
+           of an incoming segment:
+
+              SEG.SEQ <= Last.ACK.sent < SEG.SEQ + SEG.LEN
+
+           then the TSval from the segment is copied to TS.Recent;
+           otherwise, the TSval is ignored.
+
+      (3)  When a TSopt is sent, its TSecr field is set to the current
+           TS.Recent value.
+
+      The following examples illustrate these rules.  Here A, B, C...
+      represent data segments occupying successive blocks of sequence
+      numbers, and ACK(A),...  represent the corresponding
+      acknowledgment segments.  Note that ACK(A) has the same sequence
+      number as B.  We show only one direction of timestamp echoing, for
+      clarity.
+
+
+      o    Packets arrive in sequence, and some of the ACKs are delayed.
+
+           By Case (A), the timestamp from the oldest unacknowledged
+           segment is echoed.
+
+                                                      TS.Recent
+                    <A, TSval=1> ------------------->
+                                                          1
+                    <B, TSval=2> ------------------->
+                                                          1
+                    <C, TSval=3> ------------------->
+                                                          1
+                             <---- <ACK(C), TSecr=1>
+                    (etc)
+
+      o    Packets arrive out of order, and every packet is
+           acknowledged.
+
+           By Case (B), the timestamp from the last segment that
+           advanced the left window edge is echoed, until the missing
+           segment arrives; it is echoed according to Case (C).  The
+           same sequence would occur if segments B and D were lost and
+           retransmitted.
+
+
+
+
+
+
+
+
+
+Jacobson, Braden, & Borman                                     [Page 16]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+                                                      TS.Recent
+                    <A, TSval=1> ------------------->
+                                                          1
+                             <---- <ACK(A), TSecr=1>
+                                                          1
+                    <C, TSval=3> ------------------->
+                                                          1
+                             <---- <ACK(A), TSecr=1>
+                                                          1
+                    <B, TSval=2> ------------------->
+                                                          2
+                             <---- <ACK(C), TSecr=2>
+                                                          2
+                    <E, TSval=5> ------------------->
+                                                          2
+                             <---- <ACK(C), TSecr=2>
+                                                          2
+                    <D, TSval=4> ------------------->
+                                                          4
+                             <---- <ACK(E), TSecr=4>
+                    (etc)
+
+
+
+
+4.  PAWS: PROTECT AGAINST WRAPPED SEQUENCE NUMBERS
+
+   4.1  Introduction
+
+      Section 4.2 describes a simple mechanism to reject old duplicate
+      segments that might corrupt an open TCP connection; we call this
+      mechanism PAWS (Protect Against Wrapped Sequence numbers).  PAWS
+      operates within a single TCP connection, using state that is saved
+      in the connection control block.  Section 4.3 and Appendix C
+      discuss the implications of the PAWS mechanism for avoiding old
+      duplicates from previous incarnations of the same connection.
+
+   4.2  The PAWS Mechanism
+
+      PAWS uses the same TCP Timestamps option as the RTTM mechanism
+      described earlier, and assumes that every received TCP segment
+      (including data and ACK segments) contains a timestamp SEG.TSval
+      whose values are monotone non-decreasing in time.  The basic idea
+      is that a segment can be discarded as an old duplicate if it is
+      received with a timestamp SEG.TSval less than some timestamp
+      recently received on this connection.
+
+      In both the PAWS and the RTTM mechanism, the "timestamps" are 32-
+
+
+
+Jacobson, Braden, & Borman                                     [Page 17]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+      bit unsigned integers in a modular 32-bit space.  Thus, "less
+      than" is defined the same way it is for TCP sequence numbers, and
+      the same implementation techniques apply.  If s and t are
+      timestamp values, s < t if 0 < (t - s) < 2**31, computed in
+      unsigned 32-bit arithmetic.
+
+      The choice of incoming timestamps to be saved for this comparison
+      must guarantee a value that is monotone increasing.  For example,
+      we might save the timestamp from the segment that last advanced
+      the left edge of the receive window, i.e., the most recent in-
+      sequence segment.  Instead, we choose the value TS.Recent
+      introduced in Section 3.4 for the RTTM mechanism, since using a
+      common value for both PAWS and RTTM simplifies the implementation
+      of both.  As Section 3.4 explained, TS.Recent differs from the
+      timestamp from the last in-sequence segment only in the case of
+      delayed ACKs, and therefore by less than one window.  Either
+      choice will therefore protect against sequence number wrap-around.
+
+      RTTM was specified in a symmetrical manner, so that TSval
+      timestamps are carried in both data and ACK segments and are
+      echoed in TSecr fields carried in returning ACK or data segments.
+      PAWS submits all incoming segments to the same test, and therefore
+      protects against duplicate ACK segments as well as data segments.
+      (An alternative un-symmetric algorithm would protect against old
+      duplicate ACKs: the sender of data would reject incoming ACK
+      segments whose TSecr values were less than the TSecr saved from
+      the last segment whose ACK field advanced the left edge of the
+      send window.  This algorithm was deemed to lack economy of
+      mechanism and symmetry.)
+
+      TSval timestamps sent on {SYN} and {SYN,ACK} segments are used to
+      initialize PAWS.  PAWS protects against old duplicate non-SYN
+      segments, and duplicate SYN segments received while there is a
+      synchronized connection.  Duplicate {SYN} and {SYN,ACK} segments
+      received when there is no connection will be discarded by the
+      normal 3-way handshake and sequence number checks of TCP.
+
+      It is recommended that RST segments NOT carry timestamps, and that
+      RST segments be acceptable regardless of their timestamp.  Old
+      duplicate RST segments should be exceedingly unlikely, and their
+      cleanup function should take precedence over timestamps.
+
+      4.2.1  Basic PAWS Algorithm
+
+         The PAWS algorithm requires the following processing to be
+         performed on all incoming segments for a synchronized
+         connection:
+
+
+
+
+Jacobson, Braden, & Borman                                     [Page 18]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+         R1)  If there is a Timestamps option in the arriving segment
+              and SEG.TSval < TS.Recent and if TS.Recent is valid (see
+              later discussion), then treat the arriving segment as not
+              acceptable:
+
+                   Send an acknowledgement in reply as specified in
+                   RFC-793 page 69 and drop the segment.
+
+                   Note: it is necessary to send an ACK segment in order
+                   to retain TCP's mechanisms for detecting and
+                   recovering from half-open connections.  For example,
+                   see Figure 10 of RFC-793.
+
+         R2)  If the segment is outside the window, reject it (normal
+              TCP processing)
+
+         R3)  If an arriving segment satisfies: SEG.SEQ <= Last.ACK.sent
+              (see Section 3.4), then record its timestamp in TS.Recent.
+
+         R4)  If an arriving segment is in-sequence (i.e., at the left
+              window edge), then accept it normally.
+
+         R5)  Otherwise, treat the segment as a normal in-window, out-
+              of-sequence TCP segment (e.g., queue it for later delivery
+              to the user).
+
+         Steps R2, R4, and R5 are the normal TCP processing steps
+         specified by RFC-793.
+
+         It is important to note that the timestamp is checked only when
+         a segment first arrives at the receiver, regardless of whether
+         it is in-sequence or it must be queued for later delivery.
+         Consider the following example.
+
+              Suppose the segment sequence: A.1, B.1, C.1, ..., Z.1 has
+              been sent, where the letter indicates the sequence number
+              and the digit represents the timestamp.  Suppose also that
+              segment B.1 has been lost.  The timestamp in TS.Recent is
+              1 (from A.1), so C.1, ..., Z.1 are considered acceptable
+              and are queued.  When B is retransmitted as segment B.2
+              (using the latest timestamp), it fills the hole and causes
+              all the segments through Z to be acknowledged and passed
+              to the user.  The timestamps of the queued segments are
+              *not* inspected again at this time, since they have
+              already been accepted.  When B.2 is accepted, TS.Recent is
+              set to 2.
+
+         This rule allows reasonable performance under loss.  A full
+
+
+
+Jacobson, Braden, & Borman                                     [Page 19]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+         window of data is in transit at all times, and after a loss a
+         full window less one packet will show up out-of-sequence to be
+         queued at the receiver (e.g., up to ~2**30 bytes of data); the
+         timestamp option must not result in discarding this data.
+
+         In certain unlikely circumstances, the algorithm of rules R1-R5
+         could lead to discarding some segments unnecessarily, as shown
+         in the following example:
+
+              Suppose again that segments: A.1, B.1, C.1, ..., Z.1 have
+              been sent in sequence and that segment B.1 has been lost.
+              Furthermore, suppose delivery of some of C.1, ... Z.1 is
+              delayed until AFTER the retransmission B.2 arrives at the
+              receiver.  These delayed segments will be discarded
+              unnecessarily when they do arrive, since their timestamps
+              are now out of date.
+
+         This case is very unlikely to occur.  If the retransmission was
+         triggered by a timeout, some of the segments C.1, ... Z.1 must
+         have been delayed longer than the RTO time.  This is presumably
+         an unlikely event, or there would be many spurious timeouts and
+         retransmissions.  If B's retransmission was triggered by the
+         "fast retransmit" algorithm, i.e., by duplicate ACKs, then the
+         queued segments that caused these ACKs must have been received
+         already.
+
+         Even if a segment were delayed past the RTO, the Fast
+         Retransmit mechanism [Jacobson90c] will cause the delayed
+         packets to be retransmitted at the same time as B.2, avoiding
+         an extra RTT and therefore causing a very small performance
+         penalty.
+
+         We know of no case with a significant probability of occurrence
+         in which timestamps will cause performance degradation by
+         unnecessarily discarding segments.
+
+      4.2.2  Timestamp Clock
+
+         It is important to understand that the PAWS algorithm does not
+         require clock synchronization between sender and receiver.  The
+         sender's timestamp clock is used to stamp the segments, and the
+         sender uses the echoed timestamp to measure RTT's.  However,
+         the receiver treats the timestamp as simply a monotone-
+         increasing serial number, without any necessary connection to
+         its clock.  From the receiver's viewpoint, the timestamp is
+         acting as a logical extension of the high-order bits of the
+         sequence number.
+
+
+
+
+Jacobson, Braden, & Borman                                     [Page 20]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+         The receiver algorithm does place some requirements on the
+         frequency of the timestamp clock.
+
+         (a)  The timestamp clock must not be "too slow".
+
+              It must tick at least once for each 2**31 bytes sent.  In
+              fact, in order to be useful to the sender for round trip
+              timing, the clock should tick at least once per window's
+              worth of data, and even with the RFC-1072 window
+              extension, 2**31 bytes must be at least two windows.
+
+              To make this more quantitative, any clock faster than 1
+              tick/sec will reject old duplicate segments for link
+              speeds of ~8 Gbps.  A 1ms timestamp clock will work at
+              link speeds up to 8 Tbps (8*10**12 bps)!
+
+         (b)  The timestamp clock must not be "too fast".
+
+              Its recycling time must be greater than MSL seconds.
+              Since the clock (timestamp) is 32 bits and the worst-case
+              MSL is 255 seconds, the maximum acceptable clock frequency
+              is one tick every 59 ns.
+
+              However, it is desirable to establish a much longer
+              recycle period, in order to handle outdated timestamps on
+              idle connections (see Section 4.2.3), and to relax the MSL
+              requirement for preventing sequence number wrap-around.
+              With a 1 ms timestamp clock, the 32-bit timestamp will
+              wrap its sign bit in 24.8 days.  Thus, it will reject old
+              duplicates on the same connection if MSL is 24.8 days or
+              less.  This appears to be a very safe figure; an MSL of
+              24.8 days or longer can probably be assumed by the gateway
+              system without requiring precise MSL enforcement by the
+              TTL value in the IP layer.
+
+         Based upon these considerations, we choose a timestamp clock
+         frequency in the range 1 ms to 1 sec per tick.  This range also
+         matches the requirements of the RTTM mechanism, which does not
+         need much more resolution than the granularity of the
+         retransmit timer, e.g., tens or hundreds of milliseconds.
+
+         The PAWS mechanism also puts a strong monotonicity requirement
+         on the sender's timestamp clock.  The method of implementation
+         of the timestamp clock to meet this requirement depends upon
+         the system hardware and software.
+
+         *    Some hosts have a hardware clock that is guaranteed to be
+              monotonic between hardware resets.
+
+
+
+Jacobson, Braden, & Borman                                     [Page 21]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+         *    A clock interrupt may be used to simply increment a binary
+              integer by 1 periodically.
+
+         *    The timestamp clock may be derived from a system clock
+              that is subject to being abruptly changed, by adding a
+              variable offset value.  This offset is initialized to
+              zero.  When a new timestamp clock value is needed, the
+              offset can be adjusted as necessary to make the new value
+              equal to or larger than the previous value (which was
+              saved for this purpose).
+
+
+      4.2.3  Outdated Timestamps
+
+         If a connection remains idle long enough for the timestamp
+         clock of the other TCP to wrap its sign bit, then the value
+         saved in TS.Recent will become too old; as a result, the PAWS
+         mechanism will cause all subsequent segments to be rejected,
+         freezing the connection (until the timestamp clock wraps its
+         sign bit again).
+
+         With the chosen range of timestamp clock frequencies (1 ms to
+         1 sec), the time to wrap the sign bit will be between 24.8 days
+         and 24800 days.  A TCP connection that is idle for more than 24
+         days and then comes to life is exceedingly unusual.  However,
+         it is undesirable in principle to place any limitation on TCP
+         connection lifetimes.
+
+         We therefore require that an implementation of PAWS include a
+         mechanism to "invalidate" the TS.Recent value when a connection
+         is idle for more than 24 days.  (An alternative solution to the
+         problem of outdated timestamps would be to send keepalive
+         segments at a very low rate, but still more often than the
+         wrap-around time for timestamps, e.g., once a day.  This would
+         impose negligible overhead.  However, the TCP specification has
+         never included keepalives, so the solution based upon
+         invalidation was chosen.)
+
+         Note that a TCP does not know the frequency, and therefore, the
+         wraparound time, of the other TCP, so it must assume the worst.
+         The validity of TS.Recent needs to be checked only if the basic
+         PAWS timestamp check fails, i.e., only if SEG.TSval <
+         TS.Recent.  If TS.Recent is found to be invalid, then the
+         segment is accepted, regardless of the failure of the timestamp
+         check, and rule R3 updates TS.Recent with the TSval from the
+         new segment.
+
+         To detect how long the connection has been idle, the TCP may
+
+
+
+Jacobson, Braden, & Borman                                     [Page 22]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+         update a clock or timestamp value associated with the
+         connection whenever TS.Recent is updated, for example.  The
+         details will be implementation-dependent.
+
+      4.2.4  Header Prediction
+
+         "Header prediction" [Jacobson90a] is a high-performance
+         transport protocol implementation technique that is most
+         important for high-speed links.  This technique optimizes the
+         code for the most common case, receiving a segment correctly
+         and in order.  Using header prediction, the receiver asks the
+         question, "Is this segment the next in sequence?"  This
+         question can be answered in fewer machine instructions than the
+         question, "Is this segment within the window?"
+
+         Adding header prediction to our timestamp procedure leads to
+         the following recommended sequence for processing an arriving
+         TCP segment:
+
+         H1)  Check timestamp (same as step R1 above)
+
+         H2)  Do header prediction: if segment is next in sequence and
+              if there are no special conditions requiring additional
+              processing, accept the segment, record its timestamp, and
+              skip H3.
+
+         H3)  Process the segment normally, as specified in RFC-793.
+              This includes dropping segments that are outside the win-
+              dow and possibly sending acknowledgments, and queueing
+              in-window, out-of-sequence segments.
+
+         Another possibility would be to interchange steps H1 and H2,
+         i.e., to perform the header prediction step H2 FIRST, and
+         perform H1 and H3 only when header prediction fails.  This
+         could be a performance improvement, since the timestamp check
+         in step H1 is very unlikely to fail, and it requires interval
+         arithmetic on a finite field, a relatively expensive operation.
+         To perform this check on every single segment is contrary to
+         the philosophy of header prediction.  We believe that this
+         change might reduce CPU time for TCP protocol processing by up
+         to 5-10% on high-speed networks.
+
+         However, putting H2 first would create a hazard: a segment from
+         2**32 bytes in the past might arrive at exactly the wrong time
+         and be accepted mistakenly by the header-prediction step.  The
+         following reasoning has been introduced [Jacobson90b] to show
+         that the probability of this failure is negligible.
+
+
+
+
+Jacobson, Braden, & Borman                                     [Page 23]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+              If all segments are equally likely to show up as old
+              duplicates, then the probability of an old duplicate
+              exactly matching the left window edge is the maximum
+              segment size (MSS) divided by the size of the sequence
+              space.  This ratio must be less than 2**-16, since MSS
+              must be < 2**16; for example, it will be (2**12)/(2**32) =
+              2**-20 for an FDDI link.  However, the older a segment is,
+              the less likely it is to be retained in the Internet, and
+              under any reasonable model of segment lifetime the
+              probability of an old duplicate exactly at the left window
+              edge must be much smaller than 2**-16.
+
+              The 16 bit TCP checksum also allows a basic unreliability
+              of one part in 2**16.  A protocol mechanism whose
+              reliability exceeds the reliability of the TCP checksum
+              should be considered "good enough", i.e., it won't
+              contribute significantly to the overall error rate.  We
+              therefore believe we can ignore the problem of an old
+              duplicate being accepted by doing header prediction before
+              checking the timestamp.
+
+         However, this probabilistic argument is not universally
+         accepted, and the consensus at present is that the performance
+         gain does not justify the hazard in the general case.  It is
+         therefore recommended that H2 follow H1.
+
+   4.3.  Duplicates from Earlier Incarnations of Connection
+
+      The PAWS mechanism protects against errors due to sequence number
+      wrap-around on high-speed connections.  Segments from an earlier
+      incarnation of the same connection are also a potential cause of
+      old duplicate errors.  In both cases, the TCP mechanisms to
+      prevent such errors depend upon the enforcement of a maximum
+      segment lifetime (MSL) by the Internet (IP) layer (see Appendix of
+      RFC-1185 for a detailed discussion).  Unlike the case of sequence
+      space wrap-around, the MSL required to prevent old duplicate
+      errors from earlier incarnations does not depend upon the transfer
+      rate.  If the IP layer enforces the recommended 2 minute MSL of
+      TCP, and if the TCP rules are followed, TCP connections will be
+      safe from earlier incarnations, no matter how high the network
+      speed.  Thus, the PAWS mechanism is not required for this case.
+
+      We may still ask whether the PAWS mechanism can provide additional
+      security against old duplicates from earlier connections, allowing
+      us to relax the enforcement of MSL by the IP layer.  Appendix B
+      explores this question, showing that further assumptions and/or
+      mechanisms are required, beyond those of PAWS.  This is not part
+      of the current extension.
+
+
+
+Jacobson, Braden, & Borman                                     [Page 24]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+5.  CONCLUSIONS AND ACKNOWLEDGMENTS
+
+   This memo presented a set of extensions to TCP to provide efficient
+   operation over large-bandwidth*delay-product paths and reliable
+   operation over very high-speed paths.  These extensions are designed
+   to provide compatible interworking with TCP's that do not implement
+   the extensions.
+
+   These mechanisms are implemented using new TCP options for scaled
+   windows and timestamps.  The timestamps are used for two distinct
+   mechanisms: RTTM (Round Trip Time Measurement) and PAWS (Protect
+   Against Wrapped Sequences).
+
+   The Window Scale option was originally suggested by Mike St. Johns of
+   USAF/DCA.  The present form of the option was suggested by Mike
+   Karels of UC Berkeley in response to a more cumbersome scheme defined
+   by Van Jacobson.  Lixia Zhang helped formulate the PAWS mechanism
+   description in RFC-1185.
+
+   Finally, much of this work originated as the result of discussions
+   within the End-to-End Task Force on the theoretical limitations of
+   transport protocols in general and TCP in particular.  More recently,
+   task force members and others on the end2end-interest list have made
+   valuable contributions by pointing out flaws in the algorithms and
+   the documentation.  The authors are grateful for all these
+   contributions.
+
+6.  REFERENCES
+
+      [Clark87]  Clark, D., Lambert, M., and L. Zhang, "NETBLT: A Bulk
+      Data Transfer Protocol", RFC 998, MIT, March 1987.
+
+      [Garlick77]  Garlick, L., R. Rom, and J. Postel, "Issues in
+      Reliable Host-to-Host Protocols", Proc. Second Berkeley Workshop
+      on Distributed Data Management and Computer Networks, May 1977.
+
+      [Hamming77]  Hamming, R., "Digital Filters", ISBN 0-13-212571-4,
+      Prentice Hall, Englewood Cliffs, N.J., 1977.
+
+      [Cheriton88]  Cheriton, D., "VMTP: Versatile Message Transaction
+      Protocol", RFC 1045, Stanford University, February 1988.
+
+      [Jacobson88a] Jacobson, V., "Congestion Avoidance and Control",
+      SIGCOMM '88, Stanford, CA., August 1988.
+
+      [Jacobson88b]  Jacobson, V., and R. Braden, "TCP Extensions for
+      Long-Delay Paths", RFC-1072, LBL and USC/Information Sciences
+      Institute, October 1988.
+
+
+
+Jacobson, Braden, & Borman                                     [Page 25]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+      [Jacobson90a]  Jacobson, V., "4BSD Header Prediction", ACM
+      Computer Communication Review, April 1990.
+
+      [Jacobson90b]  Jacobson, V., Braden, R., and Zhang, L., "TCP
+      Extension for High-Speed Paths", RFC-1185, LBL and USC/Information
+      Sciences Institute, October 1990.
+
+      [Jacobson90c]  Jacobson, V., "Modified TCP congestion avoidance
+      algorithm", Message to end2end-interest mailing list, April 1990.
+
+      [Jain86]  Jain, R., "Divergence of Timeout Algorithms for Packet
+      Retransmissions", Proc. Fifth Phoenix Conf. on Comp. and Comm.,
+      Scottsdale, Arizona, March 1986.
+
+      [Karn87]  Karn, P. and C. Partridge, "Estimating Round-Trip Times
+      in Reliable Transport Protocols", Proc. SIGCOMM '87, Stowe, VT,
+      August 1987.
+
+      [McKenzie89]  McKenzie, A., "A Problem with the TCP Big Window
+      Option", RFC 1110, BBN STC, August 1989.
+
+      [Nagle84]  Nagle, J., "Congestion Control in IP/TCP
+      Internetworks", RFC 896, FACC, January 1984.
+
+      [NBS85]  Colella, R., Aronoff, R., and K. Mills, "Performance
+      Improvements for ISO Transport", Ninth Data Comm Symposium,
+      published in ACM SIGCOMM Comp Comm Review, vol. 15, no. 5,
+      September 1985.
+
+      [Postel81]  Postel, J., "Transmission Control Protocol - DARPA
+      Internet Program Protocol Specification", RFC 793, DARPA,
+      September 1981.
+
+      [Velten84] Velten, D., Hinden, R., and J. Sax, "Reliable Data
+      Protocol", RFC 908, BBN, July 1984.
+
+      [Watson81]  Watson, R., "Timer-based Mechanisms in Reliable
+      Transport Protocol Connection Management", Computer Networks, Vol.
+      5, 1981.
+
+      [Zhang86]  Zhang, L., "Why TCP Timers Don't Work Well", Proc.
+      SIGCOMM '86, Stowe, Vt., August 1986.
+
+
+
+
+
+
+
+
+
+Jacobson, Braden, & Borman                                     [Page 26]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+APPENDIX A:  IMPLEMENTATION SUGGESTIONS
+
+   The following layouts are recommended for sending options on non-SYN
+   segments, to achieve maximum feasible alignment of 32-bit and 64-bit
+   machines.
+
+
+       +--------+--------+--------+--------+
+       |   NOP  |  NOP   |  TSopt |   10   |
+       +--------+--------+--------+--------+
+       |          TSval   timestamp        |
+       +--------+--------+--------+--------+
+       |          TSecr   timestamp        |
+       +--------+--------+--------+--------+
+
+
+APPENDIX B: DUPLICATES FROM EARLIER CONNECTION INCARNATIONS
+
+   There are two cases to be considered:  (1) a system crashing (and
+   losing connection state) and restarting, and (2) the same connection
+   being closed and reopened without a loss of host state.  These will
+   be described in the following two sections.
+
+   B.1  System Crash with Loss of State
+
+      TCP's quiet time of one MSL upon system startup handles the loss
+      of connection state in a system crash/restart.  For an
+      explanation, see for example "When to Keep Quiet" in the TCP
+      protocol specification [Postel81].  The MSL that is required here
+      does not depend upon the transfer speed.  The current TCP MSL of 2
+      minutes seems acceptable as an operational compromise, as many
+      host systems take this long to boot after a crash.
+
+      However, the timestamp option may be used to ease the MSL
+      requirements (or to provide additional security against data
+      corruption).  If timestamps are being used and if the timestamp
+      clock can be guaranteed to be monotonic over a system
+      crash/restart, i.e., if the first value of the sender's timestamp
+      clock after a crash/restart can be guaranteed to be greater than
+      the last value before the restart, then a quiet time will be
+      unnecessary.
+
+      To dispense totally with the quiet time would require that the
+      host clock be synchronized to a time source that is stable over
+      the crash/restart period, with an accuracy of one timestamp clock
+      tick or better.  We can back off from this strict requirement to
+      take advantage of approximate clock synchronization.  Suppose that
+      the clock is always re-synchronized to within N timestamp clock
+
+
+
+Jacobson, Braden, & Borman                                     [Page 27]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+      ticks and that booting (extended with a quiet time, if necessary)
+      takes more than N ticks.  This will guarantee monotonicity of the
+      timestamps, which can then be used to reject old duplicates even
+      without an enforced MSL.
+
+   B.2  Closing and Reopening a Connection
+
+      When a TCP connection is closed, a delay of 2*MSL in TIME-WAIT
+      state ties up the socket pair for 4 minutes (see Section 3.5 of
+      [Postel81]).  Applications built upon TCP that close one connection
+      and open a new one (e.g., an FTP data transfer connection using
+      Stream mode) must choose a new socket pair each time.  The TIME-
+      WAIT delay serves two different purposes:
+
+      (a)  Implement the full-duplex reliable close handshake of TCP.
+
+           The proper time to delay the final close step is not really
+           related to the MSL; it depends instead upon the RTO for the
+           FIN segments and therefore upon the RTT of the path.  (It
+           could be argued that the side that is sending a FIN knows
+           what degree of reliability it needs, and therefore it should
+           be able to determine the length of the TIME-WAIT delay for
+           the FIN's recipient.  This could be accomplished with an
+           appropriate TCP option in FIN segments.)
+
+           Although there is no formal upper-bound on RTT, common
+           network engineering practice makes an RTT greater than 1
+           minute very unlikely.  Thus, the 4 minute delay in TIME-WAIT
+           state works satisfactorily to provide a reliable full-duplex
+           TCP close.  Note again that this is independent of MSL
+           enforcement and network speed.
+
+           The TIME-WAIT state could cause an indirect performance
+           problem if an application needed to repeatedly close one
+           connection and open another at a very high frequency, since
+           the number of available TCP ports on a host is less than
+           2**16.  However, high network speeds are not the major
+           contributor to this problem; the RTT is the limiting factor
+           in how quickly connections can be opened and closed.
+           Therefore, this problem will be no worse at high transfer
+           speeds.
+
+      (b)  Allow old duplicate segments to expire.
+
+           To replace this function of TIME-WAIT state, a mechanism
+           would have to operate across connections.  PAWS is defined
+           strictly within a single connection; the last timestamp,
+           TS.Recent, is kept in the connection control block, and
+
+
+
+Jacobson, Braden, & Borman                                     [Page 28]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+           discarded when a connection is closed.
+
+           An additional mechanism could be added to the TCP, a per-host
+           cache of the last timestamp received from any connection.
+           This value could then be used in the PAWS mechanism to reject
+           old duplicate segments from earlier incarnations of the
+           connection, if the timestamp clock can be guaranteed to have
+           ticked at least once since the old connection was open.  This
+           would require that the TIME-WAIT delay plus the RTT together
+           must be at least one tick of the sender's timestamp clock.
+           Such an extension is not part of the proposal of this RFC.
+
+           Note that this is a variant on the mechanism proposed by
+           Garlick, Rom, and Postel [Garlick77], which required each
+           host to maintain connection records containing the highest
+           sequence numbers on every connection.  Using timestamps
+           instead, it is only necessary to keep one quantity per remote
+           host, regardless of the number of simultaneous connections to
+           that host.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Jacobson, Braden, & Borman                                     [Page 29]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+APPENDIX C: CHANGES FROM RFC-1072, RFC-1185
+
+   The protocol extensions defined in this document differ in several
+   important ways from those defined in RFC-1072 and RFC-1185.
+
+   (a)  SACK has been deferred to a later memo.
+
+   (b)  The detailed rules for sending timestamp replies (see Section
+        3.4) differ in important ways.  The earlier rules could result
+        in an under-estimate of the RTT in certain cases (packets
+        dropped or out of order).
+
+   (c)  The same value TS.Recent is now shared by the two distinct
+        mechanisms RTTM and PAWS.  This simplification became possible
+        because of change (b).
+
+   (d)  An ambiguity in RFC-1185 was resolved in favor of putting
+        timestamps on ACK as well as data segments.  This supports the
+        symmetry of the underlying TCP protocol.
+
+   (e)  The echo and echo reply options of RFC-1072 were combined into a
+        single Timestamps option, to reflect the symmetry and to
+        simplify processing.
+
+   (f)  The problem of outdated timestamps on long-idle connections,
+        discussed in Section 4.2.2, was realized and resolved.
+
+   (g)  RFC-1185 recommended that header prediction take precedence over
+        the timestamp check.  Based upon some scepticism about the
+        probabilistic arguments given in Section 4.2.4, it was decided
+        to recommend that the timestamp check be performed first.
+
+   (h)  The spec was modified so that the extended options will be sent
+        on <SYN,ACK> segments only when they are received in the
+        corresponding <SYN> segments.  This provides the most
+        conservative possible conditions for interoperation with
+        implementations without the extensions.
+
+   In addition to these substantive changes, the present RFC attempts to
+   specify the algorithms unambiguously by presenting modifications to
+   the Event Processing rules of RFC-793; see Appendix E.
+
+
+
+
+
+
+
+
+
+
+Jacobson, Braden, & Borman                                     [Page 30]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+APPENDIX D: SUMMARY OF NOTATION
+
+   The following notation has been used in this document.
+
+   Options
+
+       WSopt:       TCP Window Scale Option
+       TSopt:       TCP Timestamps Option
+
+   Option Fields
+
+       shift.cnt:   Window scale byte in WSopt.
+       TSval:       32-bit Timestamp Value field in TSopt.
+       TSecr:       32-bit Timestamp Echo Reply field in TSopt.
+
+   Option Fields in Current Segment
+
+       SEG.TSval:   TSval field from TSopt in current segment.
+       SEG.TSecr:   TSecr field from TSopt in current segment.
+       SEG.WSopt:   8-bit value in WSopt
+
+   Clock Values
+
+       my.TSclock:      Local source of 32-bit timestamp values
+       my.TSclock.rate: Period of my.TSclock (1 ms to 1 sec).
+
+   Per-Connection State Variables
+
+       TS.Recent:       Latest received Timestamp
+       Last.ACK.sent:   Last ACK field sent
+
+       Snd.TS.OK:       1-bit flag
+       Snd.WS.OK:       1-bit flag
+
+       Rcv.Wind.Scale:  Receive window scale power
+       Snd.Wind.Scale:  Send window scale power
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Jacobson, Braden, & Borman                                     [Page 31]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+APPENDIX E: EVENT PROCESSING
+
+
+Event Processing
+
+  OPEN Call
+
+     ...
+    An initial send sequence number (ISS) is selected.  Send a SYN
+    segment of the form:
+
+        <SEQ=ISS><CTL=SYN><TSval=my.TSclock><WSopt=Rcv.Wind.Scale>
+
+      ...
+
+  SEND Call
+
+    CLOSED STATE (i.e., TCB does not exist)
+
+      ...
+
+    LISTEN STATE
+
+      If the foreign socket is specified, then change the connection
+      from passive to active, select an ISS.  Send a SYN segment
+      containing the options: <TSval=my.TSclock> and
+      <WSopt=Rcv.Wind.Scale>.  Set SND.UNA to ISS, SND.NXT to ISS+1.
+      Enter SYN-SENT state. ...
+
+    SYN-SENT STATE
+    SYN-RECEIVED STATE
+
+      ...
+
+    ESTABLISHED STATE
+    CLOSE-WAIT STATE
+
+      Segmentize the buffer and send it with a piggybacked
+      acknowledgment (acknowledgment value = RCV.NXT).  ...
+
+      If the urgent flag is set ...
+
+      If the Snd.TS.OK flag is set, then include the TCP Timestamps
+      option <TSval=my.TSclock,TSecr=TS.Recent> in each data segment.
+
+      Scale the receive window for transmission in the segment header:
+
+            SEG.WND = (RCV.WND >> Rcv.Wind.Scale).
+
+
+
+Jacobson, Braden, & Borman                                     [Page 32]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+  SEGMENT ARRIVES
+
+     ...
+
+    If the state is LISTEN then
+
+      first check for an RST
+
+        ...
+
+      second check for an ACK
+
+        ...
+
+      third check for a SYN
+
+        if the SYN bit is set, check the security.  If the ...
+
+         ...
+
+        If the SEG.PRC is less than the TCB.PRC then continue.
+
+        Check for a Window Scale option (WSopt); if one is found, save
+        SEG.WSopt in Snd.Wind.Scale and set Snd.WS.OK flag on.
+        Otherwise, set both Snd.Wind.Scale and Rcv.Wind.Scale to zero
+        and clear Snd.WS.OK flag.
+
+        Check for a TSopt option; if one is found, save SEG.TSval in the
+        variable TS.Recent and turn on the Snd.TS.OK bit.
+
+        Set RCV.NXT to SEG.SEQ+1, IRS is set to SEG.SEQ and any other
+        control or text should be queued for processing later.  ISS
+        should be selected and a SYN segment sent of the form:
+
+          <SEQ=ISS><ACK=RCV.NXT><CTL=SYN,ACK>
+
+        If the Snd.WS.OK bit is on, include a WSopt option
+        <WSopt=Rcv.Wind.Scale> in this segment.  If the Snd.TS.OK bit is
+        on, include a TSopt <TSval=my.TSclock,TSecr=TS.Recent> in this
+        segment.  Last.ACK.sent is set to RCV.NXT.
+
+        SND.NXT is set to ISS+1 and SND.UNA to ISS.  The connection
+        state should be changed to SYN-RECEIVED.  Note that any other
+        incoming control or data (combined with SYN) will be processed
+        in the SYN-RECEIVED state, but processing of SYN and ACK should
+        not be repeated.  If the listen was not fully specified (i.e.,
+        the foreign socket was not fully specified), then the
+        unspecified fields should be filled in now.
+
+
+
+Jacobson, Braden, & Borman                                     [Page 33]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+      fourth other text or control
+
+       ...
+
+    If the state is SYN-SENT then
+
+      first check the ACK bit
+
+        ...
+
+      fourth check the SYN bit
+
+         ...
+
+        If the SYN bit is on and the security/compartment and precedence
+        are acceptable then, RCV.NXT is set to SEG.SEQ+1, IRS is set to
+        SEG.SEQ, and any acknowledgements on the retransmission queue
+        which are thereby acknowledged should be removed.
+
+        Check for a Window Scale option (WSopt); if one is found, save
+        SEG.WSopt in Snd.Wind.Scale; otherwise, set both Snd.Wind.Scale
+        and Rcv.Wind.Scale to zero.
+
+        Check for a TSopt option; if one is found, save SEG.TSval in
+        variable TS.Recent and turn on the Snd.TS.OK bit in the
+        connection control block.  If the ACK bit is set, use my.TSclock
+        - SEG.TSecr as the initial RTT estimate.
+
+        If SND.UNA > ISS (our SYN has been ACKed), change the connection
+        state to ESTABLISHED, form an ACK segment:
+
+            <SEQ=SND.NXT><ACK=RCV.NXT><CTL=ACK>
+
+        and send it.  If the Snd.TS.OK bit is on, include a TSopt
+        option <TSval=my.TSclock,TSecr=TS.Recent> in this ACK segment.
+        Last.ACK.sent is set to RCV.NXT.
+
+        Data or controls which were queued for transmission may be
+        included.  If there are other controls or text in the segment
+        then continue processing at the sixth step below where the URG
+        bit is checked, otherwise return.
+
+        Otherwise enter SYN-RECEIVED, form a SYN,ACK segment:
+
+            <SEQ=ISS><ACK=RCV.NXT><CTL=SYN,ACK>
+
+        and send it.  If the Snd.TS.OK bit is on, include a TSopt
+        option <TSval=my.TSclock,TSecr=TS.Recent> in this segment.  If
+
+
+
+Jacobson, Braden, & Borman                                     [Page 34]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+        the Snd.WS.OK bit is on, include a WSopt option
+        <WSopt=Rcv.Wind.Scale> in this segment.  Last.ACK.sent is set to
+        RCV.NXT.
+
+        If there are other controls or text in the segment, queue them
+        for processing after the ESTABLISHED state has been reached,
+        return.
+
+      fifth, if neither of the SYN or RST bits is set then drop the
+      segment and return.
+
+
+    Otherwise,
+
+    First, check sequence number
+
+      SYN-RECEIVED STATE
+      ESTABLISHED STATE
+      FIN-WAIT-1 STATE
+      FIN-WAIT-2 STATE
+      CLOSE-WAIT STATE
+      CLOSING STATE
+      LAST-ACK STATE
+      TIME-WAIT STATE
+
+        Segments are processed in sequence.  Initial tests on arrival
+        are used to discard old duplicates, but further processing is
+        done in SEG.SEQ order.  If a segment's contents straddle the
+        boundary between old and new, only the new parts should be
+        processed.
+
+        Rescale the received window field:
+
+            TrueWindow = SEG.WND << Snd.Wind.Scale,
+
+        and use "TrueWindow" in place of SEG.WND in the following steps.
+
+        Check whether the segment contains a Timestamps option and bit
+        Snd.TS.OK is on.  If so:
+
+          If SEG.TSval < TS.Recent, then test whether connection has
+          been idle less than 24 days; if both are true, then the
+          segment is not acceptable; follow steps below for an
+          unacceptable segment.
+
+          If SEG.SEQ is equal to Last.ACK.sent, then save SEG.TSval in
+          variable TS.Recent.
+
+
+
+
+Jacobson, Braden, & Borman                                     [Page 35]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+        There are four cases for the acceptability test for an incoming
+        segment:
+
+          ...
+
+        If an incoming segment is not acceptable, an acknowledgment
+        should be sent in reply (unless the RST bit is set, if so drop
+        the segment and return):
+
+          <SEQ=SND.NXT><ACK=RCV.NXT><CTL=ACK>
+
+        Last.ACK.sent is set to SEG.ACK of the acknowledgment.  If the
+        Snd.TS.OK bit is on, include the Timestamps option
+        <TSval=my.TSclock,TSecr=TS.Recent> in this ACK segment.  Set
+        Last.ACK.sent to SEG.ACK and send the ACK segment.  After
+        sending the acknowledgment, drop the unacceptable segment and
+        return.
+
+          ...
+
+    fifth check the ACK field.
+
+      if the ACK bit is off drop the segment and return.
+
+      if the ACK bit is on
+
+        ...
+
+        ESTABLISHED STATE
+
+          If SND.UNA < SEG.ACK =< SND.NXT then, set SND.UNA <- SEG.ACK.
+          Also compute a new estimate of round-trip time.  If Snd.TS.OK
+          bit is on, use my.TSclock - SEG.TSecr; otherwise use the
+          elapsed time since the first segment in the retransmission
+          queue was sent.  Any segments on the retransmission queue
+          which are thereby entirely acknowledged...
+
+            ...
+
+    Seventh, process the segment text.
+
+      ESTABLISHED STATE
+      FIN-WAIT-1 STATE
+      FIN-WAIT-2 STATE
+
+          ...
+
+        Send an acknowledgment of the form:
+
+
+
+Jacobson, Braden, & Borman                                     [Page 36]
+
+RFC 1323          TCP Extensions for High Performance           May 1992
+
+
+          <SEQ=SND.NXT><ACK=RCV.NXT><CTL=ACK>
+
+        If the Snd.TS.OK bit is on, include Timestamps option
+        <TSval=my.TSclock,TSecr=TS.Recent> in this ACK segment.  Set
+        Last.ACK.sent to SEG.ACK of the acknowledgment, and send it.
+        This acknowledgment should be piggy-backed on a segment being
+        transmitted if possible without incurring undue delay.
+
+
+         ...
+
+
+Security Considerations
+
+   Security issues are not discussed in this memo.
+
+Authors' Addresses
+
+   Van Jacobson
+   University of California
+   Lawrence Berkeley Laboratory
+   Mail Stop 46A
+   Berkeley, CA 94720
+
+   Phone: (415) 486-6411
+   EMail: van@CSAM.LBL.GOV
+
+
+   Bob Braden
+   University of Southern California
+   Information Sciences Institute
+   4676 Admiralty Way
+   Marina del Rey, CA 90292
+
+   Phone: (310) 822-1511
+   EMail: Braden@ISI.EDU
+
+
+   Dave Borman
+   Cray Research
+   655-E Lone Oak Drive
+   Eagan, MN 55121
+
+   Phone: (612) 683-5571
+   Email: dab@cray.com
+
+
+
+
+
+
+Jacobson, Braden, & Borman                                     [Page 37]
+
\ No newline at end of file
diff --git a/ext/picotcp/RFC/rfc1379.txt b/ext/picotcp/RFC/rfc1379.txt
new file mode 100644
index 0000000..b5f2bdc
--- /dev/null
+++ b/ext/picotcp/RFC/rfc1379.txt
@@ -0,0 +1,2131 @@
+
+
+
+
+
+
+Network Working Group                                          R. Braden
+Request for Comments: 1379                                           ISI
+                                                           November 1992
+
+
+               Extending TCP for Transactions -- Concepts
+
+Status of This Memo
+
+   This memo provides information for the Internet community.  It does
+   not specify an Internet standard.  Distribution of this memo is
+   unlimited.
+
+Abstract
+
+   This memo discusses extension of TCP to provide transaction-oriented
+   service, without altering its virtual-circuit operation.  This
+   extension would fill the large gap between connection-oriented TCP
+   and datagram-based UDP, allowing TCP to efficiently perform many
+   applications for which UDP is currently used.  A separate memo
+   contains a detailed functional specification for this proposed
+   extension.
+
+   This work was supported in part by the National Science Foundation
+   under Grant Number NCR-8922231.
+
+TABLE OF CONTENTS
+
+   1. INTRODUCTION ..................................................  2
+   2. TRANSACTIONS USING STANDARD TCP ...............................  3
+   3. BYPASSING THE 3-WAY HANDSHAKE .................................  6
+      3.1  Concept of TAO ...........................................  6
+      3.2  Cache Initialization ..................................... 10
+      3.3  Accepting <SYN,ACK> Segments ............................. 11
+   4. SHORTENING TIME-WAIT STATE .................................... 13
+   5. CHOOSING A MONOTONIC SEQUENCE ................................. 15
+      5.1  Cached Timestamps ........................................ 16
+      5.2  Current TCP Sequence Numbers ............................. 18
+      5.3  64-bit Sequence Numbers .................................. 20
+      5.4  Connection Counts ........................................ 20
+      5.5  Conclusions .............................................. 21
+   6. CONNECTION STATES ............................................. 24
+   7. CONCLUSIONS AND ACKNOWLEDGMENTS ............................... 32
+   APPENDIX A: TIME-WAIT STATE AND THE 2-PACKET EXCHANGE ............ 34
+   REFERENCES ....................................................... 37
+   Security Considerations .......................................... 38
+   Author's Address ................................................. 38
+
+
+
+
+Braden                                                          [Page 1]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+1. INTRODUCTION
+
+   The TCP protocol [STD-007] implements a virtual-circuit transport
+   service that provides reliable and ordered data delivery over a
+   full-duplex connection.  Under the virtual circuit model, the life of
+   a connection is divided into three distinct phases: (1) opening the
+   connection to create a full-duplex byte stream; (2) transferring data
+   in one or both directions over this stream; and (3) closing the
+   connection.  Remote login and file transfer are examples of
+   applications that are well suited to virtual-circuit service.
+
+   Distributed applications, which are becoming increasingly numerous
+   and sophisticated in the Internet, tend to use a transaction-oriented
+   rather than a virtual circuit style of communication.  Currently, a
+   transaction-oriented Internet application must choose to suffer the
+   overhead of opening and closing TCP connections or else build an
+   application-specific transport mechanism on top of the connectionless
+   transport protocol UDP.  Greater convenience, uniformity, and
+   efficiency would result from widely-available kernel implementations
+   of a transport protocol supporting a transaction service model [RFC-
+   955].
+
+   The transaction service model has the following features:
+
+   *    The fundamental interaction is a request followed by a response.
+
+   *    An explicit open or close phase would impose excessive overhead.
+
+   *    At-most-once semantics is required; that is, a transaction must
+        not be "replayed" by a duplicate request packet.
+
+   *    In favorable circumstances, a reliable request/response
+        handshake can be performed with exactly one packet in each
+        direction.
+
+   *    The minimum transaction latency for a client is RTT + SPT, where
+        RTT is the round-trip time and SPT is the server processing
+        time.
+
+   We use the term "transaction transport protocol" for a transport-
+   layer protocol that follows this model [RFC-955].
+
+   The Internet architecture allows an arbitrary collection of transport
+   protocols to be defined on top of the minimal end-to-end datagram
+   service provided by IP [Clark88].  In practice, however, production
+   systems implement only TCP and UDP at the transport layer.  It has
+   proven difficult to leverage a new transport protocol into place, to
+   be widely enough available to be useful for application builders.
+
+
+
+Braden                                                          [Page 2]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+   This memo explores an alternative approach to providing a transaction
+   transport protocol: extending TCP to implement the transaction
+   service model, while continuing to support the virtual circuit model.
+   Each transaction will then be a single instance of a TCP connection.
+   The proposed transaction extension is effectively implementable
+   within current TCPs and operating systems, and it should also scale
+   to the much faster networks, interfaces, and CPUs of the future.
+
+   The present memo explains the theory behind the extension, in
+   somewhat exquisite detail.  Despite the length and complexity of this
+   memo, the TCP extensions required for transactions are in fact quite
+   limited and simple.  Another memo [TTCP-FS] provides a self-contained
+   functional specification of the extensions.
+
+   Section 2 of this memo describes the limitations of standard TCP for
+   transaction processing, to motivate the extensions.  Sections 3, 4,
+   and 5 explore the fundamental extensions that are required for
+   transactions.  Section 6 discusses the changes required in the TCP
+   connection state diagram.  Finally, Section 7 presents conclusions
+   and acknowledgments.  Familiarity with the standard TCP protocol
+   [STD-007] is assumed.
+
+2.  TRANSACTIONS USING STANDARD TCP
+
+   Reliable transfer of data depends upon sequence numbers.  Before data
+   transfer can begin, both parties must "synchronize" the connection,
+   i.e., agree on common sequence numbers.  The synchronization procedure
+   must preserve at-most-once semantics, i.e., be free from replay
+   hazards due to duplicate packets.  The TCP developers adopted a
+   synchronization mechanism known as the 3-way handshake.
+
+   Consider a simple transaction in which client host A sends a single-
+   segment request to server host B, and B returns a single-segment
+   response.  Many current TCP implementations use at least ten segments
+   (i.e., packets) for this sequence: three for the 3-way handshake
+   opening the connection, four to send and acknowledge the request and
+   response data, and three for TCP's full-duplex data-conserving close
+   sequence.  These ten segments represent a high relative overhead for
+   two data-bearing segments.  However, a more important consideration
+   is the transaction latency seen by the client:  2*RTT + SPT, larger
+   than the minimum by one RTT.  As CPU and network speeds increase, the
+   relative significance of this extra transaction latency also
+   increases.
+
+   Proposed transaction transport protocols have typically used a
+   "timer-based" approach to connection synchronization [Birrell84].  In
+   this approach, once end-to-end connection state is established in the
+   client and server hosts, a subset of this state is maintained for
+
+
+
+Braden                                                          [Page 3]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+   some period of time.  A new request before the expiration of this
+   timeout period can then reestablish the full state without an
+   explicit handshake.  Watson pointed out that the timer-based approach
+   of his Delta-T protocol [Watson81] would encompass both virtual
+   circuits and transactions.  However, the TCP group adopted the 3-way
+   handshake (because of uncertainty about the robustness of enforcing
+   the packet lifetime bounds required by Delta-T, within a general
+   Internet environment).  More recently, Liskov, Shrira, and Wroclawski
+   [Liskov90] have proposed a different timer-based approach to
+   connection synchronization, requiring loosely-synchronized clocks in
+   the hosts.
+
+   The technique proposed in this memo, suggested by Clark [Clark89],
+   depends upon caching of connection state but not upon clocks or
+   timers; it is described in Section 3 below.  Garlick, Rom, and Postel
+   also proposed a connection synchronization mechanism using cached
+   state [Garlick77].  Their scheme required each host to maintain
+   connection records containing the highest sequence number on each
+   connection.  The technique suggested here retains only per-host
+   state, not per-connection state.
+
+   During TCP development, it was suggested that TCP could support
+   transactions with data segments containing both SYN and FIN bits.
+   (These "Kamikaze" segments were not supported as a service; they were
+   used mainly to crash other experimental TCPs!)  To illustrate this
+   idea, Figure 1 shows a plausible application of the current TCP rules
+   to create a minimal transaction.  (In fact, some minor adjustments in
+   the standard TCP spec would be required to make Figure 1 fully legal
+   [STD-007]).
+
+   Figure 1, like many of the examples shown in this memo, uses an
+   abbreviated form to illustrate segment sequences.  For clarity and
+   brevity, it omits explicit sequence and acknowledgment numbers,
+   assuming that these will follow the well-known TCP rules.  The
+   notation "ACK(x)" implies a cumulative acknowledgment for the control
+   bit or data "x" and everything preceding "x" in the sequence space.
+   The referent of "x" should be clear from the context.  Also, host A
+   will always be the client and host B will be the server in these
+   diagrams.
+
+   The first three segments in Figure 1 implement the standard TCP
+   three-way handshake.  If segment #1 had been an old duplicate, the
+   client side would have sent an RST (Reset) bit in segment #3,
+   terminating the sequence.  The request data included on the initial
+   SYN segment cannot be delivered to user B until segment #3 completes
+   the 3-way handshake.  Loading control bits onto the segments has
+   reduced the total number of segments to 5, but the client still
+   observes a transaction latency of 2*RTT + SPT.  The 3-way handshake
+
+
+
+Braden                                                          [Page 4]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+   thus precludes high-performance transaction processing.
+
+
+       TCP A  (Client)                                 TCP B (Server)
+       _______________                                 ______________
+
+       CLOSED                                               LISTEN
+
+   (Client sends request)
+    1. SYN-SENT             --> <SYN,data1,FIN> -->       SYN-RCVD
+                                                       (data1 queued)
+
+    2. ESTABLISHED  <-- <SYN,ACK(SYN)> <--                SYN-RCVD
+
+
+    3. FIN-WAIT-1            --> <ACK(SYN),FIN> -->     CLOSE-WAIT
+                                                    (data1 to server)
+
+                                                 (Server sends reply)
+    4. TIME-WAIT    <-- <ACK(FIN),data2,FIN> <--          LAST-ACK
+    (data2 to client)
+
+    5. TIME-WAIT                 --> <ACK(FIN)> -->         CLOSED
+
+       (timeout)
+       CLOSED
+
+               Figure 1: Transaction Sequence: RFC-793 TCP
+
+
+   The TCP close sequence also poses a performance problem for
+   transactions: one or both end(s) of a closed connection must remain
+   in "TIME-WAIT" state until a 4 minute timeout has expired [STD-007].
+   The same connection (defined by the host and port numbers at both
+   ends) cannot be reopened until this delay has expired.  Because of
+   TIME-WAIT state, a client program should choose a new local port
+   number (i.e., a different connection) for each successive
+   transaction.  However, the TCP port field of 16 bits (less the
+   "well-known" port space) provides only 64512 available user ports.
+   This limits the total rate of transactions between any pair of hosts
+   to a maximum of 64512/240 = 268 per second.  This is much too low a
+   rate for low-delay paths, e.g., high-speed LANs.  A high rate of
+   short connections (i.e., transactions) could also lead to excessive
+   consumption of kernel memory by connection control blocks in TIME-
+   WAIT state.
+
+   In summary, to perform efficient transaction processing in TCP, we
+   need to suppress the 3-way handshake and to shorten TIME-WAIT state.
+
+
+
+Braden                                                          [Page 5]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+   Protocol mechanisms to accomplish these two goals are discussed in
+   Sections 3 and 4, respectively.  Both require the choice of a
+   monotonic sequence-like space; Section 5 analyzes the choices and
+   makes a selection for this space.  Finally, the TCP connection state
+   machine must be extended as described in Section 6.
+
+   Transaction processing in TCP raises some other protocol issues,
+   which are discussed in the functional specification memo [TTCP-FS].
+   These include:
+
+   (1)  augmenting the user interface for transactions,
+
+   (2)  delaying acknowledgment segments to allow maximum piggy-backing
+        of control bits with data,
+
+   (3)  measuring the retransmission timeout time (RTO) on very short
+        connections, and
+
+   (4)  providing an initial server window.
+
+   A recently proposed set of enhancements [RFC-1323] defines a TCP
+   Timestamps option that carries two 32-bit timestamp values.  The
+   Timestamps option is used to accurately measure round-trip time
+   (RTT).  The same option is also used in a procedure known as "PAWS"
+   (Protect Against Wrapped Sequences) to prevent erroneous data
+   delivery due to a combination of old duplicate segments and sequence
+   number reuse at very high bandwidths.  The particular approach to
+   transactions chosen in this memo does not require the RFC-1323
+   enhancements; however, they are important and should be implemented
+   in every TCP, with or without the transaction extensions described
+   here.
+
+3.  BYPASSING THE 3-WAY HANDSHAKE
+
+   To avoid 3-way handshakes for transactions, we introduce a new
+   mechanism for validating initial SYN segments, i.e., for enforcing
+   at-most-once semantics without a 3-way handshake.  We refer to this
+   as the TCP Accelerated Open, or TAO, mechanism.
+
+   3.1 Concept of TAO
+
+      The basis of TAO is this: a TCP uses cached per-host information
+      to immediately validate new SYNs [Clark89].  If this validation
+      fails, e.g., because there is no current cached state or the
+      segment is an old duplicate, the procedure falls back to a normal
+      3-way handshake to validate the SYN.  Thus, bypassing a 3-way
+      handshake is considered to be an optional optimization.
+
+
+
+
+Braden                                                          [Page 6]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+      The proposed TAO mechanism uses a finite sequence-like space of
+      values that increase monotonically with successive transactions
+      (connections) between a given (client, server) host pair.  Call
+      this monotonic space M, and let each initial SYN segment carry an
+      M value SEG.M.  If M is not the existing sequence (SEG.SEQ) field,
+      SEG.M may be carried in a TCP option.
+
+      When host B receives from host A an initial SYN segment containing
+      a new value SEG.M, host B compares this against cache.M[A], the
+      latest M value that B has cached for host A.  This comparison is
+      the "TAO test".  Because the M values are monotonically
+      increasing, SEG.M > cache.M[A] implies that the SYN must be new
+      and can be accepted immediately.  If not, a normal 3-way handshake
+      is performed to validate the initial SYN segment.  Figure 2
+      illustrates the TAO mechanism; cached M values are shown enclosed
+      in square brackets.  The M values generated by host A satisfy
+      x0 < x1, and the M values generated by host B satisfy y0 < y1.
+
+      An appropriate choice for the M value space is discussed in
+      Section 5.  M values are drawn from a finite number space, so
+      inequalities must be defined in the usual way for sequence numbers
+      [STD-007].  The M space must not wrap so quickly that an old
+      duplicate SYN will be erroneously accepted.  We assume that some
+      maximum segment lifetime (MSL) is enforced by the IP layer.
+
+        ____T_C_P__A_____                                ____T_C_P__B_____
+
+            cache.M[B]                                  cache.M[A]
+               V                                            V
+
+            [ y0 ]                                       [ x0 ]
+
+      1.             -->  <SYN,data1,M=x1> -->       ( (x1 > x0) =>
+                                                      data1 -> user_B;
+                                                      cache.M[A]= x1)
+
+            [ y0 ]                                       [ x1 ]
+      2.            <-- <SYN,ACK(data1),data2,M=y1> <--
+
+         (data2 -> user_A,
+          cache.M[B]= y1)
+
+            [ y1 ]                                       [ x1 ]
+                              ... (etc.) ...
+
+
+                   Figure 2. TAO: Three-Way Handshake is Bypassed
+
+
+
+
+Braden                                                          [Page 7]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+      Figure 2 shows the simplest case: each side has cached the latest
+      M value of the other, and the SEG.M value in the client's SYN
+      segment is greater than the value in the cache at the server host.
+      As a result, B can accept the client A's request data1 immediately
+      and pass it to the server application.  B's reply data2 is shown
+      piggybacked on the <SYN,ACK> segment.  As a result of this 2-way
+      exchange, the cached M values are updated at both sites; the
+      client side becomes relevant only if the client/server roles
+      reverse.  Validation of the <SYN,ACK> segment at host A is
+      discussed later.
+
+      Figure 3 shows the TAO test failing but the consequent 3-way
+      handshake succeeding.  B updates its cache with the value x2 >= x1
+      when the initial SYN is known to be valid.
+
+
+           _T_C_P__A                                     _T_C_P__B
+
+            cache.M[B]                                  cache.M[A]
+               V                                           V
+
+            [ y0 ]                                       [ x0 ]
+      1.                 --> <SYN,data1,M=x1> -->   ( (x1 <= x0) =>
+                                                    data1 queued;
+                                                    3-way handshake)
+
+            [ y0 ]                                       [ x0 ]
+      2.                <-- <SYN,ACK(SYN),M=y1> <--
+         (cache.M[B]= y1)
+
+            [ y1 ]                                       [ x0 ]
+      3.                  --> <ACK(SYN),M=x2> -->  (Handshake OK =>
+                                                   data1->user_B,
+                                                   cache.M[A]= x2)
+
+            [ y1 ]                                       [ x2 ]
+                            ...  (etc.)  ...
+
+          Figure 3. TAO Test Fails but 3-Way Handshake Succeeds.
+
+      There are several possible causes for a TAO test failure on a
+      legitimate new SYN segment (not an old duplicate).
+
+      (1)  There may be no cached M value for this particular client
+           host.
+
+      (2)  The SYN may be one of a set of nearly-simultaneous SYNs
+           for different connections but from the same host, which
+
+
+
+Braden                                                          [Page 8]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+           arrived out of order.
+
+      (3)  The finite M space may have wrapped around between successive
+           transactions from the same client.
+
+      (4)  The M values may advance too slowly for closely-spaced
+           transactions.
+
+      None of these TAO failures will cause a lockout, because the
+      resulting 3-way handshake will succeed.  Note that the first
+      transaction between a given host pair will always require a 3-way
+      handshake; subsequent transactions can take advantage of TAO.
+
+      The per-host cache required by TAO is highly desirable for other
+      reasons, e.g., to retain the measured round trip time and MTU for
+      a given remote host.  Furthermore, a host should already have a
+      per-host routing cache [HR-COMM] that should be easily extensible
+      for this purpose.
+
+      Figure 4 illustrates a complete TCP transaction sequence using the
+      TAO mechanism.  Bypassing the 3-way handshake leads to new
+      connection states; Figure 4 shows three of them, "SYN-SENT*",
+      "CLOSE-WAIT*", and "LAST-ACK*".  Explanation of these states is
+      deferred to Section 6.
+
+
+          TCP A  (Client)                                 TCP B (Server)
+          _______________                                 ______________
+
+          CLOSED                                                  LISTEN
+
+      1.  SYN-SENT*    --> <SYN,data1,FIN,M=x1> -->          CLOSE-WAIT*
+                                                         (TAO test OK=>
+                                                          data1->user_B)
+
+                   <-- <SYN,ACK(FIN),data2,FIN,M=y1> <--       LAST-ACK*
+      2.  TIME-WAIT
+       (data2->user_A)
+
+
+      3.  TIME-WAIT          --> <ACK(FIN),M=x2> -->              CLOSED
+
+          (timeout)
+            CLOSED
+
+
+               Figure 4: Minimal Transaction Sequence Using TAO
+
+
+
+
+Braden                                                          [Page 9]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+   3.2 Cache Initialization
+
+      The first connection between hosts A and B will find no cached
+      state at one or both ends, so both M caches must be initialized.
+      This requires that the first transaction carry a specially marked
+      SEG.M value, which we call SEG.M.NEW.  Receiving a SEG.M.NEW value
+      in an initial SYN segment, B will cache this value and send its
+      own M back to initialize A's cache.  When a host crashes and
+      restarts, all its cached M values cache.M[*] must be invalidated
+      in order to force a re-synchronization of the caches at both ends.
+
+      This cache synchronization procedure is illustrated in Figure 5,
+      where client host A has crashed and restarted with its cache
+      entries undefined, as indicated by "??".  Since cache.M[B] is
+      undefined, A sends a SEG.M.NEW value instead of SEG.M in the <SYN>
+      segment of its first transaction request to B.  Receiving this
+      SEG.M.NEW, the server host B invalidates cache.M[A] and performs
+      a 3-way handshake.  SEG.M in segment #2 updates A's cache, and
+      when the handshake completes successfully, B updates its cached M
+      value to x2 >= x1.
+
+
+           _T_C_P__A                                     _T_C_P__B
+
+            cache.M[B]                                  cache.M[A]
+               V                                           V
+            [ ?? ]                                       [ x0 ]
+
+      1.           --> <SYN,data1,M.NEW=x1> -->   (invalidate cache;
+                                                        queue data1;
+            [ ?? ]                                  3-way handshake)
+
+                                                         [ ?? ]
+      2.              <-- <SYN,ACK(SYN),M=y1> <--
+         (cache.M[B]= y1)
+
+            [ y1 ]                                       [ ?? ]
+
+      3.                  --> <ACK(SYN),M=x2> --> (data1->user_B,
+                                                   cache.M[A]= x2)
+
+            [ y1 ]                                       [ x2 ]
+                            ...  (etc.)  ...
+
+                  Figure 5.  Client Host Crashed
+
+
+      Suppose that the 3-way handshake failed, presumably because
+
+
+
+Braden                                                         [Page 10]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+      segment #1 was an old duplicate.  Then segment #3 from host A
+      would be an RST segment, with the result that both sides' caches
+      would be left undefined.
+
+      Figure 6 shows the procedure when the server crashes and restarts.
+      Upon receiving a <SYN> segment from a host for which it has no
+      cached M value, B initiates a 3-way handshake to validate the
+      request and sends its own M value to A.  Again the result is to
+      update cached M values on both sides.
+
+
+              _T_C_P__A                                     _T_C_P__B
+
+               cache.M[B]                                  cache.M[A]
+                  V                                           V
+               [ y0 ]                                       [ ?? ]
+
+         1.               --> <SYN,data1,M=x1> -->      (data1 queued;
+                                                       3-way handshake)
+
+               [ y0 ]                                       [ ?? ]
+         2.              <-- <SYN,ACK(SYN),M=y1> <--
+            (cache.M[B]= y1)
+
+               [ y1 ]                                       [ ?? ]
+         3.                --> <ACK(SYN),M=x2> -->   (data1->user_B,
+                                                      cache.M[A]= x2)
+
+               [ y1 ]                                       [ x2 ]
+                               ...  (etc.)  ...
+
+
+                        Figure 6. Server Host Crashed
+
+
+   3.3  Accepting <SYN,ACK> Segments
+
+      Transactions introduce a new hazard of erroneously accepting an
+      old duplicate <SYN,ACK> segment.  To be acceptable, a <SYN,ACK>
+      segment must arrive in SYN-SENT state, and its ACK field must
+      acknowledge something that was sent.  In current TCPs the
+      effective send window in SYN-SENT state is exactly one octet, and
+      an acceptable <SYN,ACK> must exactly ACK this one octet.  The
+      clock-driven selection of Initial Sequence Number (ISN) makes an
+      erroneous acceptance exceedingly unlikely.  An old duplicate SYN
+      could be accepted erroneously only if successive connection
+      attempts occurred more often than once every 4 microseconds, or if
+      the segment lifetime exceeded the 4 hour wraparound time for ISN
+
+
+
+Braden                                                         [Page 11]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+      selection.
+
+      However, when TCP is used for transactions, data sent with the
+      initial SYN increases the range of sequence numbers that have been
+      sent.  This increases the danger of accepting an old duplicate
+      <SYN,ACK> segment, and the consequences are more serious.  In the
+      example in Figure 7, segments 1-3 form a normal transaction
+      sequence, and segment 4 begins a new transaction (incarnation) for
+      the same connection.  Segment #5 is a duplicate of segment #2 from
+      the preceding transaction.  Although the new transaction has a
+      larger ISN, the previous ACK value 402 falls into the new range
+      [200,700) of sequence numbers that have been sent, so segment #5
+      could be erroneously accepted and passed to the client as the
+      response to the new request.
+
+           _T_C_P__A                                       _T_C_P__B
+
+         CLOSED                                                   LISTEN
+
+      1.           --> <seq=100,SYN,data=300,FIN,M=x1> --> (TAO test OK)
+
+
+      2.         <-- <seq=800,ack=402,SYN,data=350,FIN,M=y1> <--
+
+
+      3. TIME-WAIT                      --> <ACK(FIN)> -->       CLOSED
+         (short timeout)
+         CLOSED
+
+         (New Request)
+      4.           --> <seq=200,SYN,data=500,FIN,M=x2> --> ...
+
+                                            (Duplicate of segment #2)
+      5.         <-- <seq=800,ack=402,SYN,data=350,FIN,M=y1> <--...
+         (Acceptable!!)
+
+
+               Figure 7: Old Duplicate <SYN,ACK> Causing Error
+
+
+      Unfortunately, we cannot simply use TAO on the client side to
+      detect and reject old duplicate <SYN,ACK> segments.  A TAO test at
+      the client might fail for a valid <SYN,ACK> segment, due to out-
+      of-order delivery, and this could result in permanent non-delivery
+      of a valid transaction reply.
+
+      Instead, we include a second M value, an echo of the client's M
+      value from the initial <SYN> segment, in the <SYN,ACK> segment.  A
+
+
+
+Braden                                                         [Page 12]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+      specially-marked M value, SEG.M.ECHO, is used for this purpose.
+      The client knows the value it sent in the initial <SYN> and can
+      therefore positively validate the <SYN,ACK> using the echoed
+      value.  This is illustrated in Figure 12, which is the same as
+      Figure 4 with the addition of the echoed value on the <SYN,ACK>
+      segment #2.
+
+      It should be noted that TCP allows a simultaneous open sequence in
+      which both sides send and receive an initial <SYN> (see Figure 8
+      of [STD-007]).  In this case, the TAO test must be performed on
+      both sides to preserve the symmetry.  See [TTCP-FS] for an
+      example.
+
+4.  SHORTENING TIME-WAIT STATE
+
+   Once a transaction has been initiated for a particular connection
+   (pair of ports) between a given host pair, a new transaction for the
+   same connection cannot take place for a time that is at least:
+
+       RTT + SPT + TIME-WAIT_Delay
+
+   Since the client host can cycle among the 64512 available port
+   numbers, an upper bound on the transaction rate between a particular
+   host pair is:
+
+   [1]    TRmax = 64512 /(RTT + TIME-WAIT_Delay)
+
+   in transactions per second (Tps), where we assumed SPT is negligible.
+   We must reduce TIME-WAIT_Delay to support high-rate TCP transaction
+   processing.
+
+   TIME-WAIT state performs two functions: (1) supporting the full-
+   duplex reliable close of TCP, and (2) allowing old duplicate segments
+   from an earlier connection incarnation to expire before they can
+   cause an error (see Appendix to [RFC-1185]).  The first function
+   impacts the application model of a TCP connection, which we would not
+   want to change.  The second is part of the fundamental machinery of
+   TCP reliable delivery; to safely truncate TIME-WAIT state, we must
+   provide another means to exclude duplicate packets from earlier
+   incarnations of the connection.
+
+   To minimize the delay in TIME-WAIT state while performing both
+   functions, we propose to set the TIME-WAIT delay to:
+
+   [2]    TIME-WAIT_Delay = max( K*RTO, U )
+
+   where U and K are constants and RTO is the dynamically-determined
+   retransmission timeout, the measured RTT plus an allowance for the
+
+
+
+Braden                                                         [Page 13]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+   RTT variance [Jacobson88].  We choose K large enough so that there is
+   high probability of the close completing successfully if at all
+   possible; K = 8 seems reasonable.  This takes care of the first
+   function of TIME-WAIT state.
+
+   In a real implementation, there may be a minimum RTO value Tr,
+   corresponding to the precision of RTO calculation.  For example, in
+   the popular BSD implementation of TCP, the minimum RTO is Tr = 0.5
+   second.  Assuming K = 8 and U = 0, Eqns [1] and [2] impose an upper
+   limit of TRmax = 16K Tps on the transaction rate of these
+   implementations.
+
+   It is possible to have many short connections only if RTO is very
+   small, in which case the TIME-WAIT delay [2] reduces to U.  To
+   accelerate the close sequence, we need to reduce U below the MSL
+   enforced by the IP layer, without introducing a hazard from old
+   duplicate segments.  For this purpose, we introduce another monotonic
+   number sequence; call it X.  X values are required to be monotonic
+   between successive connection incarnations; depending upon the choice
+   of the X space (see Section 5), X values may also increase during a
+   connection.  A value from the X space is to be carried in every
+   segment, and a segment is rejected if it is received with an X value
+   smaller than the largest X value received.  This mechanism does not
+   use a cache; the largest X value is maintained in the TCP connection
+   control block (TCB) for each connection.
+
+   The value of U depends upon the choice for the X space, discussed in
+   the next section.  If X is time-like, U can be set to twice the time
+   granularity (i.e., twice the minimum "tick" time) of X.  The TIME-WAIT
+   delay will then ensure that current X values do not overlap the X
+   values of earlier incarnations of the same connection.  Another
+   consequence of time-like X values is the possibility that an open but
+   idle connection might allow the X value to wrap its sign bit,
+   resulting in a lockup of the connection.  To prevent this, a 24-day
+   idle timer on each open connection could bypass the X check on the
+   first segment following the idle period, for example.  In practice,
+   many implementations have keep-alive mechanisms that prevent such
+   long idle periods [RFC-1323].
+
+   Referring back to Figure 4, our proposed transaction extension
+   results in a minimum exchange of 3 packets.  Segment #3, the final
+   ACK segment, does not increase transaction latency, but in
+   combination with the TIME-WAIT delay of K*RTO it ensures that the
+   server side of the connection will be closed before a new transaction
+   is issued for this same pair of ports.  It also provides an RTT
+   measurement for the server.
+
+   We may ask whether it would be possible to further reduce the TIME-
+
+
+
+Braden                                                         [Page 14]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+   WAIT delay.  We might set K to zero; alternatively, we might allow
+   the client TCP to start a new transaction request while the
+   connection was still in TIME-WAIT state, with the new initial SYN
+   acting as an implied acknowledgment of the previous FIN.  Appendix A
+   summarizes the issues raised by these alternatives, which we call
+   "truncating" TIME-WAIT state, and suggests some possible solutions.
+   Further study would be required, but these solutions appear to bend
+   the theory and/or implementations of the TCP protocol farther than we
+   wish to bend them.
+
+   We therefore propose using formula [2] with K=8 and retaining the
+   final ACK(FIN) transmission.  To raise the transaction rate,
+   therefore, we require small values of RTO and U.
+
+5.  CHOOSING A MONOTONIC SEQUENCE
+
+   For simplicity, we want the monotonic sequence X used for shortening
+   TIME-WAIT state to be identical to the monotonic sequence M for
+   bypassing the 3-way handshake.  Calling the common space M, we will
+   send an M value SEG.M in each TCP segment.  Upon receipt of an
+   initial SYN segment, SEG.M will be compared with a per-host cached
+   value to authenticate the SYN without a 3-way handshake; this is the
+   TAO mechanism.  Upon receipt of a non-SYN segment, SEG.M will be
+   compared with the current value in the connection control block and
+   used to discard old duplicates.
+
+   Note that the situation with TIME-WAIT state differs from that of
+   bypassing 3-way handshakes in two ways: (a) TIME-WAIT requires
+   duplicate detection on every segment vs. only on SYN segments, and
+   (b) TIME-WAIT applies to a single connection vs. being global across
+   all connections.  This section discusses possible choices for the
+   common monotonic sequence.
+
+   The SEG.M values must satisfy the following requirements.
+
+   *    The values must be monotonic; this requirement is defined more
+        precisely below.
+
+   *    Their granularity must be fine-grained enough to support a high
+        rate of transaction processing; the M clock must "tick" at least
+        once between successive transactions.
+
+   *    Their range (wrap-around time) must be great enough to allow a
+        realistic MSL to be enforced by the network.
+
+   The TCP spec calls for an MSL of 120 secs.  Since much of the
+   Internet does not carefully enforce this limit, it would be safer to
+   have an MSL at least an order of magnitude larger.  We set as an
+
+
+
+Braden                                                         [Page 15]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+   objective an MSL of at least 2000 seconds.  If there were no TIME-
+   WAIT delay, the ultimate limit on transaction rate would be set by
+   speed-of-light delays in the network and by the latency of host
+   operating systems.  As the bottleneck problems with interfacing CPUs
+   to gigabit LANs are solved, we can imagine transaction durations as
+   short as 1 microsecond.  Therefore, we set an ultimate performance
+   goal of TRmax at least 10**6 Tps.
+
+   A particular connection between hosts A and B is identified by the
+   local and remote TCP "sockets", i.e., by the quadruplet: {A, B,
+   Port.A, Port.B}.  Imagine that each host keeps a count CC of the
+   number of TCP connections it has initiated.  We can use this CC
+   number to distinguish different incarnations of the same connection.
+   Then a particular SEG.M value may be labeled implicitly by 6
+   quantities: {A, B, Port.A, Port.B, CC, n}, where n is the byte offset
+   of that segment within the connection incarnation.
+
+   To bypass the 3-way handshake, we require that SEG.M values on
+   successive SYN segments from a host A to a host B be monotone
+   increasing.  If CC' > CC, then we require that:
+
+       SEG.M(A,B,Port.A,Port.B,CC',0) >  SEG.M(A,B,Port.A,Port.B,CC,0)
+
+   for any legal values of Port.A and Port.B.
+
+   To delete old duplicates (allowing TIME-WAIT state to be shortened),
+   we require that SEG.M values be disjoint across different
+   incarnations of the same connection.   If CC' > CC then
+
+       SEG.M(A,B,Port.A,Port.B,CC',n') > SEG.M(A,B,Port.A,Port.B,CC,n),
+
+   for any non-negative integers n and n'.
+
+   We now consider four different choices for the common monotonic
+   space: RFC-1323 timestamps, TCP sequence numbers, the connection
+   count, and 64-bit TCP sequence numbers.  The results are summarized
+   in Table I.
+
+   5.1 Cached Timestamps
+
+      The PAWS mechanism [RFC-1323] uses TCP "timestamps" as
+      monotonically increasing integers in order to throw out old
+      duplicate segments within the same incarnation.  Jacobson
+      suggested the cacheing of these timestamps for bypassing 3-way
+      handshakes [Jacobson90], i.e., that TCP timestamps be used for our
+      common monotonic space M.  This idea is attractive since it would
+      allow the same timestamp options to be used for RTTM, PAWS, and
+      transactions.
+
+
+
+Braden                                                         [Page 16]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+      To obtain at-most-once service, the criterion for immediate
+      acceptance of a SYN must be that SEG.M is strictly greater than
+      the cached M value.  That is, to be useful for bypassing 3-way
+      handshakes, the timestamp clock must tick at least once between
+      any two successive transactions between the same pair of hosts
+      (even if different ports are used).  Hence, the timestamp clock
+      rate would determine TRmax, the maximum possible transaction rate.
+
+      Unfortunately, the timestamp clock frequency called for by RFC-
+      1323, in the range 1 sec to 1 ms, is much too slow for
+      transactions.  The TCP timestamp period was chosen to be
+      comparable to the fundamental interval for computing and
+      scheduling retransmission timeouts; this is generally in the range
+      of 1 sec. to 1 ms., and in many operating systems, much closer to
+      1 second.  Although it would be possible to increase the timestamp
+      clock frequency by several orders of magnitude, to do so would
+      make implementation more difficult, and on some systems
+      excessively expensive.
+
+      The wraparound time for TCP timestamps, at least 24 days, causes
+      no problem for transactions.
+
+      The PAWS mechanism uses TCP timestamps to protect against old
+      duplicate non-SYN segments from the same incarnation [RFC-1323].
+      It can also be used to protect against old duplicate data segments
+      from earlier incarnations (and therefore allow shortening of
+      TIME-WAIT state) if we can ensure that the timestamp clock ticks
+      at least once between the end of one incarnation and the beginning
+      of the next.  This can be achieved by setting U = 2 seconds, i.e.,
+      to twice the maximum timestamp clock period.  This value in
+      formula [2] leads to an upper bound TRmax = 32K Tps between a host
+      pair.  However, as pointed out above, old duplicate SYN detection
+      using timestamps leads to a smaller transaction rate bound, 1 Tps,
+      which is unacceptable.  In addition, the timestamp approach is
+      imperfect; it allows old ACK segments to enter the new connection
+      where they can cause a disconnect.  This happens because old
+      duplicate ACKs that arrive during TIME-WAIT state generate new
+      ACKs with the current timestamp [RFC-1337].
+
+      We therefore conclude that timestamps are not adequate as the
+      monotonic space M; see Table I.  However, they may still be useful
+      to effectively extend some other monotonic number space, just as
+      they are used in PAWS to extend the TCP sequence number space.
+      This is discussed below.
+
+
+
+
+
+
+
+Braden                                                         [Page 17]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+   5.2 Current TCP Sequence Numbers
+
+      It is useful to understand why the existing 32-bit TCP sequence
+      numbers do not form an appropriate monotonic space for
+      transactions.
+
+      The sequence number sent in an initial SYN is called the Initial
+      Sequence Number or ISN.  According to the TCP specification, an
+      ISN is to be selected using:
+
+      [3]      ISN = (R*T) mod 2**32
+
+      where T is the real time in seconds (from an arbitrary origin,
+      fixed when the system is started) and R is a constant, currently
+      250 KBps.  These ISN values form a monotonic time sequence that
+      wraps in 4.55 hours = 16380 seconds and has a granularity of 4
+      usecs.  For transaction rates up to roughly 250K Tps, the ISN
+      value calculated by formula [3] will be monotonic and could be
+      used for bypassing the 3-way handshake.
+
+      However, TCP sequence numbers (alone) could not be used to shorten
+      TIME-WAIT state, because there are several ways that overlap of
+      the sequence space of successive incarnations can occur (as
+      described in Appendix to [RFC-1185]).  One way is a "fast
+      connection", with a transfer rate greater than R; another is a
+      "long" connection, with a duration of approximately 4.55 hours.
+      TIME-WAIT delay is necessary to protect against these cases.  With
+      the official delay of 240 seconds, formula [1] implies an upper
+      bound (as RTT -> 0) of TRmax = 268 Tps; with our target MSL of
+      2000 sec, TRmax = 32 Tps.  These values are unacceptably low.
+
+      To improve this transaction rate, we could use TCP timestamps to
+      effectively extend the range of the TCP sequence numbers.
+      Timestamps would guard against sequence number wrap-around and
+      thereby allow us to increase R in [3] to exceed the maximum
+      possible transfer rate.  Then sequence numbers for successive
+      incarnations could not overlap.  Timestamps would also provide
+      safety with an MSL as large as 24 days.  We could then set U = 0
+      in the TIME-WAIT delay calculation [2].  For example, R = 10**9
+      Bps leads to TRmax <= 10**9 Tps. See 2(b) in Table I.  These
+      values would more than satisfy our objectives.
+
+      We should make clear how this proposal, sequence numbers plus
+      timestamps, differs from the timestamps alone discussed (and
+      rejected) in the previous section.  The difference lies in what is
+      cached and tested for TAO; the proposal here is to cache and test
+      BOTH the latest TCP sequence number and the latest TCP timestamp.
+      In effect, we are proposing to use timestamps to logically extend
+
+
+
+Braden                                                         [Page 18]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+      the sequence space to 64 bits.  Another alternative, presented in
+      the next section, is to directly expand the TCP sequence space to
+      64 bits.
+
+      Unfortunately, the proposed solution (TCP sequence numbers plus
+      timestamps) based on equation [3] would be difficult or impossible
+      to implement on many systems, which base their TCP implementation
+      upon a very low granularity software clock, typically O(1 sec).
+      To adapt the procedure to a system with a low granularity software
+      clock, suppose that we calculate the ISN as:
+
+      [4]      ISN = ( R*Ts*floor(T/Ts) + q*CC) mod 2**32
+
+      where Ts is the time per tick of the software clock, CC is the
+      connection count, and q is a constant.  That is, the ISN is
+      incremented by the constant R*Ts once every clock tick and by the
+      constant q for every new connection.  We need to choose q to
+      obtain the required monotonicity.
+
+      For monotonicity of the ISN's themselves, q=1 suffices.  However,
+      monotonicity during the entire connection requires q = R*Ts.  This
+      value of q can be deduced as follows.  Let S(T, CC, n) be the
+      sequence number for byte offset n in a connection with number CC
+      at time T:
+
+          S(T, CC, n) = (R*Ts*floor(T/Ts) + q*CC + n) mod 2**32.
+
+      For any T2 > T1, we require that: S(T2, CC+1, 0) - S(T1, CC, n) >
+      0 for all n.  Since R is assumed to be an upper bound on the
+      transfer rate, we can write down:
+
+          R > n/(T2 - T1),  or  T2/Ts - T1/Ts > n/(R*Ts)
+
+      Using the relationship:  floor(x)-floor(y) > x-y-1 and a little
+      algebra leads to the conclusion that using q = R*Ts creates the
+      required monotonic number sequence.  Therefore, we consider:
+
+      [5]      ISN = R*Ts*(floor(T/Ts) + CC) mod 2**32
+
+      (which is the algorithm used for ISN selection by BSD TCP).
+
+      For error-free operation, the sequence numbers generated by [5]
+      must not wrap the sign bit in less than MSL seconds.  Since CC
+      cannot increase faster than TRmax, the safe condition is:
+
+            R* (1 + Ts*TRmax) * MSL < 2**31.
+
+      We are interested in the case: Ts*TRmax >> 1, so this relationship
+
+
+
+Braden                                                         [Page 19]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+      reduces to:
+
+      [6]     R * Ts * TRmax * MSL < 2**31.
+
+      This shows a direct trade-off among the maximum effective
+      bandwidth R, the maximum transaction rate TRmax, and the maximum
+      segment lifetime MSL.  For reasonable limiting values of R, Ts,
+      and MSL, formula [6] leads to a very low value of TRmax.  For
+      example, with MSL= 2000 secs, R=10**9 Bps, and Ts = 0.5 sec, TRmax
+      < 2*10**-3 Tps.
+
+      To ease the situation, we could supplement sequence numbers with
+      timestamps.  This would allow an effective MSL of 2 seconds in
+      [6], since longer times would be protected by differing
+      timestamps.  Then TRmax < 2**30/(R*Ts).  The actual enforced MSL
+      would be increased to 24 days.  Unfortunately, TRmax would still
+      be too small, since we want to support transfer rates up to R ~
+      10**9 Bps.  Ts = 0.5 sec would imply TRmax ~ 2 Tps.  On many
+      systems, it appears infeasible to decrease Ts enough to obtain an
+      acceptable TRmax using this approach.
+
+   5.3 64-bit TCP Sequence Numbers
+
+      Another possibility would be to simply increase the TCP sequence
+      space to 64 bits as suggested in [RFC-1263].  We would also
+      increase the R value for clock-driven ISN selection, beyond the
+      fastest transfer rate of which the host is capable.  A reasonable
+      upper limit might be R = 10**9 Bps.  As noted above, in a
+      practical implementation we would use:
+
+            ISN = R*Ts*( floor(T/Ts) + CC) mod 2**64
+
+      leading to:
+
+            R*(1 +  Ts * TRmax) * MSL < 2**63
+
+      For example, suppose that R = 10**9 Bps, Ts = 0.5, and MSL = 16K
+      secs (4.4 hrs); then this result implies that TRmax < 10**6 Tps.
+      We see that adding 32 bits to the sequence space has provided
+      feasible values for transaction processing.
+
+   5.4 Connection Counts
+
+      The Connection Count CC is well suited to be the monotonic
+      sequence M, since it "ticks" exactly once for each new connection
+      incarnation and is constant within a single incarnation.  Thus, it
+      perfectly separates segments from different incarnations of the
+      same connection and would allow U = 0 in the TIME-WAIT state delay
+
+
+
+Braden                                                         [Page 20]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+      formula [2].  (Strictly, U cannot be reduced below 1/R = 4 usec,
+      as noted in Section 4.  However, this is of little practical
+      consequence until the ultimate limits on TRmax are approached).
+
+      Assume that CC is a 32-bit number.  To prevent wrap-around in the
+      sign bit of CC in less than MSL seconds requires that:
+
+           TRmax * MSL < 2**31
+
+      For example, if MSL =  2000 seconds then TRmax < 10**6 Tps.  These
+      are acceptable limits for transaction processing.  However, if
+      they are not, we could augment CC with TCP timestamps to obtain
+      very far-out limits, as discussed below.
+
+      It would be an implementation choice at the client whether CC is
+      global for all destinations or private to each destination host
+      (and maintained in the per-host cache).  In the latter case, the
+      last CC value assigned for each remote host could also be
+      maintained in the per-host cache.  Since there is not typically a
+      large amount of parallelism in the network connection of a host,
+      there should be little difference in the performance of these two
+      different approaches, and the single global CC value is certainly
+      simpler.
+
+      To augment CC with TCP timestamps, we would bypass a 3-way
+      handshake if both SEG.CC > cache.CC[A] and SEG.TSval >=
+      cache.TS[A].  The timestamp check would detect a SYN older than 2
+      seconds, so that the effective wrap-around requirement would be:
+
+           TRmax * 2 < 2**31
+
+      i.e., TRmax < 10**9 Tps.  The required MSL would be raised to 24
+      days.  Using timestamps in this way, we could reduce the size of
+      CC.  For example, suppose CC were 16 bits.  Then the wrap-around
+      condition TRmax * 2 < 2**15 implies that TRmax < 16K Tps.
+
+      Finally, note that using CC to delete old duplicates from earlier
+      incarnations would not obviate the need for the time-stamp-based
+      PAWS mechanism to prevent errors within a single incarnation due
+      to wrapping the 32-bit TCP sequence space at very high transfer
+      rates.
+
+   5.5  Conclusions
+
+      The alternatives for monotonic sequence are summarized in Table I.
+      We see that there are two feasible choices for the monotonic
+      space: the connection count and 64-bit sequence numbers.  Of these
+      two, we believe that the simpler is the connection count.
+
+
+
+Braden                                                         [Page 21]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+      Implementation of 64-bit sequence numbers would require
+      negotiation of a new header format and expansion of all variables
+      and calculations on the sequence space.  CC can be carried in an
+      option and need be examined only once per packet.
+
+      We propose to use a simple 32-bit connection count CC, without
+      augmentation with timestamps, for the transaction extension.  This
+      choice has the advantages of simplicity and directness.  Its
+      drawback is that it adds a third sequence-like space (in addition
+      to the TCP sequence number and the TCP timestamp) to each TCP
+      header and to the main line of packet processing.  However, the
+      additional code is in fact very modest.
+
+   We now have a general outline of the proposed TCP extensions for
+   transactions.
+
+   o    A host maintains a 32-bit global connection counter variable CC.
+
+   o    The sender's current CC value is carried in an option in every
+        TCP segment.
+
+   o    CC values are cached per host, and the TAO mechanism is used to
+        bypass the 3-way handshake when possible.
+
+   o    In non-SYN segments, the CC value is used to reject duplicates
+        from earlier incarnations.  This allows TIME-WAIT state delay to
+        be reduced to K*RTO (i.e., U=0 in Eq. [2]).
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Braden                                                         [Page 22]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+                TABLE I: Summary of Monotonic Sequences
+
+      APPROACH              TRmax (Tps)    Required MSL      COMMENTS
+   __________________________________________________________________
+
+   1. Timestamp & PAWS        1              24 days         TRmax is
+                                                            too small
+   __________________________________________________________________
+
+   2. Current TCP Sequence Numbers
+
+     (a) clock-driven
+       ISN: eq. [3]           268           240 secs      TRmax & MSL
+                                                            too small
+
+     (b) Timestamps & clock-
+         driven ISN [3] &     10**9         24 days           Hard to
+         R=10**9                                            implement
+
+     (c) Timestamps & c-dr
+         ISN: eq. [4]        2**30/(R*Ts)   24 days         TRmax too
+                                                               small.
+   __________________________________________________________________
+
+   3. 64-bit TCP Sequence Numbers
+
+                          2**63/(MSL*R*Ts)      MSL        Significant
+                                                          TCP change
+                           e.g., R=10**9 Bps,
+                               MSL = 4.4 hrs,
+                               Ts = 0.5 sec=>
+                               TRmax = 10**6
+   __________________________________________________________________
+
+   4. Connection Counts
+
+     (a) no timestamps       2**31/MSL        MSL        3rd sequence
+                        e.g., MSL=2000 sec                      space
+                             TRmax = 10**6
+
+     (b) with timestamps     2**30           24 days     (ditto)
+                 and PAWS
+   __________________________________________________________________
+
+
+
+
+
+
+
+
+Braden                                                         [Page 23]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+6.  CONNECTION STATES
+
+   TCP has always allowed a connection to be half-closed.  TAO makes a
+   significant addition to TCP semantics by allowing a connection to be
+   half-synchronized, i.e., to be open for data transfer in one
+   direction before the other direction has been opened.  Thus, the
+   passive end of a connection (which receives an initial SYN) can
+   accept data and even a FIN bit before its own SYN has been
+   acknowledged.  This SYN, data, and FIN may arrive on a single segment
+   (as in Figure 4), or on multiple segments; packetization makes no
+   difference to the logic of the finite-state machine (FSM) defining
+   transitions among connection states.
+
+   Half-synchronized connections have several consequences.
+
+   (a)  The passive end must provide an implied initial data window in
+        order to accept data.  The minimum size of this implied window
+        is a parameter in the specification; we suggest 4K bytes.
+
+   (b)  New connection states and transitions are introduced into the
+        TCP FSM at both ends of the connection.  At the active end, new
+        states are required to piggy-back the FIN on the initial SYN
+        segment.  At the passive end, new states are required for a
+        half-synchronized connection.
+
+   This section develops the resulting FSM description of a TCP
+   connection as a conventional state/transition diagram.  To develop a
+   complete FSM, we take a constructive approach, as follows: (1) write
+   down all possible events; (2) write down the precedence rules that
+   govern the order in which events may occur; (3) construct the
+   resulting FSM; and (4) augment it to support TAO.  In principle, we
+   do this separately for the active and passive ends; however, the
+   symmetry of TCP results in the two FSMs being almost entirely
+   coincident.
+
+   Figure 8 lists all possible state transitions for a TCP connection in
+   the absence of TAO, as elementary events and corresponding actions.
+   Each transition is labeled with a letter.  Transitions a-g are used
+   by the active side, and c-i are used by the passive side.  Without
+   TAO, transition "c" (event "rcv ACK(SYN)") synchronizes the
+   connection, allowing data to be accepted for the user.
+
+   By definition, the first transition for an active (or passive) side
+   must be "a" (or "i", respectively).  During a single instance of a
+   connection, the active side will progress through some permutation of
+   the complete sequence of transitions {a b c d e f } or the sequence
+   {a b c d e f g}.  The set of possible permutations is determined by
+   precedence rules governing the order in which transitions can occur.
+
+
+
+Braden                                                         [Page 24]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+          Label              Event / Action
+          _____              ________________________
+            a                OPEN / snd SYN
+
+            b                rcv SYN [No TAO]/ snd ACK(SYN)
+
+            c                rcv ACK(SYN) /
+
+            d                CLOSE / snd FIN
+
+            e                rcv FIN / snd ACK(FIN)
+
+            f                rcv ACK(FIN) /
+
+            g                timeout=2MSL / delete TCB
+        ___________________________________________________
+            h                passive OPEN / create TCB
+
+            i                rcv SYN [No TAO]/ snd SYN, ACK(SYN)
+        ___________________________________________________
+
+           Figure 8.  Basic TCP Connection Transitions
+
+
+   Using the notation "<." to mean "must precede", the precedence rules
+   are:
+
+   (1)  Logical ordering: must open connection before closing it:
+
+        b <. e
+
+   (2)  Causality -- cannot receive ACK(x) before x has been sent:
+
+        a <. c and i <. c and d <. f
+
+   (3)  Acknowledgments are cumulative
+
+        c <. f
+
+   (4)  First packet in each direction must contain a SYN.
+
+        b <. c and b <. f
+
+   (5)  TIME-WAIT state
+
+        Whenever d precedes e in the sequence, g must be the last
+        transition.
+
+
+
+
+Braden                                                         [Page 25]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+   Applying these rules, we can enumerate all possible permutations of
+   the events and summarize them in a state transition diagram.  Figure
+   9 shows the result, with boxes representing the states and directed
+   arcs representing the transitions.
+
+          ________            ________
+         |        |    h     |        |
+         | CLOSED |--------->| LISTEN |
+         |________|          |________|
+              |                   |
+              | a                 | i
+          ____V____           ____V___                 ________
+         |        |    b     |        |      e        |        |
+         |        |--------->|        |-------------->|        |
+         |________|          |________|               |________|
+            /                    /   |                /       |
+           /                    /    | c           d /        | c
+          /                    /   __V_____          |    ____V___
+         /                    /   |        | e       |   |        |
+      d  |                d  /    |        |------------>|        |
+         |                   |    |________|         |   |________|
+         |                   |       |               |         |
+         |                   |       |            ___V____     |
+         |                   |       |           |        |    |
+         |                   |       |           |        |    |
+         |                   |       |           |________|    |
+         |                   |       |                   |     |
+     ____V___          ______V_      |     ________      |     |
+    |        |    b   |        | e   |    |        |     |     |
+    |        |------->|        |--------->|        |     |     |
+    |________|        |________|     |    |________|     |     |
+                              |      /          |        |     |
+                            c |     / d       c |      c |   d |
+                              |    /            |        |     |
+                             _V___V__       ____V___     V_____V_
+                            |        |  e  |        |   |        |
+                            |        |---->|        |   |        |
+                            |________|     |________|   |________|
+                                 |              |           |
+                                 | f            | f         | f
+                             ____V___       ____V___     ___V____
+                            |        |  e  | TIME-  | g |        |
+                            |        |---->|   WAIT |-->| CLOSED |
+                            |________|     |________|   |________|
+
+
+               Figure 9: Basic State Diagram
+
+
+
+
+Braden                                                         [Page 26]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+   Although Figure 9 gives a correct representation of the possible
+   event sequences, it is not quite correct for the actions, which do
+   not compose as shown.   In particular, once a control bit X has been
+   sent, it must continue to be sent until ACK(X) is received.  This
+   requires new transitions with modified actions, shown in the
+   following list.  We use the labeling convention that transitions with
+   the same event part all have the same letter, with different numbers
+   of primes to indicate different actions.
+
+          Label              Event / Action
+          _____              _______________________________________
+            b' (=i)          rcv SYN [No TAO] / snd SYN,ACK(SYN)
+            b''              rcv SYN [No TAO] / snd SYN,FIN,ACK(SYN)
+            d'               CLOSE / snd SYN,FIN
+            e'               rcv FIN / snd SYN,ACK(FIN)
+            e''              rcv FIN / snd FIN,ACK(FIN)
+            e'''             rcv FIN / snd SYN,FIN,ACK(FIN)
+
+   Figure 10 shows the state diagram of Figure 9, with the modified
+   transitions and with the states used by standard TCP [STD-007]
+   identified. Those states that do not occur in standard TCP are
+   numbered 1-5.
+
+   Standard TCP has another implied restriction: a FIN bit cannot be
+   recognized before the connection has been synchronized, i.e., c <. e.
+   This eliminates from standard TCP the states 1, 2, and 5 shown in
+   Figure 10.  States 3 and 4 are needed if a FIN is to be piggy-backed
+   on a SYN segment (note that the states shown in Figure 1 are actually
+   wrong; the states shown as SYN-SENT and ESTABLISHED are really states
+   3 and 4).  In the absence of piggybacking the FIN bit, Figure 10
+   reduces to the standard TCP state diagram [STD-007].
+
+   The FSM described in Figure 10 is intended to be applied
+   cumulatively; that is, parsing a single packet header may lead to
+   more than one transition.  For example, the standard TCP state
+   diagram includes a direct transition from SYN-SENT to ESTABLISHED:
+
+       rcv SYN,ACK(SYN) / snd ACK(SYN).
+
+   This is transition b followed immediately by c.
+
+
+
+
+
+
+
+
+
+
+
+Braden                                                         [Page 27]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+          ________            ________
+         |        |     h    |        |
+         | CLOSED |--------->| LISTEN |
+         |________|          |________|
+              |                   |
+              | a                 | i
+          ____V____           ____V___                 ________
+         | SYN-   |     b'   |  SYN-  |     e'        |        |
+         |   SENT |--------->|RECEIVED|-------------->|   1    |
+         |________|          |________|               |________|
+            /                    /   |                  |     |
+         d'/                  d'/    | c             d' |   c |
+          /                    /   __V_____             |    _V______
+         /                    /   |ESTAB-  | e          |   | CLOSE- |
+         |                   /    |  LISHED|------------|-->|   WAIT |
+         |                   |    |________|            |   |________|
+         |                   |       |                  |      |
+         |                   |       |             _____V__    |
+         |                   |       |            |        |   |
+         |                   |       |            |   2    |   |
+         |                   |       |            |________|   |
+         |                   |       |                   |     |
+     ____V___          ______V_      |     ________      |     |
+    |        |  b''   |        |e''' |    |        |     |     |
+    |    3   |------->|    4   |--------->|    5   |     |     |
+    |________|        |________|     |    |________|     |     |
+                              |      /          |        |     |
+                            c |     / d       c |      c |   d |
+                              |    /            |        |     |
+                             _V___V__       ____V___     V_____V_
+                            | FIN-   | e'' |        |   | LAST-  |
+                            |  WAIT-1|---->|CLOSING |   |   ACK  |
+                            |________|     |________|   |________|
+                                 |              |           |
+                                 | f            | f         | f
+                             ____V___       ____V___     ___V____
+                            | FIN-   |  e  | TIME-  | g |        |
+                            |  WAIT-2|---->|   WAIT |-->| CLOSED |
+                            |________|     |________|   |________|
+
+
+        Figure 10: Basic State Diagram -- Correct Actions
+
+
+   Next we introduce TAO.  If the TAO test succeeds, the connection
+   becomes half-synchronized.  This requires a new set of states,
+   mirroring the states of Figure 10, beginning with acceptance of a SYN
+   (transition "b" or "i"), and ending when ACK(SYN) arrives (transition
+
+
+
+Braden                                                         [Page 28]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+   "c").  Figure 11 shows the result of augmenting Figure 10 with the
+   additional states for TAO.  The transitions are defined in the
+   following table:
+
+           Key for Figure 11: Complete State Diagram with TAO
+
+
+                Label            Event / Action
+                _____            ________________________
+
+                  a              OPEN / create TCB, snd SYN
+                  b'             rcv SYN [no TAO]/ snd SYN,ACK(SYN)
+                  b''            rcv SYN [no TAO]/ snd SYN,FIN,ACK(SYN)
+                  c              rcv ACK(SYN) /
+                  d              CLOSE / snd FIN
+                  d'             CLOSE / snd SYN,FIN
+                  e              rcv FIN / snd ACK(FIN)
+                  e'             rcv FIN / snd SYN,ACK(FIN)
+                  e''            rcv FIN / snd FIN,ACK(FIN)
+                  e'''           rcv FIN / snd SYN,FIN,ACK(FIN)
+                  f              rcv ACK(FIN) /
+                  g              timeout=2MSL / delete TCB
+                  h              passive OPEN / create TCB
+                  i (= b')       rcv SYN [no TAO]/ snd SYN,ACK(SYN)
+                  j              rcv SYN [TAO OK] / snd SYN,ACK(SYN)
+                  k              rcv SYN [TAO OK] / snd SYN,FIN,ACK(SYN)
+
+
+
+   Each new state in Figure 11 bears a very simple relationship to a
+   standard TCP state.  We indicate this by naming the new state with
+   the standard state name followed by a star.  States SYN-SENT* and
+   SYN-RECEIVED* differ from the corresponding unstarred states in
+   recording the fact that a FIN has been sent.  The other new states
+   with starred names differ from the corresponding unstarred states in
+   being half-synchronized (hence, a SYN bit needs to be transmitted).
+
+   The state diagram of Figure 11 is more general than required for
+   transaction processing.  In particular, it handles simultaneous
+   connection synchronization from both sides, allowing one or both
+   sides to bypass the 3-way handshake.  It includes other transitions
+   that are unlikely in normal transaction processing, for example, the
+   server sending a FIN before it receives a FIN from the client
+   (ESTABLISHED* -> FIN-WAIT-1* in Figure 11).
+
+
+
+
+
+
+
+Braden                                                         [Page 29]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+   ________                  ________
+  |        |      h         |        |
+  | CLOSED |--------------->| LISTEN |
+  |________|                |________|
+       |                     /     |
+      a|                    / i    | j
+       |                   /       |
+       |                  /       _V______               ________
+       |           j      |      |ESTAB-  |       e'    | CLOSE- |
+       |        /---------|----->| LISHED*|------------>|   WAIT*|
+       |       /          |      |________|             |________|
+       |      /           |       |     |                 |    |
+       |     /            |       |d'   | c            d' |    | c
+   ____V___ /       ______V_      |    _V______           |   _V______
+  | SYN-   |   b'  |  SYN-  | c   |   |ESTAB-  |  e       |  | CLOSE- |
+  |   SENT |------>|RECEIVED|-----|-->|  LISHED|----------|->|   WAIT |
+  |________|       |________|     |   |________|          |  |________|
+       |               |          |     |                 |       |
+       |               |          |     |              ___V____   |
+       |               |          |     |             | LAST-  |  |
+       | d'            | d'       | d'  | d           |  ACK*  |  |
+       |               |          |     |             |________|  |
+       |               |          |     |                    |    |
+       |               |    ______V_    |        ________    |c   |d
+       |          k    |   |  FIN-  |   |  e''' |        |   |    |
+       |        /------|-->| WAIT-1*|---|------>|CLOSING*|   |    |
+       |       /       |   |________|   |       |________|   |    |
+       |      /        |          |     |            |       |    |
+       |     /         |          | c   |            | c     |    |
+   ____V___ /      ____V___       V_____V_       ____V___    V____V__
+  | SYN-   |  b'' |  SYN-  |  c  |  FIN-  | e'' |        |  | LAST-  |
+  |  SENT* |----->|RECEIVD*|---->| WAIT-1 |---->|CLOSING |  |   ACK  |
+  |________|      |________|     |________|     |________|  |________|
+                                     |               |           |
+                                     | f             | f         | f
+                                  ___V____       ____V___     ___V____
+                                 |  FIN-  | e   |TIME-   | g |        |
+                                 | WAIT-2 |---->|   WAIT |-->| CLOSED |
+                                 |________|     |________|   |________|
+
+       Figure 11: Complete State Diagram with TAO
+
+
+
+   The relationship between starred and unstarred states is very
+   regular.  As a result, the state extensions can be implemented very
+   simply using the standard TCP FSM with the addition of two "hidden"
+   boolean flags, as described in the functional specification memo
+
+
+
+Braden                                                         [Page 30]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+   [TTCP-FS].
+
+   As an example of the application of Figure 11, consider the minimal
+   transaction shown in Figure 12.
+
+
+       TCP A  (Client)                                 TCP B (Server)
+       _______________                                 ______________
+
+       CLOSED                                                  LISTEN
+
+   1.  SYN-SENT*    --> <SYN,data1,FIN,CC=x1> -->     CLOSE-WAIT*
+                                                      (TAO test OK=>
+                                                       data1->user_B)
+
+                                                             LAST-ACK*
+              <-- <SYN,ACK(FIN),data2,FIN,CC=y1,CC.ECHO=x1> <--
+   2.  TIME-WAIT
+    (TAO test OK,
+     data2->user_A)
+
+
+   3.  TIME-WAIT          --> <ACK(FIN),CC=x2> -->              CLOSED
+
+       (timeout)
+         CLOSED
+
+
+             Figure 12: Minimal Transaction Sequence
+
+   Sending segment #1 leaves the client end in SYN-SENT* state, which
+   differs from SYN-SENT state in recording the fact that a FIN has been
+   sent.  At the server end, passing the TAO test enters ESTABLISHED*
+   state, which passes the data to the user as in ESTABLISHED state and
+   also records the fact that the connection is half synchronized.  Then
+   the server processes the FIN bit of segment #1, moving to CLOSE-WAIT*
+   state.
+
+   Moving to CLOSE-WAIT* state should cause the server to send a segment
+   containing SYN and ACK(FIN).  However, transmission of this segment
+   is deferred so the server can piggyback the response data and FIN on
+   the same segment, unless a timeout occurs first.  When the server
+   does send segment #2 containing the response data2 and a FIN, the
+   connection advances from CLOSE-WAIT* to LAST-ACK* state; the
+   connection is still half-synchronized from B's viewpoint.
+
+   Processing segment #2 at the client again results in multiple
+   transitions:
+
+
+
+Braden                                                         [Page 31]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+       SYN-SENT* -> FIN-WAIT-1* -> CLOSING* -> CLOSING -> TIME-WAIT
+
+   These correspond respectively to receiving a SYN, a FIN, an ACK for
+   A's SYN, and an ACK for A's FIN.
+
+   Figure 13 shows a slightly more complex example, a transaction
+   sequence in which request and response data each require two
+   segments.  This figure assumes that both client and server TCP are
+   well-behaved, so that e.g., the client sends the single segment #5 to
+   acknowledge both data segments #3 and #4.  SEG.CC values are omitted
+   for clarity.
+
+
+        _T_C_P__A                                            _T_C_P__B
+
+
+    1.  SYN-SENT*      --> <SYN,data1>   -->         ESTABLISHED*
+                                                    (TAO OK,
+                                                     data1-> user)
+
+    2.  SYN-SENT*      --> <data2,FIN>   -->          CLOSE-WAIT*
+                                                    (data2-> user)
+
+    3.  FIN-WAIT-2     <-- <SYN,ACK(FIN),data3> <--   CLOSE-WAIT*
+         (data3->user)
+
+    4.  TIME-WAIT      <-- <ACK(FIN),data4,FIN> <--     LAST-ACK*
+         (data4->user)
+
+    5.  TIME-WAIT      --> <ACK(FIN)> -->                  CLOSED
+
+
+         Figure 13. Multi-Packet Request/Response Transaction
+
+
+7.  CONCLUSIONS AND ACKNOWLEDGMENTS
+
+   TCP was designed to be a highly symmetric protocol.  This symmetry is
+   evident in the piggy-backing of acknowledgments on data and in the
+   common header format for data segments and acknowledgments.  On the
+   other hand, the examples and discussion in this memo are in general
+   highly unsymmetrical; the actions of a "client" are clearly
+   distinguished from those of a "server".  To explain this apparent
+   discrepancy, we note the following.  Even when TCP is used for
+   virtual circuit service, the data transfer phase is symmetrical but
+   the open and close phases are not.  A minimal transaction, consisting
+   of one segment in each direction, compresses the open, data transfer,
+   and close phases together, making the asymmetry of the open and
+
+
+
+Braden                                                         [Page 32]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+   close phases dominant.  As request and response messages increase in
+   size, the virtual circuit model becomes increasingly relevant, and
+   symmetry again dominates.
+
+   TCP's 3-way handshake precludes any performance gain from including
+   data on a SYN segment, while TCP's full-duplex data-conserving close
+   sequence ties up communication resources to the detriment of high-
+   speed transactions.  Merely loading more control bits onto TCP data
+   segments does not provide efficient transaction service.  To use TCP
+   as an effective transaction transport protocol requires bypassing the
+   3-way handshake and shortening the TIME-WAIT delay.  This memo has
+   proposed a backwards-compatible TCP extension to accomplish both
+   goals.  It is our hope that by building upon the current version of
+   TCP, we can give a boost to community acceptance of the new
+   facilities.  Furthermore, the resulting protocol implementations will
+   retain the algorithms that have been developed for flow and
+   congestion control in TCP [Jacobson88].
+
+   O'Malley and Peterson have recently recommended against backwards-
+   compatible extensions to TCP, and suggested instead a mechanism to
+   allow easy installation of alternative versions of a protocol [RFC-
+   1263].  While this is an interesting long-term approach, in the
+   shorter term we suggest that incremental extension of the current TCP
+   may be a more effective route.
+
+   Besides the backward-compatible extension proposed here, there are
+   two other possible approaches to making efficient transaction
+   processing widely available in the Internet: (1) a new version of TCP
+   or (2) a new protocol specifically adapted to transactions.  Since
+   current TCP "almost" supports transactions, we favor (1) over (2).  A
+   new version of TCP that retained the semantics of STD-007 but used 64
+   bit sequence numbers with the procedures and states described in
+   Sections 3, 4, and 6 of this memo would support transactions as well
+   as virtual circuits in a clean, coherent manner.
+
+   A potential application of transaction-mode TCP might be SMTP.  If
+   commands and responses are batched, in favorable cases complete SMTP
+   delivery operations on short messages could be performed with a
+   single minimal transaction; on the other hand, the body of a message
+   may be arbitrarily large.  Using a TCP extended as in this memo could
+   significantly reduce the load on large mail hosts.
+
+   This work began as an elaboration of the concept of TAO, due to Dave
+   Clark.  I am grateful to him and to Van Jacobson, John Wroclawski,
+   Dave Borman, and other members of the End-to-End Research group for
+   helpful ideas and critiques during the long development of this work.
+   I also thank Liming Wei, who tested the initial implementation in Sun
+   OS.
+
+
+
+Braden                                                         [Page 33]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+APPENDIX A -- TIME-WAIT STATE AND THE 2-PACKET EXCHANGE
+
+   This appendix considers the implications of reducing TIME-WAIT state
+   delay below that given in formula [2].
+
+   An immediate consequence of this would be the requirement for the
+   server host to accept an initial SYN for a connection in LAST-ACK
+   state.  Without the transaction extensions, the arrival of a new
+   <SYN> in LAST-ACK state looks to TCP like a half-open connection, and
+   TCP's rules are designed to restore correspondence by destroying the
+   state (through sending a RST segment) at one end or the other.  We
+   would need to thwart this action in the case of transactions.
+
+   There are two different possible ways to further reduce TIME-WAIT
+   delay.
+
+   (1)  Explicit Truncation of TIME-WAIT state
+
+        TIME-WAIT state could be explicitly truncated by accepting a new
+        sendto() request for a connection in TIME-WAIT state.
+
+        This would allow the ACK(FIN) segment to be delayed and sent
+        only if a timeout occurs before a new request arrives.  This
+        allows an ideal 2-segment exchange for closely-spaced
+        transactions, which would restore some symmetry to the
+        transaction exchange.  However, explicit truncation would
+        represent a significant change in many implementations.
+
+        It might be supposed that even greater symmetry would result if
+        the new request segment were a <SYN,ACK> that explicitly
+        acknowledges the previous reply, rather than a <SYN> that is
+        only an implicit acknowledgment.  However, the new request
+        segment might arrive at B to find the server side in either
+        LAST-ACK or CLOSED state, depending upon whether the ACK(FIN)
+        had arrived.  In CLOSED state, a <SYN,ACK> would not be
+        acceptable.  Hence, if the client sent an initial <SYN,ACK>
+        instead of a <SYN> segment, there would be a race condition at
+        the server.
+
+   (2)  No TIME-WAIT delay
+
+        TIME-WAIT delay could be removed entirely.  This would imply
+        that the ACK(FIN) would always be sent (which does not of course
+        guarantee that it will be received).  As a result, the arrival
+        of a new SYN in LAST-ACK state would be rare.
+
+        This choice is much simpler to implement.  Its drawback is that
+        the server will get a false failure report if the ACK(FIN) is
+
+
+
+Braden                                                         [Page 34]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+        lost.  This may not matter in practice, but it does represent a
+        significant change of TCP semantics.  It should be noted that
+        reliable delivery of the reply is not an issue.  The client
+        enters TIME-WAIT state only after the entire reply, including the
+        FIN bit, has been received successfully.
+
+   The server host B must be certain that a new request received in
+   LAST-ACK state is indeed a new SYN and not an old duplicate;
+   otherwise, B could falsely acknowledge a previous response that has
+   not in fact been delivered to A.  If the TAO comparison succeeds, the
+   SYN must be new; however, the server has a dilemma if the TAO test
+   fails.
+
+   In Figure A.1, for example, the reply segment from the first
+   transaction has been lost; since it has not been acknowledged, it is
+   still in B's retransmission queue.  An old duplicate request, segment
+   #3, arrives at B and its TAO test fails.  B is in the position of
+   having old state it cannot discard (the retransmission queue) and
+   needing to build new state to pursue a 3-way handshake to validate
+   the new SYN.  If the 3-way handshake failed, it would need to restore
+   the earlier LAST-ACK* state.  (Compare with Figure 15 "Old Duplicate
+   SYN Initiates a Reset on Two Passive Sockets" in STD-007).  This
+   would be complex and difficult to accomplish in many implementations.
+
+
+       TCP A  (Client)                               TCP B (Server)
+       _______________                               ______________
+
+         CLOSED                                          LISTEN
+
+
+   1.    SYN-SENT*       --> <SYN,data1,FIN> -->    CLOSE-WAIT*
+                                                     (TAO test OK;
+                                                      data1->server)
+
+   2.        (lost) X<-- <SYN,ACK(FIN),data2,FIN> <-- LAST-ACK*
+
+                   (old duplicate)
+   3.                     ... <SYN,data3,FIN> -->     LAST-ACK*
+                                                  (TAO test fail;
+                                                   3-way handshake?)
+
+                 Figure A.1: The Server's Dilemma
+
+
+   The only practical action B can take when the TAO test fails on a
+   new SYN received in LAST-ACK state is to ignore the SYN, assuming it
+   is really an old duplicate.  We must pursue the possible consequences
+
+
+
+Braden                                                         [Page 35]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+   of this action.
+
+   Section 3.1 listed four possible reasons for failure of the TAO test
+   on a legitimate SYN segment: (1) no cached state, (2) out-of-order
+   delivery of SYNs, (3) wraparound of CCgen relative to the cached
+   value, or (4) the M values advance too slowly.   We are assuming that
+   there is a cached CC value at B (otherwise, the SYN cannot be
+   acceptable in LAST-ACK state).  Wrapping the CC space is very
+   unlikely and probably impossible; it is difficult to imagine
+   circumstances which would allow the new SYN to be delivered but not
+   the ACK(FIN), especially given the long wraparound time of CCgen.
+
+   This leaves the problem of out-of-order delivery of two nearly-
+   concurrent SYNs for different ports.  The second to be delivered may
+   have a lower CC option and thus be locked out.  This can be solved by
+   using a new CCgen value for every retransmission of an initial SYN.
+
+   Truncation of TIME-WAIT state and acceptance of a SYN in LAST-ACK
+   state should take place only if there is a cached CC value for the
+   remote host.  Otherwise, a SYN arriving in LAST-ACK state is to be
+   processed by normal TCP rules, which will result in a RST segment
+   from either A or B.
+
+   This discussion leads to a paradigm for rejecting old duplicate
+   segments that is different from TAO.  This alternative scheme is
+   based upon the following:
+
+   (a)  Each retransmission of an initial SYN will have a new value of
+        CC, as described above.
+
+        This provision takes care of reordered SYNs.
+
+   (b)  A host maintains a distinct CCgen value for each remote host.
+        This value could easily be maintained in the same cache used for
+        the received CC values, e.g., as cache.CCgen[].
+
+        Once the caches are primed, it should always be true that
+        cache.CCgen[B] on host A is equal to cache.CC[A] on host B, and
+        the next transaction from A will carry a CC value exactly 1
+        greater.  Thus, there is no problem of wraparound of the CC
+        value.
+
+   (c)  A new SYN is acceptable if its SEG.CC > cache.CC[client],
+        otherwise the SYN is ignored as an old duplicate.
+
+   This alternative paradigm was not adopted because it would be a
+   somewhat greater perturbation of TCP rules, because it may not have
+   the robustness of TAO, and because all of its consequences may not be
+
+
+
+Braden                                                         [Page 36]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+   understood.
+
+
+REFERENCES
+
+    [Birrell84]  Birrell, A. and B. Nelson, "Implementing Remote
+      Procedure Calls", ACM TOCS, Vol. 2, No. 1, February 1984.
+
+    [Clark88]  Clark, D., "The Design Philosophy of the Internet
+      Protocols", ACM SIGCOMM '88, Stanford, CA, August 1988.
+
+    [Clark89]  Clark, D., Private communication, 1989.
+
+    [Garlick77]  Garlick, L., R. Rom, and J. Postel, "Issues in Reliable
+      Host-to-Host Protocols", Proc. Second Berkeley Workshop on
+      Distributed Data Management and Computer Networks, May 1977.
+
+    [HR-COMM]  Braden, R., Ed., "Requirements for Internet Hosts --
+      Communication Layers", STD-003, RFC-1122, October 1989.
+
+    [Jacobson88] Jacobson, V., "Congestion Avoidance and Control",
+      SIGCOMM '88, Stanford, CA., August 1988.
+
+    [Jacobson90] Jacobson, V., private communication, 1990.
+
+    [Liskov90]  Liskov, B., Shrira, L., and J. Wroclawski, "Efficient
+      At-Most-Once Messages Based on Synchronized Clocks", ACM SIGCOMM
+      '90, Philadelphia, PA, September 1990.
+
+    [RFC-955]  Braden, R., "Towards a Transport Service Transaction
+      Protocol", RFC-955, September 1985.
+
+    [RFC-1185]  Jacobson, V., Braden, R., and Zhang, L., "TCP Extension
+      for High-Speed Paths", RFC-1185, October 1990.
+
+    [RFC-1263]  O'Malley, S. and L. Peterson, "TCP Extensions Considered
+      Harmful", RFC-1263, University of Arizona, October 1991.
+
+    [RFC-1323]  Jacobson, V., Braden, R., and Borman, D., "TCP
+      Extensions for High Performance", RFC-1323, February 1991.
+
+    [RFC-1337]  Braden, R., "TIME-WAIT Assassination Hazards in TCP",
+      RFC-1337, May 1992.
+
+    [STD-007]  Postel, J., "Transmission Control Protocol - DARPA
+      Internet Program Protocol Specification", STD-007, RFC-793,
+      September 1981.
+
+
+
+
+Braden                                                         [Page 37]
+
+RFC 1379              Transaction TCP -- Concepts          November 1992
+
+
+    [TTCP-FS]  Braden, R., "Transaction TCP -- Functional
+      Specification", Work in Progress, September 1992.
+
+    [Watson81]  Watson, R., "Timer-based Mechanisms in Reliable
+      Transport Protocol Connection Management", Computer Networks, Vol.
+      5, 1981.
+
+Security Considerations
+
+   Security issues are not discussed in this memo.
+
+Author's Address
+
+   Bob Braden
+   University of Southern California
+   Information Sciences Institute
+   4676 Admiralty Way
+   Marina del Rey, CA 90292
+
+   Phone: (310) 822-1511
+   EMail: Braden@ISI.EDU
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Braden                                                         [Page 38]
+
\ No newline at end of file
diff --git a/ext/picotcp/RFC/rfc1470.txt b/ext/picotcp/RFC/rfc1470.txt
new file mode 100644
index 0000000..5ccb856
--- /dev/null
+++ b/ext/picotcp/RFC/rfc1470.txt
@@ -0,0 +1,10755 @@
+
+
+
+
+
+
+Network Working Group                                           R. Enger
+Request for Comments: 1470                                           ANS
+FYI: 2                                                       J. Reynolds
+Obsoletes: 1147                                                      ISI
+                                                                 Editors
+                                                               June 1993
+
+
+               FYI on a Network Management Tool Catalog:
+          Tools for Monitoring and Debugging TCP/IP Internets
+                       and Interconnected Devices
+
+Status of this Memo
+
+   This memo provides information for the Internet community.  It does
+   not specify an Internet standard.  Distribution of this memo is
+   unlimited.
+
+Abstract
+
+   The goal of this FYI memo is to provide an update to FYI 2, RFC 1147
+   [1], which provided practical information to site administrators and
+   network managers.  New and/or updated tools are listed in this RFC.
+   Additional descriptions are welcome, and should be sent to: noctools-
+   entries@merit.edu.
+
+Introduction
+
+   A static document cannot incorporate references to the latest tools
+   nor recent revisions to the older catalog entries.  To provide a more
+   timely and responsive information source, the NOCtools catalog is
+   available on-line via the Internet and Usenet.
+
+      news    comp.networks.noctools
+      ftp     wuarchive.wustl.edu:/doc/noctools
+
+   Because of publication delays and other factors, some of the entries
+   in this catalog may be out of date.  The reader is urged to consult
+   the on-line service to obtain the most up-to-date information.
+
+   The index provided in this document reflects the current contents of
+   the on-line documentation.
+
+   The NOCtools2 Working Group of the Internet Engineering Task Force
+   (IETF) has compiled this revised catalog.  Future revisions will be
+   incorporated into the on-line NOCtools catalog.  The reader is
+   encouraged to submit new or revised entries for (near-immediate)
+   electronic publication.
+
+
+
+NOCTools2 Working Group                                         [Page 1]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   The tools described in this catalog are in no way endorsed by the
+   IETF.  For the most part, we have neither evaluated the tools in this
+   catalog, nor validated their descriptions.  Most of the descriptions
+   of commercial tools have been provided by vendors.  Caveat Emptor.
+
+Acknowledgements
+
+   This catalog is the result of work on the part of the NOCTools2
+   Working Group of the User Services Area of the IETF.  The following
+   individuals made especially notable contributions: Chris Myers,
+   Darren Kinley, Gary Malkin, Mohamed Ellozy, and Mike Patton.
+
+Current Postings
+
+   The current contents of the NOCtools catalog may be retrieved via
+   anonymous FTP from wuarchive.wustl.edu.  The entries are stored as
+   individual files in the directory /doc/noctools.
+
+"No-Writeups" Appendix
+
+   This section contains references to tools which are known to exist,
+   but which have not been fully cataloged.  If anyone wishes to author
+   an entry for one of these tools please contact us at:
+
+        noctools-request@merit.edu
+
+   Keep in mind that if these or other tools are included in the future,
+   they will be available in the on-line version of the catalog.
+
+   Each mention is separated by a <form-feed> for improved readability.
+   If you intend to actually print out this section of the catalog, then
+   you should probably strip out the <ff>.
+
+How to Submit/Update an Entry
+
+      1) review the template included below to determine what
+         information you will need to collect,
+      2) review the keywords to see what your indexing options are,
+      3) assemble (update) catalog entry to include results of
+         1) and 2).
+      4) Submit your entry using either of the following two methods:
+
+         a) Post your submission to: comp.internet.noctools.submissions
+         b) Email your submission to: noctools-entries@merit.edu
+
+   New entries will be circulated automatically upon reception.  As time
+   permits, the NOCtools editors will review recent submissions and
+   incorporate them into the master indexes.  Enquiries regarding the
+
+
+
+NOCTools2 Working Group                                         [Page 2]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   status of a submission should be E-Mailed to:
+
+                        noctools-request@merit.edu
+
+   Those submitting an entry to the catalog should ensure that any E-
+   mail addresses provided are correct and functional.  Either the
+   catalog editors or prospective users of your tool may wish to reach
+   you.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                         [Page 3]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+TEMPLATE
+
+   NAME
+           <tool-name>
+
+
+   KEYWORDS
+           [<keyword-A1>[,<keyword-A2>[,...,<keyword-An>]]];
+           [<keyword-B1>[,<keyword-B2>[,...,<keyword-Bn>]]];
+           [<keyword-C1>[,<keyword-C2>[,...,<keyword-Cn>]]];
+           [<keyword-D1>[,<keyword-D2>[,...,<keyword-Dn>]]];
+           [<keyword-E1>[,<keyword-E2>[,...,<keyword-En>]]].
+
+
+   ABSTRACT
+           <summary of the tool>
+           <summary of the tool>
+           <summary of the tool>
+
+
+   MECHANISM
+           <high level technical details of how it works>
+           <high level technical details of how it works>
+           <high level technical details of how it works>
+
+
+   CAVEATS
+           <any warnings or cautions>
+           <any warnings or cautions>
+           <any warnings or cautions>
+
+
+   BUGS
+           <any warnings or cautions>
+           <any warnings or cautions>
+           <any warnings or cautions>
+
+
+   LIMITATIONS
+           <any warnings or cautions>
+           <any warnings or cautions>
+           <any warnings or cautions>
+
+
+   HARDWARE REQUIRED
+           <list any hardware requirements>
+           <list any hardware requirements>
+           <list any hardware requirements>
+
+
+
+NOCTools2 Working Group                                         [Page 4]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   SOFTWARE REQUIRED
+           <list any software requirements>
+           <list any software requirements>
+           <list any software requirements>
+
+
+   AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+           <How to acquire the tool.>
+           <Location/Contact Info to access/obtain tool>
+
+
+   CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+           <Contact info for person responsible for catalog entry>
+
+
+   DATE OF MOST RECENT UPDATE TO THIS CATALOG ENTRY
+           <YYMMDD>
+
+Keywords
+
+   This catalog uses "keywords" for terse characterizations of the
+   tools.  Keywords are abbreviated attributes of a tool or its use.  To
+   allow cross-comparison of tools, uniform keyword definitions have
+   been developed, and are given below.  Following the definitions,
+   there is an index of catalog entries by keyword.
+
+Keyword Definitions
+
+   The keywords are always listed in a predefined order, sorted first by
+   the general category into which they fall, and then alphabetically.
+   The categories that have been defined for management tool keywords
+   are:
+
+               o    the general management area to which a tool
+                    relates or a tool's functional role;
+
+               o    the network resources or components that are
+                    managed;
+
+               o    the mechanisms or methods a tool uses to
+                    perform its functions;
+
+               o    the operating system and hardware environment
+                    of a tool; and
+
+               o    the characteristics of a tool as a hardware
+                    product or software release.
+
+
+
+
+NOCTools2 Working Group                                         [Page 5]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   The keywords used to describe the general management area or
+   functional role of a tool are:
+
+          Alarm
+               a reporting/logging tool that can trigger  on  specific
+               events within a network.
+
+          Analyzer
+               a traffic monitor that reconstructs and interprets pro-
+               tocol messages that span several packets.
+
+          Benchmark
+               a tool used to evaluate the performance of network com-
+               ponents.
+
+          Control
+               a tool that can change the state or status of a  remote
+               network resource.
+
+          Debugger
+               a tool that by generating arbitrary packets  and  moni-
+               toring traffic, can drive a remote network component to
+               various states and record its responses.
+
+          Generator
+               a traffic generation tool.
+
+          Manager
+               a distributed network management system or system  com-
+               ponent.
+
+          Map
+               a tool that can discover and report a system's topology
+               or configuration.
+
+          Reference
+               a tool for documenting MIB structure or  system  confi-
+               guration.
+
+          Routing
+               a packet route discovery tool.
+
+          Security
+               a tool for analyzing or reducing threats to security.
+
+          Status
+               a tool that remotely tracks the status of network  com-
+               ponents.
+
+
+
+NOCTools2 Working Group                                         [Page 6]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Traffic
+               a tool that monitors packet flow.
+
+   The keywords used to identify the network resources or components
+   that a tool manages are:
+
+          Bridge
+               a tool for controlling or monitoring LAN bridges.
+
+          CHAOS
+               a tool for controlling or monitoring implementations of
+               the CHAOS protocol suite or network components that use
+               it.
+
+          DECnet
+               a tool for controlling or monitoring implementations of
+               the  DECnet  protocol  suite or network components that
+               use it.
+
+          DNS
+               a Domain Name System debugging tool.
+
+          Ethernet
+               a tool for controlling or monitoring network components
+               on ethernet LANs.
+
+          FDDI
+               a tool for controlling or monitoring network components
+               on FDDI LANs or WANs.
+
+          IP
+               a tool for controlling or monitoring implementations of
+               the  TCP/IP  protocol  suite or network components that
+               use it.
+
+          OSI
+               a tool for controlling or monitoring implementations of
+               the  OSI  protocol suite or network components that use
+               it.
+
+          NFS
+               a Network File System debugging tool.
+
+          Ring
+               a tool for controlling or monitoring network components
+               on Token Ring LANs.
+
+
+
+
+
+NOCTools2 Working Group                                         [Page 7]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          SMTP
+               an SMTP debugging tool.
+
+          Star
+               a tool for controlling or monitoring network components
+               on StarLANs.
+
+   The keywords used to describe a tool's mechanism are:
+
+          CMIS
+               a network management system or component based on
+               CMIS/CMIP, the Common Management Information System and
+               Protocol.
+
+          Curses
+               a tool that uses the "curses" tty interface package.
+
+          Eavesdrop
+               a tool  that  silently  monitors  communications  media
+               (e.g., by putting an ethernet interface into "promiscu-
+               ous" mode).
+
+          NMS
+               the tool is a component of or queries a Network Manage-
+               ment System.
+
+          Ping
+               a tool that sends packet probes such as ICMP echo  mes-
+               sages;  to  help  distinguish tools, we do not consider
+               NMS queries or protocol spoofing (see below) as probes.
+
+          Proprietary
+               a distributed tool that uses proprietary communications
+               techniques to link its components.
+
+          RMON
+               a tool which employs the RMON extensions to SNMP.
+
+          SNMP
+               a network management system or component based on SNMP,
+               the Simple Network Management Protocol.
+
+          Spoof
+               a tool that tests operation of remote protocol  modules
+               by peer-level message exchange.
+
+          X
+               a tool that uses X-Windows.
+
+
+
+NOCTools2 Working Group                                         [Page 8]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   The keywords used to describe a tool's operating environment are:
+
+          DOS
+               a tool that runs under MS-DOS.
+
+          HP
+               a tool that runs on Hewlett-Packard systems.
+
+          Macintosh
+               a tool that runs on Macintosh personal computers.
+
+          OS/2
+               a tool that runs under the OS/2 operating system.
+
+          Standalone
+               an integrated hardware/software tool that requires only
+               a network interface for operation.
+          Sun
+               a tool that runs on Sun Microsystems platforms.
+               (binary distribution built for use on a Sun.)
+
+          UNIX
+               a tool that runs under 4.xBSD UNIX or related OS.
+
+          VMS
+               a tool that runs under DEC's VMS operating system.
+
+   The keywords used to describe a tool's characteristics as a hardware
+   or software acquisition are:
+
+          Free
+               a tool is available at no charge, though other restric-
+               tions may apply (tools that are part of an OS distribu-
+               tion but not otherwise  available  are  not  listed  as
+               "free").
+
+          Library
+               a tool packaged with either an Application  Programming
+               Interface (API) or object-level subroutines that may be
+               loaded with programs.
+
+          Sourcelib
+               a collection of source code  (subroutines)  upon  which
+               developers may construct other tools.
+
+
+
+
+
+
+
+NOCTools2 Working Group                                         [Page 9]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+Tools Indexed by Keywords
+
+   Following is an index of the most up-to-date catalog entries sorted
+   by keyword, which is available via:
+
+      news    comp.networks.noctools.tools
+      ftp     wuarchive.wustl.edu:/doc/noctool
+
+   This index can be used to locate the tools with a particular
+   attribute: tools are listed under each keyword that characterizes
+   them.  The keywords and the subordinate lists of tools under them are
+   in alphabetical order.
+
+   Alarm
+   -----
+   CMIP Library
+   Dual Manager
+   Eagle
+   EMANATE
+   EtherMeter
+   LanProbe
+   LANWatch
+   MONET
+   NetMetrix Load Monitor
+   NetMetrix Protocol Analyzer
+   NETMON for Windows
+   NETscout
+   NOCOL
+   SNMP Libraries and Utilities from Empire Technologies
+   SNMP Libraries and Utilities from SNMP Research
+   snmpd from Empire Technologies
+   SpiderMonitor
+   XNETMON from SNMP Research
+   xnetmon from Wellfleet
+
+   Analyzer
+   --------
+   LANVista
+   LANWatch
+   NetMetrix Protocol Analyzer
+   NETscout
+   PacketView
+   Sniffer
+   SpiderMonitor
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 10]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   Benchmark
+   ---------
+   hammer & anvil
+   iozone
+   LADDIS
+   LANVista
+   nhfsstone
+   SPIMS
+   spray
+   ttcp
+   XNETMON from SNMP Research
+
+   CMIS
+   ----
+   CMIP Library
+   Generic Managed System
+   MIB Browser
+
+   Control
+   -------
+   CMIP Library
+   Dual Manager
+   Eagle
+   MIB Manager from Empire Technologies
+   MONET
+   NETMON for Windows
+   proxyd
+   SNMP Libraries and Utilities from Empire Technologies
+   SNMP Libraries and Utilities from SNMP Research
+   SNMP Packaged Agent System
+   snmpd from Empire Technologies
+   TokenVIEW
+   XNETMON from SNMP Research
+
+   Debugger
+   --------
+   Ethernet Box II
+   LANVista
+   NetMetrix Traffic Generator
+   ping from UCB
+   SPIMS
+   XNETMON from SNMP Research
+
+   Generator
+   ---------
+   hammer & anvil
+   LADDIS
+   LANVista
+
+
+
+NOCTools2 Working Group                                        [Page 11]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   NetMetrix Traffic Generator
+   nhfsstone
+   ping
+   ping from UCB
+   Sniffer
+   SpiderMonitor
+   spray
+   TTCP
+
+   Manager
+   -------
+   Beholder
+   CMIP Library
+   CMU SNMP Distribution
+   decaddrs by Wellfleet
+   Dual Manager
+   EMANATE
+   Ethernet Box II
+   getone by Wellfleet
+   Interactive Network Map
+   LanProbe
+   LANVista
+   MIB Manager from Empire Technologies
+   MONET
+   NetLabs CMOT Agent
+   NetLabs SNMP Agent
+   NETMON for Windows
+   NETscout
+   NNStat
+   NOCOL
+   OverVIEW
+   SAS/CPE for Open Systems Software
+   SNMP Development Kit
+   SNMP Libraries and Utilities from Empire Technologies
+   SNMP Libraries and Utilities from SNMP Research
+   SNMP Packaged Agent System
+   snmpd from Empire Technologies
+   TokenVIEW
+   Tricklet
+   Wollongong-Manager
+   XNETMON from SNMP Research
+   XNETMON from Wellfleet
+   xnetperfmon
+
+   Map
+   ---
+   decaddrs by Wellfleet
+   Dual Manager
+
+
+
+NOCTools2 Working Group                                        [Page 12]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   etherhostprobe
+   EtherMeter
+   Interactive Network Map
+   LanProbe
+   NETMON for Windows
+   Network Integrator I
+   NPRV
+   SNMP Libraries and Utilities from SNMP Research
+   XNETMON by SNMP Research
+   XNETMON by Wellfleet
+
+   Reference
+   ---------
+   EMANATE
+   ethernet-codes
+   HyperMIB
+   MIB Manager from Empire Technologies
+   XNETMON
+
+   Routing
+   -------
+   arp
+   decaddrs by Wellfleet
+   etherhostprobe
+   getone by Wellfleet
+   hopcheck
+   MONET
+   net_monitor
+   NETMON for Windows
+   netstat
+   NPRV
+   ping from UCB
+   query
+   traceroute
+
+   Security
+   --------
+   Computer Security Checklist
+   Dual Manager
+   Eagle
+   EMANATE
+   LAN Patrol
+   SNMP Libraries and Utilities from SNMP Research
+   XNETMON by SNMP Research
+   xnetperfmon
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 13]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   Status
+   ------
+   Beholder
+   CMIP Library
+   CMU SNMP
+   DiG
+   dnsstats
+   doc
+   Dual Manager
+   EMANATE
+   fping
+   getone by Wellfleet
+   host
+   Internet Rover
+   lamers
+   LanProbe
+   mconnect
+   MONET
+   net_monitor
+   Netlabs CMOT Agent
+   Netlabs SNMP Agent
+   NETscout
+   NNStat
+   NOCOL
+   NPRV
+   OverVIEW
+   ping
+   ping from UCB
+   proxyd from SNMP Research
+   SAS/CPE
+   SNMP Development Kit
+   SNMP Libraries and Utilities from Empire Technologies
+   SNMP Libraries and Utilities from SNMP Research
+   SNMP Packaged Agent System
+   PSI SNMP
+   snmpd from Empire Technologies
+   snmpd from SNMP Research
+   TokenVIEW
+   Tricklet
+   vrfy
+   XNETMON by SNMP Research
+   xnetmon by Wellfleet
+   xnetperfmon
+   xup
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 14]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   Traffic
+   -------
+   etherfind
+   EtherMeter
+   Ethernet Box II
+   EtherView
+   getethers
+   LAN Patrol
+   LanProbe
+   LANVista
+   LANWatch
+   ENTM
+   MONET
+   NetMetrix Load Monitor
+   NetMetrix NFS Monitor
+   NetMetrix Protocol Analyzer
+   NetMetrix Traffic Generator
+   NETMON by Mitre
+   NETscout
+   netwatch
+   Network Integrator I
+   nfswatch
+   nhfsstone
+   NNStat
+   ositrace
+   PacketView
+   Sniffer
+   SpiderMonitor
+   spray
+   tcpdump
+   tcplogger
+   trpt
+   ttcp
+   XNETMON by SNMP Research
+
+   Bridge
+   ------
+   decaddrs by Wellfleet
+   EMANATE
+   MIB Manager from Empire Technologies
+   MONET
+   proxyd by SNMP Research
+   SAS/CPE
+   SNMP Libraries and Utilities from SNMP Research
+   SNMP Packaged Agent System
+   snmpd from SNMP Research
+   XNETMON from SNMP Research
+
+
+
+
+NOCTools2 Working Group                                        [Page 15]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   CHAOS
+   -----
+   Interactive Network Map
+   LANWatch
+
+   DECnet
+   ------
+   decaddrs by Wellfleet
+   LANVista
+   LANWatch
+   MONET
+   net_monitor
+   NetMetrix Protocol Analyzer
+   NETMON for Windows
+   NETscout
+   Sniffer
+   SNMP Libraries and Utilities from SNMP Research
+   SpiderMonitor
+   XNETMON from SNMP Research
+   xnetperfmon from SNMP Research
+
+   DNS
+   ---
+   DiG
+   dnsstats
+   doc
+   lamers
+   LANWatch
+   NetMetrix Protocol Analyzer
+   NOCOL
+
+   Ethernet
+   --------
+   arp
+   Beholder
+   Eagle
+   EMANATE
+   etherfind
+   etherhostprobe
+   EtherMeter
+   Ethernet Box II
+   ethernet-codes
+   EtherView
+   getethers
+   LAN Patrol
+   LanProbe
+   LANVista
+   LANWatch
+
+
+
+NOCTools2 Working Group                                        [Page 16]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   ENTM
+   Interactive Network Map
+   MONET
+   NetMetrix Load Monitor
+   NetMetrix NFS Monitor
+   NetMetrix Protocol Analyzer
+   NetMetrix Traffic Generator
+   NETMON for Windows
+   NETscout
+   netwatch
+   Network Integrator I
+   nfswatch
+   NNStat
+   PacketView
+   proxyd from SNMP Research
+   SAS/CPE
+   Sniffer
+   SNMP Libraries and Utilities from SNMP Research
+   SNMP Packaged Agent System from SNMP Research
+   snmpd from SNMP Research
+   SpiderMonitor
+   tcpdump
+   XNETMON from SNMP Research
+   xnetperfmon from SNMP Research
+
+   FDDI
+   ----
+   EMANATE
+   ethernet-codes
+   NetMetrix Load Monitor
+   NetMetrix NFS Monitor
+   NetMetrix Protocol Analyzer
+   NetMetrix Traffic Generator
+   nfswatch
+   SAS/CPE
+   SNMP Libraries and utilities from SNMP Research
+   SNMP Packaged Agent System from SNMP Research
+   snmpd from SNMP Research
+   XNETMON from SNMP Research
+
+   IP
+   --
+   arp
+   CMU SNMP
+   Dual Manager
+   Eagle
+   EMANATE
+   etherfind
+
+
+
+NOCTools2 Working Group                                        [Page 17]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   etherhostprobe
+   EtherView
+   fping
+   getone from Wellfleet
+   hammer & anvil
+   hopcheck
+   Internet Rover
+   LanProbe
+   LANVista
+   LANWatch
+   ENTM
+   Interactive Network Map
+   MIB Manager from Empire Technologies
+   MONET
+   net_monitor
+   Netlabs CMOT Agent
+   Netlabs SNMP Agent
+   NetMetrix Load Monitor
+   NetMetrix Protocol Analyzer
+   NetMetrix Traffic Generator
+   NETMON by Mitre
+   NETMON for Windows
+   NETscout
+   netstat
+   netwatch
+   nfswatch
+   nhfsstone
+   NNStat
+   NOCOL
+   NPRV
+   OverVIEW
+   PacketView
+   ping
+   ping from UCB
+   proxyd from SNMP Research
+   query
+   SAS/CPE
+   SNMP Development Kit
+   SNMP Libraries and Utilities from SNMP Research
+   SNMP Packaged Agent System from SNMP Research
+   PSI SNMP
+   snmpd from Empire Technologies
+   snmpd from SNMP Research
+   PSI SNMP
+   SpiderMonitor
+   SPIMS
+   spray
+   tcpdump
+
+
+
+NOCTools2 Working Group                                        [Page 18]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   tcplogger
+   traceroute
+   trpt
+   ttcp
+   XNETMON from SNMP Research
+   xnetmon from Wellfleet
+   xnetperfmon from SNMP Research
+
+   OSI
+   ---
+   CMIP Library
+   Dual Manager
+   EMANATE
+   LANVista
+   LANWatch
+   Netlabs CMOT Agent
+   NetMetrix Protocol Analyzer
+   NETMON for Windows
+   NETscout
+   NOCOL
+   ositrace
+   proxyd from SNMP Research
+   SAS/CPE
+   Sniffer
+   SNMP Libraries and Utilities from SNMP Research
+   SNMP Packaged Agent System from SNMP Research
+   snmpd from SNMP Research
+   SpiderMonitor
+   SPIMS
+   XNETMON from SNMP Research
+   xnetperfmon from SNMP Research
+
+   NFS
+   ---
+   etherfind
+   EtherView
+   iozone
+   LADDIS
+   NetMetrix NFS Monitor
+   NetMetrix Protocol Analyzer
+   NETscout
+   nfswatch
+   nhfsstone
+   Sniffer
+   tcpdump
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 19]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   Ring
+   ----
+   Eagle
+   EMANATE
+   Interactive Network Map
+   LANVista
+   LANWatch
+   NetMetrix Load Monitor
+   NetMetrix NFS Monitor
+   NetMetrix Protocol Analyzer
+   NetMetrix Traffic Generator
+   NETMON by Mitre
+   NETMON for Windows
+   NETscout
+   netwatch
+   PacketView
+   proxyd from SNMP Research
+   Sniffer
+   SNMP Libraries and Utilities from SNMP Research
+   SNMP Packaged Agent System from SNMP Research
+   snmpd from SNMP Research
+   TokenVIEW
+   XNETMON from SNMP Research
+   xnetperfmon from SNMP Research
+
+   SMTP
+   ----
+   host
+   Internet Rover
+   LANWatch
+   mconnect
+   NetMetrix Protocol Analyzer
+   Sniffer
+   vrfy
+
+   Star
+   ----
+   EMANATE
+   Interactive Network Map
+   LAN Patrol
+   LANWatch
+   NETMON for Windows
+   NETscout
+   proxyd from SNMP Research
+   Sniffer
+   SNMP Libraries and Utilities from SNMP Research
+   SNMP Packaged Agent System from SNMP Research
+   snmpd from SNMP Research
+
+
+
+NOCTools2 Working Group                                        [Page 20]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   XNETMON from SNMP Research
+   xnetperfmon from SNMP Research
+
+   Curses
+   ------
+   Eagle
+   Internet Rover
+   net_monitor
+   nfswatch
+   NOCOL
+   PSI SNMP
+
+   Eavesdrop
+   ---------
+   etherfind
+   Ethernet Box II
+   EtherView
+   LAN Patrol
+   LANVista
+   LANWatch
+   ENTM
+   NetMetrix Load Monitor
+   NetMetrix NFS Monitor
+   NetMetrix Protocol Analyzer
+   NetMetrix Traffic Generator
+   NETMON from Mitre
+   NETscout
+   netwatch
+   nfswatch
+   NNStat
+   OSITRACE
+   PacketView
+   Sniffer
+   SpiderMonitor
+   tcplogger
+   trpt
+
+   NMS
+   ---
+   CMU SNMP
+   decaddrs from Wellfleet
+   Dual Manager
+   EMANATE
+   EtherMeter
+   Ethernet Box II
+   getone from Wellfleet
+   Interactive Network Map
+   MONET
+
+
+
+NOCTools2 Working Group                                        [Page 21]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   Netlabs CMOT Agent
+   Netlabs SNMP Agent
+   NETMON for Windows
+   NETscout
+   NNStat
+   NOCOL
+   OverVIEW
+   proxyd from SNMP Research
+   SNMP Development Kit
+   SNMP Libraries and Utilities from SNMP Research
+   SNMP Packaged Agent System from SNMP Research
+   PSI SNMP
+   snmpd from Empire Technologies
+   snmpd from SNMP Research
+   TokenVIEW
+   XNETMON from SNMP Research
+   xnetmon from Wellfleet
+   xnetperfmon from SNMP Research
+
+   Ping
+   ----
+   etherhostprobe
+   fping
+   getethers
+   hopcheck
+   Interactive Network Map
+   Internet Rover
+   LANWatch
+   net_monitor
+   NOCOL
+   NPRV
+   ping
+   ping from UCB
+   spray
+   traceroute
+   ttcp
+   XNETMON from SNMP Research
+   xup
+
+   Proprietary
+   -----------
+   Eagle
+   EtherMeter
+   Ethernet Box II
+   LanProbe
+   LANVista
+   TokenVIEW
+
+
+
+
+NOCTools2 Working Group                                        [Page 22]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   RMON
+   ----
+   Beholder
+
+   SNMP
+   ----
+   Beholder
+   CMU SNMP
+   decaddrs from Wellfleet
+   Dual Manager
+   EMANATE
+   getone from Wellfleet
+   Interactive Network Map
+   MIB Manager from Empire Technologies
+   MONET
+   Netlabs SNMP Agent
+   NetMetrix Load Monitor
+   NetMetrix NFS Monitor
+   NetMetrix Protocol Analyzer
+   NetMetrix Traffic Generator
+   NETMON for Windows
+   NETscout
+   NOCOL
+   OverVIEW
+   proxyd from SNMP Research
+   SNMP Development Kit
+   SNMP Libraries and utilities from SNMP Research
+   SNMP Packaged Agent System from SNMP Research
+   PSI SNMP
+   snmpd from Empire Technologies
+   snmpd from SNMP Research
+   Wollongong-Manager
+   XNETMON from SNMP Research
+   xnetmon from Wellfleet
+   xnetperfmon from SNMP Research
+
+   Spoof
+   -----
+   DiG
+   doc
+   Internet Rover
+   host
+   LADDIS
+   mconnect
+   nhfsstone
+   NOCOL
+   query
+   SPIMS
+
+
+
+NOCTools2 Working Group                                        [Page 23]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   vrfy
+
+   X
+   -
+   Dual Manager
+   Interactive Network Map
+   MIB Manager from Empire Technologies
+   NetMetrix Load Monitor
+   NetMetrix NFS Monitor
+   NetMetrix Protocol Analyzer
+   NetMetrix Traffic Generator
+   SAS/CPE
+   PSI SNMP
+   XNETMON from SNMP Research
+   xnetperfmon from SNMP Research
+   xup
+
+   DEC
+   ---
+   Wollongong-Manager
+
+   DOS
+   ---
+   Computer Security Checklist
+   Ethernet Box II
+   hammer & anvil
+   hopcheck
+   iozone
+   LAN Patrol
+   LANVista
+   netmon
+   NETMON for Windows
+   netwatch
+   OverVIEW
+   PacketView
+   ping
+   SAS/CPE
+   SNMP Libraries and Utilities from SNMP Research
+   SNMP Packaged Agent System from SNMP Research
+   snmpd from SNMP Research
+   TokenVIEW
+   Wollongong-Manager
+   xnetperfmon from SNMP Research
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 24]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   HP
+   --
+   iozone
+   SAS/CPE
+   xup
+
+   Macintosh
+   ---------
+   HyperMIB
+
+   OS/2
+   ----
+   Beholder
+   Tricklet
+
+   Standalone
+   ----------
+   LANVista
+   Sniffer
+   SNMP Packaged Agent System from SNMP Research
+   SpiderMonitor
+
+   Sun
+   ---
+   Avatar SunSNMPD
+   Wollongong Manager
+
+   UNIX
+   ----
+   arp
+   CMIP Library
+   CMU SNMP
+   decaddrs from Wellfleet
+   DiG
+   doc
+   dnsstats
+   Eagle
+   etherfind
+   etherhostprobe
+   EtherView
+   fping
+   getethers
+   getone from Wellfleet
+   host
+   Interactive Network Map
+   Internet Rover
+   iozone
+   LADDIS
+
+
+
+NOCTools2 Working Group                                        [Page 25]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   lamers
+   mconnect
+   MIB Manager from Empire Technologies
+   MONET
+   net_monitor
+   Dual Manager
+   NetMetrix Load Monitor
+   NetMetrix NFS Monitor
+   NetMetrix Protocol Analyzer
+   NetMetrix Traffic Generator
+   NETMON from Mitre
+   NETscout
+   netstat
+   Network Integrator I
+   nfswatch
+   nhfsstone
+   NNStat
+   NOCOL
+   OSITRACE
+   ping
+   ping from UCB
+   proxyd from SNMP Research
+   query
+   SAS/CPE
+   SNMP Development Kit
+   SNMP Libraries and Utilities from Empire Technologies
+   SNMP Libraries and Utilities from SNMP Research
+   SNMP Packaged Agent System from SNMP Research
+   PSI SNMP
+   snmpd from Empire Technologies
+   snmpd from SNMP Research
+   SPIMS
+   spray
+   tcpdump
+   tcplogger
+   traceroute
+   Tricklet
+   trpt
+   ttcp
+   vrfy
+   XNETMON from SNMP Research
+   xnetmon from Wellfleet
+   xnetperfmon from SNMP Research
+
+   VMS
+   ---
+   arp
+   ENTM
+
+
+
+NOCTools2 Working Group                                        [Page 26]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   fping
+   net_monitor
+   netstat
+   NPRV
+   ping
+   SNMP Libraries and Utilities from SNMP Research
+   tcpdump
+   traceroute
+   ttcp
+   xnetperfmon from SNMP Research
+
+   Free
+   ----
+   arp
+   Beholder
+   CMIP Library
+   CMU SNMP Distribution
+   DiG
+   dnsstats
+   doc
+   ENTM
+   fping
+   getethers
+   hammer & anvil
+   hopcheck
+   host
+   Interactive Network Map
+   Internet Rover
+   iozone
+   lamers
+   net_monitor
+   netmon from Mitre
+   netstat
+   netwatch
+   nfswatch
+   nhfsstone
+   NNStat
+   NOCOL
+   NPRV
+   OSITRACE
+   PING
+   ping from UCB
+   query
+   SNMP Development Kit
+   tcpdump
+   tcplogger
+   traceroute
+   Tricklet
+
+
+
+NOCTools2 Working Group                                        [Page 27]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   trpt
+   ttcp
+   vrfy
+
+   Library
+   -------
+   CMIP Library
+   CMU SNMP
+   Dual Manager
+   NetMetrix Protocol Analyzer
+   NetMetrix Traffic Generator
+   proxyd from SNMP Research
+   SAS/CPE
+
+   Sourcelib
+   ---------
+   Beholder
+   CMIP Library
+   CMU SNMP
+   EMANATE
+   HyperMIB
+   Interactive Network Map
+   Internet Rover
+   LANWatch
+   MIB Manager from Empire Technologies
+   net_monitor
+   NETMON for Windows
+   NOCOL
+   proxyd from SNMP Research
+   SNMP Development Kit
+   SNMP Libraries and Utilities from Empire Technologies
+   SNMP Libraries and Utilities from SNMP Research
+   SNMP Packaged Agent System from SNMP Research
+   snmpd from SNMP Research
+   SpiderMonitor
+   Tricklet
+   XNETMON from SNMP Research
+   xnetperfmon from SNMP Research
+
+Tool Descriptions
+
+   This section is an updated collection of brief descriptions of tools
+   for managing TCP/IP internets.  These entries are in alphabetical
+   order, by tool name.
+
+   The entries all follow a standard format.  Immediately after the NAME
+   of a tool are its associated KEYWORDS.  Keywords are terse
+   descriptions of the purposes or attributes of a tool.  A more
+
+
+
+NOCTools2 Working Group                                        [Page 28]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   detailed description of a tool's purpose and characteristics is given
+   in the ABSTRACT section.  The MECHANISM section describes how a tool
+   works.  In CAVEATS, warnings about tool use are given.  In BUGS,
+   known bugs or bug-report procedures are given.  LIMITATIONS describes
+   the boundaries of a tool's capabilities.  HARDWARE REQUIRED and
+   SOFTWARE REQUIRED relate the operational environment a tool needs.
+   Finally, in AVAILABILITY, pointers to vendors, online repositories,
+   or other sources for a tool are given.
+
+   Where tool names conflict, the vendor name is used as well.  For
+   example, MITRE and SNMP Research each submitted an updated
+   description of a tool called "NETMON".  These tools were
+   independently developed, are functionally different, and run in
+   different environments.  MITRE's tool is listed as "NETMON_MITRE,"
+   and the tool from SNMP Research as "NETMON_WINDOWS_SNMP_RESEARCH".
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 29]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                                    ARP
+
+        NAME
+                arp
+
+        KEYWORDS
+                routing; ethernet, IP;; UNIX, VMS; free.
+
+        ABSTRACT
+                Arp displays and can  modify  the  internet-to-ethernet
+                address  translation  tables  used  by ARP, the address
+                resolution protocol.
+
+        MECHANISM
+                The arp program accesses  operating  system  memory  to
+                read the ARP data structures.
+
+        CAVEATS
+                None.
+
+        BUGS
+                None known.
+
+        LIMITATIONS
+                Only the super user can modify ARP entries.
+
+        HARDWARE REQUIRED
+                No restrictions.
+
+        SOFTWARE REQUIRED
+                BSD UNIX or related OS, or VMS.
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+
+                Available via anonymous FTP from uunet.uu.net, in
+                directory bsd-sources/src/etc.  Available with 4.xBSD
+                UNIX and related operating systems.  For VMS, available
+                as part of TGV MultiNet IP software package, as well as
+                Wollongong's WIN/TCP and Process Software Corporation's
+                TCPware for VMS.
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+                This entry maintained by the NOCtools editors.
+                Send email to noctools-request@merit.edu.
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 30]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                    AVATAR-SNMP-TOOLKIT
+
+          NAME
+                SNMP Application Development Toolkit
+
+          KEYWORDS
+                manager;;SNMP;;sourcelib.
+
+          ABSTRACT
+                snmpapi is an api toolkit for developing SNMP
+                applications and agents. The toolkit is simple and
+                very fast, and can be used for any type of
+                application. It is very well suited for embedded
+                systems such as bridges or routers. An example MIB II
+                agent for Sun Sparcstations is provided. snmpapi is
+                distributed in source form only.
+
+          MECHANISM
+                snmpapi is a library of C functions.
+
+          CAVEATS
+               None.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+                None.
+
+          HARDWARE REQUIRED
+                No restrictions.
+
+          AVAILABILITY
+                Available now. For more information, send e-mail to
+                info@avatar.com.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 31]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                         AVATAR-SUNSNMPD
+
+          NAME
+                sunsnmpd
+
+          KEYWORDS
+                manager;;snmp;sun;.
+
+          ABSTRACT
+                sunsnmpd is a fully supported SNMP agent with MIB II
+                support for Sun Sparcstations running SunOS 4.1 or
+                higher. sunsnmpd supports both SNMP GET and SET
+                operations.
+
+          MECHANISM
+                sunsnmpd is a daemon process which starts up at boot
+                time from the rc.local file. It uses /dev/kmem to access
+                kernel structures.
+
+          CAVEATS
+               None.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+                Must be started by a super user.
+
+          HARDWARE REQUIRED
+                Sun Sparcstations.
+
+          AVAILABILITY
+                Available now. Site licensing only. For more information,
+                send e-mail to info@avatar.com.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 32]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                           ChameLAN-100
+
+        NAME
+                ChameLAN 100
+
+        KEYWORDS
+                analyzer, benchmark, debugger, generator, map,
+                reference, status, traffic; bridge, DECnet, ethernet,
+                FDDI, IP, OSI, NFS, ring; eavesdrop, SNMP, X;
+                standalone, UNIX.
+
+        ABSTRACT
+
+                Tekelec's ChameLAN 100 is a portable diagnostic system
+                for monitoring and simulation of FDDI, Ethernet and
+                Token Ring networks -- simultaneously.  Protocol
+                analysis of multiple topologies, as well as mixed
+                topologies simultaneously, is a key feature of
+                the product family.  Tekelec's proprietary FDDI
+                hardware guarantees complete real-time analysis of
+                networks and network components at the full ring
+                bandwidth of 125 Mbps.  It passively connects to the
+                network and captures 100 percent of the data, measures
+                performance and isolates real-time problems.
+
+                The simulation option offers full bandwidth load
+                generation that allows you to create and simulate any
+                network condition.  It gives you the ability to inject
+                errors and misformed frames.  A set of
+                confidence tests allow simple evaluation of new
+                equipment.  A ring map feature displays network
+                topology and status of all nodes via the SMT
+                process.
+
+                Monitoring of FDDI, Ethernet and Token Ring allows the
+                user to: view network status in real time; view
+                network, node, or node pair statistics; capture
+                frames; control capture using trigger and filter
+                capabilities; view real-time statistics; view captured
+                frames in decoded format; and view the last frame
+                transmitted by each station.
+
+                The following Real-Time Network Statistics of FDDI,
+                Ethernet and Token Ring networks are displayed: frame
+                rate, runts, byte rate, jabbers, CRC/align errors, and
+                collisions.
+
+                Product developers can use the ChameLAN 100 to observe
+
+
+
+NOCTools2 Working Group                                        [Page 33]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+                and control various events to help debug their FDDI,
+                Ethernet and Token Ring products.  End users can
+                perform real-time monitoring to test and
+                diagnose problems that may occur when developing,
+                installing or managing FDDI, Ethernet and Token Ring
+                networks and network products.  End users can use the
+                ChameLAN 100 to aid in the installation and
+                maintenance of Ethernet and Token Ring networks.  To
+                isolate specific network trouble spots the ChameLAN
+                100 uses filtering and triggering techniques for data
+                capture.  Higher level protocol decode includes
+                TCP/IP, OSI and DECnet protocol suites.  Protocol
+                decodes of IPX, SNMP, XTP, and AppleTalk are also
+                supported.  Additional protocol decodes are also
+                under development.  The ChameLAN 100 family
+                also offers a Protocol Management Development System
+                (PMDS) that enables users to develop custom protocol
+                decode suites.
+
+                The FDDI, Ethernet and Token Ring hardware interfaces
+                feature independent processing power.  Real-time data
+                is monitored unobtrusively at full bandwidth without
+                affecting network activity.  Real-time data may also
+                be saved to a 120MB or optional 200MB hard disk drive
+                for later analysis.  FDDI data is captured at 125 megabits
+                per second (Mbps), Ethernet at 10 Mbps and Token Ring
+                at 4 or 16 Mbps.
+
+        MECHANISM
+                This portable, standalone unit incorporates the power
+                of UNIX, X-Windows and Motif.  Its UNIX-based
+                programming interface facilitates development of
+                customized monitoring and simulation applications.  The
+                ChameLAN 100 may connect to the network at any
+                location using standard equipment.  Standard graphical
+                Motif/X-Windows and TCP/IP allow remote control
+                through Ethernet and 10Base T interfaces.  Tekelec
+                also offers a rackmounted model -- ChameLAN 100-X.
+                Both models can be controlled via a Sun Workstation
+                remotely.
+
+        CAVEATS
+                none.
+
+        BUGS
+                none known.
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 34]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        LIMITATIONS
+                none reported.
+
+        HARDWARE REQUIRED
+                None.  The ChameLAN 100 is a self-contained unit, and
+                includes its own interface cards.  It installs
+                into a network with standard interface
+                connectors.
+
+        SOFTWARE REQUIRED
+                None.
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+                The ChameLAN 100 product family is available
+                commercially.  For more information or a free demo,
+                call or write:
+
+                1.800.tek.elec
+                Tekelec
+                26580 West Agoura Road
+                Calabasas, CA 91302
+                Phone:          818.880.5656
+                Fax:            818.880.6993
+
+                The ChameLAN 100 is listed on the GSA schedule.
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+                Todd Koch
+                Public Relations Specialist
+                818.880.7718
+                Internet:  todd.koch@tekelec.com
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 35]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                               CMU_SNMP
+
+          NAME
+               The CMU SNMP Distribution
+
+          KEYWORDS
+               manager, status; IP; NMS, SNMP; UNIX; free, sourcelib.
+
+          ABSTRACT
+               The CMU SNMP Distribution includes source code for an
+               SNMP agent, several SNMP client applications, an ASN.1
+               library, and supporting documentation.
+
+               The agent compiles into about 10 KB of 68000 code.  The
+               distribution includes a full agent that runs on a
+               Kinetics FastPath2/3/4, and is built into the KIP
+               appletalk/ethernet gateway.  The machine independent
+               portions of this agent also run on CMU's IBM PC/AT
+               based router.
+
+               The applications are designed to be useful in the real
+               world.  Information is collected and presented in a
+               useful format and is suitable for everyday status
+               monitoring.  Input and output are interpreted
+               symbolically.  The tools can be used without
+               referencing the RFCs.
+
+          MECHANISM
+               SNMP.
+
+          CAVEATS
+               None.
+
+          BUGS
+               None reported.  Send bug reports to
+               sw0l+snmp@andrew.cmu.edu.  ("sw0l" is "ess double-you
+               zero ell.")
+
+          LIMITATIONS
+               None reported.
+
+          HARDWARE REQUIRED
+               The KIP gateway agent runs on a Kinetics FastPath2/3/4.
+               Otherwise, no restrictions.
+
+          SOFTWARE REQUIRED
+               The code was written with efficiency and portability in
+               mind.  The applications compile and run on the follow-
+
+
+
+NOCTools2 Working Group                                        [Page 36]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+               ing systems: IBM PC/RT running ACIS Release 3, Sun3/50
+               running SUNOS 3.5, and the DEC microVax running Ultrix
+               2.2.  They are expected to run on any system with a
+               Berkeley socket interface.
+
+          AVAILABILITY
+               This distribution is copyrighted by CMU, but may be
+               used and sold without permission.  Consult the copy-
+               right notices for further information.  The distribu-
+               tion is available by anonymous FTP from the host
+               lancaster.andrew.cmu.edu (128.2.13.21) as the files
+               pub/cmu-snmp.9.tar, and pub/kip-snmp.9.tar.  The former
+               includes the libraries and the applications, and the
+               latter is the KIP SNMP agent.
+
+               Please direct questions, comments, and bug reports to
+               sw0l+snmp@andrew.cmu.edu.  ("sw0l" is "ess double-you
+               zero ell.")  If you pick up this package, please send a
+               note to the above address, so that you may be notified
+               of future enhancements/changes and additions to the set
+               of applications (several are planned).
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 37]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog            COMPUTER-SECURITY-CHECKLIST
+
+          NAME
+               Computer Security Checklist
+
+          KEYWORDS
+               security; DOS.
+
+          ABSTRACT
+               This program consists of 858 computer security ques-
+               tions divided up in thirteen sections.  The program
+               presents the questions to the user and records their
+               responses.  After answering the questions in one of the
+               thirteen sections, the user can generate a report from
+               the questions and the user's answers.  The thirteen
+               sections are: telecommunications security, physical
+               access security, personnel security, systems develop-
+               ment security, security awareness and training prac-
+               tices, organizational and management security, data and
+               program security, processing and operations security,
+               ergonomics and error prevention, environmental secu-
+               rity, and backup and recovery security.
+
+               The questions are weighted as to their importance, and
+               the report generator can sort the questions by weight.
+               This way the most important issues can be tackled
+               first.
+
+          MECHANISM
+               The questions are displayed on the screen and the user
+               is prompted for a single keystroke reply.  When the end
+               of one of the thirteen sections is reached, the answers
+               are written to a disk file.  The question file and the
+               answer file are merged to create the report file.
+
+          CAVEATS
+               None.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               None reported.
+
+          HARDWARE REQUIRED
+               No restrictions.
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 38]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          SOFTWARE REQUIRED
+               DOS operating system.
+
+          AVAILABILITY
+               A commercial product available from:
+                    C.D., Ltd.
+                    P.O. Box 58363
+                    Seattle, WA 98138
+                    (206) 243-8700
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 39]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                           CMIP-LIBRARY
+
+        NAME
+                CMIP Library
+
+        KEYWORDS
+                manager; osi; cmis; unix; free, sourcelib.
+
+        ABSTRACT
+
+                The CMIP Library implements the functionality of the
+                Common  Management Information Service/Protocol as in
+                the full international standards (ISO 9595, ISO 9596)
+                published in 1990. It is designed to work with the
+                ISODE package and can act as a building block for the
+                construction of CMIP-based agent and manager
+                applications.
+
+        MECHANISM
+                The CMIP library uses ISO ROS, ACSE and ASN.1
+                presentation, as implemented in ISODE, to provide its
+                service.
+
+        CAVEATS
+                None.
+
+        BUGS
+                None known.
+
+        LIMITATIONS
+                None known.
+
+        HARDWARE REQUIRED
+                Has been tested on SUN 3 and SUN 4 architectures.
+
+        SOFTWARE REQUIRED
+                The ISODE protocol suite, BSD UNIX.
+
+        AVAILABILITY
+                The CMIP library and related management tools built
+                upon it, known as OSIMIS (OSI Management Information
+                Service), are publicly available from University
+                College London, England via FTP and FTAM.  To obtain
+                information regarding a copy send email to
+                osimis-request@cs.ucl.ac.uk or call +44 71 380 7366.
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 40]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                            DECADDRS
+
+          NAME
+               decaddrs, decaroute, decnroute, xnsroutes, bridgetab
+
+          KEYWORDS
+               manager, map, routing; bridge, DECnet; NMS, SNMP; UNIX.
+
+          ABSTRACT
+               These commands display private MIB information from
+               Wellfleet systems.  They retrieve and format for
+               display values of one or several MIB variables from the
+               Wellfleet Communications private enterprise MIB, using
+               the SNMP (RFC1098).  In particular these tools are used
+               to examine the non-IP modules (DECnet, XNS, and Bridg-
+               ing) of a Wellfleet system.
+
+               Decaddrs displays the DECnet configuration of a
+               Wellfleet system acting as a DECnet router, showing the
+               static parameters associated with each DECnet inter-
+               face.  Decaroute and decnroute display the DECnet
+               inter-area and intra-area routing tables (that is, area
+               routes and node routes).  Xnsroutes displays routes
+               known to a Wellfleet system acting as an XNS router.
+               Bridgetab displays the bridge forwarding table with the
+               disposition of traffic arriving from or directed to
+               each station known to the Wellfleet bridge module.  All
+               these commands take an IP address as the argument and
+               can specify an SNMP community for the retrieval.  One
+               SNMP query is performed for each row of the table.
+               Note that the Wellfleet system must be operating as an
+               IP router for the SNMP to be accessible.
+
+          MECHANISM
+               Management information is exchanged by use of SNMP.
+
+          CAVEATS
+               None.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               None reported.
+
+          HARDWARE REQUIRED
+               Distributed and supported for Sun 3 systems.
+
+
+
+
+NOCTools2 Working Group                                        [Page 41]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          SOFTWARE REQUIRED
+               Distributed and supported for SunOS 3.5 and 4.x.
+
+          AVAILABILITY
+               Commercial product of:
+                    Wellfleet Communications, Inc.
+                    12 DeAngelo Drive
+                    Bedford, MA 01730-2204
+                    (617) 275-2400
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 42]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                                    DIG
+
+          NAME
+               DiG
+
+          KEYWORDS
+               status; DNS; spoof; UNIX; free.
+
+          ABSTRACT
+               DiG (domain information groper), is a command line tool
+               which queries DNS servers in either an interactive or a
+               batch mode.  It was developed to be more
+               convenient/flexible than nslookup for gathering perfor-
+               mance data and testing DNS servers.
+
+          MECHANISM
+               Dig is built on a slightly modified version of the bind
+               resolver (release 4.8).
+
+          CAVEATS
+               none.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               None reported.
+
+          HARDWARE REQUIRED
+               No restrictions.
+
+          SOFTWARE REQUIRED
+               BSD UNIX.
+
+          AVAILABILITY
+               DiG is available via anonymous FTP from venera.isi.edu
+               in pub/dig.2.0.tar.Z.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 43]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                  EMANATE_SNMP_RESEARCH
+
+        NAME
+                EMANATE: Enhanced MANagement Agent Through Extensions
+                from SNMP Research.
+
+        KEYWORDS
+                alarm, control, manager, reference, security, status;
+                bridge, Ethernet, FDDI, IP, OSI, ring, star;
+                NMS, SNMP;
+                sourcelib.
+
+        ABSTRACT
+                The EMANATE system provides a run-time extensible SNMP
+                agent that dynamically reconfigures an agent's MIB
+                without having to recompile, relink, or restart the
+                agent.  An EMANATE capable SNMP agent can support zero,
+                one, or many subagents and dynamically reconfigure to
+                connect or disconnect those subagents' MIBs.
+
+                The EMANATE system consists of several logically
+                independent components and subsystems:
+
+                o Master SNMP agent which contains an API to communicate
+                  with subagents.
+                o Subagents which implement various MIBS.
+                o Subagent Developer's Kit which contains tools to assist
+                  in the implementation of subagents.
+                o EMANATE libraries which provide the API for the
+                  subagent.
+
+        MECHANISM
+                A concise API allows a standard means of communication
+                between the master and subagents.  System dependent
+                mechanisms are employed for transfer of information
+                between the master and subagents.
+
+        CAVEATS
+                None.
+
+        BUGS
+                None known.
+
+        LIMITATIONS
+                None reported.
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 44]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        HARDWARE REQUIRED
+                Multiple platforms including PC's, workstations, hosts,
+                and servers are supported.  Contact SNMP Research for
+                more details.
+
+        SOFTWARE REQUIRED
+                C compiler.
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+                This is a commercial product available under license
+                from:
+
+                SNMP Research
+                3001 Kimberlin Heights Road
+                Knoxville, TN  37920-9716
+                Attn:  John Southwood, Sales and Marketing
+                (615) 573-1434 (Voice)  (615) 573-9197 (FAX)
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+                users@seymour1.cs.utk.edu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 45]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                          ETHERFIND_SUN
+
+          NAME
+               etherfind
+
+          KEYWORDS
+               traffic; ethernet, IP, NFS; eavesdrop; UNIX.
+
+          ABSTRACT
+               Etherfind examines the packets that traverse a network
+               interface, and outputs a text file describing the
+               traffic.  In the file, a single line of text describes
+               a single packet: it contains values such as protocol
+               type, length, source, and destination.  Etherfind can
+               print out all packet traffic on the ethernet, or
+               traffic for the local host.  Further packet filtering
+               can be done on the basis of protocol: IP, ARP, RARP,
+               ICMP, UDP, ND, TCP, and filtering can also be done
+               based on the source and destination addresses as well as
+               TCP and UDP port numbers.
+
+          MECHANISM
+               In usual operations, and by default, etherfind puts the
+               interface in promiscuous mode.  In 4.3BSD UNIX and
+               related OSs, it uses a Network Interface Tap (NIT) to
+               obtain a copy of traffic on an ethernet interface.
+
+          CAVEATS
+               None.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               Minimal protocol information is printed.  Can  only  be
+               run by the super user.  The syntax is painful.
+
+          HARDWARE REQUIRED
+               Ethernet.
+
+          SOFTWARE REQUIRED
+               SunOS.
+
+          AVAILABILITY
+               Executable included in the SunOS "Networking Tools and
+               Programs" software installation option.
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 46]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+         Internet Tool Catalog                         ETHERNET-CODES
+
+        NAME
+                ethernet-codes
+
+        KEYWORDS
+                reference;
+                ethernet, fddi;
+                ;
+                ;
+                ;
+
+        ABSTRACT
+                Mike Patton of MIT LCS has compiled a very
+                comprehensive list of the IEEE numbers used on
+                Ethernet and FDDI (with some permutation).
+                This file contains collected information on the
+                various codes used on IEEE 802.3 and EtherNet.
+                There are three "pages": type codes, vendor
+                codes, and the uses of multicast (including
+                broadcast) addresses.
+
+        MECHANISM
+                FTP the file and use it like a secret decoder ring.
+
+        CAVEATS
+                Since this information is from collected wisdom,
+                there are certainly omissions.
+
+        BUGS
+                Mike welcomes any further additions.
+                They can be sent to a special mailbox that he has set up:
+
+                        MAP=EtherNet-codes@LCS.MIT.Edu
+
+        LIMITATIONS
+                See caveats.
+
+        HARDWARE REQUIRED
+                No restrictions.
+
+        SOFTWARE REQUIRED
+                No restrictions.
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 47]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+                The file is stored as flat, non-compressed ASCII text.
+                It can be FTP'ed from:
+                        ftp.lcs.mit.edu
+
+                Retrieve the file:
+                        /pub/map/EtherNet-codes
+
+        To submit additions or obtain further assistance, send email to:
+                         MAP=EtherNet-codes@LCS.MIT.Edu
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+                This entry maintained by the NOCtools editors.
+                Send email to noctools-request@merit.edu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 48]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                 GENERIC-MANAGED-SYSTEM
+
+        NAME
+                Generic Managed System
+
+        KEYWORDS
+                manager; osi; cmis; unix; free, sourcelib
+
+        ABSTRACT
+                The Generic Managed System (GMS) implements the
+                functions that would be common to any OSI managed
+                system. These include the parsing of CMIS requests,
+                selection of managed objects according to the scoping
+                and filtering rules, handling of notifications and
+                event forwarding discriminators etc. The intention is
+                that the implementors should use the GMS as a basis
+                for their own managed object implementations. A
+                support environment is provided to assist with this.
+
+        MECHANISM
+                The GMS uses the UCL CMIP library plus a library of
+                C++ objects representing common managed objects and
+                attribute types.
+
+        CAVEATS
+                The system is still experimental, is subject to change
+                and is not yet well documented.
+
+        BUGS
+                See above.
+
+        LIMITATIONS
+                None known.
+
+        HARDWARE REQUIRED
+                Has been tested on SUN 3 and SUN 4 architectures.
+
+        SOFTWARE REQUIRED
+                The ISODE protocol suite, BSD UNIX, UCL CMIP Library,
+                GNU C++ (g++).
+
+        AVAILABILITY
+                The CMIP library and related management tools built
+                upon it, known as OSIMIS (OSI Management Information
+                Service), are publicly available from University
+                College London, England via FTP and FTAM.  To obtain
+                information regarding a copy send email to
+                osimis-request@cs.ucl.ac.uk or call +44 71 380 7366.
+
+
+
+NOCTools2 Working Group                                        [Page 49]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                              GETETHERS
+
+        NAME
+                getethers
+
+        KEYWORDS
+                Traffic; Ethernet; Ping; UNIX; Free
+
+        ABSTRACT
+                Getethers runs through all addresses on an ethernet
+                segment (a.b.c.1 to a.b.c.254) and pings each address,
+                and then determines the ethernet address for that
+                host.  It produces a list, in either plain ASCII, the
+                file format for the Excelan Lanalyzer, or the file
+                format for the Network General Sniffer, of
+                hostname/ethernet address pairs for all hosts on the
+                local network.  The plain ASCII list optionally
+                includes the vendor name of the ethernet card in
+                each system, to aid in the determination of the
+                identity of unknown systems.
+
+        MECHANISM
+                Getethers uses a raw IP socket to generate ICMP echo
+                requests and receive ICMP echo replies, and then
+                examines the kernel ARP table to determine the
+                ethernet address of each responding system.
+
+        CAVEATS
+                Assumes that the ethernet it is looking at is either
+                a Class C IP network, or part of a Class B IP network
+                that is subnetted with a netmask of 255.255.255.0.
+                (This is easy to change, but it's compiled in.)
+
+        BUGS
+                None known.
+
+        LIMITATIONS
+                None.
+
+        HARDWARE REQUIRED
+                Has been tested on Sun-3 and Sun-4 (SPARC) systems
+                under SunOS 4.1.x, DEC VAXes under 4.3BSD.
+
+        SOFTWARE REQUIRED
+                Runs under SunOS 4.x and 4.3BSD; should be easy to
+                port to any other Berkeley-like system.  Requires
+                raw sockets and the ioctl calls to get at the ARP
+                table.
+
+
+
+NOCTools2 Working Group                                        [Page 50]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+                Public domain, and freely distributable.  Available
+                via anonymous FTP from harbor.ecn.purdue.edu; also has
+                been posted to comp.sources.unix.  The current version
+                is Version 1.4 from May 1992.
+
+                Contact point:
+                        Dave Curry
+                        Purdue University
+                        Engineering Computer Network
+                        1285 Electrical Engineering Bldg.
+                        West Lafayette, IN 47907-1285
+                        davy@ecn.purdue.edu
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+                        Dave Curry (see address above).
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 51]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                       GETONE_WELLFLEET
+
+          NAME
+               getone, getmany, getroute, getarp, getaddr, getif,
+               getid.
+
+          KEYWORDS
+               manager, routing, status; IP; NMS, SNMP; UNIX.
+
+          ABSTRACT
+               These commands retrieve and format for display values
+               of one or several MIB variables (RFC1066) using the
+               SNMP (RFC1098).  Getone and getmany retrieve arbitrary
+               MIB variables; getroute, getarp, getaddr, and getif
+               retrieve and display tabular information (routing
+               tables, ARP table, interface configuration, etc.), and
+               getid retrieves and displays system name, identifica-
+               tion and boot time.
+
+               Getone <target> <mibvariable> retrieves and displays
+               the value of the designated MIB variable from the
+               specified target system.  The SNMP community name to be
+               used for the retrieval can also be specified.  Getmany
+               works similarly for groups of MIB variables rather than
+               individual values.  The name of each variable, its
+               value and its data type are displayed.  Getroute returns
+               information from the ipRoutingTable MIB structure,
+               displaying the retrieved information in an accessible
+               format.  Getarp behaves similarly for the address
+               translation table; getaddr for the ipAddressTable; and
+               getif displays information from the interfaces table,
+               supplemented with information from the ipAddressTable.
+               Getid displays the system name, identification, ipFor-
+               warding state, and the boot time and date.  All take a
+               system name or IP address as an argument and can
+               specify an SNMP community for the retrieval.  One SNMP
+               query is performed for each row of the table.
+
+          MECHANISM
+               Queries SNMP agent(s).
+
+          CAVEATS
+               None.
+
+          BUGS
+               None known.
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 52]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          LIMITATIONS
+               None reported.
+
+          HARDWARE REQUIRED
+               Distributed and supported for Sun 3 systems.
+
+          SOFTWARE REQUIRED
+               Distributed and supported for SunOS 3.5 and 4.x.
+
+          AVAILABILITY
+               Commercial product of:
+                    Wellfleet Communications, Inc.
+                    12 DeAngelo Drive
+                    Bedford, MA 01730-2204
+                    (617) 275-2400
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 53]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                           HAMMER_ANVIL
+
+          NAME
+               hammer & anvil
+
+          KEYWORDS
+               benchmark, generator; IP; DOS; free.
+
+          ABSTRACT
+               Hammer and Anvil are the benchmarking programs for IP
+               routers.  Using these tools, gateways have been tested
+               for per-packet delay, router-generated traffic over-
+               head, maximum sustained throughput, etc.
+
+          MECHANISM
+               Tests are performed on a gateway in an isolated
+               testbed.  Hammer generates packets at controlled rates.
+               It can set the length and interpacket interval of a
+               packet stream.  Anvil counts packet arrivals.
+
+          CAVEATS
+               Hammer should not be run on a live network.
+
+          BUGS
+               None reported.
+
+          LIMITATIONS
+               Early versions of hammer could not produce inter-packet
+               intervals shorter than 55 usec.
+
+          HARDWARE REQUIRED
+               Hammer runs on a PC/AT or compatible, and anvil
+               requires a PC or clone.  Both use a Micom Interlan
+               NI5210 for LAN interface.
+
+          SOFTWARE REQUIRED
+               MS-DOS.
+
+          AVAILABILITY
+               Hammer and anvil are copyrighted, though free.  Copies
+               are available from pub/eutil on husc6.harvard.edu.
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 54]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                               HOPCHECK
+
+          NAME
+               hopcheck
+
+          KEYWORDS
+               routing; IP; ping; DOS; free.
+
+          ABSTRACT
+               Hopcheck is a tool that lists the gateways traversed by
+               packets sent from the hopcheck-resident PC to a desti-
+               nation.  Hopcheck uses the same mechanism as traceroute
+               but is for use on IBM PC compatibles that have ethernet
+               connections.  Hopcheck is part of a larger TCP/IP pack-
+               age that is known as ka9q that is for use with packet
+               radio.  Ka9q can coexist on a PC with other TCP/IP
+               packages such as FTP Inc's PC/TCP, but must be used
+               independently of other packages.  Ka9q was written by
+               Phil Karn.  Hopcheck was added by Katie Stevens,
+               dkstevens@ucdavis.edu.  Unlike traceroute, which
+               requires a UNIX kernel mod, hopcheck will run on the
+               standard, unmodified ka9q release.
+
+          MECHANISM
+               See the description in traceroute.
+
+          CAVEATS
+               See the description in traceroute.
+
+          BUGS
+               None known.
+
+          HARDWARE REQUIRED
+               IBM PC compatible with ethernet network interface card;
+               ethernet card supported through FTP spec packet driver.
+
+          SOFTWARE REQUIRED
+               DOS.
+
+          AVAILABILITY
+               Free for radio amateurs and educational institutions;
+               others should contact Phil Karn, karn@ka9q.bellcore.com.
+               Available via anonymous FTP at ucdavis.edu, in the
+               directory "dist/nethop".
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 55]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                         INTERNET_ROVER
+
+          NAME
+               Internet Rover
+
+          KEYWORDS
+               status; IP, SMTP; curses, ping, spoof; UNIX; free,
+               sourcelib.
+
+          ABSTRACT
+               Internet Rover is a prototype network monitor that uses
+               multiple protocol "modules" to test network functional-
+               ity.  This package consists of two primary pieces of
+               code: the data collector and the problem display.
+
+               There is one data collector that performs a series of
+               network tests, and maintains a list of problems with
+               the network.  There can be many display processes all
+               displaying the current list of problems which is useful
+               in a multi-operator NOC.
+
+               The display task uses curses, allowing many terminal
+               types to display the problem file either locally or
+               from a remote site.  Full source is provided.  The data
+               collector is easily configured and extensible.  Contri-
+               butions such as additional protocol modules, and shell
+               script extensions are welcome.
+
+          MECHANISM
+               A configuration file contains a list of nodes,
+               addresses, NodeUp? protocol test (ping in most cases),
+               and a list of further tests to be performed if the node
+               is in fact up.  Modules are included to test TELNET,
+               FTP, and SMTP.  If the configuration contains a test
+               that isn't recognized, a generic test is assumed, and a
+               filename is checked for existence.  This way users can
+               create scripts that create a file if there is a prob-
+               lem, and the data collector simply checks the existence
+               of that file to determine if there is a problem.
+
+          CAVEATS
+               None.
+
+          BUGS
+               None known.
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 56]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          LIMITATIONS
+               This tool does not yet have the capability to  perform
+               actions based on the result of the test.  Rather, it is
+               intended for a multi-operator environment,  and  simply
+               displays a list of what is wrong with the net.
+
+          HARDWARE REQUIRED
+               This software is known to run on Suns and IBM RTs.
+
+          SOFTWARE REQUIRED
+               Curses, 4.xBSD UNIX socket programming  libraries,  BSD
+               ping.
+
+          AVAILABILITY
+               Full source available via anonymous FTP from merit.edu
+               (35.1.1.42) in the ~ftp/pub/inetrover directory.
+               Source and executables are public domain and can be
+               freely distributed for non-commercial use.  This pack-
+               age is unsupported, but bug reports and fixes may be
+               sent to: wbn@merit.edu.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 57]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                                 IOZONE
+
+        NAME
+                iozone
+
+        KEYWORDS
+                benchmark; nfs;; dos,hp,unix,vmx; free.
+
+        ABSTRACT
+                Software to assess the sequential file I/O capability
+                of a system.  May be useful as reference to compare
+                against results obtained when files are accessed via
+                NFS, Andrew, etc.
+
+        MECHANISM
+                This test writes a X MEGABYTE sequential file in Y
+                byte chunks, then rewinds it and reads it back.
+                [The size of the file should be big enough to factor
+                out the effect of any disk cache.].  Finally,
+                IOZONE deletes the temporary file.  Options allow one to
+                vary X and Y.  In addition, 'auto test' runs IOZONE
+                repeatedly using record sizes from 512 to 8192 bytes
+                (adjustable), and file sizes from 1 to 16 megabytes
+                (adjustable).  It creates a table of results.
+
+        CAVEATS
+                The file is written (filling any cache buffers), and
+                then read.  If the cache is >= X MB, then most if not
+                all the reads will be satisfied from the cache.
+                However, if it is less than or equal to
+                .5X MB, then NONE of the reads will be satisfied from
+                the cache.  This is because after the file is written,
+                a .5X MB cache will contain the upper .5X MB of the
+                test file, but we will start reading
+                from the beginning of the file (data which is no
+                longer in the cache).
+
+                In order for this to be a fair test, the length of the
+                test file must be AT LEAST 2X the amount of disk cache
+                memory for your system.  If not, you are really
+                testing the speed at which your CPU
+                can read blocks out of the cache (not a fair test).
+
+        BUGS
+                none known at this time.
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 58]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        LIMITATIONS
+                IOZONE does not normally test the raw I/O speed of
+                your disk or system.  It tests the speed of
+                sequential I/O to actual files.
+                Therefore, this measurement factors in the efficiency
+                of your machine's file system, operating system, C
+                compiler, and C runtime library.  It produces a
+                measurement which is the number of bytes
+                per second that your system can read or write to a file.
+
+        HARDWARE REQUIRED
+
+                This program has been ported and tested on the
+                following computer operating systems:
+
+Vendor             Operating System    Notes on compiling IOzone
+-----------------------------------------------------------------------
+Apollo             Domain/OS           no cc switches -- BSD domain
+AT&T               UNIX System V R4
+AT&T 6386WGS       AT&T UNIX 5.3.2     define SYSTYPE_SYSV
+Generic AT&T       UNIX System V R3    may need cc -DSVR3
+Convergent         Unisys/AT&T SVR3   cc -DCONVERGENT -o iozone iozone.c
+Digital Equipment  ULTRIX V4.1
+Digital Equipment  VAX/VMS V5.4        see below **
+Digital Equipment  VAX/VMS (POSIX)
+Hewlett-Packard    HP-UX 7.05
+IBM                AIX Ver. 3 rel. 1
+Interactive        UNIX System V R3
+Microsoft          MS-DOS 3.3          tested Borland, Microsoft C
+MIPS               RISCos 4.52
+NeXt               NeXt OS 2.x
+OSF                OSF/1
+Portable!          POSIX 1003.1-1988   may need to define _POSIX_SOURCE
+QNX                QNX 4.0
+SCO                UNIX System V/386 3.2.2
+SCO                XENIX 2.3
+SCO                XENIX 3.2
+Silicon Graphics   UNIX                cc -DSGI -o iozone iozone.c
+Sony Microsystems  UNIX                same as MIPS
+Sun Microsystems   SUNOS 4.1.1
+Tandem Computers   GUARDIAN 90          1. call the source file IOZONEC
+                                        2. C/IN IOZONEC/IOZONE;RUNNABLE
+                                        3. RUN IOZONE
+Tandem Computers   Non-Stop UX
+
+** for VMS, define iozone as a foreign command via this DCL command:
+
+        $IOZONE :== $SYS$DISK:[]IOZONE.EXE
+
+
+
+NOCTools2 Working Group                                        [Page 59]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        this lets you pass the command line arguments to IOZONE
+
+        SOFTWARE REQUIRED
+                OS as shown in the hardware listing above.
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+                Author: Bill Norcott
+                        1060 Hyde Avenue
+                        San Jose, CA  95129
+                        norcott_bill@tandem.com
+
+                Availability:
+                        This tool has been posted to comp.sources.misc.
+                        It is available from the usual archive sites.
+                        Program can be located using ARCHIE or other
+                        servers.
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+                This entry is maintained by the noctools editors.
+                Send email to noctools-request@merit.edu.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 60]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                                 LADDIS
+
+        NAME
+                LADDIS
+
+        KEYWORDS
+                benchmark, generator;
+                NFS;
+                spoof;
+                unix;
+                free.
+
+        ABSTRACT
+
+                "LADDIS: A Multi-Vendor and Vendor-Neutral SPEC NFS
+                Benchmark", Bruce Nelson, LADDIS Group & Auspex Systems.
+
+                Over the past 24 months, engineers from Legato,
+                Auspex, Data General, DEC, Interphase, and Sun
+                (LADDIS) met regularly to create the LADDIS NFS
+                benchmark: an unbiased, standard, vendor-independent,
+                scalable NFS performance test.
+
+                The purpose of the LADDIS benchmark is to give users a
+                credible and undisputed test of NFS performance, and
+                to give vendors a publishable standard performance
+                measure that customers can use for load planning,
+                system configuration, and equipment buying decisions.
+                Toward this end, the LADDIS benchmark is being adopted
+                by SPEC (the System Performance Evaluation
+                Cooperative, creators of SPECmarks) as the first
+                member of SPEC's System-level File Server (SFS)
+                benchmark suite."
+
+                "In particular, we have had unexpected interest from
+                some router vendors in using LADDIS to both rate and
+                stress-test IP routers. This is because LADDIS can
+                send back-to-back full-size packet trains, and because
+                it can generate a 90%-Ethernet util on simulated
+                "real" NFS workloads, just like routers encounter in
+                the real world. But LADDIS is for local Ethernet or
+                FDDI nets only, not WAN."
+
+        MECHANISM
+                Generates NFS requests and measures responsiveness of
+                the server.
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 61]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        CAVEATS
+                "LADDIS is not released yet by SPEC, although a free
+                beta version, quite stable, is available now as
+                PRE-LADDIS. So you might want to put PRE-LADDIS in
+                your listing, noting that full LADDIS
+                availability from SPEC is expected by the end of 1992."
+
+        BUGS
+                The licensee is requested to direct beta test comments
+                via electronic mail to:
+                "spec-preladdis-comments@riscee.pko.dec.com".
+
+                This alias will forward all comments to the SPECSFS
+                mailing list (which includes the LADDIS Group).
+
+        LIMITATIONS
+                LADDIS is for local Ethernet or FDDI nets only, not
+                WAN.
+
+        HARDWARE REQUIRED
+                A host with LAN connectivity.  Presumably, a host with
+                enough horsepower to generate an adequate work load.
+
+        SOFTWARE REQUIRED
+                LADDIS is a sophisticated Unix-based NFS traffic
+                generator program.
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+                Date: Mon, 10 Feb 92 13:12:20 PST
+                From: bnelson (Bruce Nelson)
+
+                Dear Person:
+
+                The SPEC PRE-LADDIS beta test process became
+                operational on Monday, February 3, 1992.  This email
+                describes the process as announced during the LADDIS
+                Group's presentation at UniForum '92 and
+                also at Interop '91. The content of the beta test
+                license and the license request process are consistent
+                with the proposals approved by the SPEC Steering
+                Committee at the January 1992 meeting in Milpitas,
+                California.
+
+                The SPEC PRE-LADDIS beta test will consist of one beta
+                test version of PRE-LADDIS distributed ONLY by
+                electronic mail. The SPEC PRE-LADDIS Beta test
+                software is licensed by SPEC, not by the LADDIS
+                Group.
+
+
+
+NOCTools2 Working Group                                        [Page 62]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+                To obtain the PRE-LADDIS Beta test software, an
+                individual must:
+
+                1.  Request the SPEC PRE-LADDIS beta test License by
+                    electronic mail to
+                    "spec-preladdis-beta-test@riscee.pko.dec.com" with a
+                    subject line of "Request SPEC PRE-LADDIS Beta Test
+                    License".
+                2.  Print a hardcopy of the license and sign.
+                3.  Attach a cover letter written on the individual's
+                    company letterhead requesting the PRE-LADDIS Beta
+                    Test Kit.
+                4.  U.S. Mail the signed license and cover letter to:
+                       SPEC PRE-LADDIS Beta Test
+                       c/o NCGA, 2722 Merrilee Drive, Suite 200
+                       Fairfax, VA 22031
+
+                After completing these steps, the SPEC PRE-LADDIS beta
+                test kit will be emailed to the requestor from
+                riscee.pko.dec.com. The licensee is requested to
+                direct beta test comments via electronic mail
+                to "spec-preladdis-comments@riscee.pko.dec.com". This
+                alias will forward all comments to the SPECSFS mailing
+                list (which includes the
+                LADDIS Group).
+
+                Note that PRE-LADDIS is ONLY available through
+                electronic mail and ONLY through the process listed
+                above in steps 1-4. If you do not have internet email
+                available to you (which is unlikely if you are
+                receiving THIS email), you must arrange delivery of
+                PRE-LADDIS through some email-capable part of your
+                organization, not through LADDIS members like Auspex,
+                DEC, Sun, etc.
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+                This entry is maintained by the NOCtools editors.
+                Send E-mail to noctools-request@merit.edu.
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 63]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                             LAN_PATROL
+
+          NAME
+               LAN Patrol
+
+          KEYWORDS
+               security, traffic; ethernet, star; eavesdrop; DOS.
+
+          ABSTRACT
+               LAN Patrol is a full-featured network analyzer that
+               provides essential information for effective fault and
+               performance management.  It allows network managers to
+               easily monitor user activity, find traffic overloads,
+               plan for growth, test cable, uncover intruders, balance
+               network services, and so on.  LAN Patrol uses state of
+               the art data collection techniques to monitor all
+               activity on a network, giving an accurate picture of
+               how it is performing.
+
+               LAN Patrol's reports can be saved as ASCII files to
+               disk, and imported into spreadsheet or database pro-
+               grams for further analysis.
+
+          MECHANISM
+               The LAN Patrol interface driver programs a standard
+               interface card to capture all traffic on a network seg-
+               ment.  The driver operates from the background of a
+               standard PC, maintaining statistics for each station on
+               the network.  The information can be viewed on the PC's
+               screen, or as a user-defined report output either to
+               file or printer.
+
+          CAVEATS
+               None.  Normal operation is completely passive, making
+               LAN Patrol transparent to the network.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               LAN Patrol can monitor up to 10,000 packets/sec on an
+               AT class PC, and is limited to monitoring a maximum of
+               1024 stations for intervals of up to 30 days.
+
+               Because LAN Patrol operates at the physical level, it
+               will only see traffic for the segment on which it is
+               installed; it cannot see traffic across bridges.
+
+
+
+
+NOCTools2 Working Group                                        [Page 64]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          HARDWARE REQUIRED
+               Computer: IBM PC/XT/AT, PS/2 Model 30, or compatible.
+               Requires 512K memory and a hard drive or double-sided
+               disk drive.
+
+               Display: Color or monochrome text.  Color display
+               allows color-coding of traffic information.
+
+               Ethernet, StarLAN, LattisNet, or StarLAN 10 network
+               interface card.
+
+          SOFTWARE REQUIRED
+               PC DOS, MS-DOS version 3.1 or greater.
+
+          AVAILABILITY
+               LAN Patrol may be purchased through network dealers,
+               or directly from:
+                    Legend Software, Inc.
+                    Phone:  (201) 227-8771
+                    FAX:    (201) 906-1151
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 65]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                               LANVista
+
+        NAME
+            LANVista
+
+        KEYWORDS
+            analyzer, benchmark, debugger, generator, manager, traffic;
+            DECnet, Ethernet, IP, OSI, Ring; Eavesdrop, Proprietary;
+            DOS, Standalone.
+
+        ABSTRACT
+            CXR/Digilog's LANVista family of protocol and statistical
+            analyzers provide the tools to troubleshoot an Ethernet and
+            Token Ring 4/16Mbps network.  LANVista lets you capture
+            frames to RAM and/or disk, generate traffic for stress
+            testing, test your network cable for fault isolation, and
+            decode all 7 layers of many popular protocol stacks.
+            LANVista's 100 family offers exceptional price/performance
+            and a wide range of options. Combined with an
+            integrated upgrade path to the fully distributed LANVista
+            200 system, the 100 line provides a  reasonably priced
+            entry into LAN management and protocol analysis.
+
+            All LANVista models are fully operable under Microsoft
+            Windows. Under Windows, LANVista can be operated in
+            the background, gathering data and alarms as other
+            tasks are completed. Displayed data may easily be
+            cut from LANVista and pasted into other Windows
+            applications such as Excel, Lotus 1-2-3, Harvard
+            Graphics, etc.
+
+            The versatile LANVista family can also be remotely
+            controlled through the use of PC Anywhere, Commute,
+            Carbon Copy, or other PC remote control packages.
+            This feature allows the use of "co-pilot" mode which
+            enables an operator at the central site to guide and
+            train a remote operator through network management or
+            analysis tasks.
+
+            All LANVista models provide features vital to effective
+            network management and troubleshooting.  Basic
+            capabilities include: Network database, statistics
+            based on the entire network and on a node basis, Token
+            Ring functional address statistics, Bridged  traffic
+            statistics, Protocol statistics, logging of statistics
+            to a printer or file of user definable alarms, Hardware
+            Pre-Capture filtering, Post capture filtering, Playback of
+            captured data, Traffic simulation and On-line context
+
+
+
+NOCTools2 Working Group                                        [Page 66]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+            sensitive Help.
+
+            Protocol Interpreters used for decoding network traffic
+            supported by LANVista include: TCP/IP, DECnet, Banyan
+            Vines, XNS/MS-Net, AppleTalk, IBM Token Ring, Novell,
+            3Com 3+ Open, SNMP and OSI.
+
+        MECHANISM
+            LANVista is available in three forms.  A kit version which
+            consists of a plug-in PC card and Master software, a self
+            contained unit that packages the kit version in a portable
+            PC, and a Distributed system.  The LANVista distributed
+            system allows slave units placed anywhere in the world to
+            be controlled from a single central location for
+            centralized management of an enterprise network.
+            LANVista's PC cards provide a physical interface to
+            the LAN and frame preprocessing power.  The Master
+            software controls the PC card, and the display and
+            processing of information gathered from the network.
+
+        CAVEATS
+            Optimal performance of LANVista's master software is achieved
+            with DOS 5.0 by utilizing RAMDRIVE.SYS, SMARTDRV.SYS and High
+            memory.
+
+        BUGS
+            None Known.
+
+        LIMITATIONS
+            None Known.
+
+        HARDWARE REQUIRED
+            IBM PC AT, 386, 486 or compatible.
+
+        SOFTWARE REQUIRED
+            DOS
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+            LANVista is available worldwide.  For information on a
+            local sales representative contact:
+
+                CXR/DIGILOG
+                900 Business Center Drive
+                Horsham, PA 19044
+                Phone 1-800-DIGILOG
+                FAX: 215-956-0108
+
+            GSA schedule pricing is honored.
+
+
+
+NOCTools2 Working Group                                        [Page 67]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+            CXR/DIGILOG Help Desk 1-800-DIGILOG
+            Send email to: lanvista@digilog.uucp
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 68]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                               LANPROBE
+
+          NAME
+               LanProbe -- the HP 4990S LanProbe Distributed Analysis
+               System.
+
+          KEYWORDS
+               alarm, manager, map, status, traffic; ethernet; eaves-
+               drop, NMS; proprietary.
+
+          ABSTRACT
+               The LanProbe distributed monitoring system performs
+               remote and local monitoring of ethernet LANs in a pro-
+               tocol and vendor independent manner.
+
+               LanProbe discovers each active node on a segment and
+               displays it on a map with its adapter card vendor name,
+               ethernet address, and IP address.  Additional informa-
+               tion about the nodes, such as equipment type and physi-
+               cal location can be entered into the data base by the
+               user.
+
+               When the NodeLocator option is used, data on the actual
+               location of nodes is automatically entered and the map
+               becomes an accurate representation of the physical lay-
+               out of the segment.  Thereafter when a new node is
+               installed and becomes active, or when a node is moved
+               or becomes inactive, the change is detected and shown
+               on the map in real time.  The system also provides the
+               network manager with precise cable fault information
+               displayed on the map.
+
+               Traffic statistics are gathered and displayed and can
+               be exported in (comma delimited) CSV format for further
+               analysis.  Alerts can be set on user defined thres-
+               holds.
+
+               Trace provides a remote protocol analyzer capability
+               with decodes for common protocols.
+
+               Significant events (like power failure, cable breaks,
+               new node on network, broadcast IP source address seen,
+               etc.) are tracked in a log that is uploaded to Pro-
+               beView periodically.
+
+               ProbeView generates reports that can be manipulated by
+               MSDOS based word processors, spreadsheets, and DBMS.
+
+
+
+
+NOCTools2 Working Group                                        [Page 69]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          MECHANISM
+               The system consists of one or more LanProbe segment
+               monitors and ProbeView software running under Microsoft
+               Windows.  The LanProbe segment monitor attaches to the
+               end of an ethernet segment and monitors all traffic.
+               Attachment can be direct to a thin or thick coax cable,
+               or via an external transceiver to fiber optic or twist-
+               ed pair cabling.  Network data relating to the segment
+               is transferred to a workstation running ProbeView via
+               RS-232, ethernet, or a modem connection.
+
+               ProbeView software, which runs on a PC/AT class works-
+               tation, presents network information in graphical
+               displays.
+
+               The HP4992A NodeLocator option attaches to the opposite
+               end of the cable from the HP4991A LanProbe segment mon-
+               itor.  It automatically locates the position of nodes
+               on the ethernet networks using coaxial cabling schemes.
+
+          CAVEATS
+               None.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               None reported.
+
+          HARDWARE REQUIRED
+               HP 4991A LanProbe segment monitor
+               HP 4992A NodeLocator (for optional capabilities)
+               80386 based PC capable of running MS-Windows
+
+          SOFTWARE REQUIRED
+               HP 4990A ProbeView
+               MSDOS 3.0 or higher and Microsoft Windows/286 2.1.
+
+          AVAILABILITY
+               A commercial product available from:
+                    Hewlett-Packard Company
+                    P.O. Box 10301,
+                    Palo Alto, CA  94303-0890
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 70]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                               LANWATCH
+
+          NAME
+               LANWatch
+
+          KEYWORDS
+               alarm, analyzer, traffic; CHAOS, DECnet, DNS, ethernet,
+               IP, OSI, ring, SMTP, star; eavesdrop; DOS; library,
+               sourcelib.
+
+          ABSTRACT
+               LANWatch 2.0 is an inexpensive, powerful and flexible
+               network analyzer that runs under DOS on personal com-
+               puters and requires no hardware modifications to either
+               the host or the network.  LANWatch is an invaluable
+               tool for installing, troubleshooting, and monitoring
+               local area networks, and for developing and debugging
+               new protocols.  Network managers using LANWatch can
+               inspect network traffic patterns and packet errors to
+               isolate performance problems and bottlenecks.  Protocol
+               developers can use LANWatch to inspect and verify
+               proper protocol handling.  Since LANWatch is a
+               software-only package which installs easily in existing
+               PCs, network technicians and field service engineers
+               can carry LANWatch in their briefcase for convenient
+               network analysis at remote sites.
+
+               LANWatch has two operating modes: Display and Examine.
+               In Display Mode, LANWatch traces network traffic by
+               displaying captured packets in real time.  Examine Mode
+               allows you to scroll back through stored packets to
+               inspect them in detail.  To select a subset of packets
+               for display, storage or retrieval, there is an exten-
+               sive set of built-in filters.  Using filters, LANWatch
+               collects only packets of interest, saving the user from
+               having to sort through all network traffic to isolate
+               specific packets.  The built-in filters include alarm,
+               trigger, capture, load, save and search.  They can be
+               controlled separately to match on source or destination
+               address, protocol, or packet contents at the hardware
+               and transport layers.  LANWatch also includes suffi-
+               cient source code so users can modify the existing
+               filters and parsers or add new ones.
+
+               The LANWatch distribution includes executables and
+               source for several post-processors: a TCP protocol
+               analyzer, a node-by-node traffic analyzer and a dump
+               file listing tool.
+
+
+
+NOCTools2 Working Group                                        [Page 71]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          MECHANISM
+               Uses many common PC network interfaces by placing them
+               in promiscuous mode and capturing traffic.
+
+          CAVEATS
+               Most PC network interfaces will not capture 100% of the
+               traffic on a fully-loaded network (primarily missing
+               back-to-back packets).
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               LANWatch can't analyze what it doesn't see (see
+               Caveats).
+
+          HARDWARE REQUIRED
+               LANWatch requires a PC or PS/2 with a supported network
+               interface card.
+
+          SOFTWARE REQUIRED
+               LANWatch runs in DOS.  Modification of the supplied
+               source code or creation of additional filters and
+               parsers requires Microsoft C 5.1.
+
+          AVAILABILITY
+               LANWatch is commercially available from FTP Software,
+               Incorporated, 26 Princess Street, Wakefield, MA, 01880
+               (617 246-0900).
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 72]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                               LLL_ENTM
+
+          NAME
+               ENTM -- Ethernet Traffic Monitor
+
+          KEYWORDS
+               traffic; ethernet, IP; eavesdrop; VMS; free.
+
+          ABSTRACT
+               ENTM is a screen-oriented utility that runs under
+               VAX/VMS.  It monitors local ethernet traffic and
+               displays either a real time or cumulative histogram
+               showing a percent breakdown of traffic by ethernet pro-
+               tocol type.  The information in the display can be
+               reported based on packet count or byte count.  The per-
+               cent of broadcast, multicast and approximate lost pack-
+               ets is reported as well.  The screen display is updated
+               every three seconds.  Additionally, a real time, slid-
+               ing history window may be displayed showing ethernet
+               traffic patterns for the last five minutes.
+
+               ENTM can also report IP traffic statistics by packet
+               count or byte count.  The IP histograms reflect infor-
+               mation collected at the TCP and UDP port level, includ-
+               ing ICMP type/code combinations.  Both the ethernet and
+               IP histograms may be sorted by ASCII protocol/port name
+               or by percent-value.  All screen displays can be saved
+               in a file for printing later.
+
+          MECHANISM
+               This utility simply places the ethernet controller in
+               promiscuous mode and monitors the local area network
+               traffic.  It preallocates 10 receive buffers and
+               attempts to keep 22 reads pending on the ethernet dev-
+               ice.
+
+          CAVEATS
+               Placing the ethernet controller in promiscuous mode may
+               severely slow down a VAX system.  Depending on the speed
+               of the VAX system and the amount of traffic on the  lo-
+               cal  ethernet,  a large amount of CPU time may be spent
+               on the Interrupt Stack.  Running this code on any  pro-
+               duction system during operational hours is discouraged.
+
+          BUGS
+               Due to a bug in the VAX/VMS ethernet/802 device driver,
+               IEEE  802 format packets may not always be detected.  A
+               simple test is performed to "guess" which  packets  are
+
+
+
+NOCTools2 Working Group                                        [Page 73]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+               in  IEEE  802  format (DSAP equal to SSAP).  Thus, some
+               DSAP/SSAP pairs may be reported as  an  ethernet  type,
+               while  valid ethernet types may be reported as IEEE 802
+               packets.
+
+               In some hardware configurations, placing an ethernet
+               controller in promiscuous mode with automatic-restart
+               enabled will hang the controller.  Our VAX 8650 hangs
+               running this code, while our uVAX IIs and uVAX IIIs do
+               not.
+
+               Please report any additional bugs to the author at:
+                    Allen Sturtevant
+                    National Magnetic Fusion Energy Computer Center
+                    Lawrence Livermore National Laboratory
+                    P.O. Box 808; L-561
+                    Livermore, CA  94550
+                    Phone : (415) 422-8266
+                    E-Mail: sturtevant@ccc.nmfecc.gov
+
+          LIMITATIONS
+               The user is required to have PHY_IO, TMPMBX and NETMBX
+               privileges.  When activated, the program first checks
+               that the user process has enough quotas remaining
+               (BYTLM, BIOLM, ASTLM and PAGFLQUO) to successfully run
+               the program without entering into an involuntary wait
+               state.  Some quotas require a fairly generous setting.
+
+               The contents of IEEE 802 packets are not examined.
+               Only the presence of IEEE 802 packets on the wire is
+               reported.
+
+               The count of lost packets is approximated.  If, after
+               each read completes on the ethernet device, the utility
+               detects that it has no reads pending on that device,
+               the lost packet counter is incremented by one.
+
+               When the total number of bytes processed exceeds
+               7fffffff hex, all counters are automatically reset to
+               zero.
+
+          HARDWARE REQUIRED
+               A DEC ethernet controller.
+
+          SOFTWARE REQUIRED
+               VAX/VMS version V5.1+.
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 74]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          AVAILABILITY
+               For executables only,  FTP  to  the  ANONYMOUS  account
+               (password  GUEST) on CCC.NMFECC.GOV and GET the follow-
+               ing files:
+
+               [ANONYMOUS.PROGRAMS.ENTM]ENTM.DOC     (ASCII text)
+               [ANONYMOUS.PROGRAMS.ENTM]ENTM.EXE     (binary)
+               [ANONYMOUS.PROGRAMS.ENTM]EN_TYPES.DAT (ASCII text)
+               [ANONYMOUS.PROGRAMS.ENTM]IP_TYPES.DAT (ASCII text)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 75]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                Interactive Network Map
+
+          NAME
+               map -- Interactive Network Map
+
+          KEYWORDS
+               manager, map; CHAOS, ethernet, IP, ring, star; NMS,
+               ping, SNMP, X; UNIX; free, sourcelib.
+
+          ABSTRACT
+               Map draws a map of network connectivity and allows
+               interactive examination of information about various
+               components including whether hosts can be reached over
+               the network.
+
+               The program is supplied with complete source and is
+               written in a modular fashion to make addition of dif-
+               ferent protocol stacks, displays, or hardcopy devices
+               relatively easy.  This is one of the reasons why the
+               initial version supports at least two of each.  Contri-
+               butions of additional drivers in any of these areas
+               will be welcome as well as porting to additional plat-
+               forms.
+
+          MECHANISM
+               Net components are pinged by use of ICMP echo and,
+               optionally, CHAOS status requests and SNMP "gets."  The
+               program initializes itself from static data stored in
+               the file system and therefore does not need to access
+               the network in order to get running (unless the static
+               files are network mounted).
+
+          CAVEATS
+               As of publication, the tool is in beta release.
+
+          BUGS
+               Several minor nits, documented in distribution files.
+               Bug discoveries should be reported by email to Bug-
+               Map@LCS.MIT.Edu.
+
+          LIMITATIONS
+               See distribution file for an in-depth discussion of sys-
+               tem capabilities and potential.
+
+          HARDWARE REQUIRED
+               An X display is needed for interactive display of the
+               map, non-graphical interaction is available in non-
+               display mode.  For hardcopy output a PostScript or Tek-
+
+
+
+NOCTools2 Working Group                                        [Page 76]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+               tronix 4692 printer is required.
+
+          SOFTWARE REQUIRED
+               BSD UNIX or related OS.  IP/ICMP is required;
+               CHAOS/STATUS and SNMP can be used but are optional.
+               X-Windows is required for interactive display of the
+               map.
+
+          AVAILABILITY
+               The program is Copyright MIT.  It is available via
+                anonymous FTP with a license making it free to use and
+                distribute for non-commercial purposes.  FTP to host
+                FTP.LCS.MIT.Edu, directory nets.  The complete
+                distribution is in map.tar.Z and some short
+                documentation files are there (as well as in the
+                distribution).  Of most interest are ReadMe and Intro.
+
+                To be added to the email forum that discusses the
+                software, or for other administrative details, send a
+                request to: MAP-Request@LCS.MIT.Edu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 77]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                               MCONNECT
+
+          NAME
+               mconnect
+
+          KEYWORDS
+               status; SMTP; spoof; UNIX.
+
+          ABSTRACT
+               Mconnect allows an interactive session with a remote
+               mailer.  Mail delivery problems can be diagnosed by
+               connecting to the remote mailer and issuing SMTP com-
+               mands directly.
+
+          MECHANISM
+               Opens a TCP connection to remote SMTP on port 25.  Pro-
+               vides local line buffering and editing, which is the
+               distinction between mconnect and a TELNET to port 25.
+
+          CAVEATS
+               None.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               Mconnect is not a large improvement over using a TELNET
+               connection to port 25.
+
+          HARDWARE REQUIRED
+               No restrictions.
+
+          SOFTWARE REQUIRED
+               BSD UNIX or related OS.
+
+          AVAILABILITY
+               Available with 4.xBSD UNIX and related operating sys-
+               tems.
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 78]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                             MIB-BROWSER
+
+        NAME
+                MIB Browser
+
+        KEYWORDS
+                manager; osi; cmis, x; unix; free, sourcelib.
+
+        ABSTRACT
+                The MIB Browser is an X Windows HCI tool that allows
+                you to "browse" through the objects in a Management
+                Information Base (MIB). The browser is generic in that
+                it can connect to a CMIS agent without having any
+                prior knowledge of the structure of the MIB in the
+                agent.
+
+        MECHANISM
+                CMIP is used to transfer the values of attributes
+                between the managed system and the browser.
+
+        CAVEATS
+                None.
+
+        BUGS
+                Unexpected termination of the agent can cause browser
+                to crash (ISODE bug!).
+
+        HARDWARE REQUIRED
+                Unix workstation, has been tested on SUN 3 and SUN 4
+                architectures.
+
+        SOFTWARE REQUIRED
+                The ISODE protocol suite, BSD UNIX, X Windows, GNU C++
+                (g++), Interviews (2.6).
+
+        AVAILABILITY
+                The CMIP library and related management tools built
+                upon it, known as OSIMIS (OSI Management Information
+                Service), are publicly available from University
+                College London, England via FTP and FTAM.  To obtain
+                information regarding a copy send email to
+                osimis-request@cs.ucl.ac.uk or call +44 71 380 7366.
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 79]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                                  MONET
+
+        NAME
+               MONET -- the Hughes LAN Systems SNMP Network Management
+               Center (formerly the Hughes LAN Systems 9100) software
+               product runs on a Sun SPARCStation hardware platform.
+
+        KEYWORDS
+               control, graphics, network topology, manager, routing,
+               status, traffic; bridge, configuration, performance,
+               alarm management, relational database, mib parser for
+               RDBMS, intelligent hub management, DECnet, ethernet,
+               IP; NMS, SNMP; UNIX.
+
+        ABSTRACT
+               Monet provides the capability to manage and control
+               SNMP-based networking products from any vendor including
+               those from Hughes LAN Systems.
+
+               A comprehensive relational database manages the data and
+               ensures easy access and control of resources throughout
+               the network.
+
+               Monet provides multivendor management through its
+               advanced Mib master MIB parser that allows the parsing
+               of enterprise MIBs (ASN.1 format per RFC1212) directly
+               into the RDBMS for use by Monet's applications.
+
+               Major features include:
+
+               Remote access with X:
+                    Use of the X/Motif user-interface, enabling remote
+                    access to all the applications.
+
+               Database Management
+                    Stores and retrieves the information required to
+                    administer and configure the network.  It can be
+                    used to:
+                         - Store and recall configuration data for all
+                           devices.
+                         - Provide availability history for devices.
+                         - Assign new internet addresses.
+                         - Provide administrative information such as
+                           physical location of devices, responsible
+                           person, maintenance history, asset data,
+                           hardware/software versions, etc.
+                         - Full-function SQL interface.
+                         - User-customizable RDBMS report generation.
+
+
+
+NOCTools2 Working Group                                        [Page 80]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+                Graphics and Network Mapping
+                     The Graphics module enables the user to view the
+                     nodes in the network as "dynamic" icons in
+                     hierarchical maps.  The network is represented by
+                     these hierarchical maps.  Though there is a
+                     library of device icons, cities and geographical
+                     maps included, the user has access to a
+                     graphics editor that allows customizing and the
+                     creation of new icons and maps.
+                    A Device's icon may be selected to:
+                        - Register/deregister the device,
+                        - Access the open alarms and acknowledge
+                          faults for the selected device,
+                        - Ping the device to determine accessibility,
+                        - Draw graphs of any of the device's numeric
+                          MIB objects, either the values as retrieved
+                          in real-time or the history values
+                          previously stored in the RDBMS by the
+                          Performance Manager,
+                        - Telnet to the device,
+                        - Customize the graphical dynamics (color,
+                          fill, rotation, etc.) of the device's icon
+                          by associating them to the values of the
+                          device's MIB objects.
+
+               Configuration Management
+                    - Retrieves configuration information from SNMP
+                      devices.
+                    - Stores device parameters in the RDBMS, with
+                      common sets of parameters used for multiple
+                      devices, or for multiple ports on a device,
+                      stored only once in the RDBMS.
+                    - Configures devices from the parameters stored in
+                      the RDBMS, including those relating to TCP/IP,
+                      DECnet and any other protocol/feature
+                      configurable via SNMP.
+                    - Polls devices to compare their current parameter
+                      values with those in the database and produce
+                      reports of the discrepancies.
+                    - Collect data about the state of the network.
+                    - Learn the parameters of the devices in the
+                      network and populate the database.
+
+               Performance Management
+                    - Displays local network traffic graphically, by
+                      packet size, protocol, network utilization,
+                      sources and destinations of packets, etc.
+                    - Provides for the scheduling of jobs to retrieve
+
+
+
+NOCTools2 Working Group                                        [Page 81]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+                      MIB values of a device and store them in the RDBMS
+                      for review or summary reporting at a later time.
+                    - Allows high/low thresholds to be set on retrieved
+                      values with alarms generated when thresholds are
+                      exceeded.
+
+               Fault Management
+                    - Provides availability monitoring and indicates
+                      potential problems.
+                    - Creates alarms from received SNMP traps, and from
+                      other internally-generated conditions,
+                    - Records alarms in the alarm log in the RDBMS.
+                    - Lists alarms for selected set of devices,
+                      according to various filter conditions,
+                    - Possible causes and suggested actions for the
+                      alarms are listed.
+                    - New alarms are indicated by a flashing icon and
+                      optional audio alert.
+                    - Visual indication of alarms bubbles up the network
+                      map hierarchy.
+                    - Cumulative reports can be produced.
+
+               Utilities Function
+                    - View and/or terminate current NMC processes,
+                    - Access to database maintenance utilities.
+
+        MECHANISM
+               SNMP.
+
+        CAVEATS
+               None reported.
+
+        BUGS
+               None known.
+
+        LIMITATIONS
+               Maximum number of nodes that can be monitored is
+               18,000.  This can include Hosts, Terminal Servers, PCs,
+               Routers, and Bridges.
+
+        HARDWARE REQUIRED
+               The host for the NMC software is a Sun 4 desktop works-
+               tation.  Recommended minimum hardware is the Sun IPX
+               Color workstation, with a 1/4" SCSI tape drive.
+
+        SOFTWARE REQUIRED
+               MONET V5.0, which is provided on 1/4" tape format, runs on
+               the Sun 4.1.1 Operating System.
+
+
+
+NOCTools2 Working Group                                        [Page 82]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+               A commercial product of:
+                    Hughes LAN Systems Inc.
+                    1225 Charleston Road
+                    Mountain View, CA 94043
+                    Phone: (415) 966-7300
+                    Fax: (415) 960-3738
+                    RCA Telex: 276572
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+                kishoret@msgate.hls.com
+                kzm@hls.com
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 83]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                            NET_MONITOR
+
+          NAME
+               net_monitor
+
+          KEYWORDS
+               routing, status; DECnet, IP; curses, ping; UNIX, VMS;
+               free, sourcelib.
+
+          ABSTRACT
+               Net_monitor uses ICMP echo (and DECnet reachability
+               information on VAX/VMS) to monitor a network.  The mon-
+               itoring is very simplistic, but has proved useful.  It
+               periodically tests whether hosts are reachable and
+               reports the results in a full-screen display.  It
+               groups hosts together in common sets.  If all hosts in
+               a set become unreachable, it makes a lot of racket with
+               bells, since it assumes that this means that some com-
+               mon piece of hardware that supports that set has
+               failed.  The periodicity of the tests, hosts to test,
+               and groupings of hosts are controlled with a single
+               configuration file.
+
+               The idea for this program came from the PC/IP monitor
+               facility, but is an entirely different program with
+               different functionality.
+
+          MECHANISM
+               Reachability is tested using ICMP echo facilities for
+               TCP/IP hosts (and DECnet reachability information on
+               VAX/VMS).  A DECnet node is considered reachable if it
+               appears in the list of hosts in a "show network" com-
+               mand issued on a routing node.
+
+          CAVEATS
+               This facility has been found to be most useful when run
+               in a window on a workstation rather than on a terminal
+               connected to a host.  It could be useful if ported to a
+               PC (looks easy using FTP Software's programming
+               libraries), but this has not been done.  Curses is very
+               slow and cpu intensive on VMS, but the tool has been
+               run in a window on a VAXstation 2000.  Just don't try
+               to run it on a terminal connected to a 11/750.
+
+          BUGS
+               None known.
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 84]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          LIMITATIONS
+               This tool is not meant to be a replacement for a more
+               comprehensive network management facility such as is
+               provided with SNMP.
+
+          HARDWARE REQUIRED
+               A host with a network connection.
+
+          SOFTWARE REQUIRED
+               Curses, 4.xBSD UNIX socket programming libraries (lim-
+               ited set) and some flavor of TCP/IP that supports ICMP
+               echo request (ping).  It has been run on VAX/VMS run-
+               ning WIN/TCP and several flavors of 4BSD UNIX (includ-
+               ing SunOS 3.2, 4.0, and 4.3BSD).  It could be ported to
+               any platform that provides a BSD-style programming li-
+               brary with an ICMP echo request facility and curses.
+
+          AVAILABILITY
+               Requests should be sent to the author:
+
+               Dale Smith
+               Asst Dir of Network Services
+               University of Oregon
+               Computing Center
+               Eugene, OR  97403-1211
+
+               Internet: dsmith@oregon.uoregon.edu.
+               BITNET: dsmith@oregon.bitnet
+               UUCP: ...hp-pcd!uoregon!dsmith
+               Voice: (503)686-4394
+
+               With the source code, a makefile is provided for most
+               any UNIX box and a VMS makefile compatible with the
+               make distributed with PMDF.  A VMS DCL command file is
+               also provided, for use by those VMS sites without
+               "make."
+
+               The author will attempt to fix bugs, but no support is
+               promised.  The tool is copyrighted, but free (for now).
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 85]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                     NETLABS_CMOT_AGENT
+
+          NAME
+               Netlabs CMOT Agent
+
+          KEYWORDS
+               manager, status; IP, OSI; NMS.
+
+          ABSTRACT
+               Netlabs' CMOT code debuted in Interop 89.  The CMOT
+               code comes with an Extensible MIB, which allows users
+               to add new MIB variables.  The code currently supports
+               all the MIB variables in RFC 1095 via the data types in
+               RFC 1065, as well as the emerging MIB-II, which is
+               currently in experimental stage.  The CMOT has been
+               benchmarked at 100 Management Operations per Second
+               (MOPS) for a 1-MIPS machine.
+
+          MECHANISM
+               The Netlabs CMOT agent supports the control and moni-
+               toring of network resources by use of CMOT message
+               exchanges.
+
+          CAVEATS
+               None.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               None reported.
+
+          HARDWARE REQUIRED
+               Portable to most hardware.
+
+          SOFTWARE REQUIRED
+               Portable to most operating systems.
+
+          AVAILABILITY
+               Commercially available from:
+                    Netlabs Inc
+                    11693 Chenault Street Ste 348
+                    Los Angeles CA 90049
+                    (213) 476-4070
+                    lam@netlabs.com (Anne Lam)
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 86]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                   NETLABS_DUAL_MANAGER
+
+          NAME
+               Dual Manager
+
+          KEYWORDS
+               alarm, control, manager, map, security, status; IP,
+               OSI; NMS, SNMP, X; UNIX; library.
+
+          ABSTRACT
+               Netlabs' Dual Manager provides management of TCP/IP
+               networks using both SNMP and CMOT protocols.  Such
+               management can be initiated either through the X-
+               Windows user interface (both Motif and Openlook), or
+               through OSI Network Management (CMIP) commands.  The
+               Dual Manager provides for configuration, fault, secu-
+               rity and performance management.  It provides extensive
+               map management features, including scanned maps in the
+               background.  It provides simple mechanisms to extend
+               the MIB and assign specific lists of objects to
+               specific network elements, thereby providing for the
+               management of all vendors' specific MIB extensions.  It
+               provides an optional relational DBMS for storing and
+               retrieving MIB and alarm information.  Finally, the
+               Dual Manager is an open platform, in that it provides
+               several Application Programming Interfaces (APIs) for
+               users to extend the functionality of the Dual Manager.
+
+               The Dual Manager is expected to work as a TCP/IP
+               "branch manager" under DEC's EMA, AT&T's UNMA and other
+               OSI-conformant enterprise management architectures.
+
+          MECHANISM
+               The Netlabs Dual Manager supports the control and moni-
+               toring of network resources by use of both CMOT and
+               SNMP message exchanges.
+
+          CAVEATS
+               None.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               None reported.
+
+          HARDWARE REQUIRED
+               Runs on Sun/3 and Sun/4s.
+
+
+
+NOCTools2 Working Group                                        [Page 87]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          SOFTWARE REQUIRED
+               Available on System V or SCO Open Desktop environments.
+               Uses X-Windows for the user interface.
+
+          AVAILABILITY
+               Commercially available from:
+                    Netlabs Inc
+                    11693 Chenault Street Ste 348
+                    Los Angeles CA 90049
+                    (213) 476-4070
+                    lam@netlabs.com (Anne Lam)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 88]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                     NETLABS_SNMP_AGENT
+
+          NAME
+               Netlabs SNMP Agent.
+
+          KEYWORDS
+               manager, status; IP; NMS, SNMP.
+
+          ABSTRACT
+               Netlabs' SNMP code debuted in Interop 89, where it
+               showed interoperation of the code with several imple-
+               mentations on the show floor.  The SNMP code comes with
+               an Extensible MIB, which allows users to add new MIB
+               variables.  The code currently supports all the MIB
+               variables in RFC 1066 via the data types in RFC 1065,
+               as well as the emerging MIB-II, which is currently in
+               experimental stage.  The SNMP has been benchmarked at
+               200 Management Operations per Second (MOPS) for a 1-
+               MIPS machine.
+
+          MECHANISM
+               The Netlabs SNMP agent supports the control and moni-
+               toring of network resources by use of SNMP message
+               exchanges.
+
+          CAVEATS
+               None.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               None reported.
+
+          HARDWARE REQUIRED
+               Portable to most hardware.
+
+          SOFTWARE REQUIRED
+               Portable to most operating systems.
+
+          AVAILABILITY
+               Commercially available from:
+                    Netlabs Inc
+                    11693 Chenault Street Ste 348
+                    Los Angeles CA 90049
+                    (213) 476-4070
+                    lam@netlabs.com (Anne Lam)
+
+
+
+
+NOCTools2 Working Group                                        [Page 89]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                 NetMetrix-Load-Monitor
+
+        NAME
+                NetMetrix Load Monitor
+
+        KEYWORDS
+                alarm,traffic; Ethernet, FDDI, IP, Ring; Eavesdrop,
+                SNMP, X; UNIX;
+
+        ABSTRACT
+                The NetMetrix Load Monitor is a distributed
+                client-server monitoring tool for ethernet, token
+                ring, and FDDI networks.  A unique "dual" architecture
+                provides compatibility with both RMON and X windows.
+                RMON allows interoperability and an enterprise-wide
+                view, while X windows enables much more powerful,
+                intelligent applications at remote segments and saves
+                network bandwidth.
+
+                The Load Monitor provides extensive traffic
+                statistics.  It looks at load by time interval, source
+                node, destination node, application, protocol or
+                packet size. A powerful ZOOM feature allows extensive
+                correlational analysis which is displayed in a wide
+                variety of graphs and tables.
+
+                You can answer questions such as: Which sources are
+                generating most of the load on the network when it is
+                most heavily loaded and where is this load going?
+                Which source/destination pairs generate the most
+                traffic over the day?  Where should bridges and
+                routers be located to optimally partition the network?
+                How much load do applications, like the X Windows
+                protocol, put on the network and who is generating that
+                load when it is the greatest?
+
+                A floating license allows easy access to the software
+                tool anywhere you need it.
+
+        MECHANISM
+                NetMetrix turns the network interface into promiscuous
+                mode to capture packets.
+
+        CAVEATS
+                none.
+
+        BUGS
+                none known.
+
+
+
+NOCTools2 Working Group                                        [Page 90]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        LIMITATIONS
+                none.
+
+        HARDWARE REQUIRED
+                SPARC system
+
+        SOFTWARE REQUIRED
+                SunOS 4.0 or higher
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+            NetMetrix is available from:
+                    Sales Department
+                    Metrix Network Systems, Inc.
+                    One Tara Boulevard
+                    Nashua, New Hampshire 03062
+                    telephone: 603-888-7000
+                    fax: 603-891-2796
+                    email: info@metrix.com
+
+        Government agencies please note that NetMetrix is on the GSA
+        schedule.
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+            Norma Shepperd
+            Marketing Administrator
+            603-888-7000
+            norma@metrix.com
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 91]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                  NetMetrix-NFS-Monitor
+
+        NAME
+              NetMetrix NFS Monitor
+
+        KEYWORDS
+              traffic; Ethernet, FDDI, NFS, Ring; Eavesdrop, SNMP, X;
+              UNIX
+
+        ABSTRACT
+                The NetMetrix NFS Monitor is a distributed network
+                monitoring tool which monitors and graphs NFS load,
+                response time, retransmits, rejects and errors by
+                server, client, NFS procedure, or time
+                interval.  Break down server activity by file system
+                and client activity by user.
+
+                A powerful ZOOM feature lets you correlate monitoring
+                variables.  You can see client/server relationships,
+                compare server performance, evaluate NFS performance
+                enhancement strategies.
+
+                A floating license and the X Window protocol allow
+                monitoring of remote ethernet, token ring and FDDI
+                segments from a central enterprise-wide display.
+
+        MECHANISM
+                NetMetrix turns the network interface into promiscuous
+                mode to capture packets.
+
+        CAVEATS
+                none.
+
+        BUGS
+                none known.
+
+        LIMITATIONS
+                none.
+
+        HARDWARE REQUIRED
+                SPARC system
+
+        SOFTWARE REQUIRED
+                SunOS 4.0 or higher
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 92]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+            NetMetrix is available from:
+                    Sales Department
+                    Metrix Network Systems, Inc.
+                    One Tara Boulevard
+                    Nashua, New Hampshire 03062
+                    telephone: 603-888-7000
+                    fax: 603-891-2796
+                    email: info@metrix.com
+
+                Government agencies please note that NetMetrix is on
+                the GSA schedule.
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+            Norma Shepperd
+            Marketing Administrator
+            603-888-7000
+            norma@metrix.com
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 93]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog            NetMetrix-Protocol-Analyzer
+
+        NAME
+              NetMetrix Protocol Analyzer
+
+        KEYWORDS
+                alarm, analyzer, traffic; DECnet, DNS, Ethernet, FDDI,
+                IP, OSI, NFS, Ring, SMTP; Eavesdrop, SNMP, X; UNIX;
+                Library
+
+        ABSTRACT
+                The NetMetrix Protocol Analyzer is a distributed
+                client-server monitoring tool for ethernet, token
+                ring, and FDDI networks.  A unique "dual" architecture
+                provides compatibility with both RMON and
+                X windows.  RMON allows interoperability, while X
+                windows enables much more powerful, intelligent
+                applications at remote segments and saves network
+                bandwidth.
+
+                With the Protocol Analyzer, you can decode and display
+                packets as they are being captured. Extensive filters
+                let you sift through packets either before or after
+                trace capture.  The capture filter may be specified by
+                source, destination between hosts, protocol, packet
+                size, pattern match, or by a complete expression using
+                an extensive filter expression language.
+
+                Full 7-layer packet decodes are available for all
+                major protocols including DECnet, Appletalk, Novell,
+                XNS, SNA, BANYAN, OSI and TCP/IP.  The decodes for the
+                TCP/IP stack have all major protocols including NFS,
+                YP, DNS, SNMP, OSPF, etc.
+
+                Request and reply packets are matched. Packets can be
+                displayed in summary, detail or hex, with multiple
+                views to see packet dialogues side by side.
+
+                A complete developers' kit is available for custom
+                decodes.
+
+                A floating license allows easy access to the software
+                tool anywhere you need it.
+
+        MECHANISM
+                NetMetrix turns the network interface into promiscuous
+                mode to capture packets.
+
+
+
+
+NOCTools2 Working Group                                        [Page 94]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        CAVEATS
+                none.
+
+        BUGS
+                none known.
+
+        LIMITATIONS
+                none.
+
+        HARDWARE REQUIRED
+                SPARC system
+
+        SOFTWARE REQUIRED
+                 SunOS 4.0 or higher
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+            NetMetrix is available from:
+                    Sales Department
+                    Metrix Network Systems, Inc.
+                    One Tara Boulevard
+                    Nashua, New Hampshire 03062
+                    telephone: 603-888-7000
+                    fax: 603-891-2796
+                    email: info@metrix.com
+
+                Government agencies please note that NetMetrix is on the
+                GSA schedule.
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+            Norma Shepperd
+            Marketing Administrator
+            603-888-7000
+            norma@metrix.com
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 95]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog            NetMetrix-Traffic-Generator
+
+        NAME
+                 NetMetrix Traffic Generator
+
+        KEYWORDS
+                Debugger, Generator, Traffic; Ethernet, FDDI, IP,
+                Ring; Eavesdrop, SNMP, X; UNIX; Library
+
+        ABSTRACT
+                The NetMetrix Traffic Generator is a distributed
+                software tool which allows you to simulate network
+                load or test packet dialogues between nodes on your
+                ethernet, token ring, or FDDI segments.  The Traffic
+                Generator can also be used to test and validate
+                management station alarms, routers, bridges, hubs, etc.
+
+                An easy-to-use programming interface provides complete
+                flexibility over variables such as bandwidth, packet
+                sequence, and conditional responses.
+
+                A floating license and the X Window System protocol
+                allow testing of remote ethernet, token ring and FDDI
+                segments from a central console.
+
+        MECHANISM
+                NetMetrix turns the network interface into promiscuous
+                mode to capture packets.
+
+        CAVEATS
+                none.
+
+        BUGS
+                none known.
+
+        LIMITATIONS
+                none.
+
+        HARDWARE REQUIRED
+                SPARC system
+
+        SOFTWARE REQUIRED
+                SunOS 4.0 or higher
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 96]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+            NetMetrix is available from:
+                    Sales Department
+                    Metrix Network Systems, Inc.
+                    One Tara Boulevard
+                    Nashua, New Hampshire 03062
+                    telephone: 603-888-7000
+                    fax: 603-891-2796
+                    email: info@metrix.com
+
+                Government agencies please note that NetMetrix is on
+                the GSA schedule.
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+            Norma Shepperd
+            Marketing Administrator
+            603-888-7000
+            norma@metrix.com
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 97]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                           NETMON_MITRE
+
+          NAME
+               NETMON and iptrace
+
+          KEYWORDS
+               traffic; IP; eavesdrop; UNIX; free.
+
+          ABSTRACT
+               NETMON is a facility to enable communication of net-
+               working events from the BSD UNIX operating system to a
+               user-level network monitoring or management program.
+               Iptrace is a program interfacing to NETMON which logs
+               TCP-IP traffic for performance measurement and gateway
+               monitoring. It is easy to build other NETMON-based
+               tools using iptrace as a model.
+
+               NETMON resides in the 4.3BSD UNIX kernel.  It is
+               independent of hardware-specific code in UNIX.  It is
+               transparent to protocol and network type, having no
+               internal assumptions about the network protocols being
+               recorded.  It is installed in BSD-like kernels by
+               adding a standard function call (probe) to a few points
+               in the input and output routines of the protocols to be
+               logged.
+
+               NETMON is analogous to Sun Microsystems' NIT, but the
+               interface tap function is extended by recording more
+               context information.  Aside from the timestamp, the
+               choice of information recorded is up to the installer
+               of the probes.  The NETMON probes added to the BSD IP
+               code supplied with the distribution include as context:
+               input and output queue lengths, identification of the
+               network interface, and event codes labeling packet dis-
+               cards.  (The NETMON distribution is geared towards
+               measuring the performance of BSD networking protocols
+               in an IP gateway).
+
+               NETMON is designed so that it can reside within the
+               monitored system with minimal interference to the net-
+               work processing.  The estimated and measured overhead
+               is around five percent of packet processing.
+
+               The user-level tool "iptrace" is provided with NETMON.
+               This program logs IP traffic, either at IP-level only,
+               or as it passes through the network interface drivers
+               as well.  As a separate function, iptrace produces a
+               host traffic matrix output.  Its third type of output
+
+
+
+NOCTools2 Working Group                                        [Page 98]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+               is abbreviated sampling, in which only a pre-set number
+               of packets from each new host pair is logged.  The
+               three output types are configured dynamically, in any
+               combination.
+
+               OSITRACE, another logging tool with a NETMON interface,
+               is available separately (and documented in a separate
+               entry in this catalog).
+
+          MECHANISM
+               Access to the information logged by NETMON is through a
+               UNIX special file, /dev/netmon.  User reads are blocked
+               until the buffer reaches a configurable level of full-
+               ness.
+
+               Several other parameters of NETMON can be tuned at com-
+               pile time.  A diagnostic program, netmonstat, is
+               included in the distribution.
+
+          CAVEATS
+               None.
+
+          BUGS
+               Bug reports and questions should be addressed to:
+                    ie-tools@gateway.mitre.org
+               Requests to join this mailing list:
+                    ie-tools-request@gateway.mitre.org
+               Questions and suggestions can also be directed to:
+                    Allison Mankin (703)883-7907
+                    mankin@gateway.mitre.org
+
+          LIMITATIONS
+               A NETMON interface for tcpdump and other UNIX protocol
+               analyzers is not included, but it is simple to write.
+               NETMON probes for a promiscuous ethernet interface are
+               similarly not included.
+
+          HARDWARE REQUIRED
+               No restrictions.
+
+          SOFTWARE REQUIRED
+               BSD UNIX-like network protocols or the ability to
+               install the BSD publicly available network protocols in
+               the system to be monitored.
+
+
+
+
+
+
+
+NOCTools2 Working Group                                        [Page 99]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          AVAILABILITY
+               The NETMON distribution is available by anonymous FTP
+               in pub/netmon.tar or pub/netmon.tar.Z from aelred-
+               3.ie.org.  A short user's and installation guide,
+               NETMON.doc, is available in the same location.  The
+               NETMON distribution is provided "as is" and requires
+               retention of a copyright text in code derived from it.
+               It is copyrighted by the MITRE-Washington Networking
+               Center.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 100]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog           NETMON_WINDOWS_SNMP_RESEARCH
+
+        NAME
+                NETMON for Windows -- an SNMP-based network management
+                tool that runs under Microsoft Windows 3.0 from SNMP
+                Research.
+
+        KEYWORDS
+                alarm, control, manager, map, routing;
+                DECnet, Ethernet, IP, OSI, ring, star;
+                NMS, SNMP;
+                DOS;
+                sourcelib.
+
+        ABSTRACT
+                The NETMON application implements a powerful network
+                management station based on a low-cost DOS platform.
+                NETMON's network management tools for configuration,
+                performance, security, and fault management have been
+                used successfully with a wide assortment of wide- and
+                local-area-network topologies and media.  Multiprotocol
+                devices are supported including those using TCP/IP,
+                DECnet, and OSI protocols.
+
+        Some features of NETMON's network management tools include:
+
+                o Fault management tool displays a map of the network
+                  configuration with node and link state indicated
+                  in one of several colors to indicate current status;
+                o Configuration management tool may be used to edit the
+                  network management information base stored in the
+                  NMS to reflect changes occurring in the network;
+                o Graphs and tabular tools for use in fault and performance
+                  management;
+                o Mechanisms by which additional variables, such as vendor-
+                  specific variables, may be added;
+                o Alarms may be enabled to alert the operator of events
+                  occurring in the network;
+                o Events are logged to disk;
+                o Output data may be transferred via flat files for
+                  additional report generation by a variety of
+                  statistical packages.
+
+        The NETMON application comes complete with source code
+        including a powerful set of portable libraries for generating
+        and parsing SNMP messages.
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 101]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        MECHANISM
+                The NETMON for Windows application is based on the
+                Simple Network Management Protocol (SNMP).  Polling is
+                performed via the powerful SNMP get-next operator and
+                the SNMP get operator.  Trap directed polling is used
+                to regulate the focus and intensity of the polling.
+
+        CAVEATS
+                None.
+
+        BUGS
+                None known.
+
+        LIMITATIONS
+                None reported.
+
+        HARDWARE REQUIRED
+                The minimum system is an IBM 386 computer, or
+                compatible, with hard disk drive.
+
+        SOFTWARE REQUIRED
+                DOS 5.0 or later, Windows 3.0 in 386 mode, and TCP/IP
+                kernel software from FTP Software.
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+                This is a commercial product available under license
+                from:
+                        SNMP Research
+                        3001 Kimberlin Heights Road
+                        Knoxville, TN  37920-9716
+                        Attn:  John Southwood, Sales and Marketing
+                        (615) 573-1434 (Voice)  (615) 573-9197 (FAX)
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+                users@seymour1.cs.utk.edu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 102]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                               NETscout
+
+        NAME
+                NETscout(tm)
+
+        KEYWORDS
+                Alarm, Analyzer, Manager, Status, Traffic;
+                DECnet, Ethernet, IP, OSI, NFS, Ring, Star, Eavesdrop;
+                NMS, SNMP;
+                UNIX;
+
+        ABSTRACT
+                The NETscout family of distributed LAN Analyzer
+                devices are intended to provide network users with a
+                comprehensive capability to identify and isolate fault
+                conditions in data communications networks.
+                NETscout has the capability to collect wide ranging
+                statistical data, to display selectively captured and
+                fully decoded network traffic, to set user-defined
+                alarm conditions, and to obtain real-time updates
+                from all segments of a widely dispersed internetwork
+                from a centralized SNMP-compatible network management
+                console.
+
+                The NETscout family is based on standards so that
+                operation may be realized in heterogeneous networks
+                which constitute a multi-protocol, multi-topology,
+                multi-vendor environment.  The fundamental standards
+                upon which NETscout is based are the Simple Network
+                Management Protocol (SNMP), which defines the protocol
+                for all inter-communications between NETscout devices,
+                and the Remote Monitoring Management Information Base
+                (RMON-MIB), which defines the type of information
+                which is to be gathered and made available to the
+                user for each network segment.
+
+                NETscout clients provide a full array of monitoring
+                and analysis features including intelligent seven
+                level decoding of all major protocol stacks:
+
+                DOD including TCP/IP    XNS       Novell
+                DECNET including LAT    ISO       APPLETALK
+                IBM Token Ring          Vines     NETBIOS/SMB
+                SNMP including RMON-MIB SUN-NFS   SMT
+
+                NETscout agents support all nine groups of the
+                RMON-MIB standard.  NETscout agents can work with any
+                SNMP-based network management system and currently
+
+
+
+NOCTools2 Working Group                                       [Page 103]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+                support Ethernet and Token Ring.
+
+        MECHANISM
+                The operation of the NETscout family is divided into
+                two distinct subcategories.  The first is the "Client"
+                which is the user console from which operational
+                commands are issued and where all results and
+                diagnostic information are displayed. In a NETscout
+                topology it is feasible to have multiple clients
+                active simultaneously within a single network.  The
+                second category is the "Agent", a hardware/software
+                device which is attached to a specific network
+                segment and which gathers statistical information for
+                that segment as well as providing a window into that
+                segment where network traffic may be observed and
+                gathered for more detailed user analysis.  A
+                typical network will have multiple segments and
+                multiple agents up to the point of having one agent
+                for each logical network segment.
+
+                NETscout Model 9210 is a software package which, when
+                combined in a Sun SPARCstation in conjunction with
+                SunNet Manager running under Open Windows, implements
+                the NETscout client function.  SunNet Manager provides
+                the background operational tools for client operation
+                while the NETscout software provides
+                application-specific functions related to RMON-MIB
+                support as well as all software necessary to
+                perform the protocol decode function.
+                SunNet Manager also implements a network map file
+                which includes a topographical display of the entire
+                network and is the mechanism for selecting
+                network elements to perform operations.
+
+                NETscout Model 9215 is a software package that
+                operates in conjunction with SunNet Manager and
+                implements the statistics monitoring function only.
+                That is, it does not include the protocol
+                decode function or the mechanism to retrieve actual
+                data from a remote agent.  It does, however, include
+                complete statistics gathering and event and alarm
+                generation.
+
+                Frontier NETscout Models 9510 and 9515, and Model 9610
+                and 9615 are agent software packages that implement
+                selected network diagnostic functions when loaded into
+                a Sun SPARCstation (9510, 9515) or a SynOptics
+                LattisNet Hub (9610, 9615) respectively which is
+
+
+
+NOCTools2 Working Group                                       [Page 104]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+                connected to an Ethernet network segment
+                using conventional network interface hardware.  Models
+                9510 and 9610 support all nine RMON-MIB groups
+                including "filters" and "packet capture" and thus
+                provide for complete protocol monitoring and decode
+                when used with a client
+                equipped with protocol decode software.  Models 9515
+                and 9615 include support for seven RMON-MIB groups
+                which excludes "filters" and "data capture" and
+                therefore perform network monitoring only through
+                collection and presentation of network statistics,
+                events, and alarms.  All models also support the MIB2
+                system and interface groups.
+
+                Frontier NETscout Models 9520 and 9525, and Model 9620
+                and 9625 are agent software packages that are
+                identical in function to their respective models
+                described above except that they are for use on
+                Token Ring segments.
+
+        CAVEATS
+                The RMON-MIB standard for Token Ring applications has
+                not yet been formally released and is not approved.
+                NETscout products correspond to the latest draft for
+                Token Ring functions and will be updated as
+                required to conform to the standard as it is approved.
+
+        BUGS
+                None known.
+
+        LIMITATIONS
+                None reported.
+
+        HARDWARE REQUIRED
+                Sun SPARCstation or LattisNet Hub depending upon Model
+                number.
+
+        SOFTWARE REQUIRED
+                Sun OS 4.1.1 for client and agent, SunNet Manager for
+                client.
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 105]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+                NETscout products are available commercially.  For
+                information regarding your local representative, contact:
+                        Frontier Software Development, Inc.
+                        1501 Main Street
+                        Tewksbury, MA  01876
+                        Phone:  508-851-8872
+                        Fax: 508-851-6956
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+                        Marketing
+                        Frontier Software
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 106]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                                NETSTAT
+
+          NAME
+               netstat
+
+          KEYWORDS
+               routing; IP; UNIX, VMS; free.
+
+          ABSTRACT
+               Netstat is a program that accesses network related data
+               structures within the kernel, then provides an ASCII
+               format at the terminal.  Netstat can provide reports on
+               the routing table, TCP connections, TCP and UDP
+               "listens", and protocol memory management.
+
+          MECHANISM
+               Netstat accesses operating system memory to read the
+               kernel routing tables.
+
+          CAVEATS
+               Kernel data structures can change while netstat is run-
+               ning.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               None reported.
+
+          HARDWARE REQUIRED
+               No restrictions.
+
+          SOFTWARE REQUIRED
+               BSD UNIX or related OS, or VMS.
+
+          AVAILABILITY
+               Available via anonymous FTP from uunet.uu.net, in
+               directory bsd-sources/src/ucb.  Available with 4.xBSD
+               UNIX and related operating systems.  For VMS, available
+               as part of TGV MultiNet IP software package, as well as
+               Wollongong's WIN/TCP.
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 107]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                     NETWORK_INTEGRATOR
+
+          NAME
+               Network Integrator I
+
+          KEYWORDS
+               map, traffic; ethernet; UNIX.
+
+          ABSTRACT
+               This tool monitors traffic on network segments.  All
+               information is dumped to either a log file or, for
+               real-time viewing, to a command tool window.  Data is
+               time-stamped according to date and time.  Logging can
+               continue for up to 24 hours.
+
+               The tool is flexible in data collection and presenta-
+               tion.  Traffic filters can be specified according to
+               header values of numerous protocols, including those
+               used by Apple, DEC, Sun, HP, and Apollo.  Bandwidth
+               utilization can be monitored, as well as actual load
+               and peak throughput.  Additionally, the Network
+               Integrator can analyze a network's topology, and record
+               the location of all operational nodes on a network.
+
+               Data can be displayed in six separate formats of bar
+               graphs.  In addition, there are several routines for
+               producing statistical summaries of the data collected.
+
+          MECHANISM
+               The tools work through RPC and XDR calls.
+
+          CAVEATS
+               Although the tool adds only little traffic to a net-
+               work, generation of statistics from captured files
+               requires a significant portion of a workstation's CPU.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               Must be root to run monitor.  There does not seem to be
+               a limit to the number of nodes, since it monitors by
+               segments.  The only major limitation is the amount of
+               disk space that a user can commit to the log files.
+               The size of the log files, however, can be controlled
+               through the tool's parameters.
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 108]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          HARDWARE REQUIRED
+               Sun3 or Sun4.
+
+          SOFTWARE REQUIRED
+               4.0BSD UNIX or greater, or related OS.
+
+          AVAILABILITY
+               Copyrighted, commercially available from
+               Network Integrators,
+               (408) 927-0412.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 109]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                               NFSwatch
+
+        NAME
+                nfswatch
+
+        KEYWORDS
+                Traffic; Ethernet, IP, NFS; Curses, Eavesdrop; UNIX;
+                Free
+
+        ABSTRACT
+                Nfswatch monitors all incoming ethernet traffic to an
+                NFS file server and divides it into several
+                categories.  The number and percentage of packets
+                received in each category is displayed on
+                the screen in a continuously updated display.
+
+                By default, nfswatch monitors all packets destined for
+                the local host over a single network interface.
+                Options are provided to specify the specific interface
+                to be monitored, or all interfaces at once.  NFS
+                traffic to the local host, to a remote host, from a
+                specific host, between two hosts, or all NFS traffic
+                on the network may be monitored.
+
+                Categories of packets monitored and counted include:
+                ND Read, ND Write, NFS Read, NFS Write, NFS Mount,
+                Yellow Pages (NIS), RPC Authorization, Other RPC, TCP,
+                UDP, ICMP, RIP, ARP, RARP, Ethernet Broadcast, and
+                Other.
+
+                Packets are also tallied either by file system or file
+                (specific files may be watched as an option), NFS
+                procedure name (RPC call), or NFS client hostname.
+
+                Facilities for taking "snapshots" of the screen, as
+                well as saving data to a log file for later analysis
+                (the analysis tool is included) are also available.
+
+        MECHANISM
+                Nfswatch uses the Network Interface Tap, nit(4) under
+                SunOS 4.x, and the Packet Filter, packetfilter(4),
+                under Ultrix 4.x, to place the ethernet interface into
+                promiscuous mode.  It filters out NFS packets, and
+                decodes the file handles in order to determine how to
+                count the packet.
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 110]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        CAVEATS
+                Because the NFS file handle is a non-standard (server
+                private) piece of data, nfswatch must be modified to
+                understand file handles used by various
+                implementations.  It currently knows
+                about the SunOS 4.x and Ultrix file handle formats.
+
+        BUGS
+                Does not monitor FDDI interfaces.  (It should be a
+                simple change, but neither author has access to a
+                system with FDDI interfaces for testing.)
+
+        LIMITATIONS
+                Up to 256 exported file systems and 256 individual
+                files can be monitored at any time.
+
+                Only NFS requests are counted; the NFS traffic
+                generated by a server in response to those packets
+                is not counted.
+
+        HARDWARE REQUIRED
+                Any Ultrix system (VAX or DEC RISC hardware)
+
+        SOFTWARE REQUIRED
+                Ultrix release 4.0 or later.  For Ultrix 4.1, may
+                require the patched "if_ln.o" kernel module, available
+                from Digital's Customer Support Center.
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+                Copyrighted, but freely distributable.  Available via
+                anonymous FTP from harbor.ecn.purdue.edu,
+                ftp.erg.sri.com, and gatekeeper.dec.com, as well as
+                numerous other sites around the Internet.  The current
+                version is Version 3.0 from January 1991.
+
+        Contact points:
+
+        Dave Curry                              Jeff Mogul
+        Purdue University                       Digital Equipment Corp.
+        Engineering Computer Network            Western Research Laboratory
+        1285 Electrical Engineering Bldg.       100 Hamilton Avenue
+        West Lafayette, IN 47907-1285           Palo Alto, CA 94301
+        davy@ecn.purdue.edu                     mogul@decwrl.dec.com
+
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+                Dave Curry (see address above).
+
+
+
+
+NOCTools2 Working Group                                       [Page 111]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                              NHFSSTONE
+
+          NAME
+               nhfsstone
+
+          KEYWORDS
+               benchmark, generator; NFS; spoof; UNIX; free.
+
+          ABSTRACT
+               Nhfsstone (pronounced n-f-s-stone, the "h" is silent)
+               is an NFS benchmarking program.  It is used on an NFS
+               client to generate an artificial load with a particular
+               mix of NFS operations.  It reports the average response
+               time of the server in milliseconds per call and the
+               load in calls per second.  The nhfsstone distribution
+               includes a script, "nhfsnums" that converts test
+               results into plot(5) format so that they can be graphed
+               using graph(1) and other tools.
+
+          MECHANISM
+               Nhfsstone is an NFS traffic generator.  It adjusts its
+               calling patterns based on the client's kernel NFS
+               statistics and the elapsed time.  Load can be generated
+               over a given time or number of NFS calls.
+
+          CAVEATS
+               Nhfsstone will compete for system resources with other
+               applications.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               None reported.
+
+          HARDWARE REQUIRED
+               No restrictions.
+
+          SOFTWARE REQUIRED
+               4.xBSD-based UNIX
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 112]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          AVAILABILITY
+               Available via anonymous FTP from bugs.cs.wisc.edu.
+               Alternatively, Legato Systems will provide the program
+               free of charge, if certain conditions are met.  Send
+               name and both email and U.S. mail addresses to:
+                    Legato Systems, Inc.
+                    Nhfsstone
+                    260 Sheridan Avenue
+                    Palo Alto, California  94306
+
+               A mailing list is maintained for regular information
+               and bug fixes: nhfsstone@legato.com or
+               uunet!legato.com!nhfsstone.  To join the list:
+               nhfsstone-request@legato.com or
+               uunet!legato.com!nhfsstone-request.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 113]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                                 NNSTAT
+
+          NAME
+               NNStat
+
+          KEYWORDS
+               manager, status, traffic; ethernet, IP; eavesdrop, NMS;
+               UNIX; free.
+
+          ABSTRACT
+               NNStat is a collection of programs that provides an
+               internet statistic collecting capability.  The NNStat
+               strategy for statistic collection is to collect traffic
+               statistics via a promiscuous ethernet tap on the local
+               networks, versus instrumenting the gateways.  If all
+               traffic entering or leaving a network or set of net-
+               works traverses a local ethernet, then by stationing a
+               statistic gathering agent on each local network a pro-
+               file of network traffic can be gathered.  Statistical
+               data is retrieved from the local agents by a global
+               manager.
+
+               A program called "statspy" performs the data gathering
+               function.  Essentially, statspy reads all packets on an
+               ethernet interface and records all information of
+               interest.  Information of interest is gathered by exa-
+               mining each packet and determining if the source or
+               destination IP address is one that is being monitored,
+               typically a gateway address.  If so then the contents
+               of the packet are examined to see if they match further
+               criteria.
+
+               A program called "collect" performs global data collec-
+               tion.  It periodically polls various statspy processes
+               in the domain of interest to retrieve locally logged
+               statistical data.
+
+               The NNSTAT distribution comes with several sample awk
+               programs which process the logged output of the collect
+               program.
+
+          MECHANISM
+               Local agents (statspy processes) collect raw traffic
+               data via a promiscuous ethernet tap.  Statistical, fil-
+               tered or otherwise reduced data is retrieved from the
+               local agents by a global manager (the "collect" pro-
+               cess).
+
+
+
+
+NOCTools2 Working Group                                       [Page 114]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          CAVEATS
+               None.
+
+          BUGS
+               Bug fixes, extensions, and other pointers are discussed
+               in the electronic mail forum, bytecounters.  To join,
+               send a request to bytecounters-request@venera.isi.edu.
+               Forum exchanges are archived in the file
+               bytecounters/bytecounters.mail, available via anonymous
+               FTP from venera.isi.edu.
+
+          LIMITATIONS
+               NNStat presumes a topology of one or more long haul
+               networks gatewayed to local ethernets.
+
+               A kernel mod is required to run with SunOS4.  These mods
+               are described in the bytecounters archive.
+
+          HARDWARE REQUIRED
+               Ethernet interface.  Sun 3, Sun 4 (SPARC), or PC RT
+               workstation.
+
+          SOFTWARE REQUIRED
+               Distribution is for BSD UNIX, could easily be adapted
+               to any UNIX with promiscuous ethernet support.
+
+          AVAILABILITY
+               Distribution is available via anonymous FTP from
+               venera.isi.edu, in file pub/NNStat.tar.Z.  Documenta-
+               tion is in pub/NNStat.userdoc.ms.Z.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 115]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                               NOCOL(8)
+
+          NAME
+               nocol - network monitoring tools for an IP network
+
+          SYNOPSIS
+               This is an overview of the NOCOL software.
+
+          DESCRIPTION
+               NOCOL (Network Operations Center On-Line) is a
+               collection of network monitoring programs that run on
+               Unix systems.  The software consists of a number of
+               monitoring agents that poll various parameters from any
+               system and put it in a format suitable for
+               post-processing. The post-processors can be a display
+               agent, an automated troubleshooting program, an
+               event logging program, etc.  Presently, monitors for
+               tracking reachability, SNMP traps, data throughput
+               rate, and nameservers have been developed and are in
+               use.  Addition of more monitoring agents is easy and
+               they will be added as necessary.  A display agent-
+               nocol(1) using curses has already been developed. Work
+               on an "intelligent" module is currently in progress for
+               event logging and some automatic troubleshooting.
+
+               All data collected by the monitoring agents follows a
+               fixed (non-readable) format. Each data entry is termed
+               an event in NOCOL, and each event has certain flags and
+               severity associated with it. The display agent
+               nocol(1), displays the output of these monitoring
+               agents depending on the severity of the event. There
+               can be multiple displays running simultaneously and
+               all process the same set of monitored data.
+
+               There are four levels of severity associated with an
+               event- CRITICAL, ERROR, WARNING and INFO. The severity
+               level is controlled independently by the monitoring
+               agents, and the decision to raise or set an event's
+               severity to any level depends on the logic imbedded in
+               the monitoring agent.
+
+               As an example, for the pingmon(8) monitor, if a site is
+               unreachable via ping, it would be assigned a severity
+               of WARNING by pingmon, which would then elevate to
+               CRITICAL if the site is still unreachable after some
+               time. In the case of trapmon(8), an SNMP trap message
+               of EGP neighbor lost would be directly assigned a
+               severity level of CRITICAL, while a Warm Start trap is
+
+
+
+NOCTools2 Working Group                                       [Page 116]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+               assigned a severity of WARNING.
+
+               The display agent (and other data post-processors)
+               would use this event severity to decide whether to
+               display it (or troubleshoot/log it) depending on the
+               user selected display severity level.
+
+               The software is very flexible and allows enhancements
+               and development with a minimum amount of effort. The
+               display module processes all the files present in the
+               data directory, and displays them sequentially. This
+               allows new monitoring programs to simply start
+               generating data in the data directory and the display
+               module will automatically start displaying the new
+               data. The monitoring tools can be changed, and the only
+               element that has to remain common between all the
+               modules is the EVENT data structure.
+
+          CURRENT MODULES
+               NOCOL presently consists of the following modules:
+
+          nocol
+               which simply displays the data collected by the
+               monitoring agents.  It uses the curses screen
+               management system to support a wide variety of terminal
+               types. The criterion for displaying an event is:
+
+               1. Severity level of the event is higher than the
+                  severity level set in the display.
+
+               2. The display filter (if set) matches some string in
+                  the event line.
+
+               The display can be in regular 80 column mode or in
+               extended 132 column mode.  Critical events are
+               displayed in reverse video (if the terminal type
+               supports it). Additional features like displaying
+               informational messages in a part of the window,
+               automatic resizing window sizes, operator
+               acknowledgement via a bell when a new event goes
+               critical are also available.
+
+          ippingmon
+               which monitors the reachability of a site via "ICMP"
+               ping packets (ICMP was preferred over SNMP for many
+               obvious reasons). This program can use the default out-
+               put from the system's ping program, but an accompanying
+               program (multiping) can ping multiple IP sites at the
+
+
+
+NOCTools2 Working Group                                       [Page 117]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+               same time and is preferable for monitoring a large list
+               of sites.  A site is marked unreachable if a certain
+               number of packets is lost, and the severity level is
+               increased each time that the site tests unreachable.
+
+          osipingmon
+               which is similar to the ippingmon module but uses the
+               OSI ping program instead. No multiple ping program for
+               OSI sites has been developed at this time.  The only
+               requirement is that the system's ping program output
+               match the typical BSD IP ping program's output.
+
+          nsmon
+               which monitors the nameservers (named) on the list of
+               specified hosts. It periodically sends an SOA query for
+               the default domain and if the queried nameservers
+               cannot resolve the query, then the site is elevated to
+               CRITICAL status.
+
+          tpmon
+               For monitoring the throughput (kbits per second) to a
+               list of hosts.  The program connects to the discard
+               socket on the remote machine (using  a  STREAM  socket)
+               and sends large packets for a small amount of time to
+               evaluate the effective throughput. It elevates a site
+               to WARNING level if the throughput drops below a
+               certain threshold (set in the configuration file).
+
+          trapmon
+               Converts all SNMP traps into a format suitable for
+               displaying using NOCOL.  The severity of the various
+               traps is preset (and can be changed during compilation
+               time).
+
+
+     PLATFORM
+          Any Unix system with the curses screen management library
+          and IP (Internet Protocol) programming facility. It has been
+          tested on Sun Sparc 4.1.1, Ultrix, and NeXT systems. Porting
+          to other platforms might require minor adjustments depending
+          on the vagaries of the different vendors (mostly in the
+          include files).
+
+     AVAILABILITY
+          NOCOL was developed at JvNCnet and has been in use for
+          monitoring the JvNCnet wide area network since 1989.
+          It is available via anonymous FTP from ftp.jvnc.net under
+          pub/jvncnet-packages/nocol.tar.Z.  The system running at
+
+
+
+NOCTools2 Working Group                                       [Page 118]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          JvNCnet can be viewed by logging into the host nocol.jvnc.net
+          with username nocol (an rlogin instead of telnet will handle
+          your X window terminal types better).
+          To be added to the NOCOL mailing list (for future updates
+          and bug fixes), send a message to nocol-users-
+          request@jvnc.net with your email address.
+
+     FUTURE DEVELOPMENTS
+
+          Possible future enhancements are:
+
+          1. Event logging.
+
+          2. Addition of an automated  troubleshooting  mechanism
+             when  a  site  severity  level  reaches a particular
+             level.
+
+          3. SNMP monitors to watch the state  of  certain  vari-
+             ables  (interface  errors,  packet rate, route state
+             changes).
+
+     AUTHOR
+          The software was developed at JvNCnet over a period of time.
+          The overall design and initial development was done by Vikas
+          Aggarwal and Sze-Ying Wuu.  Additional development is being
+          done and coordinated by Vikas Aggarwal (vikas@jvnc.net).
+          Copyright 1992 JvNCnet. (See the file COPYRIGHT for full
+          details)
+
+     SEE ALSO
+          nocol(1) nocol(3) tpmon(8) tsmon(8) nsmon(8)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 119]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                                   NPRV
+
+          NAME
+               NPRV -- IP Node/Protocol Reachability Verifier
+
+          KEYWORDS
+               map, routing, status; IP; ping; VMS; free.
+
+          ABSTRACT
+               NPRV is a full-screen, keypad-oriented utility that
+               runs under VAX/VMS.  It allows the user to quickly scan
+               through a user-defined list of IP addresses (or domain
+               names) and verify a node's reachability.  The node's
+               reachability is determined by performing an ICMP echo,
+               UDP echo and a TCP echo at alternating three second
+               intervals.  The total number of packets sent and
+               received are displayed, as well as the minimum, average
+               and maximum round-trip times (in milliseconds) for each
+               type of echo.  Additionally, a "trace route" function
+               is performed to determine the path from the local sys-
+               tem to the remote host.  Once all of the trace route
+               information has filled the screen, a "snapshot" of the
+               screen can be written to a text file.  Upon exiting the
+               utility, these text files can be used to generate a
+               logical network map showing host and gateway intercon-
+               nectivity.
+
+          MECHANISM
+               The ICMP echo is performed by sending ICMP ECHO REQUEST
+               packets.  The UDP and TCP echoes are performed by con-
+               necting to the UDP/TCP echo ports (port number 7).  The
+               trace route information is compiled by sending alter-
+               nating ICMP ECHO REQUEST packets and UDP packets with
+               very large destination UDP port numbers (in two
+               passes).  Each packet is initially sent with a TTL
+               (time to live) of 1.  This should cause an ICMP TIME
+               EXCEEDED error to be generated by the first routing
+               gateway.  Then each packet is sent with a TTL of 2.
+               This should cause an ICMP TIME EXCEEDED error to be
+               generated by the second routing gateway.  Then each
+               packet is sent with a TTL of 3, and so on.  This pro-
+               cess continues until an ICMP ECHO REPLY or UDP PORT
+               UNREACHABLE is received.  This indicates that the
+               remote host has been reached and that the trace route
+               information is complete.
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 120]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          CAVEATS
+               This utility sends one echo packet per second (ICMP,
+               UDP or TCP), as well as sending out one trace route
+               packet per second.  If a transmitted trace route packet
+               is returned in less than one second, another trace
+               route packet is sent in 100 milliseconds.  This could
+               cause a significant amount of contention on the local
+               network.
+
+          BUGS
+               None known.  Please report any discovered bugs to the
+               author at:
+                    Allen Sturtevant
+                    National Magnetic Fusion Energy Computer Center
+                    Lawrence Livermore National Laboratory
+                    P.O. Box 808; L-561
+                    Livermore, CA  94550
+                    Phone : (415) 422-8266
+                    E-Mail: sturtevant@ccc.nmfecc.gov
+
+          LIMITATIONS
+               The user is required to have SYSPRV privilege to per-
+               form the ICMP Echo and trace route functions.  The
+               utility will still run with this privilege disabled,
+               but only the UDP Echo and TCP Echo information will be
+               displayed.  This utility is written in C, but unfor-
+               tunately it cannot be easily ported over to UNIX since
+               many VMS system calls are used and all screen I/O is
+               done using the VMS Screen Management Routines.
+
+          HARDWARE REQUIRED
+               Any network interface supported by TGV Incorporated's
+               MultiNet software.
+
+          SOFTWARE REQUIRED
+               VAX/VMS V5.1+ and TGV Incorporated's MultiNet version
+               2.0.
+
+          AVAILABILITY
+               For executables only, FTP to the ANONYMOUS account
+               (password GUEST) on CCC.NMFECC.GOV (128.55.128.30) and
+               GET the following files:
+
+               [ANONYMOUS.PROGRAMS.NPRV]NPRV.DOC     (ASCII text)
+               [ANONYMOUS.PROGRAMS.NPRV]NPRV.EXE     (binary)
+               [ANONYMOUS.PROGRAMS.NPRV]SAMPLE.IPA   (ASCII text)
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 121]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                               NSLOOKUP
+
+        NAME
+                nslookup
+
+        KEYWORDS
+                status; DNS, BIND; UNIX, VMS; free.
+
+        ABSTRACT
+                Nslookup is an interactive program for querying
+                Internet Domain Name System (DNS) servers.  It is
+                essentially a user-friendly front end to
+                the BIND "resolver" library routines.
+
+                This program is useful for converting a hostname
+                into an IP address (and vice versa), determining
+                the name servers for a domain, listing
+                the contents of a domain, displaying any type of
+                DNS record, such as MX, CNAME, SOA, etc.,
+                diagnosing name server problems.
+
+                By default, nslookup will query
+                the default name server but you can specify a
+                different server on the command line or from a
+                configuration file.  You can also specify
+                different values for the options that control the
+                resolver routines.
+
+        MECHANISM
+                The program formats, sends and receives DNS
+                (RFC 1034) queries.
+
+        CAVEATS
+                None.
+
+        BUGS
+                None known.
+
+        LIMITATIONS
+                None known.
+
+        HARDWARE REQUIRED
+                No restrictions.
+
+        SOFTWARE REQUIRED
+                BSD UNIX or related OS, or VMS.
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 122]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        AVAILABILITY
+                NSLookup is included in the BIND distribution.
+
+                Available via anonymous FTP from uunet.uu.net,
+                in directory /networking/ip/dns/bind.  Available
+                with 4.xBSD UNIX and related operating systems.
+                For VMS, available as part of TGV MultiNet IP
+                software package, as well as Wollongong's WIN/TCP.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 123]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                               OSITRACE
+
+          NAME
+               OSITRACE
+
+          KEYWORDS
+               traffic; OSI; eavesdrop; UNIX; free.
+
+          ABSTRACT
+               OSITRACE is a network performance tool that displays
+               information about ISO TP4 connections.  One line of
+               output is displayed for each packet indicating the
+               time, source, destination, length, packet type,
+               sequence number, credit, and any optional parameters
+               contained in the packet.  Numerous options are avail-
+               able to control the output of OSITRACE.
+
+               To obtain packets to analyze, OSITRACE uses Sun
+               Microsystems' Network Interface Tap (NIT) in SunOS 3.4,
+               3.5, and 4.0.X.  OSITRACE may also obtain data from the
+               NETMON utility which is described as another tool
+               entry.
+
+               In Sun systems, OSITRACE may be easily installed: OSI
+               kernel support is not needed, nor is any other form of
+               OSI software support.
+
+          MECHANISM
+               This tool has been designed in such a way that code to
+               process different protocol suites may be easily added.
+               As such, OSITRACE also has the ability to trace the DOD
+               TCP protocols.
+
+          CAVEATS
+               None.
+
+          BUGS
+               Bug reports and questions should be addressed to: ie-
+               tools@gateway.mitre.org
+
+               Requests to join this mailing list: ie-tools-
+               request@gateway.mitre.org
+
+               Questions and suggestions can also be directed to: Greg
+               Hollingsworth, gregh@gateway.mitre.org
+
+          LIMITATIONS
+               None reported.
+
+
+
+NOCTools2 Working Group                                       [Page 124]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          HARDWARE REQUIRED
+               No restriction.
+
+          SOFTWARE REQUIRED
+               SunOS 3.4, 3.5, or 4.0.X, or BSD UNIX-like network pro-
+               tocols with NETMON installed.
+
+          AVAILABILITY
+               OSITRACE is copyrighted by the MITRE-Washington Net-
+               working Center, but freely distributed "as is."  It re-
+               quires retention of a copyright text in code derived
+               from it.  The distribution is available by anonymous
+               FTP in pub/pdutrace.tar or pub/pdutrace.tar.Z from
+               aelred-3.ie.org.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 125]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                               OVERVIEW
+
+          NAME
+               OverVIEW
+
+          KEYWORDS
+               manager, status; IP; NMS, SNMP; DOS.
+
+          ABSTRACT
+               Network and internet monitor; Performance monitor;
+               Fully Graphic user interface; Event logging; TFTP boot
+               server
+
+          MECHANISM
+               OverVIEW uses SNMP to query routers, gateways and
+               hosts.  Also supports SGMP, PING and is committed to
+               CMIP/CMOT.  The SNMP queries allow dynamic determina-
+               tion of configuration and state.  Sets of related
+               queries allow monitoring of congestion and faults.
+               The hardware and software are sold as an integrated
+               package.
+
+          CAVEATS
+               None.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               256 nodes, 256 nets
+
+          HARDWARE REQUIRED
+               80286, 640K, EGA, mouse.
+
+          SOFTWARE REQUIRED
+               MS-DOS, OverVIEW, Network kernel, Mouse driver, SNMP
+               agents for monitored devices.
+
+          AVAILABILITY
+               Fully supported product of Proteon, Inc.  For more
+               information, contact:
+                   Proteon, Inc.             Phone: (508) 898-2800
+                   2 Technology Drive        Fax:   (508) 366-8901
+                   Westborough, MA  01581    Telex: 928124
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 126]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                                   PING
+
+          NAME
+               ping
+
+          KEYWORDS
+               generator, status; IP; ping; DOS, UNIX, VMS; free.
+
+          ABSTRACT
+               Ping is perhaps the most basic tool for internet
+               management.  It verifies that a remote IP implementa-
+               tion and the intervening networks and interfaces are
+               functional.  It can be used to measure round trip
+               delay.  Numerous versions of the ping program exist.
+
+          MECHANISM
+               Ping is based on the ICMP ECHO_REQUEST message.
+
+          CAVEATS
+               If run repeatedly, ping could generate high system
+               loads.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               PC/TCP's ping is the only implementation known to support
+               both loose and strict source routing.  Though some ping
+               implementations support the ICMP "record route"
+               feature, the usefulness of this option for debugging
+               routes is limited by the fact that many gateways do not
+               correctly implement it.
+
+          HARDWARE REQUIRED
+               No restrictions.
+
+          SOFTWARE REQUIRED
+               None.
+
+          AVAILABILITY
+               Ping is widely included in TCP/IP distributions.  Pub-
+               lic domain versions of ping are available via anonymous
+               FTP from uunet.uu.net, in directory bsd-
+               sources/src/etc, and from venera.isi.edu, in directory
+               pub.
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 127]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                     PROCESS-TCPWARE-SNMP
+
+        NAME
+                SNMP agent
+
+        KEYWORDS
+                alarm, manager, status, traffic; IP; SNMP; VMS.
+
+        ABSTRACT
+                The SNMP agent listens for and responds to network
+                management requests sent from SNMP-conforming network
+                management stations.  The SNMP agent also sends SNMP
+                traps, under specific conditions, to identified trap
+                receivers.  SNMP communities and generation of traps
+                are fully configurable.  The SNMP agent supports all
+                MIB-II variables except the EGP group.
+
+        MECHANISM
+                Network management variables are made available for
+                inspection and/or alteration by means of the Simple
+                Network Management Protocol (SNMP).
+
+        CAVEATS
+                None.
+
+        BUGS
+                No known bugs.
+
+        LIMITATIONS
+                Does not yet provide the ability for sites to add
+                extra MIB definitions.
+
+        HARDWARE REQUIRED
+                Supported VAX processors.
+
+        SOFTWARE REQUIRED
+                VMS V4 or later
+
+        AVAILABILITY
+                The SNMP agent is included in TCPware for VMS, a
+                commercial product available under license from:
+                        Process Software Corporation
+                        959 Concord Street
+                        Framingham, MA  01701
+                        +1 800 722 7770, +1 508 879 6994 (voice)
+                        +1 508 879-0042 (FAX)   TELEX 517891
+                        sales@process.com
+
+
+
+
+NOCTools2 Working Group                                       [Page 128]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                                 PROXYD
+
+        NAME
+                proxyd -- SNMP proxy agent daemons from SNMP Research.
+
+        KEYWORDS
+                control, management, status;
+                bridge, Ethernet, IP, OSI, ring, star;
+                NMS, SNMP;
+                UNIX;
+                library, sourcelib.
+
+        ABSTRACT
+                SNMP proxy agents may be used to permit the monitoring
+                and controlling of network elements which are otherwise
+                not addressable using the SNMP management protocol
+                (e.g., a network bridge that implements a proprietary
+                management protocol).  Similarly, SNMP proxy agents may
+                be used to protect SNMP agents from redundant network
+                management agents through the use of caches.  Finally,
+                SNMP proxy agents may be used to implement elaborate
+                MIB access policies.
+
+                The proxy agent daemon:
+
+                - listens for SNMP queries and commands from logically
+                  remote network management stations,
+                - translates and retransmits those as appropriate
+                  network management queries or cache lookups,
+                - listens for and parses the responses,
+                - translates the responses into SNMP responses, and
+                - returns those responses as SNMP messages to the
+                  network management station that originated the
+                  transaction.
+
+                The proxy agent daemon also emits SNMP traps to
+                identified trap receivers.  The proxy agent daemon is
+                designed to make the addition of additional vendor-
+                specific variables a straight-forward task.  The proxy
+                application comes complete with source code including a
+                powerful set of portable libraries for generating and
+                parsing SNMP messages and a set of command line utilities.
+
+        MECHANISM
+                Network management variables are made available for
+                inspection and/or alteration by means of the Simple
+                Network Management Protocol (SNMP).
+
+
+
+
+NOCTools2 Working Group                                       [Page 129]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        CAVEATS
+                None.
+
+        BUGS
+                None known.
+
+        LIMITATIONS
+                This application is a template for proxy application
+                writers.
+
+                Only a few of the many LanBridge 100 variables are
+                supported.
+
+        HARDWARE REQUIRED
+                System from Sun Microsystems, Incorporated.
+
+        SOFTWARE REQUIRED
+                Sun OS 3.5 or 4.x.
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+                This is a commercial product available under license
+                from:
+                        SNMP Research
+                        3001 Kimberlin Heights Road
+                        Knoxville, TN  37920-9716
+                        Attn:  John Southwood, Sales and Marketing
+                        (615) 573-1434 (Voice)  (615) 573-9197 (FAX)
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+                        users@seymour1.cs.utk.edu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 130]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                   PROXYD_SNMP_RESEARCH
+
+        NAME
+                proxyd -- SNMP proxy agent daemons from SNMP Research.
+
+        KEYWORDS
+                control, management, status;
+                bridge, Ethernet, IP, OSI, ring, star;
+                NMS, SNMP;
+                UNIX;
+                library, sourcelib.
+
+        ABSTRACT
+                SNMP proxy agents may be used to permit the monitoring
+                and controlling of network elements which are otherwise
+                not addressable using the SNMP management protocol
+                (e.g., a network bridge that implements a proprietary
+                management protocol).  Similarly, SNMP proxy agents may
+                be used to protect SNMP agents from redundant network
+                management agents through the use of caches.  Finally,
+                SNMP proxy agents may be used to implement elaborate
+                MIB access policies.
+
+                The proxy agent daemon:
+
+                - listens for SNMP queries and commands from logically
+                  remote network management stations,
+                - translates and retransmits those as appropriate
+                  network management queries or cache lookups,
+                - listens for and parses the responses,
+                - translates the responses into SNMP responses, and
+                - returns those responses as SNMP messages to the
+                  network management station that originated the
+                  transaction.
+
+                The proxy agent daemon also emits SNMP traps to
+                identified trap receivers.  The proxy agent daemon is
+                designed to make the addition of additional vendor-
+                specific variables a straight-forward task.  The proxy
+                application comes complete with source code including a
+                powerful set of portable libraries for generating and
+                parsing SNMP messages and a set of command line utilities.
+
+        MECHANISM
+                Network management variables are made available for
+                inspection and/or alteration by means of the Simple
+                Network Management Protocol (SNMP).
+
+
+
+
+NOCTools2 Working Group                                       [Page 131]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        CAVEATS
+                None.
+
+        BUGS
+                None known.
+
+        LIMITATIONS
+                This application is a template for proxy application
+                writers.
+
+                Only a few of the many LanBridge 100 variables are
+                supported.
+
+        HARDWARE REQUIRED
+                System from Sun Microsystems, Incorporated.
+
+        SOFTWARE REQUIRED
+                Sun OS 3.5 or 4.x.
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+           This is a commercial product available under license
+           from:
+                SNMP Research
+                3001 Kimberlin Heights Road
+                Knoxville, TN  37920-9716
+                Attn:  John Southwood, Sales and Marketing
+                (615) 573-1434 (Voice)  (615) 573-9197 (FAX)
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+                users@seymour1.cs.utk.edu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 132]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                                  QUERY
+
+          NAME
+               query, ripquery
+
+          KEYWORDS
+               routing; IP; spoof; UNIX; free.
+
+          ABSTRACT
+               Query allows remote viewing of a gateway's routing
+               tables.
+
+          MECHANISM
+               Query formats and sends a RIP request or POLL command
+               to a destination gateway.
+
+          CAVEATS
+               Query is intended to be used as a tool for debugging
+               gateways, not for network management.  SNMP is the pre-
+               ferred protocol for network management.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               The polled gateway must run RIP.
+
+          HARDWARE REQUIRED
+               No restriction.
+
+          SOFTWARE REQUIRED
+               4.3BSD UNIX or related OS.
+
+          AVAILABILITY
+               Available with routed and gated distributions.
+
+               Routed may be obtained via anonymous FTP from
+               uunet.uu.net, in file bsd-
+               sources/src/network/routed.tar.Z.
+
+               Gated may be obtained via anonymous FTP from
+               devvax.tn.cornell.edu.  Distribution files are in
+               directory pub/gated.
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 133]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                                SAS-CPE
+
+        NAME
+                SAS/CPE(tm) for Open Systems Software
+
+        KEYWORDS
+                manager, status;
+                bridge, ethernet, FDDI, IP, OSI, NFS;
+                X;
+                DOS, HP, UNIX;
+                library.
+
+        ABSTRACT
+        SAS/CPE(tm) for Open Systems software is an integrated system designed
+        to facilitate the analysis and presentation of computer performance
+        and resource utilization data.  SAS/CPE software features include:
+
+            . Processing of raw computer and network performance data into
+              detail-level SAS data sets.
+            . Conversion and validation of logged data values to forms
+              more useful for display and analysis (e.g., I/O counts
+              are converted to I/O rates per second).
+            . Numerous sample reports on performance data processed by
+              SAS/CPE software.
+            . Reduction of logged performance data into daily, weekly,
+              monthly or yearly summarized values.
+            . Menu-driven interface to the creation and management of multiple
+              performance data bases.
+            . Menu-driven report designing interface that allows users with no
+              programming knowledge to create and manage custom reports from
+              their performance data base. No SAS coding is needed for this
+              interface.
+
+        MECHANISM
+                SAS/CPE for Open Systems processes and reports data
+                from SNMP and other proprietary monitoring protocols,
+                as well as du and accounting.
+
+        CAVEATS
+                The product is currently in alpha testing.
+
+        BUGS
+                None known.
+
+        LIMITATIONS
+                None reported.
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 134]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        HARDWARE REQUIRED
+                HP, SUN or IBM Workstation
+
+        SOFTWARE REQUIRED
+                The SAS(r) System Base Software, SAS/GRAPH Software and
+                SAS/CPE for Open Systems Software
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+                SAS/CPE for Open Systems Software is available from:
+                     SAS Institute Inc.
+                     SAS Campus Drive
+                     Cary, NC  27513
+                     Phone 919-677-8000
+                     FAX 919-677-8123
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+                Send email to snodjs@mvs.sas.com.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 135]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                                SNIFFER
+
+          NAME
+               Sniffer
+
+          KEYWORDS
+               analyzer, generator, traffic; DECnet, ethernet, IP,
+               NFS, OSI, ring, SMTP, star; eavesdrop; standalone.
+
+          ABSTRACT
+               The Network General Sniffer is a protocol analyzer for
+               performing LAN diagnostics, monitoring, traffic genera-
+               tion, and troubleshooting.  The Sniffer protocol
+               analyzer has the capability of capturing every packet
+               on a network and of decoding all seven layers of the
+               OSI protocol model.  Capture frame selection is based
+               on several different filters: protocol content at lower
+               levels; node addresses; pattern matching (up to 8
+               logically-related patterns of 32 bytes each); and des-
+               tination class.  Users may extend the protocol
+               interpretation capability of the Sniffer by writing
+               their own customized protocol interpreters and linking
+               them to the Sniffer software.
+
+               The Sniffer displays network traffic information and
+               performance statistics in real time, in user-selectable
+               formats.  Numeric station addresses are translated to
+               symbolic names or manufacturer ID names.  Network
+               activities measured include frames accepted, Kbytes
+               accepted, and buffer use.  Each network version has
+               additional counters for activities specific to that
+               network.  Network activity is expressed as
+               frames/second, Kbytes/second, or per cent of network
+               bandwidth utilization.
+
+               Data collection by the Sniffer may be output to printer
+               or stored to disk in either print-file or spread-sheet
+               format.
+
+               Protocol suites understood by the Sniffer include:
+               Banyan Vines, IBM Token-Ring, Novell Netware, XNS/MS-
+               Net (3Com 3+), DECnet, TCP/IP (including SNMP and
+               applications-layer protocols such as FTP, SMTP, and
+               TELNET), X Windows (for X version 11), NFS, and several
+               SUN proprietary protocols (including mount, pmap, RPC,
+               and YP).  Supported LANs include: ethernet, Token-ring
+               (4Mb and 16Mb versions), ARCNET, StarLAN, IBM PC Net-
+               work (Broadband), and Apple Localtalk Network.
+
+
+
+NOCTools2 Working Group                                       [Page 136]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          MECHANISM
+               The Sniffer is a self-contained, portable protocol
+               analyzer that requires only AC line power and connection
+               to a network to operate.  Normally passive (except when
+               in Traffic Generator mode), it captures images of all
+               or of selected frames in a working buffer, ready for
+               immediate analysis and display.
+
+               The Sniffer is a standalone device.  Two platforms are
+               available: one for use with single network topologies,
+               the other for use with multi-network topologies.  Both
+               include Sniffer core software, a modified network
+               interface card (or multiple cards), and optional proto-
+               col interpreter suites.
+
+               All Sniffer functions may be remotely controlled from a
+               modem-connected PC.  Output from the Sniffer can be
+               imported to database or spreadsheet packages.
+
+          CAVEATS
+               In normal use, the Sniffer is a passive device, and so
+               will not adversely affect network performance.  Perfor-
+               mance degradation will be observed, of course, if the
+               Sniffer is set to Traffic Generator mode and connected
+               to an active network.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               None reported.
+
+          HARDWARE REQUIRED
+               None.  The Sniffer is a self-contained unit, and
+               includes its own interface card.  It installs into a
+               network as would any normal workstation.
+
+          SOFTWARE REQUIRED
+               None.
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 137]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          AVAILABILITY
+               The Sniffer is available commercially.  For information
+               on your local representative, call or write:
+                    Network General Corporation
+                    4200 Bohannon Drive
+                    Menlo Park, CA  94025
+                    Phone: 415-688-2700
+                    Fax: 415-321-0855
+
+               For acquisition by government agencies, the Sniffer is
+               included on the GSA schedule.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 138]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                   SNMP_DEVELOPMENT_KIT
+
+          NAME
+               The SNMP Development Kit
+
+          KEYWORDS
+               manager, status; IP; NMS, SNMP; UNIX; free, sourcelib.
+
+          ABSTRACT
+               The SNMP Development Kit comprises C Language source
+               code for a programming library that facilitates access
+               to the management services of the SNMP (RFC 1098).
+               Sources are also included for a few simple client
+               applications whose main purpose is to illustrate the
+               use of the library.  Example client applications query
+               remote SNMP agents in a variety of modes, and generate
+               or collect SNMP traps.  Code for an example SNMP agent
+               that supports a subset of the Internet MIB (RFC 1066)
+               is also included.
+
+          MECHANISM
+               The Development Kit facilitates development of SNMP-
+               based management applications -- both clients and
+               agents.  Example applications execute SNMP management
+               operations according to the values of command line
+               arguments.
+
+          CAVEATS
+               None.
+
+          BUGS
+               Fixed in the next release.
+
+          LIMITATIONS
+               None reported.
+
+          HARDWARE REQUIRED
+               The SNMP library source code is highly portable and
+               runs on a wide range of platforms.
+
+          SOFTWARE REQUIRED
+               The SNMP library source code has almost no operating
+               system dependencies and runs in a wide range of
+               environments.  Certain portions of the example SNMP
+               agent code are specific to the 4.3BSD implementation of
+               the UNIX system for the DEC MicroVAX.
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 139]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          AVAILABILITY
+               The Development Kit is available via anonymous FTP from
+               host allspice.lcs.mit.edu.  The copyright for the
+               Development Kit is held by the Massachusetts Institute
+               of Technology, and the Kit is distributed without
+               charge according to the terms set forth in its code and
+               documentation.  The distribution takes the form of a
+               UNIX tar file.
+
+               Bug reports, questions, suggestions, or complaints may
+               be mailed electronically to snmp-dk@ptt.lcs.mit.edu,
+               although no response in any form is guaranteed.  Dis-
+               tribution via UUCP mail may be arranged by contacting
+               the same address.  Requests for hard-copy documentation
+               or copies of the distribution on magnetic media are
+               never honored.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 140]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog           SNMP_Libraries_SNMP_RESEARCH
+
+        NAME
+                SNMP Libraries and Utilities from SNMP Research.
+
+        KEYWORDS
+                alarm, control, manager, map, security, status;
+                bridge, DECnet, Ethernet, FDDI, IP, OSI, ring, star;
+                NMS, SNMP;
+                DOS, UNIX, VMS;
+                sourcelib.
+
+        ABSTRACT
+                The SNMP Libraries and Utilities serve two purposes:
+
+                1)   to act as building blocks for the construction of
+                     SNMP-based agent and manager applications; and
+
+                2)   to act as network management tools for network
+                     fire fighting and report generation.
+
+                The libraries perform ASN.1 parsing and generation tasks
+                for both network management station applications and
+                network management agent applications.  These libraries
+                hide the details of ASN.1 parsing and generation from
+                application writers and make it unnecessary for them to
+                be expert in these areas.  The libraries are very robust
+                with considerable error checking designed in.  The
+                several command line utilities include applications for
+                retrieving one or many variables, retrieving tables, or
+                effecting commands via the setting of remote network
+                management variables.
+
+        MECHANISM
+                The parsing is performed via recursive descent methods.
+                Messages are passed via the Simple Network Management
+                Protocol (SNMP).
+
+        CAVEATS
+                None.
+
+        BUGS
+                None known.
+
+        LIMITATIONS
+                The monitored and managed nodes must implement the SNMP
+                over UDP per RFC 1157 or must be reachable via a proxy
+                agent.
+
+
+
+NOCTools2 Working Group                                       [Page 141]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        HARDWARE REQUIRED
+                This software has been ported to numerous platforms
+                including workstations, general-purpose timesharing
+                systems, and embedded hardware in intelligent network
+                devices such as repeaters, bridges, and routers.
+
+        SOFTWARE REQUIRED
+                C compiler, TCP/IP library.
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+                This is a commercial product available under license
+                from:
+                        SNMP Research
+                        3001 Kimberlin Heights Road
+                        Knoxville, TN  37920-9716
+                        Attn:  John Southwood, Sales and Marketing
+                        (615) 573-1434 (Voice)  (615) 573-9197 (FAX)
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+                users@seymour1.cs.utk.edu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 142]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog      SNMP_PACKAGED_AGENT_SNMP_RESEARCH
+
+        NAME
+                SNMP Packaged Agent System -- an SNMP host/gateway
+                agent daemon including a complete protocol stack and
+                runtime environment required to support an SNMP Agent
+                from SNMP Research.
+
+        KEYWORDS
+                control, manager, status;
+                bridge, Ethernet, FDDI, IP, OSI, ring, star;
+                NMS, SNMP;
+                DOS, standalone, UNIX;
+                sourcelib.
+
+        ABSTRACT
+                The snmpd agent daemon listens for and responds to
+                network management queries and commands from logically
+                remote network management stations.  The agent daemon
+                also emits SNMP traps to identified trap receivers.
+                The agent daemon is designed to make the addition of
+                additional vendor-specific variables a
+                straight-forward task.  The snmpd application comes
+                complete with source code including a powerful set of
+                portable libraries for generating and parsing SNMP
+                messages and a set of command line utilities.
+
+                The Packaged Agent System is designed to aid the
+                hardware manufacturer who is not experienced with the
+                TCP/IP protocol suite.  A lightweight, non-preemptive
+                scheduler/tasking system for faster execution and less
+                impact on slow CPUs is included in the package.
+                Development environment is either MS DOS or UNIX.
+
+        MECHANISM
+                Network management variables are made available for
+                inspection and/or alteration by means of the Simple
+                Network Management Protocol (SNMP).
+
+        CAVEATS
+                None.
+
+        BUGS
+                None known.
+
+        LIMITATIONS
+                None reported.
+
+
+
+
+NOCTools2 Working Group                                       [Page 143]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        HARDWARE REQUIRED
+                The Motorola 68XXX and the Intel 8088 and X86
+                platforms are fully supported.  Other platforms can be
+                supported.  Contact SNMP Research for details.
+
+                This software has been ported to numerous platforms
+                including workstations, general-purpose timesharing
+                systems, and embedded hardware in intelligent network
+                devices such as repeaters, bridges, and routers.
+
+        SOFTWARE REQUIRED
+                C compiler.
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+                This is a commercial product available under license
+                from:
+                        SNMP Research
+                        3001 Kimberlin Heights Road
+                        Knoxville, TN  37920-9716
+                        Attn:  John Southwood, Sales and Marketing
+                        (615) 573-1434 (Voice)  (615) 573-9197 (FAX)
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+                        users@seymour1.cs.utk.edu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 144]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                    SNMPD_SNMP_RESEARCH
+
+        NAME
+                snmpd -- an SNMP host/gateway agent daemon from SNMP
+                Research.
+
+        KEYWORDS
+                control, manager, status;
+                bridge, Ethernet, FDDI, IP, OSI, ring, star;
+                NMS, SNMP;
+                DOS, UNIX;
+                sourcelib.
+
+        ABSTRACT
+                The snmpd agent daemon listens for and responds to
+                network management queries and commands from logically
+                remote network management stations.  The agent daemon
+                also emits SNMP traps to identified trap receivers.  The
+                agent daemon is architected to make the addition of
+                additional vendor-specific variables a straight-forward
+                task.  The snmpd application comes complete with source
+                code including a powerful set of portable libraries for
+                generating and parsing SNMP messages and a set of
+                command line utilities.
+
+        MECHANISM
+                Network management variables are made available for
+                inspection and/or alteration by means of the Simple
+                Network Management Protocol (SNMP).
+
+        CAVEATS
+                None.
+
+        BUGS
+                None known.
+
+        LIMITATIONS
+                Only operating system variables available without
+                source code modifications to the operating system and
+                device drivers are supported.
+
+        HARDWARE REQUIRED
+                This software has been ported to numerous platforms
+                including workstations, general-purpose timesharing
+                systems, and embedded hardware in intelligent network
+                devices such as repeaters, bridges, and routers.
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 145]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        SOFTWARE REQUIRED
+                C compiler.
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+                This is a commercial product available under license
+                from:
+                        SNMP Research
+                        3001 Kimberlin Heights Road
+                        Knoxville, TN  37920-9716
+                        Attn:  John Southwood, Sales and Marketing
+                        (615) 573-1434 (Voice)  (615) 573-9197 (FAX)
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+                        users@seymour1.cs.utk.edu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 146]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                          SPIDERMONITOR
+
+          NAME
+               SpiderMonitor P220, K220 and
+               SpiderAnalyzer P320, K320
+
+          KEYWORDS
+               alarm, analyzer, generator, traffic; DECnet, ethernet,
+               IP, OSI; eavesdrop; standalone; sourcelib.
+
+          ABSTRACT
+               The SpiderMonitor and SpiderAnalyzer are protocol
+               analyzers for performing ethernet LAN diagnostics, mon-
+               itoring, traffic generation, and troubleshooting.  The
+               SpiderMonitor has the capability of capturing every
+               packet on a network and of decoding the first four
+               layers of the OSI protocol model.  The SpiderAnalyzer
+               has additional software for decoding higher protocol
+               layers.  Protocol suites understood: TCP/IP (including
+               SNMP and applications-layer protocols), OSI, XNS, DEC-
+               net and IPX.  User-definable decodes can be written in
+               'C' with the Microsoft version 5.0 'C' compiler.  A
+               decode guide is provided.
+
+               The SpiderAnalyzer supports multiple simultaneous
+               filters for capturing packets using predefined patterns
+               and error states.  Filter patterns can also trigger on
+               NOT matching 1 or more filters, an alarm, or a speci-
+               fied time.
+
+               The SpiderAnalyzer can also employ TDR (Time Domain
+               Reflectometry) to find media faults, open or short cir-
+               cuits, or transceiver faults.  It can transmit OSI,
+               XNS, and Xerox link-level echo packets to user-
+               specified stations, and perform loop round tests.
+
+               In traffic generation mode, the SpiderAnalyzer has the
+               ability to generate packets at random intervals of ran-
+               dom lengths or any combination of random or fixed
+               interval or length, generation of packets with CRC
+               errors, or packets that are too short, or packets that
+               are too long.
+
+               Output from the SpiderMonitor/Analyzer can be imported
+               to database or spreadsheet packages.
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 147]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          MECHANISM
+               The SpiderMonitor and Spider Analyzer are available as
+               stand-alone, IBM PC compatible packages based upon a
+               Compaq III portable system, or as plug-in boards for
+               any IBM XT/AT compatible machine.  The model 220 (Spi-
+               derMonitor) systems provide a functional base suited
+               for most network management needs.  The model 320 (Spi-
+               derAnalyzer) systems provide extended functionality in
+               the development mode and traffic generation mode as
+               well as more filtering capabilities than the 220 models.
+
+          CAVEATS
+               Traffic generation will congest an operational ether-
+               net.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               Monitoring of up to 1024 stations and buffering of up
+               to 1500 packets.  The model 220 provides for 3 filters
+               with a filter depth of 46 bytes.  The model 320 pro-
+               vides for 4 filters and a second level of filtering
+               with a filter depth of 64 bytes.
+
+          HARDWARE REQUIRED
+               PX20s are self contained, the KX20s require an IBM
+               PC/XT-AT compatible machine with 5 megabytes of hard
+               disk storage and the spare slot into which the board
+               kit is plugged.
+
+          SOFTWARE REQUIRED
+               None.  The SpiderAnalyzer requires the Microsoft 'C'
+               Compiler, Version 5.0 for writing user defined decodes.
+
+          AVAILABILITY
+               The SpiderMonitor/Analyzer is available commercially.
+               For information on your local representative, call or
+               write:
+                    Spider Systems, Inc.
+                    12 New England Executive Park
+                    Burlington, MA  01803
+                    Telephone:  617-270-3510
+                    FAX:        617-270-9818
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 148]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                                  SPIMS
+
+        NAME
+                SPIMS -- the Swedish Institute of Computer Science
+                         (SICS) Protocol Implementation Measurement
+                         System tool.
+
+        KEYWORDS
+                benchmark, debugger; IP, OSI; spoof; UNIX.
+
+        ABSTRACT
+                SPIMS is used to measure the performance of protocol
+                and "protocol-like" services including response time
+                (two-way delay), throughput and the time to open and
+                close connections.  It has been used to:
+
+                o    benchmark alternative protocol implementations,
+
+                o    observe how performance varies when parameters in
+                        specific implementations have been varied (i.e.,
+                        to tune parameters).
+
+                SPIMS currently has interfaces to the DoD Internet Pro-
+                tocols: UDP, TCP, FTP, SunRPC, the OSI protocols from
+                the ISODE 4.0 distribution package: FTAM, ROSE, ISO TP0
+                and to Sunlink 5.2 ISO TP4 as well as Stanford's VMTP.
+                Also available are a rudimentary set of benchmarks,
+                stubs for new protocol interfaces and a user manual.
+
+                For an example of the use of SPIMS to tune protocols,
+                see:
+                        Nordmark & Cheriton, "Experiences from VMTP: How
+                        to achieve low response time," IFIP WG6.1/6.4:
+                        Protocols for High-Speed Networks, May 1989,
+                        Zurich.  To be published.
+
+                For an example of how SPIMS can be used to benchmark
+                protocols, see:
+
+                        Gunningberg, Bjorkman, Nordmark, Sjodin, Pink &
+                        Stromqvist "Application Protocols and Performance
+                        Benchmarks", IEEE Communications Magazine, June
+                        1989, Vol. 27, No.6, pp 30-36.
+
+                        Sjodin, Gunningberg, Nordmark, & Pink, "Towards
+                        Protocol Benchmarks", IFIP WG6.1/6.4 Protocols
+                        for High-Speed Networks, May 1989, Zurich, pp
+                        57-67
+
+
+
+NOCTools2 Working Group                                       [Page 149]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        MECHANISM
+                SPIMS runs as user processes and uses a TCP connection
+                for measurement set-up.  Measurements take place
+                between processes over the measured protocol.  SPIMS
+                generates messages and transfers them via the measured
+                protocol service according to a user-supplied specifi-
+                cation.  SPIMS has a unique measurement specification
+                language that is used to specify a measurement session.
+                In the language there are constructs for different
+                application types (e.g., bulk data transfer), for
+                specifying frequency and sequence of messages, for dis-
+                tribution over message sizes and for combining basic
+                specifications.  These specifications are independent
+                of both protocols and protocol implementations and can
+                be used for benchmarking.  For more details on the
+                internals of SPIMS, see:
+
+                Nordmark & Gunningberg, "SPIMS: A Tool for Protocol
+                Implementation Performance Measurements" Proc. of 13:th
+                Conf. on Local Computer Networks, Minneapolis 1989, pp
+                222-229.
+
+        CAVEATS
+                None.
+
+        BUGS
+                None known.
+
+        LIMITATIONS
+                None reported.
+
+        HARDWARE REQUIRED
+                No restrictions.
+
+        SOFTWARE REQUIRED
+                SPIMS is implemented on UNIX, including SunOS 4.,
+                4.3BSD UNIX, DN (UNIX System V, with extensions) and
+                Ultrix 2.0/3.0.  It requires a TCP connection for meas-
+                urement set-up.  No kernel modifications or any modifi-
+                cations to measured protocols are required.
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 150]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+                SPIMS is not in the public domain and the software is
+                covered by licenses.  Use of the SPIMS software
+                represents acceptance of the terms and conditions of
+                the licenses.
+                The licenses are enclosed in the distribution package.
+                Licenses and SPIMS cover letter can also be obtained
+                via an Internet FTP connection without getting the whole
+                software.  The retrieval procedure is identical to the
+                below university distribution via FTP.  The file to
+                retrieve is pub/spims-dist/licenses.tar.Z
+
+                There are two different distribution classes depending on
+                requesting organization:
+
+                1. Universities and non-profit organizations.
+
+                To these organizations, SPIMS source code is distributed
+                free of charge.  There are two ways to get the software:
+
+                        1. FTP.
+                        If you have an Internet FTP connection, you
+                        can use anonymous FTP to sics.se
+                        [192.16.123.90], and retrieve the file
+                        pub/spims-dist/dist910304.tar.Z
+                        (this is a .6MB compressed tar image) in
+                        BINARY mode.  Log in as user anonymous and at
+                        the password prompt, use your complete
+                        electronic mail address.
+
+                        2. On a Sun 1/4-inch cartridge tape.
+                        For mailing, a handling fee of US$150.00 will be
+                        charged.  Submit a bank check with the request.
+                        Do not send tapes or envelopes.
+
+                2. Commercial organizations.
+
+                These organizations can choose between a license for
+                commercial use, or a license for internal research
+                only and no commercial use whatsoever.
+
+                        For internal research use only:
+
+                        The SPIMS source code is distributed for a one
+                        time fee of US$500.00.  Organizations
+                        interested in the research prototype need to
+                        contact us via e-mail and briefly motivate why
+                        they qualify (non-commercial use) for the
+
+
+
+NOCTools2 Working Group                                       [Page 151]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+                        research prototype.
+                        They will thereafter get a permission to
+                        obtain a copy from the same distribution
+                        source as for universities.
+
+                        Commercial use:
+
+                        A commercial version of SPIMS will eventually
+                        be distributed and supported by a commercial
+                        partner.  In the meantime we will distribute
+                        the research prototype (source code) to
+                        interested organizations without any guaranty
+                        or support.  Contact SICS for further
+                        information.
+
+                For more information about the research prototype
+                distribution and about a commercial license, contact:
+
+                        Swedish Institute of Computer Science
+                        Att: Birgitta Klingenberg
+                        P.O. Box 1263
+                        S-164 28 Kista
+                        SWEDEN
+
+                        e-address: spims@sics.se
+                        Phone: +46-8-7521500, Fax: +46-8-7517230
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+                Bengt Ahlgren
+                Swedish Institute of Computer Science
+                Box 1263
+                S-164 28 KISTA, SWEDEN
+
+                Email:  bengta@sics.se
+                Tel:    +46 8 752 1562 (direct)
+                  or    +46 8 752 1500
+                Fax:    +46 8 751 7230
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 152]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                              SPRAY_SUN
+
+          NAME
+               spray
+
+          KEYWORDS
+               benchmark, generator; IP; ping; UNIX.
+
+          ABSTRACT
+               Spray is a traffic generation tool that generates RPC
+               or UDP packets, or ICMP Echo Requests.  The packets are
+               sent to a remote procedure call application at the des-
+               tination host.  The count of received packets is
+               retrieved from the remote application after a certain
+               number of packets have been transmitted.  The differ-
+               ence in packets received versus packets sent represents
+               (on a LAN) the packets that the destination host had to
+               drop due to increasing queue length.  A measure of
+               throughput relative to system speed and network load
+               can thus be obtained.
+
+          MECHANISM
+               See above.
+
+          CAVEATS
+               Spray can congest a network.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               None reported.
+
+          HARDWARE REQUIRED
+               No restrictions.
+
+          SOFTWARE REQUIRED
+               SunOS
+
+          AVAILABILITY
+               Supplied with SunOS.
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 153]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                                TCPDUMP
+
+          NAME
+               tcpdump
+
+          KEYWORDS
+               traffic; ethernet, IP, NFS; UNIX, VMS; free.
+
+          ABSTRACT
+               Tcpdump can interpret and print headers for the follow-
+               ing protocols: ethernet, IP, ICMP, TCP, UDP, NFS, ND,
+               ARP/RARP, AppleTalk.  Tcpdump has proven useful for
+               examining and evaluating the retransmission and window
+               management operations of TCP implementations.
+
+          MECHANISM
+               Much like etherfind, tcpdump writes a log file of the
+               frames traversing an ethernet interface.  Each output
+               line includes the time a packet is received, the type
+               of packet, and various values from its header.
+
+          CAVEATS
+               None.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               Public domain version requires a kernel patch for
+               SunOS. TCPware for VMS - currently interprets headers
+               for IP, TCP, UDP, and ICMP only.
+
+          HARDWARE REQUIRED
+               Any Ultrix system (VAX or DEC RISC hardware)
+
+          SOFTWARE REQUIRED
+               Ultrix release 4.0 or later.  For Ultrix 4.1, may
+               require the patched "if_ln.o" kernel module, available
+               from Digital's Customer Support Center.
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 154]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          AVAILABILITY
+               Available, though subject to copyright restrictions,
+               via anonymous FTP from ftp.ee.lbl.gov.  The source and
+               documentation for the tool is in compressed tar format,
+               in file tcpdump.tar.Z.  Also available from
+               spam.itstd.sri.com, in directory pub.  For VMS hosts
+               with DEC ethernet controllers, available as part of TGV
+               MultiNet IP software package and TCPware for VMS from
+               Process Software Corporation.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 155]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                              TCPLOGGER
+
+          NAME
+               tcplogger
+
+          KEYWORDS
+               traffic; IP; eavesdrop; UNIX; free.
+
+          ABSTRACT
+               Tcplogger consists of modifications to the 4.3BSD UNIX
+               source code, and a large library of post-processing
+               software.  Tcplogger records timestamped information
+               from TCP and IP packets that are sent and received on a
+               specified connection.  For each TCP packet, information
+               such as sequence number, acknowledgement sequence
+               number, packet size, and header flags is recorded.  For
+               an IP packet, header length, packet length and TTL
+               values are recorded.  Customized use of the TCP option
+               field allows the detection of lost or duplicate pack-
+               ets.
+
+          MECHANISM
+               Routines of 4.3BSD UNIX in the netinet directory have
+               been modified to append information to a log in memory.
+               The log is read continuously by a user process and
+               written to a file.  A TCP option has been added to
+               start the logging of a connection.  Lots of post-
+               processing software has been written to analyze the
+               data.
+
+          CAVEATS
+               None.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               To get a log at both ends of the connection, the modi-
+               fied kernel should be run at both the hosts.
+
+               All connections are logged in a single file, but
+               software is provided to filter out the record of a sin-
+               gle connection.
+
+          HARDWARE REQUIRED
+               No restrictions.
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 156]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          SOFTWARE REQUIRED
+               4.3BSD UNIX (as modified for this tool).
+
+          AVAILABILITY
+               Free, although a 4.3BSD license is required.  Contact
+               Olafur Gudmundsson (ogud@cs.umd.edu).
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 157]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                      TOKENVIEW_PROTEON
+
+          NAME
+               TokenVIEW
+
+          KEYWORDS
+               control, manager, status; ring; NMS, proprietary; DOS.
+
+          ABSTRACT
+               Network Management tool for 4/16 Mbit IEEE 802.5 Token
+               Ring Networks.  Monitors active nodes and ring errors.
+               Maintains database of nodes, wire centers and their
+               connections.  Separate network management ring allows
+               remote configuration of wire centers.
+
+          MECHANISM
+               A separate network management ring used with Proteon
+               Intelligent Wire Centers allows wire center configura-
+               tion information to be read and modified from a single
+               remote workstation.  A log of network events used with
+               a database containing nodes, wire centers and their con-
+               nections, facilitates tracking and correction of net-
+               work errors.  Requires an "E" series PROM, sold with
+               package.
+
+          CAVEATS
+               Currently, only ISA bus cards support the required E
+               series PROM.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               256 nodes, 1 net.
+
+          HARDWARE REQUIRED
+               512K RAM, CGA or better, hard disk, mouse supported.
+
+          SOFTWARE REQUIRED
+               MS-DOS, optional mouse driver
+
+          AVAILABILITY
+               Fully supported product of Proteon, Inc.  Previously
+               sold as Advanced Network Manager (ANM).  For more in-
+               formation, contact:
+                   Proteon, Inc.             Phone: (508) 898-2800
+                   2 Technology Drive        Fax:   (508) 366-8901
+                   Westborough, MA  01581    Telex: 928124
+
+
+
+NOCTools2 Working Group                                       [Page 158]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                             TRACEROUTE
+
+          NAME
+               traceroute
+
+          KEYWORDS
+               routing; IP; ping; UNIX, VMS; free.
+
+          ABSTRACT
+               Traceroute is a tool that allows the route taken by
+               packets from source to destination to be discovered.
+               It can be used for situations where the IP record route
+               option would fail, such as intermediate gateways dis-
+               carding packets, routes that exceed the capacity of a
+               datagram, or intermediate IP implementations that don't
+               support record route.  Round trip delays between the
+               source and intermediate gateways are also reported
+               allowing the determination of individual gateways' con-
+               tribution to end-to-end delay.
+
+               Enhanced versions of traceroute have been developed
+               that allow specification of loose source routes for
+               datagrams.  This allows one to investigate the return
+               path from remote machines back to the local host.
+
+          MECHANISM
+               Traceroute relies on the ICMP TIME_EXCEEDED error
+               reporting mechanism.  When an IP packet is received by
+               a gateway with a time-to-live value of 0, an ICMP
+               packet is sent to the host which generated the packet.
+               By sending packets to a destination with a TTL of 0,
+               the next hop can be identified as the source of the
+               ICMP TIME EXCEEDED message.  By incrementing the TTL
+               field the subsequent hops can be identified.  Each
+               packet sent out is also time stamped.  The time stamp
+               is returned as part of the ICMP packet so a round trip
+               delay can be calculated.
+
+          CAVEATS
+               Some IP implementations forward packets with a TTL of
+               0, thus escaping identification.  Others use the TTL
+               field in the arriving packet as the TTL for the ICMP
+               error reply, which delays identification.
+
+               Sending datagrams with the source route option will
+               cause some gateways to crash.  It is considered poor
+               form to repeat this behavior.
+
+
+
+
+NOCTools2 Working Group                                       [Page 159]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               Most versions of UNIX have errors in the raw IP code
+               that require kernel mods for the standard version of
+               traceroute to work.  A version of traceroute exists
+               that runs without kernel mods under SunOS 3.5 (see
+               below), but it only operates over an ethernet inter-
+               face.
+
+          HARDWARE REQUIRED
+               No restrictions.
+
+          SOFTWARE REQUIRED
+               BSD UNIX or related OS, or VMS.
+
+          AVAILABILITY
+               Available by anonymous FTP from ftp.ee.lbl.gov, in file
+               traceroute.tar.Z.  It is also available from
+               uc.msc.umn.edu.
+
+               A version of traceroute that supports Loose Source
+               Record Route, along with the source code of the
+               required kernel modifications and a Makefile for
+               installing them, is available via anonymous FTP from
+               zerkalo.harvard.edu, in directory pub, file
+               traceroute_pkg.tar.Z.
+
+               A version of traceroute that runs under SunOS 3.5 and
+               does NOT require kernel mods is available via anonymous
+               FTP from dopey.cs.unc.edu, in file
+               ~ftp/pub/traceroute.tar.Z.
+
+               For VMS, traceroute is available as part of TGV Mul-
+               tiNet IP software package.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 160]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                                   TRPT
+
+          NAME
+               TRPT -- transliterate protocol trace
+
+          KEYWORDS
+               traffic; IP; eavesdrop; UNIX; free.
+
+          ABSTRACT
+               TRPT displays a trace of TCP socket events.  When no
+               options are supplied, TRPT prints all the trace records
+               found in a system, grouped according to TCP connection
+               protocol control block (PCB).
+
+               An example of TRPT output is:
+
+               38241 ESTABLISHED:input
+               [e0531003..e0531203)@6cc5b402(win=4000)<ACK> -> ESTA-
+               BLISHED
+               38241 ESTABLISHED:user RCVD -> ESTABLISHED
+               38266 ESTABLISHED:output
+               6cc5b402@e0531203(win=4000)<ACK> -> ESTABLISHED
+               38331 ESTABLISHED:input
+               [e0531203..e0531403)@6cc5b402(win=4000)<ACK,FIN,PUSH>
+               -> CLOSE_WAIT
+               38331 CLOSE_WAIT:output
+               6cc5b402@e0531404(win=3dff)<ACK> -> CLOSE_WAIT
+               38331 CLOSE_WAIT:user RCVD -> CLOSE_WAIT
+               38343 LAST_ACK:output
+               6cc5b402@e0531404(win=4000)<ACK,FIN> -> LAST_ACK
+               38343 CLOSE_WAIT:user DISCONNECT -> LAST_ACK
+               38343 LAST_ACK:user DETACH -> LAST_ACK
+
+          MECHANISM
+               TRPT interrogates the buffer of TCP trace records that
+               is created when a TCP socket is marked for debugging.
+
+          CAVEATS
+               Prior to using TRPT, an analyst should take steps to
+               isolate the problem connection and find the address of
+               its protocol control blocks.
+
+          BUGS
+               None reported.
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 161]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          LIMITATIONS
+               A socket must have the debugging option set for TRPT to
+               operate.  Another problem is that the output format of
+               TRPT is difficult.
+
+          HARDWARE REQUIRED
+               No restrictions.
+
+          SOFTWARE REQUIRED
+               BSD UNIX or related OS.
+
+          AVAILABILITY
+               Included with BSD and SunOS distributions.  Available
+               via anonymous FTP from uunet.uu.net, in file bsd-
+               sources/src/etc/trpt.tar.Z.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 162]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                                   TTCP
+
+          NAME
+               TTCP
+
+          KEYWORDS
+               benchmark, generator; IP; ping; UNIX, VMS; free.
+
+          ABSTRACT
+               TTCP is a traffic generator that can be used for test-
+               ing end-to-end throughput.  It is good for evaluating
+               TCP/IP implementations.
+
+          MECHANISM
+               Cooperating processes are started on two hosts.  They
+               open a TCP connection and transfer a high volume of
+               data.  Delay and throughput are calculated.
+
+          CAVEATS
+               Will greatly increase system load.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               None reported.
+
+          HARDWARE REQUIRED
+               No restrictions.
+
+          SOFTWARE REQUIRED
+               BSD UNIX or related OS, or VMS.
+
+          AVAILABILITY
+               Source for BSD UNIX is available via anonymous FTP from
+               vgr.brl.mil, in file ftp/pub/ttcp.c, and from sgi.com,
+               in file sgi/src/ttcp.c.  A version of TTCP has also
+               been submitted to the USENET news group
+               comp.sources.unix.  For VMS, ttcp.c is included in the
+               MultiNet Programmer's Kit, a standard feature of TGV
+               MultiNet IP software package.
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 163]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                         UNISYS-PARAMAX
+
+          NAME
+                Paramax Network Security Server
+
+          KEYWORDS
+                alarm, control, manager, security, status;
+                ethernet, FDDI, IP; X; UNIX.
+
+          ABSTRACT
+                The Paramax Network Security Server (NSS) is a
+                security officer's tool for centralized security
+                management of TCP/IP-based networks.  The NSS provides
+                capability for collection, on-line storage,
+                maintenance, and correlation of audit data from hosts,
+                workstations, servers, and network devices.  Through
+                the X window based user interface, a security officer
+                can review and analyze this audit data at the NSS,
+                select and request filtered portions of host audit
+                data, and receive and analyze security alerts from
+                across the network.  The NSS supports centralized
+                access control of network resources through its
+                capability to create and update user and host access
+                permissions data.  The user access permissions data
+                identifies network addresses that each user is
+                permitted to access.  The host access permissions data
+                identifies network addresses between which
+                communication is permitted.  The NSS supports
+                centralized management of user authentication data
+                (user IDs and passwords) and other user data for use
+                by hosts, workstations, and servers in the network.
+                It generates pseudo-random pronounceable passwords for
+                selection and assignment to users by the security officer.
+
+                The NSS deadman timer locks the NSS screen or logs the
+                security officer off the NSS after periods of
+                inactivity.  A biometric authentication device is
+                optional for rigorous fingerprint authentication of
+                users at the NSS, and logins to the NSS itself are
+                permitted only at the console.  The NSS currently
+                provides centralized security management for a System High
+                Network.  It is being upgraded for a Compartmented Mode
+                environment.
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 164]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          MECHANISM
+                The NSS uses the Audit Information Transfer Protocol
+                (AITP) for the transfer of security alerts and audit
+                data.  AITP is NOT proprietary, and the specification
+                is available from the address listed below.  Access to
+                the NSS audit database is provided via the Structured
+                Query Language (SQL).
+
+          CAVEATS
+                None.
+
+          BUGS
+                None known.
+
+          LIMITATIONS
+                None reported.
+
+          HARDWARE REQUIRED
+                Hardware required is a Sun 4 (SPARCStation) with a color
+                monitor, at least 600 MB disk, and 150 MB 1/4"
+                cartridge tape drive.
+
+          SOFTWARE REQUIRED
+                SunOS Version 4.1.1 running the Sun OpenWindows X
+                windowing environment and the SYBASE Relational Data
+                Base Management System.
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+                Commercially available from:
+                        Paramax Systems Corporation
+                        5151 Camino Ruiz
+                        Camarillo, California 93011-6004
+                        805-987-6811
+                        Peter Vazzana
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+                        Paramax Systems Corporation
+                        5151 Camino Ruiz
+                        Camarillo, California 93011-6004
+                        805-987-6811
+                        Nina Lewis <nina@cam.paramax.com>
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 165]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                     WOLLONGONG-MANAGER
+
+        NAME
+                Management Station, Release 3.0
+
+        KEYWORDS
+                manager; ; snmp, x; sun, dec, dos;.
+
+        ABSTRACT
+                Management Station is a network management software
+                product that supports SNMP.  Release 3.0 implements a
+                distributed network management architecture that helps
+                solve the scalability and reliability limitations of
+                using a single cpu for all SNMP management tasks.
+                Additionally, there are many applications provided
+                that are all user-configurable.  The following
+                applications and their functionality are listed below:
+
+                General Info:
+
+                X Windows, 11.4 based implemented with OSF/Motif 1.1.1
+                toolkit.  X Windows interface for all configuration
+                files.  Most applications have "verbose" mode for
+                display of SNMP PDU traffic.  On-line help and
+                Reference manual pages.  ANSI C compliant.
+
+                Network Management Daemon:
+
+                Responsible for device discovery, trap/alarm
+                management and fault monitoring for the network map.
+                Connection with other distributed daemons and any
+                connected stations is accomplished with SNMP/TCP.
+                Configured via Manager MIB; also incorporates SMUX MIB
+                (RFC 1227).  Sends any information to INGRES, Oracle
+                or Sybase via an ESQL interface.  User-defined actions
+                include: send alarm to map; send info to flat file;
+                execute ESQL command; call any UNIX system command;
+                forward traps and filter user-defined alarms.
+                User-defined alarms can use any boolean expression and
+                MIB variable expressions can be combined with AND/OR
+                statements.
+
+                MIB Compiler
+
+                ASN.1 MIB compiler with X Windows interface.  Accepts
+                RFC 1155 and 1212 format.  Most vendor-specific MIBs
+                and proposed Internet standard MIBs already included.
+
+
+
+
+NOCTools2 Working Group                                       [Page 166]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+                Network Map
+
+                Comprehensive network monitoring map with click and
+                drag interface, hierarchical and virtual views.
+                Toolkit and preferences applications, device
+                discovery.  Uses /etc/hosts file, NIS or DNS for
+                device resolution.  Background pixmapping capability,
+                user-definable menu bar, network manager and console
+                operator modes via UNIX group permissions.  Multiple
+                map use without limitation.
+
+                MIB Form and MIB Form Editor
+
+                User-designed, X-based SNMP applications.  Alias for
+                MIB variables and interprets returned values.  GET
+                NEXT and SET capability.  User-defined polling and
+                multi-device [agent] capability.  Configured via X
+                interface.
+
+                MIB Chart and MIB Chart Editor
+
+                Choice of strip chart, packed strip chart or bar
+                graphs.  User-specified polling interval, MIB
+                variable(s) or MIB expressions using arithmetic
+                operands.  Plot actual value, delta or delta/interval.
+                Plot multiple MIB expressions from multiple agents
+                simultaneously.  X Windows interface.  Pause polling
+                and grid options.
+
+                MIB Tool
+
+                X Windows application for the general viewing and
+                'walking' of MIB trees.  GET NEXT and SET options.
+                Window for viewing RFC 1212 MIB definitions.  Command
+                line interface option.
+
+                Application Programming Interface
+
+                Complete set of APIs for developers to write SNMP
+                applications in character mode or X Windows.
+
+        MECHANISM
+                Management Station uses SNMP and ICMP Echo Request to
+                monitor and control SNMP Agents.  Network management
+                daemon implements Wollongong's Manager MIB, SNMP over
+                TCP and the SMUX protocol.
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 167]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        CAVEATS
+                none.
+
+        BUGS
+                See Product Release Notice.
+
+        LIMITATIONS
+                Limitations on number of management agents and network
+                management daemons not known at this time.
+
+        HARDWARE REQUIRED
+                Sun SPARC workstations and servers
+                DEC DECstations and DECsystems
+                Motorola MPC (Delta 8000 series)
+                3/486 PC and PC-compatible
+
+                16 MB RAM
+                n20 MB free disk space for installation
+                Color monitor strongly recommended
+
+        SOFTWARE REQUIRED
+                SunOS 4.1-1 or greater & OpenWindows 2.0 or greater (SUN)
+                X Windows, 11.4 or greater
+                RISC ULTRIX 4.1 or greater (DEC)
+                R32V2 (Motorola)
+                Open Desktop 1.1 or greater (3/486)
+
+                Provided on 1/4" cartridge, TK-50 or 3 1/2" diskettes,
+                as appropriate, in cpio format.
+
+        AVAILABILITY
+                A commercial product of:
+
+                 The Wollongong Group, Inc.
+                        1129 San Antonio Rd
+                        Palo Alto, CA.  94303
+                ph.:    (800) 962 - 8649 (in California)
+                        (800) 872 - 8649 (outside California)
+                fax:    (415) 962 - 0286
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 168]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                                 XNETDB
+
+        NAME
+                Xnetdb
+
+        KEYWORDS
+                database, manager, map, monitoring, status; IP; Ping,
+                SNMP, Unix, X; free.
+
+        ABSTRACT
+                Xnetdb is a network monitoring tool based on X Windows
+                and SNMP which also has integrated database and
+                statistic viewing capabilities.  Xnetdb will determine
+                and display the status of routers and circuits it has
+                been told to monitor by querying the designated sites
+                and displaying the result.  It can also query the
+                status of certain designated SNMP variables, such as a
+                default route for an important router.  Additionally,
+                it also has integrated database functionality in that
+                it can display additional information about a site or
+                circuit such as the equipment at the site, the contact
+                person(s) for the site, and other useful information.
+                Finally it can gather designated statistical
+                information about a circuit and display it on demand.
+
+        MECHANISM
+                Xnetdb uses SNMP or ping to monitor things which it's
+                configured to monitor.  It dynamically builds a
+                network map on its display by querying entities and
+                obtaining IP addresses and subnet masks.  A
+                configuration file tells xnetdb which IP hosts you
+                want to monitor.
+
+        CAVEATS
+                While "ping" can be used to monitor hosts, more useful
+                results are obtained using SNMP.
+
+        BUGS
+                Bugs and other assorted topics are discussed on the
+                xnetdb mailing list.  To join, send a note to
+                "xnetdb-request@oar.net".
+
+        LIMITATIONS
+                None.
+
+        HARDWARE REQUIRED
+                No restrictions.
+
+
+
+
+NOCTools2 Working Group                                       [Page 169]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        SOFTWARE REQUIRED
+                Most any variety of UNIX plus X-Windows and/or
+                OpenWindows.
+
+        AVAILABILITY
+                Available via anonymous ftp from ftp.oar.net
+                (currently 131.187.1.102) in the directory /pub/src.
+                Special arrangements can be made for sites without
+                direct IP access by sending a note to
+                "xnetdb-request@oar.net".  There are minimal licensing
+                restrictions - these are detailed within the package.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 170]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog                  XNETMON_SNMP_RESEARCH
+
+        NAME
+                XNETMON -- an X windows based SNMP network management
+                station from SNMP Research.
+
+        KEYWORDS
+                alarm, benchmark, control, debugger, manager, map,
+                reference, security, status, traffic;
+                bridge, DECnet, Ethernet, FDDI, IP, OSI, ring, star;
+                NMS, Ping, SNMP, X;
+                UNIX;
+                Sourcelib.
+
+        ABSTRACT
+                The XNETMON application implements a powerful network
+                management station based on the X window system.
+                XNETMON's network management tools for configuration,
+                performance, security, and fault management have been
+                used successfully with a wide assortment of wide- and
+                local-area-network topologies and media.
+                Multiprotocol devices are supported
+                including those using TCP/IP, DECnet, and OSI
+                protocols.
+
+        Some features of XNETMON's network management tools include:
+
+                o Fault management tool displays a map of the network
+                  configuration with node and link state indicated
+                  in one of several colors to indicate current status;
+                o Configuration management tool may be used to edit the
+                  network management information base stored in the
+                  NMS to reflect changes occurring in the network;
+                o Graphs and tabular tools for use in fault and performance
+                  management (e.g. XNETPERFMON);
+                o Mechanisms by which additional variables, such as vendor-
+                  specific variables, may be added;
+                o Alarms may be enabled to alert the operator of events
+                  occurring in the network;
+                o Events are logged to disk;
+                o Output data may be transferred via flat files for
+                  additional report generation by a variety of
+                  statistical packages.
+
+                The XNETMON application comes complete with source
+                code including a powerful set of portable libraries
+                for generating and parsing SNMP messages.
+
+
+
+
+NOCTools2 Working Group                                       [Page 171]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        MECHANISM
+                XNETMON is based on the Simple Network Management
+                Protocol (SNMP).  Polling is performed via the
+                powerful SNMP get-next operator and the SNMP get
+                operator.  Trap-directed polling is used to regulate
+                        focus and intensity of the polling.
+
+        CAVEATS
+                None.
+
+        BUGS
+                None known.
+
+        LIMITATIONS
+                Monitored and managed nodes must implement the SNMP over
+                UDP per RFC 1157 or must be reachable via a proxy agent.
+
+        HARDWARE REQUIRED
+                X windows workstation with UDP socket library.
+                Monochrome is acceptable, but color is far superior.
+
+        SOFTWARE REQUIRED
+                X windows version 11 release 4 or later or MOTIF.
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+                This is a commercial product available under license
+                from:
+                        SNMP Research
+                        3001 Kimberlin Heights Road
+                        Knoxville, TN  37920-9716
+                        Attn:  John Southwood, Sales and Marketing
+                        (615) 573-1434 (Voice)  (615) 573-9197 (FAX)
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+                users@seymour1.cs.utk.edu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 172]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                      XNETMON_WELLFLEET
+
+          NAME
+               xnetmon, xpmon
+
+          KEYWORDS
+               alarm, manager, map, status; IP; NMS, SNMP; UNIX.
+
+          ABSTRACT
+               Xnetmon and xpmon provide graphical representation of
+               performance and status of SNMP-capable network ele-
+               ments.  Xnetmon presents a schematic network map
+               representing the up/down status of network elements;
+               xpmon draws a pen plot style graph of the change over
+               time of any arbitrary MIB object (RFC1066).  Both xnet-
+               mon and xpmon use the SNMP (RFC1098) for retrieving
+               status and performance data.
+
+          MECHANISM
+               Xnetmon polls network elements for the status of their
+               interfaces on a controllable polling interval.  Pop-up
+               windows displaying the values of any MIB variable are
+               supported by separate polls.  When SNMP traps are
+               received from a network element, that element and all
+               adjacent elements are immediately re-polled to update
+               their status.  The layout of the network map is stati-
+               cally configured.  Xpmon repeatedly polls (using SNMP)
+               the designated network element for the value of the
+               designated MIB variable on the user-specified interval.
+               The change in the variable is then plotted on the strip
+               chart.  The strip chart regularly adjusts its scale to
+               the current maximum value on the graph.
+
+          CAVEATS
+               Polling intervals should be chosen with care so as not
+               to affect system performance adversely.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               None reported.
+
+          HARDWARE REQUIRED
+               Distributed and supported for Sun-3 systems.
+
+          SOFTWARE REQUIRED
+               SunOS 3.5 or 4.x; X11, release 2 or 3.
+
+
+
+NOCTools2 Working Group                                       [Page 173]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          AVAILABILITY
+               Commercial product of:
+                    Wellfleet Communications, Inc.
+                    12 DeAngelo Drive
+                    Bedford, MA 01730-2204
+                    (617) 275-2400
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 174]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        Internet Tool Catalog              XNETPERFMON_SNMP_RESEARCH
+
+        NAME
+                xnetperfmon -- a graphical network performance and
+                fault management tool from SNMP Research.
+
+        KEYWORDS
+                manager, security, status;
+                DECnet, Ethernet, IP, OSI, ring, star;
+                NMS, SNMP, X;
+                DOS, UNIX, VMS;
+                sourcelib.
+
+        ABSTRACT
+                Xnetperfmon is a XNETMON tool used to produce plots of
+                SNMP variables in graphical displays.  The manager may
+                easily customize the labels, step size, update interval,
+                and variables to be plotted to produce graphs for fault
+                and performance management.  Scales automatically adjust
+                whenever a point to be plotted would go off scale.
+
+        MECHANISM
+                The xnetperfmon application communicates with remote
+                agents or proxy agents via the Simple Network Management
+                Protocol (SNMP).
+
+        CAVEATS
+                All plots for a single invocation of xnetperfmon must be
+                for variables provided by a single network management
+                agent.  However, multiple invocations of xnetperfmon may
+                be active on a single display simultaneously or proxy
+                agents may be used to summarize information at a common
+                point.
+
+        BUGS
+                None known.
+
+        LIMITATIONS
+                None reported.
+
+        HARDWARE REQUIRED
+                Systems supporting X windows.
+
+        SOFTWARE REQUIRED
+                XNETMON from SNMP Research and X Version 11 release 4 or
+                later (option MOTIF)
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 175]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+        AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL
+                This is a commercial product available under license
+                from:
+
+                SNMP Research
+                3001 Kimberlin Heights Road
+                Knoxville, TN  37920-9716
+                Attn:  John Southwood, Sales and Marketing
+                (615) 573-1434 (Voice)  (615) 573-9197 (FAX)
+
+        CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY
+                users@seymour1.cs.utk.edu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 176]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+          Internet Tool Catalog                                 XUP_HP
+
+          NAME
+               xup
+
+          KEYWORDS
+               status; ping, X; HP.
+
+          ABSTRACT
+               Xup uses the X-Windows to display the status of an
+               "interesting" set of hosts.
+
+          MECHANISM
+               Xup uses ping to determine host status.
+
+          CAVEATS
+               Polling for status increases network load.
+
+          BUGS
+               None known.
+
+          LIMITATIONS
+               None reported.
+
+          HARDWARE REQUIRED
+               Runs only on HP series 300 and 800 workstations.
+
+          SOFTWARE REQUIRED
+               Version 10 of X-Windows.
+
+          AVAILABILITY
+               A standard command for the HP 300 & 800 Workstations.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 177]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+Appendix: "No-Writeups"
+
+   This section contains references to tools which are known to exist,
+   but which have not been fully cataloged.  If anyone wishes to author
+   an entry for one of these tools please contact: noctools-
+   request@merit.edu.
+
+   Each mention is separated by a <form-feed> for improved readability.
+   If you intend to actually print-out this section of the catalog, then
+   you should probably strip-out the <ff>.
+
+tuecho.c
+
+/*
+ * Send / receive TCP or UDP echos in any of a number of bizarre ways.
+ *
+ *   Joel P. Bion, March 1990
+ *   Copyright (c) 1990 cisco Systems. All rights reserved.
+ *
+ * This "tuecho" program is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Prompts as:
+ *   Host: -- host to send echos to -- can be name or a.b.c.d --
+ *   Enter protocol (0 = UDP, 1 = TCP) [0]: -- UDP or TCP
+ * Size of data portion (bytes) [100]: -- bytes in data, excluding
+ * headers -- Number of bursts [5]: -- number of bursts of packets to
+ * send -- Packets per burst [1]: -- packets per burst, all sent AT
+ * ONCE -- Timeout (seconds) [2]: -- how long to wait for data
+ * Pause interval (seconds) [0]: -- Pause interval between bursts of
+ * frames
+ *   Type of pattern (specify = 0, increment = 1) [1]:
+ *          -- if 0 specified, allow you to specify a 16bit pattern
+            -- as four hex digits (see below). If 1, will create a
+            -- "incrementing", cycling pattern from 0x0000 -> 0xffff
+            -- ->.
+ *   Enter pattern (hex value) [abcd]:  -- if "0" specified above
+ */
+
+Availability:
+        ftp.uu.net:/networking/cisco/tuecho.c
+        ftp.cisco.com:tuecho.c
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 178]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+SPY     An NFS monitoring/tracing tool
+
+Availability:
+        A postscript file describing SPY is located on
+        ftp.uu.net:/networking/ip/nfs/spy.ps.Z
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 179]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+NFSTRACE
+
+   This is the rpcspy/nfstrace package.
+
+   It is described in detail in the paper "NFS Tracing by Passive
+   Network Monitoring", which appeared in the January, 1992 USENIX
+   conference.
+
+   You'll need either a DEC machine running ULTRIX (with the
+   packetfilter installed in the kernel) or a Sun running SunOS 4.x
+   (with NIT).  Or you'll need to do a bit of hacking.
+
+   The package differs slightly from the version in the paper:
+
+
+   - The handle->name translation facility has been removed.  It's
+     just too fragile to include in the general release.  If you need it,
+     contact me directly and I'll be happy to mail you the code.
+
+   - The output format is a wee-bit different.
+
+   - The IBM-RT Enet filter version is also not included, since I seem to
+     be the only person in the world running it.  RTs are really too slow
+     for this anyway.
+
+   To configure the package, edit the makefile in the obvious (to me at
+   least) way.
+
+   Note that not all versions of SunOS NIT have working versions of
+   the packet timestamp mechanism.  Try to set the -DSTAMPS option in
+   the makefile, and if that doesn't work, take it out.
+
+   If you are actually going to use this to gather traces, I'd like to
+   hear from you! Please send email, and share your results/traces if
+   your organization will allow it.  I maintain a mailing list of users
+   for updates, etc.  Send me mail to be added to it.
+
+   Happy tracing.
+   Matt Blaze
+   Department of Computer Science
+   Princeton University
+   35 Olden Street
+   Princeton, NJ 08544
+   mab@cs.princeton.edu
+   609-258-3946
+
+   Availability:
+           ftp.uu.net:/networking/ip/nfs/nfstrace.shar  (or check archie)
+
+
+
+NOCTools2 Working Group                                       [Page 180]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   LAMER
+
+   #  Lame delegation notifier
+   #  Author:  Bryan Beecher
+   #  Last Modified:   6/25/92
+   #
+   #  To make use of this software, you need to be running the
+   #  University of Michigan release of BIND 4.8.3, or any version
+   #  of named that supports the LAME_DELEGATION patches posted to
+   #  USENET.  The U-M release is available via anonymous ftp from
+   #  terminator.cc.umich.edu:/unix/dns/bind4.8.3.tar.Z.
+   #
+   #  You must also have a copy of query(1) and host(1).  These
+   #  are also available via anonymous ftp in the aforementioned
+   #  place.
+   # -------------------------------------------------------------
+
+   # -------------------------------------------------------------
+   #  handle arguments
+   # -------------------------------------------------------------
+   #       -d <day>
+   #       This flag is used to append a dot-day suffix to the LOGFILE.
+   #       Handy where log files are kept around for the last week
+   #       and contain a day suffix.
+   #
+   #       -f <logfile>
+   #       Change the LOGFILE value altogether.
+   #
+   #       -w
+   #       Count up all of the DNS statistics for the whole week.
+   #
+   #       -v
+   #       Be verbose.
+   #
+   #       -t
+   #       Test mode.  Do not send mail to the lame delegation
+   #       hostmasters.
+
+   Availability:
+           ftp.uu.net:/networking/ip/dns/lamer.tar.Z  (or check archie)
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 181]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   HOST
+
+     host - look up host names using domain server
+
+SYNOPSIS
+     host [-v] [-a] [-t querytype] [options]  name  [server]
+     host [-v] [-a] [-t querytype] [options]  -l domain  [server]
+     host [-v] [options]  -H [-D] [-E] [-G] domain
+     host [-v] [options]  -C domain
+     host [-v] [options]  -A host
+
+DESCRIPTION
+     host looks for information about Internet hosts or domains.
+     It gets this information from a set of interconnected
+     servers that are spread across the world.  By default, it
+     simply converts between host names and Internet addresses.
+     However, with the -t, -a and -v options, it can be used to
+     find all of the information about hosts or domains that is
+     maintained by the domain nameserver.
+
+/*
+ * Extensively modified by E. Wassenaar, Nikhef-H, <e07@nikhef.nl>
+ *
+ * The officially maintained source of this program is available
+ * via anonymous ftp from machine 'ftp.nikhef.nl' [192.16.199.1]
+ * in the directory '/pub/network' as 'host.tar.Z'
+ *
+ * Also available in this directory are patched versions of the
+ * BIND 4.8.3 nameserver and resolver library which you may need
+ * to fully exploit the features of this program, although they
+ * are not mandatory. See the file 'README_FIRST' for details.
+ *
+ * You are kindly requested to report bugs and make suggestions
+ * for improvements to the author at the given email address,
+ * and to not re-distribute your own modifications to others.
+ */
+/*
+ *                      New features
+ *
+ * - Major overhaul of the whole code.
+ * - Very rigid error checking, with more verbose error messages.
+ * - Zone listing section completely rewritten.
+ * - It is now possible to do recursive listings into subdomains.
+ * - Maintain resource record statistics during zone listings.
+ * - Maintain count of hosts during zone listings.
+ * - Exploit multiple server addresses if available.
+ * - Option to exploit only primary server for zone transfers.
+ * - Option to exclude info from names that do not reside in a domain.
+
+
+
+NOCTools2 Working Group                                       [Page 182]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+ * - Implement timeout handling during connect and read.
+ * - Write resource record output to optional logfile.
+ * - Special MB tracing by recursively expanding MR and MG records.
+ * - Special mode to check SOA records at each nameserver for domain.
+ * - Special mode to check inverse mappings of host addresses.
+ * - Code is extensively documented.
+ */
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 183]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+PINGs
+
+Many many versions of the PING program exist.
+Each implementation has its own set of additional features.
+Here are a few more PINGs that are worth taking a look at.
+
+Version on ftp.cc.berkeley.edu:pub/ping:
+        This version has duplicate packet detection, Record Route,
+        ability to specify data pattern for packets, flood pinging, an
+        interval option, Multicast support, etc.
+
+Version on nikhefh.nikhef.nl:/pub/network/rping.tar.Z:
+        'rping' is just like 'ping', but only a single probe packet
+        is sent to test the reachability of a destination.
+        As an option, the loose source routing facility is used
+        to show the roundtrip route the packet has taken.
+        Multiple addresses of remote hosts are tried until one
+        responds. As an option, each of multiple addresses can be
+        probed unconditionally.
+        Contains a patch for making loose source routing work in
+        case you have a SUN with an OMNINET ethernet controller.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 184]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+VRFY
+
+vrfy.tar.Z      (Version 921021)
+        'vrfy' is a tool to verify email addresses and mailing lists.
+        In its simplest form it takes an address "user@domain", figures
+        out the MX hosts for "domain", and issues the SMTP command VRFY
+        at the primary MX host (optionally all), or at "domain" itself
+        if no MX hosts exist. Without "domain" it goes to "localhost".
+        More complex capabilities are: recursively expanding forward
+        files or mailing lists, and detecting mail forwarding loops.
+        Full-blown RFC822 address specifications are understood.
+        Syntax checking can be carried out either locally or remotely.
+        Various options are provided to exploit alternative protocol
+        suites if necessary, and to print many forms of verbose output.
+        Obvious limitations exist, but on average it works pretty well.
+        Needless to say you need internet (nameserver and SMTP) access.
+        See the man page and the extensive documentation in the source
+        for further details.
+
+Please send comments and suggestions to Eric Wassenaar <e07@nikhef.nl>
+
+If you want to receive notification of updates, please send an email
+with the keyword "subscribe" in the subject or the body to the address
+<net-dist-request@nikhef.nl>
+
+available as:  nikhefh.nikhef.nl:/pub/network/vrfy.tar.Z
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 185]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+XNETLOAD
+
+NAME
+     xnetload - ethernet load average display for X
+
+SYNOPSIS
+     xnetload [-toolkitoption ...] [-scale integer]
+           [-update seconds] [-hl color] [-highlight color]
+           [-jumpscroll pixels] [-label string] [-nolabel] host
+
+DESCRIPTION
+     The xnetload program displays a periodically updating histo-
+     gram  of  the  ethernet load average for the specified host.
+     The resulting graph is  scaled  as  0%  to  100%,  where  0%
+     corresponds  to  0mbs  and 100% corresponds to 10mbs.  NOTE:
+     The specified host must be running rpc.etherd.
+
+This program has been run using X11R4 and X11R5, under the following
+operating systems:
+
+        SUNOS 4.1.0
+        SUNOS 4.1.1
+        ULTRIX V4.2
+        IRIX 3.3.2
+
+Assuming the Imake templates and Rules are in order and in the proper
+place on your system, these programs should compile and link
+straightforward by running the following sequence:
+
+        xmkmf
+        make
+
+Then, as root, issue the following:
+
+        make install
+        make install.man
+
+Then, on your host system, (or on any other system you can rlogin or rsh
+into) start the etherd daemon with the following (must be root):
+
+        /usr/etc/rpc.etherd le0 &
+
+where le0 is the mnemonic for the primary ethernet interface.
+
+To start the xnetload program, the following command line is suggested:
+
+        ./xnetload -hl red host &
+
+
+
+
+NOCTools2 Working Group                                       [Page 186]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+where "host" is the name of any reachable network node (including
+LOCALHOST) that is running the etherd daemon. A small xload window
+should appear on your local display with nine horizontal lines. The
+label:
+        "Ethernet Load %"
+should appear in the upper left hand corner, just below any additional
+title bars or other decorations provided by your window manager. If the
+program comes up without the nine lines, or without the "Ethernet Load"
+label, then either your resource file is not properly installed in the
+appropriate app-defaults directory, or you may have picked up the wrong
+xnetload image.  Try re-running "make install" as root, or be sure to
+include the "./" in front of the command name.
+
+Good Luck!
+
+The following changes have been made to this directory since R3:
+
+      o Now use Athena StripChart widget.
+
+      o Understands WM_DELETE_WINDOW.
+
+      o 3-26-92 Modified from xload to xnetload by Roger Smith,
+        Sterling Software at NASA-Ames Research Center,
+        Mountain View, Calif. rsmith@proteus.arc.nasa.gov
+
+Availability:
+        ftp proteus.arc.nasa.gov:pub/XEnetload.tar.Z  (or check archie)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 187]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+NETTEST
+
+     nettest, nettestd - Performs client and server functions for
+     timing data throughput
+
+     The nettest and nettestd commands invoke client  and  server
+     programs that are used for timing data throughput of various
+     methods of interprocess communication.  For TCP and OSI con-
+     nections,  the nettest program establishes a connection with
+     the nettestd program, and then it does count writes of  size
+     bytes,  followed by count reads of size bytes.  For UDP, the
+     nettest program performs only writes;  reads  are  not  per-
+     formed.  The nettestd program, if used with UDP connections,
+     reads the data packets and prints a message  for  each  data
+     packet  it  receives.   The number and size of the reads and
+     writes may not correlate with the number  and  size  of  the
+     actual  data packets that are transferred; it depends on the
+     protocol that is chosen.  If you append an optional k (or K)
+     to  the  size, count, or bufsize value, the number specified
+     is multiplied by 1024.
+
+   The source for nettest and nettestd is provided on an "as is"
+   basis.  Cray Research does not provide any support for this code
+   (unless you are a customer who has purchased the UNICOS operating
+   system).
+
+   We will gladly take bug reports for nettest/nettestd.  Suggested
+   fixes are preferred to just bug reports.  Changes to allow
+   nettest/nettestd to run on other architectures are also welcomed.  We
+   will try to incorporate bugfixes and update the publicly available
+   code, but we can make no guarantees.
+
+   For copyright information, see the notice in each source file.
+
+   Send bug-reports/fixes to:
+        E-mail:         dab@cray.com
+        U.S. Mail:      David Borman
+                        Cray Research, Inc.
+                        655F Lone Oak Drive
+                        Eagan, MN 55121
+   Notes:
+
+   1) The -b option to nettestd has not been tested...
+   2) The ISO code should work on a 4.4BSD system, but the
+      gethostinfo() routine is specific to UNICOS...
+
+   Availability:
+           ftp sgi.com:/sgi/src/nettest
+
+
+
+NOCTools2 Working Group                                       [Page 188]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   ETHERCK
+
+   etherck is a simple program that displays Sun ethernet statistics.
+   If you have a high percentage of input errors that are due to "out of
+   buffers", then you can run the "iepatch" script to patch a kernel
+   that uses the Intel ethernet chip ("ie").  A back of the envelope
+   calculation shows that a .25% input error rate gives about a 10%
+   degradation of NFS performance if 8k packets are being used.
+
+   In our environment at Legato, patching the ie buffer allocation made
+   the input error rate drop more than 2 orders of magnitude.  This was
+   after we had applied other networking fixes (e.g., using Prestoserve,
+   going from thin wire to twisted pair) and pushed a higher load on the
+   server.
+
+   Note that both etherck and iepatch must be run by root (or you can
+   make etherck setgid kmem).
+
+   Availability:
+           send EMAIL to:          request@legato.com
+           with a Subject line:    send unsupported etherck
+
+   The following is part of the 'help' file from the Legato Email
+   Server:
+
+   This message comes to you from the request server at Legato.COM,
+   request@Legato.COM.  It received a message from you asking for help.
+
+   The request server is a mail-response program.  That means that you
+   mail it a request, and it mails back the response.
+
+   The request server is a very dumb program.  It does not have much
+   error checking.  If you don't send it the commands that it
+   understands, it will just answer "I don't understand you".
+
+   The request server has 4 commands.  Each command must be the first
+   word on a line.  The request server reads your entire message before
+   it does anything, so you can have several different commands in a
+   single message.  The request server treats the "Subject:" header line
+   just like any other line of the message.  You can use any combination
+   of upper and lower case letters in the commands.
+
+   The request server's files are organized into a series of directories
+   and subdirectories.  Each directory has an index, and each
+   subdirectory has an index.  The top-level index gives you an overview
+   of what is in the subdirectories, and the index for each subdirectory
+   tells you what is in it.
+
+
+
+
+NOCTools2 Working Group                                       [Page 189]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   The server has 4 commands:
+
+   "help" command: The command "help" or "send help" causes the server to
+           send you the help file.  You already know this, of course,
+           because you are reading the help file.  No other commands are
+           honored in a message that asks for help (the server figures
+           that you had better read the help message before you do
+           anything else).
+
+   SEND a request to Legato to get the rest of the help file!
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 190]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+   NETCK
+
+   netck is a shar file that contains the sources to build "netck", a
+   network checker that uses the rstat(3R) protocol to gather and print
+   statistics from machines on the network.  netck is useful to help
+   understand what part of what machines are potential NFS bottlenecks.
+   To get this file, send email to the request server with the command
+   "send unsupported netck".
+
+   Availability:
+           same as ETHERCK (send email To: request@legato.com; subject:
+           HELP)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 191]
+
+RFC 1470          FYI: Network Management Tool Catalog         June 1993
+
+
+References
+
+   [1] Stine, R., Editor, "FYI on a Network Management Tool Catalog:
+       Tools for Monitoring and Debugging TCP/IP Internets and
+       Interconnected Devices", FYI 2, RFC 1147, Sparta, Inc., April
+       1990.
+
+Security Considerations
+
+   Security issues are not discussed in this memo.
+
+Authors' Addresses
+
+   Robert M. Enger
+   Advanced Network and Services
+   1875 Campus Commons Drive,  Suite 220
+   Reston, VA.  22091-1552
+
+   Phone: 703-758-7722
+   EMail: enger@reston.ans.net
+
+
+   Joyce K. Reynolds
+   Information Sciences Institute
+   University of Southern California
+   4676 Admiralty Way
+   Marina del Rey, CA 90292
+
+   Phone: (310) 822-1511
+   Email: JKREY@ISI.EDU
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+NOCTools2 Working Group                                       [Page 192]
+
\ No newline at end of file
diff --git a/ext/picotcp/RFC/rfc1644.txt b/ext/picotcp/RFC/rfc1644.txt
new file mode 100644
index 0000000..2aca5a6
--- /dev/null
+++ b/ext/picotcp/RFC/rfc1644.txt
@@ -0,0 +1,2131 @@
+
+
+
+
+
+
+Network Working Group                                          R. Braden
+Request for Comments: 1644                                           ISI
+Category: Experimental                                         July 1994
+
+                T/TCP -- TCP Extensions for Transactions
+                        Functional Specification
+
+Status of this Memo
+
+   This memo describes an Experimental Protocol for the Internet
+   community, and requests discussion and suggestions for improvements.
+   It does not specify an Internet Standard.  Distribution is unlimited.
+
+Abstract
+
+   This memo specifies T/TCP, an experimental TCP extension for
+   efficient transaction-oriented (request/response) service.  This
+   backwards-compatible extension could fill the gap between the current
+   connection-oriented TCP and the datagram-based UDP.
+
+   This work was supported in part by the National Science Foundation
+   under Grant Number NCR-8922231.
+
+Table of Contents
+
+ 1. INTRODUCTION ..................................................  2
+ 2.  OVERVIEW .....................................................  3
+    2.1  Bypassing the Three-Way Handshake ........................  4
+    2.2  Transaction Sequences ....................................  6
+    2.3  Protocol Correctness .....................................  8
+    2.4  Truncating TIME-WAIT State ............................... 12
+    2.5  Transition to Standard TCP Operation ..................... 14
+ 3.  FUNCTIONAL SPECIFICATION ..................................... 17
+    3.1  Data Structures .......................................... 17
+    3.2  New TCP Options .......................................... 17
+    3.3  Connection States ........................................ 19
+    3.4  T/TCP Processing Rules ................................... 25
+    3.5  User Interface ........................................... 28
+ 4.  IMPLEMENTATION ISSUES ........................................ 30
+    4.1  RFC-1323 Extensions ...................................... 30
+    4.2  Minimal Packet Sequence .................................. 31
+    4.3  RTT Measurement .......................................... 31
+    4.4  Cache Implementation ..................................... 32
+    4.5  CPU Performance .......................................... 32
+    4.6  Pre-SYN Queue ............................................ 33
+ 6.  ACKNOWLEDGMENTS .............................................. 34
+ 7.  REFERENCES ................................................... 34
+ APPENDIX A.  ALGORITHM SUMMARY ................................... 35
+
+
+
+Braden                                                          [Page 1]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+ Security Considerations .......................................... 38
+ Author's Address ................................................. 38
+
+1. INTRODUCTION
+
+   TCP was designed around the virtual circuit model, to support
+   streaming of data.  Another common mode of communication is a
+   client-server interaction, a request message followed by a response
+   message.  The request/response paradigm is used by application-layer
+   protocols that implement transaction processing or remote procedure
+   calls, as well as by a number of network control and management
+   protocols (e.g., DNS and SNMP).  Currently, many Internet user
+   programs that need request/response communication use UDP, and when
+   they require transport protocol functions such as reliable delivery
+   they must effectively build their own private transport protocol at
+   the application layer.
+
+   Request/response, or "transaction-oriented", communication has the
+   following features:
+
+   (a)  The fundamental interaction is a request followed by a response.
+
+   (b)  An explicit open or close phase may impose excessive overhead.
+
+   (c)  At-most-once semantics is required; that is, a transaction must
+        not be "replayed" as the result of a duplicate request packet.
+
+   (d)  The minimum transaction latency for a client should be RTT +
+        SPT, where RTT is the round-trip time and SPT is the server
+        processing time.
+
+   (e)  In favorable circumstances, a reliable request/response
+        handshake should be achievable with exactly one packet in each
+        direction.
+
+   This memo concerns T/TCP, a backwards-compatible extension of TCP to
+   provide efficient transaction-oriented service in addition to
+   virtual-circuit service.  T/TCP provides all the features listed
+   above, except for (e); the minimum exchange for T/TCP is three
+   segments.
+
+   In this memo, we use the term "transaction" for an elementary
+   request/response packet sequence.  This is not intended to imply any
+   of the semantics often associated with application-layer transaction
+   processing, like 3-phase commits.  It is expected that T/TCP can be
+   used as the transport layer underlying such an application-layer
+   service, but the semantics of T/TCP is limited to transport-layer
+   services such as reliable, ordered delivery and at-most-once
+
+
+
+Braden                                                          [Page 2]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+   operation.
+
+   An earlier memo [RFC-1379] presented the concepts involved in T/TCP.
+   However, the real-world usefulness of these ideas depends upon
+   practical issues like implementation complexity and performance.  To
+   help explore these issues, this memo presents a functional
+   specification for a particular embodiment of the ideas presented in
+   RFC-1379.  However, the specific algorithms in this memo represent a
+   later evolution than RFC-1379.  In particular, Appendix A in RFC-1379
+   explained the difficulties in truncating TIME-WAIT state.  However,
+   experience with an implementation of the RFC-1379 algorithms in a
+   workstation later showed that accumulation of TCB's in TIME-WAIT
+   state is an intolerable problem; this necessity led to a simple
+   solution for truncating TIME-WAIT state, described in this memo.
+
+   Section 2 introduces the T/TCP extensions, and section 3 contains the
+   complete specification of T/TCP.  Section 4 discusses some
+   implementation issues, and Appendix A contains an algorithmic
+   summary.  This document assumes familiarity with the standard TCP
+   specification [STD-007].
+
+2.  OVERVIEW
+
+   The TCP protocol is highly symmetric between the two ends of a
+   connection.  This symmetry is not lost in T/TCP; for example, T/TCP
+   supports TCP's symmetric simultaneous open from both sides (Section
+   2.3 below).  However, transaction sequences use T/TCP in a highly
+   unsymmetrical manner.  It is convenient to use the terms "client
+   host" and "server host" for the host that initiates a connection and
+   the host that responds, respectively.
+
+   The goal of T/TCP is to allow each transaction, i.e., each
+   request/response sequence, to be efficiently performed as a single
+   incarnation of a TCP connection.  Standard TCP imposes two
+   performance problems for transaction-oriented communication.  First,
+   a TCP connection is opened with a "3-way handshake", which must
+   complete successfully before data can be transferred.  The 3-way
+   handshake adds an extra RTT (round trip time) to the latency of a
+   transaction.
+
+   The second performance problem is that closing a TCP connection
+   leaves one or both ends in TIME-WAIT state for a time 2*MSL, where
+   MSL is the maximum segment lifetime (defined to be 120 seconds).
+   TIME-WAIT state severely limits the rate of successive transactions
+   between the same (host,port) pair, since a new incarnation of the
+   connection cannot be opened until the TIME-WAIT delay expires.  RFC-
+   1379 explained why the alternative approach, using a different user
+   port for each transaction between a pair of hosts, also limits the
+
+
+
+Braden                                                          [Page 3]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+   transaction rate: (1) the 16-bit port space limits the rate to
+   2**16/240 transactions per second, and (2) more practically, an
+   excessive amount of kernel space would be occupied by TCP state
+   blocks in TIME-WAIT state [RFC-1379].
+
+   T/TCP solves these two performance problems for transactions, by (1)
+   bypassing the 3-way handshake (3WHS) and (2) shortening the delay in
+   TIME-WAIT state.
+
+   2.1  Bypassing the Three-Way Handshake
+
+      T/TCP introduces a 32-bit incarnation number, called a "connection
+      count" (CC), that is carried in a TCP option in each segment.  A
+      distinct CC value is assigned to each direction of an open
+      connection.  A T/TCP implementation assigns monotonically
+      increasing CC values to successive connections that it opens
+      actively or passively.
+
+      T/TCP uses the monotonic property of CC values in initial <SYN>
+      segments to bypass the 3WHS, using a mechanism that we call TCP
+      Accelerated Open (TAO).  Under the TAO mechanism, a host caches a
+      small amount of state per remote host.  Specifically, a T/TCP host
+      that is acting as a server keeps a cache containing the last valid
+      CC value that it has received from each different client host.  If
+      an initial <SYN> segment (i.e., a segment containing a SYN bit but
+      no ACK bit) from a particular client host carries a CC value
+      larger than the corresponding cached value, the monotonic property
+      of CC's ensures that the <SYN> segment must be new and can
+      therefore be accepted immediately.  Otherwise, the server host
+      does not know whether the <SYN> segment is an old duplicate or was
+      simply delivered out of order; it therefore executes a normal 3WHS
+      to validate the <SYN>.  Thus, the TAO mechanism provides an
+      optimization, with the normal TCP mechanism as a fallback.
+
+      The CC value carried in non-<SYN> segments is used to protect
+      against old duplicate segments from earlier incarnations of the
+      same connection (we call such segments 'antique duplicates' for
+      short).  In the case of short connections (e.g., transactions),
+      these CC values allow TIME-WAIT state delay to be safely
+      truncated, as discussed in Section 2.3.
+
+      T/TCP defines three new TCP options, each of which carries one
+      32-bit CC value.  These options are named CC, CC.NEW, and CC.ECHO.
+      The CC option is normally used; CC.NEW and CC.ECHO have special
+      functions, as follows.
+
+
+
+
+
+
+Braden                                                          [Page 4]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+      (a)  CC.NEW
+
+           Correctness of the TAO mechanism requires that clients
+           generate monotonically increasing CC values for successive
+           connection initiations.  These values can be generated using
+           a simple global counter.  There are certain circumstances
+           (discussed below in Section 2.2) when the client knows that
+           monotonicity may be violated; in this case, it sends a CC.NEW
+           rather than a CC option in the initial <SYN> segment.
+           Receiving a CC.NEW causes the server to invalidate its cache
+           entry and do a 3WHS.
+
+      (b)  CC.ECHO
+
+           When a server host sends a <SYN,ACK> segment, it echoes the
+           connection count from the initial <SYN> in a CC.ECHO option,
+           which is used by the client host to validate the <SYN,ACK>
+           segment.
+
+      Figure 1 illustrates the TAO mechanism bypassing a 3WHS.  The
+      cached CC values, denoted by cache.CC[host], are shown on each
+      side.  The server host compares the new CC value x in segment #1
+      against x0, its cached value for client host A; this comparison is
+      called the "TAO test".  Since x > x0, the <SYN> must be new and
+      can be accepted immediately; the data in the segment can therefore
+      be delivered to the user process B, and the cached value is
+      updated.  If the TAO test failed (x <= x0), the server host would
+      do a normal three-way handshake to validate the <SYN> segment, but
+      the cache would not be updated.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Braden                                                          [Page 5]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+
+          TCP A  (Client)                              TCP B (Server)
+          _______________                              ______________
+
+                                                          cache.CC[A]
+                                                            V
+
+                                                          [ x0 ]
+
+        #1        -->  <SYN, data1, CC=x> -->  (TAO test OK (x > x0) =>
+                                                     data1->user_B and
+                                                     cache.CC[A]= x; )
+
+                                                           [ x ]
+        #2       <-- <SYN, ACK(data1), data2, CC=y, CC.ECHO=x> <--
+            (data2->user_A;)
+
+
+              Figure 1. TAO: Three-Way Handshake is Bypassed
+
+
+      The CC value x is echoed in a CC.ECHO option in the <SYN,ACK>
+      segment (#2); the client side uses this option to validate the
+      segment.  Since segment #2 is valid, its data2 is delivered to the
+      client user process.  Segment #2 also carries B's CC value; this
+      is used by A to validate non-SYN segments from B, as explained in
+      Section 2.4.
+
+      Implementing the T/TCP extensions expands the connection control
+      block (TCB) to include the two CC values for the connection; call
+      these variables TCB.CCsend and TCB.CCrecv (or CCsend, CCrecv for
+      short).  For example, the sequence shown in Figure 1 sets
+      TCB.CCsend = x and TCB.CCrecv = y at host A, and vice versa at
+      host B.  Any segment that is received with a CC option containing
+      a value SEG.CC different from TCB.CCrecv will be rejected as an
+      antique duplicate.
+
+   2.2  Transaction Sequences
+
+      T/TCP applies the TAO mechanism described in the previous section
+      to perform a transaction sequence.  Figure 2 shows a minimal
+      transaction, when the request and response data can each fit into
+      a single segment.  This requires three segments and completes in
+      one round-trip time (RTT).  If the TAO test had failed on segment
+      #1, B would have queued data1 and the FIN for later processing,
+      and then it would have returned a <SYN,ACK> segment to A, to
+      perform a normal 3WHS.
+
+
+
+
+Braden                                                          [Page 6]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+
+       TCP A  (Client)                                    TCP B (Server)
+       _______________                                    ______________
+
+       CLOSED                                                     LISTEN
+
+   #1  SYN-SENT*        --> <SYN,data1,FIN,CC=x> -->         CLOSE-WAIT*
+                                                           (TAO test OK)
+                                                         (data1->user_B)
+
+                                                           <-- LAST-ACK*
+   #2  TIME-WAIT   <-- <SYN,ACK(FIN),data2,FIN,CC=y,CC.ECHO=x>
+     (data2->user_A)
+
+
+   #3  TIME-WAIT          --> <ACK(FIN),CC=x> -->                 CLOSED
+
+       (timeout)
+         CLOSED
+
+             Figure 2: Minimal T/TCP Transaction Sequence
+
+
+      T/TCP extensions require additional connection states, e.g., the
+      SYN-SENT*, CLOSE-WAIT*, and LAST-ACK* states shown in Figure 2.
+      Section 3.3 describes these new connection states.
+
+      To obtain the minimal 3-segment sequence shown in Figure 2, the
+      server host must delay acknowledging segment #1 so the response
+      may be piggy-backed on segment #2.  If the application takes
+      longer than this delay to compute the response, the normal TCP
+      retransmission mechanism in TCP B will send an acknowledgment to
+      forestall a retransmission from TCP A.  Figure 3 shows an example
+      of a slow server application.  Although the sequence in Figure 3
+      does contain a 3-way handshake, the TAO mechanism has allowed the
+      request data to be accepted immediately, so that the client still
+      sees the minimum latency.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Braden                                                          [Page 7]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+
+       TCP A  (Client)                                    TCP B (Server)
+       _______________                                    ______________
+
+       CLOSED                                                     LISTEN
+
+   #1  SYN-SENT*       --> <SYN,data1,FIN,CC=x> -->          CLOSE-WAIT*
+                                                        (TAO test OK =>
+                                                          data1->user_B)
+
+                                                               (timeout)
+   #2  FIN-WAIT-1  <-- <SYN,ACK(FIN),CC=y,CC.ECHO=x> <--     CLOSE-WAIT*
+
+
+   #3  FIN-WAIT-1      --> <ACK(SYN),FIN,CC=x> -->            CLOSE-WAIT
+
+
+   #4  TIME-WAIT   <-- <ACK(FIN),data2,FIN,CC=y> <--            LAST-ACK
+       (data2->user_A)
+
+   #5  TIME-WAIT       --> <ACK(FIN),CC=x> -->                    CLOSED
+
+         (timeout)
+        CLOSED
+
+                  Figure 3: Acknowledgment Timeout in Server
+
+
+   2.3  Protocol Correctness
+
+      This section fills in more details of the TAO mechanism and
+      provides an informal sketch of why the T/TCP protocol works.
+
+      CC values are 32-bit integers.  The TAO test requires the same
+      kind of modular arithmetic that is used to compare two TCP
+      sequence numbers.  We assume that the boundary between y < z and z
+      < y for two CC values y and z occurs when they differ by 2**31,
+      i.e., by half the total CC space.
+
+      The essential requirement for correctness of T/TCP is this:
+
+           CC values must advance at a rate slower than 2**31      [R1]
+           counts per 2*MSL
+
+      where MSL denotes the maximum segment lifetime in the Internet.
+      The requirement [R1] is easily met with a 32-bit CC.  For example,
+      it will allow 10**6 transactions per second with the very liberal
+      MSL of 1000 seconds [RFC-1379].  This is well in excess of the
+
+
+
+Braden                                                          [Page 8]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+      transaction rates achievable with current operating systems and
+      network latency.
+
+      Assume for the present that successive connections from client A
+      to server B contain only monotonically increasing CC values.  That
+      is, if x(i) and x(i+1) are CC values carried in two successive
+      initial <SYN> segments from the same host, then x(i+1) > x(i).
+      Assuming the requirement [R1], the CC space cannot wrap within the
+      range of segments that can be outstanding at one time.  Therefore,
+      those successive <SYN> segments from a given host that have not
+      exceeded their MSL must contain an ordered set of CC values:
+
+             x(1) < x(2) < x(3) ... < x(n),
+
+      where the modular comparisons have been replaced by simple
+      arithmetic comparisons. Here x(n) is the most recent acceptable
+      <SYN>, which is cached by the server.  If the server host receives
+      a <SYN> segment containing a CC option with value y where y >
+      x(n), that <SYN> must be newer; an antique duplicate SYN with CC
+      value greater than x(n) must have exceeded its MSL and vanished.
+      Hence, monotonic CC values and the TAO test prevent erroneous
+      replay of antique <SYN>s.
+
+      There are two possible reasons for a client to generate non-
+      monotonic CC values: (a) the client may have crashed and
+      restarted, causing the generated CC values to jump backwards; or
+      (b) the generated CC values may have wrapped around the finite
+      space.  Wraparound may occur because CC generation is global to
+      all connections.  Suppose that host A sends a transaction to B,
+      then sends more than 2**31 transactions to other hosts, and
+      finally sends another transaction to B.  From B's viewpoint, CC
+      will have jumped backward relative to its cached value.
+
+      In either of these two cases, the server may see the CC value jump
+      backwards only after an interval of at least MSL since the last
+      <SYN> segment from the same client host.  In case (a), client host
+      restart, this is because T/TCP retains TCP's explicit "Quiet Time"
+      of an MSL interval [STD-007].  In case (b), wraparound, [R1]
+      ensures that a time of at least MSL must have passed before the CC
+      space wraps around.  Hence, there is no possibility that a TAO
+      test will succeed erroneously due to either cause of non-
+      monotonicity; i.e., there is no chance of replays due to TAO.
+
+      However, although CC values jumping backwards will not cause an
+      error, it may cause a performance degradation due to unnecessary
+      3WHS's.  This results from the generated CC values jumping
+      backwards through approximately half their range, so that all
+      succeeding TAO tests fail until the generated CC values catch up
+
+
+
+Braden                                                          [Page 9]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+      to the cached value.  To avoid this degradation, a client host
+      sends a CC.NEW option instead of a CC option in the case of either
+      system restart or CC wraparound.  Receiving CC.NEW forces a 3WHS,
+      but when this 3WHS completes successfully the server cache is
+      updated to the new CC value.  To detect CC wraparound, the client
+      must cache the last CC value it sent to each server.  It therefore
+      maintains cache.CCsent[B] for each server B.  If this cached value
+      is undefined or if it is larger than the next CC value generated
+      at the client, then the client sends a CC.NEW instead of a CC
+      option in the next SYN segment.
+
+      This is illustrated in Figure 4, which shows the scenario for the
+      first transaction from A to B after the client host A has crashed
+      and recovered.  A similar sequence occurs if x is not greater than
+      cache.CCsent[B], i.e., if there is a wraparound of the generated
+      CC values.  Because segment #1 contains a CC.NEW option, the
+      server host invalidates the cache entry and does a 3WHS; however,
+      it still sets B's TCB.CCrecv for this connection to x.  TCP B uses
+      this CCrecv value to validate the <ACK> segment (#3) that
+      completes the 3WHS.  Receipt of this segment updates cache.CC[A],
+      since the cache entry was previously undefined.  (If a 3WHS always
+      updated the cache, then out-of-order SYN segments could cause the
+      cached value to jump backwards, possibly allowing replays).
+      Finally, the CC.ECHO option in the <SYN,ACK> segment #2 defines
+      A's cache.CCsent entry.
+
+      This algorithm delays updating cache.CCsent[] until the <SYN> has
+      been ACK'd.  This allows the undefined cache.CCsent value to be
+      used as a "first-time switch" for reliable resynchronization of
+      the cached value at the server after a crash or wraparound.
+
+      When we use the term "cache", we imply that the value can be
+      discarded at any time without introducing erroneous behavior
+      although it may degrade performance.
+
+      (a)  If a server host receives an initial <SYN> from client A but
+           has no cached value cache.CC[A], the server simply forces a
+           3WHS to validate the <SYN> segment.
+
+      (b)  If a client host has no cached value cache.CCsent[B] when it
+           needs to send an initial <SYN> segment, the client simply
+           sends a CC.NEW option in the segment.  This forces a 3WHS at
+           the server.
+
+
+
+
+
+
+
+
+Braden                                                         [Page 10]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+          TCP A  (Client)                                TCP B (Server)
+          _______________                                ______________
+
+          cache.CCsent[B]                                   cache.CC[A]
+              V                                                  V
+
+        (Crash and restart)
+            [ ?? ]                                            [ x0 ]
+
+        #1         --> <SYN, data1,CC.NEW=x> -->      (invalidate cache;
+                                                            queue data1;
+                                                        3-way handshake)
+
+            [ ?? ]                                            [ ?? ]
+        #2          <-- <SYN, ACK(data1),CC=y,CC.ECHO=x> <--
+          (cache.CCsent[B]= x;)
+
+            [ x ]                                             [ ?? ]
+
+        #3                  --> <ACK(SYN),CC=x> -->       data1->user_B;
+                                                         cache.CC[A]= x;
+
+            [ x ]                                              [ x ]
+
+                      Figure 4.  Client Host Restarting
+
+
+      So far, we have considered only correctness of the TAO mechanism
+      for bypassing the 3WHS.  We must also protect a connection against
+      antique duplicate non-SYN segments.  In standard TCP, such
+      protection is one of the functions of the TIME-WAIT state delay.
+      (The other function is the TCP full-duplex close semantics, which
+      we need to preserve; that is discussed below in Section 2.5).  In
+      order to achieve a high rate of transaction processing, it must be
+      possible to truncate this TIME-WAIT state delay without exposure
+      to antique duplicate segments [RFC-1379].
+
+      For short connections (e.g., transactions), the CC values assigned
+      to each direction of the connection can be used to protect against
+      antique duplicate non-SYN segments.  Here we define "short" as a
+      duration less than MSL.  Suppose that there is a connection that
+      uses the CC values TCB.CCsend = x and TCB.CCrecv = y.  By the
+      requirement [R1], neither x nor y can be reused for a new
+      connection from the same remote host for a time at least 2*MSL.
+      If the connection has been in existence for a time less than MSL,
+      then its CC values will not be reused for a period that exceeds
+      MSL, and therefore all antique duplicates with that CC value must
+      vanish before it is reused.  Thus, for "short" connections we can
+
+
+
+Braden                                                         [Page 11]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+      guard against antique non-SYN segments by simply checking the CC
+      value in the segment against TCB.CCrecv.  Note that this check
+      does not use the monotonic property of the CC values, only that
+      they not cycle in less than 2*MSL.  Again, the quiet time at
+      system restart protects against errors due to crash with loss of
+      state.
+
+      If the connection duration exceeds MSL, safety from old duplicates
+      still requires a TIME-WAIT delay of 2*MSL.  Thus, truncation of
+      TIME-WAIT state is only possible for short connections.  (This
+      problem has also been noticed by Shankar and Lee [ShankarLee93]).
+      This difference in behavior for long and for short connections
+      does create a slightly complex service model for applications
+      using T/TCP.  An application has two different strategies for
+      multiple connections.  For "short" connections, it should use a
+      fixed port pair and use the T/TCP mechanism to get rapid and
+      efficient transaction processing.  For connections whose durations
+      are of the order of MSL or longer, it should use a different user
+      port for each successive connection, as is the current practice
+      with unmodified TCP.  The latter strategy will cause excessive
+      overhead (due to TCB's in TIME-WAIT state) if it is applied to
+      high-frequency short connections.  If an application makes the
+      wrong choice, its attempt to open a new connection may fail with a
+      "busy" error.  If connection durations may range between long and
+      short, an application may have to be able to switch strategies
+      when one fails.
+
+   2.4  Truncating TIME-WAIT State
+
+      Truncation of TIME-WAIT state is necessary to achieve high
+      transaction rates.  As Figure 2 illustrates, a standard
+      transaction leaves the client end of the connection in TIME-WAIT
+      state.  This section explains the protocol implications of
+      truncating TIME-WAIT state, when it is allowed (i.e., when the
+      connection has been in existence for less than MSL).  In this
+      case, the client host should be able to interrupt TIME-WAIT state
+      to initiate a new incarnation of the same connection (i.e., using
+      the same host and ports).  This will send an initial <SYN>
+      segment.
+
+      It is possible for the new <SYN> to arrive at the server before
+      the retransmission state from the previous incarnation is gone, as
+      shown in Figure 5.  Here the final <ACK> (segment #3) from the
+      previous incarnation is lost, leaving retransmission state at B.
+      However, the client received segment #2 and thinks the transaction
+      completed successfully, so it can initiate a new transaction by
+      sending <SYN> segment #4.  When this <SYN> arrives at the server
+      host, it must implicitly acknowledge segment #2, signalling
+
+
+
+Braden                                                         [Page 12]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+      success to the server application, deleting the old TCB, and
+      creating a new TCB, as shown in Figure 5.  Still assuming that the
+      new <SYN> is known to be valid, the server host marks the new
+      connection half-synchronized and delivers data3 to the server
+      application.  (The details of how this is accomplished are
+      presented in Section 3.3.)
+
+      The earlier discussion of the TAO mechanism assumed that the
+      previous incarnation was closed before a new <SYN> arrived at the
+      server.  However, TAO cannot be used to validate the <SYN> if
+      there is still state from the previous incarnation, as shown in
+      Figure 5; in this case, it would be exceedingly awkward to perform
+      a 3WHS if the TAO test should fail.  Fortunately, a modified
+      version of the TAO test can still be performed, using the state in
+      the earlier TCB rather than the cached state.
+
+      (A)  If the <SYN> segment contains a CC or CC.NEW option, the
+           value SEG.CC from this option is compared with TCB.CCrecv,
+           the CC value in the still-existing state block of the
+           previous incarnation.  If SEG.CC > TCB.CCrecv, the new <SYN>
+           segment must be valid.
+
+      (B)  Otherwise, the <SYN> is an old duplicate and is simply
+           discarded.
+
+      Truncating TIME-WAIT state may be looked upon as composing an
+      extended state machine that joins the state machines of the two
+      incarnations, old and new.  It may be described by introducing new
+      intermediate states (which we call I-states), with transitions
+      that join the two diagrams and share some state from each.  I-
+      states are detailed in Section 3.3.
+
+      Notice also segment #2' in Figure 5.  TCP's mechanism to recover
+      from half-open connections (see Figure 10 of [STD-007]) causes TCP
+      A to send a RST when #2' arrives, which would incorrectly make B
+      think that the previous transaction did not complete successfully.
+      The half-open recovery mechanism must be defeated in this case, by
+      A ignoring segment #2'.
+
+
+
+
+
+
+
+
+
+
+
+
+
+Braden                                                         [Page 13]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+
+      TCP A  (Client)                                     TCP B (Server)
+      _______________                                     ______________
+
+      CLOSED                                                      LISTEN
+
+  #1                --> <...,FIN,CC=x> -->                     LAST-ACK*
+
+  #2         <-- <...ACK(FIN),data2,FIN,CC=y,CC.ECHO=x>  <---  LAST-ACK*
+      TIME-WAIT
+    (data2->user_A)
+
+
+  #3  TIME-WAIT          --> <ACK(FIN),CC=x> --> X (DROP)
+
+      (New Active Open)                           (New Passive Open)
+
+  #4  SYN-SENT*    -->  <SYN, data3,CC=z> ...
+
+                                                               LISTEN-LA
+  #2' (discard) <-- <...ACK(FIN),data2,FIN,CC=y> <--- (retransmit)
+
+  #4  SYN-SENT*        ... <SYN,data3,CC=z> -->            ESTABLISHED*
+                                                    SYN OK (see text) =>
+                                                            {Ack seg #2;
+                                                         Delete old TCB;
+                                                         Create new TCB;
+                                                        data3 -> user_B;
+                                                        cache.CC[A]= z;}
+
+        Figure 5: Truncating TIME-WAIT State: SYN as Implicit ACK
+
+
+   2.5  Transition to Standard TCP Operation
+
+      T/TCP includes all normal TCP semantics, and it will continue to
+      operate exactly like TCP when the particular assumptions for
+      transactions do not hold.  There is no limit on the size of an
+      individual transaction, and behavior of T/TCP should merge
+      seamlessly from pure transaction operation as shown in Figure 2,
+      to pure streaming mode for sending large files.  All the sequences
+      shown in [STD-007] are still valid, and the inherent symmetry of
+      TCP is preserved.
+
+      Figure 6 shows a possible sequence when the request and response
+      messages each require two segments.  Segment #2 is a non-SYN
+      segment that contains a TCP option.  To avoid compatibility
+      problems with existing TCP implementations, the client side should
+
+
+
+Braden                                                         [Page 14]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+      send segment #2 only if cache.CCsent[B] is defined, i.e., only if
+      host A knows that host B plays the new game.
+
+
+
+          TCP A  (Client)                                 TCP B (Server)
+          _______________                                 ______________
+
+          CLOSED                                                  LISTEN
+
+
+       #1  SYN-SENT*       --> <SYN,data1,CC=x>  -->        ESTABLISHED*
+                                                       (TAO test OK =>
+                                                        data1-> user)
+
+       #2  SYN-SENT*       --> <data2,FIN,CC=x>  -->         CLOSE-WAIT*
+                                                       (data2-> user)
+
+                                                             CLOSE-WAIT*
+       #3  FIN-WAIT-2  <-- <SYN,ACK(FIN),data3,CC=y,CC.ECHO=x> <--
+            (data3->user)
+
+       #4  TIME-WAIT   <-- <ACK(FIN),data4,FIN,CC=y> <--       LAST-ACK*
+            (data4->user)
+
+       #5  TIME-WAIT       --> <ACK(FIN),CC=x> -->                CLOSED
+
+
+            Figure 6. Multi-Packet Request/Response Sequence
+
+      Figure 7 shows a more complex example, one possible sequence with
+      TAO combined with simultaneous open and close.  This may be
+      compared with Figure 8 of [STD-007].
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Braden                                                         [Page 15]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+
+          TCP A                                                    TCP B
+          _______________                                 ______________
+
+          CLOSED                                                  CLOSED
+
+      #1  SYN-SENT*         --> <SYN,data1,FIN,CC=x> ...
+
+      #2  CLOSING*     <-- <SYN,data2,FIN,CC=y> <--            SYN-SENT*
+          (TAO test OK =>
+           data2->user_A)
+
+      #3  CLOSING*      --> <FIN,ACK(FIN),CC=x,CC.ECHO=y> ...
+
+      #1'                       ... <SYN,data1,FIN,CC=x> -->    CLOSING*
+                                                       (TAO test OK =>
+                                                        data1->user_B)
+
+      #4  TIME-WAIT   <-- <FIN,ACK(FIN),CC=y,CC.ECHO=x> <--     CLOSING*
+
+      #5  TIME-WAIT    --> <ACK(FIN),CC=x> ...
+
+      #3'              ... <FIN,ACK(FIN),CC=x,CC.ECHO=y> -->   TIME-WAIT
+
+      #6  TIME-WAIT            <-- <ACK(FIN),CC=y> <---        TIME-WAIT
+
+      #5' TIME-WAIT               ... <ACK(FIN),CC=x> -->      TIME-WAIT
+
+          (timeout)                                            (timeout)
+            CLOSED                                                CLOSED
+
+                  Figure 7: Simultaneous Open and Close
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Braden                                                         [Page 16]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+3.  FUNCTIONAL SPECIFICATION
+
+   3.1  Data Structures
+
+      A connection count is an unsigned 32-bit integer, with the value
+      zero excluded.  Zero is used to denote an undefined value.
+
+      A host maintains a global connection count variable CCgen, and
+      each connection control block (TCB) contains two new connection
+      count variables, TCB.CCsend and TCB.CCrecv.  Whenever a TCB is
+      created for the active or passive end of a new connection, CCgen
+      is incremented by 1 and placed in TCB.CCsend of the TCB; however,
+      if the previous CCgen value was 0xffffffff (-1), then the next
+      value should be 1.  TCB.CCrecv is initialized to zero (undefined).
+
+      T/TCP adds a per-host cache to TCP.  An entry in this cache for
+      foreign host fh includes two CC values, cache.CC[fh] and
+      cache.CCsent[fh].  It may include other values, as discussed in
+      Sections 4.3 and 4.4.  According to [STD-007], a TCP is not
+      permitted to send a segment larger than the default size 536,
+      unless it has received a larger value in an MSS (Maximum Segment
+      Size) option.  This could constrain the client to use the default
+      MSS of 536 bytes for every request.  To avoid this constraint, a
+      T/TCP may cache the MSS option values received from remote hosts,
+      and we allow a TCP to use a cached MSS option value for the
+      initial SYN segment.
+
+      When the client sends an initial <SYN> segment containing data, it
+      does not have a send window for the server host.  This is not a
+      great difficulty; we simply define a default initial window; our
+      current suggestion is 4K.  Such a non-zero default should be
+      conditioned upon the existence of a cached connection count for
+      the foreign host, so that data may be included on an initial SYN
+      segment only if cache.CC[foreign host] is non-zero.
+
+      In TCP, the window is dynamically adjusted to provide congestion
+      control/avoidance [Jacobson88].  It is possible that a particular
+      path might not be able to absorb an initial burst of 4096 bytes
+      without congestive losses.  If this turns out to be a problem, it
+      should be possible to cache the congestion threshold for the path
+      and use this value to determine the maximum size of the initial
+      packet burst created by a request.
+
+   3.2  New TCP Options
+
+      Three new TCP options are defined: CC, CC.NEW, and CC.ECHO.  Each
+      carries a connection count SEG.CC.  The complete rules for sending
+      and processing these options are given in Section 3.4 below.
+
+
+
+Braden                                                         [Page 17]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+      CC Option
+
+         Kind: 11
+
+         Length: 6
+
+            +--------+--------+--------+--------+--------+--------+
+            |00001011|00000110|    Connection Count:  SEG.CC      |
+            +--------+--------+--------+--------+--------+--------+
+             Kind=11  Length=6
+
+         This option may be sent in an initial SYN segment, and it may
+         be sent in other segments if a CC or CC.NEW option has been
+         received for this incarnation of the connection.  Its SEG.CC
+         value is the TCB.CCsend value from the sender's TCB.
+
+      CC.NEW Option
+
+         Kind: 12
+
+         Length: 6
+
+            +--------+--------+--------+--------+--------+--------+
+            |00001100|00000110|    Connection Count:  SEG.CC      |
+            +--------+--------+--------+--------+--------+--------+
+             Kind=12  Length=6
+
+         This option may be sent instead of a CC option in an initial
+         <SYN> segment (i.e., SYN but not ACK bit), to indicate that the
+         SEG.CC value may not be larger than the previous value.  Its
+         SEG.CC value is the TCB.CCsend value from the sender's TCB.
+
+      CC.ECHO Option
+
+         Kind: 13
+
+         Length: 6
+
+            +--------+--------+--------+--------+--------+--------+
+            |00001101|00000110|    Connection Count:  SEG.CC      |
+            +--------+--------+--------+--------+--------+--------+
+             Kind=13  Length=6
+
+         This option must be sent (in addition to a CC option) in a
+         segment containing both a SYN and an ACK bit, if the initial
+         SYN segment contained a CC or CC.NEW option.  Its SEG.CC value
+         is the SEG.CC value from the initial SYN.
+
+
+
+
+Braden                                                         [Page 18]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+         A CC.ECHO option should be sent only in a <SYN,ACK> segment and
+         should be ignored if it is received in any other segment.
+
+   3.3  Connection States
+
+      T/TCP requires new connection states and state transitions.
+      Figure 8 shows the resulting finite state machine; see [RFC-1379]
+      for a detailed development.  If all state names ending in stars
+      are removed from Figure 8, the state diagram reduces to the
+      standard TCP state machine (see Figure 6 of [STD-007]), with two
+      exceptions:
+
+      *    STD-007 shows a direct transition from SYN-RECEIVED to FIN-
+           WAIT-1 state when the user issues a CLOSE call.  This
+           transition is suspect; a more accurate description of the
+           state machine would seem to require the intermediate SYN-
+           RECEIVED* state shown in Figure 8.
+
+      *    In STD-007, a user CLOSE call in SYN-SENT state causes a
+           direct transition to CLOSED state.  The extended diagram of
+           Figure 8 forces the connection to open before it closes,
+           since calling CLOSE to terminate the request in SYN-SENT
+           state is normal behavior for a transaction client.  In the
+           case that no data has been sent in SYN-SENT state, it is
+           reasonable for a user CLOSE call to immediately enter CLOSED
+           state and delete the TCB.
+
+      Each of the new states in Figure 8 bears a starred name, created
+      by suffixing a star onto a standard TCP state.  Each "starred"
+      state bears a simple relationship to the corresponding "unstarred"
+      state.
+
+      o    SYN-SENT* and SYN-RECEIVED* differ from the SYN-SENT and
+           SYN-RECEIVED state, respectively, in recording the fact that
+           a FIN needs to be sent.
+
+      o    The other starred states indicate that the connection is
+           half-synchronized (hence, a SYN bit needs to be sent).
+
+
+
+
+
+
+
+
+
+
+
+
+
+Braden                                                         [Page 19]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+      ________      g        ________
+     |        |<------------|        |
+     | CLOSED |------------>| LISTEN |
+     |________|  h    ------|________|
+          |          /        |     |
+          |         /        i|    j|
+          |        /          |     |
+         a|     a'/           |    _V______               ________
+          |      /     j      |   |ESTAB-  |       e'    | CLOSE- |
+          |     /  -----------|-->| LISHED*|------------>|   WAIT*|
+          |    /  /           |   |________|             |________|
+          |   /  /            |    |     |                |     |
+          |  /  /             |    |    c|              d'|    c|
+      ____V_V_ /       _______V    |   __V_____           |   __V_____
+     | SYN-   |   b'  |  SYN-  |c  |  |ESTAB-  |  e       |  | CLOSE- |
+     |   SENT |------>|RECEIVED|---|->|  LISHED|----------|->|   WAIT |
+     |________|       |________|   |  |________|          |  |________|
+        |               |          |     |                |        |
+        |               |          |     |              __V_____   |
+        |               |          |     |             | LAST-  |  |
+      d'|             d'|        d'|    d|             |  ACK*  |  |
+        |               |          |     |             |________|  |
+        |               |          |     |                    |    |
+        |               |    ______V_    |        ________    |c'  |d
+        |          k    |   |  FIN-  |   |  e''' |        |   |    |
+        |        -------|-->| WAIT-1*|---|------>|CLOSING*|   |    |
+        |       /       |   |________|   |       |________|   |    |
+        |      /        |          |     |            |       |    |
+        |     /         |        c'|     |          c'|       |    |
+     ___V___ /      ____V___       V_____V_       ____V___    V____V__
+    | SYN-   | b'' |  SYN-  |  c  |  FIN-  | e'' |        |  | LAST-  |
+    |  SENT* |---->|RECEIVD*|---->| WAIT-1 |---->|CLOSING |  |   ACK  |
+    |________|     |________|     |________|     |________|  |________|
+                                        |               |           |
+                                       f|              f|         f'|
+                                     ___V____       ____V___     ___V____
+                                    |  FIN-  | e   |TIME-   | T |        |
+                                    | WAIT-2 |---->|   WAIT |-->| CLOSED |
+                                    |________|     |________|   |________|
+
+
+                 Figure 8A: Basic T/TCP State Diagram
+
+
+
+
+
+
+
+
+
+Braden                                                         [Page 20]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+    ________________________________________________________________
+   |                                                                |
+   |        Label          Event / Action                           |
+   |        _____          ________________________                 |
+   |                                                                |
+   |          a            Active OPEN / create TCB, snd SYN        |
+   |          a'           Active OPEN / snd SYN                    |
+   |          b            rcv SYN [no TAO]/ snd ACK(SYN)           |
+   |          b'           rcv SYN [no TAO]/ snd SYN,ACK(SYN)       |
+   |          b''          rcv SYN [no TAO]/ snd SYN,FIN,ACK(SYN)   |
+   |          c            rcv ACK(SYN) /                           |
+   |          c'           rcv ACK(SYN) / snd FIN                   |
+   |          d            CLOSE / snd FIN                          |
+   |          d'           CLOSE / snd SYN,FIN                      |
+   |          e            rcv FIN / snd ACK(FIN)                   |
+   |          e'           rcv FIN / snd SYN,ACK(FIN)               |
+   |          e''          rcv FIN / snd FIN,ACK(FIN)               |
+   |          e'''         rcv FIN / snd SYN,FIN,ACK(FIN)           |
+   |          f            rcv ACK(FIN) /                           |
+   |          f'           rcv ACK(FIN) / delete TCB                |
+   |          g            CLOSE / delete TCB                       |
+   |          h            passive OPEN / create TCB                |
+   |          i (= b')     rcv SYN [no TAO]/ snd SYN,ACK(SYN)       |
+   |          j            rcv SYN [TAO OK] / snd SYN,ACK(SYN)      |
+   |          k            rcv SYN [TAO OK] / snd SYN,FIN,ACK(SYN)  |
+   |          T            timeout=2MSL / delete TCB                |
+   |                                                                |
+   |                                                                |
+   |          Figure 8B.  Definition of State Transitions           |
+   |________________________________________________________________|
+
+      This simple correspondence leads to an alternative state model,
+      which makes it easy to incorporate the new states in an existing
+      implementation.  Each state in the extended FSM is defined by the
+      triplet:
+
+          (old_state, SENDSYN, SENDFIN)
+
+      where 'old_state' is a standard TCP state and SENDFIN and SENDSYN
+      are Boolean flags; see Figure 9.  The SENDFIN flag is turned on (on
+      the client side) by a SEND(...  EOF=YES) call, to indicate that a
+      FIN should be sent in a state which would not otherwise send a
+      FIN.  The SENDSYN flag is turned on when the TAO test succeeds to
+      indicate that the connection is only half synchronized; as a
+      result, a SYN will be sent in a state which would not otherwise
+      send a SYN.
+
+
+
+
+
+Braden                                                         [Page 21]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+       ________________________________________________________________
+      |                                                                |
+      |   New state:         Old_state:    SENDSYN:      SENDFIN:      |
+      |  __________         __________      ______        ______       |
+      |                                                                |
+      |  SYN-SENT*     =>   SYN-SENT        FALSE          TRUE        |
+      |                                                                |
+      |  SYN-RECEIVED* =>   SYN-RECEIVED    FALSE          TRUE        |
+      |                                                                |
+      |  ESTABLISHED*  =>   ESTABLISHED      TRUE         FALSE        |
+      |                                                                |
+      |  CLOSE-WAIT*   =>   CLOSE-WAIT       TRUE         FALSE        |
+      |                                                                |
+      |  LAST-ACK*     =>   LAST-ACK         TRUE         FALSE        |
+      |                                                                |
+      |  FIN-WAIT-1*   =>   FIN-WAIT-1       TRUE         FALSE        |
+      |                                                                |
+      |  CLOSING*      =>   CLOSING          TRUE         FALSE        |
+      |                                                                |
+      |                                                                |
+      |           Figure 9: Alternative State Definitions              |
+      |________________________________________________________________|
+
+
+      Here is a more complete description of these boolean variables.
+
+      *    SENDFIN
+
+           SENDFIN is turned on by the SEND(...EOF=YES) call, and turned
+           off when FIN-WAIT-1 state is entered.  It may only be on in
+           SYN-SENT* and SYN-RECEIVED* states.
+
+           SENDFIN has two effects.  First, it causes a FIN to be sent
+           on the last segment of data from the user.  Second, it causes
+           the SYN-SENT[*] and SYN-RECEIVED[*] states to transition
+           directly to FIN-WAIT-1, skipping ESTABLISHED state.
+
+      *    SENDSYN
+
+           The SENDSYN flag is turned on when an initial SYN segment is
+           received and passes the TAO test.  SENDSYN is turned off when
+           the SYN is acknowledged (specifically, when there is no RST
+           or SYN bit and SND.UNA < SEG.ACK).
+
+           SENDSYN has three effects.  First, it causes the SYN bit to
+           be set in segments sent with the initial sequence number
+           (ISN).  Second, it causes a transition directly from LISTEN
+           state to ESTABLISHED*, if there is no FIN bit, or otherwise
+
+
+
+Braden                                                         [Page 22]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+           to CLOSE-WAIT*.  Finally, it allows data to be received and
+           processed (passed to the application) even if the segment
+           does not contain an ACK bit.
+
+      According to the state model of the basic TCP specification [STD-
+      007], the server side must explicitly issue a passive OPEN call,
+      creating a TCB in LISTEN state, before an initial SYN may be
+      accepted.  To accommodate truncation of TIME-WAIT state within
+      this model, it is necessary to add the five "I-states" shown in
+      Figure 10.  The I-states are:  LISTEN-LA, LISTEN-LA*, LISTEN-CL,
+      LISTEN-CL*, and LISTEN-TW.  These are 'bridge states' between the
+      state diagrams of two successive incarnations.
+      Here D is the duration of the previous connection, i.e., the
+      elapsed time since the connection opened.  The transitions labeled
+      with lower-case letters are taken from Figure 8.
+
+      Fortunately, many TCP implementations have a different user
+      interface model, in which the user can issue a generic passive open
+      ("listen") call; thereafter, when a matching initial SYN arrives,
+      a new TCB in LISTEN state is automatically generated.  With this
+      user model, the I-states of Figure 10 are unnecessary.
+
+      For example, suppose an initial SYN segment arrives for a
+      connection that is in LAST-ACK state.  If this segment carries a
+      CC option and if SEG.CC is greater than TCB.CCrecv in the existing
+      TCB, the "q" transition shown in Figure 10 can be made directly
+      from the LAST-ACK state.  That is, the previous TCB is processed
+      as if an ACK(FIN) had arrived, causing the user to be notified of
+      a successful CLOSE and the TCB to be deleted.  Then processing of
+      the new SYN segment is repeated, using a new TCB that is generated
+      automatically.  The same principle can be used to avoid
+      implementing any of the I-states.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Braden                                                         [Page 23]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+ ______________________________
+| P: Passive OPEN /            |
+|                              |
+| Q: Rcv SYN, special TAO test |                     d'|     d|
+|     (see text) / Delete TCB, |    ________        ___V____  |
+|     create TCB, snd SYN      |   |LISTEN- |  P   | LAST-  | |
+|                              |   |   LA*  |<-----|  ACK*  | |
+| Q': (same as Q) if D < MSL   |   |________|      |________| |
+|                              |    |     |            |      |
+| R: Rcv ACK(FIN) / Delete TCB,|   Q|   c'|          c'|      |
+|     create TCB               |    |     |            |      |
+|                              |    |  ___V____        V______V
+| S': Active OPEN if D < MSL / |    | |LISTEN- |  P   | LAST-  |
+|     Delete TCB, create TCB,  |    | |  LA    |<-----|   ACK  |
+|     snd SYN.                 |    | |________|      |________|
+|______________________________|    |  |     |            |
+                                    | Q|    R|           f|
+         ________        ________   |  |     |            |
+   e''' |        |  P   |LISTEN- |  |  |     V            V
+   ---->|CLOSING*|----->|   CL*  |  |  |   LISTEN       CLOSED
+        |________|      |________|  |  |
+             |            |   Q|    |  |
+           c'|          c'|    V    V  V
+             |            |   ESTABLISHED*
+         ____V___         V_______
+    e'' |        |  P    |LISTEN- |
+   ---->|CLOSING |------>|   CL   |
+        |________|       |________|
+             |           R|     Q|
+            f|            V      V
+             |         LISTEN   ESTABLISHED*
+         ____V___                _________
+     e  |TIME-   |  P           | LISTEN- |
+   ---->|   WAIT |------------->|    TW   |
+        |________|              |_________|
+        /     |                  |    |  |
+     S'/     T|                 T|  Q'|  |S'
+      |  _____V_      h     _____V__  |  V
+      | |        |-------->|        | |  SYN-SENT
+      | | CLOSED |<--------| LISTEN | |
+      | |________|   ------|________| |
+      |   |        /        |   j|    |
+      |  a|     a'/        i|    V    V
+      |   |      /          |   ESTABLISHED*
+      V   V     V           V
+        SYN-SENT           ...
+
+             Figure 10: I-States for TIME-WAIT Truncation
+
+
+
+Braden                                                         [Page 24]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+   3.4  T/TCP Processing Rules
+
+      This section summarizes the rules for sending and processing the
+      T/TCP options.
+
+      INITIALIZATION
+
+         I1:  All cache entries cache.CC[*] and cache.CCsent[*] are
+              undefined (zero) when a host system initializes, and CCgen
+              is set to a non-zero value.
+
+         I2:  A new TCB is initialized with TCB.CCrecv = 0 and
+              TCB.CCsend = current CCgen value; CCgen is then
+              incremented.  If the result is zero, CCgen is incremented
+              again.
+
+
+      SENDING SEGMENTS
+
+         S1:  Sending initial <SYN> Segment
+
+              An initial <SYN> segment is sent with either a CC option
+              or a CC.NEW option.  If cache.CCsent[fh] is undefined or
+              if TCB.CCsend < cache.CCsent[fh], then the option
+              CC.NEW(TCB.CCsend) is sent and cache.CCsent[fh] is set to
+              zero.  Otherwise, the option CC(TCB.CCsend) is sent and
+              cache.CCsent[fh] is set to CCsend.
+
+         S2:  Sending <SYN,ACK> Segment
+
+              If the sender's TCB.CCrecv is non-zero, then a <SYN,ACK>
+              segment is sent with both a CC(TCB.CCsend) option and a
+              CC.ECHO (TCB.CCrecv) option.
+
+         S3:  Sending Non-SYN Segment
+
+              A non-SYN segment is sent with a CC(TCB.CCsend) option if
+              the TCB.CCrecv value is non-zero, or if the state is SYN-
+              SENT or SYN-SENT* and cache.CCsent[fh] is non-zero (this
+              last is required to send CC options in the segments
+              following the first of a multi-segment request message;
+              see segment #2 in Figure 6).
+
+      RECEIVING INITIAL <SYN> SEGMENT
+
+         Suppose that a server host receives a segment containing a SYN
+         bit but no ACK bit in LISTEN, SYN-SENT, or SYN-SENT* state.
+
+
+
+
+Braden                                                         [Page 25]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+         R1.1:If the <SYN> segment contains a CC or CC.NEW option,
+              SEG.CC is stored into TCB.CCrecv of the new TCB.
+
+         R1.2:If the segment contains a CC option and if the local cache
+              entry cache.CC[fh] is defined and if
+              SEG.CC > cache.CC[fh], then the TAO test is passed and the
+              connection is half-synchronized in the incoming direction.
+              The server host replaces the cache.CC[fh] value by SEG.CC,
+              passes any data in the segment to the user, and processes
+              a FIN bit if present.
+
+              Acknowledgment of the SYN is delayed to allow piggybacking
+              on a response segment.
+
+         R1.3:If SEG.CC <= cache.CC[fh] (the TAO test has failed), or if
+              cache.CC[fh] is undefined, or if there is no CC option
+              (but possibly a CC.NEW option), the server host proceeds
+              with normal TCP processing.  If the connection was in
+              LISTEN state, then the host executes a 3-way handshake
+              using the standard TCP rules.  In the SYN-SENT or SYN-
+              SENT* state (i.e., the simultaneous open case), the TCP
+              sends ACK(SYN) and enters SYN-RECEIVED state.
+
+         R1.4:If there is no CC option (but possibly a CC.NEW option),
+              then the server host sets cache.CC[fh] undefined (zero).
+              Receiving an ACK for a SYN (following application of rule
+              R1.3) will update cache.CC[fh], by rule R3.
+
+         Suppose that an initial <SYN> segment containing a CC or CC.NEW
+         option arrives in an I-state (i.e., a state with a name of the
+         form 'LISTEN-xx', where xx is one of TW, LA, LA*, CL, or CL*):
+
+         R1.5:If the state is LISTEN-TW, then the duration of the
+              current connection is compared with MSL.  If duration >
+              MSL then send a RST:
+
+                <SEQ=0><ACK=SEG.SEQ+SEG.LEN><CTL=RST,ACK>
+
+              drop the packet, and return.
+
+         R1.6:Perform a special TAO test: compare SEG.CC with
+              TCB.CCrecv.
+
+              If SEG.CC is greater, then processing is performed as if
+              an ACK(FIN) had arrived:  signal the application that the
+              previous close completed successfully and delete the
+              previous TCB.  Then create a new TCB in LISTEN state and
+              reprocess the SYN segment against the new TCB.
+
+
+
+Braden                                                         [Page 26]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+              Otherwise, silently discard the segment.
+
+      RECEIVING <SYN,ACK> SEGMENT
+
+         Suppose that a client host receives a <SYN,ACK> segment for a
+         connection in SYN-SENT or SYN-SENT* state.
+
+         R2.1:If SEG.ACK is not acceptable (see [STD-007]) and
+              cache.CCsent[fh] is non-zero, then simply drop the segment
+              without sending a RST.  (The new SYN that the client is
+              (re-)transmitting will eventually acknowledge any
+              outstanding data and FIN at the server.)
+
+         R2.2:If the segment contains a CC.ECHO option whose SEG.CC is
+              different from TCB.CCsend, then the segment is
+              unacceptable and is dropped.
+
+         R2.3:If cache.CCsent[fh] is zero, then it is set to TCB.CCsend.
+
+         R2.4:If the segment contains a CC option, its SEG.CC is stored
+              into TCB.CCrecv of the TCB.
+
+      RECEIVING <ACK> SEGMENT IN SYN-RECEIVED STATE
+
+         R3.1:If a segment contains a CC option whose SEG.CC differs
+              from TCB.CCrecv, then the segment is unacceptable and is
+              dropped.
+
+         R3.2:Otherwise, a 3-way handshake has completed successfully at
+              the server side.  If the segment contains a CC option and
+              if cache.CC[fh] is zero, then cache.CC[fh] is replaced by
+              TCB.CCrecv.
+
+      RECEIVING OTHER SEGMENT
+
+         R4:  Any other segment received with a CC option is
+              unacceptable if SEG.CC differs from TCB.CCrecv.  However,
+              a RST segment is exempted from this test.
+
+      OPEN REQUEST
+
+         To allow truncation of TIME-WAIT state, the following changes
+         are made in the state diagram for OPEN requests (see Figure
+         10):
+
+         O1.1:A new passive open request is allowed in any of the
+              states: LAST-ACK, LAST-ACK*, CLOSING, CLOSING*, or TIME-
+              WAIT.  This causes a transition to the corresponding I-
+
+
+
+Braden                                                         [Page 27]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+              state (see Figure 10), which retains the previous state,
+              including the retransmission queue and timer.
+
+         O1.2:A new active open request is allowed in TIME-WAIT or
+              LISTEN-TW state, if the elapsed time since the current
+              connection opened is less than MSL.  The result is to
+              delete the old TCB and create a new one, send a new SYN
+              segment, and enter SYN-SENT or SYN-SENT* state (depending
+              upon whether or not the SYN segment contains a FIN bit).
+
+      Finally, T/TCP has a provision to improve performance for the case
+      of a client that "sprays" transactions rapidly using many
+      different server hosts and/or ports.  If TCB.CCrecv in the TCB is
+      non-zero (and still assuming that the connection duration is less
+      than MSL), then the TIME-WAIT delay may be set to min(K*RTO,
+      2*MSL).  Here RTO is the measured retransmission timeout time and
+      the constant K is currently specified to be 8.
+
+   3.5  User Interface
+
+      STD-007 defines a prototype user interface ("transport service")
+      that implements the virtual circuit service model [STD-007,
+      Section 3.8].  One addition to this interface is required for
+      transaction processing: a new Boolean flag "end-of-file" (EOF),
+      added to the SEND call.  A generic SEND call becomes:
+
+        Send
+
+          Format:  SEND (local connection name, buffer address,
+               byte count, PUSH flag, URGENT flag, EOF flag [,timeout])
+
+      The following text would be added to the description of SEND in
+      [STD-007]:
+
+          If the EOF (End-Of-File) flag is set, any remaining queued
+          data is pushed and the connection is closed.  Just as with the
+          CLOSE call, all data being sent is delivered reliably before
+          the close takes effect, and data may continue to be received
+          on the connection after completion of the SEND call.
+
+      Figure 8A shows a skeleton sequence of user calls by which a
+      client could initiate a transaction.  The SEND call initiates a
+      transaction request to the foreign socket (host and port)
+      specified in the passive OPEN call.  The predicate "recv_EOF"
+      tests whether or not a FIN has been received on the connection;
+      this might be implemented using the STATUS command of [STD-007],
+      or it might be implemented by some operating-system-dependent
+      mechanism.  When recv_EOF returns TRUE, the connection has been
+
+
+
+Braden                                                         [Page 28]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+      completely closed and the client end of the connection is in
+      TIME-WAIT state.
+
+     __________________________________________________________________
+    |                                                                  |
+    |                                                                  |
+    | OPEN(local_port, foreign_socket, PASSIVE) -> conn_name;          |
+    |                                                                  |
+    | SEND(conn_name, request_buffer, length,                          |
+    |                                    PUSH=YES, URG=NO, EOF=YES);   |
+    |                                                                  |
+    | while (not recv_EOF(conn_name)) {                                |
+    |                                                                  |
+    |    RECEIVE(conn_name, reply_buffer, length) -> count;            |
+    |                                                                  |
+    |    <Process reply_buffer.>                                       |
+    | }                                                                |
+    |                                                                  |
+    |                                                                  |
+    |             Figure 8A: Client Side User Interface                |
+    |__________________________________________________________________|
+
+      If a client is going to send a rapid series of such requests to
+      the same foreign_socket, it should use the same local_port for
+      all.  This will allow truncation of TIME-WAIT state.  Otherwise,
+      it could leave local_port wild, allowing TCP to choose successive
+      local ports for each call, realizing that each transaction may
+      leave behind a significant control block overhead in the kernel.
+
+      Figure 8B shows a basic sequence of server calls.  The server
+      application waits for a request to arrive and then reads and
+      processes it until a FIN arrives (recv_EOF returns TRUE).  At this
+      time, the connection is half-closed.  The SEND call used to return
+      the reply completes the close in the other direction.  It should
+      be noted that the use of SEND(... EOF=YES) in Figure 8B instead of
+      a SEND, CLOSE sequence is only an optimization; it allows
+      piggybacking the FIN in order to minimize the number of segments.
+      It should have little effect on transaction latency.
+
+
+
+
+
+
+
+
+
+
+
+
+
+Braden                                                         [Page 29]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+     __________________________________________________________________
+    |                                                                  |
+    |                                                                  |
+    | OPEN(local_port, ANY_SOCKET, PASSIVE) -> conn_name;              |
+    |                                                                  |
+    | <Wait for connection to open.>                                   |
+    |                                                                  |
+    | STATUS(conn_name) -> foreign_socket                              |
+    |                                                                  |
+    | while (not recv_EOF(conn_name)) {                                |
+    |                                                                  |
+    |    RECEIVE(conn_name, request_buffer, length) -> count;          |
+    |                                                                  |
+    |     <Process request_buffer.>                                    |
+    | }                                                                |
+    |                                                                  |
+    | <Compute reply and store into reply_buffer.>                     |
+    |                                                                  |
+    | SEND(conn_name, reply_buffer, length,                            |
+    |                                  PUSH=YES, URG=NO, EOF=YES);     |
+    |                                                                  |
+    |                                                                  |
+    |             Figure 8B: Server Side User Interface                |
+    |__________________________________________________________________|
+
+
+4.  IMPLEMENTATION ISSUES
+
+   4.1  RFC-1323 Extensions
+
+      A recently-proposed set of TCP enhancements [RFC-1323] defines a
+      Timestamps option, which carries two 32-bit timestamp values.
+      This option is used to accurately measure round-trip time (RTT).
+      The same option is also used in a procedure known as "PAWS"
+      (Protect Against Wrapped Sequence) to prevent erroneous data
+      delivery due to a combination of old duplicate segments and
+      sequence number reuse at very high bandwidths.  The approach to
+      transactions specified in this memo is independent of the RFC-1323
+      enhancements, but implementation of RFC-1323 is desirable for all
+      TCP's.
+
+      The RFC-1323 extensions share several common implementation issues
+      with the T/TCP extensions.  Both require that TCP headers carry
+      options.  Accommodating options in TCP headers requires changes in
+      the way that the maximum segment size is determined, to prevent
+      inadvertent IP fragmentation.  Both require some additional state
+      variable in the TCB, which may or may not cause implementation
+      difficulties.
+
+
+
+Braden                                                         [Page 30]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+   4.2  Minimal Packet Sequence
+
+      Most TCP implementations will require some small modifications to
+      allow the minimal packet sequence for a transaction shown in
+      Figure 2.
+
+      Many TCP implementations contain a mechanism to delay
+      acknowledgments of some subset of the data segments, to cut down
+      on the number of acknowledgment segments and to allow piggybacking
+      on the reverse data flow (typically character echoes).  To obtain
+      minimal packet exchanges for transactions, it is necessary to
+      delay the acknowledgment of some control bits, in an analogous
+      manner.  In particular, the <SYN,ACK> segment that is to be sent
+      in ESTABLISHED* or CLOSE-WAIT* state should be delayed.  Note that
+      the amount of delay is determined by the minimum RTO at the
+      transmitter; it is a parameter of the communication protocol,
+      independent of the application.  We propose to use the same delay
+      parameter (and if possible, the same mechanism) that is used for
+      delaying data acknowledgments.
+
+      To get the FIN piggy-backed on the reply data (segment #3 in
+      Figure 2), those implementations that have an implied PUSH=YES on
+      all SEND calls will need to augment the user interface so that
+      PUSH=NO can be set for transactions.
+
+   4.3  RTT Measurement
+
+      Transactions introduce new issues into the problem of measuring
+      round trip times [Jacobson88].
+
+      (a)  With the minimal 3-segment exchange, there can be exactly one
+           RTT measurement in each direction for each transaction.
+           Since dynamic estimation of RTT cannot take place within a
+           single transaction, it must take place across successive
+           transactions.  Therefore, cacheing the measured RTT and RTT
+           variance values is essential for transaction processing; in
+           normal virtual circuit communication, such cacheing is only
+           desirable.
+
+      (b)  At the completion of a transaction, the values for RTT and
+           RTT variance that are retained in the cache must be some
+           average of previous values with the values measured during
+           the transaction that is completing.  This raises the question
+           of the time constant for this average; quite different
+           dynamic considerations hold for transactions than for file
+           transfers, for example.
+
+      (c)  An RTT measurement by the client will yield the value:
+
+
+
+Braden                                                         [Page 31]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+                  T = RTT + min(SPT, ATO),
+
+           where SPT (server processing time) was defined in the
+           introduction, and ATO is the timeout period for sending a
+           delayed ACK.  Thus, the measured RTT includes SPT, which may
+           be arbitrarily variable; however, the resulting variability
+           of the measured T cannot exceed ATO. (In a popular TCP
+           implementation, for example, ATO = 200ms, so that the
+           variance of SPT makes a relatively small contribution to the
+           variance of RTT.)
+
+      (d)  Transactions sample the RTT at random times, which are
+           determined by the client and the server applications rather
+           than by the network dynamics.  When there are long pauses
+           between transactions, cached path properties will be poor
+           predictors of current values in the network.
+
+      Thus, the dynamics of RTT measurement for transactions differ from
+      those for virtual circuits.  RTT measurements should work
+      correctly for very short connections but reduce to the current TCP
+      algorithms for long-lasting connections.  Further study of this
+      issue is needed.
+
+   4.4  Cache Implementation
+
+      This extension requires a per-host cache of connection counts.
+      This cache may also contain values of the smoothed RTT, RTT
+      variance, congestion avoidance threshold, and MSS values.
+      Depending upon the implementation details, it may be simplest to
+      build a new cache for these values; another possibility is to use
+      the routing cache that should already be included in the host
+      [RFC-1122].
+
+      Implementation of the cache may be simplified because it is
+      consulted only when a connection is established; thereafter, the
+      CC values relevant to the connection are kept in the TCB.  This
+      means that a cache entry may be safely reused during the lifetime
+      of a connection, avoiding the need for locking.
+
+   4.5  CPU Performance
+
+      TCP implementations are customarily optimized for streaming of
+      data at high speeds, not for opening or closing connections.
+      Jacobson's Header Prediction algorithm [Jacobson90] handles the
+      simple common cases of in-sequence data and ACK segments when
+      streaming data.  To provide good performance for transactions, an
+      implementation might be able to do an analogous "header
+      prediction" specifically for the minimal request and the response
+
+
+
+Braden                                                         [Page 32]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+      segments.
+
+      The overhead of UDP provides a lower bound on the overhead of
+      TCP-based transaction processing.  It will probably not be
+      possible to reach this bound for TCP transactions, since opening a
+      TCP connection involves creating a significant amount of state
+      that is not required by UDP.
+
+      McKenney and Dove [McKenney92] have pointed out that transaction
+      processing applications of TCP can stress the performance of the
+      demultiplexing algorithm, i.e., the algorithm used to look up the
+      TCB when a segment arrives.  They advocate the use of hash-table
+      techniques rather than a linear search.  The effect of
+      demultiplexing on performance may become especially acute for a
+      transaction client using the extended TCP described here, due to
+      TCB's left in TIME-WAIT state.  A high rate of transactions from a
+      given client will leave a large number of TCB's in TIME-WAIT
+      state, until their timeout expires.  If the TCP implementation
+      uses a linear search for demultiplexing, all of these control
+      blocks must be traversed in order to discover that the new
+      association does not exist.  In this circumstance, performance of
+      a hash table lookup should not degrade severely due to
+      transactions.
+
+   4.6  Pre-SYN Queue
+
+      Suppose that segment #1 in Figure 4 is lost in the network; when
+      segment #2 arrives in LISTEN state, it will be ignored by the TCP
+      rules (see [STD-007] p.66, "fourth other text and control"), and
+      must be retransmitted.  It would be possible for the server side
+      to queue any ACK-less data segments received in LISTEN state and
+      to "replay" the segments in this queue when a SYN segment does
+      arrive.  A data segment received with an ACK bit, which is the
+      normal case for existing TCP's, would still generate a RST
+      segment.
+
+      Note that queueing segments in LISTEN state is different from
+      queueing out-of-order segments after the connection is
+      synchronized.  In LISTEN state, the sequence number corresponding
+      to the left window edge is not yet known, so that the segment
+      cannot be trimmed to fit within the window before it is queued.
+      In fact, no processing should be done on a queued segment while
+      the connection is still in LISTEN state.  Therefore, a new "pre-
+      SYN queue" would be needed.  A timeout would be required, to flush
+      the Pre-SYN Queue in case a SYN segment was not received.
+
+      Although implementation of a pre-SYN queue is not difficult in BSD
+      TCP, its limited contribution to throughput probably does not
+
+
+
+Braden                                                         [Page 33]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+      justify the effort.
+
+6.  ACKNOWLEDGMENTS
+
+   I am very grateful to Dave Clark for pointing out bugs in RFC-1379
+   and for helping me to clarify the model.  I also wish to thank Greg
+   Minshall, whose probing questions led to further elucidation of the
+   issues in T/TCP.
+
+7.  REFERENCES
+
+    [Jacobson88] Jacobson, V., "Congestion Avoidance and Control", ACM
+      SIGCOMM '88, Stanford, CA, August 1988.
+
+    [Jacobson90] Jacobson, V., "4BSD Header Prediction", Comp Comm
+      Review, v. 20, no. 2, April 1990.
+
+    [McKenney92]  McKenney, P., and K. Dove, "Efficient Demultiplexing
+      of Incoming TCP Packets", ACM SIGCOMM '92, Baltimore, MD, October
+      1992.
+
+    [RFC-1122]  Braden, R., Ed., "Requirements for Internet Hosts --
+      Communications Layers", STD-3, RFC-1122, USC/Information Sciences
+      Institute, October 1989.
+
+    [RFC-1323]  Jacobson, V., Braden, R., and D. Borman, "TCP Extensions
+      for High Performance", RFC-1323, LBL, USC/Information Sciences
+      Institute, Cray Research, February 1991.
+
+    [RFC-1379]  Braden, R., "Transaction TCP -- Concepts", RFC-1379,
+      USC/Information Sciences Institute, September 1992.
+
+    [ShankarLee93]  Shankar, A. and D. Lee, "Modulo-N Incarnation
+      Numbers for Cache-Based Transport Protocols", Report CS-TR-3046/
+      UIMACS-TR-93-24, University of Maryland, March 1993.
+
+    [STD-007]  Postel, J., "Transmission Control Protocol - DARPA
+      Internet Program Protocol Specification", STD-007, RFC-793,
+      USC/Information Sciences Institute, September 1981.
+
+
+
+
+
+
+
+
+
+
+
+
+Braden                                                         [Page 34]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+APPENDIX A.  ALGORITHM SUMMARY
+
+   This appendix summarizes the additional processing rules introduced
+   by T/TCP.  We define the following symbols:
+
+   Options
+
+       CC(SEG.CC):         TCP Connection Count (CC) Option
+       CC.NEW(SEG.CC):     TCP CC.NEW option
+       CC.ECHO(SEG.CC):    TCP CC.ECHO option
+
+           Here SEG.CC is the option value in the segment.
+
+   Per-Connection State Variables in TCB
+
+       CCsend:             CC value to be sent in segments
+       CCrecv:             CC value to be received in segments
+       Elapsed:            Duration of connection
+
+   Global Variables:
+
+       CCgen:              CC generator variable
+       cache.CC[fh]:       Cache entry: Last CC value received.
+       cache.CCsent[fh]:   Cache entry: Last CC value sent.
+
+
+   PSEUDO-CODE SUMMARY:
+
+   Passive OPEN => {
+       Create new TCB;
+   }
+
+   Active OPEN => {
+       <Create new TCB>
+       CCrecv = 0;
+       CCsend = CCgen;
+       If (CCgen == 0xffffffff) then Set CCgen = 1;
+                                else Set CCgen = CCgen + 1.
+       <Send initial {SYN} segment (see below)>
+   }
+
+
+   Send initial {SYN} segment => {
+
+       If (cache.CCsent[fh] == 0 OR CCsend < cache.CCsent[fh] ) then {
+
+             Include CC.NEW(CCsend) option in segment;
+             Set cache.CCsent[fh] = 0;
+
+
+
+Braden                                                         [Page 35]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+       }
+       else {
+
+             Include CC(CCsend) option in segment;
+             Set cache.CCsent[fh] = CCsend;
+       }
+    }
+
+
+   Send {SYN,ACK} segment => {
+
+       If (CCrecv != 0) then
+             Include CC(CCsend), CC.ECHO(CCrecv) options in segment.
+   }
+
+
+   Receive {SYN} segment in LISTEN, SYN-SENT, or SYN-SENT* state => {
+
+       If state == LISTEN then {
+             CCrecv = 0;
+             CCsend = CCgen;
+             If (CCgen == 0xffffffff) then Set CCgen = 1;
+                                      else Set CCgen = CCgen + 1.
+       }
+
+       If (Segment contains CC option  OR
+             Segment contains CC.NEW option) then
+                   Set CCrecv = SEG.CC.
+
+       if (Segment contains CC option  AND
+             cache.CC[fh] != 0  AND
+                   SEG.CC > cache.CC[fh] ) then {  /* TAO Test OK */
+
+             Set cache.CC[fh] = CCrecv;
+             <Mark connection half-synchronized>
+             <Process data and/or FIN and return>
+       }
+
+
+       If (Segment does not contain CC option)  then
+             Set cache.CC[fh] = 0;
+
+       <Do normal TCP processing and return>.
+   }
+
+   Receive {SYN} segment in LISTEN-TW, LISTEN-LA, LISTEN-LA*, LISTEN-CL,
+       or LISTEN-CL* state => {
+
+
+
+
+Braden                                                         [Page 36]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+       If (Segment contains CC option AND CCrecv != 0)  then  {
+
+             If (state = LISTEN-TW AND Elapsed > MSL ) then
+                   <Send RST, drop segment, and return>.
+
+             if (SEG.CC > CCrecv )  then {
+                   <Implicitly ACK FIN and data in retransmission queue>;
+                   <Close and delete TCB>;
+                   <Reprocess segment>.
+                           /* Expect to match new TCB
+                            * in LISTEN state.
+                            */
+              }
+       }
+       else
+             <Drop segment>.
+   }
+
+
+   Receive {SYN,ACK} segment => {
+
+       if (Segment contains CC.ECHO option  AND
+                   SEG.CC != CCsend) then
+             <Send a reset and discard segment>.
+
+       if (Segment contains CC option) then {
+             Set CCrecv = SEG.CC.
+
+             if (cache.CC[fh] is undefined) then
+                   Set cache.CC[fh] = CCrecv.
+       }
+   }
+
+
+   Send non-SYN segment => {
+
+       if (CCrecv != 0  OR
+             (cache.CCsent[fh] != 0  AND
+              state is SYN-SENT or SYN-SENT*)) then
+                  Include CC(CCsend) option in segment.
+   }
+
+
+   Receive non-SYN segment in SYN-RECEIVED state => {
+
+       if (Segment contains CC option  AND  RST bit is off) {
+               if (SEG.CC != CCrecv)  then
+                     <Segment is unacceptable; drop it and send an
+
+
+
+Braden                                                         [Page 37]
+
+RFC 1644                    Transaction/TCP                    July 1994
+
+
+                       ACK segment, as in normal TCP processing>.
+
+               if (cache.CC[fh] is undefined)  then
+                     Set cache.CC[fh] = CCrecv.
+       }
+   }
+
+
+   Receive non-SYN segment in (state >= ESTABLISHED) => {
+
+       if (Segment contains CC option  AND  RST bit is off) {
+               if (SEG.CC != CCrecv)  then
+                     <Segment is unacceptable; drop it and send an
+                       ACK segment, as in normal TCP processing>.
+       }
+   }
+
+
+Security Considerations
+
+   Security issues are not discussed in this memo.
+
+Author's Address
+
+   Bob Braden
+   University of Southern California
+   Information Sciences Institute
+   4676 Admiralty Way
+   Marina del Rey, CA 90292
+
+   Phone: (310) 822-1511
+   EMail: Braden@ISI.EDU
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Braden                                                         [Page 38]
+
diff --git a/ext/picotcp/RFC/rfc1661.txt b/ext/picotcp/RFC/rfc1661.txt
new file mode 100644
index 0000000..02112bd
--- /dev/null
+++ b/ext/picotcp/RFC/rfc1661.txt
@@ -0,0 +1,2976 @@
+
+
+
+
+
+
+Network Working Group                                 W. Simpson, Editor
+Request for Comments: 1661                                    Daydreamer
+STD: 51                                                        July 1994
+Obsoletes: 1548
+Category: Standards Track
+
+
+                   The Point-to-Point Protocol (PPP)
+
+
+
+Status of this Memo
+
+   This document specifies an Internet standards track protocol for the
+   Internet community, and requests discussion and suggestions for
+   improvements.  Please refer to the current edition of the "Internet
+   Official Protocol Standards" (STD 1) for the standardization state
+   and status of this protocol.  Distribution of this memo is unlimited.
+
+
+Abstract
+
+   The Point-to-Point Protocol (PPP) provides a standard method for
+   transporting multi-protocol datagrams over point-to-point links.  PPP
+   is comprised of three main components:
+
+      1. A method for encapsulating multi-protocol datagrams.
+
+      2. A Link Control Protocol (LCP) for establishing, configuring,
+         and testing the data-link connection.
+
+      3. A family of Network Control Protocols (NCPs) for establishing
+         and configuring different network-layer protocols.
+
+   This document defines the PPP organization and methodology, and the
+   PPP encapsulation, together with an extensible option negotiation
+   mechanism which is able to negotiate a rich assortment of
+   configuration parameters and provides additional management
+   functions.  The PPP Link Control Protocol (LCP) is described in terms
+   of this mechanism.
+
+
+Table of Contents
+
+
+     1.     Introduction ..........................................    1
+        1.1       Specification of Requirements ...................    2
+        1.2       Terminology .....................................    3
+
+     2.     PPP Encapsulation .....................................    4
+
+
+Simpson                                                         [Page i]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+     3.     PPP Link Operation ....................................    6
+        3.1       Overview ........................................    6
+        3.2       Phase Diagram ...................................    6
+        3.3       Link Dead (physical-layer not ready) ............    7
+        3.4       Link Establishment Phase ........................    7
+        3.5       Authentication Phase ............................    8
+        3.6       Network-Layer Protocol Phase ....................    8
+        3.7       Link Termination Phase ..........................    9
+
+     4.     The Option Negotiation Automaton ......................   11
+        4.1       State Transition Table ..........................   12
+        4.2       States ..........................................   14
+        4.3       Events ..........................................   16
+        4.4       Actions .........................................   21
+        4.5       Loop Avoidance ..................................   23
+        4.6       Counters and Timers .............................   24
+
+     5.     LCP Packet Formats ....................................   26
+        5.1       Configure-Request ...............................   28
+        5.2       Configure-Ack ...................................   29
+        5.3       Configure-Nak ...................................   30
+        5.4       Configure-Reject ................................   31
+        5.5       Terminate-Request and Terminate-Ack .............   33
+        5.6       Code-Reject .....................................   34
+        5.7       Protocol-Reject .................................   35
+        5.8       Echo-Request and Echo-Reply .....................   36
+        5.9       Discard-Request .................................   37
+
+     6.     LCP Configuration Options .............................   39
+        6.1       Maximum-Receive-Unit (MRU) ......................   41
+        6.2       Authentication-Protocol .........................   42
+        6.3       Quality-Protocol ................................   43
+        6.4       Magic-Number ....................................   45
+        6.5       Protocol-Field-Compression (PFC) ................   48
+        6.6       Address-and-Control-Field-Compression (ACFC)
+
+     SECURITY CONSIDERATIONS ......................................   51
+     REFERENCES ...................................................   51
+     ACKNOWLEDGEMENTS .............................................   51
+     CHAIR'S ADDRESS ..............................................   52
+     EDITOR'S ADDRESS .............................................   52
+
+
+
+
+
+
+
+
+
+
+Simpson                                                        [Page ii]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+1.  Introduction
+
+   The Point-to-Point Protocol is designed for simple links which
+   transport packets between two peers.  These links provide full-duplex
+   simultaneous bi-directional operation, and are assumed to deliver
+   packets in order.  It is intended that PPP provide a common solution
+   for easy connection of a wide variety of hosts, bridges and routers
+   [1].
+
+   Encapsulation
+
+      The PPP encapsulation provides for multiplexing of different
+      network-layer protocols simultaneously over the same link.  The
+      PPP encapsulation has been carefully designed to retain
+      compatibility with most commonly used supporting hardware.
+
+      Only 8 additional octets are necessary to form the encapsulation
+      when used within the default HDLC-like framing.  In environments
+      where bandwidth is at a premium, the encapsulation and framing may
+      be shortened to 2 or 4 octets.
+
+      To support high speed implementations, the default encapsulation
+      uses only simple fields, only one of which needs to be examined
+      for demultiplexing.  The default header and information fields
+      fall on 32-bit boundaries, and the trailer may be padded to an
+      arbitrary boundary.
+
+   Link Control Protocol
+
+      In order to be sufficiently versatile to be portable to a wide
+      variety of environments, PPP provides a Link Control Protocol
+      (LCP).  The LCP is used to automatically agree upon the
+      encapsulation format options, handle varying limits on sizes of
+      packets, detect a looped-back link and other common
+      misconfiguration errors, and terminate the link.  Other optional
+      facilities provided are authentication of the identity of its peer
+      on the link, and determination when a link is functioning properly
+      and when it is failing.
+
+   Network Control Protocols
+
+      Point-to-Point links tend to exacerbate many problems with the
+      current family of network protocols.  For instance, assignment and
+      management of IP addresses, which is a problem even in LAN
+      environments, is especially difficult over circuit-switched
+      point-to-point links (such as dial-up modem servers).  These
+      problems are handled by a family of Network Control Protocols
+      (NCPs), which each manage the specific needs required by their
+
+
+
+Simpson                                                         [Page 1]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+      respective network-layer protocols.  These NCPs are defined in
+      companion documents.
+
+   Configuration
+
+      It is intended that PPP links be easy to configure.  By design,
+      the standard defaults handle all common configurations.  The
+      implementor can specify improvements to the default configuration,
+      which are automatically communicated to the peer without operator
+      intervention.  Finally, the operator may explicitly configure
+      options for the link which enable the link to operate in
+      environments where it would otherwise be impossible.
+
+      This self-configuration is implemented through an extensible
+      option negotiation mechanism, wherein each end of the link
+      describes to the other its capabilities and requirements.
+      Although the option negotiation mechanism described in this
+      document is specified in terms of the Link Control Protocol (LCP),
+      the same facilities are designed to be used by other control
+      protocols, especially the family of NCPs.
+
+
+
+1.1.  Specification of Requirements
+
+   In this document, several words are used to signify the requirements
+   of the specification.  These words are often capitalized.
+
+   MUST      This word, or the adjective "required", means that the
+             definition is an absolute requirement of the specification.
+
+   MUST NOT  This phrase means that the definition is an absolute
+             prohibition of the specification.
+
+   SHOULD    This word, or the adjective "recommended", means that there
+             may exist valid reasons in particular circumstances to
+             ignore this item, but the full implications must be
+             understood and carefully weighed before choosing a
+             different course.
+
+   MAY       This word, or the adjective "optional", means that this
+             item is one of an allowed set of alternatives.  An
+             implementation which does not include this option MUST be
+             prepared to interoperate with another implementation which
+             does include the option.
+
+
+
+
+
+
+Simpson                                                         [Page 2]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+1.2.  Terminology
+
+   This document frequently uses the following terms:
+
+   datagram  The unit of transmission in the network layer (such as IP).
+             A datagram may be encapsulated in one or more packets
+             passed to the data link layer.
+
+   frame     The unit of transmission at the data link layer.  A frame
+             may include a header and/or a trailer, along with some
+             number of units of data.
+
+   packet    The basic unit of encapsulation, which is passed across the
+             interface between the network layer and the data link
+             layer.  A packet is usually mapped to a frame; the
+             exceptions are when data link layer fragmentation is being
+             performed, or when multiple packets are incorporated into a
+             single frame.
+
+   peer      The other end of the point-to-point link.
+
+   silently discard
+             The implementation discards the packet without further
+             processing.  The implementation SHOULD provide the
+             capability of logging the error, including the contents of
+             the silently discarded packet, and SHOULD record the event
+             in a statistics counter.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Simpson                                                         [Page 3]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+2.  PPP Encapsulation
+
+   The PPP encapsulation is used to disambiguate multiprotocol
+   datagrams.  This encapsulation requires framing to indicate the
+   beginning and end of the encapsulation.  Methods of providing framing
+   are specified in companion documents.
+
+   A summary of the PPP encapsulation is shown below.  The fields are
+   transmitted from left to right.
+
+           +----------+-------------+---------+
+           | Protocol | Information | Padding |
+           | 8/16 bits|      *      |    *    |
+           +----------+-------------+---------+
+
+
+   Protocol Field
+
+      The Protocol field is one or two octets, and its value identifies
+      the datagram encapsulated in the Information field of the packet.
+      The field is transmitted and received most significant octet
+      first.
+
+      The structure of this field is consistent with the ISO 3309
+      extension mechanism for address fields.  All Protocols MUST be
+      odd; the least significant bit of the least significant octet MUST
+      equal "1".  Also, all Protocols MUST be assigned such that the
+      least significant bit of the most significant octet equals "0".
+      Frames received which don't comply with these rules MUST be
+      treated as having an unrecognized Protocol.
+
+      Protocol field values in the "0***" to "3***" range identify the
+      network-layer protocol of specific packets, and values in the
+      "8***" to "b***" range identify packets belonging to the
+      associated Network Control Protocols (NCPs), if any.
+
+      Protocol field values in the "4***" to "7***" range are used for
+      protocols with low volume traffic which have no associated NCP.
+      Protocol field values in the "c***" to "f***" range identify
+      packets as link-layer Control Protocols (such as LCP).
+
+
+
+
+
+
+
+
+
+
+
+Simpson                                                         [Page 4]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+      Up-to-date values of the Protocol field are specified in the most
+      recent "Assigned Numbers" RFC [2].  This specification reserves
+      the following values:
+
+      Value (in hex)  Protocol Name
+
+      0001            Padding Protocol
+      0003 to 001f    reserved (transparency inefficient)
+      007d            reserved (Control Escape)
+      00cf            reserved (PPP NLPID)
+      00ff            reserved (compression inefficient)
+
+      8001 to 801f    unused
+      807d            unused
+      80cf            unused
+      80ff            unused
+
+      c021            Link Control Protocol
+      c023            Password Authentication Protocol
+      c025            Link Quality Report
+      c223            Challenge Handshake Authentication Protocol
+
+      Developers of new protocols MUST obtain a number from the Internet
+      Assigned Numbers Authority (IANA), at IANA@isi.edu.
+
+
+   Information Field
+
+      The Information field is zero or more octets.  The Information
+      field contains the datagram for the protocol specified in the
+      Protocol field.
+
+      The maximum length for the Information field, including Padding,
+      but not including the Protocol field, is termed the Maximum
+      Receive Unit (MRU), which defaults to 1500 octets.  By
+      negotiation, consenting PPP implementations may use other values
+      for the MRU.
+
+
+   Padding
+
+      On transmission, the Information field MAY be padded with an
+      arbitrary number of octets up to the MRU.  It is the
+      responsibility of each protocol to distinguish padding octets from
+      real information.
+
+
+
+
+
+
+Simpson                                                         [Page 5]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+3.  PPP Link Operation
+
+3.1.  Overview
+
+   In order to establish communications over a point-to-point link, each
+   end of the PPP link MUST first send LCP packets to configure and test
+   the data link.  After the link has been established, the peer MAY be
+   authenticated.
+
+   Then, PPP MUST send NCP packets to choose and configure one or more
+   network-layer protocols.  Once each of the chosen network-layer
+   protocols has been configured, datagrams from each network-layer
+   protocol can be sent over the link.
+
+   The link will remain configured for communications until explicit LCP
+   or NCP packets close the link down, or until some external event
+   occurs (an inactivity timer expires or network administrator
+   intervention).
+
+
+
+3.2.  Phase Diagram
+
+   In the process of configuring, maintaining and terminating the
+   point-to-point link, the PPP link goes through several distinct
+   phases which are specified in the following simplified state diagram:
+
+   +------+        +-----------+           +--------------+
+   |      | UP     |           | OPENED    |              | SUCCESS/NONE
+   | Dead |------->| Establish |---------->| Authenticate |--+
+   |      |        |           |           |              |  |
+   +------+        +-----------+           +--------------+  |
+      ^               |                        |             |
+      |          FAIL |                   FAIL |             |
+      +<--------------+             +----------+             |
+      |                             |                        |
+      |            +-----------+    |           +---------+  |
+      |       DOWN |           |    |   CLOSING |         |  |
+      +------------| Terminate |<---+<----------| Network |<-+
+                   |           |                |         |
+                   +-----------+                +---------+
+
+   Not all transitions are specified in this diagram.  The following
+   semantics MUST be followed.
+
+
+
+
+
+
+
+Simpson                                                         [Page 6]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+3.3.  Link Dead (physical-layer not ready)
+
+   The link necessarily begins and ends with this phase.  When an
+   external event (such as carrier detection or network administrator
+   configuration) indicates that the physical-layer is ready to be used,
+   PPP will proceed to the Link Establishment phase.
+
+   During this phase, the LCP automaton (described later) will be in the
+   Initial or Starting states.  The transition to the Link Establishment
+   phase will signal an Up event to the LCP automaton.
+
+   Implementation Note:
+
+      Typically, a link will return to this phase automatically after
+      the disconnection of a modem.  In the case of a hard-wired link,
+      this phase may be extremely short -- merely long enough to detect
+      the presence of the device.
+
+
+
+3.4.  Link Establishment Phase
+
+   The Link Control Protocol (LCP) is used to establish the connection
+   through an exchange of Configure packets.  This exchange is complete,
+   and the LCP Opened state entered, once a Configure-Ack packet
+   (described later) has been both sent and received.
+
+   All Configuration Options are assumed to be at default values unless
+   altered by the configuration exchange.  See the chapter on LCP
+   Configuration Options for further discussion.
+
+   It is important to note that only Configuration Options which are
+   independent of particular network-layer protocols are configured by
+   LCP.  Configuration of individual network-layer protocols is handled
+   by separate Network Control Protocols (NCPs) during the Network-Layer
+   Protocol phase.
+
+   Any non-LCP packets received during this phase MUST be silently
+   discarded.
+
+   The receipt of the LCP Configure-Request causes a return to the Link
+   Establishment phase from the Network-Layer Protocol phase or
+   Authentication phase.
+
+
+
+
+
+
+
+
+Simpson                                                         [Page 7]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+3.5.  Authentication Phase
+
+   On some links it may be desirable to require a peer to authenticate
+   itself before allowing network-layer protocol packets to be
+   exchanged.
+
+   By default, authentication is not mandatory.  If an implementation
+   desires that the peer authenticate with some specific authentication
+   protocol, then it MUST request the use of that authentication
+   protocol during Link Establishment phase.
+
+   Authentication SHOULD take place as soon as possible after link
+   establishment.  However, link quality determination MAY occur
+   concurrently.  An implementation MUST NOT allow the exchange of link
+   quality determination packets to delay authentication indefinitely.
+
+   Advancement from the Authentication phase to the Network-Layer
+   Protocol phase MUST NOT occur until authentication has completed.  If
+   authentication fails, the authenticator SHOULD proceed instead to the
+   Link Termination phase.
+
+   Only Link Control Protocol, authentication protocol, and link quality
+   monitoring packets are allowed during this phase.  All other packets
+   received during this phase MUST be silently discarded.
+
+   Implementation Notes:
+
+      An implementation SHOULD NOT fail authentication simply due to
+      timeout or lack of response.  The authentication SHOULD allow some
+      method of retransmission, and proceed to the Link Termination
+      phase only after a number of authentication attempts has been
+      exceeded.
+
+      The implementation responsible for commencing Link Termination
+      phase is the implementation which has refused authentication to
+      its peer.
+
+
+
+3.6.  Network-Layer Protocol Phase
+
+   Once PPP has finished the previous phases, each network-layer
+   protocol (such as IP, IPX, or AppleTalk) MUST be separately
+   configured by the appropriate Network Control Protocol (NCP).
+
+   Each NCP MAY be Opened and Closed at any time.
+
+
+
+
+
+Simpson                                                         [Page 8]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+   Implementation Note:
+
+      Because an implementation may initially use a significant amount
+      of time for link quality determination, implementations SHOULD
+      avoid fixed timeouts when waiting for their peers to configure a
+      NCP.
+
+   After a NCP has reached the Opened state, PPP will carry the
+   corresponding network-layer protocol packets.  Any supported
+   network-layer protocol packets received when the corresponding NCP is
+   not in the Opened state MUST be silently discarded.
+
+   Implementation Note:
+
+      While LCP is in the Opened state, any protocol packet which is
+      unsupported by the implementation MUST be returned in a Protocol-
+      Reject (described later).  Only protocols which are supported are
+      silently discarded.
+
+   During this phase, link traffic consists of any possible combination
+   of LCP, NCP, and network-layer protocol packets.
+
+
+
+3.7.  Link Termination Phase
+
+   PPP can terminate the link at any time.  This might happen because of
+   the loss of carrier, authentication failure, link quality failure,
+   the expiration of an idle-period timer, or the administrative closing
+   of the link.
+
+   LCP is used to close the link through an exchange of Terminate
+   packets.  When the link is closing, PPP informs the network-layer
+   protocols so that they may take appropriate action.
+
+   After the exchange of Terminate packets, the implementation SHOULD
+   signal the physical-layer to disconnect in order to enforce the
+   termination of the link, particularly in the case of an
+   authentication failure.  The sender of the Terminate-Request SHOULD
+   disconnect after receiving a Terminate-Ack, or after the Restart
+   counter expires.  The receiver of a Terminate-Request SHOULD wait for
+   the peer to disconnect, and MUST NOT disconnect until at least one
+   Restart time has passed after sending a Terminate-Ack.  PPP SHOULD
+   proceed to the Link Dead phase.
+
+   Any non-LCP packets received during this phase MUST be silently
+   discarded.
+
+
+
+
+Simpson                                                         [Page 9]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+   Implementation Note:
+
+      The closing of the link by LCP is sufficient.  There is no need
+      for each NCP to send a flurry of Terminate packets.  Conversely,
+      the fact that one NCP has Closed is not sufficient reason to cause
+      the termination of the PPP link, even if that NCP was the only NCP
+      currently in the Opened state.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Simpson                                                        [Page 10]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+4.  The Option Negotiation Automaton
+
+   The finite-state automaton is defined by events, actions and state
+   transitions.  Events include reception of external commands such as
+   Open and Close, expiration of the Restart timer, and reception of
+   packets from a peer.  Actions include the starting of the Restart
+   timer and transmission of packets to the peer.
+
+   Some types of packets -- Configure-Naks and Configure-Rejects, or
+   Code-Rejects and Protocol-Rejects, or Echo-Requests, Echo-Replies and
+   Discard-Requests -- are not differentiated in the automaton
+   descriptions.  As will be described later, these packets do indeed
+   serve different functions.  However, they always cause the same
+   transitions.
+
+   Events                                   Actions
+
+   Up   = lower layer is Up                 tlu = This-Layer-Up
+   Down = lower layer is Down               tld = This-Layer-Down
+   Open = administrative Open               tls = This-Layer-Started
+   Close= administrative Close              tlf = This-Layer-Finished
+
+   TO+  = Timeout with counter > 0          irc = Initialize-Restart-Count
+   TO-  = Timeout with counter expired      zrc = Zero-Restart-Count
+
+   RCR+ = Receive-Configure-Request (Good)  scr = Send-Configure-Request
+   RCR- = Receive-Configure-Request (Bad)
+   RCA  = Receive-Configure-Ack             sca = Send-Configure-Ack
+   RCN  = Receive-Configure-Nak/Rej         scn = Send-Configure-Nak/Rej
+
+   RTR  = Receive-Terminate-Request         str = Send-Terminate-Request
+   RTA  = Receive-Terminate-Ack             sta = Send-Terminate-Ack
+
+   RUC  = Receive-Unknown-Code              scj = Send-Code-Reject
+   RXJ+ = Receive-Code-Reject (permitted)
+       or Receive-Protocol-Reject
+   RXJ- = Receive-Code-Reject (catastrophic)
+       or Receive-Protocol-Reject
+   RXR  = Receive-Echo-Request              ser = Send-Echo-Reply
+       or Receive-Echo-Reply
+       or Receive-Discard-Request
+
+
+
+
+
+
+
+
+
+
+Simpson                                                        [Page 11]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+4.1.  State Transition Table
+
+   The complete state transition table follows.  States are indicated
+   horizontally, and events are read vertically.  State transitions and
+   actions are represented in the form action/new-state.  Multiple
+   actions are separated by commas, and may continue on succeeding lines
+   as space requires; multiple actions may be implemented in any
+   convenient order.  The state may be followed by a letter, which
+   indicates an explanatory footnote.  The dash ('-') indicates an
+   illegal transition.
+
+      | State
+      |    0         1         2         3         4         5
+Events| Initial   Starting  Closed    Stopped   Closing   Stopping
+------+-----------------------------------------------------------
+ Up   |    2     irc,scr/6     -         -         -         -
+ Down |    -         -         0       tls/1       0         1
+ Open |  tls/1       1     irc,scr/6     3r        5r        5r
+ Close|    0       tlf/0       2         2         4         4
+      |
+  TO+ |    -         -         -         -       str/4     str/5
+  TO- |    -         -         -         -       tlf/2     tlf/3
+      |
+ RCR+ |    -         -       sta/2 irc,scr,sca/8   4         5
+ RCR- |    -         -       sta/2 irc,scr,scn/6   4         5
+ RCA  |    -         -       sta/2     sta/3       4         5
+ RCN  |    -         -       sta/2     sta/3       4         5
+      |
+ RTR  |    -         -       sta/2     sta/3     sta/4     sta/5
+ RTA  |    -         -         2         3       tlf/2     tlf/3
+      |
+ RUC  |    -         -       scj/2     scj/3     scj/4     scj/5
+ RXJ+ |    -         -         2         3         4         5
+ RXJ- |    -         -       tlf/2     tlf/3     tlf/2     tlf/3
+      |
+ RXR  |    -         -         2         3         4         5
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Simpson                                                        [Page 12]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+
+      | State
+      |    6         7         8           9
+Events| Req-Sent  Ack-Rcvd  Ack-Sent    Opened
+------+-----------------------------------------
+ Up   |    -         -         -           -
+ Down |    1         1         1         tld/1
+ Open |    6         7         8           9r
+ Close|irc,str/4 irc,str/4 irc,str/4 tld,irc,str/4
+      |
+  TO+ |  scr/6     scr/6     scr/8         -
+  TO- |  tlf/3p    tlf/3p    tlf/3p        -
+      |
+ RCR+ |  sca/8   sca,tlu/9   sca/8   tld,scr,sca/8
+ RCR- |  scn/6     scn/7     scn/6   tld,scr,scn/6
+ RCA  |  irc/7     scr/6x  irc,tlu/9   tld,scr/6x
+ RCN  |irc,scr/6   scr/6x  irc,scr/8   tld,scr/6x
+      |
+ RTR  |  sta/6     sta/6     sta/6   tld,zrc,sta/5
+ RTA  |    6         6         8       tld,scr/6
+      |
+ RUC  |  scj/6     scj/7     scj/8       scj/9
+ RXJ+ |    6         6         8           9
+ RXJ- |  tlf/3     tlf/3     tlf/3   tld,irc,str/5
+      |
+ RXR  |    6         7         8         ser/9
+
+
+   The states in which the Restart timer is running are identifiable by
+   the presence of TO events.  Only the Send-Configure-Request, Send-
+   Terminate-Request and Zero-Restart-Count actions start or re-start
+   the Restart timer.  The Restart timer is stopped when transitioning
+   from any state where the timer is running to a state where the timer
+   is not running.
+
+   The events and actions are defined according to a message passing
+   architecture, rather than a signalling architecture.  If an action is
+   desired to control specific signals (such as DTR), additional actions
+   are likely to be required.
+
+   [p]   Passive option; see Stopped state discussion.
+
+   [r]   Restart option; see Open event discussion.
+
+   [x]   Crossed connection; see RCA event discussion.
+
+
+
+
+
+
+Simpson                                                        [Page 13]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+4.2.  States
+
+   Following is a more detailed description of each automaton state.
+
+   Initial
+
+      In the Initial state, the lower layer is unavailable (Down), and
+      no Open has occurred.  The Restart timer is not running in the
+      Initial state.
+
+   Starting
+
+      The Starting state is the Open counterpart to the Initial state.
+      An administrative Open has been initiated, but the lower layer is
+      still unavailable (Down).  The Restart timer is not running in the
+      Starting state.
+
+      When the lower layer becomes available (Up), a Configure-Request
+      is sent.
+
+   Closed
+
+      In the Closed state, the link is available (Up), but no Open has
+      occurred.  The Restart timer is not running in the Closed state.
+
+      Upon reception of Configure-Request packets, a Terminate-Ack is
+      sent.  Terminate-Acks are silently discarded to avoid creating a
+      loop.
+
+   Stopped
+
+      The Stopped state is the Open counterpart to the Closed state.  It
+      is entered when the automaton is waiting for a Down event after
+      the This-Layer-Finished action, or after sending a Terminate-Ack.
+      The Restart timer is not running in the Stopped state.
+
+      Upon reception of Configure-Request packets, an appropriate
+      response is sent.  Upon reception of other packets, a Terminate-
+      Ack is sent.  Terminate-Acks are silently discarded to avoid
+      creating a loop.
+
+      Rationale:
+
+         The Stopped state is a junction state for link termination,
+         link configuration failure, and other automaton failure modes.
+         These potentially separate states have been combined.
+
+         There is a race condition between the Down event response (from
+
+
+
+Simpson                                                        [Page 14]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+         the This-Layer-Finished action) and the Receive-Configure-
+         Request event.  When a Configure-Request arrives before the
+         Down event, the Down event will supersede by returning the
+         automaton to the Starting state.  This prevents attack by
+         repetition.
+
+      Implementation Option:
+
+         After the peer fails to respond to Configure-Requests, an
+         implementation MAY wait passively for the peer to send
+         Configure-Requests.  In this case, the This-Layer-Finished
+         action is not used for the TO- event in states Req-Sent, Ack-
+         Rcvd and Ack-Sent.
+
+         This option is useful for dedicated circuits, or circuits which
+         have no status signals available, but SHOULD NOT be used for
+         switched circuits.
+
+   Closing
+
+      In the Closing state, an attempt is made to terminate the
+      connection.  A Terminate-Request has been sent and the Restart
+      timer is running, but a Terminate-Ack has not yet been received.
+
+      Upon reception of a Terminate-Ack, the Closed state is entered.
+      Upon the expiration of the Restart timer, a new Terminate-Request
+      is transmitted, and the Restart timer is restarted.  After the
+      Restart timer has expired Max-Terminate times, the Closed state is
+      entered.
+
+   Stopping
+
+      The Stopping state is the Open counterpart to the Closing state.
+      A Terminate-Request has been sent and the Restart timer is
+      running, but a Terminate-Ack has not yet been received.
+
+      Rationale:
+
+         The Stopping state provides a well defined opportunity to
+         terminate a link before allowing new traffic.  After the link
+         has terminated, a new configuration may occur via the Stopped
+         or Starting states.
+
+   Request-Sent
+
+      In the Request-Sent state an attempt is made to configure the
+      connection.  A Configure-Request has been sent and the Restart
+      timer is running, but a Configure-Ack has not yet been received
+
+
+
+Simpson                                                        [Page 15]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+      nor has one been sent.
+
+   Ack-Received
+
+      In the Ack-Received state, a Configure-Request has been sent and a
+      Configure-Ack has been received.  The Restart timer is still
+      running, since a Configure-Ack has not yet been sent.
+
+   Ack-Sent
+
+      In the Ack-Sent state, a Configure-Request and a Configure-Ack
+      have both been sent, but a Configure-Ack has not yet been
+      received.  The Restart timer is running, since a Configure-Ack has
+      not yet been received.
+
+   Opened
+
+      In the Opened state, a Configure-Ack has been both sent and
+      received.  The Restart timer is not running.
+
+      When entering the Opened state, the implementation SHOULD signal
+      the upper layers that it is now Up.  Conversely, when leaving the
+      Opened state, the implementation SHOULD signal the upper layers
+      that it is now Down.
+
+
+
+4.3.  Events
+
+   Transitions and actions in the automaton are caused by events.
+
+   Up
+
+      This event occurs when a lower layer indicates that it is ready to
+      carry packets.
+
+      Typically, this event is used by a modem handling or calling
+      process, or by some other coupling of the PPP link to the physical
+      media, to signal LCP that the link is entering Link Establishment
+      phase.
+
+      It also can be used by LCP to signal each NCP that the link is
+      entering Network-Layer Protocol phase.  That is, the This-Layer-Up
+      action from LCP triggers the Up event in the NCP.
+
+   Down
+
+      This event occurs when a lower layer indicates that it is no
+
+
+
+Simpson                                                        [Page 16]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+      longer ready to carry packets.
+
+      Typically, this event is used by a modem handling or calling
+      process, or by some other coupling of the PPP link to the physical
+      media, to signal LCP that the link is entering Link Dead phase.
+
+      It also can be used by LCP to signal each NCP that the link is
+      leaving Network-Layer Protocol phase.  That is, the This-Layer-
+      Down action from LCP triggers the Down event in the NCP.
+
+   Open
+
+      This event indicates that the link is administratively available
+      for traffic; that is, the network administrator (human or program)
+      has indicated that the link is allowed to be Opened.  When this
+      event occurs, and the link is not in the Opened state, the
+      automaton attempts to send configuration packets to the peer.
+
+      If the automaton is not able to begin configuration (the lower
+      layer is Down, or a previous Close event has not completed), the
+      establishment of the link is automatically delayed.
+
+      When a Terminate-Request is received, or other events occur which
+      cause the link to become unavailable, the automaton will progress
+      to a state where the link is ready to re-open.  No additional
+      administrative intervention is necessary.
+
+      Implementation Option:
+
+         Experience has shown that users will execute an additional Open
+         command when they want to renegotiate the link.  This might
+         indicate that new values are to be negotiated.
+
+         Since this is not the meaning of the Open event, it is
+         suggested that when an Open user command is executed in the
+         Opened, Closing, Stopping, or Stopped states, the
+         implementation issue a Down event, immediately followed by an
+         Up event.  Care must be taken that an intervening Down event
+         cannot occur from another source.
+
+         The Down followed by an Up will cause an orderly renegotiation
+         of the link, by progressing through the Starting to the
+         Request-Sent state.  This will cause the renegotiation of the
+         link, without any harmful side effects.
+
+   Close
+
+      This event indicates that the link is not available for traffic;
+
+
+
+Simpson                                                        [Page 17]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+      that is, the network administrator (human or program) has
+      indicated that the link is not allowed to be Opened.  When this
+      event occurs, and the link is not in the Closed state, the
+      automaton attempts to terminate the connection.  Further attempts
+      to re-configure the link are denied until a new Open event occurs.
+
+      Implementation Note:
+
+         When authentication fails, the link SHOULD be terminated, to
+         prevent attack by repetition and denial of service to other
+         users.  Since the link is administratively available (by
+         definition), this can be accomplished by simulating a Close
+         event to the LCP, immediately followed by an Open event.  Care
+         must be taken that an intervening Close event cannot occur from
+         another source.
+
+         The Close followed by an Open will cause an orderly termination
+         of the link, by progressing through the Closing to the Stopping
+         state, and the This-Layer-Finished action can disconnect the
+         link.  The automaton waits in the Stopped or Starting states
+         for the next connection attempt.
+
+   Timeout (TO+,TO-)
+
+      This event indicates the expiration of the Restart timer.  The
+      Restart timer is used to time responses to Configure-Request and
+      Terminate-Request packets.
+
+      The TO+ event indicates that the Restart counter continues to be
+      greater than zero, which triggers the corresponding Configure-
+      Request or Terminate-Request packet to be retransmitted.
+
+      The TO- event indicates that the Restart counter is not greater
+      than zero, and no more packets need to be retransmitted.
+
+   Receive-Configure-Request (RCR+,RCR-)
+
+      This event occurs when a Configure-Request packet is received from
+      the peer.  The Configure-Request packet indicates the desire to
+      open a connection and may specify Configuration Options.  The
+      Configure-Request packet is more fully described in a later
+      section.
+
+      The RCR+ event indicates that the Configure-Request was
+      acceptable, and triggers the transmission of a corresponding
+      Configure-Ack.
+
+      The RCR- event indicates that the Configure-Request was
+
+
+
+Simpson                                                        [Page 18]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+      unacceptable, and triggers the transmission of a corresponding
+      Configure-Nak or Configure-Reject.
+
+      Implementation Note:
+
+         These events may occur on a connection which is already in the
+         Opened state.  The implementation MUST be prepared to
+         immediately renegotiate the Configuration Options.
+
+   Receive-Configure-Ack (RCA)
+
+      This event occurs when a valid Configure-Ack packet is received
+      from the peer.  The Configure-Ack packet is a positive response to
+      a Configure-Request packet.  An out of sequence or otherwise
+      invalid packet is silently discarded.
+
+      Implementation Note:
+
+         Since the correct packet has already been received before
+         reaching the Ack-Rcvd or Opened states, it is extremely
+         unlikely that another such packet will arrive.  As specified,
+         all invalid Ack/Nak/Rej packets are silently discarded, and do
+         not affect the transitions of the automaton.
+
+         However, it is not impossible that a correctly formed packet
+         will arrive through a coincidentally-timed cross-connection.
+         It is more likely to be the result of an implementation error.
+         At the very least, this occurrence SHOULD be logged.
+
+   Receive-Configure-Nak/Rej (RCN)
+
+      This event occurs when a valid Configure-Nak or Configure-Reject
+      packet is received from the peer.  The Configure-Nak and
+      Configure-Reject packets are negative responses to a Configure-
+      Request packet.  An out of sequence or otherwise invalid packet is
+      silently discarded.
+
+      Implementation Note:
+
+         Although the Configure-Nak and Configure-Reject cause the same
+         state transition in the automaton, these packets have
+         significantly different effects on the Configuration Options
+         sent in the resulting Configure-Request packet.
+
+   Receive-Terminate-Request (RTR)
+
+      This event occurs when a Terminate-Request packet is received.
+      The Terminate-Request packet indicates the desire of the peer to
+
+
+
+Simpson                                                        [Page 19]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+      close the connection.
+
+      Implementation Note:
+
+         This event is not identical to the Close event (see above), and
+         does not override the Open commands of the local network
+         administrator.  The implementation MUST be prepared to receive
+         a new Configure-Request without network administrator
+         intervention.
+
+   Receive-Terminate-Ack (RTA)
+
+      This event occurs when a Terminate-Ack packet is received from the
+      peer.  The Terminate-Ack packet is usually a response to a
+      Terminate-Request packet.  The Terminate-Ack packet may also
+      indicate that the peer is in Closed or Stopped states, and serves
+      to re-synchronize the link configuration.
+
+   Receive-Unknown-Code (RUC)
+
+      This event occurs when an un-interpretable packet is received from
+      the peer.  A Code-Reject packet is sent in response.
+
+   Receive-Code-Reject, Receive-Protocol-Reject (RXJ+,RXJ-)
+
+      This event occurs when a Code-Reject or a Protocol-Reject packet
+      is received from the peer.
+
+      The RXJ+ event arises when the rejected value is acceptable, such
+      as a Code-Reject of an extended code, or a Protocol-Reject of a
+      NCP.  These are within the scope of normal operation.  The
+      implementation MUST stop sending the offending packet type.
+
+      The RXJ- event arises when the rejected value is catastrophic,
+      such as a Code-Reject of Configure-Request, or a Protocol-Reject
+      of LCP!  This event communicates an unrecoverable error that
+      terminates the connection.
+
+   Receive-Echo-Request, Receive-Echo-Reply, Receive-Discard-Request
+   (RXR)
+
+      This event occurs when an Echo-Request, Echo-Reply or Discard-
+      Request packet is received from the peer.  The Echo-Reply packet
+      is a response to an Echo-Request packet.  There is no reply to an
+      Echo-Reply or Discard-Request packet.
+
+
+
+
+
+
+Simpson                                                        [Page 20]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+4.4.  Actions
+
+   Actions in the automaton are caused by events and typically indicate
+   the transmission of packets and/or the starting or stopping of the
+   Restart timer.
+
+   Illegal-Event (-)
+
+      This indicates an event that cannot occur in a properly
+      implemented automaton.  The implementation has an internal error,
+      which should be reported and logged.  No transition is taken, and
+      the implementation SHOULD NOT reset or freeze.
+
+   This-Layer-Up (tlu)
+
+      This action indicates to the upper layers that the automaton is
+      entering the Opened state.
+
+      Typically, this action is used by the LCP to signal the Up event
+      to a NCP, Authentication Protocol, or Link Quality Protocol, or
+      MAY be used by a NCP to indicate that the link is available for
+      its network layer traffic.
+
+   This-Layer-Down (tld)
+
+      This action indicates to the upper layers that the automaton is
+      leaving the Opened state.
+
+      Typically, this action is used by the LCP to signal the Down event
+      to a NCP, Authentication Protocol, or Link Quality Protocol, or
+      MAY be used by a NCP to indicate that the link is no longer
+      available for its network layer traffic.
+
+   This-Layer-Started (tls)
+
+      This action indicates to the lower layers that the automaton is
+      entering the Starting state, and the lower layer is needed for the
+      link.  The lower layer SHOULD respond with an Up event when the
+      lower layer is available.
+
+      The results of this action are highly implementation dependent.
+
+   This-Layer-Finished (tlf)
+
+      This action indicates to the lower layers that the automaton is
+      entering the Initial, Closed or Stopped states, and the lower
+      layer is no longer needed for the link.  The lower layer SHOULD
+      respond with a Down event when the lower layer has terminated.
+
+
+
+Simpson                                                        [Page 21]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+      Typically, this action MAY be used by the LCP to advance to the
+      Link Dead phase, or MAY be used by a NCP to indicate to the LCP
+      that the link may terminate when there are no other NCPs open.
+
+      The results of this action are highly implementation dependent.
+
+   Initialize-Restart-Count (irc)
+
+      This action sets the Restart counter to the appropriate value
+      (Max-Terminate or Max-Configure).  The counter is decremented for
+      each transmission, including the first.
+
+      Implementation Note:
+
+         In addition to setting the Restart counter, the implementation
+         MUST set the timeout period to the initial value when Restart
+         timer backoff is used.
+
+   Zero-Restart-Count (zrc)
+
+      This action sets the Restart counter to zero.
+
+      Implementation Note:
+
+         This action enables the FSA to pause before proceeding to the
+         desired final state, allowing traffic to be processed by the
+         peer.  In addition to zeroing the Restart counter, the
+         implementation MUST set the timeout period to an appropriate
+         value.
+
+   Send-Configure-Request (scr)
+
+      A Configure-Request packet is transmitted.  This indicates the
+      desire to open a connection with a specified set of Configuration
+      Options.  The Restart timer is started when the Configure-Request
+      packet is transmitted, to guard against packet loss.  The Restart
+      counter is decremented each time a Configure-Request is sent.
+
+   Send-Configure-Ack (sca)
+
+      A Configure-Ack packet is transmitted.  This acknowledges the
+      reception of a Configure-Request packet with an acceptable set of
+      Configuration Options.
+
+   Send-Configure-Nak (scn)
+
+      A Configure-Nak or Configure-Reject packet is transmitted, as
+      appropriate.  This negative response reports the reception of a
+
+
+
+Simpson                                                        [Page 22]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+      Configure-Request packet with an unacceptable set of Configuration
+      Options.
+
+      Configure-Nak packets are used to refuse a Configuration Option
+      value, and to suggest a new, acceptable value.  Configure-Reject
+      packets are used to refuse all negotiation about a Configuration
+      Option, typically because it is not recognized or implemented.
+      The use of Configure-Nak versus Configure-Reject is more fully
+      described in the chapter on LCP Packet Formats.
+
+   Send-Terminate-Request (str)
+
+      A Terminate-Request packet is transmitted.  This indicates the
+      desire to close a connection.  The Restart timer is started when
+      the Terminate-Request packet is transmitted, to guard against
+      packet loss.  The Restart counter is decremented each time a
+      Terminate-Request is sent.
+
+   Send-Terminate-Ack (sta)
+
+      A Terminate-Ack packet is transmitted.  This acknowledges the
+      reception of a Terminate-Request packet or otherwise serves to
+      synchronize the automatons.
+
+   Send-Code-Reject (scj)
+
+      A Code-Reject packet is transmitted.  This indicates the reception
+      of an unknown type of packet.
+
+   Send-Echo-Reply (ser)
+
+      An Echo-Reply packet is transmitted.  This acknowledges the
+      reception of an Echo-Request packet.
+
+
+
+4.5.  Loop Avoidance
+
+   The protocol makes a reasonable attempt at avoiding Configuration
+   Option negotiation loops.  However, the protocol does NOT guarantee
+   that loops will not happen.  As with any negotiation, it is possible
+   to configure two PPP implementations with conflicting policies that
+   will never converge.  It is also possible to configure policies which
+   do converge, but which take significant time to do so.  Implementors
+   should keep this in mind and SHOULD implement loop detection
+   mechanisms or higher level timeouts.
+
+
+
+
+
+Simpson                                                        [Page 23]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+4.6.  Counters and Timers
+
+   Restart Timer
+
+      There is one special timer used by the automaton.  The Restart
+      timer is used to time transmissions of Configure-Request and
+      Terminate-Request packets.  Expiration of the Restart timer causes
+      a Timeout event, and retransmission of the corresponding
+      Configure-Request or Terminate-Request packet.  The Restart timer
+      MUST be configurable, but SHOULD default to three (3) seconds.
+
+      Implementation Note:
+
+         The Restart timer SHOULD be based on the speed of the link.
+         The default value is designed for low speed (2,400 to 9,600
+         bps), high switching latency links (typical telephone lines).
+         Higher speed links, or links with low switching latency, SHOULD
+         have correspondingly faster retransmission times.
+
+         Instead of a constant value, the Restart timer MAY begin at an
+         initial small value and increase to the configured final value.
+         Each successive value less than the final value SHOULD be at
+         least twice the previous value.  The initial value SHOULD be
+         large enough to account for the size of the packets, twice the
+         round trip time for transmission at the link speed, and at
+         least an additional 100 milliseconds to allow the peer to
+         process the packets before responding.  Some circuits add
+         another 200 milliseconds of satellite delay.  Round trip times
+         for modems operating at 14,400 bps have been measured in the
+         range of 160 to more than 600 milliseconds.
+
+   Max-Terminate
+
+      There is one required restart counter for Terminate-Requests.
+      Max-Terminate indicates the number of Terminate-Request packets
+      sent without receiving a Terminate-Ack before assuming that the
+      peer is unable to respond.  Max-Terminate MUST be configurable,
+      but SHOULD default to two (2) transmissions.
+
+   Max-Configure
+
+      A similar counter is recommended for Configure-Requests.  Max-
+      Configure indicates the number of Configure-Request packets sent
+      without receiving a valid Configure-Ack, Configure-Nak or
+      Configure-Reject before assuming that the peer is unable to
+      respond.  Max-Configure MUST be configurable, but SHOULD default
+      to ten (10) transmissions.
+
+
+
+
+Simpson                                                        [Page 24]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+   Max-Failure
+
+      A related counter is recommended for Configure-Nak.  Max-Failure
+      indicates the number of Configure-Nak packets sent without sending
+      a Configure-Ack before assuming that configuration is not
+      converging.  Any further Configure-Nak packets for peer requested
+      options are converted to Configure-Reject packets, and locally
+      desired options are no longer appended.  Max-Failure MUST be
+      configurable, but SHOULD default to five (5) transmissions.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Simpson                                                        [Page 25]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+5.  LCP Packet Formats
+
+   There are three classes of LCP packets:
+
+      1. Link Configuration packets used to establish and configure a
+         link (Configure-Request, Configure-Ack, Configure-Nak and
+         Configure-Reject).
+
+      2. Link Termination packets used to terminate a link (Terminate-
+         Request and Terminate-Ack).
+
+      3. Link Maintenance packets used to manage and debug a link
+         (Code-Reject, Protocol-Reject, Echo-Request, Echo-Reply, and
+         Discard-Request).
+
+   In the interest of simplicity, there is no version field in the LCP
+   packet.  A correctly functioning LCP implementation will always
+   respond to unknown Protocols and Codes with an easily recognizable
+   LCP packet, thus providing a deterministic fallback mechanism for
+   implementations of other versions.
+
+   Regardless of which Configuration Options are enabled, all LCP Link
+   Configuration, Link Termination, and Code-Reject packets (codes 1
+   through 7) are always sent as if no Configuration Options were
+   negotiated.  In particular, each Configuration Option specifies a
+   default value.  This ensures that such LCP packets are always
+   recognizable, even when one end of the link mistakenly believes the
+   link to be open.
+
+   Exactly one LCP packet is encapsulated in the PPP Information field,
+   where the PPP Protocol field indicates type hex c021 (Link Control
+   Protocol).
+
+   A summary of the Link Control Protocol packet format is shown below.
+   The fields are transmitted from left to right.
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |     Code      |  Identifier   |            Length             |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |    Data ...
+   +-+-+-+-+
+
+
+   Code
+
+      The Code field is one octet, and identifies the kind of LCP
+
+
+
+Simpson                                                        [Page 26]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+      packet.  When a packet is received with an unknown Code field, a
+      Code-Reject packet is transmitted.
+
+      Up-to-date values of the LCP Code field are specified in the most
+      recent "Assigned Numbers" RFC [2].  This document concerns the
+      following values:
+
+         1       Configure-Request
+         2       Configure-Ack
+         3       Configure-Nak
+         4       Configure-Reject
+         5       Terminate-Request
+         6       Terminate-Ack
+         7       Code-Reject
+         8       Protocol-Reject
+         9       Echo-Request
+         10      Echo-Reply
+         11      Discard-Request
+
+
+   Identifier
+
+      The Identifier field is one octet, and aids in matching requests
+      and replies.  When a packet is received with an invalid Identifier
+      field, the packet is silently discarded without affecting the
+      automaton.
+
+   Length
+
+      The Length field is two octets, and indicates the length of the
+      LCP packet, including the Code, Identifier, Length and Data
+      fields.  The Length MUST NOT exceed the MRU of the link.
+
+      Octets outside the range of the Length field are treated as
+      padding and are ignored on reception.  When a packet is received
+      with an invalid Length field, the packet is silently discarded
+      without affecting the automaton.
+
+   Data
+
+      The Data field is zero or more octets, as indicated by the Length
+      field.  The format of the Data field is determined by the Code
+      field.
+
+
+
+
+
+
+
+
+Simpson                                                        [Page 27]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+5.1.  Configure-Request
+
+   Description
+
+      An implementation wishing to open a connection MUST transmit a
+      Configure-Request.  The Options field is filled with any desired
+      changes to the link defaults.  Configuration Options SHOULD NOT be
+      included with default values.
+
+      Upon reception of a Configure-Request, an appropriate reply MUST
+      be transmitted.
+
+   A summary of the Configure-Request packet format is shown below.  The
+   fields are transmitted from left to right.
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |     Code      |  Identifier   |            Length             |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   | Options ...
+   +-+-+-+-+
+
+
+   Code
+
+      1 for Configure-Request.
+
+   Identifier
+
+      The Identifier field MUST be changed whenever the contents of the
+      Options field changes, and whenever a valid reply has been
+      received for a previous request.  For retransmissions, the
+      Identifier MAY remain unchanged.
+
+   Options
+
+      The Options field is variable in length, and contains the list of
+      zero or more Configuration Options that the sender desires to
+      negotiate.  All Configuration Options are always negotiated
+      simultaneously.  The format of Configuration Options is further
+      described in a later chapter.
+
+
+
+
+
+
+
+
+
+Simpson                                                        [Page 28]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+5.2.  Configure-Ack
+
+   Description
+
+      If every Configuration Option received in a Configure-Request is
+      recognizable and all values are acceptable, then the
+      implementation MUST transmit a Configure-Ack.  The acknowledged
+      Configuration Options MUST NOT be reordered or modified in any
+      way.
+
+      On reception of a Configure-Ack, the Identifier field MUST match
+      that of the last transmitted Configure-Request.  Additionally, the
+      Configuration Options in a Configure-Ack MUST exactly match those
+      of the last transmitted Configure-Request.  Invalid packets are
+      silently discarded.
+
+   A summary of the Configure-Ack packet format is shown below.  The
+   fields are transmitted from left to right.
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |     Code      |  Identifier   |            Length             |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   | Options ...
+   +-+-+-+-+
+
+
+   Code
+
+      2 for Configure-Ack.
+
+   Identifier
+
+      The Identifier field is a copy of the Identifier field of the
+      Configure-Request which caused this Configure-Ack.
+
+   Options
+
+      The Options field is variable in length, and contains the list of
+      zero or more Configuration Options that the sender is
+      acknowledging.  All Configuration Options are always acknowledged
+      simultaneously.
+
+
+
+
+
+
+
+
+Simpson                                                        [Page 29]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+5.3.  Configure-Nak
+
+   Description
+
+      If every instance of the received Configuration Options is
+      recognizable, but some values are not acceptable, then the
+      implementation MUST transmit a Configure-Nak.  The Options field
+      is filled with only the unacceptable Configuration Options from
+      the Configure-Request.  All acceptable Configuration Options are
+      filtered out of the Configure-Nak, but otherwise the Configuration
+      Options from the Configure-Request MUST NOT be reordered.
+
+      Options which have no value fields (boolean options) MUST use the
+      Configure-Reject reply instead.
+
+      Each Configuration Option which is allowed only a single instance
+      MUST be modified to a value acceptable to the Configure-Nak
+      sender.  The default value MAY be used, when this differs from the
+      requested value.
+
+      When a particular type of Configuration Option can be listed more
+      than once with different values, the Configure-Nak MUST include a
+      list of all values for that option which are acceptable to the
+      Configure-Nak sender.  This includes acceptable values that were
+      present in the Configure-Request.
+
+      Finally, an implementation may be configured to request the
+      negotiation of a specific Configuration Option.  If that option is
+      not listed, then that option MAY be appended to the list of Nak'd
+      Configuration Options, in order to prompt the peer to include that
+      option in its next Configure-Request packet.  Any value fields for
+      the option MUST indicate values acceptable to the Configure-Nak
+      sender.
+
+      On reception of a Configure-Nak, the Identifier field MUST match
+      that of the last transmitted Configure-Request.  Invalid packets
+      are silently discarded.
+
+      Reception of a valid Configure-Nak indicates that when a new
+      Configure-Request is sent, the Configuration Options MAY be
+      modified as specified in the Configure-Nak.  When multiple
+      instances of a Configuration Option are present, the peer SHOULD
+      select a single value to include in its next Configure-Request
+      packet.
+
+      Some Configuration Options have a variable length.  Since the
+      Nak'd Option has been modified by the peer, the implementation
+      MUST be able to handle an Option length which is different from
+
+
+
+Simpson                                                        [Page 30]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+      the original Configure-Request.
+
+   A summary of the Configure-Nak packet format is shown below.  The
+   fields are transmitted from left to right.
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |     Code      |  Identifier   |            Length             |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   | Options ...
+   +-+-+-+-+
+
+
+   Code
+
+      3 for Configure-Nak.
+
+   Identifier
+
+      The Identifier field is a copy of the Identifier field of the
+      Configure-Request which caused this Configure-Nak.
+
+   Options
+
+      The Options field is variable in length, and contains the list of
+      zero or more Configuration Options that the sender is Nak'ing.
+      All Configuration Options are always Nak'd simultaneously.
+
+
+
+5.4.  Configure-Reject
+
+   Description
+
+      If some Configuration Options received in a Configure-Request are
+      not recognizable or are not acceptable for negotiation (as
+      configured by a network administrator), then the implementation
+      MUST transmit a Configure-Reject.  The Options field is filled
+      with only the unacceptable Configuration Options from the
+      Configure-Request.  All recognizable and negotiable Configuration
+      Options are filtered out of the Configure-Reject, but otherwise
+      the Configuration Options MUST NOT be reordered or modified in any
+      way.
+
+      On reception of a Configure-Reject, the Identifier field MUST
+      match that of the last transmitted Configure-Request.
+      Additionally, the Configuration Options in a Configure-Reject MUST
+
+
+
+Simpson                                                        [Page 31]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+      be a proper subset of those in the last transmitted Configure-
+      Request.  Invalid packets are silently discarded.
+
+      Reception of a valid Configure-Reject indicates that when a new
+      Configure-Request is sent, it MUST NOT include any of the
+      Configuration Options listed in the Configure-Reject.
+
+   A summary of the Configure-Reject packet format is shown below.  The
+   fields are transmitted from left to right.
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |     Code      |  Identifier   |            Length             |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   | Options ...
+   +-+-+-+-+
+
+
+   Code
+
+      4 for Configure-Reject.
+
+   Identifier
+
+      The Identifier field is a copy of the Identifier field of the
+      Configure-Request which caused this Configure-Reject.
+
+   Options
+
+      The Options field is variable in length, and contains the list of
+      zero or more Configuration Options that the sender is rejecting.
+      All Configuration Options are always rejected simultaneously.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Simpson                                                        [Page 32]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+5.5.  Terminate-Request and Terminate-Ack
+
+   Description
+
+      LCP includes Terminate-Request and Terminate-Ack Codes in order to
+      provide a mechanism for closing a connection.
+
+      An implementation wishing to close a connection SHOULD transmit a
+      Terminate-Request.  Terminate-Request packets SHOULD continue to
+      be sent until Terminate-Ack is received, the lower layer indicates
+      that it has gone down, or a sufficiently large number have been
+      transmitted such that the peer is down with reasonable certainty.
+
+      Upon reception of a Terminate-Request, a Terminate-Ack MUST be
+      transmitted.
+
+      Reception of an unsolicited Terminate-Ack indicates that the peer
+      is in the Closed or Stopped states, or is otherwise in need of
+      re-negotiation.
+
+   A summary of the Terminate-Request and Terminate-Ack packet formats
+   is shown below.  The fields are transmitted from left to right.
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |     Code      |  Identifier   |            Length             |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |    Data ...
+   +-+-+-+-+
+
+
+   Code
+
+      5 for Terminate-Request;
+
+      6 for Terminate-Ack.
+
+   Identifier
+
+      On transmission, the Identifier field MUST be changed whenever the
+      content of the Data field changes, and whenever a valid reply has
+      been received for a previous request.  For retransmissions, the
+      Identifier MAY remain unchanged.
+
+      On reception, the Identifier field of the Terminate-Request is
+      copied into the Identifier field of the Terminate-Ack packet.
+
+
+
+
+Simpson                                                        [Page 33]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+   Data
+
+      The Data field is zero or more octets, and contains uninterpreted
+      data for use by the sender.  The data may consist of any binary
+      value.  The end of the field is indicated by the Length.
+
+
+
+5.6.  Code-Reject
+
+   Description
+
+      Reception of an LCP packet with an unknown Code indicates that the
+      peer is operating with a different version.  This MUST be reported
+      back to the sender of the unknown Code by transmitting a Code-
+      Reject.
+
+      Upon reception of the Code-Reject of a code which is fundamental
+      to this version of the protocol, the implementation SHOULD report
+      the problem and drop the connection, since it is unlikely that the
+      situation can be rectified automatically.
+
+   A summary of the Code-Reject packet format is shown below.  The
+   fields are transmitted from left to right.
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |     Code      |  Identifier   |            Length             |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   | Rejected-Packet ...
+   +-+-+-+-+-+-+-+-+
+
+
+   Code
+
+      7 for Code-Reject.
+
+   Identifier
+
+      The Identifier field MUST be changed for each Code-Reject sent.
+
+   Rejected-Packet
+
+      The Rejected-Packet field contains a copy of the LCP packet which
+      is being rejected.  It begins with the Information field, and does
+      not include any Data Link Layer headers nor an FCS.  The
+      Rejected-Packet MUST be truncated to comply with the peer's
+
+
+
+Simpson                                                        [Page 34]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+      established MRU.
+
+
+
+5.7.  Protocol-Reject
+
+   Description
+
+      Reception of a PPP packet with an unknown Protocol field indicates
+      that the peer is attempting to use a protocol which is
+      unsupported.  This usually occurs when the peer attempts to
+      configure a new protocol.  If the LCP automaton is in the Opened
+      state, then this MUST be reported back to the peer by transmitting
+      a Protocol-Reject.
+
+      Upon reception of a Protocol-Reject, the implementation MUST stop
+      sending packets of the indicated protocol at the earliest
+      opportunity.
+
+      Protocol-Reject packets can only be sent in the LCP Opened state.
+      Protocol-Reject packets received in any state other than the LCP
+      Opened state SHOULD be silently discarded.
+
+   A summary of the Protocol-Reject packet format is shown below.  The
+   fields are transmitted from left to right.
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |     Code      |  Identifier   |            Length             |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |       Rejected-Protocol       |      Rejected-Information ...
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+   Code
+
+      8 for Protocol-Reject.
+
+   Identifier
+
+      The Identifier field MUST be changed for each Protocol-Reject
+      sent.
+
+   Rejected-Protocol
+
+      The Rejected-Protocol field is two octets, and contains the PPP
+      Protocol field of the packet which is being rejected.
+
+
+
+Simpson                                                        [Page 35]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+   Rejected-Information
+
+      The Rejected-Information field contains a copy of the packet which
+      is being rejected.  It begins with the Information field, and does
+      not include any Data Link Layer headers nor an FCS.  The
+      Rejected-Information MUST be truncated to comply with the peer's
+      established MRU.
+
+
+
+5.8.  Echo-Request and Echo-Reply
+
+   Description
+
+      LCP includes Echo-Request and Echo-Reply Codes in order to provide
+      a Data Link Layer loopback mechanism for use in exercising both
+      directions of the link.  This is useful as an aid in debugging,
+      link quality determination, performance testing, and for numerous
+      other functions.
+
+      Upon reception of an Echo-Request in the LCP Opened state, an
+      Echo-Reply MUST be transmitted.
+
+      Echo-Request and Echo-Reply packets MUST only be sent in the LCP
+      Opened state.  Echo-Request and Echo-Reply packets received in any
+      state other than the LCP Opened state SHOULD be silently
+      discarded.
+
+
+   A summary of the Echo-Request and Echo-Reply packet formats is shown
+   below.  The fields are transmitted from left to right.
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |     Code      |  Identifier   |            Length             |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                         Magic-Number                          |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |    Data ...
+   +-+-+-+-+
+
+
+   Code
+
+      9 for Echo-Request;
+
+      10 for Echo-Reply.
+
+
+
+Simpson                                                        [Page 36]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+   Identifier
+
+      On transmission, the Identifier field MUST be changed whenever the
+      content of the Data field changes, and whenever a valid reply has
+      been received for a previous request.  For retransmissions, the
+      Identifier MAY remain unchanged.
+
+      On reception, the Identifier field of the Echo-Request is copied
+      into the Identifier field of the Echo-Reply packet.
+
+   Magic-Number
+
+      The Magic-Number field is four octets, and aids in detecting links
+      which are in the looped-back condition.  Until the Magic-Number
+      Configuration Option has been successfully negotiated, the Magic-
+      Number MUST be transmitted as zero.  See the Magic-Number
+      Configuration Option for further explanation.
+
+   Data
+
+      The Data field is zero or more octets, and contains uninterpreted
+      data for use by the sender.  The data may consist of any binary
+      value.  The end of the field is indicated by the Length.
+
+
+
+5.9.  Discard-Request
+
+   Description
+
+      LCP includes a Discard-Request Code in order to provide a Data
+      Link Layer sink mechanism for use in exercising the local to
+      remote direction of the link.  This is useful as an aid in
+      debugging, performance testing, and for numerous other functions.
+
+      Discard-Request packets MUST only be sent in the LCP Opened state.
+      On reception, the receiver MUST silently discard any Discard-
+      Request that it receives.
+
+
+
+
+
+
+
+
+
+
+
+
+
+Simpson                                                        [Page 37]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+   A summary of the Discard-Request packet format is shown below.  The
+   fields are transmitted from left to right.
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |     Code      |  Identifier   |            Length             |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                         Magic-Number                          |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |    Data ...
+   +-+-+-+-+
+
+   Code
+
+      11 for Discard-Request.
+
+   Identifier
+
+      The Identifier field MUST be changed for each Discard-Request
+      sent.
+
+   Magic-Number
+
+      The Magic-Number field is four octets, and aids in detecting links
+      which are in the looped-back condition.  Until the Magic-Number
+      Configuration Option has been successfully negotiated, the Magic-
+      Number MUST be transmitted as zero.  See the Magic-Number
+      Configuration Option for further explanation.
+
+   Data
+
+      The Data field is zero or more octets, and contains uninterpreted
+      data for use by the sender.  The data may consist of any binary
+      value.  The end of the field is indicated by the Length.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Simpson                                                        [Page 38]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+6.  LCP Configuration Options
+
+   LCP Configuration Options allow negotiation of modifications to the
+   default characteristics of a point-to-point link.  If a Configuration
+   Option is not included in a Configure-Request packet, the default
+   value for that Configuration Option is assumed.
+
+   Some Configuration Options MAY be listed more than once.  The effect
+   of this is Configuration Option specific, and is specified by each
+   such Configuration Option description.  (None of the Configuration
+   Options in this specification can be listed more than once.)
+
+   The end of the list of Configuration Options is indicated by the
+   Length field of the LCP packet.
+
+   Unless otherwise specified, all Configuration Options apply in a
+   half-duplex fashion; typically, in the receive direction of the link
+   from the point of view of the Configure-Request sender.
+
+   Design Philosophy
+
+      The options indicate additional capabilities or requirements of
+      the implementation that is requesting the option.  An
+      implementation which does not understand any option SHOULD
+      interoperate with one which implements every option.
+
+      A default is specified for each option which allows the link to
+      correctly function without negotiation of the option, although
+      perhaps with less than optimal performance.
+
+      Except where explicitly specified, acknowledgement of an option
+      does not require the peer to take any additional action other than
+      the default.
+
+      It is not necessary to send the default values for the options in
+      a Configure-Request.
+
+
+   A summary of the Configuration Option format is shown below.  The
+   fields are transmitted from left to right.
+
+    0                   1
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |     Type      |    Length     |    Data ...
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+
+
+Simpson                                                        [Page 39]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+   Type
+
+      The Type field is one octet, and indicates the type of
+      Configuration Option.  Up-to-date values of the LCP Option Type
+      field are specified in the most recent "Assigned Numbers" RFC [2].
+      This document concerns the following values:
+
+         0       RESERVED
+         1       Maximum-Receive-Unit
+         3       Authentication-Protocol
+         4       Quality-Protocol
+         5       Magic-Number
+         7       Protocol-Field-Compression
+         8       Address-and-Control-Field-Compression
+
+
+   Length
+
+      The Length field is one octet, and indicates the length of this
+      Configuration Option including the Type, Length and Data fields.
+
+      If a negotiable Configuration Option is received in a Configure-
+      Request, but with an invalid or unrecognized Length, a Configure-
+      Nak SHOULD be transmitted which includes the desired Configuration
+      Option with an appropriate Length and Data.
+
+   Data
+
+      The Data field is zero or more octets, and contains information
+      specific to the Configuration Option.  The format and length of
+      the Data field are determined by the Type and Length fields.
+
+      When the Data field is indicated by the Length to extend beyond
+      the end of the Information field, the entire packet is silently
+      discarded without affecting the automaton.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Simpson                                                        [Page 40]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+6.1.  Maximum-Receive-Unit (MRU)
+
+   Description
+
+      This Configuration Option may be sent to inform the peer that the
+      implementation can receive larger packets, or to request that the
+      peer send smaller packets.
+
+      The default value is 1500 octets.  If smaller packets are
+      requested, an implementation MUST still be able to receive the
+      full 1500 octet information field in case link synchronization is
+      lost.
+
+      Implementation Note:
+
+         This option is used to indicate an implementation capability.
+         The peer is not required to maximize the use of the capacity.
+         For example, when a MRU is indicated which is 2048 octets, the
+         peer is not required to send any packet with 2048 octets.  The
+         peer need not Configure-Nak to indicate that it will only send
+         smaller packets, since the implementation will always require
+         support for at least 1500 octets.
+
+   A summary of the Maximum-Receive-Unit Configuration Option format is
+   shown below.  The fields are transmitted from left to right.
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |     Type      |    Length     |      Maximum-Receive-Unit     |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+   Type
+
+      1
+
+   Length
+
+      4
+
+   Maximum-Receive-Unit
+
+      The Maximum-Receive-Unit field is two octets, and specifies the
+      maximum number of octets in the Information and Padding fields.
+      It does not include the framing, Protocol field, FCS, nor any
+      transparency bits or bytes.
+
+
+
+
+Simpson                                                        [Page 41]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+6.2.  Authentication-Protocol
+
+   Description
+
+      On some links it may be desirable to require a peer to
+      authenticate itself before allowing network-layer protocol packets
+      to be exchanged.
+
+      This Configuration Option provides a method to negotiate the use
+      of a specific protocol for authentication.  By default,
+      authentication is not required.
+
+      An implementation MUST NOT include multiple Authentication-
+      Protocol Configuration Options in its Configure-Request packets.
+      Instead, it SHOULD attempt to configure the most desirable
+      protocol first.  If that protocol is Configure-Nak'd, then the
+      implementation SHOULD attempt the next most desirable protocol in
+      the next Configure-Request.
+
+      The implementation sending the Configure-Request is indicating
+      that it expects authentication from its peer.  If an
+      implementation sends a Configure-Ack, then it is agreeing to
+      authenticate with the specified protocol.  An implementation
+      receiving a Configure-Ack SHOULD expect the peer to authenticate
+      with the acknowledged protocol.
+
+      There is no requirement that authentication be full-duplex or that
+      the same protocol be used in both directions.  It is perfectly
+      acceptable for different protocols to be used in each direction.
+      This will, of course, depend on the specific protocols negotiated.
+
+   A summary of the Authentication-Protocol Configuration Option format
+   is shown below.  The fields are transmitted from left to right.
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |     Type      |    Length     |     Authentication-Protocol   |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |    Data ...
+   +-+-+-+-+
+
+
+   Type
+
+      3
+
+
+
+
+
+Simpson                                                        [Page 42]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+   Length
+
+      >= 4
+
+   Authentication-Protocol
+
+      The Authentication-Protocol field is two octets, and indicates the
+      authentication protocol desired.  Values for this field are always
+      the same as the PPP Protocol field values for that same
+      authentication protocol.
+
+      Up-to-date values of the Authentication-Protocol field are
+      specified in the most recent "Assigned Numbers" RFC [2].  Current
+      values are assigned as follows:
+
+      Value (in hex)  Protocol
+
+      c023            Password Authentication Protocol
+      c223            Challenge Handshake Authentication Protocol
+
+
+   Data
+
+      The Data field is zero or more octets, and contains additional
+      data as determined by the particular protocol.
+
+
+
+6.3.  Quality-Protocol
+
+   Description
+
+      On some links it may be desirable to determine when, and how
+      often, the link is dropping data.  This process is called link
+      quality monitoring.
+
+      This Configuration Option provides a method to negotiate the use
+      of a specific protocol for link quality monitoring.  By default,
+      link quality monitoring is disabled.
+
+      The implementation sending the Configure-Request is indicating
+      that it expects to receive monitoring information from its peer.
+      If an implementation sends a Configure-Ack, then it is agreeing to
+      send the specified protocol.  An implementation receiving a
+      Configure-Ack SHOULD expect the peer to send the acknowledged
+      protocol.
+
+      There is no requirement that quality monitoring be full-duplex or
+
+
+
+Simpson                                                        [Page 43]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+      that the same protocol be used in both directions.  It is
+      perfectly acceptable for different protocols to be used in each
+      direction.  This will, of course, depend on the specific protocols
+      negotiated.
+
+   A summary of the Quality-Protocol Configuration Option format is
+   shown below.  The fields are transmitted from left to right.
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |     Type      |    Length     |        Quality-Protocol       |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |    Data ...
+   +-+-+-+-+
+
+
+   Type
+
+      4
+
+   Length
+
+      >= 4
+
+   Quality-Protocol
+
+      The Quality-Protocol field is two octets, and indicates the link
+      quality monitoring protocol desired.  Values for this field are
+      always the same as the PPP Protocol field values for that same
+      monitoring protocol.
+
+      Up-to-date values of the Quality-Protocol field are specified in
+      the most recent "Assigned Numbers" RFC [2].  Current values are
+      assigned as follows:
+
+      Value (in hex)  Protocol
+
+      c025            Link Quality Report
+
+
+   Data
+
+      The Data field is zero or more octets, and contains additional
+      data as determined by the particular protocol.
+
+
+
+
+
+
+Simpson                                                        [Page 44]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+6.4.  Magic-Number
+
+   Description
+
+      This Configuration Option provides a method to detect looped-back
+      links and other Data Link Layer anomalies.  This Configuration
+      Option MAY be required by some other Configuration Options such as
+      the Quality-Protocol Configuration Option.  By default, the
+      Magic-Number is not negotiated, and zero is inserted where a
+      Magic-Number might otherwise be used.
+
+      Before this Configuration Option is requested, an implementation
+      MUST choose its Magic-Number.  It is recommended that the Magic-
+      Number be chosen in the most random manner possible in order to
+      guarantee with very high probability that an implementation will
+      arrive at a unique number.  A good way to choose a unique random
+      number is to start with a unique seed.  Suggested sources of
+      uniqueness include machine serial numbers, other network hardware
+      addresses, time-of-day clocks, etc.  Particularly good random
+      number seeds are precise measurements of the inter-arrival time of
+      physical events such as packet reception on other connected
+      networks, server response time, or the typing rate of a human
+      user.  It is also suggested that as many sources as possible be
+      used simultaneously.
+
+      When a Configure-Request is received with a Magic-Number
+      Configuration Option, the received Magic-Number is compared with
+      the Magic-Number of the last Configure-Request sent to the peer.
+      If the two Magic-Numbers are different, then the link is not
+      looped-back, and the Magic-Number SHOULD be acknowledged.  If the
+      two Magic-Numbers are equal, then it is possible, but not certain,
+      that the link is looped-back and that this Configure-Request is
+      actually the one last sent.  To determine this, a Configure-Nak
+      MUST be sent specifying a different Magic-Number value.  A new
+      Configure-Request SHOULD NOT be sent to the peer until normal
+      processing would cause it to be sent (that is, until a Configure-
+      Nak is received or the Restart timer runs out).
+
+      Reception of a Configure-Nak with a Magic-Number different from
+      that of the last Configure-Nak sent to the peer proves that a link
+      is not looped-back, and indicates a unique Magic-Number.  If the
+      Magic-Number is equal to the one sent in the last Configure-Nak,
+      the possibility of a looped-back link is increased, and a new
+      Magic-Number MUST be chosen.  In either case, a new Configure-
+      Request SHOULD be sent with the new Magic-Number.
+
+      If the link is indeed looped-back, this sequence (transmit
+      Configure-Request, receive Configure-Request, transmit Configure-
+
+
+
+Simpson                                                        [Page 45]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+      Nak, receive Configure-Nak) will repeat over and over again.  If
+      the link is not looped-back, this sequence might occur a few
+      times, but it is extremely unlikely to occur repeatedly.  More
+      likely, the Magic-Numbers chosen at either end will quickly
+      diverge, terminating the sequence.  The following table shows the
+      probability of collisions assuming that both ends of the link
+      select Magic-Numbers with a perfectly uniform distribution:
+
+         Number of Collisions        Probability
+         --------------------   -----------------------
+                 1              1/2**32      = 2.3 E-10
+                 2              (1/2**32)**2 = 5.4 E-20
+                 3              (1/2**32)**3 = 1.3 E-29
+
+
+      Good sources of uniqueness or randomness are required for this
+      divergence to occur.  If a good source of uniqueness cannot be
+      found, it is recommended that this Configuration Option not be
+      enabled; Configure-Requests with the option SHOULD NOT be
+      transmitted and any Magic-Number Configuration Options which the
+      peer sends SHOULD be either acknowledged or rejected.  In this
+      case, looped-back links cannot be reliably detected by the
+      implementation, although they may still be detectable by the peer.
+
+      If an implementation does transmit a Configure-Request with a
+      Magic-Number Configuration Option, then it MUST NOT respond with a
+      Configure-Reject when it receives a Configure-Request with a
+      Magic-Number Configuration Option.  That is, if an implementation
+      desires to use Magic Numbers, then it MUST also allow its peer to
+      do so.  If an implementation does receive a Configure-Reject in
+      response to a Configure-Request, it can only mean that the link is
+      not looped-back, and that its peer will not be using Magic-
+      Numbers.  In this case, an implementation SHOULD act as if the
+      negotiation had been successful (as if it had instead received a
+      Configure-Ack).
+
+      The Magic-Number also may be used to detect looped-back links
+      during normal operation, as well as during Configuration Option
+      negotiation.  All LCP Echo-Request, Echo-Reply, and Discard-
+      Request packets have a Magic-Number field.  If Magic-Number has
+      been successfully negotiated, an implementation MUST transmit
+      these packets with the Magic-Number field set to its negotiated
+      Magic-Number.
+
+      The Magic-Number field of these packets SHOULD be inspected on
+      reception.  All received Magic-Number fields MUST be equal to
+      either zero or the peer's unique Magic-Number, depending on
+      whether or not the peer negotiated a Magic-Number.
+
+
+
+Simpson                                                        [Page 46]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+      Reception of a Magic-Number field equal to the negotiated local
+      Magic-Number indicates a looped-back link.  Reception of a Magic-
+      Number other than the negotiated local Magic-Number, the peer's
+      negotiated Magic-Number, or zero if the peer didn't negotiate one,
+      indicates a link which has been (mis)configured for communications
+      with a different peer.
+
+      Procedures for recovery from either case are unspecified, and may
+      vary from implementation to implementation.  A somewhat
+      pessimistic procedure is to assume a LCP Down event.  A further
+      Open event will begin the process of re-establishing the link,
+      which can't complete until the looped-back condition is
+      terminated, and Magic-Numbers are successfully negotiated.  A more
+      optimistic procedure (in the case of a looped-back link) is to
+      begin transmitting LCP Echo-Request packets until an appropriate
+      Echo-Reply is received, indicating a termination of the looped-
+      back condition.
+
+   A summary of the Magic-Number Configuration Option format is shown
+   below.  The fields are transmitted from left to right.
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |     Type      |    Length     |          Magic-Number
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+         Magic-Number (cont)       |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+   Type
+
+      5
+
+   Length
+
+      6
+
+   Magic-Number
+
+      The Magic-Number field is four octets, and indicates a number
+      which is very likely to be unique to one end of the link.  A
+      Magic-Number of zero is illegal and MUST always be Nak'd, if it is
+      not Rejected outright.
+
+
+
+
+
+
+
+Simpson                                                        [Page 47]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+6.5.  Protocol-Field-Compression (PFC)
+
+   Description
+
+      This Configuration Option provides a method to negotiate the
+      compression of the PPP Protocol field.  By default, all
+      implementations MUST transmit packets with two octet PPP Protocol
+      fields.
+
+      PPP Protocol field numbers are chosen such that some values may be
+      compressed into a single octet form which is clearly
+      distinguishable from the two octet form.  This Configuration
+      Option is sent to inform the peer that the implementation can
+      receive such single octet Protocol fields.
+
+      As previously mentioned, the Protocol field uses an extension
+      mechanism consistent with the ISO 3309 extension mechanism for the
+      Address field; the Least Significant Bit (LSB) of each octet is
+      used to indicate extension of the Protocol field.  A binary "0" as
+      the LSB indicates that the Protocol field continues with the
+      following octet.  The presence of a binary "1" as the LSB marks
+      the last octet of the Protocol field.  Notice that any number of
+      "0" octets may be prepended to the field, and will still indicate
+      the same value (consider the two binary representations for 3,
+      00000011 and 00000000 00000011).
+
+      When using low speed links, it is desirable to conserve bandwidth
+      by sending as little redundant data as possible.  The Protocol-
+      Field-Compression Configuration Option allows a trade-off between
+      implementation simplicity and bandwidth efficiency.  If
+      successfully negotiated, the ISO 3309 extension mechanism may be
+      used to compress the Protocol field to one octet instead of two.
+      The large majority of packets are compressible since data
+      protocols are typically assigned with Protocol field values less
+      than 256.
+
+      Compressed Protocol fields MUST NOT be transmitted unless this
+      Configuration Option has been negotiated.  When negotiated, PPP
+      implementations MUST accept PPP packets with either double-octet
+      or single-octet Protocol fields, and MUST NOT distinguish between
+      them.
+
+      The Protocol field is never compressed when sending any LCP
+      packet.  This rule guarantees unambiguous recognition of LCP
+      packets.
+
+      When a Protocol field is compressed, the Data Link Layer FCS field
+      is calculated on the compressed frame, not the original
+
+
+
+Simpson                                                        [Page 48]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+      uncompressed frame.
+
+   A summary of the Protocol-Field-Compression Configuration Option
+   format is shown below.  The fields are transmitted from left to
+   right.
+
+    0                   1
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |     Type      |    Length     |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+   Type
+
+      7
+
+   Length
+
+      2
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Simpson                                                        [Page 49]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+6.6.  Address-and-Control-Field-Compression (ACFC)
+
+   Description
+
+      This Configuration Option provides a method to negotiate the
+      compression of the Data Link Layer Address and Control fields.  By
+      default, all implementations MUST transmit frames with Address and
+      Control fields appropriate to the link framing.
+
+      Since these fields usually have constant values for point-to-point
+      links, they are easily compressed.  This Configuration Option is
+      sent to inform the peer that the implementation can receive
+      compressed Address and Control fields.
+
+      If a compressed frame is received when Address-and-Control-Field-
+      Compression has not been negotiated, the implementation MAY
+      silently discard the frame.
+
+      The Address and Control fields MUST NOT be compressed when sending
+      any LCP packet.  This rule guarantees unambiguous recognition of
+      LCP packets.
+
+      When the Address and Control fields are compressed, the Data Link
+      Layer FCS field is calculated on the compressed frame, not the
+      original uncompressed frame.
+
+   A summary of the Address-and-Control-Field-Compression Configuration
+   Option format is shown below.  The fields are transmitted from left
+   to right.
+
+    0                   1
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |     Type      |    Length     |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+   Type
+
+      8
+
+   Length
+
+      2
+
+
+
+
+
+
+
+Simpson                                                        [Page 50]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+Security Considerations
+
+   Security issues are briefly discussed in sections concerning the
+   Authentication Phase, the Close event, and the Authentication-
+   Protocol Configuration Option.
+
+
+
+References
+
+   [1]   Perkins, D., "Requirements for an Internet Standard Point-to-
+         Point Protocol", RFC 1547, Carnegie Mellon University,
+         December 1993.
+
+   [2]   Reynolds, J., and Postel, J., "Assigned Numbers", STD 2, RFC
+         1340, USC/Information Sciences Institute, July 1992.
+
+
+Acknowledgements
+
+   This document is the product of the Point-to-Point Protocol Working
+   Group of the Internet Engineering Task Force (IETF).  Comments should
+   be submitted to the ietf-ppp@merit.edu mailing list.
+
+   Much of the text in this document is taken from the working group
+   requirements [1]; and RFCs 1171 & 1172, by Drew Perkins while at
+   Carnegie Mellon University, and by Russ Hobby of the University of
+   California at Davis.
+
+   William Simpson was principally responsible for introducing
+   consistent terminology and philosophy, and the re-design of the phase
+   and negotiation state machines.
+
+   Many people spent significant time helping to develop the Point-to-
+   Point Protocol.  The complete list of people is too numerous to list,
+   but the following people deserve special thanks: Rick Adams, Ken
+   Adelman, Fred Baker, Mike Ballard, Craig Fox, Karl Fox, Phill Gross,
+   Kory Hamzeh, former WG chair Russ Hobby, David Kaufman, former WG
+   chair Steve Knowles, Mark Lewis, former WG chair Brian Lloyd, John
+   LoVerso, Bill Melohn, Mike Patton, former WG chair Drew Perkins, Greg
+   Satz, John Shriver, Vernon Schryver, and Asher Waldfogel.
+
+   Special thanks to Morning Star Technologies for providing computing
+   resources and network access support for writing this specification.
+
+
+
+
+
+
+
+Simpson                                                        [Page 51]
+RFC 1661                Point-to-Point Protocol                July 1994
+
+
+Chair's Address
+
+   The working group can be contacted via the current chair:
+
+      Fred Baker
+      Advanced Computer Communications
+      315 Bollay Drive
+      Santa Barbara, California  93117
+
+      fbaker@acc.com
+
+
+
+Editor's Address
+
+   Questions about this memo can also be directed to:
+
+      William Allen Simpson
+      Daydreamer
+      Computer Systems Consulting Services
+      1384 Fontaine
+      Madison Heights, Michigan  48071
+
+      Bill.Simpson@um.cc.umich.edu
+          bsimpson@MorningStar.com
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Simpson                                                        [Page 52]
+
+
diff --git a/ext/picotcp/RFC/rfc1693.txt b/ext/picotcp/RFC/rfc1693.txt
new file mode 100644
index 0000000..0ee3e3f
--- /dev/null
+++ b/ext/picotcp/RFC/rfc1693.txt
@@ -0,0 +1,2019 @@
+
+
+
+
+
+
+Network Working Group                                       T.  Connolly
+Request for Comments: 1693                                       P. Amer
+Category: Experimental                                         P. Conrad
+                                                  University of Delaware
+                                                           November 1994
+
+
+              An Extension to TCP : Partial Order Service
+
+Status of This Memo
+
+   This memo defines an Experimental Protocol for the Internet
+   community.  This memo does not specify an Internet standard of any
+   kind.  Discussion and suggestions for improvement are requested.
+   Distribution of this memo is unlimited.
+
+IESG Note:
+
+   Note that the work contained in this memo does not describe an
+   Internet standard.  The Transport AD and Transport Directorate do not
+   recommend the implementation of the TCP modifications described.
+   However, outside the context of TCP, we find that the memo offers a
+   useful analysis of how misordered and incomplete data may be handled.
+   See, for example, the discussion of Application Layer Framing by D.
+   Clark and D. Tennenhouse in, "Architectural Considerations for a New
+   Generation of Protocols", SIGCOMM 90 Proceedings, ACM, September 1990.
+
+Abstract
+
+   This RFC introduces a new transport mechanism for TCP based upon
+   partial ordering.  The aim is to present the concepts of partial
+   ordering and promote discussions on its usefulness in network
+   communications.  Distribution of this memo is unlimited.
+
+Introduction
+
+   A service which allows partial order delivery and partial reliability
+   is one which requires some, but not all objects to be received in the
+   order transmitted while also allowing objects to be transmitted
+   unreliably (i.e., some may be lost).
+
+   The realization of such a service requires, (1) communication and/or
+   negotiation of what constitutes a valid ordering and/or loss-level,
+   and (2) an algorithm which enables the receiver to ascertain the
+   deliverability of objects as they arrive.  These issues are addressed
+   here - both conceptually and formally - summarizing the results of
+   research and initial implementation efforts.
+
+
+
+
+Connolly, Amer & Conrad                                         [Page 1]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   The authors envision the use of a partial order service within a
+   connection-oriented, transport protocol such as TCP providing a
+   further level of granularity to the transport user in terms of the
+   type and quality of offered service.  This RFC focuses specifically
+   on extending TCP to provide partial order connections.
+
+   The idea of a partial order service is not limited to TCP. It may be
+   considered a useful option for any transport protocol and we
+   encourage researchers and practitioners to investigate further the
+   most effective uses for partial ordering whether in a next-generation
+   TCP, or another general purpose protocol such as XTP, or perhaps
+   within a "special purpose" protocol tailored to a specific
+   application and network profile.
+
+   Finally, while the crux of this RFC relates to and introduces a new
+   way of considering object ordering, a number of other classic
+   transport mechanisms are also seen in a new light - among these are
+   reliability, window management and data acknowledgments.
+
+   Keywords: partial order, quality of service, reliability, multimedia,
+   client/server database, Windows, transport protocol
+
+Table of Contents
+
+   1. Introduction and motivation ..................................  3
+   2. Partial Order Delivery .......................................  4
+   2.1 Example 1: Remote Database ..................................  4
+   2.2 Example 2: Multimedia .......................................  8
+   2.3 Example 3: Windows Screen Refresh ...........................  9
+   2.4 Potential Savings ........................................... 10
+   3. Reliability vs. Order ........................................ 12
+   3.1 Reliability Classes ......................................... 13
+   4. Partial Order Connection ..................................... 15
+   4.1 Connection Establishment .................................... 16
+   4.2 Data Transmission ........................................... 19
+   4.2.1 Sender .................................................... 22
+   4.2.2 Receiver .................................................. 25
+   5. Quantifying and Comparing Partial Order Services ............. 30
+   6. Future Direction ............................................. 31
+   7. Summary ...................................................... 32
+   8. References ................................................... 34
+   Security Considerations ......................................... 35
+   Authors' Addresses .............................................. 36
+
+
+
+
+
+
+
+
+Connolly, Amer & Conrad                                         [Page 2]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+1. Introduction and motivation
+
+   Current applications that need to communicate objects (i.e., octets,
+   packets, frames, protocol data units) usually choose between a fully
+   ordered service such as that currently provided by TCP and one that
+   does not guarantee any ordering such as that provided by UDP.  A
+   similar "all-or-nothing" choice is made for object reliability -
+   reliable connections which guarantee all objects will be delivered
+   versus unreliable data transport which makes no guarantee.  What is
+   more appropriate for some applications is a partial order and/or
+   partial reliability service where a subset of objects being
+   communicated must arrive in the order transmitted, yet some objects
+   may arrive in a different order, and some (well specified subset) of
+   the objects may not arrive at all.
+
+   One motivating application for a partial order service is the
+   emerging area of multimedia communications.  Multimedia traffic is
+   often characterized either by periodic, synchronized parallel streams
+   of information (e.g., combined audio-video), or by structured image
+   streams (e.g., displays of multiple overlapping and nonoverlapping
+   windows).  These applications have a high degree of tolerance for
+   less-than-fully-ordered data transport as well as data loss.  Thus
+   they are ideal candidates for using a partial order, partial
+   reliability service.  In general, any application which communicates
+   parallel and/or independent data structures may potentially be able
+   to profit from a partial order service.
+
+   A second application that could benefit from a partial order service
+   involves remote or distributed databases.  Imagine the case where a
+   database user transmitting queries to a remote server expects objects
+   (or records) to be returned in some order, although not necessarily
+   total order.  For example a user writing an SQL data query might
+   specify this with the "order by" clause.  There exist today a great
+   number of commercial implementations of distributed databases which
+   utilize - and thus are penalized by - an ordered delivery service.
+
+   Currently these applications must use and pay for a fully
+   ordered/fully reliable service even though they do not need it.  The
+   introduction of partial services allows applications to lower the
+   demanded quality of service (QOS) of the communication assuming that
+   such a service is more efficient and less costly.  In effect, a
+   partial order extends the service level from two extremes - ordered
+   and unordered - to a range of discrete values encompassing both of
+   the extremes and all possible partial orderings in between.  A
+   similar phenomenon is demonstrated in the area of reliability.
+
+
+
+
+
+
+Connolly, Amer & Conrad                                         [Page 3]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   It is worth mentioning that a TCP implementation providing a partial
+   order service, as described here, would be able to communicate with a
+   non-partial order implementation simply by recognizing this fact at
+   connection establishment - hence this extension is backward
+   compatible with earlier versions of TCP.  Furthermore, it is
+   conceivable for a host to support the sending-half (or receiving-
+   half) of a partial order connection alone to reduce the size of the
+   TCP as well as the effort involved in the implementation.  Similar
+   "levels of conformance" have been proposed in other internet
+   extensions such as [Dee89] involving IP multicasting.
+
+   This RFC proceeds as follows.  The principles of partial order
+   delivery, published in [ACCD93a], are presented in Section 2.  The
+   notion of partial reliability, published in [ACCD93b], is introduced
+   in Section 3 followed by an explanation of "reliability classes".
+   Then, the practical issues involved with setting up and maintaining a
+   Partial Order Connection (POC) within a TCP framework are addressed
+   in Section 4 looking first at connection establishment, and then
+   discussing the sender's role and the receiver's role.  Section 5
+   provides insights into the expected performance improvements of a
+   partial order service over an ordered service and Section 6 discusses
+   some future directions.  Concluding remarks are given in Section 7.
+
+2. Partial Order Delivery
+
+   Partial order services are needed and can be employed as soon as a
+   complete ordering is not mandatory.  When two objects can be
+   delivered in either order, there is no need to use an ordered service
+   that must delay delivery of the second one transmitted until the
+   first arrives as the following examples demonstrate.
+
+2.1 Example 1: Remote Database
+
+   Simpson's Sporting Goods (SSG) has recently installed a state-of-
+   the-art enterprise-wide network.  Their first "network application"
+   is a client/server SQL database with the following four records,
+   numbered {1 2 3 4} for convenience:
+
+         SALESPERSON    LOCATION           CHARGES    DESCRIPTION
+         -------------  -----------------  ---------  -----------------
+      1  Anderson       Atlanta, GA        $4,200     Camping Gear
+      2  Baker          Boston, MA           $849     Camping Gear
+      3  Crowell        Boston, MA         $9,500     Sportswear
+      4  Dykstra        Wash., DC          $1,000     Sportswear
+
+   SSG employees running the client-side of the application can query
+   the database server from any location in the enterprise net using
+   standard SQL commands and the results will be displayed on their
+
+
+
+Connolly, Amer & Conrad                                         [Page 4]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   screen.  From the employee's perspective, the network is completely
+   reliable and delivers data (records) in an order that conforms to
+   their SQL request.  In reality though, it is the transport layer
+   protocol which provides the reliability and order on top of an
+   unreliable network layer - one which introduces loss, duplication,
+   and disorder.
+
+   Consider the four cases in Figure 1 - in the first query (1.a),
+   ordered by SALESPERSON, the records have only one acceptable order at
+   the destination, 1,2,3,4.  This is evident due to the fact that there
+   are four distinct salespersons.  If record 2 is received before
+   record 1 due to a network loss during transmission, the transport
+   service can not deliver it and must therefore buffer it until record
+   1 arrives.  An ordered service, also referred to as a virtual circuit
+   or FIFO channel, provides the desired level of service in this case.
+
+   At the other extreme, an unordered service is motivated in Figure 1.d
+   where the employee has implicitly specified that any ordering is
+   valid simply by omitting the "order by" clause.  Here any of 4! = 24
+   delivery orderings would satisfy the application, or from the
+   transport layer's point of view, all records are immediately
+   deliverable as soon as they arrive from the network.  No record needs
+   to be buffered should it arrive out of sequential order.  As notation, 4
+   ordered objects are written 1;2;3;4 and 4 unordered objects are
+   written using a parallel operator: 1||2||3||4.
+
+   Figures 1.b and 1.c demonstrate two possible partial orders that
+   permit 2 and 4 orderings respectively at the destination.  Using the
+   notation just described, the valid orderings for the query in 1.b are
+   specified as 1;(2||3);4, which is to say that record 1 must be
+   delivered first followed by record 2 and 3 in either order followed
+   by record 4.  Likewise, the ordering for 1.c is (1||2);(3||4).  In
+   these two cases, an ordered service is too strict and an unordered
+   service is not strict enough.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Connolly, Amer & Conrad                                         [Page 5]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   +-----------------------------------------------------------------+
+   |    SELECT SALESPERSON, LOCATION, CHARGES, DESCRIPTION           |
+   |    FROM BILLING_TABLE                                           |
+   |                                                                 |
+   |    SALESPERSON    LOCATION           CHARGES    DESCRIPTION     |
+   |    -------------  -----------------  ---------  --------------- |
+   | 1  Anderson       Atlanta, GA        $4,200     Camping Gear    |
+   | 2  Baker          Boston, MA           $849     Camping Gear    |
+   | 3  Crowell        Boston, MA         $9,500     Sportswear      |
+   | 4  Dykstra        Wash., DC          $1,000     Sportswear      |
+   +=================================================================+
+   |a -  ORDER BY SALESPERSON                                        |
+   |                                                                 |
+   |  1,2,3,4                                          1,2,3,4       |
+   |                                                                 |
+   | Sender ----------->   NETWORK   -------------->   Receiver      |
+   |                                              (1 valid ordering) |
+   +-----------------------------------------------------------------+
+   |b -  ORDER BY LOCATION                                           |
+   |                                                   1,2,3,4       |
+   |  1,2,3,4                                          1,3,2,4       |
+   |                                                                 |
+   | Sender ----------->   NETWORK   -------------->   Receiver      |
+   |                                             (2 valid orderings) |
+   +-----------------------------------------------------------------+
+   |c -  ORDER BY DESCRIPTION                                        |
+   |                                                   1,2,3,4       |
+   |                                                   2,1,3,4       |
+   | 1,2,3,4                                           1,2,4,3       |
+   |                                                   2,1,4,3       |
+   |                                                                 |
+   | Sender ----------->   NETWORK   -------------->   Receiver      |
+   |                                             (4 valid orderings) |
+   +-----------------------------------------------------------------+
+   |d - (no order by clause)                                         |
+   |                                                   1,2,3,4       |
+   |                                                   1,2,4,3       |
+   | 1,2,3,4                                             ...         |
+   |                                                   4,3,2,1       |
+   |                                                                 |
+   | Sender ----------->   NETWORK   -------------->   Receiver      |
+   |                                         (4!=24 valid orderings) |
+   +-----------------------------------------------------------------+
+      Figure 1: Ordered vs. Partial Ordered vs. Unordered Delivery
+
+   It is vital for the transport layer to recognize the exact
+   requirements of the application and to ensure that these are met.
+   However, there is no inherent need to exceed these requirements; on
+
+
+
+Connolly, Amer & Conrad                                         [Page 6]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   the contrary, by exceeding these requirements unnecessary resources
+   are consumed.  This example application requires a reliable
+   connection - all records must eventually be delivered - but has some
+   flexibility when it comes to record ordering.
+
+   In this example, each query has a different partial order.  In total,
+   there exist 16 different partial orders for the desired 4 records.
+   For an arbitrary number of objects N, there exist many possible
+   partial orders each of which accepts some number of valid orderings
+   between 1 and N!  (which correspond to the ordered and unordered
+   cases respectively).  For some classes of partial orders, the number
+   of valid orderings can be calculated easily, for others this
+   calculation is intractable.  An in-depth discussion on calculating
+   and comparing the number of orderings for a given partial order can
+   be found in [ACCD93a].
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Connolly, Amer & Conrad                                         [Page 7]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+2.2 Example 2: Multimedia
+
+   A second example application that motivates a partial order service
+   is a multimedia broadcast involving video, audio and text components.
+   Consider an extended presentation of the evening news - extended to
+   include two distinct audio channels, a text subtitle and a closed-
+   captioned sign language video for the hearing impaired, in addition
+   to the normal video signal, as modeled by the following diagram.
+
+            (left audio)                     (right audio)
+              +------+                         +------+
+              | ++++ |                         | ++++ |
+              | ++++ |                         | ++++ |
+              +------+                         +------+
+         ===================================================
+         I                                +---------------+I
+         I                                |               |I
+         I                                |  (hand signs) |I
+         I                                |               |I
+         I                                +---------------+I
+         I                                                 I
+         I                                                 I
+         I          (Main Video)                           I
+         I                                                 I
+         I                                                 I
+         I                                                 I
+         I                                                 I
+         I  +------------------------------------------+   I
+         I  |     (text subtitle)                      |   I
+         I  +------------------------------------------+   I
+         I                                                 I
+         ===================================================
+            Figure 2: Multimedia broadcast example
+
+  The multimedia signals have differing characteristics.  The main video
+  signal may consist of full image graphics at a rate of 30 images/sec
+  while the video of hand signs requires a lower quality, say 10
+  images/sec.  Assume the audio signals are each divided into 60 sound
+  fragments/sec and the text object each second consists of either (1)
+  new text, (2) a command to keep the previous second of text, or (3) a
+  command for no subtitle.
+
+  During a one-second interval of the broadcast, a sender transmits 30
+  full-motion video images, 10 closed-captioned hand sign images, 60
+  packets of a digitized audio signal for each of the audio streams and
+  a single text packet.  The following diagram then might represent the
+  characteristics of the multimedia presentation in terms of the media
+  types, the number of each, and their ordering.  Objects connected by a
+
+
+
+Connolly, Amer & Conrad                                         [Page 8]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+  horizontal line must be received in order, while those in parallel
+  have no inherent ordering requirement.
+
++----------------------------------------------------------------------+
+|                                                                      |
+|  |-o-|-o-|-o-|-o-|-o-|-o-|-o-|-o-|-o-...-o-|-o-|-o-|  right audio    |
+|  |   |   |   |   |   |   |   |   |         |   |   |  (60/sec)       |
+|  |   |   |   |   |   |   |   |   |         |   |   |                 |
+|  |-o-|-o-|-o-|-o-|-o-|-o-|-o-|-o-|-o-...-o-|-o-|-o-|  left audio     |
+|  |       |       |       |       |         |       |  (60/sec)       |
+|  |       |       |       |       |         |       |                 |
+|  |---o---|---o---|---o---|---o---|---...---|---o---|  normal video   |
+|  |                       |                         |  (30/sec)       |
+|  |                       |                         |                 |
+|  |-----------o-----------|--------o--...--------o--|  hand signs     |
+|  |                                                 |  (10/sec)       |
+|  |                                                 |                 |
+|  |-----------------------------o-----...-----------|  text           |
+|  |                                                 |  (1/sec)        |
+|                                                                      |
++----------------------------------------------------------------------+
+          Figure 3: Object ordering in multimedia application
+
+   Of particular interest to our discussion of partial ordering is the
+   fact that, while objects of a given media type generally must be
+   received in order, there exists flexibility between the separate
+   "streams" of multimedia data (where a "stream" represents the
+   sequence of objects for a specific media type).  Another significant
+   characteristic of this example is the repeating nature of the object
+   orderings.  Figure 3 represents a single, one-second, partial order
+   snapshot in a stream of possibly thousands of repeating sequential
+   periods of communication.
+
+   It is assumed that further synchronization concerns in presenting the
+   objects are addressed by a service provided on top of the proposed
+   partial order service.  Temporal ordering for synchronized playback
+   is considered, for example, in [AH91, HKN91].
+
+2.3 Example 3: Windows Screen Refresh
+
+   A third example to motivate a partial order service involves
+   refreshing a workstation screen/display containing multiple windows
+   from a remote source.  In this case, objects (icons, still or video
+   images) that do not overlap have a "parallel" relationship (i.e.,
+   their order of refreshing is independent) while overlapping screen
+   objects have a "sequential" relationship and should be delivered in
+   order.  Therefore, the way in which the windows overlap induces a
+   partial order.
+
+
+
+Connolly, Amer & Conrad                                         [Page 9]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   Consider the two cases in Figure 4.  A sender wishes to refresh a
+   remote display that contains four active windows (objects) named {1 2
+   3 4}.  Assume the windows are transmitted in numerical order and the
+   receiving application refreshes windows as soon as the transport
+   service delivers them.  If the windows are configured as in Figure
+   4a, then there exist two different orderings for redisplay, namely
+   1,2,3,4 or 1,3,2,4.  If window 2 is received before window 1, the
+   transport service cannot deliver it or an incorrect image will be
+   displayed.  In Figure 4b, the structure of the windows results in six
+   possible orderings - 1,2,3,4 or 1,3,2,4 or 1,3,4,2 or 3,4,1,2 or
+   3,1,4,2 or 3,1,2,4.
+
+       +================================+============================+
+       |a       +-----------+           |b   +----------+            |
+       |        | 1         |           |    | 1        |            |
+       |        |           |           |    |     +----------+      |
+       |  +---------+    +----------+   |    +-----| 2        |      |
+       |  | 2       |----| 3        |   |          |          |      |
+       |  |     +-----------+       |   |          +----------+      |
+       |  |     | 4         |       |   |    +----------+            |
+       |  +-----|           |-------+   |    | 3        |            |
+       |        |           |           |    |      +----------+     |
+       |        +-----------+           |    +------| 4        |     |
+       |                                |           |          |     |
+       |                                |           +----------+     |
+       |                                |                            |
+       |        1;(2||3);4              |       (1;2)||(3;4)         |
+       +================================+============================+
+                     Figure 4: Window screen refresh
+
+2.4 Potential Savings
+
+   In each of these examples, the valid orderings are strictly dependent
+   upon, and must be specified by the application.  Intuitively, as the
+   number of acceptable orderings increases, the amount of resources
+   utilized by a partial order transport service, in terms of buffers
+   and retransmissions, should decrease as compared to a fully ordered
+   transport service thus also decreasing the overall cost of the
+   connection.  Just how much lower will depend largely upon the
+   flexibility of the application and the quality of the underlying
+   network.
+
+   As an indication of the potential for improved service, let us
+   briefly look at the case where a database has the following 14
+   records.
+
+
+
+
+
+
+Connolly, Amer & Conrad                                        [Page 10]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+          SALESPERSON    LOCATION           CHARGES    DESCRIPTION
+          -------------  -----------------  ---------  ---------------
+       1  Anderson       Washington          $4,200    Camping Gear
+       2  Anderson       Philadelphia        $2,000    Golf Equipment
+       3  Anderson       Boston                $450    Bowling shoes
+       4  Baker          Boston                $849    Sportswear
+       5  Baker          Washington          $3,100    Weights
+       6  Baker          Washington          $2,000    Camping Gear
+       7  Baker          Atlanta               $290    Baseball Gloves
+       8  Baker          Boston              $1,500    Sportswear
+       9  Crowell        Boston              $9,500    Camping Gear
+      10  Crowell        Philadelphia        $6,000    Exercise Bikes
+      11  Crowell        New York            $1,500    Sportswear
+      12  Dykstra        Atlanta             $1,000    Sportswear
+      13  Dykstra        Dallas             $15,000    Rodeo Gear
+      14  Dykstra        Miami               $3,200    Golf Equipment
+
+   Using formulas derived in [ACCD93a] one may calculate the total
+   number of valid orderings for any partial order that can be
+   represented in the notation mentioned previously.  For the case where
+   a user specifies "ORDER BY SALESPERSON", the partial order above can
+   be expressed as,
+
+          (1||2||3);(4||5||6||7||8);(9||10||11);(12||13||14)
+
+   Of the 14!=87,178,291,200 total possible combinations, there exist
+   25,920 valid orderings at the destination.  A service that may
+   deliver the records in any of these 25,920 orderings has a great deal
+   more flexibility than in the ordered case where there is only 1 valid
+   order for 14 objects.  It is interesting to consider the real
+   possibility of hundreds or even thousands of objects and the
+   potential savings in communication costs.
+
+   In all cases, the underlying network is assumed to be unreliable and
+   may thus introduce loss, duplication, and disorder.  It makes no
+   sense to put a partial order service on top of a reliable network.
+   While the exact amount of unreliability in a network may vary and is
+   not always well understood, initial experimental research indicates
+   that real world networks, for example the service provided by the
+   Internet's IP level, "yield high losses, duplicates and reorderings
+   of packets" [AS93,BCP93].  The authors plan to conduct further
+   experimentation into measuring Internet network unreliability.  This
+   information would say a great deal about the practical merit of a
+   partial order service.
+
+
+
+
+
+
+
+Connolly, Amer & Conrad                                        [Page 11]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+3. Reliability vs. Order
+
+   While TCP avoids the loss of even a single object, in fact for many
+   applications, there exists a genuine ability to tolerate loss.
+   Losing one frame per second in a 30 frame per second video or losing
+   a segment of its accompanying audio channel is usually not a problem.
+   Bearing this in mind, it is of value to consider a quality of service
+   that combines a partial order with a level of tolerated loss (partial
+   reliability).  Traditionally there exist 4 services: reliable-
+   ordered, reliable-unordered, unreliable-ordered, and unreliable-
+   unordered. See Figure 5.  Reliable-ordered service (denoted by a
+   single point) represents the case where all objects are delivered in
+   the order transmitted.  File transfer is an example application
+   requiring such a service.
+
+                   reliable-ordered                  reliable-unordered
+                      |                                 |
+                      |                                 |
+                      v                                 v
+          zero loss-->*---------------------------------*
+           min loss-->|<--                              |<--
+                .     |                                 |
+                .     |<--                              |<--
+                      |                                 |
+                      |<-- unreliable-                  |<-- unreliable-
+     RELIABILITY      |      ordered                    |     unordered
+                      |<--                              |<--
+                      |                                 |
+                      |<--                              |<--
+           max loss-->|                                 |
+                      +-+--+--+--+--+--+--+--+--+--+--+-+
+                   ordered       partial ordered     unordered
+
+                                   ORDER
+
+         Figure 5: Quality Of Service: Reliability vs. Order -
+                   Traditional Service Types
+
+   In a reliable-unordered service (also a single point), all objects
+   must be delivered, but not necessarily according to the order
+   transmitted; in fact, any order will suffice.  Some transaction
+   processing applications such as credit card verification require such
+   a service.
+
+   Unreliable-ordered service allows some objects to be lost.  Those
+   that are delivered, however, must arrive in relative order (An
+   "unreliable" service does not necessarily lose objects; rather, it
+   may do so without failing to provide its advertised quality of
+
+
+
+Connolly, Amer & Conrad                                        [Page 12]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   service; e.g., the postal system provides an unreliable service).
+   Since there are varying degrees of unreliability, this service is
+   represented by a set of points in Figure 5.  An unreliable-ordered
+   service is applicable to packet-voice or teleconferencing
+   applications.
+
+   Finally unreliable-unordered service allows objects to be lost and
+   delivered in any order.  This is the kind of service used for normal
+   e-mail (without acknowledgment receipts) and electronic announcements
+   or junk e-mail.
+
+   As mentioned previously, the concept of a partial order expands the
+   order dimension from the two extremes of ordered and unordered to a
+   range of discrete possibilities as depicted in Figure 6.
+   Additionally, as will be discussed presently, the notion of
+   reliability is extended to allow for varying degrees of reliability
+   on a per-object basis providing even greater flexibility and improved
+   resource utilization.
+
+                                reliable-PO
+
+                      |  |  |  |  |  |  |  |  |  |  |   |
+                      |  |  |  |  |  |  |  |  |  |  |   |
+                      v  v  v  v  v  v  v  v  v  v  v   v
+          zero loss-->*---------------------------------*
+           min loss-->| .  .  .  .  .  .  .  .  .  .  . |
+                .     | .  .  .  .  .  .  .  .  .  .  . |
+                .     | .  .  .  .  .  .  .  .  .  .  . |
+                      | .  .  .                 .  .  . |
+     RELIABILITY      | .  .  .  unreliable-PO  .  .  . |
+                      | .  .  .  .  .  .  .  .  .  .  . |
+                      | .  .  .  .  .  .  .  .  .  .  . |
+                      | .  .  .  .  .  .  .  .  .  .  . |
+                      | .  .  .  .  .  .  .  .  .  .  . |
+           max loss-->| .  .  .  .  .  .  .  .  .  .  . |
+                      +-+--+--+--+--+--+--+--+--+--+--+-+
+                   ordered       partial ordered     unordered
+
+                                   ORDER
+
+         Figure 6: Quality Of Service: Reliability vs. Order - Partial
+                   Order Service
+
+3.1 Reliability Classes
+
+   When considering unreliable service, one cannot assume that all
+   objects are equal with regard to their reliability.  This
+   classification is reasonable if all objects are identical (e.g.,
+
+
+
+Connolly, Amer & Conrad                                        [Page 13]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   video frames in a 30 frame/second film).  Many applications, such as
+   multimedia systems, however, often contain a variety of object types.
+   Thus three object reliability classes are proposed: BART-NL, BART-L,
+   and NBART-L.  Objects are assigned to one of these classes depending
+   on their temporal value as will be shown presently.
+
+   BART-NL objects must be delivered to the destination.  These objects
+   have temporal value that lasts for an entire established connection
+   and require reliable delivery (NL =  No Loss allowed).  An example of
+   BART-NL objects would be the database records in Example 2.1 or the
+   windows in the screen refresh in Example 2.3.  If all objects are of
+   type BART-NL, the service is reliable.  One possible way to assure
+   eventual delivery of a BART-NL object in a protocol is for the sender
+   to buffer it, start a timeout timer, and retransmit it if no ACK
+   arrives before the timeout.  The receiver in turn returns an ACK when
+   the object has safely arrived and been delivered (BART = Buffers,
+   ACKs, Retransmissions, Timers).
+
+   BART-L objects are those that have temporal value over some
+   intermediate amount of time - enough to permit timeout and
+   retransmission, but not everlasting.  Once the temporal value of
+   these objects has expired, it is better to presume them lost than to
+   delay further the delivery pipeline of information.  One possibility
+   for deciding when an object's usefulness has expired is to require
+   each object to contain information defining its precise temporal
+   value [DS93].  An example of a BART-L object would be a movie
+   subtitle, sent in parallel with associated film images, which is
+   valuable any time during a twenty second film sequence.  If not
+   delivered sometime during the first ten seconds, the subtitle loses
+   its value and can be presumed lost.  These objects are buffered-
+   ACKed-retransmitted up to a certain point in time and then presumed
+   lost.
+
+   NBART-L objects are those with temporal values too short to bother
+   timing out and retransmitting.  An example of a NBART-L object would
+   be a single packet of speech in a packetized phone conversation or
+   one image in a 30 image/sec film.  A sender transmits these objects
+   once and the service makes a best effort to deliver them.  If the one
+   attempt is unsuccessful, no further attempts are made.
+
+   An obvious question comes to mind - what about NBART-NL objects?  Do
+   such objects exist?  The authors have considered the notion of
+   communicating an object without the use of BART and still being able
+   to provide a service without loss.  Perhaps with the use of forward
+   error correction this may become a viable alternative and could
+   certainly be included in the protocol.  However, for our purposes in
+   this document, only the first three classifications will be
+   considered.
+
+
+
+Connolly, Amer & Conrad                                        [Page 14]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   While classic transport protocols generally treat all objects
+   equally, the sending and receiving functions of a protocol providing
+   partial order/partial reliability service will behave differently for
+   each class of object.  For example, a sender buffers and, if
+   necessary, retransmits any BART-NL or BART-L objects that are not
+   acknowledged within a predefined timeout period.  On the contrary,
+   NBART-L objects are forgotten as soon as they are transmitted.
+
+4. Partial Order Connection
+
+   The implementation of a protocol that provides partial order service
+   requires, at a minimum, (1) communication of the partial ordering
+   between the two endpoints, and (2) dynamic evaluation of the
+   deliverability of objects as they arrive at the receiver.  In
+   addition, this RFC describes the mechanisms needed to (3) initiate a
+   connection, (4) provide varying degrees of reliability for the
+   objects being transmitted, and (5) improve buffer utilization at the
+   sender based on object reliability.
+
+   Throughout the discussion of these issues, the authors use the
+   generic notion of "objects" in describing the service details.  Thus,
+   one of the underlying requirements of a partial order service is the
+   ability to handle such an abstraction (e.g., recognize object
+   boundaries).  The details of object management are implementation
+   dependent and thus are not specified in this RFC.  However, as this
+   represents a potential fundamental change to the TCP protocol, some
+   discussion is in order.
+
+   At one extreme, it is possible to consider octets as objects and
+   require that the application specify the partial order accordingly
+   (octet by octet).  This likely would entail an inordinate amount of
+   overhead, processing each octet on an individual basis (literally
+   breaking up contiguous segments to determine which, if any, octets
+   are deliverable and which are not).  At the other extreme, the
+   transport protocol could maintain object atomicity regardless of size
+   - passing arbitrarily large data structures to IP for transmission.
+   At the sending side of the connection this would actually work since
+   IP is prepared to perform source fragmentation; however, there is no
+   guarantee that the receiving IP will be able to reassemble the
+   fragments!  IP relies on the TCP max segment size to prevent this
+   situation from occurring [LMKQ89].
+
+   A more realistic approach given the existing IP constraints might be
+   to maintain the current notion of a TCP max segment size for the
+   lower-layer interface with IP while allowing a much larger object
+   size at the upper-layer interface.  Of course this presents some
+   additional complexities.  First of all, the transport layer will now
+   have to be concerned with fragmentation/reassembly of objects larger
+
+
+
+Connolly, Amer & Conrad                                        [Page 15]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   than the max segment size and secondly, the increased object sizes
+   will require significantly more buffer space at the receiver if we
+   want to buffer the object until it arrives in entirety.
+   Alternatively, one may consider delivering "fragments" of an object
+   as they arrive as long as the ordering of the fragments is correct
+   and the application is able to process the fragments (this notion of
+   fragmented delivery is discussed further in Section 6).
+
+4.1 Connection Establishment
+
+   By extending the transport paradigm to allow partial ordering and
+   reliability classes, a user application may be able to take advantage
+   of a more efficient data transport facility by negotiating the
+   optimal service level which is required - no more, no less.  This is
+   accomplished by specifying these variables as QOS parameters or, in
+   TCP terminology, as options to be included in the TCP header [Pos81].
+
+   A TCP implementation that provides a partial order service requires
+   the use of two new TCP options.  The first is an enabling option
+   "POC-permitted" (Partial Order Connection Permitted) that may be used
+   in a SYN segment to request a partial order service.  The other is
+   the "POC-service-profile" option which is used periodically to
+   communicate the service characteristics.  This second option may be
+   sent only after successful transmission and acknowledgment of the
+   POC-permitted option.
+
+   A user process issuing either an active or passive OPEN may choose to
+   include the POC-permitted option if the application can benefit from
+   the use of a partial order service and in fact, in cases where the
+   viability of such service is unknown, it is suggested that the option
+   be used and that the decision be left to the user's peer.
+
+   For example, a multimedia server might issue a passive <SYN> with the
+   POC-permitted option in preparation for the connection by a remote
+   user.
+
+   Upon reception of a <SYN> segment with the POC-permitted option, the
+   receiving user has the option to respond with a similar POC-permitted
+   indication or may reject a partial order connection if the
+   application does not warrant the service or the receiving user is
+   simply unable to provide such a service (e.g., does not recognize the
+   POC-permitted option).
+
+   In the event that simultaneous initial <SYN> segments are exchanged,
+   the TCP will initiate a partial order connection only if both sides
+   include the POC-permitted option.
+
+
+
+
+
+Connolly, Amer & Conrad                                        [Page 16]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   A brief example should help to demonstrate this procedure.  The
+   following notation (a slight simplification on that employed in RFC
+   793) will be used.  Each line is numbered for reference purposes.
+   TCP-A (on the left) will play the role of the receiver and TCP-B will
+   be the sender.  Right arrows  (-->) indicate departure of a TCP
+   segment from TCP-A to TCP-B, or arrival of a segment at B from A.
+   Left arrows indicate the reverse.  TCP states represent the state
+   AFTER the departure or arrival of the segment (whose contents are
+   shown in the center of the line).  Liberties are taken with the
+   contents of the segments where only the fields of interest are shown.
+
+         TCP-A                                              TCP-B
+
+      1. CLOSED                                             LISTEN
+
+      2. SYN-SENT    --> <CTL=SYN><POC-perm>            --> SYN-RECEIVED
+
+      3. ESTABLISHED <-- <CTL=SYN,ACK><POC-perm>        <-- SYN-RECEIVED
+
+      4. ESTABLISHED --> <CTL=ACK>                      --> ESTABLISHED
+
+        Figure 7. Basic 3-Way handshake for a partial order connection
+
+   In line 1 of Figure 7, the sending user has already issued a passive
+   OPEN with the POC-permitted option and is waiting for a connection.
+   In line 2, the receiving user issues an active OPEN with the same
+   option which in turn prompts TCP-A to send a SYN segment with the
+   POC-permitted option and enter the SYN-SENT state.  TCP-B is able to
+   confirm the use of a PO connection and does so in line 3, after which
+   TCP-A enters the established state and completes the connection with
+   an ACK segment in line 4.
+
+   In the event that either side is unable to provide partial order
+   service, the POC-permitted option will be omitted and normal TCP
+   processing will ensue.
+
+   For completeness, the authors include the following specification for
+   both the POC-permitted option and the POC-service-profile option in a
+   format consistent with the TCP specification document [Pos81].
+
+      TCP POC-permitted Option:
+
+         Kind: 9  Length: 2 bytes
+
+             +-----------+-------------+
+             |  Kind=9   |  Length=2   |
+             +-----------+-------------+
+
+
+
+
+Connolly, Amer & Conrad                                        [Page 17]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+      TCP POC-service-profile Option:
+
+         Kind: 10  Length: 3 bytes
+
+                                       1 bit        1 bit    6 bits
+             +----------+----------+------------+----------+--------+
+             |  Kind=10 | Length=3 | Start_flag | End_flag | Filler |
+             +----------+----------+------------+----------+--------+
+
+   The first option represents a simple indicator communicated between
+   the two peer transport entities and needs no further explanation.
+   The second option serves to communicate the information necessary to
+   carry out the job of the protocol - the type of information which is
+   typically found in the header of a TCP segment - and raises some
+   interesting questions.
+
+   Standard TCP maintains a 60-byte maximum header size on all segments.
+   The obvious intuition behind this rule is that one would like to
+   minimize the amount of overhead information present in each packet
+   while simultaneously increasing the payload, or data, section.  While
+   this is acceptable for most TCP connections today, a partial-order
+   service would necessarily require that significantly more control
+   information be passed between transport entities at certain points
+   during a connection.  Maintaining the strict interpretation of this
+   rule would prove to be inefficient.  If, for example, the service
+   profile occupied a total of 400 bytes (a modest amount as will be
+   confirmed in the next section), then one would have to fragment this
+   information across at least 10 segments, allocating 20 bytes per
+   segment for the normal TCP header.
+
+   Instead, the authors propose that the service profile be carried in
+   the data section of the segment and that the 3-byte POC-service-
+   profile option described above be placed in the header to indicate
+   the presence of this information.  Upon reception of such a segment,
+   the TCP extracts the service profile and uses it appropriately as
+   will be discussed in the following sections.
+
+   The option itself, as shown here, contains two 1-bit flags necessary
+   to handle the case where the service profile does not fit in a single
+   TCP segment.  The "Start_flag" indicates that the information in the
+   data section represents the beginning of the service profile and the
+   "End_flag" represents the converse.  For service profiles which fit
+   completely in a single segment, both flags will be set to 1.
+   Otherwise, the Start_flag is set in the initial segment and the
+   End_flag in the final segment allowing the peer entity to reconstruct
+   the entire service profile (using the normal sequence numbers in the
+   segment header).  The "Filler" field serves merely to complete the
+   third byte of the option.
+
+
+
+Connolly, Amer & Conrad                                        [Page 18]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   Note that the length of the service profile may vary during the
+   connection as the order or reliability requirements of the user
+   change but this length must not exceed the buffering ability of the
+   peer TCP entity since the entire profile must be stored.  The exact
+   makeup of this data structure is presented in Section 4.2.
+
+4.2 Data Transmission
+
+   Examining the characteristics of a partial order TCP in chronological
+   fashion, one would start off with the establishment of a connection
+   as described in Section 4.1.  After that, although both ends have
+   acknowledged the acceptability of partial order transport, neither
+   has actually begun a partial order transmission - in other words,
+   both the sending-side and the receiving-side are operating in a
+   normal, ordered-reliable mode.  For the subsequent discussion, an
+   important distinction is made in the terms sending-side and
+   receiving-side which refer to the data flow from the sender and that
+   from the receiver, respectively.
+
+   For the partial ordering to commence, the TCP must be made aware of
+   the acceptable object orderings and reliability for both the send-
+   side and receive-side of the connection for a given set of objects
+   (hereafter referred to as a "period").  This information is contained
+   in the service profile and it is the responsibility of the user
+   application to define this profile.  Unlike standard TCP where
+   applications implicitly define a reliable, ordered profile, with
+   partial order TCP, the application must explicitly define a profile.
+
+   The representation of the service profile is one of the concerns for
+   the transport protocol.  It would be useful if the TCP could encode a
+   partial ordering in as few bits as possible since these bits will be
+   transmitted to the destination each time the partial order changes.
+   A matrix representation appears to be well-suited to encoding the
+   partial order and a vector has been proposed to communicate and
+   manage the reliability aspects of the service.  Temporal values may
+   be included within the objects themselves or may be defined as a
+   function of the state of the connection [DS93].  Using these data
+   structures, the complete service profile would include (1) a partial
+   order matrix, (2) a reliability vector and (3) an object_sizes vector
+   which represents the size of the objects in octets (see
+   [ACCD93a,CAC93] for a discussion on alternative structures for these
+   variables).
+
+   Throughout this section, we use the following service profile as a
+   running example.  Shown here is a partial order matrix and graphical
+   representation for a simple partial order with 6 objects -
+   ((1;2)||(3;4)||5);6.  In the graphical diagram, arrows (-->) denote
+   sequential order and objects in parallel can be delivered in either
+
+
+
+Connolly, Amer & Conrad                                        [Page 19]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   order.  So in this example, object 2 must be delivered after object
+   1, object 4 must be delivered after object 3, and object 6 must be
+   delivered after objects 1 through 5 have all been delivered.  Among
+   the 6 objects, there are 30 valid orderings for this partial order
+   (each valid ordering is known as a linear extension of the partial
+   order).
+
+                1 2 3 4 5 6
+              +-------------+
+            1 | - 1 0 0 0 1 |         |               |       |
+            2 | - - 0 0 0 1 |         |-->1-->|-->2-->|       |
+            3 | - - - 1 0 1 |         |               |       |
+            4 | - - - - 0 1 |         |-->3-->|-->4-->|-->6-->|
+            5 | - - - - - 1 |         |               |       |
+            6 | - - - - - - |         |------>5------>|       |
+              +-------------+         |               |       |
+
+                 PO Matrix                 PO Graph
+
+
+   In the matrix, a 1 in row i of column j denotes that object i must be
+   delivered before object j.  Note that if objects are numbered in any
+   way such that 1,2,3,...,N is a valid ordering, only the upper right
+   triangle of the transitively closed matrix is needed [ACCD93a].
+   Thus, for N objects, the partial order can be encoded in (N*(N-1)/2)
+   bits.
+
+   The reliability vector for the case where reliability classes are
+   enumerated types such as {BART-NL=1, BART-L=2, NBART-L = 3} and all
+   objects are BART-NL would simply be, <1, 1, 1, 1, 1, 1>.  Together
+   with the object_sizes vector, the complete service profile is
+   described.
+
+   This information must be packaged and communicated to the sending TCP
+   before the first object is transmitted using a TCP service primitive
+   or comparable means depending upon the User/TCP interface.  Once the
+   service profile has been specified to the TCP, it remains in effect
+   until the connection is closed or the sending user specifies a new
+   service profile.  In the event that the largest object size can not
+   be processed by the receiving TCP, the user application is informed
+   that the connection cannot be maintained and the normal connection
+   close procedure is followed.
+
+   Typically, as has been described here, the service profile definition
+   and specification is handled at the sending end of the connection,
+   but there could be applications (such as the screen refresh) where
+   the receiving user has this knowledge.  Under these circumstances the
+   receiving user is obliged to transmit the object ordering on the
+
+
+
+Connolly, Amer & Conrad                                        [Page 20]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   return side of the connection (e.g., when making the request for a
+   screen refresh) and have the sender interpret this data to be used on
+   the send side of the connection.
+
+   Requiring that the sending application specify the service profile is
+   not an arbitrary choice.  To ensure proper object identification, the
+   receiving application must transmit the new object numbering to the
+   sending application (not the sending transport layer).  Since the
+   sending application must receive this information in any case, it
+   simplifies matters greatly to require that the sending application be
+   the only side that may specify the service profile to the transport
+   layer.
+
+   Consider now the layered architecture diagram in Figure 8 and assume
+   that a connection already is established.  Let us now say that UserA
+   specifies the service profile for the sending-side of the connection
+   via its interface with TCP-A.  TCP-A places the profile in the data
+   section of one or more packets (depending on the size of the service
+   profile, the profile may require several packets), sets the POC-
+   service-profile option and passes it to IP for transmission over the
+   network.  This packet must be transmitted reliably, therefore TCP-A
+   buffers it and starts a normal retransmit timer.  Subsequently, the
+   service profile arrives at the destination node and is handed to
+   TCP-B (as indicated by the arrows in Figure 8).  TCP-B returns an
+   acknowledgment and immediately adopts the service profile for one
+   direction of data flow over the connection.  When the acknowledgment
+   arrives back at TCP-A, the cycle is complete and both sides are now
+   able to use the partial order service.
+
+                 +--------+                +----------+
+        Service  | UserA  |                | UserB    |
+        Profile  +--------+                +----------+
+          |          |                           |
+          |          |                           |
+          v          |                           |
+          |      +---------+               +-----------+    Service
+          |      |  TCP-A  |               |  TCP-B    |    Profile
+          |      +---------+               +-----------+       ^
+          |          |                           |             |
+          |          |                           |             |
+          |          |                           |             |
+          |      +---------------------------------------+     |
+          v      |                                       |     |
+          ------>| ---- Service Profile ------------->   |----->
+                 +---------------------------------------+
+
+          Figure 8. Layered Communication Architecture
+
+
+
+
+Connolly, Amer & Conrad                                        [Page 21]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   Note that one of the TCP entities learns of the profile via its user
+   interface, while the other TCP entity is informed via its network
+   interface.
+
+   For the remaining discussions, we will assume that a partial order
+   profile has been successfully negotiated for a single direction of
+   the connection (as depicted in Figure 8) and that we may now speak of
+   a "sending TCP" (TCP-A) and a "receiving TCP" (TCP-B).  As such,
+   TCP-A refers to the partial order data stream as the "send-side" of
+   the connection, while TCP-B refers to the same data stream as the
+   "receive-side".
+
+   Having established a partial order connection, the communicating TCPs
+   each have their respective jobs to perform to ensure proper data
+   delivery.  The sending TCP ascertains the object ordering and
+   reliability from the service profile and uses this information in its
+   buffering/retransmission policy.  The receiver modifications are more
+   significant, particularly the issues of object deliverability and
+   reliability.  And both sides will need to redefine the notion of
+   window management.  Let us look specifically at how each side of the
+   TCP connection is managed under this new paradigm.
+
+4.2.1 Sender
+
+   The sender's concerns are still essentially four-fold - transmitting
+   data, managing buffer space, processing acknowledgments and
+   retransmitting after a time-out - however, each takes on a new
+   meaning in a partial order service.  Additionally, the management of
+   the service profile represents a fifth duty not previously needed.
+
+   Taking a rather simplistic view, normal TCP output processing
+   involves (1) setting up the header, (2) copying user data into the
+   outgoing segment, (3) sending the segment, (4) making a copy in a
+   send buffer for retransmission and (5) starting a retransmission
+   timer.  The only difference with a partial order service is that the
+   reliability vector must be examined to determine whether or not to
+   buffer the object and start a timer - if the object is classified as
+   NBART-L, then steps 4 and 5 are omitted.
+
+   Buffer management at the sending end of a partial order connection is
+   dependent upon the object reliability class and the object size.
+   When transmitting NBART-L objects the sender need not store the data
+   for later possible retransmission since NBART-L objects are never
+   retransmitted.  The details of buffer management - such as whether to
+   allocate fixed-size pools of memory, or perhaps utilize a dynamic
+   heap allocation strategy - are left to the particular system
+   implementer.
+
+
+
+
+Connolly, Amer & Conrad                                        [Page 22]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   Acknowledgment processing remains essentially intact -
+   acknowledgments are cumulative and specify the peer TCP's window
+   advertisement.  However, determination of this advertisement is no
+   longer a trivial process dependent only upon the available buffer
+   space (this is discussed further in Section 4.2.2).  Moreover, it
+   should be noted that the introduction of partial ordering and partial
+   reliability presents several new and interesting alternatives for the
+   acknowledgment policy.  The authors are investigating several of
+   these strategies through a simulation model and have included a brief
+   discussion of these issues in Section 6.
+
+   The retransmit function of the TCP is entirely unchanged and is
+   therefore not discussed further.
+
+   For some applications, it may be possible to maintain the same
+   partial order for multiple periods (e.g., the application repeats the
+   same partial order).  In the general case, however, the protocol must
+   be able to change the service profile during an existing connection.
+   When a change in the service profile is requested, the sending TCP is
+   obliged to complete the processing of the current partial order
+   before commencing with a new one.  This ensures consistency between
+   the user applications in the event of a connection failure and
+   simplifies the protocol (future study is planned to investigate the
+   performance improvement gained by allowing concurrent different
+   partial orders).  The current partial order is complete when all
+   sending buffers are free.  Then negotiation of the new service
+   profile is performed in the same manner as with the initial profile.
+
+   Combining these issues, we propose the following simplified state
+   machine for the protocol (connection establishment and tear down
+   remain the same and are not shown here).
+
+               (1)Send Request                            (5)Ack Arrival
+                  +------+                                +-----------+
+                  |      |                                |           |
+                  |      V                                |           |
+                +----------+  (4) New PO Profile    +----------+      |
+          +---->|          |----------------------->|   PO     |<-----+
+          |     |  ESTAB   |                        |          |
+      (2) |     |          |                        |  SETUP   |
+      Ack +-----|          |<-----------------------|          |<-----+
+      Arrival   +----------+  (7)PO Setup Complete  +----------+      |
+                  ^      |                                  |         |
+                  |      |                                  |         |
+                  +------+                                  +---------+
+                (3)Timeout                                  (6)Timeout
+
+
+
+
+
+Connolly, Amer & Conrad                                        [Page 23]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   Event (1) - User Makes a Data Send Request
+   =========
+      If Piggyback Timer is set then
+           cancel piggyback timer
+      Package and send the object (with ACK for receive-side)
+      If object type = (BART-L,BART-NL) then
+           Store the object and start a retransmit timer
+      If sending window is full then
+           Block Event (1) - allow no further send requests from user
+
+   Event (2) - ACK Arrives
+   =========
+      If ACKed object(s) is buffered then
+           Release the buffer(s) and stop the retransmit timer(s)
+      Extract the peer TCP's window advertisement
+      If remote TCP's window advertisement > sending window then
+           Enable Event (1)
+      If remote TCP's window advertisement <= sending window then
+           Block Event (1) - allow no further send requests from user
+      Adjust sending window based on received window advertisement
+
+   Event (3) - Retransmit Timer Expires
+   =========
+      If Piggyback Timer is set then
+           cancel piggyback timer
+      Re-transmit the segment (with ACK for receive-side)
+      Restart the timer
+
+   Event (4) - PO Service Profile Arrives at the User Interface
+   =========
+      Transition to the PO SETUP state
+      Store the Send-side PO service profile
+      Package the profile into 1 or more segments, setting the
+           POC-Service-Profile option on each
+      If Piggyback Timer is set then
+           cancel piggyback timer
+      Send the segment(s) (with ACK for receive-side)
+      Store the segment(s) and start a retransmit timer
+
+   Event (5) - ACK Arrival
+   =========
+      If ACKed object(s) is buffered then
+           Release the buffer(s) and stop the retransmit timer(s)
+      Extract the peer TCP's window advertisement
+      If all objects from previous service profile have been ACKed and
+      the new service profile has been ACKed then enable Event (7)
+
+
+
+
+
+Connolly, Amer & Conrad                                        [Page 24]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   Event (6) - Retransmit Timer Expires
+   =========
+      If Piggyback Timer is set then
+           cancel piggyback timer
+      Re-transmit the segment (with ACK for receive-side)
+      Restart the timer
+
+   Event (7) - PO Setup Completed
+   =========
+      Transition to the ESTAB state and begin processing new service
+      profile
+
+4.2.2 Receiver
+
+   The receiving TCP has additional decisions to make involving object
+   deliverability, reliability and window management.  Additionally, the
+   service profile must be established (and re-established) periodically
+   and some special processing must be performed at the end of each
+   period.
+
+   When an object arrives, the question is no longer, "is this the next
+   deliverable object?", but rather, "is this ONE OF the next
+   deliverable objects?"  Hence, it is convenient to think of a
+   "Deliverable Set" of objects with a partial order protocol.  To
+   determine the elements of this set and answer the question of
+   deliverability, the receiver relies upon the partial order matrix
+   but, unlike the sender, the receiver dynamically updates the matrix
+   as objects are processed thus making other objects (possibly already
+   buffered objects) deliverable as well.  A check of the object type
+   also must be performed since BART-NL and BART-L objects require an
+   ACK to be returned to the sender but NBART-L objects do not.
+   Consider our example from the previous section.
+
+                1 2 3 4 5 6
+              +-------------+
+            1 | - 1 0 0 0 1 |         |               |       |
+            2 | - - 0 0 0 1 |         |-->1-->|-->2-->|       |
+            3 | - - - 1 0 1 |         |               |       |
+            4 | - - - - 0 1 |         |-->3-->|-->4-->|-->6-->|
+            5 | - - - - - 1 |         |               |       |
+            6 | - - - - - - |         |------>5------>|       |
+              +-------------+         |               |       |
+
+                 PO Matrix                 PO Graph
+
+   When object 5 arrives, the receiver scans column 5, finds that the
+   object is deliverable (since there are no 1's in the column) and
+   immediately delivers the object to the user application. Then, the
+
+
+
+Connolly, Amer & Conrad                                        [Page 25]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   matrix is updated to remove the constraint of any object whose
+   delivery depends on object 5 by clearing all entries of row 5.  This
+   may enable other objects to be delivered (for example, if object 2 is
+   buffered then the delivery of object 1 will make object 2
+   deliverable).  This leads us to the next issue - delivery of stored
+   objects.
+
+   In general, whenever an object is delivered, the buffers must be
+   examined to see if any other stored object(s) becomes deliverable.
+   [CAC93] describes an efficient algorithm to implement this processing
+   based on traversing the precedence graph.
+
+   Consideration of object reliability is interesting.  The authors have
+   taken a polling approach wherein a procedure is executed
+   periodically, say once every 100 milliseconds, to evaluate the
+   temporal value of outstanding objects on which the destination is
+   waiting.  Those whose temporal value has expired (i.e. which are no
+   longer useful as defined by the application) are "declared lost" and
+   treated in much the same manner as delivered objects - the matrix is
+   updated, and if the object type is BART-L, an ACK is sent.  Any
+   objects from the current period which have not yet been delivered or
+   declared lost are candidates for the "Terminator" as the procedure is
+   called.  The Terminator's criterion is not specifically addressed in
+   this RFC, but one example might be for the receiving user to
+   periodically pass a list of no-longer-useful objects to TCP-B.
+
+   Another question which arises is, "How does one calculate the send
+   and receive windows?"  With a partial order service, these windows
+   are no longer contiguous intervals of objects but rather sets of
+   objects.  In fact, there are three sets which are of interest to the
+   receiving TCP, one of which has already been mentioned - the
+   Deliverable Set.  Additionally, we can think of the Bufferable Set
+   and the Receivable Set.  Some definitions are in order:
+
+      Deliverable Set: objects which can be immediately passed up to
+           the user.
+
+      Buffered Set: objects stored in a buffer awaiting delivery.
+
+      Bufferable Set: objects which can be stored but not immediately
+           delivered (due to some ordering constraint).
+
+      Receivable Set: union of the Deliverable Set and the Bufferable
+           Set (which are disjoint) - intuitively, all objects which
+           are "receivable" must be either "deliverable" or
+           "bufferable".
+
+
+
+
+
+Connolly, Amer & Conrad                                        [Page 26]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   The following example will help to illustrate these sets.  Consider
+   our simple service profile from earlier for the case where the size
+   of each object is 1 MByte and the receiver has only 2 MBytes of
+   buffer space (enough for 2 objects).  Define a boolean vector of
+   length N (N = number of objects in a period) called the Processed
+   Vector which is used to indicate which objects from the current
+   period have been delivered or declared lost.  Initially, all buffers
+   are empty and the PO Matrix and Processed Vector are as shown here,
+
+                1 2 3 4 5 6
+              +-------------+
+            1 | - 1 0 0 0 1 |
+            2 | - - 0 0 0 1 |
+            3 | - - - 1 0 1 |
+            4 | - - - - 0 1 |
+            5 | - - - - - 1 |      [ F F F F F F ]
+            6 | - - - - - - |        1 2 3 4 5 6
+              +-------------+
+
+                 PO Matrix        Processed Vector
+
+   From the PO Matrix, it is clear that the Deliverable Set =
+   {(1,1),(1,3),(1,5)}, where (1,1) refers to object #1 from period #1,
+   assuming that the current period is period #1.
+
+   The Bufferable Set, however, depends upon how one defines bufferable
+   objects.  Several approaches are possible.  The authors' initial
+   approach to determining the Bufferable Set can best be explained in
+   terms of the following rules,
+
+      Rule 1: Remaining space must be allocated for all objects from
+              period i before any object from period i+1 is buffered
+
+      Rule 2: In the event that there exists enough space to buffer
+              some but not all objects from a given period, space will
+              be reserved for the first objects (i.e. 1,2,3,...,k)
+
+   With these rules, the Bufferable Set = {(1,2),(1,4)}, the Buffered
+   Set is trivially equal to the empty set, { }, and the Receivable Set
+   = {(1,1),(1,2),(1,3),(1,4),(1,5)}.
+
+   Note that the current acknowledgment scheme uses the min and max
+   values in the Receivable Set for its window advertisement which is
+   transmitted in all ACK segments sent along the receive-side of the
+   connection (from receiver to sender).  Moreover, the
+   "piggyback_delay" timer is still used to couple ACKs with return data
+   (as utilized in standard TCP).
+
+
+
+
+Connolly, Amer & Conrad                                        [Page 27]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   Returning to our example, let us now assume that object 1 and then 3
+   arrive at the receiver and object 2 is lost.  After processing both
+   objects, the PO Matrix and Processed Vector will have the following
+   updated structure,
+
+                1 2 3 4 5 6
+              +-------------+
+            1 | - 0 0 0 0 0 |
+            2 | - - 0 0 0 1 |
+            3 | - - - 0 0 0 |
+            4 | - - - - 0 1 |
+            5 | - - - - - 1 |      [ T F T F F F ]
+            6 | - - - - - - |        1 2 3 4 5 6
+              +-------------+
+
+                 PO Matrix        Processed Vector
+
+   We can see that the Deliverable Set = {(1,2),(1,4),(1,5)}, but what
+   should the Bufferable Set consist of?  Since only one buffer is
+   required for the current period's objects, we have 1 Mbyte of
+   additional space available for "future" objects and therefore include
+   the first object from period #2 in both the Bufferable and the
+   Receivable Set,
+
+      Deliverable Set = {(1,2),(1,4),(1,5)}
+
+      Bufferable Set =  {(1,6),(2,1)}
+
+      Buffered Set = { }
+
+      Receivable Set = {(1,2),(1,4),(1,5),(1,6),(2,1)}
+
+   In general, the notion of window management takes on new meaning with
+   a partial order service.  One may re-examine the classic window
+   relations with a partial order service in mind and devise new, less
+   restrictive relations which may shed further light on the operation
+   of such a service.
+
+   Two final details: (1) as with the sender, the receiver must
+   periodically establish or modify the PO service profile and (2) upon
+   processing the last object in a period, the receiver must re-set the
+   PO matrix and Processed vector to their initial states.
+
+
+
+
+
+
+
+
+
+Connolly, Amer & Conrad                                        [Page 28]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   Let us look at the state machine and pseudo-code for the receiver.
+
+         (2)Data Segment Arrival          (5)PO Profile fragment Arrival
+            +------+                          +-------+
+            |      |                          |       |
+            |      V    (1)First PO Profile   |       V
+          +---------+     fragment arrives   +---------+(6) Data Segment
+    +---->|         |----------------------->|         |<-----+ Arrival
+    |     |  ESTAB  |                        |   PO    |------+
+    |     |         |                        |         |
+    |     |         |                        |  SETUP  |<-----+
+(3) +-----|         |<-----------------------|         |------+
+Terminator+---------+  (9)PO Setup complete  +---------+(7) Terminator
+            ^      |                          |      ^
+            |      |                          |      |
+            +------+                          +------+
+          (4)Piggyback Timeout             (8)Piggyback Timeout
+
+
+   Event 1 - First PO Service Profile fragment arrives at network
+   =======   interface
+      Transition to the PO SETUP state
+      Store the PO service profile (fragment)
+      Send an Acknowledgement of the PO service profile (fragment)
+
+   Event 2 - Data Segment Arrival
+   =======
+      If object is in Deliverable Set then
+           Deliver the object
+           Update PO Matrix and Processed Vector
+           Check buffers for newly deliverable objects
+           If all objects from current period have been processed then
+                Start the next period (re-initialize data structures)
+           Start piggyback_delay timer to send an ACK
+      Else if object is in Bufferable Set then
+           Store the object
+      Else
+           Discard object
+           Start piggyback_delay timer to send an ACK
+
+   Event 3 - Periodic call of the Terminator
+   =======
+      For all unprocessed objects in the current period do
+           If object is "no longer useful" then
+                Update PO Matrix and Processed Vector
+                If object is in a buffer then
+                     Release the buffer
+                Check buffers for newly deliverable objects
+
+
+
+Connolly, Amer & Conrad                                        [Page 29]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+                If all objects from current period have been processed
+                then Start the next period (re-initialize data
+                structures)
+
+   Event 4 - Piggyback_delay Timer Expires
+   =======
+      Send an ACK
+      Disable piggyback_delay timer
+
+   Event 5 - PO Service Profile fragment arrives at network interface
+   =======
+      Store the PO service profile (fragment)
+      Send an Acknowledgement of the PO service profile (fragment)
+      If entire PO Service profile has been received then enable Event
+      (9)
+
+   Event 6 - Data Segment arrival
+   =======
+      (See event 2)
+
+   Event 7 - Periodic call of the terminator
+   =======
+      (See Event 3)
+
+   Event 8 - Piggyback_delay Timer Expires
+   =======
+      (See Event 4)
+
+   Event 9 - PO Setup Complete
+   =======
+      Transition to the ESTAB state
+
+   Note that, for reasons of clarity, we have used a transitively closed
+   matrix representation of the partial order.  A more efficient
+   implementation based on an adjacency list representation of a
+   transitively reduced precedence graph results in a more efficient
+   running time [CAC93].
+
+5. Quantifying and Comparing Partial Order Services
+
+   While ordered, reliable delivery is ideal, the existence of less-
+   than-ideal underlying networks can cause delays for applications that
+   need only partial order or partial reliability.  By introducing a
+   partial order service, one may in effect relax the requirements on
+   order and reliability and presumably expect some savings in terms of
+   buffer utilization and bandwidth (due to fewer retransmissions) and
+   shorter overall delays.  A practical question to be addressed is,
+   "what are the expected savings likely to be?"
+
+
+
+Connolly, Amer & Conrad                                        [Page 30]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   As mentioned in Section 2, the extent of such savings will depend
+   largely on the quality of the underlying network - bandwidth, delay,
+   amount and distribution of loss/duplication/disorder - as well as the
+   flexibility of the partial order itself - specified by the PO matrix
+   and reliability vector.  If the underlying network has no loss, a
+   partial order service essentially becomes an ordered service.
+   Collecting experimental data to ascertain realistic network
+   conditions is a straightforward task and will help to quantify in
+   general the value of a partial order service [Bol93].  But how can
+   one quantify and compare the cost of providing specific levels of
+   service?
+
+   Preliminary research indicates that the number of linear extensions
+   (orderings) of a partial order in the presence of loss effectively
+   measures the complexity of that order.  The authors have derived
+   formulae for calculating the number of extensions when a partial
+   order is series-parallel and have proposed a metric for comparing
+   partial orders based on this number [ACCD93b].  This metric could be
+   used as a means for charging for the service, for example. What also
+   may be interesting is a specific head-to-head comparison between
+   different partial orders with varying degrees of flexibility.  Work
+   is currently underway on a simulation model aimed at providing this
+   information.  And finally, work is underway on an implementation of
+   TCP which includes partial order service.
+
+6. Future Direction
+
+   In addition to the simulation and implementation work the authors are
+   pursuing several problems related to partial ordering which will be
+   mentioned briefly.
+
+   An interesting question arises when discussing the acknowledgment
+   strategy for a partial order service.  For classic protocols, a
+   cumulative ACK of object i confirms all objects "up to and including"
+   i.  But the meaning of "up to and including" with a partial order
+   service has different implications than with an ordered service.
+
+   Consider our example partial order, ((1;2)||(3;4)||5);6.  What
+   should a cumulative ACK of object 4 confirm?  The most logical
+   definition would say it confirms receipt of object 4 and all objects
+   that precede 4 in the partial order, in this case, object 3.  Nothing
+   is said about the arrival of objects 1 or 2.  With this alternative
+   interpretation where cumulative ACKs depend on the partial order, the
+   sender must examine the partial order matrix to determine which
+   buffers can be released.  In this example, scanning column 4 of the
+   matrix reveals that object 3 must come before object 4 and therefore
+   both object buffers (and any buffers from a previous period) can be
+   released.
+
+
+
+Connolly, Amer & Conrad                                        [Page 31]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   Other partial order acknowledgment policies are possible for a
+   protocol providing a partial order service including the use of
+   selective ACKs (which has been proposed in [JB88] and implemented in
+   the Cray TCP [Chang93]) as well as the current TCP strategy where an
+   ACK of i also ACKs everything <= i (in a cyclical sequence number
+   space).  The authors are investigating an ACK policy which utilizes a
+   combination of selective and "partial-order-cumulative"
+   acknowledgments.  This is accomplished by replacing the current TCP
+   cumulative ACK with one which has the partial order meaning as
+   described above and augmenting this with intermittent selective ACKs
+   when needed.
+
+   In another area, the notion of fragmented delivery, mentioned in the
+   beginning of Section 4, looks like a promising technique for certain
+   classes of applications which may offer a substantial improvement in
+   memory utilization.  Briefly, the term fragmented delivery refers to
+   the ability to transfer less-than-complete objects between the
+   transport layer and the user application (or session layer as the
+   case may be).  For example, a 1Mbyte object could potentially be
+   delivered in multiple "chunks" as segments arrive thus freeing up
+   valuable memory and reducing the delay on those pieces of data.  The
+   scenario becomes somewhat more complex when multiple "parallel
+   streams" are considered where the application could now receive
+   pieces of multiple objects associated with different streams.
+
+   Additional work in the area of implementing a working partial order
+   protocol is being performed both at the University of Delaware and at
+   the LAAS du CNRS laboratory in Toulouse, France - particularly in
+   support of distributed, high-speed, multimedia communication. It will
+   be interesting to examine the processing requirements for an
+   implementation of a partial order protocol at key events (such as
+   object arrival) compared with a non-partial order implementation.
+
+   Finally, the authors are interested in the realization of a network
+   application utilizing a partial order service.  The aim of such work
+   is threefold: (1) provide further insight into the expected
+   performance gains, (2) identify new issues unique to partial order
+   transport and, (3) build a road-map for application designers
+   interested in using a partial order service.
+
+7. Summary
+
+   This RFC introduces the concepts of a partial order service and
+   discusses the practical issues involved with including partial
+   ordering in a transport protocol.  The need for such a service is
+   motivated by several applications including the vast fields of
+   distributed databases, and multimedia.  The service has been
+   presented as a backward-compatible extension to TCP to adapt to
+
+
+
+Connolly, Amer & Conrad                                        [Page 32]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   applications with different needs specified in terms of QOS
+   parameters.
+
+   The notion of a partial ordering extends QOS flexibility to include
+   object delivery, reliability, and temporal value thus allowing the
+   transport layer to effectively handle a wider range of applications
+   (i.e., any which might benefit from such mechanisms).  The service
+   profile described in Section 4 accurately characterizes the QOS for a
+   partial order service (which encompasses the two extremes of total
+   ordered and unordered transport as well).
+
+   Several significant modifications have been proposed and are
+   summarized here:
+
+       (1) Replacing the requirement for ordered delivery with one for
+           application-dependent partial ordering
+
+       (2) Allowing unreliable and partially reliable data transport
+
+       (3) Conducting a non-symmetrical connection (not entirely foreign
+           to TCP, the use of different MSS values for the two sides
+           of a connection is an example)
+
+       (4) Management of "objects" rather than octets
+
+       (5) Modified acknowledgment strategy
+
+       (6) New definition for the send and receive "windows"
+
+       (7) Extension of the User/TCP interface to include certain
+           QOS parameters
+
+       (8) Use of new TCP options
+
+   As evidenced by this list, a partial order and partial reliability
+   service proposes to re-examine several fundamental transport
+   mechanisms and, in so doing, offers the opportunity for substantial
+   improvement in the support of existing and new application areas.
+
+
+
+
+
+
+
+
+
+
+
+
+
+Connolly, Amer & Conrad                                        [Page 33]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+8. References
+
+   [ACCD93a]  Amer, P., Chassot, C., Connolly, T., and M. Diaz,
+              "Partial Order Transport Service for Multimedia
+              Applications: Reliable Service", Second International
+              Symposium on High Performance Distributed Computing
+              (HPDC-2), Spokane, Washington, July 1993.
+
+   [ACCD93b]  Amer, P., Chassot, C., Connolly, T., and M. Diaz,
+              "Partial Order Transport Service for Multimedia
+              Applications: Unreliable Service", Proc. INET '93, San
+              Francisco, August 1993.
+
+   [AH91]     Anderson, D., and G. Homsy, "A Continuous Media I/O
+              Server and its Synchronization Mechanism", IEEE
+              Computer, 24(10), 51-57, October 1991.
+
+   [AS93]     Agrawala, A., and D. Sanghi, "Experimental Assessment
+              of End-to-End Behavior on Internet," Proc. IEEE INFOCOM
+              '93, San Francisco, CA, March 1993.
+
+   [BCP93]    Claffy, K., Polyzos, G., and H.-W. Braun, "Traffic
+              Characteristics of the T1 NSFNET", Proc. IEEE INFOCOM
+              '93, San Francisco, CA, March 1993.
+
+   [Bol93]    Bolot, J., "End-to-End Packet Delay and Loss Behavior
+              in the Internet", SIGCOMM '93, Ithaca, NY, September
+              1993.
+
+   [CAC93]    Conrad, P., Amer, P., and T. Connolly, "Improving
+              Performance in Transport-Layer Communications Protocols
+              by using Partial Orders and Partial Reliability",
+              Work in Progress, December 1993.
+
+   [Chang93]  Chang, Y., "High-Speed Transport Protocol Evaluation --
+              the Final Report", MCNC Center for Communications
+              Technical Document, February 1993.
+
+   [Dee89]    Deering, S., "Host Extensions for IP Multicasting," STD
+              5, RFC 1112 Stanford University, August 1989.
+
+   [DS93]     Diaz, M., and P. Senac, "Time Stream Petri Nets: A
+              Model for Multimedia Synchronization", Proceedings of
+              Multimedia Modeling '93, Singapore, 1993.
+
+
+
+
+
+
+
+Connolly, Amer & Conrad                                        [Page 34]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+   [HKN91]    Hardt-Kornacki, S., and L. Ness, "Optimization Model
+              for the Delivery of Interactive Multimedia Documents",
+              In Proc.  Globecom '91, 669-673, Phoenix, Arizona,
+              December 1991.
+
+   [JB88]     Jacobson, V., and R. Braden, "TCP Extensions for
+              Long-Delay Paths", RFC 1072, LBL, USC/Information
+              Sciences Institute, October 1988.
+
+   [JBB92]    Jacobson, V., Braden, R., and D. Borman, "TCP
+              Extensions for High Performance", RFC 1323, LBL, Cray
+              Research, USC/Information Sciences Institute, May 1992.
+
+   [LMKQ89]   Leffler, S., McKusick, M., Karels, M., and J.
+              Quarterman, "4.3 BSD UNIX Operating System",
+              Addison-Wesley Publishing Company, Reading, MA, 1989.
+
+   [OP91]     O'Malley, S., and L. Peterson, "TCP Extensions
+              Considered Harmful", RFC 1263, University of Arizona,
+              October 1991.
+
+   [Pos81]    Postel, J., "Transmission Control Protocol - DARPA
+              Internet Program Protocol Specification," STD 7,
+              RFC 793, DARPA, September 1981.
+
+Security Considerations
+
+   Security issues are not discussed in this memo.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Connolly, Amer & Conrad                                        [Page 35]
+
+RFC 1693       An Extension to TCP: Partial Order Service  November 1994
+
+
+Authors' Addresses
+
+   Tom Connolly
+   101C Smith Hall
+   Department of Computer & Information Sciences
+   University of Delaware
+   Newark, DE 19716 - 2586
+
+   EMail: connolly@udel.edu
+
+
+   Paul D. Amer
+   101C Smith Hall
+   Department of Computer & Information Sciences
+   University of Delaware
+   Newark, DE 19716 - 2586
+
+   EMail: amer@udel.edu
+
+
+   Phill Conrad
+   101C Smith Hall
+   Department of Computer & Information Sciences
+   University of Delaware
+   Newark, DE 19716 - 2586
+
+   EMail: pconrad@udel.edu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Connolly, Amer & Conrad                                        [Page 36]
+
diff --git a/ext/picotcp/RFC/rfc2026.txt b/ext/picotcp/RFC/rfc2026.txt
new file mode 100644
index 0000000..1c9c59a
--- /dev/null
+++ b/ext/picotcp/RFC/rfc2026.txt
@@ -0,0 +1,2019 @@
+
+
+
+
+
+
+Network Working Group                                         S. Bradner
+Request for Comments: 2026                            Harvard University
+BCP: 9                                                      October 1996
+Obsoletes: 1602
+Category: Best Current Practice
+
+
+              The Internet Standards Process -- Revision 3
+
+
+Status of this Memo
+
+   This document specifies an Internet Best Current Practices for the
+   Internet Community, and requests discussion and suggestions for
+   improvements.  Distribution of this memo is unlimited.
+
+Abstract
+
+   This memo documents the process used by the Internet community for
+   the standardization of protocols and procedures.  It defines the
+   stages in the standardization process, the requirements for moving a
+   document between stages and the types of documents used during this
+   process.  It also addresses the intellectual property rights and
+   copyright issues associated with the standards process.
+
+Table of Contents
+
+   1.  INTRODUCTION....................................................2
+     1.1  Internet Standards...........................................3
+     1.2  The Internet Standards Process...............................3
+     1.3  Organization of This Document................................5
+   2.  INTERNET STANDARDS-RELATED PUBLICATIONS.........................5
+     2.1  Requests for Comments (RFCs).................................5
+     2.2  Internet-Drafts..............................................7
+   3.  INTERNET STANDARD SPECIFICATIONS................................8
+     3.1  Technical Specification (TS).................................8
+     3.2  Applicability Statement (AS).................................8
+     3.3  Requirement Levels...........................................9
+   4.  THE INTERNET STANDARDS TRACK...................................10
+     4.1  Standards Track Maturity Levels.............................11
+       4.1.1  Proposed Standard.......................................11
+       4.1.2  Draft Standard..........................................12
+       4.1.3  Internet Standard.......................................13
+     4.2  Non-Standards Track Maturity Levels.........................13
+       4.2.1  Experimental............................................13
+       4.2.2  Informational...........................................14
+       4.2.3  Procedures for Experimental and Informational RFCs......14
+       4.2.4  Historic................................................15
+
+
+
+Bradner                  Best Current Practice                  [Page 1]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+   5.  Best Current Practice (BCP) RFCs...............................15
+     5.1  BCP Review Process..........................................16
+   6.  THE INTERNET STANDARDS PROCESS.................................17
+     6.1  Standards Actions...........................................17
+       6.1.1  Initiation of Action....................................17
+       6.1.2  IESG Review and Approval................................17
+       6.1.3  Publication.............................................18
+     6.2  Advancing in the Standards Track............................19
+     6.3  Revising a Standard.........................................20
+     6.4  Retiring a Standard.........................................20
+     6.5  Conflict Resolution and Appeals.............................21
+       6.5.1 Working Group Disputes...................................21
+       6.5.2 Process Failures.........................................22
+       6.5.3 Questions of Applicable Procedure........................22
+       6.5.4 Appeals Procedure........................................23
+   7.  EXTERNAL STANDARDS AND SPECIFICATIONS..........................23
+     7.1  Use of External Specifications..............................24
+       7.1.1  Incorporation of an Open Standard.......................24
+       7.1.2  Incorporation of Other Specifications...................24
+       7.1.3  Assumption..............................................25
+   8. NOTICES AND RECORD KEEPING......................................25
+   9. VARYING THE PROCESS.............................................26
+     9.1 The Variance Procedure.......................................26
+     9.2 Exclusions...................................................27
+   10.  INTELLECTUAL PROPERTY RIGHTS..................................27
+     10.1.  General Policy............................................27
+     10.2   Confidentiality Obligations...............................28
+     10.3.  Rights and Permissions....................................28
+       10.3.1. All Contributions......................................28
+       10.3.2. Standards Track Documents..............................29
+       10.3.3  Determination of Reasonable and
+              Non-discriminatory Terms................................30
+     10.4.  Notices...................................................30
+   11. ACKNOWLEDGMENTS................................................32
+   12. SECURITY CONSIDERATIONS........................................32
+   13. REFERENCES.....................................................33
+   14. DEFINITIONS OF TERMS...........................................33
+   15. AUTHOR'S ADDRESS...............................................34
+   APPENDIX A: GLOSSARY OF ACRONYMS...................................35
+
+
+
+
+
+
+
+
+
+
+
+
+Bradner                  Best Current Practice                  [Page 2]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+1.  INTRODUCTION
+
+   This memo documents the process currently used by the Internet
+   community for the standardization of protocols and procedures.  The
+   Internet Standards process is an activity of the Internet Society
+   that is organized and managed on behalf of the Internet community by
+   the Internet Architecture Board (IAB) and the Internet Engineering
+   Steering Group (IESG).
+
+1.1  Internet Standards
+
+   The Internet, a loosely-organized international collaboration of
+   autonomous, interconnected networks, supports host-to-host
+   communication through voluntary adherence to open protocols and
+   procedures defined by Internet Standards.  There are also many
+   isolated interconnected networks, which are not connected to the
+   global Internet but use the Internet Standards.
+
+   The Internet Standards Process described in this document is
+   concerned with all protocols, procedures, and conventions that are
+   used in or by the Internet, whether or not they are part of the
+   TCP/IP protocol suite.  In the case of protocols developed and/or
+   standardized by non-Internet organizations, however, the Internet
+   Standards Process normally applies to the application of the protocol
+   or procedure in the Internet context, not to the specification of the
+   protocol itself.
+
+   In general, an Internet Standard is a specification that is stable
+   and well-understood, is technically competent, has multiple,
+   independent, and interoperable implementations with substantial
+   operational experience, enjoys significant public support, and is
+   recognizably useful in some or all parts of the Internet.
+
+1.2  The Internet Standards Process
+
+   In outline, the process of creating an Internet Standard is
+   straightforward:  a specification undergoes a period of development
+   and several iterations of review by the Internet community and
+   revision based upon experience, is adopted as a Standard by the
+   appropriate body (see below), and is published.  In practice, the
+   process is more complicated, due to (1) the difficulty of creating
+   specifications of high technical quality;  (2) the need to consider
+   the interests of all of the affected parties;  (3) the importance of
+   establishing widespread community consensus;  and (4) the difficulty
+   of evaluating the utility of a particular specification for the
+   Internet community.
+
+
+
+
+
+Bradner                  Best Current Practice                  [Page 3]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+   The goals of the Internet Standards Process are:
+   o  technical excellence;
+   o  prior implementation and testing;
+   o  clear, concise, and easily understood documentation;
+   o  openness and fairness;  and
+   o  timeliness.
+
+   The procedures described in this document are designed to be fair,
+   open, and objective;  to reflect existing (proven) practice;  and to
+   be flexible.
+
+   o  These procedures are intended to provide a fair, open, and
+      objective basis for developing, evaluating, and adopting Internet
+      Standards.  They provide ample opportunity for participation and
+      comment by all interested parties.  At each stage of the
+      standardization process, a specification is repeatedly discussed
+      and its merits debated in open meetings and/or public electronic
+      mailing lists, and it is made available for review via world-wide
+      on-line directories.
+
+   o  These procedures are explicitly aimed at recognizing and adopting
+      generally-accepted practices.  Thus, a candidate specification
+      must be implemented and tested for correct operation and
+      interoperability by multiple independent parties and utilized in
+      increasingly demanding environments, before it can be adopted as
+      an Internet Standard.
+
+   o  These procedures provide a great deal of flexibility to adapt to
+      the wide variety of circumstances that occur in the
+      standardization process.  Experience has shown this flexibility to
+      be vital in achieving the goals listed above.
+
+   The goal of technical competence, the requirement for prior
+   implementation and testing, and the need to allow all interested
+   parties to comment all require significant time and effort.  On the
+   other hand, today's rapid development of networking technology
+   demands timely development of standards.  The Internet Standards
+   Process is intended to balance these conflicting goals.  The process
+   is believed to be as short and simple as possible without sacrificing
+   technical excellence, thorough testing before adoption of a standard,
+   or openness and fairness.
+
+   From its inception, the Internet has been, and is expected to remain,
+   an evolving system whose participants regularly factor new
+   requirements and technology into its design and implementation. Users
+   of the Internet and providers of the equipment, software, and
+   services that support it should anticipate and embrace this evolution
+   as a major tenet of Internet philosophy.
+
+
+
+Bradner                  Best Current Practice                  [Page 4]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+   The procedures described in this document are the result of a number
+   of years of evolution, driven both by the needs of the growing and
+   increasingly diverse Internet community, and by experience.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Bradner                  Best Current Practice                  [Page 5]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+1.3  Organization of This Document
+
+   Section 2 describes the publications and archives of the Internet
+   Standards Process.  Section 3 describes the types of Internet
+   standard specifications.  Section 4 describes the Internet standards
+   specifications track.  Section 5 describes Best Current Practice
+   RFCs.  Section 6 describes the process and rules for Internet
+   standardization.  Section 7 specifies the way in which externally-
+   sponsored specifications and practices, developed and controlled by
+   other standards bodies or by others, are handled within the Internet
+   Standards Process.  Section 8 describes the requirements for notices
+   and record keeping.  Section 9 defines a variance process to allow
+   one-time exceptions to some of the requirements in this document.
+   Section 10 presents the rules that are required to protect
+   intellectual property rights in the context of the development and
+   use of Internet Standards.  Section 11 includes acknowledgments of
+   some of the people involved in creation of this document.  Section 12
+   notes that security issues are not dealt with by this document.
+   Section 13 contains a list of numbered references.  Section 14
+   contains definitions of some of the terms used in this document.
+   Section 15 lists the author's email and postal addresses.  Appendix A
+   contains a list of frequently-used acronyms.
+
+2.  INTERNET STANDARDS-RELATED PUBLICATIONS
+
+2.1  Requests for Comments (RFCs)
+
+   Each distinct version of an Internet standards-related specification
+   is published as part of the "Request for Comments" (RFC) document
+   series.  This archival series is the official publication channel for
+   Internet standards documents and other publications of the IESG, IAB,
+   and Internet community.  RFCs can be obtained from a number of
+   Internet hosts using anonymous FTP, gopher, World Wide Web, and other
+   Internet document-retrieval systems.
+
+   The RFC series of documents on networking began in 1969 as part of
+   the original ARPA wide-area networking (ARPANET) project (see
+   Appendix A for glossary of acronyms).  RFCs cover a wide range of
+   topics in addition to Internet Standards, from early discussion of
+   new research concepts to status memos about the Internet.  RFC
+   publication is the direct responsibility of the RFC Editor, under the
+   general direction of the IAB.
+
+
+
+
+
+
+
+
+
+Bradner                  Best Current Practice                  [Page 6]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+   The rules for formatting and submitting an RFC are defined in [5].
+   Every RFC is available in ASCII text.  Some RFCs are also available
+   in other formats.  The other versions of an RFC may contain material
+   (such as diagrams and figures) that is not present in the ASCII
+   version, and it may be formatted differently.
+
+      *********************************************************
+      *                                                       *
+      *  A stricter requirement applies to standards-track    *
+      *  specifications:  the ASCII text version is the       *
+      *  definitive reference, and therefore it must be a     *
+      *  complete and accurate specification of the standard, *
+      *  including all necessary diagrams and illustrations.  *
+      *                                                       *
+      *********************************************************
+
+   The status of Internet protocol and service specifications is
+   summarized periodically in an RFC entitled "Internet Official
+   Protocol Standards" [1].  This RFC shows the level of maturity and
+   other helpful information for each Internet protocol or service
+   specification (see section 3).
+
+   Some RFCs document Internet Standards.  These RFCs form the 'STD'
+   subseries of the RFC series [4].  When a specification has been
+   adopted as an Internet Standard, it is given the additional label
+   "STDxxx", but it keeps its RFC number and its place in the RFC
+   series. (see section 4.1.3)
+
+   Some RFCs standardize the results of community deliberations about
+   statements of principle or conclusions about what is the best way to
+   perform some operations or IETF process function.  These RFCs form
+   the 'BCP' subseries of the RFC series.  When a specification has been
+   adopted as a BCP, it is given the additional label "BCPxxx", but it
+   keeps its RFC number and its place in the RFC series. (see section 5)
+
+   Not all specifications of protocols or services for the Internet
+   should or will become Internet Standards or BCPs.  Such non-standards
+   track specifications are not subject to the rules for Internet
+   standardization.  Non-standards track specifications may be published
+   directly as "Experimental" or "Informational" RFCs at the discretion
+   of the RFC Editor in consultation with the IESG (see section 4.2).
+
+
+
+
+
+
+
+
+
+
+Bradner                  Best Current Practice                  [Page 7]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+      ********************************************************
+      *                                                      *
+      *   It is important to remember that not all RFCs      *
+      *   are standards track documents, and that not all    *
+      *   standards track documents reach the level of       *
+      *   Internet Standard. In the same way, not all RFCs   *
+      *   which describe current practices have been given   *
+      *   the review and approval to become BCPs. See        *
+      *   RFC-1796 [6] for further information.              *
+      *                                                      *
+      ********************************************************
+
+2.2  Internet-Drafts
+
+   During the development of a specification, draft versions of the
+   document are made available for informal review and comment by
+   placing them in the IETF's "Internet-Drafts" directory, which is
+   replicated on a number of Internet hosts.  This makes an evolving
+   working document readily available to a wide audience, facilitating
+   the process of review and revision.
+
+   An Internet-Draft that is published as an RFC, or that has remained
+   unchanged in the Internet-Drafts directory for more than six months
+   without being recommended by the IESG for publication as an RFC, is
+   simply removed from the Internet-Drafts directory.  At any time, an
+   Internet-Draft may be replaced by a more recent version of the same
+   specification, restarting the six-month timeout period.
+
+   An Internet-Draft is NOT a means of "publishing" a specification;
+   specifications are published through the RFC mechanism described in
+   the previous section.  Internet-Drafts have no formal status, and are
+   subject to change or removal at any time.
+
+      ********************************************************
+      *                                                      *
+      *   Under no circumstances should an Internet-Draft    *
+      *   be referenced by any paper, report, or Request-    *
+      *   for-Proposal, nor should a vendor claim compliance *
+      *   with an Internet-Draft.                            *
+      *                                                      *
+      ********************************************************
+
+
+
+
+
+
+
+
+
+
+Bradner                  Best Current Practice                  [Page 8]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+   Note: It is acceptable to reference a standards-track specification
+   that may reasonably be expected to be published as an RFC using the
+   phrase "Work in Progress" without referencing an Internet-Draft.
+   This may also be done in a standards track document itself, as long
+   as the specification in which the reference is made would stand as a
+   complete and understandable document with or without the reference to
+   the "Work in Progress".
+
+3.  INTERNET STANDARD SPECIFICATIONS
+
+   Specifications subject to the Internet Standards Process fall into
+   one of two categories:  Technical Specification (TS) and
+   Applicability Statement (AS).
+
+3.1  Technical Specification (TS)
+
+   A Technical Specification is any description of a protocol, service,
+   procedure, convention, or format.  It may completely describe all of
+   the relevant aspects of its subject, or it may leave one or more
+   parameters or options unspecified.  A TS may be completely self-
+   contained, or it may incorporate material from other specifications
+   by reference to other documents (which might or might not be Internet
+   Standards).
+
+   A TS shall include a statement of its scope and the general intent
+   for its use (domain of applicability).  Thus, a TS that is inherently
+   specific to a particular context shall contain a statement to that
+   effect.  However, a TS does not specify requirements for its use
+   within the Internet;  these requirements, which depend on the
+   particular context in which the TS is incorporated by different
+   system configurations, are defined by an Applicability Statement.
+
+3.2  Applicability Statement (AS)
+
+   An Applicability Statement specifies how, and under what
+   circumstances, one or more TSs may be applied to support a particular
+   Internet capability.  An AS may specify uses for TSs that are not
+   Internet Standards, as discussed in Section 7.
+
+   An AS identifies the relevant TSs and the specific way in which they
+   are to be combined, and may also specify particular values or ranges
+   of TS parameters or subfunctions of a TS protocol that must be
+   implemented.  An AS also specifies the circumstances in which the use
+   of a particular TS is required, recommended, or elective (see section
+   3.3).
+
+
+
+
+
+
+Bradner                  Best Current Practice                  [Page 9]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+   An AS may describe particular methods of using a TS in a restricted
+   "domain of applicability", such as Internet routers, terminal
+   servers, Internet systems that interface to Ethernets, or datagram-
+   based database servers.
+
+   The broadest type of AS is a comprehensive conformance specification,
+   commonly called a "requirements document", for a particular class of
+   Internet systems, such as Internet routers or Internet hosts.
+
+   An AS may not have a higher maturity level in the standards track
+   than any standards-track TS on which the AS relies (see section 4.1).
+   For example, a TS at Draft Standard level may be referenced by an AS
+   at the Proposed Standard or Draft Standard level, but not by an AS at
+   the Standard level.
+
+3.3  Requirement Levels
+
+   An AS shall apply one of the following "requirement levels" to each
+   of the TSs to which it refers:
+
+   (a)  Required:  Implementation of the referenced TS, as specified by
+      the AS, is required to achieve minimal conformance.  For example,
+      IP and ICMP must be implemented by all Internet systems using the
+      TCP/IP Protocol Suite.
+
+   (b)  Recommended:  Implementation of the referenced TS is not
+      required for minimal conformance, but experience and/or generally
+      accepted technical wisdom suggest its desirability in the domain
+      of applicability of the AS.  Vendors are strongly encouraged to
+      include the functions, features, and protocols of Recommended TSs
+      in their products, and should omit them only if the omission is
+      justified by some special circumstance. For example, the TELNET
+      protocol should be implemented by all systems that would benefit
+      from remote access.
+
+   (c)  Elective:  Implementation of the referenced TS is optional
+      within the domain of applicability of the AS;  that is, the AS
+      creates no explicit necessity to apply the TS.  However, a
+      particular vendor may decide to implement it, or a particular user
+      may decide that it is a necessity in a specific environment.  For
+      example, the DECNET MIB could be seen as valuable in an
+      environment where the DECNET protocol is used.
+
+
+
+
+
+
+
+
+
+Bradner                  Best Current Practice                 [Page 10]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+      As noted in section 4.1, there are TSs that are not in the
+      standards track or that have been retired from the standards
+      track, and are therefore not required, recommended, or elective.
+      Two additional "requirement level" designations are available for
+      these TSs:
+
+   (d)  Limited Use:  The TS is considered to be appropriate for use
+      only in limited or unique circumstances.  For example, the usage
+      of a protocol with the "Experimental" designation should generally
+      be limited to those actively involved with the experiment.
+
+   (e)  Not Recommended:  A TS that is considered to be inappropriate
+      for general use is labeled "Not Recommended". This may be because
+      of its limited functionality, specialized nature, or historic
+      status.
+
+   Although TSs and ASs are conceptually separate, in practice a
+   standards-track document may combine an AS and one or more related
+   TSs.  For example, Technical Specifications that are developed
+   specifically and exclusively for some particular domain of
+   applicability, e.g., for mail server hosts, often contain within a
+   single specification all of the relevant AS and TS information. In
+   such cases, no useful purpose would be served by deliberately
+   distributing the information among several documents just to preserve
+   the formal AS/TS distinction.  However, a TS that is likely to apply
+   to more than one domain of applicability should be developed in a
+   modular fashion, to facilitate its incorporation by multiple ASs.
+
+   The "Official Protocol Standards" RFC (STD1) lists a general
+   requirement level for each TS, using the nomenclature defined in this
+   section. This RFC is updated periodically.  In many cases, more
+   detailed descriptions of the requirement levels of particular
+   protocols and of individual features of the protocols will be found
+   in appropriate ASs.
+
+4.  THE INTERNET STANDARDS TRACK
+
+   Specifications that are intended to become Internet Standards evolve
+   through a set of maturity levels known as the "standards track".
+   These maturity levels -- "Proposed Standard", "Draft Standard", and
+   "Standard" -- are defined and discussed in section 4.1.  The way in
+   which specifications move along the standards track is described in
+   section 6.
+
+   Even after a specification has been adopted as an Internet Standard,
+   further evolution often occurs based on experience and the
+   recognition of new requirements.  The nomenclature and procedures of
+   Internet standardization provide for the replacement of old Internet
+
+
+
+Bradner                  Best Current Practice                 [Page 11]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+   Standards with new ones, and the assignment of descriptive labels to
+   indicate the status of "retired" Internet Standards.  A set of
+   maturity levels is defined in section 4.2 to cover these and other
+   specifications that are not considered to be on the standards track.
+
+4.1  Standards Track Maturity Levels
+
+   Internet specifications go through stages of development, testing,
+   and acceptance.  Within the Internet Standards Process, these stages
+   are formally labeled "maturity levels".
+
+   This section describes the maturity levels and the expected
+   characteristics of specifications at each level.
+
+4.1.1  Proposed Standard
+
+   The entry-level maturity for the standards track is "Proposed
+   Standard".  A specific action by the IESG is required to move a
+   specification onto the standards track at the "Proposed Standard"
+   level.
+
+   A Proposed Standard specification is generally stable, has resolved
+   known design choices, is believed to be well-understood, has received
+   significant community review, and appears to enjoy enough community
+   interest to be considered valuable.  However, further experience
+   might result in a change or even retraction of the specification
+   before it advances.
+
+   Usually, neither implementation nor operational experience is
+   required for the designation of a specification as a Proposed
+   Standard.  However, such experience is highly desirable, and will
+   usually represent a strong argument in favor of a Proposed Standard
+   designation.
+
+   The IESG may require implementation and/or operational experience
+   prior to granting Proposed Standard status to a specification that
+   materially affects the core Internet protocols or that specifies
+   behavior that may have significant operational impact on the
+   Internet.
+
+   A Proposed Standard should have no known technical omissions with
+   respect to the requirements placed upon it.  However, the IESG may
+   waive this requirement in order to allow a specification to advance
+   to the Proposed Standard state when it is considered to be useful and
+   necessary (and timely) even with known technical omissions.
+
+
+
+
+
+
+Bradner                  Best Current Practice                 [Page 12]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+   Implementors should treat Proposed Standards as immature
+   specifications.  It is desirable to implement them in order to gain
+   experience and to validate, test, and clarify the specification.
+   However, since the content of Proposed Standards may be changed if
+   problems are found or better solutions are identified, deploying
+   implementations of such standards into a disruption-sensitive
+   environment is not recommended.
+
+4.1.2  Draft Standard
+
+   A specification from which at least two independent and interoperable
+   implementations from different code bases have been developed, and
+   for which sufficient successful operational experience has been
+   obtained, may be elevated to the "Draft Standard" level.  For the
+   purposes of this section, "interoperable" means to be functionally
+   equivalent or interchangeable components of the system or process in
+   which they are used.  If patented or otherwise controlled technology
+   is required for implementation, the separate implementations must
+   also have resulted from separate exercise of the licensing process.
+   Elevation to Draft Standard is a major advance in status, indicating
+   a strong belief that the specification is mature and will be useful.
+
+   The requirement for at least two independent and interoperable
+   implementations applies to all of the options and features of the
+   specification.  In cases in which one or more options or features
+   have not been demonstrated in at least two interoperable
+   implementations, the specification may advance to the Draft Standard
+   level only if those options or features are removed.
+
+   The Working Group chair is responsible for documenting the specific
+   implementations which qualify the specification for Draft or Internet
+   Standard status along with documentation about testing of the
+   interoperation of these implementations.  The documentation must
+   include information about the support of each of the individual
+   options and features.  This documentation should be submitted to the
+   Area Director with the protocol action request. (see Section 6)
+
+   A Draft Standard must be well-understood and known to be quite
+   stable, both in its semantics and as a basis for developing an
+   implementation.  A Draft Standard may still require additional or
+   more widespread field experience, since it is possible for
+   implementations based on Draft Standard specifications to demonstrate
+   unforeseen behavior when subjected to large-scale use in production
+   environments.
+
+
+
+
+
+
+
+Bradner                  Best Current Practice                 [Page 13]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+   A Draft Standard is normally considered to be a final specification,
+   and changes are likely to be made only to solve specific problems
+   encountered.  In most circumstances, it is reasonable for vendors to
+   deploy implementations of Draft Standards into a disruption-sensitive
+   environment.
+
+4.1.3  Internet Standard
+
+   A specification for which significant implementation and successful
+   operational experience has been obtained may be elevated to the
+   Internet Standard level.  An Internet Standard (which may simply be
+   referred to as a Standard) is characterized by a high degree of
+   technical maturity and by a generally held belief that the specified
+   protocol or service provides significant benefit to the Internet
+   community.
+
+   A specification that reaches the status of Standard is assigned a
+   number in the STD series while retaining its RFC number.
+
+4.2  Non-Standards Track Maturity Levels
+
+   Not every specification is on the standards track.  A specification
+   may not be intended to be an Internet Standard, or it may be intended
+   for eventual standardization but not yet ready to enter the standards
+   track.  A specification may have been superseded by a more recent
+   Internet Standard, or have otherwise fallen into disuse or disfavor.
+
+   Specifications that are not on the standards track are labeled with
+   one of three "off-track" maturity levels:  "Experimental",
+   "Informational", or "Historic".  The documents bearing these labels
+   are not Internet Standards in any sense.
+
+4.2.1  Experimental
+
+   The "Experimental" designation typically denotes a specification that
+   is part of some research or development effort.  Such a specification
+   is published for the general information of the Internet technical
+   community and as an archival record of the work, subject only to
+   editorial considerations and to verification that there has been
+   adequate coordination with the standards process (see below).  An
+   Experimental specification may be the output of an organized Internet
+   research effort (e.g., a Research Group of the IRTF), an IETF Working
+   Group, or it may be an individual contribution.
+
+
+
+
+
+
+
+
+Bradner                  Best Current Practice                 [Page 14]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+4.2.2  Informational
+
+   An "Informational" specification is published for the general
+   information of the Internet community, and does not represent an
+   Internet community consensus or recommendation.  The Informational
+   designation is intended to provide for the timely publication of a
+   very broad range of responsible informational documents from many
+   sources, subject only to editorial considerations and to verification
+   that there has been adequate coordination with the standards process
+   (see section 4.2.3).
+
+   Specifications that have been prepared outside of the Internet
+   community and are not incorporated into the Internet Standards
+   Process by any of the provisions of section 10 may be published as
+   Informational RFCs, with the permission of the owner and the
+   concurrence of the RFC Editor.
+
+4.2.3  Procedures for Experimental and Informational RFCs
+
+   Unless they are the result of IETF Working Group action, documents
+   intended to be published with Experimental or Informational status
+   should be submitted directly to the RFC Editor.  The RFC Editor will
+   publish any such documents as Internet-Drafts which have not already
+   been so published.  In order to differentiate these Internet-Drafts
+   they will be labeled or grouped in the I-D directory so they are
+   easily recognizable.  The RFC Editor will wait two weeks after this
+   publication for comments before proceeding further.  The RFC Editor
+   is expected to exercise his or her judgment concerning the editorial
+   suitability of a document for publication with Experimental or
+   Informational status, and may refuse to publish a document which, in
+   the expert opinion of the RFC Editor, is unrelated to Internet
+   activity or falls below the technical and/or editorial standard for
+   RFCs.
+
+   To ensure that the non-standards track Experimental and Informational
+   designations are not misused to circumvent the Internet Standards
+   Process, the IESG and the RFC Editor have agreed that the RFC Editor
+   will refer to the IESG any document submitted for Experimental or
+   Informational publication which, in the opinion of the RFC Editor,
+   may be related to work being done, or expected to be done, within the
+   IETF community.  The IESG shall review such a referred document
+   within a reasonable period of time, and recommend either that it be
+   published as originally submitted or referred to the IETF as a
+   contribution to the Internet Standards Process.
+
+   If (a) the IESG recommends that the document be brought within the
+   IETF and progressed within the IETF context, but the author declines
+   to do so, or (b) the IESG considers that the document proposes
+
+
+
+Bradner                  Best Current Practice                 [Page 15]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+   something that conflicts with, or is actually inimical to, an
+   established IETF effort, the document may still be published as an
+   Experimental or Informational RFC.  In these cases, however, the IESG
+   may insert appropriate "disclaimer" text into the RFC either in or
+   immediately following the "Status of this Memo" section in order to
+   make the circumstances of its publication clear to readers.
+
+   Documents proposed for Experimental and Informational RFCs by IETF
+   Working Groups go through IESG review.  The review is initiated using
+   the process described in section 6.1.1.
+
+4.2.4  Historic
+
+   A specification that has been superseded by a more recent
+   specification or is for any other reason considered to be obsolete is
+   assigned to the "Historic" level.  (Purists have suggested that the
+   word should be "Historical"; however, at this point the use of
+   "Historic" is historical.)
+
+   Note: Standards track specifications normally must not depend on
+   other standards track specifications which are at a lower maturity
+   level or on non standards track specifications other than referenced
+   specifications from other standards bodies.  (See Section 7.)
+
+5.  BEST CURRENT PRACTICE (BCP) RFCs
+
+   The BCP subseries of the RFC series is designed to be a way to
+   standardize practices and the results of community deliberations.  A
+   BCP document is subject to the same basic set of procedures as
+   standards track documents and thus is a vehicle by which the IETF
+   community can define and ratify the community's best current thinking
+   on a statement of principle or on what is believed to be the best way
+   to perform some operations or IETF process function.
+
+   Historically Internet standards have generally been concerned with
+   the technical specifications for hardware and software required for
+   computer communication across interconnected networks.  However,
+   since the Internet itself is composed of networks operated by a great
+   variety of organizations, with diverse goals and rules, good user
+   service requires that the operators and administrators of the
+   Internet follow some common guidelines for policies and operations.
+   While these guidelines are generally different in scope and style
+   from protocol standards, their establishment needs a similar process
+   for consensus building.
+
+   While it is recognized that entities such as the IAB and IESG are
+   composed of individuals who may participate, as individuals, in the
+   technical work of the IETF, it is also recognized that the entities
+
+
+
+Bradner                  Best Current Practice                 [Page 16]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+   themselves have an existence as leaders in the community.  As leaders
+   in the Internet technical community, these entities should have an
+   outlet to propose ideas to stimulate work in a particular area, to
+   raise the community's sensitivity to a certain issue, to make a
+   statement of architectural principle, or to communicate their
+   thoughts on other matters.  The BCP subseries creates a smoothly
+   structured way for these management entities to insert proposals into
+   the consensus-building machinery of the IETF while gauging the
+   community's view of that issue.
+
+   Finally, the BCP series may be used to document the operation of the
+   IETF itself.  For example, this document defines the IETF Standards
+   Process and is published as a BCP.
+
+5.1 BCP Review Process
+
+   Unlike standards-track documents, the mechanisms described in BCPs
+   are not well suited to the phased roll-in nature of the three stage
+   standards track and instead generally only make sense for full and
+   immediate instantiation.
+
+   The BCP process is similar to that for proposed standards.  The BCP
+   is submitted to the IESG for review (see section 6.1.1), and the
+   existing review process applies, including a Last-Call on the IETF
+   Announce mailing list.  However, once the IESG has approved the
+   document, the process ends and the document is published.  The
+   resulting document is viewed as having the technical approval of the
+   IETF.
+
+   Specifically, a document to be considered for the status of BCP must
+   undergo the procedures outlined in sections 6.1 and 6.4 of this
+   document. The BCP process may be appealed according to the procedures
+   in section 6.5.
+
+   Because BCPs are meant to express community consensus but are arrived
+   at more quickly than standards, BCPs require particular care.
+   Specifically, BCPs should not be viewed simply as stronger
+   Informational RFCs, but rather should be viewed as documents suitable
+   for a content different from Informational RFCs.
+
+   A specification, or group of specifications, that has, or have, been
+   approved as a BCP is assigned a number in the BCP series while
+   retaining its RFC number(s).
+
+
+
+
+
+
+
+
+Bradner                  Best Current Practice                 [Page 17]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+6.  THE INTERNET STANDARDS PROCESS
+
+   The mechanics of the Internet Standards Process involve decisions of
+   the IESG concerning the elevation of a specification onto the
+   standards track or the movement of a standards-track specification
+   from one maturity level to another.  Although a number of reasonably
+   objective criteria (described below and in section 4) are available
+   to guide the IESG in making a decision to move a specification onto,
+   along, or off the standards track, there is no algorithmic guarantee
+   of elevation to or progression along the standards track for any
+   specification.  The experienced collective judgment of the IESG
+   concerning the technical quality of a specification proposed for
+   elevation to or advancement in the standards track is an essential
+   component of the decision-making process.
+
+6.1  Standards Actions
+
+   A "standards action" -- entering a particular specification into,
+   advancing it within, or removing it from, the standards track -- must
+   be approved by the IESG.
+
+6.1.1  Initiation of Action
+
+   A specification that is intended to enter or advance in the Internet
+   standards track shall first be posted as an Internet-Draft (see
+   section 2.2) unless it has not changed since publication as an RFC.
+   It shall remain as an Internet-Draft for a period of time, not less
+   than two weeks, that permits useful community review, after which a
+   recommendation for action may be initiated.
+
+   A standards action is initiated by a recommendation by the IETF
+   Working Group responsible for a specification to its Area Director,
+   copied to the IETF Secretariat or, in the case of a specification not
+   associated with a Working Group, a recommendation by an individual to
+   the IESG.
+
+6.1.2  IESG Review and Approval
+
+   The IESG shall determine whether or not a specification submitted to
+   it according to section 6.1.1 satisfies the applicable criteria for
+   the recommended action (see sections 4.1 and 4.2), and shall in
+   addition determine whether or not the technical quality and clarity
+   of the specification is consistent with that expected for the
+   maturity level to which the specification is recommended.
+
+   In order to obtain all of the information necessary to make these
+   determinations, particularly when the specification is considered by
+   the IESG to be extremely important in terms of its potential impact
+
+
+
+Bradner                  Best Current Practice                 [Page 18]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+   on the Internet or on the suite of Internet protocols, the IESG may,
+   at its discretion, commission an independent technical review of the
+   specification.
+
+   The IESG will send notice to the IETF of the pending IESG
+   consideration of the document(s) to permit a final review by the
+   general Internet community.  This "Last-Call" notification shall be
+   via electronic mail to the IETF Announce mailing list.  Comments on a
+   Last-Call shall be accepted from anyone, and should be sent as
+   directed in the Last-Call announcement.
+
+   The Last-Call period shall be no shorter than two weeks except in
+   those cases where the proposed standards action was not initiated by
+   an IETF Working Group, in which case the Last-Call period shall be no
+   shorter than four weeks.  If the IESG believes that the community
+   interest would be served by allowing more time for comment, it may
+   decide on a longer Last-Call period or to explicitly lengthen a
+   current Last-Call period.
+
+   The IESG is not bound by the action recommended when the
+   specification was submitted.  For example, the IESG may decide to
+   consider the specification for publication in a different category
+   than that requested.  If the IESG determines this before the Last-
+   Call is issued then the Last-Call should reflect the IESG's view.
+   The IESG could also decide to change the publication category based
+   on the response to a Last-Call. If this decision would result in a
+   specification being published at a "higher" level than the original
+   Last-Call was for, a new Last-Call should be issued indicating the
+   IESG recommendation. In addition, the IESG may decide to recommend
+   the formation of a new Working Group in the case of significant
+   controversy in response to a Last-Call for a specification not
+   originating from an IETF Working Group.
+
+   In a timely fashion after the expiration of the Last-Call period, the
+   IESG shall make its final determination of whether or not to approve
+   the standards action, and shall notify the IETF of its decision via
+   electronic mail to the IETF Announce mailing list.
+
+6.1.3  Publication
+
+   If a standards action is approved, notification is sent to the RFC
+   Editor and copied to the IETF with instructions to publish the
+   specification as an RFC.  The specification shall at that point be
+   removed from the Internet-Drafts directory.
+
+
+
+
+
+
+
+Bradner                  Best Current Practice                 [Page 19]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+   An official summary of standards actions completed and pending shall
+   appear in each issue of the Internet Society's newsletter.  This
+   shall constitute the "publication of record" for Internet standards
+   actions.
+
+   The RFC Editor shall publish periodically an "Internet Official
+   Protocol Standards" RFC [1], summarizing the status of all Internet
+   protocol and service specifications.
+
+6.2  Advancing in the Standards Track
+
+   The procedure described in section 6.1 is followed for each action
+   that attends the advancement of a specification along the standards
+   track.
+
+   A specification shall remain at the Proposed Standard level for at
+   least six (6) months.
+
+   A specification shall remain at the Draft Standard level for at least
+   four (4) months, or until at least one IETF meeting has occurred,
+   whichever comes later.
+
+   These minimum periods are intended to ensure adequate opportunity for
+   community review without severely impacting timeliness.  These
+   intervals shall be measured from the date of publication of the
+   corresponding RFC(s), or, if the action does not result in RFC
+   publication, the date of the announcement of the IESG approval of the
+   action.
+
+   A specification may be (indeed, is likely to be) revised as it
+   advances through the standards track.  At each stage, the IESG shall
+   determine the scope and significance of the revision to the
+   specification, and, if necessary and appropriate, modify the
+   recommended action.  Minor revisions are expected, but a significant
+   revision may require that the specification accumulate more
+   experience at its current maturity level before progressing. Finally,
+   if the specification has been changed very significantly, the IESG
+   may recommend that the revision be treated as a new document, re-
+   entering the standards track at the beginning.
+
+   Change of status shall result in republication of the specification
+   as an RFC, except in the rare case that there have been no changes at
+   all in the specification since the last publication.  Generally,
+   desired changes will be "batched" for incorporation at the next level
+   in the standards track.  However, deferral of changes to the next
+   standards action on the specification will not always be possible or
+   desirable; for example, an important typographical error, or a
+   technical error that does not represent a change in overall function
+
+
+
+Bradner                  Best Current Practice                 [Page 20]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+   of the specification, may need to be corrected immediately.  In such
+   cases, the IESG or RFC Editor may be asked to republish the RFC (with
+   a new number) with corrections, and this will not reset the minimum
+   time-at-level clock.
+
+   When a standards-track specification has not reached the Internet
+   Standard level but has remained at the same maturity level for
+   twenty-four (24) months, and every twelve (12) months thereafter
+   until the status is changed, the IESG shall review the viability of
+   the standardization effort responsible for that specification and the
+   usefulness of the technology. Following each such review, the IESG
+   shall approve termination or continuation of the development effort,
+   at the same time the IESG shall decide to maintain the specification
+   at the same maturity level or to move it to Historic status.  This
+   decision shall be communicated to the IETF by electronic mail to the
+   IETF Announce mailing list to allow the Internet community an
+   opportunity to comment. This provision is not intended to threaten a
+   legitimate and active Working Group effort, but rather to provide an
+   administrative mechanism for terminating a moribund effort.
+
+6.3  Revising a Standard
+
+   A new version of an established Internet Standard must progress
+   through the full Internet standardization process as if it were a
+   completely new specification.  Once the new version has reached the
+   Standard level, it will usually replace the previous version, which
+   will be moved to Historic status.  However, in some cases both
+   versions may remain as Internet Standards to honor the requirements
+   of an installed base.  In this situation, the relationship between
+   the previous and the new versions must be explicitly stated in the
+   text of the new version or in another appropriate document (e.g., an
+   Applicability Statement; see section 3.2).
+
+6.4  Retiring a Standard
+
+   As the technology changes and matures, it is possible for a new
+   Standard specification to be so clearly superior technically that one
+   or more existing standards track specifications for the same function
+   should be retired.  In this case, or when it is felt for some other
+   reason that an existing standards track specification should be
+   retired, the IESG shall approve a change of status of the old
+   specification(s) to Historic.  This recommendation shall be issued
+   with the same Last-Call and notification procedures used for any
+   other standards action.  A request to retire an existing standard can
+   originate from a Working Group, an Area Director or some other
+   interested party.
+
+
+
+
+
+Bradner                  Best Current Practice                 [Page 21]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+6.5  Conflict Resolution and Appeals
+
+   Disputes are possible at various stages during the IETF process. As
+   much as possible the process is designed so that compromises can be
+   made, and genuine consensus achieved; however, there are times when
+   even the most reasonable and knowledgeable people are unable to
+   agree. To achieve the goals of openness and fairness, such conflicts
+   must be resolved by a process of open review and discussion. This
+   section specifies the procedures that shall be followed to deal with
+   Internet standards issues that cannot be resolved through the normal
+   processes whereby IETF Working Groups and other Internet Standards
+   Process participants ordinarily reach consensus.
+
+6.5.1 Working Group Disputes
+
+   An individual (whether a participant in the relevant Working Group or
+   not) may disagree with a Working Group recommendation based on his or
+   her belief that either (a) his or her own views have not been
+   adequately considered by the Working Group, or (b) the Working Group
+   has made an incorrect technical choice which places the quality
+   and/or integrity of the Working Group's product(s) in significant
+   jeopardy.  The first issue is a difficulty with Working Group
+   process;  the latter is an assertion of technical error.  These two
+   types of disagreement are quite different, but both are handled by
+   the same process of review.
+
+   A person who disagrees with a Working Group recommendation shall
+   always first discuss the matter with the Working Group's chair(s),
+   who may involve other members of the Working Group (or the Working
+   Group as a whole) in the discussion.
+
+   If the disagreement cannot be resolved in this way, any of the
+   parties involved may bring it to the attention of the Area
+   Director(s) for the area in which the Working Group is chartered.
+   The Area Director(s) shall attempt to resolve the dispute.
+
+   If the disagreement cannot be resolved by the Area Director(s) any of
+   the parties involved may then appeal to the IESG as a whole.  The
+   IESG shall then review the situation and attempt to resolve it in a
+   manner of its own choosing.
+
+   If the disagreement is not resolved to the satisfaction of the
+   parties at the IESG level, any of the parties involved may appeal the
+   decision to the IAB.  The IAB shall then review the situation and
+   attempt to resolve it in a manner of its own choosing.
+
+
+
+
+
+
+Bradner                  Best Current Practice                 [Page 22]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+   The IAB decision is final with respect to the question of whether or
+   not the Internet standards procedures have been followed and with
+   respect to all questions of technical merit.
+
+6.5.2 Process Failures
+
+   This document sets forward procedures required to be followed to
+   ensure openness and fairness of the Internet Standards Process, and
+   the technical viability of the standards created. The IESG is the
+   principal agent of the IETF for this purpose, and it is the IESG that
+   is charged with ensuring that the required procedures have been
+   followed, and that any necessary prerequisites to a standards action
+   have been met.
+
+   If an individual should disagree with an action taken by the IESG in
+   this process, that person should first discuss the issue with the
+   IESG Chair. If the IESG Chair is unable to satisfy the complainant
+   then the IESG as a whole should re-examine the action taken, along
+   with input from the complainant, and determine whether any further
+   action is needed.  The IESG shall issue a report on its review of the
+   complaint to the IETF.
+
+   Should the complainant not be satisfied with the outcome of the IESG
+   review, an appeal may be lodged to the IAB. The IAB shall then review
+   the situation and attempt to resolve it in a manner of its own
+   choosing and report to the IETF on the outcome of its review.
+
+   If circumstances warrant, the IAB may direct that an IESG decision be
+   annulled, and the situation shall then be as it was before the IESG
+   decision was taken. The IAB may also recommend an action to the IESG,
+   or make such other recommendations as it deems fit. The IAB may not,
+   however, pre-empt the role of the IESG by issuing a decision which
+   only the IESG is empowered to make.
+
+   The IAB decision is final with respect to the question of whether or
+   not the Internet standards procedures have been followed.
+
+6.5.3 Questions of Applicable Procedure
+
+   Further recourse is available only in cases in which the procedures
+   themselves (i.e., the procedures described in this document) are
+   claimed to be inadequate or insufficient to the protection of the
+   rights of all parties in a fair and open Internet Standards Process.
+   Claims on this basis may be made to the Internet Society Board of
+   Trustees.  The President of the Internet Society shall acknowledge
+   such an appeal within two weeks, and shall at the time of
+   acknowledgment advise the petitioner of the expected duration of the
+   Trustees' review of the appeal.  The Trustees shall review the
+
+
+
+Bradner                  Best Current Practice                 [Page 23]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+   situation in a manner of its own choosing and report to the IETF on
+   the outcome of its review.
+
+   The Trustees' decision upon completion of their review shall be final
+   with respect to all aspects of the dispute.
+
+6.5.4 Appeals Procedure
+
+   All appeals must include a detailed and specific description of the
+   facts of the dispute.
+
+   All appeals must be initiated within two months of the public
+   knowledge of the action or decision to be challenged.
+
+   At all stages of the appeals process, the individuals or bodies
+   responsible for making the decisions have the discretion to define
+   the specific procedures they will follow in the process of making
+   their decision.
+
+   In all cases a decision concerning the disposition of the dispute,
+   and the communication of that decision to the parties involved, must
+   be accomplished within a reasonable period of time.
+
+   [NOTE:  These procedures intentionally and explicitly do not
+   establish a fixed maximum time period that shall be considered
+   "reasonable" in all cases.  The Internet Standards Process places a
+   premium on consensus and efforts to achieve it, and deliberately
+   forgoes deterministically swift execution of procedures in favor of
+   a latitude within which more genuine technical agreements may be
+   reached.]
+
+7.  EXTERNAL STANDARDS AND SPECIFICATIONS
+
+   Many standards groups other than the IETF create and publish
+   standards documents for network protocols and services.  When these
+   external specifications play an important role in the Internet, it is
+   desirable to reach common agreements on their usage -- i.e., to
+   establish Internet Standards relating to these external
+   specifications.
+
+   There are two categories of external specifications:
+
+   (1)  Open Standards
+
+      Various national and international standards bodies, such as ANSI,
+      ISO, IEEE, and ITU-T, develop a variety of protocol and service
+      specifications that are similar to Technical Specifications
+      defined here.  National and international groups also publish
+
+
+
+Bradner                  Best Current Practice                 [Page 24]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+      "implementors' agreements" that are analogous to Applicability
+      Statements, capturing a body of implementation-specific detail
+      concerned with the practical application of their standards.  All
+      of these are considered to be "open external standards" for the
+      purposes of the Internet Standards Process.
+
+   (2)  Other Specifications
+
+      Other proprietary specifications that have come to be widely used
+      in the Internet may be treated by the Internet community as if
+      they were "standards".  Such a specification is not generally
+      developed in an open fashion, is typically proprietary, and is
+      controlled by the vendor, vendors, or organization that produced
+      it.
+
+7.1  Use of External Specifications
+
+   To avoid conflict between competing versions of a specification, the
+   Internet community will not standardize a specification that is
+   simply an "Internet version" of an existing external specification
+   unless an explicit cooperative arrangement to do so has been made.
+   However, there are several ways in which an external specification
+   that is important for the operation and/or evolution of the Internet
+   may be adopted for Internet use.
+
+7.1.1  Incorporation of an Open Standard
+
+   An Internet Standard TS or AS may incorporate an open external
+   standard by reference.  For example, many Internet Standards
+   incorporate by reference the ANSI standard character set "ASCII" [2].
+   Whenever possible, the referenced specification shall be available
+   online.
+
+7.1.2  Incorporation of Other Specifications
+
+   Other proprietary specifications may be incorporated by reference to
+   a version of the specification as long as the proprietor meets the
+   requirements of section 10.  If the other proprietary specification
+   is not widely and readily available, the IESG may request that it be
+   published as an Informational RFC.
+
+   The IESG generally should not favor a particular proprietary
+   specification over technically equivalent and competing
+   specification(s) by making any incorporated vendor specification
+   "required" or "recommended".
+
+
+
+
+
+
+Bradner                  Best Current Practice                 [Page 25]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+7.1.3  Assumption
+
+   An IETF Working Group may start from an external specification and
+   develop it into an Internet specification.  This is acceptable if (1)
+   the specification is provided to the Working Group in compliance with
+   the requirements of section 10, and (2) change control has been
+   conveyed to IETF by the original developer of the specification for
+   the specification or for specifications derived from the original
+   specification.
+
+8.  NOTICES AND RECORD KEEPING
+
+   Each of the organizations involved in the development and approval of
+   Internet Standards shall publicly announce, and shall maintain a
+   publicly accessible record of, every activity in which it engages, to
+   the extent that the activity represents the prosecution of any part
+   of the Internet Standards Process.  For purposes of this section, the
+   organizations involved in the development and approval of Internet
+   Standards include the IETF, the IESG, the IAB, all IETF Working
+   Groups, and the Internet Society Board of Trustees.
+
+   For IETF and Working Group meetings announcements shall be made by
+   electronic mail to the IETF Announce mailing list and shall be made
+   sufficiently far in advance of the activity to permit all interested
+   parties to effectively participate.  The announcement shall contain
+   (or provide pointers to) all of the information that is necessary to
+   support the participation of any interested individual.  In the case
+   of a meeting, for example, the announcement shall include an agenda
+   that specifies the standards-related issues that will be discussed.
+
+   The formal record of an organization's standards-related activity
+   shall include at least the following:
+
+   o  the charter of the organization (or a defining document equivalent
+      to a charter);
+   o  complete and accurate minutes of meetings;
+   o  the archives of Working Group electronic mail mailing lists;  and
+   o  all written contributions from participants that pertain to the
+      organization's standards-related activity.
+
+   As a practical matter, the formal record of all Internet Standards
+   Process activities is maintained by the IETF Secretariat, and is the
+   responsibility of the IETF Secretariat except that each IETF Working
+   Group is expected to maintain its own email list archive and must
+   make a best effort to ensure that all traffic is captured and
+   included in the archives.  Also, the Working Group chair is
+   responsible for providing the IETF Secretariat with complete and
+   accurate minutes of all Working Group meetings.  Internet-Drafts that
+
+
+
+Bradner                  Best Current Practice                 [Page 26]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+   have been removed (for any reason) from the Internet-Drafts
+   directories shall be archived by the IETF Secretariat for the sole
+   purpose of preserving an historical record of Internet standards
+   activity and thus are not retrievable except in special
+   circumstances.
+
+9.  VARYING THE PROCESS
+
+   This document, which sets out the rules and procedures by which
+   Internet Standards and related documents are made is itself a product
+   of the Internet Standards Process (as a BCP, as described in section
+   5). It replaces a previous version, and in time, is likely itself to
+   be replaced.
+
+   While, when published, this document represents the community's view
+   of the proper and correct process to follow, and requirements to be
+   met, to allow for the best possible Internet Standards and BCPs, it
+   cannot be assumed that this will always remain the case. From time to
+   time there may be a desire to update it, by replacing it with a new
+   version.  Updating this document uses the same open procedures as are
+   used for any other BCP.
+
+   In addition, there may be situations where following the procedures
+   leads to a deadlock about a specific specification, or there may be
+   situations where the procedures provide no guidance.  In these cases
+   it may be appropriate to invoke the variance procedure described
+   below.
+
+9.1 The Variance Procedure
+
+   Upon the recommendation of the responsible IETF Working Group (or, if
+   no Working Group is constituted, upon the recommendation of an ad hoc
+   committee), the IESG may enter a particular specification into, or
+   advance it within, the standards track even though some of the
+   requirements of this document have not or will not be met. The IESG
+   may approve such a variance, however, only if it first determines
+   that the likely benefits to the Internet community are likely to
+   outweigh any costs to the Internet community that result from
+   noncompliance with the requirements in this document.  In exercising
+   this discretion, the IESG shall at least consider (a) the technical
+   merit of the specification, (b) the possibility of achieving the
+   goals of the Internet Standards Process without granting a variance,
+   (c) alternatives to the granting of a variance, (d) the collateral
+   and precedential effects of granting a variance, and (e) the IESG's
+   ability to craft a variance that is as narrow as possible.  In
+   determining whether to approve a variance, the IESG has discretion to
+   limit the scope of the variance to particular parts of this document
+   and to impose such additional restrictions or limitations as it
+
+
+
+Bradner                  Best Current Practice                 [Page 27]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+   determines appropriate to protect the interests of the Internet
+   community.
+
+   The proposed variance must detail the problem perceived, explain the
+   precise provision of this document which is causing the need for a
+   variance, and the results of the IESG's considerations including
+   consideration of points (a) through (d) in the previous paragraph.
+   The proposed variance shall be issued as an Internet Draft.  The IESG
+   shall then issue an extended Last-Call, of no less than 4 weeks, to
+   allow for community comment upon the proposal.
+
+   In a timely fashion after the expiration of the Last-Call period, the
+   IESG shall make its final determination of whether or not to approve
+   the proposed variance, and shall notify the IETF of its decision via
+   electronic mail to the IETF Announce mailing list.  If the variance
+   is approved it shall be forwarded to the RFC Editor with a request
+   that it be published as a BCP.
+
+   This variance procedure is for use when a one-time waiving of some
+   provision of this document is felt to be required.  Permanent changes
+   to this document shall be accomplished through the normal BCP
+   process.
+
+   The appeals process in section 6.5 applies to this process.
+
+9.2 Exclusions
+
+   No use of this procedure may lower any specified delays, nor exempt
+   any proposal from the requirements of openness, fairness, or
+   consensus, nor from the need to keep proper records of the meetings
+   and mailing list discussions.
+
+   Specifically, the following sections of this document must not be
+   subject of a variance: 5.1, 6.1, 6.1.1 (first paragraph), 6.1.2, 6.3
+   (first sentence), 6.5 and 9.
+
+10.  INTELLECTUAL PROPERTY RIGHTS
+
+10.1.  General Policy
+
+   In all matters of intellectual property rights and procedures, the
+   intention is to benefit the Internet community and the public at
+   large, while respecting the legitimate rights of others.
+
+
+
+
+
+
+
+
+Bradner                  Best Current Practice                 [Page 28]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+10.2  Confidentiality Obligations
+
+   No contribution that is subject to any requirement of confidentiality
+   or any restriction on its dissemination may be considered in any part
+   of the Internet Standards Process, and there must be no assumption of
+   any confidentiality obligation with respect to any such contribution.
+
+10.3.  Rights and Permissions
+
+   In the course of standards work, the IETF receives contributions in
+   various forms and from many persons.  To best facilitate the
+   dissemination of these contributions, it is necessary to understand
+   any intellectual property rights (IPR) relating to the contributions.
+
+10.3.1.  All Contributions
+
+   By submission of a contribution, each person actually submitting the
+   contribution is deemed to agree to the following terms and conditions
+   on his own behalf, on behalf of the organization (if any) he
+   represents and on behalf of the owners of any proprietary rights in the
+   contribution.  Where a submission identifies contributors in
+   addition to the contributor(s) who provide the actual submission, the
+   actual submitter(s) represent that each other named contributor was
+   made aware of and agreed to accept the same terms and conditions on
+   his own behalf, on behalf of any organization he may represent and
+   any known owner of any proprietary rights in the contribution.
+
+   1. Some works (e.g. works of the U.S. Government) are not subject to
+      copyright.  However, to the extent that the submission is or may
+      be subject to copyright, the contributor, the organization he
+      represents (if any) and the owners of any proprietary rights in
+      the contribution, grant an unlimited perpetual, non-exclusive,
+      royalty-free, world-wide right and license to the ISOC and the
+      IETF under any copyrights in the contribution.  This license
+      includes the right to copy, publish and distribute the
+      contribution in any way, and to prepare derivative works that are
+      based on or incorporate all or part of the contribution, the
+      license to such derivative works to be of the same scope as the
+      license of the original contribution.
+
+   2. The contributor acknowledges that the ISOC and IETF have no duty
+      to publish or otherwise use or disseminate any contribution.
+
+   3. The contributor grants permission to reference the name(s) and
+      address(es) of the contributor(s) and of the organization(s) he
+      represents (if any).
+
+
+
+
+
+Bradner                  Best Current Practice                 [Page 29]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+   4. The contributor represents that the contribution properly
+      acknowledges major contributors.
+
+   5. The contributor, the organization (if any) he represents and the
+      owners of any proprietary rights in the contribution, agree that
+      no information in the contribution is confidential and that the
+      ISOC and its affiliated organizations may freely disclose any
+      information in the contribution.
+
+   6. The contributor represents that he has disclosed the existence of
+      any proprietary or intellectual property rights in the
+      contribution that are reasonably and personally known to the
+      contributor.  The contributor does not represent that he
+      personally knows of all potentially pertinent proprietary and
+      intellectual property rights owned or claimed by the organization
+      he represents (if any) or third parties.
+
+   7. The contributor represents that there are no limits to the
+      contributor's ability to make the grants, acknowledgments and
+      agreements above that are reasonably and personally known to the
+      contributor.
+
+      By ratifying this description of the IETF process the Internet
+      Society warrants that it will not inhibit the traditional open and
+      free access to IETF documents for which license and right have
+      been assigned according to the procedures set forth in this
+      section, including Internet-Drafts and RFCs. This warrant is
+      perpetual and will not be revoked by the Internet Society or its
+      successors or assigns.
+
+10.3.2. Standards Track Documents
+
+   (A)  Where any patents, patent applications, or other proprietary
+      rights are known, or claimed, with respect to any specification on
+      the standards track, and brought to the attention of the IESG, the
+      IESG shall not advance the specification without including in the
+      document a note indicating the existence of such rights, or
+      claimed rights.  Where implementations are required before
+      advancement of a specification, only implementations that have, by
+      statement of the implementors, taken adequate steps to comply with
+      any such rights, or claimed rights, shall be considered for the
+      purpose of showing the adequacy of the specification.
+   (B)  The IESG disclaims any responsibility for identifying the
+      existence of or for evaluating the applicability of any claimed
+      copyrights, patents, patent applications, or other rights in the
+      fulfilling of its obligations under (A), and will take no
+      position on the validity or scope of any such rights.
+
+
+
+
+Bradner                  Best Current Practice                 [Page 30]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+   (C)  Where the IESG knows of rights, or claimed rights under (A), the
+      IETF Executive Director shall attempt to obtain from the claimant
+      of such rights, a written assurance that upon approval by the IESG
+      of the relevant Internet standards track specification(s), any
+      party will be able to obtain the right to implement, use and
+      distribute the technology or works when implementing, using or
+      distributing technology based upon the specific specification(s)
+      under openly specified, reasonable, non-discriminatory terms.
+      The Working Group proposing the use of the technology with respect
+      to which the proprietary rights are claimed may assist the IETF
+      Executive Director in this effort.  The results of this procedure
+      shall not affect advancement of a specification along the
+      standards track, except that the IESG may defer approval where a
+      delay may facilitate the obtaining of such assurances.  The
+      results will, however, be recorded by the IETF Executive Director,
+      and made available.  The IESG may also direct that a summary of
+      the results be included in any RFC published containing the
+      specification.
+
+10.3.3  Determination of Reasonable and Non-discriminatory Terms
+
+   The IESG will not make any explicit determination that the assurance
+   of reasonable and non-discriminatory terms for the use of a
+   technology has been fulfilled in practice.  It will instead use the
+   normal requirements for the advancement of Internet Standards to
+   verify that the terms for use are reasonable.  If the two unrelated
+   implementations of the specification that are required to advance
+   from Proposed Standard to Draft Standard have been produced by
+   different organizations or individuals or if the "significant
+   implementation and successful operational experience" required to
+   advance from Draft Standard to Standard has been achieved the
+   assumption is that the terms must be reasonable and to some degree,
+   non-discriminatory.  This assumption may be challenged during the
+   Last-Call period.
+
+10.4.  Notices
+
+   (A)  Standards track documents shall include the following notice:
+
+         "The IETF takes no position regarding the validity or scope of
+         any intellectual property or other rights that might be claimed
+         to pertain to the implementation or use of the technology
+         described in this document or the extent to which any license
+         under such rights might or might not be available; neither does
+         it represent that it has made any effort to identify any such
+         rights.  Information on the IETF's procedures with respect to
+         rights in standards-track and standards-related documentation
+         can be found in BCP-11.  Copies of claims of rights made
+
+
+
+Bradner                  Best Current Practice                 [Page 31]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+         available for publication and any assurances of licenses to
+         be made available, or the result of an attempt made
+         to obtain a general license or permission for the use of such
+         proprietary rights by implementors or users of this
+         specification can be obtained from the IETF Secretariat."
+
+   (B)  The IETF encourages all interested parties to bring to its
+      attention, at the earliest possible time, the existence of any
+      intellectual property rights pertaining to Internet Standards.
+      For this purpose, each standards document shall include the
+      following invitation:
+
+         "The IETF invites any interested party to bring to its
+         attention any copyrights, patents or patent applications, or
+         other proprietary rights which may cover technology that may be
+         required to practice this standard.  Please address the
+         information to the IETF Executive Director."
+
+   (C)  The following copyright notice and disclaimer shall be included
+      in all ISOC standards-related documentation:
+
+         "Copyright (C) The Internet Society (date). All Rights
+         Reserved.
+
+         This document and translations of it may be copied and
+         furnished to others, and derivative works that comment on or
+         otherwise explain it or assist in its implementation may be
+         prepared, copied, published and distributed, in whole or in
+         part, without restriction of any kind, provided that the above
+         copyright notice and this paragraph are included on all such
+         copies and derivative works.  However, this document itself may
+         not be modified in any way, such as by removing the copyright
+         notice or references to the Internet Society or other Internet
+         organizations, except as needed for the purpose of developing
+         Internet standards in which case the procedures for copyrights
+         defined in the Internet Standards process must be followed, or
+         as required to translate it into languages other than English.
+
+         The limited permissions granted above are perpetual and will
+         not be revoked by the Internet Society or its successors or
+         assigns.
+
+
+
+
+
+
+
+
+
+
+Bradner                  Best Current Practice                 [Page 32]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+         This document and the information contained herein is provided
+         on an "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET
+         ENGINEERING TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR
+         IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE
+         OF THE INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY
+         IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+         PARTICULAR PURPOSE."
+
+   (D)  Where the IESG is aware at the time of publication of
+      proprietary rights claimed with respect to a standards track
+      document, or the technology described or referenced therein, such
+      document shall contain the following notice:
+
+         "The IETF has been notified of intellectual property rights
+         claimed in regard to some or all of the specification contained
+         in this document.  For more information consult the online list
+         of claimed rights."
+
+11.  ACKNOWLEDGMENTS
+
+   There have been a number of people involved with the development of
+   the documents defining the IETF Standards Process over the years.
+   The process was first described in RFC 1310 then revised in RFC 1602
+   before the current effort (which relies heavily on its predecessors).
+   Specific acknowledgments must be extended to Lyman Chapin, Phill
+   Gross and Christian Huitema as the editors of the previous versions,
+   to Jon Postel and Dave Crocker for their inputs to those versions, to
+   Andy Ireland, Geoff Stewart, Jim Lampert, and Dick Holleman for their
+   reviews of the legal aspects of the procedures described herein, and
+   to John Stewart, Robert Elz and Steve Coya for their extensive input
+   on the final version.
+
+   In addition much of the credit for the refinement of the details of
+   the IETF processes belongs to the many members of the various
+   incarnations of the POISED Working Group.
+
+12.  SECURITY CONSIDERATIONS
+
+   Security issues are not discussed in this memo.
+
+
+
+
+
+
+
+
+
+
+
+
+Bradner                  Best Current Practice                 [Page 33]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+13.  REFERENCES
+
+   [1]  Postel, J., "Internet Official Protocol Standards", STD 1,
+        USC/Information Sciences Institute, March 1996.
+
+   [2]  ANSI, Coded Character Set -- 7-Bit American Standard Code for
+        Information Interchange, ANSI X3.4-1986.
+
+   [3]  Reynolds, J., and J. Postel, "Assigned Numbers", STD 2,
+        USC/Information Sciences Institute, October 1994.
+
+   [4]  Postel, J., "Introduction to the STD Notes", RFC 1311,
+        USC/Information Sciences Institute, March 1992.
+
+   [5]  Postel, J., "Instructions to RFC Authors", RFC 1543,
+        USC/Information Sciences Institute, October 1993.
+
+   [6]  Huitema, C., J. Postel, and S. Crocker "Not All RFCs are
+        Standards", RFC 1796, April 1995.
+
+14. DEFINITIONS OF TERMS
+
+   IETF Area - A management division within the IETF.  An Area consists
+      of Working Groups related to a general topic such as routing.  An
+      Area is managed by one or two Area Directors.
+   Area Director - The manager of an IETF Area.  The Area Directors
+      along with the IETF Chair comprise the Internet Engineering
+      Steering Group (IESG).
+   File Transfer Protocol (FTP) - An Internet application used to
+      transfer files in a TCP/IP network.
+   gopher - An Internet application used to interactively select and
+      retrieve files in a TCP/IP network.
+   Internet Architecture Board (IAB) - An appointed group that assists
+      in the management of the IETF standards process.
+   Internet Engineering Steering Group (IESG) - A group comprised of the
+      IETF Area Directors and the IETF Chair.  The IESG is responsible
+      for the management, along with the IAB, of the IETF and is the
+      standards approval board for the IETF.
+   interoperable - For the purposes of this document, "interoperable"
+      means to be able to interoperate over a data communications path.
+   Last-Call - A public comment period used to gauge the level of
+      consensus about the reasonableness of a proposed standards action.
+      (see section 6.1.2)
+
+
+
+
+
+
+
+
+Bradner                  Best Current Practice                 [Page 34]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+   online - Relating to information made available over the Internet.
+      When referenced in this document material is said to be online
+      when it is retrievable without restriction or undue fee using
+      standard Internet applications such as anonymous FTP, gopher or
+      the WWW.
+   Working Group - A group chartered by the IESG and IAB to work on a
+      specific specification, set of specifications or topic.
+
+15. AUTHOR'S ADDRESS
+
+   Scott O. Bradner
+   Harvard University
+   Holyoke Center, Room 813
+   1350 Mass. Ave.
+   Cambridge, MA  02138
+   USA
+
+   Phone: +1 617 495 3864
+   EMail: sob@harvard.edu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Bradner                  Best Current Practice                 [Page 35]
+
+RFC 2026               Internet Standards Process           October 1996
+
+
+APPENDIX A: GLOSSARY OF ACRONYMS
+
+   ANSI:     American National Standards Institute
+   ARPA:     (U.S.) Advanced Research Projects Agency
+   AS:       Applicability Statement
+   FTP:      File Transfer Protocol
+   ASCII:    American Standard Code for Information Interchange
+   ITU-T:    Telecommunications Standardization sector of the
+             International Telecommunication Union (ITU), a UN
+             treaty organization; ITU-T was formerly called CCITT.
+   IAB:      Internet Architecture Board
+   IANA:     Internet Assigned Numbers Authority
+   IEEE:     Institute of Electrical and Electronics Engineers
+   ICMP:     Internet Control Message Protocol
+   IESG:     Internet Engineering Steering Group
+   IETF:     Internet Engineering Task Force
+   IP:       Internet Protocol
+   IRSG:     Internet Research Steering Group
+   IRTF:     Internet Research Task Force
+   ISO:      International Organization for Standardization
+   ISOC:     Internet Society
+   MIB:      Management Information Base
+   OSI:      Open Systems Interconnection
+   RFC:      Request for Comments
+   TCP:      Transmission Control Protocol
+   TS:       Technical Specification
+   WWW:      World Wide Web
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Bradner                  Best Current Practice                 [Page 36]
+
diff --git a/ext/picotcp/RFC/rfc2131.txt b/ext/picotcp/RFC/rfc2131.txt
new file mode 100644
index 0000000..f45d9b8
--- /dev/null
+++ b/ext/picotcp/RFC/rfc2131.txt
@@ -0,0 +1,2523 @@
+
+
+
+
+
+
+Network Working Group                                           R. Droms
+Request for Comments: 2131                           Bucknell University
+Obsoletes: 1541                                               March 1997
+Category: Standards Track
+
+                  Dynamic Host Configuration Protocol
+
+Status of this memo
+
+   This document specifies an Internet standards track protocol for the
+   Internet community, and requests discussion and suggestions for
+   improvements.  Please refer to the current edition of the "Internet
+   Official Protocol Standards" (STD 1) for the standardization state
+   and status of this protocol.  Distribution of this memo is unlimited.
+
+Abstract
+
+   The Dynamic Host Configuration Protocol (DHCP) provides a framework
+   for passing configuration information to hosts on a TCP/IP network.
+   DHCP is based on the Bootstrap Protocol (BOOTP) [7], adding the
+   capability of automatic allocation of reusable network addresses and
+   additional configuration options [19].  DHCP captures the behavior of
+   BOOTP relay agents [7, 21], and DHCP participants can interoperate
+   with BOOTP participants [9].
+
+Table of Contents
+
+   1.  Introduction. . . . . . . . . . . . . . . . . . . . . . . . .  2
+   1.1 Changes to RFC1541. . . . . . . . . . . . . . . . . . . . . .  3
+   1.2 Related Work. . . . . . . . . . . . . . . . . . . . . . . . .  4
+   1.3 Problem definition and issues . . . . . . . . . . . . . . . .  4
+   1.4 Requirements. . . . . . . . . . . . . . . . . . . . . . . . .  5
+   1.5 Terminology . . . . . . . . . . . . . . . . . . . . . . . . .  6
+   1.6 Design goals. . . . . . . . . . . . . . . . . . . . . . . . .  6
+   2.  Protocol Summary. . . . . . . . . . . . . . . . . . . . . . .  8
+   2.1 Configuration parameters repository . . . . . . . . . . . . . 11
+   2.2 Dynamic allocation of network addresses . . . . . . . . . . . 12
+   3.  The Client-Server Protocol. . . . . . . . . . . . . . . . . . 13
+   3.1 Client-server interaction - allocating a network address. . . 13
+   3.2 Client-server interaction - reusing a  previously allocated
+       network address . . . . . . . . . . . . . . . . . . . . . . . 17
+   3.3 Interpretation and representation of time values. . . . . . . 20
+   3.4 Obtaining parameters with externally configured network
+       address . . . . . . . . . . . . . . . . . . . . . . . . . . . 20
+   3.5 Client parameters in DHCP . . . . . . . . . . . . . . . . . . 21
+   3.6 Use of DHCP in clients with multiple interfaces . . . . . . . 22
+   3.7 When clients should use DHCP. . . . . . . . . . . . . . . . . 22
+   4.  Specification of the DHCP client-server protocol. . . . . . . 22
+
+
+
+Droms                       Standards Track                     [Page 1]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+   4.1 Constructing and sending DHCP messages. . . . . . . . . . . . 22
+   4.2 DHCP server administrative controls . . . . . . . . . . . . . 25
+   4.3 DHCP server behavior. . . . . . . . . . . . . . . . . . . . . 26
+   4.4 DHCP client behavior. . . . . . . . . . . . . . . . . . . . . 34
+   5.  Acknowledgments. . . . . . . . . . . . . . . . . . . . . . . .42
+   6.  References . . . . . . . . . . . . . . . . . . . . . . . . . .42
+   7.  Security Considerations. . . . . . . . . . . . . . . . . . . .43
+   8.  Author's Address . . . . . . . . . . . . . . . . . . . . . . .44
+   A.  Host Configuration Parameters  . . . . . . . . . . . . . . . .45
+List of Figures
+   1. Format of a DHCP message . . . . . . . . . . . . . . . . . . .  9
+   2. Format of the 'flags' field. . . . . . . . . . . . . . . . . . 11
+   3. Timeline diagram of messages exchanged between DHCP client and
+      servers when allocating a new network address. . . . . . . . . 15
+   4. Timeline diagram of messages exchanged between DHCP client and
+      servers when reusing a previously allocated network address. . 18
+   5. State-transition diagram for DHCP clients. . . . . . . . . . . 34
+List of Tables
+   1. Description of fields in a DHCP message. . . . . . . . . . . . 10
+   2. DHCP messages. . . . . . . . . . . . . . . . . . . . . . . . . 14
+   3. Fields and options used by DHCP servers. . . . . . . . . . . . 28
+   4. Client messages from various states. . . . . . . . . . . . . . 33
+   5. Fields and options used by DHCP clients. . . . . . . . . . . . 37
+
+1. Introduction
+
+   The Dynamic Host Configuration Protocol (DHCP) provides configuration
+   parameters to Internet hosts.  DHCP consists of two components: a
+   protocol for delivering host-specific configuration parameters from a
+   DHCP server to a host and a mechanism for allocation of network
+   addresses to hosts.
+
+   DHCP is built on a client-server model, where designated DHCP server
+   hosts allocate network addresses and deliver configuration parameters
+   to dynamically configured hosts.  Throughout the remainder of this
+   document, the term "server" refers to a host providing initialization
+   parameters through DHCP, and the term "client" refers to a host
+   requesting initialization parameters from a DHCP server.
+
+   A host should not act as a DHCP server unless explicitly configured
+   to do so by a system administrator.  The diversity of hardware and
+   protocol implementations in the Internet would preclude reliable
+   operation if random hosts were allowed to respond to DHCP requests.
+   For example, IP requires the setting of many parameters within the
+   protocol implementation software.  Because IP can be used on many
+   dissimilar kinds of network hardware, values for those parameters
+   cannot be guessed or assumed to have correct defaults.  Also,
+   distributed address allocation schemes depend on a polling/defense
+
+
+
+Droms                       Standards Track                     [Page 2]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+   mechanism for discovery of addresses that are already in use.  IP
+   hosts may not always be able to defend their network addresses, so
+   that such a distributed address allocation scheme cannot be
+   guaranteed to avoid allocation of duplicate network addresses.
+
+   DHCP supports three mechanisms for IP address allocation.  In
+   "automatic allocation", DHCP assigns a permanent IP address to a
+   client.  In "dynamic allocation", DHCP assigns an IP address to a
+   client for a limited period of time (or until the client explicitly
+   relinquishes the address).  In "manual allocation", a client's IP
+   address is assigned by the network administrator, and DHCP is used
+   simply to convey the assigned address to the client.  A particular
+   network will use one or more of these mechanisms, depending on the
+   policies of the network administrator.
+
+   Dynamic allocation is the only one of the three mechanisms that
+   allows automatic reuse of an address that is no longer needed by the
+   client to which it was assigned.  Thus, dynamic allocation is
+   particularly useful for assigning an address to a client that will be
+   connected to the network only temporarily or for sharing a limited
+   pool of IP addresses among a group of clients that do not need
+   permanent IP addresses.  Dynamic allocation may also be a good choice
+   for assigning an IP address to a new client being permanently
+   connected to a network where IP addresses are sufficiently scarce
+   that it is important to reclaim them when old clients are retired.
+   Manual allocation allows DHCP to be used to eliminate the error-prone
+   process of manually configuring hosts with IP addresses in
+   environments where (for whatever reasons) it is desirable to manage
+   IP address assignment outside of the DHCP mechanisms.
+
+   The format of DHCP messages is based on the format of BOOTP messages,
+   to capture the BOOTP relay agent behavior described as part of the
+   BOOTP specification [7, 21] and to allow interoperability of existing
+   BOOTP clients with DHCP servers.  Using BOOTP relay agents eliminates
+   the necessity of having a DHCP server on each physical network
+   segment.
+
+1.1 Changes to RFC 1541
+
+   This document updates the DHCP protocol specification that appears in
+   RFC1541.  A new DHCP message type, DHCPINFORM, has been added; see
+   sections 3.4, 4.3 and 4.4 for details.  The classing mechanism for
+   identifying DHCP clients to DHCP servers has been extended to include
+   "vendor" classes as defined in sections 4.2 and 4.3.  The minimum
+   lease time restriction has been removed.  Finally, many editorial
+   changes have been made to clarify the text as a result of experience
+   gained in DHCP interoperability tests.
+
+
+
+
+Droms                       Standards Track                     [Page 3]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+1.2 Related Work
+
+   There are several Internet protocols and related mechanisms that
+   address some parts of the dynamic host configuration problem.  The
+   Reverse Address Resolution Protocol (RARP) [10] (through the
+   extensions defined in the Dynamic RARP (DRARP) [5]) explicitly
+   addresses the problem of network address discovery, and includes an
+   automatic IP address assignment mechanism.  The Trivial File Transfer
+   Protocol (TFTP) [20] provides for transport of a boot image from a
+   boot server.  The Internet Control Message Protocol (ICMP) [16]
+   provides for informing hosts of additional routers via "ICMP
+   redirect" messages.  ICMP also can provide subnet mask information
+   through the "ICMP mask request" message and other information through
+   the (obsolete) "ICMP information request" message.  Hosts can locate
+   routers through the ICMP router discovery mechanism [8].
+
+   BOOTP is a transport mechanism for a collection of configuration
+   information.  BOOTP is also extensible, and official extensions [17]
+   have been defined for several configuration parameters.  Morgan has
+   proposed extensions to BOOTP for dynamic IP address assignment [15].
+   The Network Information Protocol (NIP), used by the Athena project at
+   MIT, is a distributed mechanism for dynamic IP address assignment
+   [19].  The Resource Location Protocol (RLP) [1] provides for location
+   of higher level services.  Sun Microsystems diskless workstations use
+   a boot procedure that employs RARP, TFTP and an RPC mechanism called
+   "bootparams" to deliver configuration information and operating
+   system code to diskless hosts.  (Sun Microsystems, Sun Workstation
+   and SunOS are trademarks of Sun Microsystems, Inc.)  Some Sun
+   networks also use DRARP and an auto-installation mechanism to
+   automate the configuration of new hosts in an existing network.
+
+   In other related work, the path minimum transmission unit (MTU)
+   discovery algorithm can determine the MTU of an arbitrary internet
+   path [14].  The Address Resolution Protocol (ARP) has been proposed
+   as a transport protocol for resource location and selection [6].
+   Finally, the Host Requirements RFCs [3, 4] mention specific
+   requirements for host reconfiguration and suggest a scenario for
+   initial configuration of diskless hosts.
+
+1.3 Problem definition and issues
+
+   DHCP is designed to supply DHCP clients with the configuration
+   parameters defined in the Host Requirements RFCs.  After obtaining
+   parameters via DHCP, a DHCP client should be able to exchange packets
+   with any other host in the Internet.  The TCP/IP stack parameters
+   supplied by DHCP are listed in Appendix A.
+
+
+
+
+
+Droms                       Standards Track                     [Page 4]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+   Not all of these parameters are required for a newly initialized
+   client.  A client and server may negotiate for the transmission of
+   only those parameters required by the client or specific to a
+   particular subnet.
+
+   DHCP allows but does not require the configuration of client
+   parameters not directly related to the IP protocol.  DHCP also does
+   not address registration of newly configured clients with the Domain
+   Name System (DNS) [12, 13].
+
+   DHCP is not intended for use in configuring routers.
+
+1.4 Requirements
+
+   Throughout this document, the words that are used to define the
+   significance of particular requirements are capitalized.  These words
+   are:
+
+      o "MUST"
+
+        This word or the adjective "REQUIRED" means that the
+        item is an absolute requirement of this specification.
+
+      o "MUST NOT"
+
+        This phrase means that the item is an absolute prohibition
+        of this specification.
+
+      o "SHOULD"
+
+        This word or the adjective "RECOMMENDED" means that there
+        may exist valid reasons in particular circumstances to ignore
+        this item, but the full implications should be understood and
+        the case carefully weighed before choosing a different course.
+
+      o "SHOULD NOT"
+
+        This phrase means that there may exist valid reasons in
+        particular circumstances when the listed behavior is acceptable
+        or even useful, but the full implications should be understood
+        and the case carefully weighed before implementing any behavior
+        described with this label.
+
+
+
+
+
+
+
+
+
+Droms                       Standards Track                     [Page 5]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+      o "MAY"
+
+        This word or the adjective "OPTIONAL" means that this item is
+        truly optional.  One vendor may choose to include the item
+        because a particular marketplace requires it or because it
+        enhances the product, for example; another vendor may omit the
+        same item.
+
+1.5 Terminology
+
+   This document uses the following terms:
+
+      o "DHCP client"
+
+      A DHCP client is an Internet host using DHCP to obtain
+      configuration parameters such as a network address.
+
+      o "DHCP server"
+
+      A DHCP server is an Internet host that returns configuration
+      parameters to DHCP clients.
+
+      o "BOOTP relay agent"
+
+      A BOOTP relay agent or relay agent is an Internet host or router
+      that passes DHCP messages between DHCP clients and DHCP servers.
+      DHCP is designed to use the same relay agent behavior as specified
+      in the BOOTP protocol specification.
+
+      o "binding"
+
+      A binding is a collection of configuration parameters, including
+      at least an IP address, associated with or "bound to" a DHCP
+      client.  Bindings are managed by DHCP servers.
+
+1.6 Design goals
+
+   The following list gives general design goals for DHCP.
+
+      o DHCP should be a mechanism rather than a policy.  DHCP must
+        allow local system administrators control over configuration
+        parameters where desired; e.g., local system administrators
+        should be able to enforce local policies concerning allocation
+        and access to local resources where desired.
+
+
+
+
+
+
+
+Droms                       Standards Track                     [Page 6]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+      o Clients should require no manual configuration.  Each client
+        should be able to discover appropriate local configuration
+        parameters without user intervention and incorporate those
+        parameters into its own configuration.
+
+      o Networks should require no manual configuration for individual
+        clients.  Under normal circumstances, the network manager
+        should not have to enter any per-client configuration
+        parameters.
+
+      o DHCP should not require a server on each subnet.  To allow for
+        scale and economy, DHCP must work across routers or through the
+        intervention of BOOTP relay agents.
+
+      o A DHCP client must be prepared to receive multiple responses
+        to a request for configuration parameters.  Some installations
+        may include multiple, overlapping DHCP servers to enhance
+        reliability and increase performance.
+
+      o DHCP must coexist with statically configured, non-participating
+        hosts and with existing network protocol implementations.
+
+      o DHCP must interoperate with the BOOTP relay agent behavior as
+        described by RFC 951 and by RFC 1542 [21].
+
+      o DHCP must provide service to existing BOOTP clients.
+
+   The following list gives design goals specific to the transmission of
+   the network layer parameters.  DHCP must:
+
+      o Guarantee that any specific network address will not be in
+        use by more than one DHCP client at a time,
+
+      o Retain DHCP client configuration across DHCP client reboot.  A
+        DHCP client should, whenever possible, be assigned the same
+        configuration parameters (e.g., network address) in response
+        to each request,
+
+      o Retain DHCP client configuration across server reboots, and,
+        whenever possible, a DHCP client should be assigned the same
+        configuration parameters despite restarts of the DHCP mechanism,
+
+      o Allow automated assignment of configuration parameters to new
+        clients to avoid hand configuration for new clients,
+
+      o Support fixed or permanent allocation of configuration
+        parameters to specific clients.
+
+
+
+
+Droms                       Standards Track                     [Page 7]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+2. Protocol Summary
+
+   From the client's point of view, DHCP is an extension of the BOOTP
+   mechanism.  This behavior allows existing BOOTP clients to
+   interoperate with DHCP servers without requiring any change to the
+   clients' initialization software.  RFC 1542 [21] details the
+   interactions between BOOTP and DHCP clients and servers [9].  There
+   are some new, optional transactions that optimize the interaction
+   between DHCP clients and servers that are described in sections 3 and
+   4.
+
+   Figure 1 gives the format of a DHCP message and table 1 describes
+   each of the fields in the DHCP message.  The numbers in parentheses
+   indicate the size of each field in octets.  The names for the fields
+   given in the figure will be used throughout this document to refer to
+   the fields in DHCP messages.
+
+   There are two primary differences between DHCP and BOOTP.  First,
+   DHCP defines mechanisms through which clients can be assigned a
+   network address for a finite lease, allowing for serial reassignment
+   of network addresses to different clients.  Second, DHCP provides the
+   mechanism for a client to acquire all of the IP configuration
+   parameters that it needs in order to operate.
+
+   DHCP introduces a small change in terminology intended to clarify the
+   meaning of one of the fields.  What was the "vendor extensions" field
+   in BOOTP has been re-named the "options" field in DHCP. Similarly,
+   the tagged data items that were used inside the BOOTP "vendor
+   extensions" field, which were formerly referred to as "vendor
+   extensions," are now termed simply "options."
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Droms                       Standards Track                     [Page 8]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+   0                   1                   2                   3
+   0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |     op (1)    |   htype (1)   |   hlen (1)    |   hops (1)    |
+   +---------------+---------------+---------------+---------------+
+   |                            xid (4)                            |
+   +-------------------------------+-------------------------------+
+   |           secs (2)            |           flags (2)           |
+   +-------------------------------+-------------------------------+
+   |                          ciaddr  (4)                          |
+   +---------------------------------------------------------------+
+   |                          yiaddr  (4)                          |
+   +---------------------------------------------------------------+
+   |                          siaddr  (4)                          |
+   +---------------------------------------------------------------+
+   |                          giaddr  (4)                          |
+   +---------------------------------------------------------------+
+   |                                                               |
+   |                          chaddr  (16)                         |
+   |                                                               |
+   |                                                               |
+   +---------------------------------------------------------------+
+   |                                                               |
+   |                          sname   (64)                         |
+   +---------------------------------------------------------------+
+   |                                                               |
+   |                          file    (128)                        |
+   +---------------------------------------------------------------+
+   |                                                               |
+   |                          options (variable)                   |
+   +---------------------------------------------------------------+
+
+                  Figure 1:  Format of a DHCP message
+
+   DHCP defines a new 'client identifier' option that is used to pass an
+   explicit client identifier to a DHCP server.  This change eliminates
+   the overloading of the 'chaddr' field in BOOTP messages, where
+   'chaddr' is used both as a hardware address for transmission of BOOTP
+   reply messages and as a client identifier.  The 'client identifier'
+   is an opaque key, not to be interpreted by the server; for example,
+   the 'client identifier' may contain a hardware address, identical to
+   the contents of the 'chaddr' field, or it may contain another type of
+   identifier, such as a DNS name.  The 'client identifier' chosen by a
+   DHCP client MUST be unique to that client within the subnet to which
+   the client is attached. If the client uses a 'client identifier' in
+   one message, it MUST use that same identifier in all subsequent
+   messages, to ensure that all servers correctly identify the client.
+
+
+
+
+Droms                       Standards Track                     [Page 9]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+   DHCP clarifies the interpretation of the 'siaddr' field as the
+   address of the server to use in the next step of the client's
+   bootstrap process.  A DHCP server may return its own address in the
+   'siaddr' field, if the server is prepared to supply the next
+   bootstrap service (e.g., delivery of an operating system executable
+   image).  A DHCP server always returns its own address in the 'server
+   identifier' option.
+
+   FIELD      OCTETS       DESCRIPTION
+   -----      ------       -----------
+
+   op            1  Message op code / message type.
+                    1 = BOOTREQUEST, 2 = BOOTREPLY
+   htype         1  Hardware address type, see ARP section in "Assigned
+                    Numbers" RFC; e.g., '1' = 10mb ethernet.
+   hlen          1  Hardware address length (e.g.  '6' for 10mb
+                    ethernet).
+   hops          1  Client sets to zero, optionally used by relay agents
+                    when booting via a relay agent.
+   xid           4  Transaction ID, a random number chosen by the
+                    client, used by the client and server to associate
+                    messages and responses between a client and a
+                    server.
+   secs          2  Filled in by client, seconds elapsed since client
+                    began address acquisition or renewal process.
+   flags         2  Flags (see figure 2).
+   ciaddr        4  Client IP address; only filled in if client is in
+                    BOUND, RENEW or REBINDING state and can respond
+                    to ARP requests.
+   yiaddr        4  'your' (client) IP address.
+   siaddr        4  IP address of next server to use in bootstrap;
+                    returned in DHCPOFFER, DHCPACK by server.
+   giaddr        4  Relay agent IP address, used in booting via a
+                    relay agent.
+   chaddr       16  Client hardware address.
+   sname        64  Optional server host name, null terminated string.
+   file        128  Boot file name, null terminated string; "generic"
+                    name or null in DHCPDISCOVER, fully qualified
+                    directory-path name in DHCPOFFER.
+   options     var  Optional parameters field.  See the options
+                    documents for a list of defined options.
+
+           Table 1:  Description of fields in a DHCP message
+
+   The 'options' field is now variable length. A DHCP client must be
+   prepared to receive DHCP messages with an 'options' field of at least
+   length 312 octets.  This requirement implies that a DHCP client must
+   be prepared to receive a message of up to 576 octets, the minimum IP
+
+
+
+Droms                       Standards Track                    [Page 10]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+   datagram size an IP host must be prepared to accept [3].  DHCP
+   clients may negotiate the use of larger DHCP messages through the
+   'maximum DHCP message size' option.  The options field may be further
+   extended into the 'file' and 'sname' fields.
+
+   In the case of a client using DHCP for initial configuration (before
+   the client's TCP/IP software has been completely configured), DHCP
+   requires creative use of the client's TCP/IP software and liberal
+   interpretation of RFC 1122.  The TCP/IP software SHOULD accept and
+   forward to the IP layer any IP packets delivered to the client's
+   hardware address before the IP address is configured; DHCP servers
+   and BOOTP relay agents may not be able to deliver DHCP messages to
+   clients that cannot accept hardware unicast datagrams before the
+   TCP/IP software is configured.
+
+   To work around some clients that cannot accept IP unicast datagrams
+   before the TCP/IP software is configured as discussed in the previous
+   paragraph, DHCP uses the 'flags' field [21].  The leftmost bit is
+   defined as the BROADCAST (B) flag.  The semantics of this flag are
+   discussed in section 4.1 of this document.  The remaining bits of the
+   flags field are reserved for future use.  They MUST be set to zero by
+   clients and ignored by servers and relay agents.  Figure 2 gives the
+   format of the 'flags' field.
+
+                                    1 1 1 1 1 1
+                0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+                +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+                |B|             MBZ             |
+                +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+                B:  BROADCAST flag
+
+                MBZ:  MUST BE ZERO (reserved for future use)
+
+                Figure 2:  Format of the 'flags' field
+
+2.1 Configuration parameters repository
+
+   The first service provided by DHCP is to provide persistent storage
+   of network parameters for network clients.  The model of DHCP
+   persistent storage is that the DHCP service stores a key-value entry
+   for each client, where the key is some unique identifier (for
+   example, an IP subnet number and a unique identifier within the
+   subnet) and the value contains the configuration parameters for the
+   client.
+
+   For example, the key might be the pair (IP-subnet-number, hardware-
+   address) (note that the "hardware-address" should be typed by the
+
+
+
+Droms                       Standards Track                    [Page 11]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+   type of hardware to accommodate possible duplication of hardware
+   addresses resulting from bit-ordering problems in a mixed-media,
+   bridged network) allowing for serial or concurrent reuse of a
+   hardware address on different subnets, and for hardware addresses
+   that may not be globally unique.  Alternately, the key might be the
+   pair (IP-subnet-number, hostname), allowing the server to assign
+   parameters intelligently to a DHCP client that has been moved to a
+   different subnet or has changed hardware addresses (perhaps because
+   the network interface failed and was replaced). The protocol defines
+   that the key will be (IP-subnet-number, hardware-address) unless the
+   client explicitly supplies an identifier using the 'client
+   identifier' option.  A client can query the DHCP service to
+   retrieve its configuration parameters.  The client interface to the
+   configuration parameters repository consists of protocol messages to
+   request configuration parameters and responses from the server
+   carrying the configuration parameters.
+
+2.2 Dynamic allocation of network addresses
+
+   The second service provided by DHCP is the allocation of temporary or
+   permanent network (IP) addresses to clients.  The basic mechanism for
+   the dynamic allocation of network addresses is simple: a client
+   requests the use of an address for some period of time.  The
+   allocation mechanism (the collection of DHCP servers) guarantees not
+   to reallocate that address within the requested time and attempts to
+   return the same network address each time the client requests an
+   address.  In this document, the period over which a network address
+   is allocated to a client is referred to as a "lease" [11].  The
+   client may extend its lease with subsequent requests.  The client may
+   issue a message to release the address back to the server when the
+   client no longer needs the address.  The client may ask for a
+   permanent assignment by asking for an infinite lease.  Even when
+   assigning "permanent" addresses, a server may choose to give out
+   lengthy but non-infinite leases to allow detection of the fact that
+   the client has been retired.
+
+   In some environments it will be necessary to reassign network
+   addresses due to exhaustion of available addresses.  In such
+   environments, the allocation mechanism will reuse addresses whose
+   lease has expired.  The server should use whatever information is
+   available in the configuration information repository to choose an
+   address to reuse.  For example, the server may choose the least
+   recently assigned address.  As a consistency check, the allocating
+   server SHOULD probe the reused address before allocating the address,
+   e.g., with an ICMP echo request, and the client SHOULD probe the
+   newly received address, e.g., with ARP.
+
+
+
+
+
+Droms                       Standards Track                    [Page 12]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+3. The Client-Server Protocol
+
+   DHCP uses the BOOTP message format defined in RFC 951 and given in
+   table 1 and figure 1.  The 'op' field of each DHCP message sent from
+   a client to a server contains BOOTREQUEST. BOOTREPLY is used in the
+   'op' field of each DHCP message sent from a server to a client.
+
+   The first four octets of the 'options' field of the DHCP message
+   contain the (decimal) values 99, 130, 83 and 99, respectively (this
+   is the same magic cookie as is defined in RFC 1497 [17]).  The
+   remainder of the 'options' field consists of a list of tagged
+   parameters that are called "options".  All of the "vendor extensions"
+   listed in RFC 1497 are also DHCP options.  RFC 1533 gives the
+   complete set of options defined for use with DHCP.
+
+   Several options have been defined so far.  One particular option -
+   the "DHCP message type" option - must be included in every DHCP
+   message.  This option defines the "type" of the DHCP message.
+   Additional options may be allowed, required, or not allowed,
+   depending on the DHCP message type.
+
+   Throughout this document, DHCP messages that include a 'DHCP message
+   type' option will be referred to by the type of the message; e.g., a
+   DHCP message with 'DHCP message type' option type 1 will be referred
+   to as a "DHCPDISCOVER" message.
+
+3.1 Client-server interaction - allocating a network address
+
+   The following summary of the protocol exchanges between clients and
+   servers refers to the DHCP messages described in table 2.  The
+   timeline diagram in figure 3 shows the timing relationships in a
+   typical client-server interaction.  If the client already knows its
+   address, some steps may be omitted; this abbreviated interaction is
+   described in section 3.2.
+
+   1. The client broadcasts a DHCPDISCOVER message on its local physical
+      subnet.  The DHCPDISCOVER message MAY include options that suggest
+      values for the network address and lease duration.  BOOTP relay
+      agents may pass the message on to DHCP servers not on the same
+      physical subnet.
+
+   2. Each server may respond with a DHCPOFFER message that includes an
+      available network address in the 'yiaddr' field (and other
+      configuration parameters in DHCP options).  Servers need not
+      reserve the offered network address, although the protocol will
+      work more efficiently if the server avoids allocating the offered
+      network address to another client.  When allocating a new address,
+      servers SHOULD check that the offered network address is not
+
+
+
+Droms                       Standards Track                    [Page 13]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+      already in use; e.g., the server may probe the offered address
+      with an ICMP Echo Request.  Servers SHOULD be implemented so that
+      network administrators MAY choose to disable probes of newly
+      allocated addresses.  The server transmits the DHCPOFFER message
+      to the client, using the BOOTP relay agent if necessary.
+
+   Message         Use
+   -------         ---
+
+   DHCPDISCOVER -  Client broadcast to locate available servers.
+
+   DHCPOFFER    -  Server to client in response to DHCPDISCOVER with
+                   offer of configuration parameters.
+
+   DHCPREQUEST  -  Client message to servers either (a) requesting
+                   offered parameters from one server and implicitly
+                   declining offers from all others, (b) confirming
+                   correctness of previously allocated address after,
+                   e.g., system reboot, or (c) extending the lease on a
+                   particular network address.
+
+   DHCPACK      -  Server to client with configuration parameters,
+                   including committed network address.
+
+   DHCPNAK      -  Server to client indicating client's notion of network
+                   address is incorrect (e.g., client has moved to new
+                   subnet) or client's lease has expired
+
+   DHCPDECLINE  -  Client to server indicating network address is already
+                   in use.
+
+   DHCPRELEASE  -  Client to server relinquishing network address and
+                   cancelling remaining lease.
+
+   DHCPINFORM   -  Client to server, asking only for local configuration
+                   parameters; client already has externally configured
+                   network address.
+
+                          Table 2:  DHCP messages
+
+
+
+
+
+
+
+
+
+
+
+
+Droms                       Standards Track                    [Page 14]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+                Server          Client          Server
+            (not selected)                    (selected)
+
+                  v               v               v
+                  |               |               |
+                  |     Begins initialization     |
+                  |               |               |
+                  | _____________/|\____________  |
+                  |/DHCPDISCOVER | DHCPDISCOVER  \|
+                  |               |               |
+              Determines          |          Determines
+             configuration        |         configuration
+                  |               |               |
+                  |\             |  ____________/ |
+                  | \________    | /DHCPOFFER     |
+                  | DHCPOFFER\   |/               |
+                  |           \  |                |
+                  |       Collects replies        |
+                  |             \|                |
+                  |     Selects configuration     |
+                  |               |               |
+                  | _____________/|\____________  |
+                  |/ DHCPREQUEST  |  DHCPREQUEST\ |
+                  |               |               |
+                  |               |     Commits configuration
+                  |               |               |
+                  |               | _____________/|
+                  |               |/ DHCPACK      |
+                  |               |               |
+                  |    Initialization complete    |
+                  |               |               |
+                  .               .               .
+                  .               .               .
+                  |               |               |
+                  |      Graceful shutdown        |
+                  |               |               |
+                  |               |\ ____________ |
+                  |               | DHCPRELEASE  \|
+                  |               |               |
+                  |               |        Discards lease
+                  |               |               |
+                  v               v               v
+     Figure 3: Timeline diagram of messages exchanged between DHCP
+               client and servers when allocating a new network address
+
+
+
+
+
+
+
+Droms                       Standards Track                    [Page 15]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+  3. The client receives one or more DHCPOFFER messages from one or more
+     servers.  The client may choose to wait for multiple responses.
+     The client chooses one server from which to request configuration
+     parameters, based on the configuration parameters offered in the
+     DHCPOFFER messages.  The client broadcasts a DHCPREQUEST message
+     that MUST include the 'server identifier' option to indicate which
+     server it has selected, and that MAY include other options
+     specifying desired configuration values.  The 'requested IP
+     address' option MUST be set to the value of 'yiaddr' in the
+     DHCPOFFER message from the server.  This DHCPREQUEST message is
+     broadcast and relayed through DHCP/BOOTP relay agents.  To help
+     ensure that any BOOTP relay agents forward the DHCPREQUEST message
+     to the same set of DHCP servers that received the original
+     DHCPDISCOVER message, the DHCPREQUEST message MUST use the same
+     value in the DHCP message header's 'secs' field and be sent to the
+     same IP broadcast address as the original DHCPDISCOVER message.
+     The client times out and retransmits the DHCPDISCOVER message if
+     the client receives no DHCPOFFER messages.
+
+  4. The servers receive the DHCPREQUEST broadcast from the client.
+     Those servers not selected by the DHCPREQUEST message use the
+     message as notification that the client has declined that server's
+     offer.  The server selected in the DHCPREQUEST message commits the
+     binding for the client to persistent storage and responds with a
+     DHCPACK message containing the configuration parameters for the
+     requesting client.  The combination of 'client identifier' or
+     'chaddr' and assigned network address constitute a unique
+     identifier for the client's lease and are used by both the client
+     and server to identify a lease referred to in any DHCP messages.
+     Any configuration parameters in the DHCPACK message SHOULD NOT
+     conflict with those in the earlier DHCPOFFER message to which the
+     client is responding.  The server SHOULD NOT check the offered
+     network address at this point. The 'yiaddr' field in the DHCPACK
+     messages is filled in with the selected network address.
+
+     If the selected server is unable to satisfy the DHCPREQUEST message
+     (e.g., the requested network address has been allocated), the
+     server SHOULD respond with a DHCPNAK message.
+
+     A server MAY choose to mark addresses offered to clients in
+     DHCPOFFER messages as unavailable.  The server SHOULD mark an
+     address offered to a client in a DHCPOFFER message as available if
+     the server receives no DHCPREQUEST message from that client.
+
+  5. The client receives the DHCPACK message with configuration
+     parameters.  The client SHOULD perform a final check on the
+     parameters (e.g., ARP for allocated network address), and notes the
+     duration of the lease specified in the DHCPACK message.  At this
+
+
+
+Droms                       Standards Track                    [Page 16]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+     point, the client is configured.  If the client detects that the
+     address is already in use (e.g., through the use of ARP), the
+     client MUST send a DHCPDECLINE message to the server and restarts
+     the configuration process.  The client SHOULD wait a minimum of ten
+     seconds before restarting the configuration process to avoid
+     excessive network traffic in case of looping.
+
+     If the client receives a DHCPNAK message, the client restarts the
+     configuration process.
+
+     The client times out and retransmits the DHCPREQUEST message if the
+     client receives neither a DHCPACK nor a DHCPNAK message.  The client
+     retransmits the DHCPREQUEST according to the retransmission
+     algorithm in section 4.1.  The client should choose to retransmit
+     the DHCPREQUEST enough times to give adequate probability of
+     contacting the server without causing the client (and the user of
+     that client) to wait overly long before giving up; e.g., a client
+     retransmitting as described in section 4.1 might retransmit the
+     DHCPREQUEST message four times, for a total delay of 60 seconds,
+     before restarting the initialization procedure.  If the client
+     receives neither a DHCPACK nor a DHCPNAK message after employing the
+     retransmission algorithm, the client reverts to INIT state and
+     restarts the initialization process.  The client SHOULD notify the
+     user that the initialization process has failed and is restarting.
+
+  6. The client may choose to relinquish its lease on a network address
+     by sending a DHCPRELEASE message to the server.  The client
+     identifies the lease to be released with its 'client identifier',
+     or 'chaddr' and network address in the DHCPRELEASE message. If the
+     client used a 'client identifier' when it obtained the lease, it
+     MUST use the same 'client identifier' in the DHCPRELEASE message.
+
+3.2 Client-server interaction - reusing a previously allocated network
+    address
+
+   If a client remembers and wishes to reuse a previously allocated
+   network address, a client may choose to omit some of the steps
+   described in the previous section.  The timeline diagram in figure 4
+   shows the timing relationships in a typical client-server interaction
+   for a client reusing a previously allocated network address.
+
+
+
+
+
+
+
+
+
+
+
+Droms                       Standards Track                    [Page 17]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+   1. The client broadcasts a DHCPREQUEST message on its local subnet.
+      The message includes the client's network address in the
+      'requested IP address' option. As the client has not received its
+      network address, it MUST NOT fill in the 'ciaddr' field. BOOTP
+      relay agents pass the message on to DHCP servers not on the same
+      subnet.  If the client used a 'client identifier' to obtain its
+      address, the client MUST use the same 'client identifier' in the
+      DHCPREQUEST message.
+
+   2. Servers with knowledge of the client's configuration parameters
+      respond with a DHCPACK message to the client.  Servers SHOULD NOT
+      check that the client's network address is already in use; the
+      client may respond to ICMP Echo Request messages at this point.
+
+                Server          Client          Server
+
+                  v               v               v
+                  |                |               |
+                  |              Begins            |
+                  |          initialization        |
+                  |                |               |
+                  |                /|\             |
+                  |   _________ __/ | \__________  |
+                  | /DHCPREQUEST   |  DHCPREQUEST\ |
+                  |/               |              \|
+                  |                |               |
+               Locates             |            Locates
+            configuration          |         configuration
+                  |                |               |
+                  |\               |              /|
+                  | \              |  ___________/ |
+                  |  \             | /  DHCPACK    |
+                  |   \ _______    |/              |
+                  |     DHCPACK\   |               |
+                  |          Initialization        |
+                  |             complete           |
+                  |               \|               |
+                  |                |               |
+                  |           (Subsequent          |
+                  |             DHCPACKS           |
+                  |             ignored)           |
+                  |                |               |
+                  |                |               |
+                  v                v               v
+
+     Figure 4: Timeline diagram of messages exchanged between DHCP
+               client and servers when reusing a previously allocated
+               network address
+
+
+
+Droms                       Standards Track                    [Page 18]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+      If the client's request is invalid (e.g., the client has moved
+      to a new subnet), servers SHOULD respond with a DHCPNAK message to
+      the client. Servers SHOULD NOT respond if their information is not
+      guaranteed to be accurate.  For example, a server that identifies a
+      request for an expired binding that is owned by another server SHOULD
+      NOT respond with a DHCPNAK unless the servers are using an explicit
+      mechanism to maintain coherency among the servers.
+
+      If 'giaddr' is 0x0 in the DHCPREQUEST message, the client is on
+      the same subnet as the server.  The server MUST
+      broadcast the DHCPNAK message to the 0xffffffff broadcast address
+      because the client may not have a correct network address or subnet
+      mask, and the client may not be answering ARP requests.
+      Otherwise, the server MUST send the DHCPNAK message to the IP
+      address of the BOOTP relay agent, as recorded in 'giaddr'.  The
+      relay agent will, in turn, forward the message directly to the
+      client's hardware address, so that the DHCPNAK can be delivered even
+      if the client has moved to a new network.
+
+   3. The client receives the DHCPACK message with configuration
+      parameters.  The client performs a final check on the parameters
+      (as in section 3.1), and notes the duration of the lease specified
+      in the DHCPACK message.  The specific lease is implicitly identified
+      by the 'client identifier' or 'chaddr' and the network address.  At
+      this point, the client is configured.
+
+      If the client detects that the IP address in the DHCPACK message
+      is already in use, the client MUST send a DHCPDECLINE message to the
+      server and restarts the configuration process by requesting a
+      new network address.  This action corresponds to the client
+      moving to the INIT state in the DHCP state diagram, which is
+      described in section 4.4.
+
+      If the client receives a DHCPNAK message, it cannot reuse its
+      remembered network address.  It must instead request a new
+      address by restarting the configuration process, this time
+      using the (non-abbreviated) procedure described in section
+      3.1.  This action also corresponds to the client moving to
+      the INIT state in the DHCP state diagram.
+
+      The client times out and retransmits the DHCPREQUEST message if
+      the client receives neither a DHCPACK nor a DHCPNAK message.  The
+      client retransmits the DHCPREQUEST according to the retransmission
+      algorithm in section 4.1.  The client should choose to retransmit
+      the DHCPREQUEST enough times to give adequate probability of
+      contacting the server without causing the client (and the user of
+      that client) to wait overly long before giving up; e.g., a client
+      retransmitting as described in section 4.1 might retransmit the
+
+
+
+Droms                       Standards Track                    [Page 19]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+      DHCPREQUEST message four times, for a total delay of 60 seconds,
+      before restarting the initialization procedure.  If the client
+      receives neither a DHCPACK nor a DHCPNAK message after employing
+      the retransmission algorithm, the client MAY choose to use the
+      previously allocated network address and configuration parameters
+      for the remainder of the unexpired lease.  This corresponds to
+      moving to BOUND state in the client state transition diagram shown
+      in figure 5.
+
+   4. The client may choose to relinquish its lease on a network
+      address by sending a DHCPRELEASE message to the server.  The
+      client identifies the lease to be released with its
+      'client identifier', or 'chaddr' and network address in the
+      DHCPRELEASE message.
+
+      Note that in this case, where the client retains its network
+      address locally, the client will not normally relinquish its
+      lease during a graceful shutdown.  Only in the case where the
+      client explicitly needs to relinquish its lease, e.g., the client
+      is about to be moved to a different subnet, will the client send
+      a DHCPRELEASE message.
+
+3.3 Interpretation and representation of time values
+
+   A client acquires a lease for a network address for a fixed period of
+   time (which may be infinite).  Throughout the protocol, times are to
+   be represented in units of seconds.  The time value of 0xffffffff is
+   reserved to represent "infinity".
+
+   As clients and servers may not have synchronized clocks, times are
+   represented in DHCP messages as relative times, to be interpreted
+   with respect to the client's local clock.  Representing relative
+   times in units of seconds in an unsigned 32 bit word gives a range of
+   relative times from 0 to approximately 100 years, which is sufficient
+   for the relative times to be measured using DHCP.
+
+   The algorithm for lease duration interpretation given in the previous
+   paragraph assumes that client and server clocks are stable relative
+   to each other.  If there is drift between the two clocks, the server
+   may consider the lease expired before the client does.  To
+   compensate, the server may return a shorter lease duration to the
+   client than the server commits to its local database of client
+   information.
+
+3.4 Obtaining parameters with externally configured network address
+
+   If a client has obtained a network address through some other means
+   (e.g., manual configuration), it may use a DHCPINFORM request message
+
+
+
+Droms                       Standards Track                    [Page 20]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+   to obtain other local configuration parameters.  Servers receiving a
+   DHCPINFORM message construct a DHCPACK message with any local
+   configuration parameters appropriate for the client without:
+   allocating a new address, checking for an existing binding, filling
+   in 'yiaddr' or including lease time parameters.  The servers SHOULD
+   unicast the DHCPACK reply to the address given in the 'ciaddr' field
+   of the DHCPINFORM message.
+
+   The server SHOULD check the network address in a DHCPINFORM message
+   for consistency, but MUST NOT check for an existing lease.  The
+   server forms a DHCPACK message containing the configuration
+   parameters for the requesting client and sends the DHCPACK message
+   directly to the client.
+
+3.5 Client parameters in DHCP
+
+   Not all clients require initialization of all parameters listed in
+   Appendix A.  Two techniques are used to reduce the number of
+   parameters transmitted from the server to the client.  First, most of
+   the parameters have defaults defined in the Host Requirements RFCs;
+   if the client receives no parameters from the server that override
+   the defaults, a client uses those default values.  Second, in its
+   initial DHCPDISCOVER or DHCPREQUEST message, a client may provide the
+   server with a list of specific parameters the client is interested
+   in.  If the client includes a list of parameters in a DHCPDISCOVER
+   message, it MUST include that list in any subsequent DHCPREQUEST
+   messages.
+
+   The client SHOULD include the 'maximum DHCP message size' option to
+   let the server know how large the server may make its DHCP messages.
+   The parameters returned to a client may still exceed the space
+   allocated to options in a DHCP message.  In this case, two additional
+   options flags (which must appear in the 'options' field of the
+   message) indicate that the 'file' and 'sname' fields are to be used
+   for options.
+
+   The client can inform the server which configuration parameters the
+   client is interested in by including the 'parameter request list'
+   option.  The data portion of this option explicitly lists the options
+   requested by tag number.
+
+   In addition, the client may suggest values for the network address
+   and lease time in the DHCPDISCOVER message.  The client may include
+   the 'requested IP address' option to suggest that a particular IP
+   address be assigned, and may include the 'IP address lease time'
+   option to suggest the lease time it would like.  Other options
+   representing "hints" at configuration parameters are allowed in a
+   DHCPDISCOVER or DHCPREQUEST message.  However, additional options may
+
+
+
+Droms                       Standards Track                    [Page 21]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+   be ignored by servers, and multiple servers may, therefore, not
+   return identical values for some options.  The 'requested IP address'
+   option is to be filled in only in a DHCPREQUEST message when the
+   client is verifying network parameters obtained previously. The
+   client fills in the 'ciaddr' field only when correctly configured
+   with an IP address in BOUND, RENEWING or REBINDING state.
+
+   If a server receives a DHCPREQUEST message with an invalid 'requested
+   IP address', the server SHOULD respond to the client with a DHCPNAK
+   message and may choose to report the problem to the system
+   administrator.  The server may include an error message in the
+   'message' option.
+
+3.6 Use of DHCP in clients with multiple interfaces
+
+   A client with multiple network interfaces must use DHCP through each
+   interface independently to obtain configuration information
+   parameters for those separate interfaces.
+
+3.7 When clients should use DHCP
+
+   A client SHOULD use DHCP to reacquire or verify its IP address and
+   network parameters whenever the local network parameters may have
+   changed; e.g., at system boot time or after a disconnection from the
+   local network, as the local network configuration may change without
+   the client's or user's knowledge.
+
+   If a client has knowledge of a previous network address and is unable
+   to contact a local DHCP server, the client may continue to use the
+   previous network address until the lease for that address expires.
+   If the lease expires before the client can contact a DHCP server, the
+   client must immediately discontinue use of the previous network
+   address and may inform local users of the problem.
+
+4. Specification of the DHCP client-server protocol
+
+   In this section, we assume that a DHCP server has a block of network
+   addresses from which it can satisfy requests for new addresses.  Each
+   server also maintains a database of allocated addresses and leases in
+   local permanent storage.
+
+4.1 Constructing and sending DHCP messages
+
+   DHCP clients and servers both construct DHCP messages by filling in
+   fields in the fixed format section of the message and appending
+   tagged data items in the variable length option area.  The options
+   area includes first a four-octet 'magic cookie' (which was described
+   in section 3), followed by the options.  The last option must always
+
+
+
+Droms                       Standards Track                    [Page 22]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+   be the 'end' option.
+
+   DHCP uses UDP as its transport protocol.  DHCP messages from a client
+   to a server are sent to the 'DHCP server' port (67), and DHCP
+   messages from a server to a client are sent to the 'DHCP client' port
+   (68). A server with multiple network addresses (e.g., a multi-homed
+   host) MAY use any of its network addresses in outgoing DHCP messages.
+
+   The 'server identifier' field is used both to identify a DHCP server
+   in a DHCP message and as a destination address from clients to
+   servers.  A server with multiple network addresses MUST be prepared
+   to accept any of its network addresses as identifying that server
+   in a DHCP message.  To accommodate potentially incomplete network
+   connectivity, a server MUST choose an address as a 'server
+   identifier' that, to the best of the server's knowledge, is reachable
+   from the client.  For example, if the DHCP server and the DHCP client
+   are connected to the same subnet (i.e., the 'giaddr' field in the
+   message from the client is zero), the server SHOULD select the IP
+   address the server is using for communication on that subnet as the
+   'server identifier'.  If the server is using multiple IP addresses on
+   that subnet, any such address may be used.  If the server has
+   received a message through a DHCP relay agent, the server SHOULD
+   choose an address from the interface on which the message was
+   received as the 'server identifier' (unless the server has other,
+   better information on which to make its choice).  DHCP clients MUST
+   use the IP address provided in the 'server identifier' option for any
+   unicast requests to the DHCP server.
+
+   DHCP messages broadcast by a client prior to that client obtaining
+   its IP address must have the source address field in the IP header
+   set to 0.
+
+   If the 'giaddr' field in a DHCP message from a client is non-zero,
+   the server sends any return messages to the 'DHCP server' port on the
+   BOOTP relay agent whose address appears in 'giaddr'. If the 'giaddr'
+   field is zero and the 'ciaddr' field is nonzero, then the server
+   unicasts DHCPOFFER and DHCPACK messages to the address in 'ciaddr'.
+   If 'giaddr' is zero and 'ciaddr' is zero, and the broadcast bit is
+   set, then the server broadcasts DHCPOFFER and DHCPACK messages to
+   0xffffffff. If the broadcast bit is not set and 'giaddr' is zero and
+   'ciaddr' is zero, then the server unicasts DHCPOFFER and DHCPACK
+   messages to the client's hardware address and 'yiaddr' address.  In
+   all cases, when 'giaddr' is zero, the server broadcasts any DHCPNAK
+   messages to 0xffffffff.
+
+   If the options in a DHCP message extend into the 'sname' and 'file'
+   fields, the 'option overload' option MUST appear in the 'options'
+   field, with value 1, 2 or 3, as specified in RFC 1533.  If the
+
+
+
+Droms                       Standards Track                    [Page 23]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+   'option overload' option is present in the 'options' field, the
+   options in the 'options' field MUST be terminated by an 'end' option,
+   and MAY contain one or more 'pad' options to fill the options field.
+   The options in the 'sname' and 'file' fields (if in use as indicated
+   by the 'option overload' option) MUST begin with the first octet of
+   the field, MUST be terminated by an 'end' option, and MUST be
+   followed by 'pad' options to fill the remainder of the field.  Any
+   individual option in the 'options', 'sname' and 'file' fields MUST be
+   entirely contained in that field.  The options in the 'options' field
+   MUST be interpreted first, so that any 'option overload' options may
+   be interpreted.  The 'file' field MUST be interpreted next (if the
+   'option overload' option indicates that the 'file' field contains
+   DHCP options), followed by the 'sname' field.
+
+   The values to be passed in an 'option' tag may be too long to fit in
+   the 255 octets available to a single option (e.g., a list of routers
+   in a 'router' option [21]).  Options may appear only once, unless
+   otherwise specified in the options document.  The client concatenates
+   the values of multiple instances of the same option into a single
+   parameter list for configuration.
+
+   DHCP clients are responsible for all message retransmission.  The
+   client MUST adopt a retransmission strategy that incorporates a
+   randomized exponential backoff algorithm to determine the delay
+   between retransmissions.  The delay between retransmissions SHOULD be
+   chosen to allow sufficient time for replies from the server to be
+   delivered based on the characteristics of the internetwork between
+   the client and the server.  For example, in a 10Mb/sec Ethernet
+   internetwork, the delay before the first retransmission SHOULD be 4
+   seconds randomized by the value of a uniform random number chosen
+   from the range -1 to +1.  Clients with clocks that provide resolution
+   granularity of less than one second may choose a non-integer
+   randomization value.  The delay before the next retransmission SHOULD
+   be 8 seconds randomized by the value of a uniform number chosen from
+   the range -1 to +1.  The retransmission delay SHOULD be doubled with
+   subsequent retransmissions up to a maximum of 64 seconds.  The client
+   MAY provide an indication of retransmission attempts to the user as
+   an indication of the progress of the configuration process.
+
+   The 'xid' field is used by the client to match incoming DHCP messages
+   with pending requests.  A DHCP client MUST choose 'xid's in such a
+   way as to minimize the chance of using an 'xid' identical to one used
+   by another client. For example, a client may choose a different,
+   random initial 'xid' each time the client is rebooted, and
+   subsequently use sequential 'xid's until the next reboot.  Selecting
+   a new 'xid' for each retransmission is an implementation decision.  A
+   client may choose to reuse the same 'xid' or select a new 'xid' for
+   each retransmitted message.
+
+
+
+Droms                       Standards Track                    [Page 24]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+   Normally, DHCP servers and BOOTP relay agents attempt to deliver
+   DHCPOFFER, DHCPACK and DHCPNAK messages directly to the client using
+   unicast delivery.  The IP destination address (in the IP header) is
+   set to the DHCP 'yiaddr' address and the link-layer destination
+   address is set to the DHCP 'chaddr' address.  Unfortunately, some
+   client implementations are unable to receive such unicast IP
+   datagrams until the implementation has been configured with a valid
+   IP address (leading to a deadlock in which the client's IP address
+   cannot be delivered until the client has been configured with an IP
+   address).
+
+   A client that cannot receive unicast IP datagrams until its protocol
+   software has been configured with an IP address SHOULD set the
+   BROADCAST bit in the 'flags' field to 1 in any DHCPDISCOVER or
+   DHCPREQUEST messages that client sends.  The BROADCAST bit will
+   provide a hint to the DHCP server and BOOTP relay agent to broadcast
+   any messages to the client on the client's subnet.  A client that can
+   receive unicast IP datagrams before its protocol software has been
+   configured SHOULD clear the BROADCAST bit to 0.  The BOOTP
+   clarifications document discusses the ramifications of the use of the
+   BROADCAST bit [21].
+
+   A server or relay agent sending or relaying a DHCP message directly
+   to a DHCP client (i.e., not to a relay agent specified in the
+   'giaddr' field) SHOULD examine the BROADCAST bit in the 'flags'
+   field.  If this bit is set to 1, the DHCP message SHOULD be sent as
+   an IP broadcast using an IP broadcast address (preferably 0xffffffff)
+   as the IP destination address and the link-layer broadcast address as
+   the link-layer destination address.  If the BROADCAST bit is cleared
+   to 0, the message SHOULD be sent as an IP unicast to the IP address
+   specified in the 'yiaddr' field and the link-layer address specified
+   in the 'chaddr' field.  If unicasting is not possible, the message
+   MAY be sent as an IP broadcast using an IP broadcast address
+   (preferably 0xffffffff) as the IP destination address and the link-
+   layer broadcast address as the link-layer destination address.
+
+4.2 DHCP server administrative controls
+
+   DHCP servers are not required to respond to every DHCPDISCOVER and
+   DHCPREQUEST message they receive.  For example, a network
+   administrator, to retain stringent control over the clients attached
+   to the network, may choose to configure DHCP servers to respond only
+   to clients that have been previously registered through some external
+   mechanism.  The DHCP specification describes only the interactions
+   between clients and servers when the clients and servers choose to
+   interact; it is beyond the scope of the DHCP specification to
+   describe all of the administrative controls that system
+   administrators might want to use.  Specific DHCP server
+
+
+
+Droms                       Standards Track                    [Page 25]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+   implementations may incorporate any controls or policies desired by a
+   network administrator.
+
+   In some environments, a DHCP server will have to consider the values
+   of the vendor class options included in DHCPDISCOVER or DHCPREQUEST
+   messages when determining the correct parameters for a particular
+   client.
+
+   A DHCP server needs to use some unique identifier to associate a
+   client with its lease.  The client MAY choose to explicitly provide
+   the identifier through the 'client identifier' option.  If the client
+   supplies a 'client identifier', the client MUST use the same 'client
+   identifier' in all subsequent messages, and the server MUST use that
+   identifier to identify the client.  If the client does not provide a
+   'client identifier' option, the server MUST use the contents of the
+   'chaddr' field to identify the client. It is crucial for a DHCP
+   client to use an identifier unique within the subnet to which the
+   client is attached in the 'client identifier' option.  Use of
+   'chaddr' as the client's unique identifier may cause unexpected
+   results, as that identifier may be associated with a hardware
+   interface that could be moved to a new client.  Some sites may choose
+   to use a manufacturer's serial number as the 'client identifier', to
+   avoid unexpected changes in a client's network address due to transfer
+   of hardware interfaces among computers.  Sites may also choose to use
+   a DNS name as the 'client identifier', causing address leases to be
+   associated with the DNS name rather than a specific hardware box.
+
+   DHCP clients are free to use any strategy in selecting a DHCP server
+   among those from which the client receives a DHCPOFFER message.  The
+   client implementation of DHCP SHOULD provide a mechanism for the user
+   to select directly the 'vendor class identifier' values.
+
+4.3 DHCP server behavior
+
+   A DHCP server processes incoming DHCP messages from a client based on
+   the current state of the binding for that client.  A DHCP server can
+   receive the following messages from a client:
+
+      o DHCPDISCOVER
+
+      o DHCPREQUEST
+
+      o DHCPDECLINE
+
+      o DHCPRELEASE
+
+      o DHCPINFORM
+
+
+
+
+Droms                       Standards Track                    [Page 26]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+   Table 3 gives the use of the fields and options in a DHCP message by
+   a server.  The remainder of this section describes the action of the
+   DHCP server for each possible incoming message.
+
+4.3.1 DHCPDISCOVER message
+
+   When a server receives a DHCPDISCOVER message from a client, the
+   server chooses a network address for the requesting client.  If no
+   address is available, the server may choose to report the problem to
+   the system administrator. If an address is available, the new address
+   SHOULD be chosen as follows:
+
+      o The client's current address as recorded in the client's current
+        binding, ELSE
+
+      o The client's previous address as recorded in the client's (now
+        expired or released) binding, if that address is in the server's
+        pool of available addresses and not already allocated, ELSE
+
+      o The address requested in the 'Requested IP Address' option, if that
+        address is valid and not already allocated, ELSE
+
+      o A new address allocated from the server's pool of available
+        addresses; the address is selected based on the subnet from which
+        the message was received (if 'giaddr' is 0) or on the address of
+        the relay agent that forwarded the message ('giaddr' when not 0).
+
+   As described in section 4.2, a server MAY, for administrative
+   reasons, assign an address other than the one requested, or may
+   refuse to allocate an address to a particular client even though free
+   addresses are available.
+
+   Note that, in some network architectures (e.g., internets with more
+   than one IP subnet assigned to a physical network segment), it may be
+   the case that the DHCP client should be assigned an address from a
+   different subnet than the address recorded in 'giaddr'.  Thus, DHCP
+   does not require that the client be assigned an address from the
+   subnet in 'giaddr'.  A server is free to choose some other subnet,
+   and it is beyond the scope of the DHCP specification to describe ways
+   in which the assigned IP address might be chosen.
+
+   While not required for correct operation of DHCP, the server SHOULD
+   NOT reuse the selected network address before the client responds to
+   the server's DHCPOFFER message.  The server may choose to record the
+   address as offered to the client.
+
+   The server must also choose an expiration time for the lease, as
+   follows:
+
+
+
+Droms                       Standards Track                    [Page 27]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+   o IF the client has not requested a specific lease in the
+     DHCPDISCOVER message and the client already has an assigned network
+     address, the server returns the lease expiration time previously
+     assigned to that address (note that the client must explicitly
+     request a specific lease to extend the expiration time on a
+     previously assigned address), ELSE
+
+   o IF the client has not requested a specific lease in the
+     DHCPDISCOVER message and the client does not have an assigned
+     network address, the server assigns a locally configured default
+     lease time, ELSE
+
+   o IF the client has requested a specific lease in the DHCPDISCOVER
+     message (regardless of whether the client has an assigned network
+     address), the server may choose either to return the requested
+     lease (if the lease is acceptable to local policy) or select
+     another lease.
+
+Field      DHCPOFFER            DHCPACK             DHCPNAK
+-----      ---------            -------             -------
+'op'       BOOTREPLY            BOOTREPLY           BOOTREPLY
+'htype'    (From "Assigned Numbers" RFC)
+'hlen'     (Hardware address length in octets)
+'hops'     0                    0                   0
+'xid'      'xid' from client    'xid' from client   'xid' from client
+           DHCPDISCOVER         DHCPREQUEST         DHCPREQUEST
+           message              message             message
+'secs'     0                    0                   0
+'ciaddr'   0                    'ciaddr' from       0
+                                DHCPREQUEST or 0
+'yiaddr'   IP address offered   IP address          0
+           to client            assigned to client
+'siaddr'   IP address of next   IP address of next  0
+           bootstrap server     bootstrap server
+'flags'    'flags' from         'flags' from        'flags' from
+           client DHCPDISCOVER  client DHCPREQUEST  client DHCPREQUEST
+           message              message             message
+'giaddr'   'giaddr' from        'giaddr' from       'giaddr' from
+           client DHCPDISCOVER  client DHCPREQUEST  client DHCPREQUEST
+           message              message             message
+'chaddr'   'chaddr' from        'chaddr' from       'chaddr' from
+           client DHCPDISCOVER  client DHCPREQUEST  client DHCPREQUEST
+           message              message             message
+'sname'    Server host name     Server host name    (unused)
+           or options           or options
+'file'     Client boot file     Client boot file    (unused)
+           name or options      name or options
+'options'  options              options
+
+
+
+Droms                       Standards Track                    [Page 28]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+Option                    DHCPOFFER    DHCPACK            DHCPNAK
+------                    ---------    -------            -------
+Requested IP address      MUST NOT     MUST NOT           MUST NOT
+IP address lease time     MUST         MUST (DHCPREQUEST) MUST NOT
+                                       MUST NOT (DHCPINFORM)
+Use 'file'/'sname' fields MAY          MAY                MUST NOT
+DHCP message type         DHCPOFFER    DHCPACK            DHCPNAK
+Parameter request list    MUST NOT     MUST NOT           MUST NOT
+Message                   SHOULD       SHOULD             SHOULD
+Client identifier         MUST NOT     MUST NOT           MAY
+Vendor class identifier   MAY          MAY                MAY
+Server identifier         MUST         MUST               MUST
+Maximum message size      MUST NOT     MUST NOT           MUST NOT
+All others                MAY          MAY                MUST NOT
+
+           Table 3:  Fields and options used by DHCP servers
+
+   Once the network address and lease have been determined, the server
+   constructs a DHCPOFFER message with the offered configuration
+   parameters.  It is important for all DHCP servers to return the same
+   parameters (with the possible exception of a newly allocated network
+   address) to ensure predictable client behavior regardless of which
+   server the client selects.  The configuration parameters MUST be
+   selected by applying the following rules in the order given below.
+   The network administrator is responsible for configuring multiple
+   DHCP servers to ensure uniform responses from those servers.  The
+   server MUST return to the client:
+
+   o The client's network address, as determined by the rules given
+     earlier in this section,
+
+   o The expiration time for the client's lease, as determined by the
+     rules given earlier in this section,
+
+   o Parameters requested by the client, according to the following
+     rules:
+
+        -- IF the server has been explicitly configured with a default
+           value for the parameter, the server MUST include that value
+           in an appropriate option in the 'option' field, ELSE
+
+        -- IF the server recognizes the parameter as a parameter
+           defined in the Host Requirements Document, the server MUST
+           include the default value for that parameter as given in the
+           Host Requirements Document in an appropriate option in the
+           'option' field, ELSE
+
+        -- The server MUST NOT return a value for that parameter,
+
+
+
+Droms                       Standards Track                    [Page 29]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+     The server MUST supply as many of the requested parameters as
+     possible and MUST omit any parameters it cannot provide.  The
+     server MUST include each requested parameter only once unless
+     explicitly allowed in the DHCP Options and BOOTP Vendor
+     Extensions document.
+
+   o Any parameters from the existing binding that differ from the Host
+     Requirements Document defaults,
+
+   o Any parameters specific to this client (as identified by
+     the contents of 'chaddr' or 'client identifier' in the DHCPDISCOVER
+     or DHCPREQUEST message), e.g., as configured by the network
+     administrator,
+
+   o Any parameters specific to this client's class (as identified
+     by the contents of the 'vendor class identifier'
+     option in the DHCPDISCOVER or DHCPREQUEST message),
+     e.g., as configured by the network administrator; the parameters
+     MUST be identified by an exact match between the client's vendor
+     class identifiers and the client's classes identified in the
+     server,
+
+   o Parameters with non-default values on the client's subnet.
+
+   The server MAY choose to return the 'vendor class identifier' used to
+   determine the parameters in the DHCPOFFER message to assist the
+   client in selecting which DHCPOFFER to accept.  The server inserts
+   the 'xid' field from the DHCPDISCOVER message into the 'xid' field of
+   the DHCPOFFER message and sends the DHCPOFFER message to the
+   requesting client.
+
+4.3.2 DHCPREQUEST message
+
+   A DHCPREQUEST message may come from a client responding to a
+   DHCPOFFER message from a server, from a client verifying a previously
+   allocated IP address or from a client extending the lease on a
+   network address.  If the DHCPREQUEST message contains a 'server
+   identifier' option, the message is in response to a DHCPOFFER
+   message.  Otherwise, the message is a request to verify or extend an
+   existing lease.  If the client uses a 'client identifier' in a
+   DHCPREQUEST message, it MUST use that same 'client identifier' in all
+   subsequent messages. If the client included a list of requested
+   parameters in a DHCPDISCOVER message, it MUST include that list in
+   all subsequent messages.
+
+
+
+
+
+
+
+Droms                       Standards Track                    [Page 30]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+   Any configuration parameters in the DHCPACK message SHOULD NOT
+   conflict with those in the earlier DHCPOFFER message to which the
+   client is responding.  The client SHOULD use the parameters in the
+   DHCPACK message for configuration.
+
+   Clients send DHCPREQUEST messages as follows:
+
+   o DHCPREQUEST generated during SELECTING state:
+
+      Client inserts the address of the selected server in 'server
+      identifier', 'ciaddr' MUST be zero, 'requested IP address' MUST be
+      filled in with the yiaddr value from the chosen DHCPOFFER.
+
+      Note that the client may choose to collect several DHCPOFFER
+      messages and select the "best" offer.  The client indicates its
+      selection by identifying the offering server in the DHCPREQUEST
+      message.  If the client receives no acceptable offers, the client
+      may choose to try another DHCPDISCOVER message.  Therefore, the
+      servers may not receive a specific DHCPREQUEST from which they can
+      decide whether or not the client has accepted the offer.  Because
+      the servers have not committed any network address assignments on
+      the basis of a DHCPOFFER, servers are free to reuse offered
+      network addresses in response to subsequent requests.  As an
+      implementation detail, servers SHOULD NOT reuse offered addresses
+      and may use an implementation-specific timeout mechanism to decide
+      when to reuse an offered address.
+
+   o DHCPREQUEST generated during INIT-REBOOT state:
+
+      'server identifier' MUST NOT be filled in, 'requested IP address'
+      option MUST be filled in with client's notion of its previously
+      assigned address. 'ciaddr' MUST be zero. The client is seeking to
+      verify a previously allocated, cached configuration. Server SHOULD
+      send a DHCPNAK message to the client if the 'requested IP address'
+      is incorrect, or is on the wrong network.
+
+      Determining whether a client in the INIT-REBOOT state is on the
+      correct network is done by examining the contents of 'giaddr', the
+      'requested IP address' option, and a database lookup. If the DHCP
+      server detects that the client is on the wrong net (i.e., the
+      result of applying the local subnet mask or remote subnet mask (if
+      'giaddr' is not zero) to 'requested IP address' option value
+      doesn't match reality), then the server SHOULD send a DHCPNAK
+      message to the client.
+
+
+
+
+
+
+
+Droms                       Standards Track                    [Page 31]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+      If the network is correct, then the DHCP server should check if
+      the client's notion of its IP address is correct. If not, then the
+      server SHOULD send a DHCPNAK message to the client. If the DHCP
+      server has no record of this client, then it MUST remain silent,
+      and MAY output a warning to the network administrator. This
+      behavior is necessary for peaceful coexistence of non-
+      communicating DHCP servers on the same wire.
+
+      If 'giaddr' is 0x0 in the DHCPREQUEST message, the client is on
+      the same subnet as the server.  The server MUST broadcast the
+      DHCPNAK message to the 0xffffffff broadcast address because the
+      client may not have a correct network address or subnet mask, and
+      the client may not be answering ARP requests.
+
+      If 'giaddr' is set in the DHCPREQUEST message, the client is on a
+      different subnet.  The server MUST set the broadcast bit in the
+      DHCPNAK, so that the relay agent will broadcast the DHCPNAK to the
+      client, because the client may not have a correct network address
+      or subnet mask, and the client may not be answering ARP requests.
+
+   o DHCPREQUEST generated during RENEWING state:
+
+      'server identifier' MUST NOT be filled in, 'requested IP address'
+      option MUST NOT be filled in, 'ciaddr' MUST be filled in with
+      client's IP address. In this situation, the client is completely
+      configured, and is trying to extend its lease. This message will
+      be unicast, so no relay agents will be involved in its
+      transmission.  Because 'giaddr' is therefore not filled in, the
+      DHCP server will trust the value in 'ciaddr', and use it when
+      replying to the client.
+
+      A client MAY choose to renew or extend its lease prior to T1.  The
+      server may choose not to extend the lease (as a policy decision by
+      the network administrator), but should return a DHCPACK message
+      regardless.
+
+   o DHCPREQUEST generated during REBINDING state:
+
+      'server identifier' MUST NOT be filled in, 'requested IP address'
+      option MUST NOT be filled in, 'ciaddr' MUST be filled in with
+      client's IP address. In this situation, the client is completely
+      configured, and is trying to extend its lease. This message MUST
+      be broadcast to the 0xffffffff IP broadcast address.  The DHCP
+      server SHOULD check 'ciaddr' for correctness before replying to
+      the DHCPREQUEST.
+
+
+
+
+
+
+Droms                       Standards Track                    [Page 32]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+      The DHCPREQUEST from a REBINDING client is intended to accommodate
+      sites that have multiple DHCP servers and a mechanism for
+      maintaining consistency among leases managed by multiple servers.
+      A DHCP server MAY extend a client's lease only if it has local
+      administrative authority to do so.
+
+4.3.3 DHCPDECLINE message
+
+   If the server receives a DHCPDECLINE message, the client has
+   discovered through some other means that the suggested network
+   address is already in use.  The server MUST mark the network address
+   as not available and SHOULD notify the local system administrator of
+   a possible configuration problem.
+
+4.3.4 DHCPRELEASE message
+
+   Upon receipt of a DHCPRELEASE message, the server marks the network
+   address as not allocated.  The server SHOULD retain a record of the
+   client's initialization parameters for possible reuse in response to
+   subsequent requests from the client.
+
+4.3.5 DHCPINFORM message
+
+   The server responds to a DHCPINFORM message by sending a DHCPACK
+   message directly to the address given in the 'ciaddr' field of the
+   DHCPINFORM message.  The server MUST NOT send a lease expiration time
+   to the client and SHOULD NOT fill in 'yiaddr'.  The server includes
+   other parameters in the DHCPACK message as defined in section 4.3.1.
+
+4.3.6 Client messages
+
+   Table 4 details the differences between messages from clients in
+   various states.
+
+   ---------------------------------------------------------------------
+   |              |INIT-REBOOT  |SELECTING    |RENEWING     |REBINDING |
+   ---------------------------------------------------------------------
+   |broad/unicast |broadcast    |broadcast    |unicast      |broadcast |
+   |server-ip     |MUST NOT     |MUST         |MUST NOT     |MUST NOT  |
+   |requested-ip  |MUST         |MUST         |MUST NOT     |MUST NOT  |
+   |ciaddr        |zero         |zero         |IP address   |IP address|
+   ---------------------------------------------------------------------
+
+              Table 4: Client messages from different states
+
+
+
+
+
+
+
+Droms                       Standards Track                    [Page 33]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+4.4 DHCP client behavior
+
+   Figure 5 gives a state-transition diagram for a DHCP client.  A
+   client can receive the following messages from a server:
+
+         o DHCPOFFER
+
+         o DHCPACK
+
+         o DHCPNAK
+
+   The DHCPINFORM message is not shown in figure 5.  A client simply
+   sends the DHCPINFORM and waits for DHCPACK messages.  Once the client
+   has selected its parameters, it has completed the configuration
+   process.
+
+   Table 5 gives the use of the fields and options in a DHCP message by
+   a client.  The remainder of this section describes the action of the
+   DHCP client for each possible incoming message.  The description in
+   the following section corresponds to the full configuration procedure
+   previously described in section 3.1, and the text in the subsequent
+   section corresponds to the abbreviated configuration procedure
+   described in section 3.2.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Droms                       Standards Track                    [Page 34]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+ --------                               -------
+|        | +-------------------------->|       |<-------------------+
+| INIT-  | |     +-------------------->| INIT  |                    |
+| REBOOT |DHCPNAK/         +---------->|       |<---+               |
+|        |Restart|         |            -------     |               |
+ --------  |  DHCPNAK/     |               |                        |
+    |      Discard offer   |      -/Send DHCPDISCOVER               |
+-/Send DHCPREQUEST         |               |                        |
+    |      |     |      DHCPACK            v        |               |
+ -----------     |   (not accept.)/   -----------   |               |
+|           |    |  Send DHCPDECLINE |           |                  |
+| REBOOTING |    |         |         | SELECTING |<----+            |
+|           |    |        /          |           |     |DHCPOFFER/  |
+ -----------     |       /            -----------   |  |Collect     |
+    |            |      /                  |   |       |  replies   |
+DHCPACK/         |     /  +----------------+   +-------+            |
+Record lease, set|    |   v   Select offer/                         |
+timers T1, T2   ------------  send DHCPREQUEST      |               |
+    |   +----->|            |             DHCPNAK, Lease expired/   |
+    |   |      | REQUESTING |                  Halt network         |
+    DHCPOFFER/ |            |                       |               |
+    Discard     ------------                        |               |
+    |   |        |        |                   -----------           |
+    |   +--------+     DHCPACK/              |           |          |
+    |              Record lease, set    -----| REBINDING |          |
+    |                timers T1, T2     /     |           |          |
+    |                     |        DHCPACK/   -----------           |
+    |                     v     Record lease, set   ^               |
+    +----------------> -------      /timers T1,T2   |               |
+               +----->|       |<---+                |               |
+               |      | BOUND |<---+                |               |
+  DHCPOFFER, DHCPACK, |       |    |            T2 expires/   DHCPNAK/
+   DHCPNAK/Discard     -------     |             Broadcast  Halt network
+               |       | |         |            DHCPREQUEST         |
+               +-------+ |        DHCPACK/          |               |
+                    T1 expires/   Record lease, set |               |
+                 Send DHCPREQUEST timers T1, T2     |               |
+                 to leasing server |                |               |
+                         |   ----------             |               |
+                         |  |          |------------+               |
+                         +->| RENEWING |                            |
+                            |          |----------------------------+
+                             ----------
+          Figure 5:  State-transition diagram for DHCP clients
+
+
+
+
+
+
+
+Droms                       Standards Track                    [Page 35]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+4.4.1 Initialization and allocation of network address
+
+   The client begins in INIT state and forms a DHCPDISCOVER message.
+   The client SHOULD wait a random time between one and ten seconds to
+   desynchronize the use of DHCP at startup.  The client sets 'ciaddr'
+   to 0x00000000.  The client MAY request specific parameters by
+   including the 'parameter request list' option.  The client MAY
+   suggest a network address and/or lease time by including the
+   'requested IP address' and 'IP address lease time' options.  The
+   client MUST include its hardware address in the 'chaddr' field, if
+   necessary for delivery of DHCP reply messages.  The client MAY
+   include a different unique identifier in the 'client identifier'
+   option, as discussed in section 4.2.  If the client included a list
+   of requested parameters in a DHCPDISCOVER message, it MUST include
+   that list in all subsequent messages.
+
+   The client generates and records a random transaction identifier and
+   inserts that identifier into the 'xid' field.  The client records its
+   own local time for later use in computing the lease expiration.  The
+   client then broadcasts the DHCPDISCOVER on the local hardware
+   broadcast address to the 0xffffffff IP broadcast address and 'DHCP
+   server' UDP port.
+
+   If the 'xid' of an arriving DHCPOFFER message does not match the
+   'xid' of the most recent DHCPDISCOVER message, the DHCPOFFER message
+   must be silently discarded.  Any arriving DHCPACK messages must be
+   silently discarded.
+
+   The client collects DHCPOFFER messages over a period of time, selects
+   one DHCPOFFER message from the (possibly many) incoming DHCPOFFER
+   messages (e.g., the first DHCPOFFER message or the DHCPOFFER message
+   from the previously used server) and extracts the server address from
+   the 'server identifier' option in the DHCPOFFER message.  The time
+   over which the client collects messages and the mechanism used to
+   select one DHCPOFFER are implementation dependent.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Droms                       Standards Track                    [Page 36]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+Field      DHCPDISCOVER          DHCPREQUEST           DHCPDECLINE,
+           DHCPINFORM                                  DHCPRELEASE
+-----      ------------          -----------           -----------
+'op'       BOOTREQUEST           BOOTREQUEST           BOOTREQUEST
+'htype'    (From "Assigned Numbers" RFC)
+'hlen'     (Hardware address length in octets)
+'hops'     0                     0                     0
+'xid'      selected by client    'xid' from server     selected by
+                                 DHCPOFFER message     client
+'secs'     0 or seconds since    0 or seconds since    0
+           DHCP process started  DHCP process started
+'flags'    Set 'BROADCAST'       Set 'BROADCAST'       0
+           flag if client        flag if client
+           requires broadcast    requires broadcast
+           reply                 reply
+'ciaddr'   0 (DHCPDISCOVER)      0 or client's         0 (DHCPDECLINE)
+           client's              network address       client's network
+           network address       (BOUND/RENEW/REBIND)  address
+           (DHCPINFORM)                                (DHCPRELEASE)
+'yiaddr'   0                     0                     0
+'siaddr'   0                     0                     0
+'giaddr'   0                     0                     0
+'chaddr'   client's hardware     client's hardware     client's hardware
+           address               address               address
+'sname'    options, if           options, if           (unused)
+           indicated in          indicated in
+           'sname/file'          'sname/file'
+           option; otherwise     option; otherwise
+           unused                unused
+'file'     options, if           options, if           (unused)
+           indicated in          indicated in
+           'sname/file'          'sname/file'
+           option; otherwise     option; otherwise
+           unused                unused
+'options'  options               options               (unused)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Droms                       Standards Track                    [Page 37]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+Option                     DHCPDISCOVER  DHCPREQUEST      DHCPDECLINE,
+                           DHCPINFORM                     DHCPRELEASE
+------                     ------------  -----------      -----------
+Requested IP address       MAY           MUST (in         MUST
+                           (DISCOVER)    SELECTING or     (DHCPDECLINE),
+                           MUST NOT      INIT-REBOOT)     MUST NOT
+                           (INFORM)      MUST NOT (in     (DHCPRELEASE)
+                                         BOUND or
+                                         RENEWING)
+IP address lease time      MAY           MAY              MUST NOT
+                           (DISCOVER)
+                           MUST NOT
+                           (INFORM)
+Use 'file'/'sname' fields  MAY           MAY              MAY
+DHCP message type          DHCPDISCOVER/ DHCPREQUEST      DHCPDECLINE/
+                           DHCPINFORM                     DHCPRELEASE
+Client identifier          MAY           MAY              MAY
+Vendor class identifier    MAY           MAY              MUST NOT
+Server identifier          MUST NOT      MUST (after      MUST
+                                         SELECTING)
+                                         MUST NOT (after
+                                         INIT-REBOOT,
+                                         BOUND, RENEWING
+                                         or REBINDING)
+Parameter request list     MAY           MAY              MUST NOT
+Maximum message size       MAY           MAY              MUST NOT
+Message                    SHOULD NOT    SHOULD NOT       SHOULD
+Site-specific              MAY           MAY              MUST NOT
+All others                 MAY           MAY              MUST NOT
+
+             Table 5:  Fields and options used by DHCP clients
+
+   If the parameters are acceptable, the client records the address of
+   the server that supplied the parameters from the 'server identifier'
+   field and sends that address in the 'server identifier' field of a
+   DHCPREQUEST broadcast message.  Once the DHCPACK message from the
+   server arrives, the client is initialized and moves to BOUND state.
+   The DHCPREQUEST message contains the same 'xid' as the DHCPOFFER
+   message.  The client records the lease expiration time as the sum of
+   the time at which the original request was sent and the duration of
+   the lease from the DHCPACK message.  The client SHOULD perform a
+   check on the suggested address to ensure that the address is not
+   already in use.  For example, if the client is on a network that
+   supports ARP, the client may issue an ARP request for the suggested
+   address.  When broadcasting an ARP request for the suggested address,
+   the client must fill in its own hardware address as the sender's
+   hardware address, and 0 as the sender's IP address, to avoid
+   confusing ARP caches in other hosts on the same subnet.  If the
+
+
+
+Droms                       Standards Track                    [Page 38]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+   network address appears to be in use, the client MUST send a
+   DHCPDECLINE message to the server. The client SHOULD broadcast an ARP
+   reply to announce the client's new IP address and clear any outdated
+   ARP cache entries in hosts on the client's subnet.
+
+4.4.2 Initialization with known network address
+
+   The client begins in INIT-REBOOT state and sends a DHCPREQUEST
+   message.  The client MUST insert its known network address as a
+   'requested IP address' option in the DHCPREQUEST message.  The client
+   may request specific configuration parameters by including the
+   'parameter request list' option.  The client generates and records a
+   random transaction identifier and inserts that identifier into the
+   'xid' field.  The client records its own local time for later use in
+   computing the lease expiration.  The client MUST NOT include a
+   'server identifier' in the DHCPREQUEST message.  The client then
+   broadcasts the DHCPREQUEST on the local hardware broadcast address to
+   the 'DHCP server' UDP port.
+
+   Once a DHCPACK message with an 'xid' field matching that in the
+   client's DHCPREQUEST message arrives from any server, the client is
+   initialized and moves to BOUND state.  The client records the lease
+   expiration time as the sum of the time at which the DHCPREQUEST
+   message was sent and the duration of the lease from the DHCPACK
+   message.
+
+4.4.3 Initialization with an externally assigned network address
+
+   The client sends a DHCPINFORM message. The client may request
+   specific configuration parameters by including the 'parameter request
+   list' option. The client generates and records a random transaction
+   identifier and inserts that identifier into the 'xid' field. The
+   client places its own network address in the 'ciaddr' field. The
+   client SHOULD NOT request lease time parameters.
+
+   The client then unicasts the DHCPINFORM to the DHCP server if it
+   knows the server's address, otherwise it broadcasts the message to
+   the limited (all 1s) broadcast address.  DHCPINFORM messages MUST be
+   directed to the 'DHCP server' UDP port.
+
+   Once a DHCPACK message with an 'xid' field matching that in the
+   client's DHCPINFORM message arrives from any server, the client is
+   initialized.
+
+   If the client does not receive a DHCPACK within a reasonable period
+   of time (60 seconds or 4 tries if using timeout suggested in section
+   4.1), then it SHOULD display a message informing the user of the
+   problem, and then SHOULD begin network processing using suitable
+
+
+
+Droms                       Standards Track                    [Page 39]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+   defaults as per Appendix A.
+
+4.4.4 Use of broadcast and unicast
+
+   The DHCP client broadcasts DHCPDISCOVER, DHCPREQUEST and DHCPINFORM
+   messages, unless the client knows the address of a DHCP server.  The
+   client unicasts DHCPRELEASE messages to the server.  Because the
+   client is declining the use of the IP address supplied by the server,
+   the client broadcasts DHCPDECLINE messages.
+
+   When the DHCP client knows the address of a DHCP server, in either
+   INIT or REBOOTING state, the client may use that address in the
+   DHCPDISCOVER or DHCPREQUEST rather than the IP broadcast address.
+   The client may also use unicast to send DHCPINFORM messages to a
+   known DHCP server.  If the client receives no response to DHCP
+   messages sent to the IP address of a known DHCP server, the DHCP
+   client reverts to using the IP broadcast address.
+
+4.4.5 Reacquisition and expiration
+
+   The client maintains two times, T1 and T2, that specify the times at
+   which the client tries to extend its lease on its network address.
+   T1 is the time at which the client enters the RENEWING state and
+   attempts to contact the server that originally issued the client's
+   network address.  T2 is the time at which the client enters the
+   REBINDING state and attempts to contact any server. T1 MUST be
+   earlier than T2, which, in turn, MUST be earlier than the time at
+   which the client's lease will expire.
+
+   To avoid the need for synchronized clocks, T1 and T2 are expressed in
+   options as relative times [2].
+
+   At time T1 the client moves to RENEWING state and sends (via unicast)
+   a DHCPREQUEST message to the server to extend its lease.  The client
+   sets the 'ciaddr' field in the DHCPREQUEST to its current network
+   address. The client records the local time at which the DHCPREQUEST
+   message is sent for computation of the lease expiration time.  The
+   client MUST NOT include a 'server identifier' in the DHCPREQUEST
+   message.
+
+   Any DHCPACK messages that arrive with an 'xid' that does not match
+   the 'xid' of the client's DHCPREQUEST message are silently discarded.
+   When the client receives a DHCPACK from the server, the client
+   computes the lease expiration time as the sum of the time at which
+   the client sent the DHCPREQUEST message and the duration of the lease
+   in the DHCPACK message.  The client has successfully reacquired its
+   network address, returns to BOUND state and may continue network
+   processing.
+
+
+
+Droms                       Standards Track                    [Page 40]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+   If no DHCPACK arrives before time T2, the client moves to REBINDING
+   state and sends (via broadcast) a DHCPREQUEST message to extend its
+   lease.  The client sets the 'ciaddr' field in the DHCPREQUEST to its
+   current network address.  The client MUST NOT include a 'server
+   identifier' in the DHCPREQUEST message.
+
+   Times T1 and T2 are configurable by the server through options.  T1
+   defaults to (0.5 * duration_of_lease).  T2 defaults to (0.875 *
+   duration_of_lease).  Times T1 and T2 SHOULD be chosen with some
+   random "fuzz" around a fixed value, to avoid synchronization of
+   client reacquisition.
+
+   A client MAY choose to renew or extend its lease prior to T1.  The
+   server MAY choose to extend the client's lease according to policy
+   set by the network administrator.  The server SHOULD return T1 and
+   T2, and their values SHOULD be adjusted from their original values to
+   take account of the time remaining on the lease.
+
+   In both RENEWING and REBINDING states, if the client receives no
+   response to its DHCPREQUEST message, the client SHOULD wait one-half
+   of the remaining time until T2 (in RENEWING state) and one-half of
+   the remaining lease time (in REBINDING state), down to a minimum of
+   60 seconds, before retransmitting the DHCPREQUEST message.
+
+   If the lease expires before the client receives a DHCPACK, the client
+   moves to INIT state, MUST immediately stop any other network
+   processing and requests network initialization parameters as if the
+   client were uninitialized.  If the client then receives a DHCPACK
+   allocating that client its previous network address, the client
+   SHOULD continue network processing.  If the client is given a new
+   network address, it MUST NOT continue using the previous network
+   address and SHOULD notify the local users of the problem.
+
+4.4.6 DHCPRELEASE
+
+   If the client no longer requires use of its assigned network address
+   (e.g., the client is gracefully shut down), the client sends a
+   DHCPRELEASE message to the server.  Note that the correct operation
+   of DHCP does not depend on the transmission of DHCPRELEASE messages.
+
+
+
+
+
+
+
+
+
+
+
+
+Droms                       Standards Track                    [Page 41]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+5. Acknowledgments
+
+   The author thanks the many (and too numerous to mention!) members of
+   the DHC WG for their tireless and ongoing efforts in the development
+   of DHCP and this document.
+
+   The efforts of J Allard, Mike Carney, Dave Lapp, Fred Lien and John
+   Mendonca in organizing DHCP interoperability testing sessions are
+   gratefully acknowledged.
+
+   The development of this document was supported in part by grants from
+   the Corporation for National Research Initiatives (CNRI), Bucknell
+   University and Sun Microsystems.
+
+6. References
+
+   [1] Accetta, M., "Resource Location Protocol", RFC 887, CMU, December
+       1983.
+
+   [2] Alexander, S., and R. Droms, "DHCP Options and BOOTP Vendor
+       Extensions", RFC 1533, Lachman Technology, Inc., Bucknell
+       University, October 1993.
+
+   [3] Braden, R., Editor, "Requirements for Internet Hosts --
+       Communication Layers", STD 3, RFC 1122, USC/Information Sciences
+       Institute, October 1989.
+
+   [4] Braden, R., Editor, "Requirements for Internet Hosts --
+       Application and Support", STD 3, RFC 1123, USC/Information
+       Sciences Institute, October 1989.
+
+   [5] Brownell, D., "Dynamic Reverse Address Resolution Protocol
+       (DRARP)", Work in Progress.
+
+   [6] Comer, D., and R. Droms, "Uniform Access to Internet Directory
+       Services", Proc. of ACM SIGCOMM '90 (Special issue of Computer
+       Communications Review), 20(4):50--59, 1990.
+
+   [7] Croft, B., and J. Gilmore, "Bootstrap Protocol (BOOTP)", RFC 951,
+       Stanford and SUN Microsystems, September 1985.
+
+   [8] Deering, S., "ICMP Router Discovery Messages", RFC 1256, Xerox
+       PARC, September 1991.
+
+   [9] Droms, R., "Interoperation between DHCP and BOOTP", RFC 1534,
+       Bucknell University, October 1993.
+
+
+
+
+
+Droms                       Standards Track                    [Page 42]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+   [10] Finlayson, R., Mann, T., Mogul, J., and M. Theimer, "A Reverse
+        Address Resolution Protocol", RFC 903, Stanford, June 1984.
+
+   [11] Gray, C., and D. Cheriton, "Leases: An Efficient Fault-Tolerant
+        Mechanism for Distributed File Cache Consistency", In Proc. of
+        the Twelfth ACM Symposium on Operating Systems Design, 1989.
+
+   [12] Mockapetris, P., "Domain Names -- Concepts and Facilities", STD
+        13, RFC 1034, USC/Information Sciences Institute, November 1987.
+
+   [13] Mockapetris, P., "Domain Names -- Implementation and
+        Specification", STD 13, RFC 1035, USC/Information Sciences
+        Institute, November 1987.
+
+   [14] Mogul, J., and S. Deering, "Path MTU Discovery", RFC 1191,
+        November 1990.
+
+   [15] Morgan, R., "Dynamic IP Address Assignment for Ethernet Attached
+        Hosts", Work in Progress.
+
+   [16] Postel, J., "Internet Control Message Protocol", STD 5, RFC 792,
+        USC/Information Sciences Institute, September 1981.
+
+   [17] Reynolds, J., "BOOTP Vendor Information Extensions", RFC 1497,
+        USC/Information Sciences Institute, August 1993.
+
+   [18] Reynolds, J., and J. Postel, "Assigned Numbers", STD 2, RFC 1700,
+        USC/Information Sciences Institute, October 1994.
+
+   [19] Jeffrey Schiller and Mark Rosenstein. A Protocol for the Dynamic
+        Assignment of IP Addresses for use on an Ethernet. (Available
+        from the Athena Project, MIT), 1989.
+
+   [20] Sollins, K., "The TFTP Protocol (Revision 2)",  RFC 783, NIC,
+        June 1981.
+
+   [21] Wimer, W., "Clarifications and Extensions for the Bootstrap
+        Protocol", RFC 1542, Carnegie Mellon University, October 1993.
+
+7. Security Considerations
+
+   DHCP is built directly on UDP and IP which are as yet inherently
+   insecure.  Furthermore, DHCP is generally intended to make
+   maintenance of remote and/or diskless hosts easier.  While perhaps
+   not impossible, configuring such hosts with passwords or keys may be
+   difficult and inconvenient.  Therefore, DHCP in its current form is
+   quite insecure.
+
+
+
+
+Droms                       Standards Track                    [Page 43]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+   Unauthorized DHCP servers may be easily set up.  Such servers can
+   then send false and potentially disruptive information to clients
+   such as incorrect or duplicate IP addresses, incorrect routing
+   information (including spoof routers, etc.), incorrect domain
+   nameserver addresses (such as spoof nameservers), and so on.
+   Clearly, once this seed information is in place, an attacker can
+   further compromise affected systems.
+
+   Malicious DHCP clients could masquerade as legitimate clients and
+   retrieve information intended for those legitimate clients.  Where
+   dynamic allocation of resources is used, a malicious client could
+   claim all resources for itself, thereby denying resources to
+   legitimate clients.
+
+8. Author's Address
+
+      Ralph Droms
+      Computer Science Department
+      323 Dana Engineering
+      Bucknell University
+      Lewisburg, PA 17837
+
+      Phone: (717) 524-1145
+      EMail: droms@bucknell.edu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Droms                       Standards Track                    [Page 44]
+
+RFC 2131          Dynamic Host Configuration Protocol         March 1997
+
+
+A. Host Configuration Parameters
+
+   IP-layer parameters, per host:
+
+   Be a router                     on/off                 HRC 3.1
+   Non-local source routing        on/off                 HRC 3.3.5
+   Policy filters for
+   non-local source routing        (list)                 HRC 3.3.5
+   Maximum reassembly size         integer                HRC 3.3.2
+   Default TTL                     integer                HRC 3.2.1.7
+   PMTU aging timeout              integer                MTU 6.6
+   MTU plateau table               (list)                 MTU 7
+   IP-layer parameters, per interface:
+   IP address                      (address)              HRC 3.3.1.6
+   Subnet mask                     (address mask)         HRC 3.3.1.6
+   MTU                             integer                HRC 3.3.3
+   All-subnets-MTU                 on/off                 HRC 3.3.3
+   Broadcast address flavor        0x00000000/0xffffffff  HRC 3.3.6
+   Perform mask discovery          on/off                 HRC 3.2.2.9
+   Be a mask supplier              on/off                 HRC 3.2.2.9
+   Perform router discovery        on/off                 RD 5.1
+   Router solicitation address     (address)              RD 5.1
+   Default routers, list of:
+           router address          (address)              HRC 3.3.1.6
+           preference level        integer                HRC 3.3.1.6
+   Static routes, list of:
+           destination             (host/subnet/net)      HRC 3.3.1.2
+           destination mask        (address mask)         HRC 3.3.1.2
+           type-of-service         integer                HRC 3.3.1.2
+           first-hop router        (address)              HRC 3.3.1.2
+           ignore redirects        on/off                 HRC 3.3.1.2
+           PMTU                    integer                MTU 6.6
+           perform PMTU discovery  on/off                 MTU 6.6
+
+   Link-layer parameters, per interface:
+   Trailers                       on/off                 HRC 2.3.1
+   ARP cache timeout              integer                HRC 2.3.2.1
+   Ethernet encapsulation         (RFC 894/RFC 1042)     HRC 2.3.3
+
+   TCP parameters, per host:
+   TTL                            integer                HRC 4.2.2.19
+   Keep-alive interval            integer                HRC 4.2.3.6
+   Keep-alive data size           0/1                    HRC 4.2.3.6
+
+Key:
+   HRC = Host Requirements -- Communication Layers (RFC 1122, STD 3)
+   MTU = Path MTU Discovery (RFC 1191, Proposed Standard)
+   RD = Router Discovery (RFC 1256, Proposed Standard)
+
+
+
+Droms                       Standards Track                    [Page 45]
+
diff --git a/ext/picotcp/RFC/rfc2460.txt b/ext/picotcp/RFC/rfc2460.txt
new file mode 100644
index 0000000..de7b7fa
--- /dev/null
+++ b/ext/picotcp/RFC/rfc2460.txt
@@ -0,0 +1,2187 @@
+
+
+
+
+
+
+Network Working Group                                         S. Deering
+Request for Comments: 2460                                         Cisco
+Obsoletes: 1883                                                R. Hinden
+Category: Standards Track                                          Nokia
+                                                           December 1998
+
+
+                  Internet Protocol, Version 6 (IPv6)
+                             Specification
+
+Status of this Memo
+
+   This document specifies an Internet standards track protocol for the
+   Internet community, and requests discussion and suggestions for
+   improvements.  Please refer to the current edition of the "Internet
+   Official Protocol Standards" (STD 1) for the standardization state
+   and status of this protocol.  Distribution of this memo is unlimited.
+
+Copyright Notice
+
+   Copyright (C) The Internet Society (1998).  All Rights Reserved.
+
+Abstract
+
+   This document specifies version 6 of the Internet Protocol (IPv6),
+   also sometimes referred to as IP Next Generation or IPng.
+
+Table of Contents
+
+   1. Introduction..................................................2
+   2. Terminology...................................................3
+   3. IPv6 Header Format............................................4
+   4. IPv6 Extension Headers........................................6
+       4.1 Extension Header Order...................................7
+       4.2 Options..................................................9
+       4.3 Hop-by-Hop Options Header...............................11
+       4.4 Routing Header..........................................12
+       4.5 Fragment Header.........................................18
+       4.6 Destination Options Header..............................23
+       4.7 No Next Header..........................................24
+   5. Packet Size Issues...........................................24
+   6. Flow Labels..................................................25
+   7. Traffic Classes..............................................25
+   8. Upper-Layer Protocol Issues..................................27
+       8.1 Upper-Layer Checksums...................................27
+       8.2 Maximum Packet Lifetime.................................28
+       8.3 Maximum Upper-Layer Payload Size........................28
+       8.4 Responding to Packets Carrying Routing Headers..........29
+
+
+
+Deering & Hinden            Standards Track                     [Page 1]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+   Appendix A. Semantics and Usage of the Flow Label Field.........30
+   Appendix B. Formatting Guidelines for Options...................32
+   Security Considerations.........................................35
+   Acknowledgments.................................................35
+   Authors' Addresses..............................................35
+   References......................................................35
+   Changes Since RFC-1883..........................................36
+   Full Copyright Statement........................................39
+
+1.  Introduction
+
+   IP version 6 (IPv6) is a new version of the Internet Protocol,
+   designed as the successor to IP version 4 (IPv4) [RFC-791].  The
+   changes from IPv4 to IPv6 fall primarily into the following
+   categories:
+
+      o  Expanded Addressing Capabilities
+
+         IPv6 increases the IP address size from 32 bits to 128 bits, to
+         support more levels of addressing hierarchy, a much greater
+         number of addressable nodes, and simpler auto-configuration of
+         addresses.  The scalability of multicast routing is improved by
+         adding a "scope" field to multicast addresses.  And a new type
+         of address called an "anycast address" is defined, used to send
+         a packet to any one of a group of nodes.
+
+      o  Header Format Simplification
+
+         Some IPv4 header fields have been dropped or made optional, to
+         reduce the common-case processing cost of packet handling and
+         to limit the bandwidth cost of the IPv6 header.
+
+      o  Improved Support for Extensions and Options
+
+         Changes in the way IP header options are encoded allow for
+         more efficient forwarding, less stringent limits on the length
+         of options, and greater flexibility for introducing new options
+         in the future.
+
+      o  Flow Labeling Capability
+
+         A new capability is added to enable the labeling of packets
+         belonging to particular traffic "flows" for which the sender
+         requests special handling, such as non-default quality of
+         service or "real-time" service.
+
+
+
+
+
+
+Deering & Hinden            Standards Track                     [Page 2]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+      o  Authentication and Privacy Capabilities
+
+         Extensions to support authentication, data integrity, and
+         (optional) data confidentiality are specified for IPv6.
+
+   This document specifies the basic IPv6 header and the initially-
+   defined IPv6 extension headers and options.  It also discusses packet
+   size issues, the semantics of flow labels and traffic classes, and
+   the effects of IPv6 on upper-layer protocols.  The format and
+   semantics of IPv6 addresses are specified separately in [ADDRARCH].
+   The IPv6 version of ICMP, which all IPv6 implementations are required
+   to include, is specified in [ICMPv6].
+
+2.  Terminology
+
+   node        - a device that implements IPv6.
+
+   router      - a node that forwards IPv6 packets not explicitly
+                 addressed to itself.  [See Note below].
+
+   host        - any node that is not a router.  [See Note below].
+
+   upper layer - a protocol layer immediately above IPv6.  Examples are
+                 transport protocols such as TCP and UDP, control
+                 protocols such as ICMP, routing protocols such as OSPF,
+                 and internet or lower-layer protocols being "tunneled"
+                 over (i.e., encapsulated in) IPv6 such as IPX,
+                 AppleTalk, or IPv6 itself.
+
+   link        - a communication facility or medium over which nodes can
+                 communicate at the link layer, i.e., the layer
+                 immediately below IPv6.  Examples are Ethernets (simple
+                 or bridged); PPP links; X.25, Frame Relay, or ATM
+                 networks; and internet (or higher) layer "tunnels",
+                 such as tunnels over IPv4 or IPv6 itself.
+
+   neighbors   - nodes attached to the same link.
+
+   interface   - a node's attachment to a link.
+
+   address     - an IPv6-layer identifier for an interface or a set of
+                 interfaces.
+
+   packet      - an IPv6 header plus payload.
+
+   link MTU    - the maximum transmission unit, i.e., maximum packet
+                 size in octets, that can be conveyed over a link.
+
+
+
+
+Deering & Hinden            Standards Track                     [Page 3]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+   path MTU    - the minimum link MTU of all the links in a path between
+                 a source node and a destination node.
+
+   Note: it is possible, though unusual, for a device with multiple
+   interfaces to be configured to forward non-self-destined packets
+   arriving from some set (fewer than all) of its interfaces, and to
+   discard non-self-destined packets arriving from its other interfaces.
+   Such a device must obey the protocol requirements for routers when
+   receiving packets from, and interacting with neighbors over, the
+   former (forwarding) interfaces.  It must obey the protocol
+   requirements for hosts when receiving packets from, and interacting
+   with neighbors over, the latter (non-forwarding) interfaces.
+
+3.  IPv6 Header Format
+
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |Version| Traffic Class |           Flow Label                  |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |         Payload Length        |  Next Header  |   Hop Limit   |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                                                               |
+   +                                                               +
+   |                                                               |
+   +                         Source Address                        +
+   |                                                               |
+   +                                                               +
+   |                                                               |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                                                               |
+   +                                                               +
+   |                                                               |
+   +                      Destination Address                      +
+   |                                                               |
+   +                                                               +
+   |                                                               |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   Version              4-bit Internet Protocol version number = 6.
+
+   Traffic Class        8-bit traffic class field.  See section 7.
+
+   Flow Label           20-bit flow label.  See section 6.
+
+   Payload Length       16-bit unsigned integer.  Length of the IPv6
+                        payload, i.e., the rest of the packet following
+                        this IPv6 header, in octets.  (Note that any
+
+
+
+
+
+Deering & Hinden            Standards Track                     [Page 4]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+                        extension headers [section 4] present are
+                        considered part of the payload, i.e., included
+                        in the length count.)
+
+   Next Header          8-bit selector.  Identifies the type of header
+                        immediately following the IPv6 header.  Uses the
+                        same values as the IPv4 Protocol field [RFC-1700
+                        et seq.].
+
+   Hop Limit            8-bit unsigned integer.  Decremented by 1 by
+                        each node that forwards the packet. The packet
+                        is discarded if Hop Limit is decremented to
+                        zero.
+
+   Source Address       128-bit address of the originator of the packet.
+                        See [ADDRARCH].
+
+   Destination Address  128-bit address of the intended recipient of the
+                        packet (possibly not the ultimate recipient, if
+                        a Routing header is present).  See [ADDRARCH]
+                        and section 4.4.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Deering & Hinden            Standards Track                     [Page 5]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+4.  IPv6 Extension Headers
+
+   In IPv6, optional internet-layer information is encoded in separate
+   headers that may be placed between the IPv6 header and the upper-
+   layer header in a packet.  There are a small number of such extension
+   headers, each identified by a distinct Next Header value.  As
+   illustrated in these examples, an IPv6 packet may carry zero, one, or
+   more extension headers, each identified by the Next Header field of
+   the preceding header:
+
+   +---------------+------------------------
+   |  IPv6 header  | TCP header + data
+   |               |
+   | Next Header = |
+   |      TCP      |
+   +---------------+------------------------
+
+
+   +---------------+----------------+------------------------
+   |  IPv6 header  | Routing header | TCP header + data
+   |               |                |
+   | Next Header = |  Next Header = |
+   |    Routing    |      TCP       |
+   +---------------+----------------+------------------------
+
+
+   +---------------+----------------+-----------------+-----------------
+   |  IPv6 header  | Routing header | Fragment header | fragment of TCP
+   |               |                |                 |  header + data
+   | Next Header = |  Next Header = |  Next Header =  |
+   |    Routing    |    Fragment    |       TCP       |
+   +---------------+----------------+-----------------+-----------------
+
+   With one exception, extension headers are not examined or processed
+   by any node along a packet's delivery path, until the packet reaches
+   the node (or each of the set of nodes, in the case of multicast)
+   identified in the Destination Address field of the IPv6 header.
+   There, normal demultiplexing on the Next Header field of the IPv6
+   header invokes the module to process the first extension header, or
+   the upper-layer header if no extension header is present.  The
+   contents and semantics of each extension header determine whether or
+   not to proceed to the next header.  Therefore, extension headers must
+   be processed strictly in the order they appear in the packet; a
+   receiver must not, for example, scan through a packet looking for a
+   particular kind of extension header and process that header prior to
+   processing all preceding ones.
+
+
+
+
+
+Deering & Hinden            Standards Track                     [Page 6]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+   The exception referred to in the preceding paragraph is the Hop-by-
+   Hop Options header, which carries information that must be examined
+   and processed by every node along a packet's delivery path, including
+   the source and destination nodes.  The Hop-by-Hop Options header,
+   when present, must immediately follow the IPv6 header.  Its presence
+   is indicated by the value zero in the Next Header field of the IPv6
+   header.
+
+   If, as a result of processing a header, a node is required to proceed
+   to the next header but the Next Header value in the current header is
+   unrecognized by the node, it should discard the packet and send an
+   ICMP Parameter Problem message to the source of the packet, with an
+   ICMP Code value of 1 ("unrecognized Next Header type encountered")
+   and the ICMP Pointer field containing the offset of the unrecognized
+   value within the original packet.  The same action should be taken if
+   a node encounters a Next Header value of zero in any header other
+   than an IPv6 header.
+
+   Each extension header is an integer multiple of 8 octets long, in
+   order to retain 8-octet alignment for subsequent headers.  Multi-
+   octet fields within each extension header are aligned on their
+   natural boundaries, i.e., fields of width n octets are placed at an
+   integer multiple of n octets from the start of the header, for n = 1,
+   2, 4, or 8.
+
+   A full implementation of IPv6 includes implementation of the
+   following extension headers:
+
+           Hop-by-Hop Options
+           Routing (Type 0)
+           Fragment
+           Destination Options
+           Authentication
+           Encapsulating Security Payload
+
+   The first four are specified in this document; the last two are
+   specified in [RFC-2402] and [RFC-2406], respectively.
+
+4.1  Extension Header Order
+
+   When more than one extension header is used in the same packet, it is
+   recommended that those headers appear in the following order:
+
+           IPv6 header
+           Hop-by-Hop Options header
+           Destination Options header (note 1)
+           Routing header
+           Fragment header
+
+
+
+Deering & Hinden            Standards Track                     [Page 7]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+           Authentication header (note 2)
+           Encapsulating Security Payload header (note 2)
+           Destination Options header (note 3)
+           upper-layer header
+
+           note 1: for options to be processed by the first destination
+                   that appears in the IPv6 Destination Address field
+                   plus subsequent destinations listed in the Routing
+                   header.
+
+           note 2: additional recommendations regarding the relative
+                   order of the Authentication and Encapsulating
+                   Security Payload headers are given in [RFC-2406].
+
+           note 3: for options to be processed only by the final
+                   destination of the packet.
+
+   Each extension header should occur at most once, except for the
+   Destination Options header which should occur at most twice (once
+   before a Routing header and once before the upper-layer header).
+
+   If the upper-layer header is another IPv6 header (in the case of IPv6
+   being tunneled over or encapsulated in IPv6), it may be followed by
+   its own extension headers, which are separately subject to the same
+   ordering recommendations.
+
+   If and when other extension headers are defined, their ordering
+   constraints relative to the above listed headers must be specified.
+
+   IPv6 nodes must accept and attempt to process extension headers in
+   any order and occurring any number of times in the same packet,
+   except for the Hop-by-Hop Options header which is restricted to
+   appear immediately after an IPv6 header only.  Nonetheless, it is
+   strongly advised that sources of IPv6 packets adhere to the above
+   recommended order until and unless subsequent specifications revise
+   that recommendation.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Deering & Hinden            Standards Track                     [Page 8]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+4.2  Options
+
+   Two of the currently-defined extension headers -- the Hop-by-Hop
+   Options header and the Destination Options header -- carry a variable
+   number of type-length-value (TLV) encoded "options", of the following
+   format:
+
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- - - - - - - - -
+      |  Option Type  |  Opt Data Len |  Option Data
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- - - - - - - - -
+
+      Option Type          8-bit identifier of the type of option.
+
+      Opt Data Len         8-bit unsigned integer.  Length of the Option
+                           Data field of this option, in octets.
+
+      Option Data          Variable-length field.  Option-Type-specific
+                           data.
+
+   The sequence of options within a header must be processed strictly in
+   the order they appear in the header; a receiver must not, for
+   example, scan through the header looking for a particular kind of
+   option and process that option prior to processing all preceding
+   ones.
+
+   The Option Type identifiers are internally encoded such that their
+   highest-order two bits specify the action that must be taken if the
+   processing IPv6 node does not recognize the Option Type:
+
+      00 - skip over this option and continue processing the header.
+
+      01 - discard the packet.
+
+      10 - discard the packet and, regardless of whether or not the
+           packet's Destination Address was a multicast address, send an
+           ICMP Parameter Problem, Code 2, message to the packet's
+           Source Address, pointing to the unrecognized Option Type.
+
+      11 - discard the packet and, only if the packet's Destination
+           Address was not a multicast address, send an ICMP Parameter
+           Problem, Code 2, message to the packet's Source Address,
+           pointing to the unrecognized Option Type.
+
+   The third-highest-order bit of the Option Type specifies whether or
+   not the Option Data of that option can change en-route to the
+   packet's final destination.  When an Authentication header is present
+
+
+
+
+
+Deering & Hinden            Standards Track                     [Page 9]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+   in the packet, for any option whose data may change en-route, its
+   entire Option Data field must be treated as zero-valued octets when
+   computing or verifying the packet's authenticating value.
+
+      0 - Option Data does not change en-route
+
+      1 - Option Data may change en-route
+
+   The three high-order bits described above are to be treated as part
+   of the Option Type, not independent of the Option Type.  That is, a
+   particular option is identified by a full 8-bit Option Type, not just
+   the low-order 5 bits of an Option Type.
+
+   The same Option Type numbering space is used for both the Hop-by-Hop
+   Options header and the Destination Options header.  However, the
+   specification of a particular option may restrict its use to only one
+   of those two headers.
+
+   Individual options may have specific alignment requirements, to
+   ensure that multi-octet values within Option Data fields fall on
+   natural boundaries.  The alignment requirement of an option is
+   specified using the notation xn+y, meaning the Option Type must
+   appear at an integer multiple of x octets from the start of the
+   header, plus y octets.  For example:
+
+      2n    means any 2-octet offset from the start of the header.
+      8n+2  means any 8-octet offset from the start of the header,
+            plus 2 octets.
+
+   There are two padding options which are used when necessary to align
+   subsequent options and to pad out the containing header to a multiple
+   of 8 octets in length.  These padding options must be recognized by
+   all IPv6 implementations:
+
+   Pad1 option  (alignment requirement: none)
+
+      +-+-+-+-+-+-+-+-+
+      |       0       |
+      +-+-+-+-+-+-+-+-+
+
+      NOTE! the format of the Pad1 option is a special case -- it does
+            not have length and value fields.
+
+      The Pad1 option is used to insert one octet of padding into the
+      Options area of a header.  If more than one octet of padding is
+      required, the PadN option, described next, should be used, rather
+      than multiple Pad1 options.
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 10]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+   PadN option  (alignment requirement: none)
+
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- - - - - - - - -
+      |       1       |  Opt Data Len |  Option Data
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- - - - - - - - -
+
+      The PadN option is used to insert two or more octets of padding
+      into the Options area of a header.  For N octets of padding, the
+      Opt Data Len field contains the value N-2, and the Option Data
+      consists of N-2 zero-valued octets.
+
+   Appendix B contains formatting guidelines for designing new options.
+
+4.3  Hop-by-Hop Options Header
+
+   The Hop-by-Hop Options header is used to carry optional information
+   that must be examined by every node along a packet's delivery path.
+   The Hop-by-Hop Options header is identified by a Next Header value of
+   0 in the IPv6 header, and has the following format:
+
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    |  Next Header  |  Hdr Ext Len  |                               |
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+                               +
+    |                                                               |
+    .                                                               .
+    .                            Options                            .
+    .                                                               .
+    |                                                               |
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   Next Header          8-bit selector.  Identifies the type of header
+                        immediately following the Hop-by-Hop Options
+                        header.  Uses the same values as the IPv4
+                        Protocol field [RFC-1700 et seq.].
+
+   Hdr Ext Len          8-bit unsigned integer.  Length of the Hop-by-
+                        Hop Options header in 8-octet units, not
+                        including the first 8 octets.
+
+   Options              Variable-length field, of length such that the
+                        complete Hop-by-Hop Options header is an integer
+                        multiple of 8 octets long.  Contains one or more
+                        TLV-encoded options, as described in section
+                        4.2.
+
+   The only hop-by-hop options defined in this document are the Pad1 and
+   PadN options specified in section 4.2.
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 11]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+4.4  Routing Header
+
+   The Routing header is used by an IPv6 source to list one or more
+   intermediate nodes to be "visited" on the way to a packet's
+   destination.  This function is very similar to IPv4's Loose Source
+   and Record Route option.  The Routing header is identified by a Next
+   Header value of 43 in the immediately preceding header, and has the
+   following format:
+
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    |  Next Header  |  Hdr Ext Len  |  Routing Type | Segments Left |
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    |                                                               |
+    .                                                               .
+    .                       type-specific data                      .
+    .                                                               .
+    |                                                               |
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   Next Header          8-bit selector.  Identifies the type of header
+                        immediately following the Routing header.  Uses
+                        the same values as the IPv4 Protocol field
+                        [RFC-1700 et seq.].
+
+   Hdr Ext Len          8-bit unsigned integer.  Length of the Routing
+                        header in 8-octet units, not including the first
+                        8 octets.
+
+   Routing Type         8-bit identifier of a particular Routing header
+                        variant.
+
+   Segments Left        8-bit unsigned integer.  Number of route
+                        segments remaining, i.e., number of explicitly
+                        listed intermediate nodes still to be visited
+                        before reaching the final destination.
+
+   type-specific data   Variable-length field, of format determined by
+                        the Routing Type, and of length such that the
+                        complete Routing header is an integer multiple
+                        of 8 octets long.
+
+   If, while processing a received packet, a node encounters a Routing
+   header with an unrecognized Routing Type value, the required behavior
+   of the node depends on the value of the Segments Left field, as
+   follows:
+
+
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 12]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+      If Segments Left is zero, the node must ignore the Routing header
+      and proceed to process the next header in the packet, whose type
+      is identified by the Next Header field in the Routing header.
+
+      If Segments Left is non-zero, the node must discard the packet and
+      send an ICMP Parameter Problem, Code 0, message to the packet's
+      Source Address, pointing to the unrecognized Routing Type.
+
+   If, after processing a Routing header of a received packet, an
+   intermediate node determines that the packet is to be forwarded onto
+   a link whose link MTU is less than the size of the packet, the node
+   must discard the packet and send an ICMP Packet Too Big message to
+   the packet's Source Address.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 13]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+   The Type 0 Routing header has the following format:
+
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    |  Next Header  |  Hdr Ext Len  | Routing Type=0| Segments Left |
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    |                            Reserved                           |
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    |                                                               |
+    +                                                               +
+    |                                                               |
+    +                           Address[1]                          +
+    |                                                               |
+    +                                                               +
+    |                                                               |
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    |                                                               |
+    +                                                               +
+    |                                                               |
+    +                           Address[2]                          +
+    |                                                               |
+    +                                                               +
+    |                                                               |
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    .                               .                               .
+    .                               .                               .
+    .                               .                               .
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    |                                                               |
+    +                                                               +
+    |                                                               |
+    +                           Address[n]                          +
+    |                                                               |
+    +                                                               +
+    |                                                               |
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   Next Header          8-bit selector.  Identifies the type of header
+                        immediately following the Routing header.  Uses
+                        the same values as the IPv4 Protocol field
+                        [RFC-1700 et seq.].
+
+   Hdr Ext Len          8-bit unsigned integer.  Length of the Routing
+                        header in 8-octet units, not including the first
+                        8 octets.  For the Type 0 Routing header, Hdr
+                        Ext Len is equal to two times the number of
+                        addresses in the header.
+
+   Routing Type         0.
+
+
+
+Deering & Hinden            Standards Track                    [Page 14]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+   Segments Left        8-bit unsigned integer.  Number of route
+                        segments remaining, i.e., number of explicitly
+                        listed intermediate nodes still to be visited
+                        before reaching the final destination.
+
+   Reserved             32-bit reserved field.  Initialized to zero for
+                        transmission; ignored on reception.
+
+   Address[1..n]        Vector of 128-bit addresses, numbered 1 to n.
+
+   Multicast addresses must not appear in a Routing header of Type 0, or
+   in the IPv6 Destination Address field of a packet carrying a Routing
+   header of Type 0.
+
+   A Routing header is not examined or processed until it reaches the
+   node identified in the Destination Address field of the IPv6 header.
+   In that node, dispatching on the Next Header field of the immediately
+   preceding header causes the Routing header module to be invoked,
+   which, in the case of Routing Type 0, performs the following
+   algorithm:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 15]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+   if Segments Left = 0 {
+      proceed to process the next header in the packet, whose type is
+      identified by the Next Header field in the Routing header
+   }
+   else if Hdr Ext Len is odd {
+         send an ICMP Parameter Problem, Code 0, message to the Source
+         Address, pointing to the Hdr Ext Len field, and discard the
+         packet
+   }
+   else {
+      compute n, the number of addresses in the Routing header, by
+      dividing Hdr Ext Len by 2
+
+      if Segments Left is greater than n {
+         send an ICMP Parameter Problem, Code 0, message to the Source
+         Address, pointing to the Segments Left field, and discard the
+         packet
+      }
+      else {
+         decrement Segments Left by 1;
+         compute i, the index of the next address to be visited in
+         the address vector, by subtracting Segments Left from n
+
+         if Address [i] or the IPv6 Destination Address is multicast {
+            discard the packet
+         }
+         else {
+            swap the IPv6 Destination Address and Address[i]
+
+            if the IPv6 Hop Limit is less than or equal to 1 {
+               send an ICMP Time Exceeded -- Hop Limit Exceeded in
+               Transit message to the Source Address and discard the
+               packet
+            }
+            else {
+               decrement the Hop Limit by 1
+
+               resubmit the packet to the IPv6 module for transmission
+               to the new destination
+            }
+         }
+      }
+   }
+
+
+
+
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 16]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+   As an example of the effects of the above algorithm, consider the
+   case of a source node S sending a packet to destination node D, using
+   a Routing header to cause the packet to be routed via intermediate
+   nodes I1, I2, and I3.  The values of the relevant IPv6 header and
+   Routing header fields on each segment of the delivery path would be
+   as follows:
+
+   As the packet travels from S to I1:
+
+        Source Address = S                  Hdr Ext Len = 6
+        Destination Address = I1            Segments Left = 3
+                                            Address[1] = I2
+                                            Address[2] = I3
+                                            Address[3] = D
+
+   As the packet travels from I1 to I2:
+
+        Source Address = S                  Hdr Ext Len = 6
+        Destination Address = I2            Segments Left = 2
+                                            Address[1] = I1
+                                            Address[2] = I3
+                                            Address[3] = D
+
+   As the packet travels from I2 to I3:
+
+        Source Address = S                  Hdr Ext Len = 6
+        Destination Address = I3            Segments Left = 1
+                                            Address[1] = I1
+                                            Address[2] = I2
+                                            Address[3] = D
+
+   As the packet travels from I3 to D:
+
+        Source Address = S                  Hdr Ext Len = 6
+        Destination Address = D             Segments Left = 0
+                                            Address[1] = I1
+                                            Address[2] = I2
+                                            Address[3] = I3
+
+
+
+
+
+
+
+
+
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 17]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+4.5  Fragment Header
+
+   The Fragment header is used by an IPv6 source to send a packet larger
+   than would fit in the path MTU to its destination.  (Note: unlike
+   IPv4, fragmentation in IPv6 is performed only by source nodes, not by
+   routers along a packet's delivery path -- see section 5.)  The
+   Fragment header is identified by a Next Header value of 44 in the
+   immediately preceding header, and has the following format:
+
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |  Next Header  |   Reserved    |      Fragment Offset    |Res|M|
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                         Identification                        |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   Next Header          8-bit selector.  Identifies the initial header
+                        type of the Fragmentable Part of the original
+                        packet (defined below).  Uses the same values as
+                        the IPv4 Protocol field [RFC-1700 et seq.].
+
+   Reserved             8-bit reserved field.  Initialized to zero for
+                        transmission; ignored on reception.
+
+   Fragment Offset      13-bit unsigned integer.  The offset, in 8-octet
+                        units, of the data following this header,
+                        relative to the start of the Fragmentable Part
+                        of the original packet.
+
+   Res                  2-bit reserved field.  Initialized to zero for
+                        transmission; ignored on reception.
+
+   M flag               1 = more fragments; 0 = last fragment.
+
+   Identification       32 bits.  See description below.
+
+   In order to send a packet that is too large to fit in the MTU of the
+   path to its destination, a source node may divide the packet into
+   fragments and send each fragment as a separate packet, to be
+   reassembled at the receiver.
+
+   For every packet that is to be fragmented, the source node generates
+   an Identification value. The Identification must be different than
+   that of any other fragmented packet sent recently* with the same
+   Source Address and Destination Address.  If a Routing header is
+   present, the Destination Address of concern is that of the final
+   destination.
+
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 18]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+      * "recently" means within the maximum likely lifetime of a packet,
+        including transit time from source to destination and time spent
+        awaiting reassembly with other fragments of the same packet.
+        However, it is not required that a source node know the maximum
+        packet lifetime.  Rather, it is assumed that the requirement can
+        be met by maintaining the Identification value as a simple, 32-
+        bit, "wrap-around" counter, incremented each time a packet must
+        be fragmented.  It is an implementation choice whether to
+        maintain a single counter for the node or multiple counters,
+        e.g., one for each of the node's possible source addresses, or
+        one for each active (source address, destination address)
+        combination.
+
+   The initial, large, unfragmented packet is referred to as the
+   "original packet", and it is considered to consist of two parts, as
+   illustrated:
+
+   original packet:
+
+   +------------------+----------------------//-----------------------+
+   |  Unfragmentable  |                 Fragmentable                  |
+   |       Part       |                     Part                      |
+   +------------------+----------------------//-----------------------+
+
+      The Unfragmentable Part consists of the IPv6 header plus any
+      extension headers that must be processed by nodes en route to the
+      destination, that is, all headers up to and including the Routing
+      header if present, else the Hop-by-Hop Options header if present,
+      else no extension headers.
+
+      The Fragmentable Part consists of the rest of the packet, that is,
+      any extension headers that need be processed only by the final
+      destination node(s), plus the upper-layer header and data.
+
+   The Fragmentable Part of the original packet is divided into
+   fragments, each, except possibly the last ("rightmost") one, being an
+   integer multiple of 8 octets long.  The fragments are transmitted in
+   separate "fragment packets" as illustrated:
+
+   original packet:
+
+   +------------------+--------------+--------------+--//--+----------+
+   |  Unfragmentable  |    first     |    second    |      |   last   |
+   |       Part       |   fragment   |   fragment   | .... | fragment |
+   +------------------+--------------+--------------+--//--+----------+
+
+
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 19]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+   fragment packets:
+
+   +------------------+--------+--------------+
+   |  Unfragmentable  |Fragment|    first     |
+   |       Part       | Header |   fragment   |
+   +------------------+--------+--------------+
+
+   +------------------+--------+--------------+
+   |  Unfragmentable  |Fragment|    second    |
+   |       Part       | Header |   fragment   |
+   +------------------+--------+--------------+
+                         o
+                         o
+                         o
+   +------------------+--------+----------+
+   |  Unfragmentable  |Fragment|   last   |
+   |       Part       | Header | fragment |
+   +------------------+--------+----------+
+
+   Each fragment packet is composed of:
+
+      (1) The Unfragmentable Part of the original packet, with the
+          Payload Length of the original IPv6 header changed to contain
+          the length of this fragment packet only (excluding the length
+          of the IPv6 header itself), and the Next Header field of the
+          last header of the Unfragmentable Part changed to 44.
+
+      (2) A Fragment header containing:
+
+               The Next Header value that identifies the first header of
+               the Fragmentable Part of the original packet.
+
+               A Fragment Offset containing the offset of the fragment,
+               in 8-octet units, relative to the start of the
+               Fragmentable Part of the original packet.  The Fragment
+               Offset of the first ("leftmost") fragment is 0.
+
+               An M flag value of 0 if the fragment is the last
+               ("rightmost") one, else an M flag value of 1.
+
+               The Identification value generated for the original
+               packet.
+
+      (3) The fragment itself.
+
+   The lengths of the fragments must be chosen such that the resulting
+   fragment packets fit within the MTU of the path to the packets'
+   destination(s).
+
+
+
+Deering & Hinden            Standards Track                    [Page 20]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+   At the destination, fragment packets are reassembled into their
+   original, unfragmented form, as illustrated:
+
+   reassembled original packet:
+
+   +------------------+----------------------//------------------------+
+   |  Unfragmentable  |                 Fragmentable                   |
+   |       Part       |                     Part                       |
+   +------------------+----------------------//------------------------+
+
+   The following rules govern reassembly:
+
+      An original packet is reassembled only from fragment packets that
+      have the same Source Address, Destination Address, and Fragment
+      Identification.
+
+      The Unfragmentable Part of the reassembled packet consists of all
+      headers up to, but not including, the Fragment header of the first
+      fragment packet (that is, the packet whose Fragment Offset is
+      zero), with the following two changes:
+
+         The Next Header field of the last header of the Unfragmentable
+         Part is obtained from the Next Header field of the first
+         fragment's Fragment header.
+
+         The Payload Length of the reassembled packet is computed from
+         the length of the Unfragmentable Part and the length and offset
+         of the last fragment.  For example, a formula for computing the
+         Payload Length of the reassembled original packet is:
+
+           PL.orig = PL.first - FL.first - 8 + (8 * FO.last) + FL.last
+
+           where
+           PL.orig  = Payload Length field of reassembled packet.
+           PL.first = Payload Length field of first fragment packet.
+           FL.first = length of fragment following Fragment header of
+                      first fragment packet.
+           FO.last  = Fragment Offset field of Fragment header of
+                      last fragment packet.
+           FL.last  = length of fragment following Fragment header of
+                      last fragment packet.
+
+      The Fragmentable Part of the reassembled packet is constructed
+      from the fragments following the Fragment headers in each of the
+      fragment packets.  The length of each fragment is computed by
+      subtracting from the packet's Payload Length the length of the
+
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 21]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+      headers between the IPv6 header and fragment itself; its relative
+      position in Fragmentable Part is computed from its Fragment Offset
+      value.
+
+      The Fragment header is not present in the final, reassembled
+      packet.
+
+   The following error conditions may arise when reassembling fragmented
+   packets:
+
+      If insufficient fragments are received to complete reassembly of a
+      packet within 60 seconds of the reception of the first-arriving
+      fragment of that packet, reassembly of that packet must be
+      abandoned and all the fragments that have been received for that
+      packet must be discarded.  If the first fragment (i.e., the one
+      with a Fragment Offset of zero) has been received, an ICMP Time
+      Exceeded -- Fragment Reassembly Time Exceeded message should be
+      sent to the source of that fragment.
+
+      If the length of a fragment, as derived from the fragment packet's
+      Payload Length field, is not a multiple of 8 octets and the M flag
+      of that fragment is 1, then that fragment must be discarded and an
+      ICMP Parameter Problem, Code 0, message should be sent to the
+      source of the fragment, pointing to the Payload Length field of
+      the fragment packet.
+
+      If the length and offset of a fragment are such that the Payload
+      Length of the packet reassembled from that fragment would exceed
+      65,535 octets, then that fragment must be discarded and an ICMP
+      Parameter Problem, Code 0, message should be sent to the source of
+      the fragment, pointing to the Fragment Offset field of the
+      fragment packet.
+
+   The following conditions are not expected to occur, but are not
+   considered errors if they do:
+
+      The number and content of the headers preceding the Fragment
+      header of different fragments of the same original packet may
+      differ.  Whatever headers are present, preceding the Fragment
+      header in each fragment packet, are processed when the packets
+      arrive, prior to queueing the fragments for reassembly.  Only
+      those headers in the Offset zero fragment packet are retained in
+      the reassembled packet.
+
+      The Next Header values in the Fragment headers of different
+      fragments of the same original packet may differ.  Only the value
+      from the Offset zero fragment packet is used for reassembly.
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 22]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+4.6  Destination Options Header
+
+   The Destination Options header is used to carry optional information
+   that need be examined only by a packet's destination node(s).  The
+   Destination Options header is identified by a Next Header value of 60
+   in the immediately preceding header, and has the following format:
+
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    |  Next Header  |  Hdr Ext Len  |                               |
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+                               +
+    |                                                               |
+    .                                                               .
+    .                            Options                            .
+    .                                                               .
+    |                                                               |
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   Next Header          8-bit selector.  Identifies the type of header
+                        immediately following the Destination Options
+                        header.  Uses the same values as the IPv4
+                        Protocol field [RFC-1700 et seq.].
+
+   Hdr Ext Len          8-bit unsigned integer.  Length of the
+                        Destination Options header in 8-octet units, not
+                        including the first 8 octets.
+
+   Options              Variable-length field, of length such that the
+                        complete Destination Options header is an
+                        integer multiple of 8 octets long.  Contains one
+                        or more TLV-encoded options, as described in
+                        section 4.2.
+
+   The only destination options defined in this document are the Pad1
+   and PadN options specified in section 4.2.
+
+   Note that there are two possible ways to encode optional destination
+   information in an IPv6 packet: either as an option in the Destination
+   Options header, or as a separate extension header.  The Fragment
+   header and the Authentication header are examples of the latter
+   approach.  Which approach can be used depends on what action is
+   desired of a destination node that does not understand the optional
+   information:
+
+      o  If the desired action is for the destination node to discard
+         the packet and, only if the packet's Destination Address is not
+         a multicast address, send an ICMP Unrecognized Type message to
+         the packet's Source Address, then the information may be
+         encoded either as a separate header or as an option in the
+
+
+
+Deering & Hinden            Standards Track                    [Page 23]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+         Destination Options header whose Option Type has the value 11
+         in its highest-order two bits.  The choice may depend on such
+         factors as which takes fewer octets, or which yields better
+         alignment or more efficient parsing.
+
+      o  If any other action is desired, the information must be encoded
+         as an option in the Destination Options header whose Option
+         Type has the value 00, 01, or 10 in its highest-order two bits,
+         specifying the desired action (see section 4.2).
+
+4.7 No Next Header
+
+   The value 59 in the Next Header field of an IPv6 header or any
+   extension header indicates that there is nothing following that
+   header.  If the Payload Length field of the IPv6 header indicates the
+   presence of octets past the end of a header whose Next Header field
+   contains 59, those octets must be ignored, and passed on unchanged if
+   the packet is forwarded.
+
+5. Packet Size Issues
+
+   IPv6 requires that every link in the internet have an MTU of 1280
+   octets or greater.  On any link that cannot convey a 1280-octet
+   packet in one piece, link-specific fragmentation and reassembly must
+   be provided at a layer below IPv6.
+
+   Links that have a configurable MTU (for example, PPP links [RFC-
+   1661]) must be configured to have an MTU of at least 1280 octets; it
+   is recommended that they be configured with an MTU of 1500 octets or
+   greater, to accommodate possible encapsulations (i.e., tunneling)
+   without incurring IPv6-layer fragmentation.
+
+   From each link to which a node is directly attached, the node must be
+   able to accept packets as large as that link's MTU.
+
+   It is strongly recommended that IPv6 nodes implement Path MTU
+   Discovery [RFC-1981], in order to discover and take advantage of path
+   MTUs greater than 1280 octets.  However, a minimal IPv6
+   implementation (e.g., in a boot ROM) may simply restrict itself to
+   sending packets no larger than 1280 octets, and omit implementation
+   of Path MTU Discovery.
+
+   In order to send a packet larger than a path's MTU, a node may use
+   the IPv6 Fragment header to fragment the packet at the source and
+   have it reassembled at the destination(s).  However, the use of such
+   fragmentation is discouraged in any application that is able to
+   adjust its packets to fit the measured path MTU (i.e., down to 1280
+   octets).
+
+
+
+Deering & Hinden            Standards Track                    [Page 24]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+   A node must be able to accept a fragmented packet that, after
+   reassembly, is as large as 1500 octets.  A node is permitted to
+   accept fragmented packets that reassemble to more than 1500 octets.
+   An upper-layer protocol or application that depends on IPv6
+   fragmentation to send packets larger than the MTU of a path should
+   not send packets larger than 1500 octets unless it has assurance that
+   the destination is capable of reassembling packets of that larger
+   size.
+
+   In response to an IPv6 packet that is sent to an IPv4 destination
+   (i.e., a packet that undergoes translation from IPv6 to IPv4), the
+   originating IPv6 node may receive an ICMP Packet Too Big message
+   reporting a Next-Hop MTU less than 1280.  In that case, the IPv6 node
+   is not required to reduce the size of subsequent packets to less than
+   1280, but must include a Fragment header in those packets so that the
+   IPv6-to-IPv4 translating router can obtain a suitable Identification
+   value to use in resulting IPv4 fragments.  Note that this means the
+   payload may have to be reduced to 1232 octets (1280 minus 40 for the
+   IPv6 header and 8 for the Fragment header), and smaller still if
+   additional extension headers are used.
+
+6.  Flow Labels
+
+   The 20-bit Flow Label field in the IPv6 header may be used by a
+   source to label sequences of packets for which it requests special
+   handling by the IPv6 routers, such as non-default quality of service
+   or "real-time" service.  This aspect of IPv6 is, at the time of
+   writing, still experimental and subject to change as the requirements
+   for flow support in the Internet become clearer.  Hosts or routers
+   that do not support the functions of the Flow Label field are
+   required to set the field to zero when originating a packet, pass the
+   field on unchanged when forwarding a packet, and ignore the field
+   when receiving a packet.
+
+   Appendix A describes the current intended semantics and usage of the
+   Flow Label field.
+
+7.  Traffic Classes
+
+   The 8-bit Traffic Class field in the IPv6 header is available for use
+   by originating nodes and/or forwarding routers to identify and
+   distinguish between different classes or priorities of IPv6 packets.
+   At the point in time at which this specification is being written,
+   there are a number of experiments underway in the use of the IPv4
+   Type of Service and/or Precedence bits to provide various forms of
+   "differentiated service" for IP packets, other than through the use
+   of explicit flow set-up.  The Traffic Class field in the IPv6 header
+   is intended to allow similar functionality to be supported in IPv6.
+
+
+
+Deering & Hinden            Standards Track                    [Page 25]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+   It is hoped that those experiments will eventually lead to agreement
+   on what sorts of traffic classifications are most useful for IP
+   packets.  Detailed definitions of the syntax and semantics of all or
+   some of the IPv6 Traffic Class bits, whether experimental or intended
+   for eventual standardization, are to be provided in separate
+   documents.
+
+   The following general requirements apply to the Traffic Class field:
+
+      o  The service interface to the IPv6 service within a node must
+         provide a means for an upper-layer protocol to supply the value
+         of the Traffic Class bits in packets originated by that upper-
+         layer protocol.  The default value must be zero for all 8 bits.
+
+      o  Nodes that support a specific (experimental or eventual
+         standard) use of some or all of the Traffic Class bits are
+         permitted to change the value of those bits in packets that
+         they originate, forward, or receive, as required for that
+         specific use.  Nodes should ignore and leave unchanged any bits
+         of the Traffic Class field for which they do not support a
+         specific use.
+
+      o  An upper-layer protocol must not assume that the value of the
+         Traffic Class bits in a received packet is the same as the
+         value sent by the packet's source.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 26]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+8. Upper-Layer Protocol Issues
+
+8.1 Upper-Layer Checksums
+
+   Any transport or other upper-layer protocol that includes the
+   addresses from the IP header in its checksum computation must be
+   modified for use over IPv6, to include the 128-bit IPv6 addresses
+   instead of 32-bit IPv4 addresses.  In particular, the following
+   illustration shows the TCP and UDP "pseudo-header" for IPv6:
+
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                                                               |
+   +                                                               +
+   |                                                               |
+   +                         Source Address                        +
+   |                                                               |
+   +                                                               +
+   |                                                               |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                                                               |
+   +                                                               +
+   |                                                               |
+   +                      Destination Address                      +
+   |                                                               |
+   +                                                               +
+   |                                                               |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                   Upper-Layer Packet Length                   |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                      zero                     |  Next Header  |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+      o  If the IPv6 packet contains a Routing header, the Destination
+         Address used in the pseudo-header is that of the final
+         destination.  At the originating node, that address will be in
+         the last element of the Routing header; at the recipient(s),
+         that address will be in the Destination Address field of the
+         IPv6 header.
+
+      o  The Next Header value in the pseudo-header identifies the
+         upper-layer protocol (e.g., 6 for TCP, or 17 for UDP).  It will
+         differ from the Next Header value in the IPv6 header if there
+         are extension headers between the IPv6 header and the upper-
+         layer header.
+
+      o  The Upper-Layer Packet Length in the pseudo-header is the
+         length of the upper-layer header and data (e.g., TCP header
+         plus TCP data).  Some upper-layer protocols carry their own
+
+
+
+Deering & Hinden            Standards Track                    [Page 27]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+         length information (e.g., the Length field in the UDP header);
+         for such protocols, that is the length used in the pseudo-
+         header.  Other protocols (such as TCP) do not carry their own
+         length information, in which case the length used in the
+         pseudo-header is the Payload Length from the IPv6 header, minus
+         the length of any extension headers present between the IPv6
+         header and the upper-layer header.
+
+      o  Unlike IPv4, when UDP packets are originated by an IPv6 node,
+         the UDP checksum is not optional.  That is, whenever
+         originating a UDP packet, an IPv6 node must compute a UDP
+         checksum over the packet and the pseudo-header, and, if that
+         computation yields a result of zero, it must be changed to hex
+         FFFF for placement in the UDP header.  IPv6 receivers must
+         discard UDP packets containing a zero checksum, and should log
+         the error.
+
+   The IPv6 version of ICMP [ICMPv6] includes the above pseudo-header in
+   its checksum computation; this is a change from the IPv4 version of
+   ICMP, which does not include a pseudo-header in its checksum.  The
+   reason for the change is to protect ICMP from misdelivery or
+   corruption of those fields of the IPv6 header on which it depends,
+   which, unlike IPv4, are not covered by an internet-layer checksum.
+   The Next Header field in the pseudo-header for ICMP contains the
+   value 58, which identifies the IPv6 version of ICMP.
+
+8.2 Maximum Packet Lifetime
+
+   Unlike IPv4, IPv6 nodes are not required to enforce maximum packet
+   lifetime.  That is the reason the IPv4 "Time to Live" field was
+   renamed "Hop Limit" in IPv6.  In practice, very few, if any, IPv4
+   implementations conform to the requirement that they limit packet
+   lifetime, so this is not a change in practice.  Any upper-layer
+   protocol that relies on the internet layer (whether IPv4 or IPv6) to
+   limit packet lifetime ought to be upgraded to provide its own
+   mechanisms for detecting and discarding obsolete packets.
+
+8.3 Maximum Upper-Layer Payload Size
+
+   When computing the maximum payload size available for upper-layer
+   data, an upper-layer protocol must take into account the larger size
+   of the IPv6 header relative to the IPv4 header.  For example, in
+   IPv4, TCP's MSS option is computed as the maximum packet size (a
+   default value or a value learned through Path MTU Discovery) minus 40
+   octets (20 octets for the minimum-length IPv4 header and 20 octets
+   for the minimum-length TCP header).  When using TCP over IPv6, the
+   MSS must be computed as the maximum packet size minus 60 octets,
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 28]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+   because the minimum-length IPv6 header (i.e., an IPv6 header with no
+   extension headers) is 20 octets longer than a minimum-length IPv4
+   header.
+
+8.4 Responding to Packets Carrying Routing Headers
+
+   When an upper-layer protocol sends one or more packets in response to
+   a received packet that included a Routing header, the response
+   packet(s) must not include a Routing header that was automatically
+   derived by "reversing" the received Routing header UNLESS the
+   integrity and authenticity of the received Source Address and Routing
+   header have been verified (e.g., via the use of an Authentication
+   header in the received packet).  In other words, only the following
+   kinds of packets are permitted in response to a received packet
+   bearing a Routing header:
+
+      o  Response packets that do not carry Routing headers.
+
+      o  Response packets that carry Routing headers that were NOT
+         derived by reversing the Routing header of the received packet
+         (for example, a Routing header supplied by local
+         configuration).
+
+      o  Response packets that carry Routing headers that were derived
+         by reversing the Routing header of the received packet IF AND
+         ONLY IF the integrity and authenticity of the Source Address
+         and Routing header from the received packet have been verified
+         by the responder.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 29]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+Appendix A. Semantics and Usage of the Flow Label Field
+
+   A flow is a sequence of packets sent from a particular source to a
+   particular (unicast or multicast) destination for which the source
+   desires special handling by the intervening routers.  The nature of
+   that special handling might be conveyed to the routers by a control
+   protocol, such as a resource reservation protocol, or by information
+   within the flow's packets themselves, e.g., in a hop-by-hop option.
+   The details of such control protocols or options are beyond the scope
+   of this document.
+
+   There may be multiple active flows from a source to a destination, as
+   well as traffic that is not associated with any flow.  A flow is
+   uniquely identified by the combination of a source address and a
+   non-zero flow label.  Packets that do not belong to a flow carry a
+   flow label of zero.
+
+   A flow label is assigned to a flow by the flow's source node.  New
+   flow labels must be chosen (pseudo-)randomly and uniformly from the
+   range 1 to FFFFF hex.  The purpose of the random allocation is to
+   make any set of bits within the Flow Label field suitable for use as
+   a hash key by routers, for looking up the state associated with the
+   flow.
+
+   All packets belonging to the same flow must be sent with the same
+   source address, destination address, and flow label.  If any of those
+   packets includes a Hop-by-Hop Options header, then they all must be
+   originated with the same Hop-by-Hop Options header contents
+   (excluding the Next Header field of the Hop-by-Hop Options header).
+   If any of those packets includes a Routing header, then they all must
+   be originated with the same contents in all extension headers up to
+   and including the Routing header (excluding the Next Header field in
+   the Routing header).  The routers or destinations are permitted, but
+   not required, to verify that these conditions are satisfied.  If a
+   violation is detected, it should be reported to the source by an ICMP
+   Parameter Problem message, Code 0, pointing to the high-order octet
+   of the Flow Label field (i.e., offset 1 within the IPv6 packet).
+
+   The maximum lifetime of any flow-handling state established along a
+   flow's path must be specified as part of the description of the
+   state-establishment mechanism, e.g., the resource reservation
+   protocol or the flow-setup hop-by-hop option.  A source must not re-
+   use a flow label for a new flow within the maximum lifetime of any
+   flow-handling state that might have been established for the prior
+   use of that flow label.
+
+
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 30]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+   When a node stops and restarts (e.g., as a result of a "crash"), it
+   must be careful not to use a flow label that it might have used for
+   an earlier flow whose lifetime may not have expired yet.  This may be
+   accomplished by recording flow label usage on stable storage so that
+   it can be remembered across crashes, or by refraining from using any
+   flow labels until the maximum lifetime of any possible previously
+   established flows has expired.  If the minimum time for rebooting the
+   node is known, that time can be deducted from the necessary waiting
+   period before starting to allocate flow labels.
+
+   There is no requirement that all, or even most, packets belong to
+   flows, i.e., carry non-zero flow labels.  This observation is placed
+   here to remind protocol designers and implementors not to assume
+   otherwise.  For example, it would be unwise to design a router whose
+   performance would be adequate only if most packets belonged to flows,
+   or to design a header compression scheme that only worked on packets
+   that belonged to flows.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 31]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+Appendix B. Formatting Guidelines for Options
+
+   This appendix gives some advice on how to lay out the fields when
+   designing new options to be used in the Hop-by-Hop Options header or
+   the Destination Options header, as described in section 4.2.  These
+   guidelines are based on the following assumptions:
+
+      o  One desirable feature is that any multi-octet fields within the
+         Option Data area of an option be aligned on their natural
+         boundaries, i.e., fields of width n octets should be placed at
+         an integer multiple of n octets from the start of the Hop-by-
+         Hop or Destination Options header, for n = 1, 2, 4, or 8.
+
+      o  Another desirable feature is that the Hop-by-Hop or Destination
+         Options header take up as little space as possible, subject to
+         the requirement that the header be an integer multiple of 8
+         octets long.
+
+      o  It may be assumed that, when either of the option-bearing
+         headers is present, it carries a very small number of options,
+         usually only one.
+
+   These assumptions suggest the following approach to laying out the
+   fields of an option: order the fields from smallest to largest, with
+   no interior padding, then derive the alignment requirement for the
+   entire option based on the alignment requirement of the largest field
+   (up to a maximum alignment of 8 octets).  This approach is
+   illustrated in the following examples:
+
+   Example 1
+
+   If an option X required two data fields, one of length 8 octets and
+   one of length 4 octets, it would be laid out as follows:
+
+
+                                   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+                                   | Option Type=X |Opt Data Len=12|
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                         4-octet field                         |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                                                               |
+   +                         8-octet field                         +
+   |                                                               |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 32]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+   Its alignment requirement is 8n+2, to ensure that the 8-octet field
+   starts at a multiple-of-8 offset from the start of the enclosing
+   header.  A complete Hop-by-Hop or Destination Options header
+   containing this one option would look as follows:
+
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |  Next Header  | Hdr Ext Len=1 | Option Type=X |Opt Data Len=12|
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                         4-octet field                         |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                                                               |
+   +                         8-octet field                         +
+   |                                                               |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   Example 2
+
+   If an option Y required three data fields, one of length 4 octets,
+   one of length 2 octets, and one of length 1 octet, it would be laid
+   out as follows:
+
+                                                   +-+-+-+-+-+-+-+-+
+                                                   | Option Type=Y |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |Opt Data Len=7 | 1-octet field |         2-octet field         |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                         4-octet field                         |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   Its alignment requirement is 4n+3, to ensure that the 4-octet field
+   starts at a multiple-of-4 offset from the start of the enclosing
+   header.  A complete Hop-by-Hop or Destination Options header
+   containing this one option would look as follows:
+
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |  Next Header  | Hdr Ext Len=1 | Pad1 Option=0 | Option Type=Y |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |Opt Data Len=7 | 1-octet field |         2-octet field         |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                         4-octet field                         |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   | PadN Option=1 |Opt Data Len=2 |       0       |       0       |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 33]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+   Example 3
+
+   A Hop-by-Hop or Destination Options header containing both options X
+   and Y from Examples 1 and 2 would have one of the two following
+   formats, depending on which option appeared first:
+
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |  Next Header  | Hdr Ext Len=3 | Option Type=X |Opt Data Len=12|
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                         4-octet field                         |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                                                               |
+   +                         8-octet field                         +
+   |                                                               |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   | PadN Option=1 |Opt Data Len=1 |       0       | Option Type=Y |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |Opt Data Len=7 | 1-octet field |         2-octet field         |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                         4-octet field                         |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   | PadN Option=1 |Opt Data Len=2 |       0       |       0       |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |  Next Header  | Hdr Ext Len=3 | Pad1 Option=0 | Option Type=Y |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |Opt Data Len=7 | 1-octet field |         2-octet field         |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                         4-octet field                         |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   | PadN Option=1 |Opt Data Len=4 |       0       |       0       |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |       0       |       0       | Option Type=X |Opt Data Len=12|
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                         4-octet field                         |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                                                               |
+   +                         8-octet field                         +
+   |                                                               |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+
+
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 34]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+Security Considerations
+
+   The security features of IPv6 are described in the Security
+   Architecture for the Internet Protocol [RFC-2401].
+
+Acknowledgments
+
+   The authors gratefully acknowledge the many helpful suggestions of
+   the members of the IPng working group, the End-to-End Protocols
+   research group, and the Internet Community At Large.
+
+Authors' Addresses
+
+   Stephen E. Deering
+   Cisco Systems, Inc.
+   170 West Tasman Drive
+   San Jose, CA 95134-1706
+   USA
+
+   Phone: +1 408 527 8213
+   Fax:   +1 408 527 8254
+   EMail: deering@cisco.com
+
+
+   Robert M. Hinden
+   Nokia
+   232 Java Drive
+   Sunnyvale, CA 94089
+   USA
+
+   Phone: +1 408 990-2004
+   Fax:   +1 408 743-5677
+   EMail: hinden@iprg.nokia.com
+
+References
+
+   [RFC-2401]   Kent, S. and R. Atkinson, "Security Architecture for the
+                Internet Protocol", RFC 2401, November 1998.
+
+   [RFC-2402]   Kent, S. and R. Atkinson, "IP Authentication Header",
+                RFC 2402, November 1998.
+
+   [RFC-2406]   Kent, S. and R. Atkinson, "IP Encapsulating Security
+                Protocol (ESP)", RFC 2406, November 1998.
+
+   [ICMPv6]     Conta, A. and S. Deering, "ICMP for the Internet
+                Protocol Version 6 (IPv6)", RFC 2463, December 1998.
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 35]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+   [ADDRARCH]   Hinden, R. and S. Deering, "IP Version 6 Addressing
+                Architecture", RFC 2373, July 1998.
+
+   [RFC-1981]   McCann, J., Mogul, J. and S. Deering, "Path MTU
+                Discovery for IP version 6", RFC 1981, August 1996.
+
+   [RFC-791]    Postel, J., "Internet Protocol", STD 5, RFC 791,
+                September 1981.
+
+   [RFC-1700]   Reynolds, J. and J. Postel, "Assigned Numbers", STD 2,
+                RFC 1700, October 1994.  See also:
+                http://www.iana.org/numbers.html
+
+   [RFC-1661]   Simpson, W., "The Point-to-Point Protocol (PPP)", STD
+                51, RFC 1661, July 1994.
+
+CHANGES SINCE RFC-1883
+
+   This memo has the following changes from RFC-1883.  Numbers identify
+   the Internet-Draft version in which the change was made.
+
+    02) Removed all references to jumbograms and the Jumbo Payload
+        option (moved to a separate document).
+
+    02) Moved most of Flow Label description from section 6 to (new)
+        Appendix A.
+
+    02) In Flow Label description, now in Appendix A, corrected maximum
+        Flow Label value from FFFFFF to FFFFF (i.e., one less "F") due
+        to reduction of size of Flow Label field from 24 bits to 20
+        bits.
+
+    02) Renumbered (relettered?) the previous Appendix A to be Appendix
+        B.
+
+    02) Changed the wording of the Security Considerations section to
+        avoid dependency loop between this spec and the IPsec specs.
+
+    02) Updated R. Hinden's email address and company affiliation.
+
+
+        --------------------------------------------------------
+
+    01) In section 3, changed field name "Class" to "Traffic Class" and
+        increased its size from 4 to 8 bits.  Decreased size of Flow
+        Label field from 24 to 20 bits to compensate for increase in
+        Traffic Class field.
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 36]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+    01) In section 4.1, restored the order of the Authentication Header
+        and the ESP header, which were mistakenly swapped in the 00
+        version of this memo.
+
+    01) In section 4.4, deleted the Strict/Loose Bit Map field and the
+        strict routing functionality from the Type 0 Routing header, and
+        removed the restriction on number of addresses that may be
+        carried in the Type 0 Routing header (was limited to 23
+        addresses, because of the size of the strict/loose bit map).
+
+    01) In section 5, changed the minimum IPv6 MTU from 576 to 1280
+        octets, and added a recommendation that links with configurable
+        MTU (e.g., PPP links) be configured to have an MTU of at least
+        1500 octets.
+
+    01) In section 5, deleted the requirement that a node must not send
+        fragmented packets that reassemble to more than 1500 octets
+        without knowledge of the destination reassembly buffer size, and
+        replaced it with a recommendation that upper-layer protocols or
+        applications should not do that.
+
+    01) Replaced reference to the IPv4 Path MTU Discovery spec (RFC-
+        1191) with reference to the IPv6 Path MTU Discovery spec (RFC-
+        1981), and deleted the Notes at the end of section 5 regarding
+        Path MTU Discovery, since those details are now covered by RFC-
+        1981.
+
+    01) In section 6, deleted specification of "opportunistic" flow
+        set-up, and removed all references to the 6-second maximum
+        lifetime for opportunistically established flow state.
+
+    01) In section 7, deleted the provisional description of the
+        internal structure and semantics of the Traffic Class field, and
+        specified that such descriptions be provided in separate
+        documents.
+
+        --------------------------------------------------------
+
+    00) In section 4, corrected the Code value to indicate "unrecognized
+        Next Header type encountered" in an ICMP Parameter Problem
+        message (changed from 2 to 1).
+
+    00) In the description of the Payload Length field in section 3, and
+        of the Jumbo Payload Length field in section 4.3, made it
+        clearer that extension headers are included in the payload
+        length count.
+
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 37]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+    00) In section 4.1, swapped the order of the Authentication header
+        and the ESP header.  (NOTE: this was a mistake, and the change
+        was undone in version 01.)
+
+    00) In section 4.2, made it clearer that options are identified by
+        the full 8-bit Option Type, not by the low-order 5 bits of an
+        Option Type.  Also specified that the same Option Type numbering
+        space is used for both Hop-by-Hop Options and Destination
+        Options headers.
+
+    00) In section 4.4, added a sentence requiring that nodes processing
+        a Routing header must send an ICMP Packet Too Big message in
+        response to a packet that is too big to fit in the next hop link
+        (rather than, say, performing fragmentation).
+
+    00) Changed the name of the IPv6 Priority field to "Class", and
+        replaced the previous description of Priority in section 7 with
+        a description of the Class field.  Also, excluded this field
+        from the set of fields that must remain the same for all packets
+        in the same flow, as specified in section 6.
+
+    00) In the pseudo-header in section 8.1, changed the name of the
+        "Payload Length" field to "Upper-Layer Packet Length".  Also
+        clarified that, in the case of protocols that carry their own
+        length info (like non-jumbogram UDP), it is the upper-layer-
+        derived length, not the IP-layer-derived length, that is used in
+        the pseudo-header.
+
+    00) Added section 8.4, specifying that upper-layer protocols, when
+        responding to a received packet that carried a Routing header,
+        must not include the reverse of the Routing header in the
+        response packet(s) unless the received Routing header was
+        authenticated.
+
+    00) Fixed some typos and grammatical errors.
+
+    00) Authors' contact info updated.
+
+        --------------------------------------------------------
+
+
+
+
+
+
+
+
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 38]
+
+RFC 2460                   IPv6 Specification              December 1998
+
+
+Full Copyright Statement
+
+   Copyright (C) The Internet Society (1998).  All Rights Reserved.
+
+   This document and translations of it may be copied and furnished to
+   others, and derivative works that comment on or otherwise explain it
+   or assist in its implementation may be prepared, copied, published
+   and distributed, in whole or in part, without restriction of any
+   kind, provided that the above copyright notice and this paragraph are
+   included on all such copies and derivative works.  However, this
+   document itself may not be modified in any way, such as by removing
+   the copyright notice or references to the Internet Society or other
+   Internet organizations, except as needed for the purpose of
+   developing Internet standards in which case the procedures for
+   copyrights defined in the Internet Standards process must be
+   followed, or as required to translate it into languages other than
+   English.
+
+   The limited permissions granted above are perpetual and will not be
+   revoked by the Internet Society or its successors or assigns.
+
+   This document and the information contained herein is provided on an
+   "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
+   TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
+   BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
+   HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
+   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Deering & Hinden            Standards Track                    [Page 39]
+
diff --git a/ext/picotcp/RFC/rfc2525.txt b/ext/picotcp/RFC/rfc2525.txt
new file mode 100644
index 0000000..d5f30e8
--- /dev/null
+++ b/ext/picotcp/RFC/rfc2525.txt
@@ -0,0 +1,3419 @@
+
+
+
+
+
+
+Network Working Group                                          V. Paxson
+Request for Comments: 2525                                        Editor
+Category: Informational                                     ACIRI / ICSI
+                                                               M. Allman
+                            NASA Glenn Research Center/Sterling Software
+                                                               S. Dawson
+                                          Real-Time Computing Laboratory
+                                                               W. Fenner
+                                                              Xerox PARC
+                                                               J. Griner
+                                              NASA Glenn Research Center
+                                                              I. Heavens
+                                                    Spider Software Ltd.
+                                                                K. Lahey
+                                           NASA Ames Research Center/MRJ
+                                                                J. Semke
+                                        Pittsburgh Supercomputing Center
+                                                                 B. Volz
+                                            Process Software Corporation
+                                                              March 1999
+
+
+                   Known TCP Implementation Problems
+
+Status of this Memo
+
+   This memo provides information for the Internet community.  It does
+   not specify an Internet standard of any kind.  Distribution of this
+   memo is unlimited.
+
+Copyright Notice
+
+   Copyright (C) The Internet Society (1999).  All Rights Reserved.
+
+Table of Contents
+
+   1.  INTRODUCTION....................................................2
+   2.  KNOWN IMPLEMENTATION PROBLEMS...................................3
+     2.1  No initial slow start........................................3
+     2.2  No slow start after retransmission timeout...................6
+     2.3  Uninitialized CWND...........................................9
+     2.4  Inconsistent retransmission.................................11
+     2.5  Failure to retain above-sequence data.......................13
+     2.6  Extra additive constant in congestion avoidance.............17
+     2.7  Initial RTO too low.........................................23
+     2.8  Failure of window deflation after loss recovery.............26
+     2.9  Excessively short keepalive connection timeout..............28
+     2.10 Failure to back off retransmission timeout..................31
+
+
+
+Paxson, et. al.              Informational                      [Page 1]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+     2.11 Insufficient interval between keepalives....................34
+     2.12 Window probe deadlock.......................................36
+     2.13 Stretch ACK violation.......................................40
+     2.14 Retransmission sends multiple packets.......................43
+     2.15 Failure to send FIN notification promptly...................45
+     2.16 Failure to send a RST after Half Duplex Close...............47
+     2.17 Failure to RST on close with data pending...................50
+     2.18 Options missing from TCP MSS calculation....................54
+   3.  SECURITY CONSIDERATIONS........................................56
+   4.  ACKNOWLEDGEMENTS...............................................56
+   5.  REFERENCES.....................................................57
+   6.  AUTHORS' ADDRESSES.............................................58
+   7.  FULL COPYRIGHT STATEMENT.......................................60
+
+1. Introduction
+
+   This memo catalogs a number of known TCP implementation problems.
+   The goal in doing so is to improve conditions in the existing
+   Internet by enhancing the quality of current TCP/IP implementations.
+   It is hoped that both performance and correctness issues can be
+   resolved by making implementors aware of the problems and their
+   solutions.  In the long term, it is hoped that this will provide a
+   reduction in unnecessary traffic on the network, the rate of
+   connection failures due to protocol errors, and load on network
+   servers due to time spent processing both unsuccessful connections
+   and retransmitted data.  This will help to ensure the stability of
+   the global Internet.
+
+   Each problem is defined as follows:
+
+   Name of Problem
+      The name associated with the problem.  In this memo, the name is
+      given as a subsection heading.
+
+   Classification
+      One or more problem categories for which the problem is
+      classified:  "congestion control", "performance", "reliability",
+      "resource management".
+
+   Description
+      A definition of the problem, succinct but including necessary
+      background material.
+
+   Significance
+      A brief summary of the sorts of environments for which the problem
+      is significant.
+
+
+
+
+
+Paxson, et. al.              Informational                      [Page 2]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   Implications
+      Why the problem is viewed as a problem.
+
+   Relevant RFCs
+      The RFCs defining the TCP specification with which the problem
+      conflicts.  These RFCs often qualify behavior using terms such as
+      MUST, SHOULD, MAY, and others written capitalized.  See RFC 2119
+      for the exact interpretation of these terms.
+
+   Trace file demonstrating the problem
+      One or more ASCII trace files demonstrating the problem, if
+      applicable.
+
+   Trace file demonstrating correct behavior
+      One or more examples of how correct behavior appears in a trace,
+      if applicable.
+
+   References
+      References that further discuss the problem.
+
+   How to detect
+      How to test an implementation to see if it exhibits the problem.
+      This discussion may include difficulties and subtleties associated
+      with causing the problem to manifest itself, and with interpreting
+      traces to detect the presence of the problem (if applicable).
+
+   How to fix
+      For known causes of the problem, how to correct the
+      implementation.
+
+2. Known implementation problems
+
+2.1.
+
+   Name of Problem
+      No initial slow start
+
+   Classification
+      Congestion control
+
+   Description
+      When a TCP begins transmitting data, it is required by RFC 1122,
+      4.2.2.15, to engage in a "slow start" by initializing its
+      congestion window, cwnd, to one packet (one segment of the maximum
+      size).  (Note that an experimental change to TCP, documented in
+      [RFC2414], allows an initial value somewhat larger than one
+      packet.)  It subsequently increases cwnd by one packet for each
+      ACK it receives for new data.  The minimum of cwnd and the
+
+
+
+Paxson, et. al.              Informational                      [Page 3]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+      receiver's advertised window bounds the highest sequence number
+      the TCP can transmit.  A TCP that fails to initialize and
+      increment cwnd in this fashion exhibits "No initial slow start".
+
+   Significance
+      In congested environments, detrimental to the performance of other
+      connections, and possibly to the connection itself.
+
+   Implications
+      A TCP failing to slow start when beginning a connection results in
+      traffic bursts that can stress the network, leading to excessive
+      queueing delays and packet loss.
+
+      Implementations exhibiting this problem might do so because they
+      suffer from the general problem of not including the required
+      congestion window.  These implementations will also suffer from
+      "No slow start after retransmission timeout".
+
+      There are different shades of "No initial slow start".  From the
+      perspective of stressing the network, the worst is a connection
+      that simply always sends based on the receiver's advertised
+      window, with no notion of a separate congestion window.  Another
+      form is described in "Uninitialized CWND" below.
+
+   Relevant RFCs
+      RFC 1122 requires use of slow start.  RFC 2001 gives the specifics
+      of slow start.
+
+   Trace file demonstrating it
+      Made using tcpdump [Jacobson89] recording at the connection
+      responder.  No losses reported by the packet filter.
+
+   10:40:42.244503 B > A: S 1168512000:1168512000(0) win 32768
+                           <mss 1460,nop,wscale 0> (DF) [tos 0x8]
+   10:40:42.259908 A > B: S 3688169472:3688169472(0)
+                           ack 1168512001 win 32768 <mss 1460>
+   10:40:42.389992 B > A: . ack 1 win 33580 (DF) [tos 0x8]
+   10:40:42.664975 A > B: P 1:513(512) ack 1 win 32768
+   10:40:42.700185 A > B: . 513:1973(1460) ack 1 win 32768
+   10:40:42.718017 A > B: . 1973:3433(1460) ack 1 win 32768
+   10:40:42.762945 A > B: . 3433:4893(1460) ack 1 win 32768
+   10:40:42.811273 A > B: . 4893:6353(1460) ack 1 win 32768
+   10:40:42.829149 A > B: . 6353:7813(1460) ack 1 win 32768
+   10:40:42.853687 B > A: . ack 1973 win 33580 (DF) [tos 0x8]
+   10:40:42.864031 B > A: . ack 3433 win 33580 (DF) [tos 0x8]
+
+
+
+
+
+
+Paxson, et. al.              Informational                      [Page 4]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+      After the third packet, the connection is established.  A, the
+      connection responder, begins transmitting to B, the connection
+      initiator.  Host A quickly sends 6 packets comprising 7812 bytes,
+      even though the SYN exchange agreed upon an MSS of 1460 bytes
+      (implying an initial congestion window of 1 segment corresponds to
+      1460 bytes), and so A should have sent at most 1460 bytes.
+
+      The ACKs sent by B to A in the last two lines indicate that this
+      trace is not a measurement error (slow start really occurring but
+      the corresponding ACKs having been dropped by the packet filter).
+
+      A second trace confirmed that the problem is repeatable.
+
+   Trace file demonstrating correct behavior
+      Made using tcpdump recording at the connection originator.  No
+      losses reported by the packet filter.
+
+   12:35:31.914050 C > D: S 1448571845:1448571845(0)
+                            win 4380 <mss 1460>
+   12:35:32.068819 D > C: S 1755712000:1755712000(0)
+                            ack 1448571846 win 4096
+   12:35:32.069341 C > D: . ack 1 win 4608
+   12:35:32.075213 C > D: P 1:513(512) ack 1 win 4608
+   12:35:32.286073 D > C: . ack 513 win 4096
+   12:35:32.287032 C > D: . 513:1025(512) ack 1 win 4608
+   12:35:32.287506 C > D: . 1025:1537(512) ack 1 win 4608
+   12:35:32.432712 D > C: . ack 1537 win 4096
+   12:35:32.433690 C > D: . 1537:2049(512) ack 1 win 4608
+   12:35:32.434481 C > D: . 2049:2561(512) ack 1 win 4608
+   12:35:32.435032 C > D: . 2561:3073(512) ack 1 win 4608
+   12:35:32.594526 D > C: . ack 3073 win 4096
+   12:35:32.595465 C > D: . 3073:3585(512) ack 1 win 4608
+   12:35:32.595947 C > D: . 3585:4097(512) ack 1 win 4608
+   12:35:32.596414 C > D: . 4097:4609(512) ack 1 win 4608
+   12:35:32.596888 C > D: . 4609:5121(512) ack 1 win 4608
+   12:35:32.733453 D > C: . ack 4097 win 4096
+
+   References
+      This problem is documented in [Paxson97].
+
+   How to detect
+      For implementations always manifesting this problem, it shows up
+      immediately in a packet trace or a sequence plot, as illustrated
+      above.
+
+
+
+
+
+
+
+Paxson, et. al.              Informational                      [Page 5]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   How to fix
+      If the root problem is that the implementation lacks a notion of a
+      congestion window, then unfortunately this requires significant
+      work to fix.  However, doing so is important, as such
+      implementations also exhibit "No slow start after retransmission
+      timeout".
+
+2.2.
+
+   Name of Problem
+      No slow start after retransmission timeout
+
+   Classification
+      Congestion control
+
+   Description
+      When a TCP experiences a retransmission timeout, it is required by
+      RFC 1122, 4.2.2.15, to engage in "slow start" by initializing its
+      congestion window, cwnd, to one packet (one segment of the maximum
+      size).  It subsequently increases cwnd by one packet for each ACK
+      it receives for new data until it reaches the "congestion
+      avoidance" threshold, ssthresh, at which point the congestion
+      avoidance algorithm for updating the window takes over.  A TCP
+      that fails to enter slow start upon a timeout exhibits "No slow
+      start after retransmission timeout".
+
+   Significance
+      In congested environments, severely detrimental to the performance
+      of other connections, and also the connection itself.
+
+   Implications
+      Entering slow start upon timeout forms one of the cornerstones of
+      Internet congestion stability, as outlined in [Jacobson88].  If
+      TCPs fail to do so, the network becomes at risk of suffering
+      "congestion collapse" [RFC896].
+
+   Relevant RFCs
+      RFC 1122 requires use of slow start after loss.  RFC 2001 gives
+      the specifics of how to implement slow start.  RFC 896 describes
+      congestion collapse.
+
+      The retransmission timeout discussed here should not be confused
+      with the separate "fast recovery" retransmission mechanism
+      discussed in RFC 2001.
+
+   Trace file demonstrating it
+      Made using tcpdump recording at the sending TCP (A).  No losses
+      reported by the packet filter.
+
+
+
+Paxson, et. al.              Informational                      [Page 6]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   10:40:59.090612 B > A: . ack 357125 win 33580 (DF) [tos 0x8]
+   10:40:59.222025 A > B: . 357125:358585(1460) ack 1 win 32768
+   10:40:59.868871 A > B: . 357125:358585(1460) ack 1 win 32768
+   10:41:00.016641 B > A: . ack 364425 win 33580 (DF) [tos 0x8]
+   10:41:00.036709 A > B: . 364425:365885(1460) ack 1 win 32768
+   10:41:00.045231 A > B: . 365885:367345(1460) ack 1 win 32768
+   10:41:00.053785 A > B: . 367345:368805(1460) ack 1 win 32768
+   10:41:00.062426 A > B: . 368805:370265(1460) ack 1 win 32768
+   10:41:00.071074 A > B: . 370265:371725(1460) ack 1 win 32768
+   10:41:00.079794 A > B: . 371725:373185(1460) ack 1 win 32768
+   10:41:00.089304 A > B: . 373185:374645(1460) ack 1 win 32768
+   10:41:00.097738 A > B: . 374645:376105(1460) ack 1 win 32768
+   10:41:00.106409 A > B: . 376105:377565(1460) ack 1 win 32768
+   10:41:00.115024 A > B: . 377565:379025(1460) ack 1 win 32768
+   10:41:00.123576 A > B: . 379025:380485(1460) ack 1 win 32768
+   10:41:00.132016 A > B: . 380485:381945(1460) ack 1 win 32768
+   10:41:00.141635 A > B: . 381945:383405(1460) ack 1 win 32768
+   10:41:00.150094 A > B: . 383405:384865(1460) ack 1 win 32768
+   10:41:00.158552 A > B: . 384865:386325(1460) ack 1 win 32768
+   10:41:00.167053 A > B: . 386325:387785(1460) ack 1 win 32768
+   10:41:00.175518 A > B: . 387785:389245(1460) ack 1 win 32768
+   10:41:00.210835 A > B: . 389245:390705(1460) ack 1 win 32768
+   10:41:00.226108 A > B: . 390705:392165(1460) ack 1 win 32768
+   10:41:00.241524 B > A: . ack 389245 win 8760 (DF) [tos 0x8]
+
+      The first packet indicates the ack point is 357125.  130 msec
+      after receiving the ACK, A transmits the packet after the ACK
+      point, 357125:358585.  640 msec after this transmission, it
+      retransmits 357125:358585, in an apparent retransmission timeout.
+      At this point, A's cwnd should be one MSS, or 1460 bytes, as A
+      enters slow start.  The trace is consistent with this possibility.
+
+      B replies with an ACK of 364425, indicating that A has filled a
+      sequence hole.  At this point, A's cwnd should be 1460*2 = 2920
+      bytes, since in slow start receiving an ACK advances cwnd by MSS.
+      However, A then launches 19 consecutive packets, which is
+      inconsistent with slow start.
+
+      A second trace confirmed that the problem is repeatable.
+
+   Trace file demonstrating correct behavior
+      Made using tcpdump recording at the sending TCP (C).  No losses
+      reported by the packet filter.
+
+   12:35:48.442538 C > D: P 465409:465921(512) ack 1 win 4608
+   12:35:48.544483 D > C: . ack 461825 win 4096
+   12:35:48.703496 D > C: . ack 461825 win 4096
+   12:35:49.044613 C > D: . 461825:462337(512) ack 1 win 4608
+
+
+
+Paxson, et. al.              Informational                      [Page 7]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   12:35:49.192282 D > C: . ack 465921 win 2048
+   12:35:49.192538 D > C: . ack 465921 win 4096
+   12:35:49.193392 C > D: P 465921:466433(512) ack 1 win 4608
+   12:35:49.194726 C > D: P 466433:466945(512) ack 1 win 4608
+   12:35:49.350665 D > C: . ack 466945 win 4096
+   12:35:49.351694 C > D: . 466945:467457(512) ack 1 win 4608
+   12:35:49.352168 C > D: . 467457:467969(512) ack 1 win 4608
+   12:35:49.352643 C > D: . 467969:468481(512) ack 1 win 4608
+   12:35:49.506000 D > C: . ack 467969 win 3584
+
+      After C transmits the first packet shown to D, it takes no action
+      in response to D's ACKs for 461825, because the first packet
+      already reached the advertised window limit of 4096 bytes above
+      461825.  600 msec after transmitting the first packet, C
+      retransmits 461825:462337, presumably due to a timeout.  Its
+      congestion window is now MSS (512 bytes).
+
+      D acks 465921, indicating that C's retransmission filled a
+      sequence hole.  This ACK advances C's cwnd from 512 to 1024.  Very
+      shortly after, D acks 465921 again in order to update the offered
+      window from 2048 to 4096.  This ACK does not advance cwnd since it
+      is not for new data.  Very shortly after, C responds to the newly
+      enlarged window by transmitting two packets.  D acks both,
+      advancing cwnd from 1024 to 1536.  C in turn transmits three
+      packets.
+
+   References
+      This problem is documented in [Paxson97].
+
+   How to detect
+      Packet loss is common enough in the Internet that generally it is
+      not difficult to find an Internet path that will force
+      retransmission due to packet loss.
+
+      If the effective window prior to loss is large enough, however,
+      then the TCP may retransmit using the "fast recovery" mechanism
+      described in RFC 2001.  In a packet trace, the signature of fast
+      recovery is that the packet retransmission occurs in response to
+      the receipt of three duplicate ACKs, and subsequent duplicate ACKs
+      may lead to the transmission of new data, above both the ack point
+      and the highest sequence transmitted so far.  An absence of three
+      duplicate ACKs prior to retransmission suffices to distinguish
+      between timeout and fast recovery retransmissions.  In the face of
+      only observing fast recovery retransmissions, generally it is not
+      difficult to repeat the data transfer until observing a timeout
+      retransmission.
+
+
+
+
+
+Paxson, et. al.              Informational                      [Page 8]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+      Once armed with a trace exhibiting a timeout retransmission,
+      determining whether the TCP follows slow start is done by
+      computing the correct progression of cwnd and comparing it to the
+      amount of data transmitted by the TCP subsequent to the timeout
+      retransmission.
+
+   How to fix
+      If the root problem is that the implementation lacks a notion of a
+      congestion window, then unfortunately this requires significant
+      work to fix.  However, doing so is critical, for reasons outlined
+      above.
+
+2.3.
+
+   Name of Problem
+      Uninitialized CWND
+
+   Classification
+      Congestion control
+
+   Description
+      As described above for "No initial slow start", when a TCP
+      connection begins cwnd is initialized to one segment (or perhaps a
+      few segments, if experimenting with [RFC2414]).  One particular
+      form of "No initial slow start", worth separate mention as the bug
+      is fairly widely deployed, is "Uninitialized CWND".  That is,
+      while the TCP implements the proper slow start mechanism, it fails
+      to initialize cwnd properly, so slow start in fact fails to occur.
+
+      One way the bug can occur is if, during the connection
+      establishment handshake, the SYN ACK packet arrives without an MSS
+      option.  The faulty implementation uses receipt of the MSS option
+      to initialize cwnd to one segment; if the option fails to arrive,
+      then cwnd is instead initialized to a very large value.
+
+   Significance
+      In congested environments, detrimental to the performance of other
+      connections, and likely to the connection itself.  The burst can
+      be so large (see below) that it has deleterious effects even in
+      uncongested environments.
+
+   Implications
+      A TCP exhibiting this behavior is stressing the network with a
+      large burst of packets, which can cause loss in the network.
+
+   Relevant RFCs
+      RFC 1122 requires use of slow start.  RFC 2001 gives the specifics
+      of slow start.
+
+
+
+Paxson, et. al.              Informational                      [Page 9]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   Trace file demonstrating it
+      This trace was made using tcpdump running on host A.  Host A is
+      the sender and host B is the receiver.  The advertised window and
+      timestamp options have been omitted for clarity, except for the
+      first segment sent by host A.  Note that A sends an MSS option in
+      its initial SYN but B does not include one in its reply.
+
+   16:56:02.226937 A > B: S 237585307:237585307(0) win 8192
+         <mss 536,nop,wscale 0,nop,nop,timestamp[|tcp]>
+   16:56:02.557135 B > A: S 1617216000:1617216000(0)
+         ack 237585308 win 16384
+   16:56:02.557788 A > B: . ack 1 win 8192
+   16:56:02.566014 A > B: . 1:537(536) ack 1
+   16:56:02.566557 A > B: . 537:1073(536) ack 1
+   16:56:02.567120 A > B: . 1073:1609(536) ack 1
+   16:56:02.567662 A > B: P 1609:2049(440) ack 1
+   16:56:02.568349 A > B: . 2049:2585(536) ack 1
+   16:56:02.568909 A > B: . 2585:3121(536) ack 1
+
+      [54 additional burst segments deleted for brevity]
+
+   16:56:02.936638 A > B: . 32065:32601(536) ack 1
+   16:56:03.018685 B > A: . ack 1
+
+      After the three-way handshake, host A bursts 61 segments into the
+      network, before duplicate ACKs on the first segment cause a
+      retransmission to occur.  Since host A did not wait for the ACK on
+      the first segment before sending additional segments, it is
+      exhibiting "Uninitialized CWND".
+
+   Trace file demonstrating correct behavior
+
+      See the example for "No initial slow start".
+
+   References
+      This problem is documented in [Paxson97].
+
+   How to detect
+      This problem can be detected by examining a packet trace recorded
+      at either the sender or the receiver.  However, the bug can be
+      difficult to induce because it requires finding a remote TCP peer
+      that does not send an MSS option in its SYN ACK.
+
+   How to fix
+      This problem can be fixed by ensuring that cwnd is initialized
+      upon receipt of a SYN ACK, even if the SYN ACK does not contain an
+      MSS option.
+
+
+
+
+Paxson, et. al.              Informational                     [Page 10]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+2.4.
+
+   Name of Problem
+      Inconsistent retransmission
+
+   Classification
+      Reliability
+
+   Description
+      If, for a given sequence number, a sending TCP retransmits
+      different data than previously sent for that sequence number, then
+      a strong possibility arises that the receiving TCP will
+      reconstruct a different byte stream than that sent by the sending
+      application, depending on which instance of the sequence number it
+      accepts.
+
+      Such a sending TCP exhibits "Inconsistent retransmission".
+
+   Significance
+      Critical for all environments.
+
+   Implications
+      Reliable delivery of data is a fundamental property of TCP.
+
+   Relevant RFCs
+      RFC 793, section 1.5, discusses the central role of reliability in
+      TCP operation.
+
+   Trace file demonstrating it
+      Made using tcpdump recording at the receiving TCP (B).  No losses
+      reported by the packet filter.
+
+   12:35:53.145503 A > B: FP 90048435:90048461(26)
+                             ack 393464682 win 4096
+                                        4500 0042 9644 0000
+                    3006 e4c2 86b1 0401 83f3 010a b2a4 0015
+                    055e 07b3 1773 cb6a 5019 1000 68a9 0000
+   data starts here>504f 5254 2031 3334 2c31 3737*2c34 2c31
+                    2c31 3738 2c31 3635 0d0a
+   12:35:53.146479 B > A: R 393464682:393464682(0) win 8192
+   12:35:53.851714 A > B: FP 90048429:90048463(34)
+                          ack 393464682 win 4096
+                                        4500 004a 965b 0000
+                    3006 e4a3 86b1 0401 83f3 010a b2a4 0015
+                    055e 07ad 1773 cb6a 5019 1000 8bd3 0000
+   data starts here>5041 5356 0d0a 504f 5254 2031 3334 2c31
+                    3737*2c31 3035 2c31 3431 2c34 2c31 3539
+                    0d0a
+
+
+
+Paxson, et. al.              Informational                     [Page 11]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+      The sequence numbers shown in this trace are absolute and not
+      adjusted to reflect the ISN.  The 4-digit hex values show a dump
+      of the packet's IP and TCP headers, as well as payload.  A first
+      sends to B data for 90048435:90048461.  The corresponding data
+      begins with hex words 504f, 5254, etc.
+
+      B responds with a RST.  Since the recording location was local to
+      B, it is unknown whether A received the RST.
+
+      A then sends 90048429:90048463, which includes six sequence
+      positions below the earlier transmission, all 26 positions of the
+      earlier transmission, and two additional sequence positions.
+
+      The retransmission disagrees starting just after sequence
+      90048447, annotated above with a leading '*'.  These two bytes
+      were originally transmitted as hex 2c34 but retransmitted as hex
+      2c31.  Subsequent positions disagree as well.
+
+      This behavior has been observed in other traces involving
+      different hosts.  It is unknown how to repeat it.
+
+      In this instance, no corruption would occur, since B has already
+      indicated it will not accept further packets from A.
+
+      A second example illustrates a slightly different instance of the
+      problem.  The tracing again was made with tcpdump at the receiving
+      TCP (D).
+
+   22:23:58.645829 C > D: P 185:212(27) ack 565 win 4096
+                                        4500 0043 90a3 0000
+                    3306 0734 cbf1 9eef 83f3 010a 0525 0015
+                    a3a2 faba 578c 70a4 5018 1000 9a53 0000
+   data starts here>504f 5254 2032 3033 2c32 3431 2c31 3538
+                    2c32 3339 2c35 2c34 330d 0a
+   22:23:58.646805 D > C: . ack 184 win 8192
+                                        4500 0028 beeb 0000
+                    3e06 ce06 83f3 010a cbf1 9eef 0015 0525
+                    578c 70a4 a3a2 fab9 5010 2000 342f 0000
+   22:31:36.532244 C > D: FP 186:213(27) ack 565 win 4096
+                                        4500 0043 9435 0000
+                    3306 03a2 cbf1 9eef 83f3 010a 0525 0015
+                    a3a2 fabb 578c 70a4 5019 1000 9a51 0000
+   data starts here>504f 5254 2032 3033 2c32 3431 2c31 3538
+                    2c32 3339 2c35 2c34 330d 0a
+
+
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 12]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+      In this trace, sequence numbers are relative.  C sends 185:212,
+      but D only sends an ACK for 184 (so sequence number 184 is
+      missing).  C then sends 186:213.  The packet payload is identical
+      to the previous payload, but the base sequence number is one
+      higher, resulting in an inconsistent retransmission.
+
+      Neither trace exhibits checksum errors.
+
+   Trace file demonstrating correct behavior
+      (Omitted, as presumably correct behavior is obvious.)
+
+   References
+      None known.
+
+   How to detect
+      This problem unfortunately can be very difficult to detect, since
+      available experience indicates it is quite rare that it is
+      manifested.  No "trigger" has been identified that can be used to
+      reproduce the problem.
+
+   How to fix
+      In the absence of a known "trigger", we cannot always assess how
+      to fix the problem.
+
+      In one implementation (not the one illustrated above), the problem
+      manifested itself when (1) the sender received a zero window and
+      stalled; (2) eventually an ACK arrived that offered a window
+      larger than that in effect at the time of the stall; (3) the
+      sender transmitted out of the buffer of data it held at the time
+      of the stall, but (4) failed to limit this transfer to the buffer
+      length, instead using the newly advertised (and larger) offered
+      window.  Consequently, in addition to the valid buffer contents,
+      it sent whatever garbage values followed the end of the buffer.
+      If it then retransmitted the corresponding sequence numbers, at
+      that point it sent the correct data, resulting in an inconsistent
+      retransmission.  Note that this instance of the problem reflects a
+      more general problem, that of initially transmitting incorrect
+      data.
+
+2.5.
+
+   Name of Problem
+      Failure to retain above-sequence data
+
+   Classification
+      Congestion control, performance
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 13]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   Description
+      When a TCP receives an "above sequence" segment, meaning one with
+      a sequence number exceeding RCV.NXT but below RCV.NXT+RCV.WND, it
+      SHOULD queue the segment for later delivery (RFC 1122, 4.2.2.20).
+      (See RFC 793 for the definition of RCV.NXT and RCV.WND.)  A TCP
+      that fails to do so is said to exhibit "Failure to retain above-
+      sequence data".
+
+      It may sometimes be appropriate for a TCP to discard above-
+      sequence data to reclaim memory.  If they do so only rarely, then
+      we would not consider them to exhibit this problem.  Instead, the
+      particular concern is with TCPs that always discard above-sequence
+      data.
+
+   Significance
+      In environments prone to packet loss, detrimental to the
+      performance of both other connections and the connection itself.
+
+   Implications
+      In times of congestion, a failure to retain above-sequence data
+      will lead to numerous otherwise-unnecessary retransmissions,
+      aggravating the congestion and potentially reducing performance by
+      a large factor.
+
+   Relevant RFCs
+      RFC 1122 revises RFC 793 by upgrading the latter's MAY to a SHOULD
+      on this issue.
+
+   Trace file demonstrating it
+      Made using tcpdump recording at the receiving TCP.  No losses
+      reported by the packet filter.
+
+      B is the TCP sender, A the receiver.  A exhibits failure to retain
+      above-sequence data:
+
+   10:38:10.164860 B > A: . 221078:221614(536) ack 1 win 33232 [tos 0x8]
+   10:38:10.170809 B > A: . 221614:222150(536) ack 1 win 33232 [tos 0x8]
+   10:38:10.177183 B > A: . 222150:222686(536) ack 1 win 33232 [tos 0x8]
+   10:38:10.225039 A > B: . ack 222686 win 25800
+
+      Here B has sent up to (relative) sequence 222686 in-sequence, and
+      A accordingly acknowledges.
+
+   10:38:10.268131 B > A: . 223222:223758(536) ack 1 win 33232 [tos 0x8]
+   10:38:10.337995 B > A: . 223758:224294(536) ack 1 win 33232 [tos 0x8]
+   10:38:10.344065 B > A: . 224294:224830(536) ack 1 win 33232 [tos 0x8]
+   10:38:10.350169 B > A: . 224830:225366(536) ack 1 win 33232 [tos 0x8]
+   10:38:10.356362 B > A: . 225366:225902(536) ack 1 win 33232 [tos 0x8]
+
+
+
+Paxson, et. al.              Informational                     [Page 14]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   10:38:10.362445 B > A: . 225902:226438(536) ack 1 win 33232 [tos 0x8]
+   10:38:10.368579 B > A: . 226438:226974(536) ack 1 win 33232 [tos 0x8]
+   10:38:10.374732 B > A: . 226974:227510(536) ack 1 win 33232 [tos 0x8]
+   10:38:10.380825 B > A: . 227510:228046(536) ack 1 win 33232 [tos 0x8]
+   10:38:10.387027 B > A: . 228046:228582(536) ack 1 win 33232 [tos 0x8]
+   10:38:10.393053 B > A: . 228582:229118(536) ack 1 win 33232 [tos 0x8]
+   10:38:10.399193 B > A: . 229118:229654(536) ack 1 win 33232 [tos 0x8]
+   10:38:10.405356 B > A: . 229654:230190(536) ack 1 win 33232 [tos 0x8]
+
+      A now receives 13 additional packets from B.  These are above-
+      sequence because 222686:223222 was dropped.  The packets do
+      however fit within the offered window of 25800.  A does not
+      generate any duplicate ACKs for them.
+
+      The trace contributor (V. Paxson) verified that these 13 packets
+      had valid IP and TCP checksums.
+
+   10:38:11.917728 B > A: . 222686:223222(536) ack 1 win 33232 [tos 0x8]
+   10:38:11.930925 A > B: . ack 223222 win 32232
+
+      B times out for 222686:223222 and retransmits it.  Upon receiving
+      it, A only acknowledges 223222.  Had it retained the valid above-
+      sequence packets, it would instead have ack'd 230190.
+
+   10:38:12.048438 B > A: . 223222:223758(536) ack 1 win 33232 [tos 0x8]
+   10:38:12.054397 B > A: . 223758:224294(536) ack 1 win 33232 [tos 0x8]
+   10:38:12.068029 A > B: . ack 224294 win 31696
+
+      B retransmits two more packets, and A only acknowledges them.
+      This pattern continues as B retransmits the entire set of
+      previously-received packets.
+
+      A second trace confirmed that the problem is repeatable.
+
+   Trace file demonstrating correct behavior
+      Made using tcpdump recording at the receiving TCP (C).  No losses
+      reported by the packet filter.
+
+   09:11:25.790417 D > C: . 33793:34305(512) ack 1 win 61440
+   09:11:25.791393 D > C: . 34305:34817(512) ack 1 win 61440
+   09:11:25.792369 D > C: . 34817:35329(512) ack 1 win 61440
+   09:11:25.792369 D > C: . 35329:35841(512) ack 1 win 61440
+   09:11:25.793345 D > C: . 36353:36865(512) ack 1 win 61440
+   09:11:25.794321 C > D: . ack 35841 win 59904
+
+      A sequence hole occurs because 35841:36353 has been dropped.
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 15]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   09:11:25.794321 D > C: . 36865:37377(512) ack 1 win 61440
+   09:11:25.794321 C > D: . ack 35841 win 59904
+   09:11:25.795297 D > C: . 37377:37889(512) ack 1 win 61440
+   09:11:25.795297 C > D: . ack 35841 win 59904
+   09:11:25.796273 C > D: . ack 35841 win 61440
+   09:11:25.798225 D > C: . 37889:38401(512) ack 1 win 61440
+   09:11:25.799201 C > D: . ack 35841 win 61440
+   09:11:25.807009 D > C: . 38401:38913(512) ack 1 win 61440
+   09:11:25.807009 C > D: . ack 35841 win 61440
+   (many additional lines omitted)
+   09:11:25.884113 D > C: . 52737:53249(512) ack 1 win 61440
+   09:11:25.884113 C > D: . ack 35841 win 61440
+
+      Each additional, above-sequence packet C receives from D elicits a
+      duplicate ACK for 35841.
+
+      09:11:25.887041 D > C: . 35841:36353(512) ack 1 win 61440
+      09:11:25.887041 C > D: . ack 53249 win 44032
+
+      D retransmits 35841:36353 and C acknowledges receipt of data all
+      the way up to 53249.
+
+   References
+      This problem is documented in [Paxson97].
+
+   How to detect
+      Packet loss is common enough in the Internet that generally it is
+      not difficult to find an Internet path that will result in some
+      above-sequence packets arriving.  A TCP that exhibits "Failure to
+      retain ..." may not generate duplicate ACKs for these packets.
+      However, some TCPs that do retain above-sequence data also do not
+      generate duplicate ACKs, so failure to do so does not definitively
+      identify the problem.  Instead, the key observation is whether
+      upon retransmission of the dropped packet, data that was
+      previously above-sequence is acknowledged.
+
+      Two considerations in detecting this problem using a packet trace
+      are that it is easiest to do so with a trace made at the TCP
+      receiver, in order to unambiguously determine which packets
+      arrived successfully, and that such packets may still be correctly
+      discarded if they arrive with checksum errors.  The latter can be
+      tested by capturing the entire packet contents and performing the
+      IP and TCP checksum algorithms to verify their integrity; or by
+      confirming that the packets arrive with the same checksum and
+      contents as that with which they were sent, with a presumption
+      that the sending TCP correctly calculates checksums for the
+      packets it transmits.
+
+
+
+
+Paxson, et. al.              Informational                     [Page 16]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+      It is considerably easier to verify that an implementation does
+      NOT exhibit this problem.  This can be done by recording a trace
+      at the data sender, and observing that sometimes after a
+      retransmission the receiver acknowledges a higher sequence number
+      than just that which was retransmitted.
+
+   How to fix
+      If the root problem is that the implementation lacks buffer, then
+      unfortunately this requires significant work to fix.
+      However, doing so is important, for reasons outlined above.
+
+2.6.
+
+   Name of Problem
+      Extra additive constant in congestion avoidance
+
+   Classification
+      Congestion control / performance
+
+   Description
+      RFC 1122 section 4.2.2.15 states that TCP MUST implement
+      Jacobson's "congestion avoidance" algorithm [Jacobson88], which
+      calls for increasing the congestion window, cwnd, by:
+
+           MSS * MSS / cwnd
+
+      for each ACK received for new data [RFC2001].  This has the effect
+      of increasing cwnd by approximately one segment in each round trip
+      time.
+
+      Some TCP implementations add an additional fraction of a segment
+      (typically MSS/8) to cwnd for each ACK received for new data
+      [Stevens94, Wright95]:
+
+           (MSS * MSS / cwnd) + MSS/8
+
+      These implementations exhibit "Extra additive constant in
+      congestion avoidance".
+
+   Significance
+      May be detrimental to performance even in completely uncongested
+      environments (see Implications).
+
+      In congested environments, may also be detrimental to the
+      performance of other connections.
+
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 17]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   Implications
+      The extra additive term allows a TCP to more aggressively open its
+      congestion window (quadratic rather than linear increase).  For
+      congested networks, this can increase the loss rate experienced by
+      all connections sharing a bottleneck with the aggressive TCP.
+
+      However, even for completely uncongested networks, the extra
+      additive term can lead to diminished performance, as follows.  In
+      congestion avoidance, a TCP sender probes the network path to
+      determine its available capacity, which often equates to the
+      number of buffers available at a bottleneck link.  With linear
+      congestion avoidance, the TCP only probes for sufficient capacity
+      (buffer) to hold one extra packet per RTT.
+
+      Thus, when it exceeds the available capacity, generally only one
+      packet will be lost (since on the previous RTT it already found
+      that the path could sustain a window with one less packet in
+      flight).  If the congestion window is sufficiently large, then the
+      TCP will recover from this single loss using fast retransmission
+      and avoid an expensive (in terms of performance) retransmission
+      timeout.
+
+      However, when the additional additive term is used, then cwnd can
+      increase by more than one packet per RTT, in which case the TCP
+      probes more aggressively.  If in the previous RTT it had reached
+      the available capacity of the path, then the excess due to the
+      extra increase will again be lost, but now this will result in
+      multiple losses from the flight instead of a single loss.  TCPs
+      that do not utilize SACK [RFC2018] generally will not recover from
+      multiple losses without incurring a retransmission timeout
+      [Fall96,Hoe96], significantly diminishing performance.
+
+   Relevant RFCs
+      RFC 1122 requires use of the "congestion avoidance" algorithm.
+      RFC 2001 outlines the fast retransmit/fast recovery algorithms.
+      RFC 2018 discusses the SACK option.
+
+   Trace file demonstrating it
+      Recorded using tcpdump running on the same FDDI LAN as host A.
+      Host A is the sender and host B is the receiver.  The connection
+      establishment specified an MSS of 4,312 bytes and a window scale
+      factor of 4.  We omit the establishment and the first 2.5 MB of
+      data transfer, as the problem is best demonstrated when the window
+      has grown to a large value.  At the beginning of the trace
+      excerpt, the congestion window is 31 packets.  The connection is
+      never receiver-window limited, so we omit window advertisements
+      from the trace for clarity.
+
+
+
+
+Paxson, et. al.              Informational                     [Page 18]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   11:42:07.697951 B > A: . ack 2383006
+   11:42:07.699388 A > B: . 2508054:2512366(4312)
+   11:42:07.699962 A > B: . 2512366:2516678(4312)
+   11:42:07.700012 B > A: . ack 2391630
+   11:42:07.701081 A > B: . 2516678:2520990(4312)
+   11:42:07.701656 A > B: . 2520990:2525302(4312)
+   11:42:07.701739 B > A: . ack 2400254
+   11:42:07.702685 A > B: . 2525302:2529614(4312)
+   11:42:07.703257 A > B: . 2529614:2533926(4312)
+   11:42:07.703295 B > A: . ack 2408878
+   11:42:07.704414 A > B: . 2533926:2538238(4312)
+   11:42:07.704989 A > B: . 2538238:2542550(4312)
+   11:42:07.705040 B > A: . ack 2417502
+   11:42:07.705935 A > B: . 2542550:2546862(4312)
+   11:42:07.706506 A > B: . 2546862:2551174(4312)
+   11:42:07.706544 B > A: . ack 2426126
+   11:42:07.707480 A > B: . 2551174:2555486(4312)
+   11:42:07.708051 A > B: . 2555486:2559798(4312)
+   11:42:07.708088 B > A: . ack 2434750
+   11:42:07.709030 A > B: . 2559798:2564110(4312)
+   11:42:07.709604 A > B: . 2564110:2568422(4312)
+   11:42:07.710175 A > B: . 2568422:2572734(4312) *
+
+   11:42:07.710215 B > A: . ack 2443374
+   11:42:07.710799 A > B: . 2572734:2577046(4312)
+   11:42:07.711368 A > B: . 2577046:2581358(4312)
+   11:42:07.711405 B > A: . ack 2451998
+   11:42:07.712323 A > B: . 2581358:2585670(4312)
+   11:42:07.712898 A > B: . 2585670:2589982(4312)
+   11:42:07.712938 B > A: . ack 2460622
+   11:42:07.713926 A > B: . 2589982:2594294(4312)
+   11:42:07.714501 A > B: . 2594294:2598606(4312)
+   11:42:07.714547 B > A: . ack 2469246
+   11:42:07.715747 A > B: . 2598606:2602918(4312)
+   11:42:07.716287 A > B: . 2602918:2607230(4312)
+   11:42:07.716328 B > A: . ack 2477870
+   11:42:07.717146 A > B: . 2607230:2611542(4312)
+   11:42:07.717717 A > B: . 2611542:2615854(4312)
+   11:42:07.717762 B > A: . ack 2486494
+   11:42:07.718754 A > B: . 2615854:2620166(4312)
+   11:42:07.719331 A > B: . 2620166:2624478(4312)
+   11:42:07.719906 A > B: . 2624478:2628790(4312) **
+
+   11:42:07.719958 B > A: . ack 2495118
+   11:42:07.720500 A > B: . 2628790:2633102(4312)
+   11:42:07.721080 A > B: . 2633102:2637414(4312)
+   11:42:07.721739 B > A: . ack 2503742
+   11:42:07.722348 A > B: . 2637414:2641726(4312)
+
+
+
+Paxson, et. al.              Informational                     [Page 19]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   11:42:07.722918 A > B: . 2641726:2646038(4312)
+   11:42:07.769248 B > A: . ack 2512366
+
+      The receiver's acknowledgment policy is one ACK per two packets
+      received.  Thus, for each ACK arriving at host A, two new packets
+      are sent, except when cwnd increases due to congestion avoidance,
+      in which case three new packets are sent.
+
+      With an ack-every-two-packets policy, cwnd should only increase
+      one MSS per 2 RTT.  However, at the point marked "*" the window
+      increases after 7 ACKs have arrived, and then again at "**" after
+      6 more ACKs.
+
+      While we do not have space to show the effect, this trace suffered
+      from repeated timeout retransmissions due to multiple packet
+      losses during a single RTT.
+
+   Trace file demonstrating correct behavior
+      Made using the same host and tracing setup as above, except now
+      A's TCP has been modified to remove the MSS/8 additive constant.
+      Tcpdump reported 77 packet drops; the excerpt below is fully
+      self-consistent so it is unlikely that any of these occurred
+      during the excerpt.
+
+      We again begin when cwnd is 31 packets (this occurs significantly
+      later in the trace, because the congestion avoidance is now less
+      aggressive with opening the window).
+
+   14:22:21.236757 B > A: . ack 5194679
+   14:22:21.238192 A > B: . 5319727:5324039(4312)
+   14:22:21.238770 A > B: . 5324039:5328351(4312)
+   14:22:21.238821 B > A: . ack 5203303
+   14:22:21.240158 A > B: . 5328351:5332663(4312)
+   14:22:21.240738 A > B: . 5332663:5336975(4312)
+   14:22:21.270422 B > A: . ack 5211927
+   14:22:21.271883 A > B: . 5336975:5341287(4312)
+   14:22:21.272458 A > B: . 5341287:5345599(4312)
+   14:22:21.279099 B > A: . ack 5220551
+   14:22:21.280539 A > B: . 5345599:5349911(4312)
+   14:22:21.281118 A > B: . 5349911:5354223(4312)
+   14:22:21.281183 B > A: . ack 5229175
+   14:22:21.282348 A > B: . 5354223:5358535(4312)
+   14:22:21.283029 A > B: . 5358535:5362847(4312)
+   14:22:21.283089 B > A: . ack 5237799
+   14:22:21.284213 A > B: . 5362847:5367159(4312)
+   14:22:21.284779 A > B: . 5367159:5371471(4312)
+   14:22:21.285976 B > A: . ack 5246423
+   14:22:21.287465 A > B: . 5371471:5375783(4312)
+
+
+
+Paxson, et. al.              Informational                     [Page 20]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   14:22:21.288036 A > B: . 5375783:5380095(4312)
+   14:22:21.288073 B > A: . ack 5255047
+   14:22:21.289155 A > B: . 5380095:5384407(4312)
+   14:22:21.289725 A > B: . 5384407:5388719(4312)
+   14:22:21.289762 B > A: . ack 5263671
+   14:22:21.291090 A > B: . 5388719:5393031(4312)
+   14:22:21.291662 A > B: . 5393031:5397343(4312)
+   14:22:21.291701 B > A: . ack 5272295
+   14:22:21.292870 A > B: . 5397343:5401655(4312)
+   14:22:21.293441 A > B: . 5401655:5405967(4312)
+   14:22:21.293481 B > A: . ack 5280919
+   14:22:21.294476 A > B: . 5405967:5410279(4312)
+   14:22:21.295053 A > B: . 5410279:5414591(4312)
+   14:22:21.295106 B > A: . ack 5289543
+   14:22:21.296306 A > B: . 5414591:5418903(4312)
+   14:22:21.296878 A > B: . 5418903:5423215(4312)
+   14:22:21.296917 B > A: . ack 5298167
+   14:22:21.297716 A > B: . 5423215:5427527(4312)
+   14:22:21.298285 A > B: . 5427527:5431839(4312)
+   14:22:21.298324 B > A: . ack 5306791
+   14:22:21.299413 A > B: . 5431839:5436151(4312)
+   14:22:21.299986 A > B: . 5436151:5440463(4312)
+   14:22:21.303696 B > A: . ack 5315415
+   14:22:21.305177 A > B: . 5440463:5444775(4312)
+   14:22:21.305755 A > B: . 5444775:5449087(4312)
+   14:22:21.308032 B > A: . ack 5324039
+   14:22:21.309525 A > B: . 5449087:5453399(4312)
+   14:22:21.310101 A > B: . 5453399:5457711(4312)
+   14:22:21.310144 B > A: . ack 5332663           ***
+
+   14:22:21.311615 A > B: . 5457711:5462023(4312)
+   14:22:21.312198 A > B: . 5462023:5466335(4312)
+   14:22:21.341876 B > A: . ack 5341287
+   14:22:21.343451 A > B: . 5466335:5470647(4312)
+   14:22:21.343985 A > B: . 5470647:5474959(4312)
+   14:22:21.350304 B > A: . ack 5349911
+   14:22:21.351852 A > B: . 5474959:5479271(4312)
+   14:22:21.352430 A > B: . 5479271:5483583(4312)
+   14:22:21.352484 B > A: . ack 5358535
+   14:22:21.353574 A > B: . 5483583:5487895(4312)
+   14:22:21.354149 A > B: . 5487895:5492207(4312)
+   14:22:21.354205 B > A: . ack 5367159
+   14:22:21.355467 A > B: . 5492207:5496519(4312)
+   14:22:21.356039 A > B: . 5496519:5500831(4312)
+   14:22:21.357361 B > A: . ack 5375783
+   14:22:21.358855 A > B: . 5500831:5505143(4312)
+   14:22:21.359424 A > B: . 5505143:5509455(4312)
+   14:22:21.359465 B > A: . ack 5384407
+
+
+
+Paxson, et. al.              Informational                     [Page 21]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   14:22:21.360605 A > B: . 5509455:5513767(4312)
+   14:22:21.361181 A > B: . 5513767:5518079(4312)
+   14:22:21.361225 B > A: . ack 5393031
+   14:22:21.362485 A > B: . 5518079:5522391(4312)
+   14:22:21.363057 A > B: . 5522391:5526703(4312)
+   14:22:21.363096 B > A: . ack 5401655
+   14:22:21.364236 A > B: . 5526703:5531015(4312)
+   14:22:21.364810 A > B: . 5531015:5535327(4312)
+   14:22:21.364867 B > A: . ack 5410279
+   14:22:21.365819 A > B: . 5535327:5539639(4312)
+   14:22:21.366386 A > B: . 5539639:5543951(4312)
+   14:22:21.366427 B > A: . ack 5418903
+   14:22:21.367586 A > B: . 5543951:5548263(4312)
+   14:22:21.368158 A > B: . 5548263:5552575(4312)
+   14:22:21.368199 B > A: . ack 5427527
+   14:22:21.369189 A > B: . 5552575:5556887(4312)
+   14:22:21.369758 A > B: . 5556887:5561199(4312)
+   14:22:21.369803 B > A: . ack 5436151
+   14:22:21.370814 A > B: . 5561199:5565511(4312)
+   14:22:21.371398 A > B: . 5565511:5569823(4312)
+   14:22:21.375159 B > A: . ack 5444775
+   14:22:21.376658 A > B: . 5569823:5574135(4312)
+   14:22:21.377235 A > B: . 5574135:5578447(4312)
+   14:22:21.379303 B > A: . ack 5453399
+   14:22:21.380802 A > B: . 5578447:5582759(4312)
+   14:22:21.381377 A > B: . 5582759:5587071(4312)
+   14:22:21.381947 A > B: . 5587071:5591383(4312) ****
+
+      "***" marks the end of the first round trip.  Note that cwnd did
+      not increase (as evidenced by each ACK eliciting two new data
+      packets).  Only at "****", which comes near the end of the second
+      round trip, does cwnd increase by one packet.
+
+      This trace did not suffer any timeout retransmissions.  It
+      transferred the same amount of data as the first trace in about
+      half as much time.  This difference is repeatable between hosts A
+      and B.
+
+   References
+      [Stevens94] and [Wright95] discuss this problem.  The problem of
+      Reno TCP failing to recover from multiple losses except via a
+      retransmission timeout is discussed in [Fall96,Hoe96].
+
+
+
+
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 22]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   How to detect
+      If source code is available, that is generally the easiest way to
+      detect this problem.  Search for each modification to the cwnd
+      variable; (at least) one of these will be for congestion
+      avoidance, and inspection of the related code should immediately
+      identify the problem if present.
+
+      The problem can also be detected by closely examining packet
+      traces taken near the sender.  During congestion avoidance, cwnd
+      will increase by an additional segment upon the receipt of
+      (typically) eight acknowledgements without a loss.  This increase
+      is in addition to the one segment increase per round trip time (or
+      two round trip times if the receiver is using delayed ACKs).
+
+      Furthermore, graphs of the sequence number vs. time, taken from
+      packet traces, are normally linear during congestion avoidance.
+      When viewing packet traces of transfers from senders exhibiting
+      this problem, the graphs appear quadratic instead of linear.
+
+      Finally, the traces will show that, with sufficiently large
+      windows, nearly every loss event results in a timeout.
+
+   How to fix
+      This problem may be corrected by removing the "+ MSS/8" term from
+      the congestion avoidance code that increases cwnd each time an ACK
+      of new data is received.
+
+2.7.
+
+   Name of Problem
+      Initial RTO too low
+
+   Classification
+      Performance
+
+   Description
+      When a TCP first begins transmitting data, it lacks the RTT
+      measurements necessary to have computed an adaptive retransmission
+      timeout (RTO).  RFC 1122, 4.2.3.1, states that a TCP SHOULD
+      initialize RTO to 3 seconds.  A TCP that uses a lower value
+      exhibits "Initial RTO too low".
+
+   Significance
+      In environments with large RTTs (where "large" means any value
+      larger than the initial RTO), TCPs will experience very poor
+      performance.
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 23]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   Implications
+      Whenever RTO < RTT, very poor performance can result as packets
+      are unnecessarily retransmitted (because RTO will expire before an
+      ACK for the packet can arrive) and the connection enters slow
+      start and congestion avoidance.  Generally, the algorithms for
+      computing RTO avoid this problem by adding a positive term to the
+      estimated RTT.  However, when a connection first begins it must
+      use some estimate for RTO, and if it picks a value less than RTT,
+      the above problems will arise.
+
+      Furthermore, when the initial RTO < RTT, it can take a long time
+      for the TCP to correct the problem by adapting the RTT estimate,
+      because the use of Karn's algorithm (mandated by RFC 1122,
+      4.2.3.1) will discard many of the candidate RTT measurements made
+      after the first timeout, since they will be measurements of
+      retransmitted segments.
+
+   Relevant RFCs
+      RFC 1122 states that TCPs SHOULD initialize RTO to 3 seconds and
+      MUST implement Karn's algorithm.
+
+   Trace file demonstrating it
+      The following trace file was taken using tcpdump at host A, the
+      data sender.  The advertised window and SYN options have been
+      omitted for clarity.
+
+   07:52:39.870301 A > B: S 2786333696:2786333696(0)
+   07:52:40.548170 B > A: S 130240000:130240000(0) ack 2786333697
+   07:52:40.561287 A > B: P 1:513(512) ack 1
+   07:52:40.753466 A > B: . 1:513(512) ack 1
+   07:52:41.133687 A > B: . 1:513(512) ack 1
+   07:52:41.458529 B > A: . ack 513
+   07:52:41.458686 A > B: . 513:1025(512) ack 1
+   07:52:41.458797 A > B: P 1025:1537(512) ack 1
+   07:52:41.541633 B > A: . ack 513
+   07:52:41.703732 A > B: . 513:1025(512) ack 1
+   07:52:42.044875 B > A: . ack 513
+   07:52:42.173728 A > B: . 513:1025(512) ack 1
+   07:52:42.330861 B > A: . ack 1537
+   07:52:42.331129 A > B: . 1537:2049(512) ack 1
+   07:52:42.331262 A > B: P 2049:2561(512) ack 1
+   07:52:42.623673 A > B: . 1537:2049(512) ack 1
+   07:52:42.683203 B > A: . ack 1537
+   07:52:43.044029 B > A: . ack 1537
+   07:52:43.193812 A > B: . 1537:2049(512) ack 1
+
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 24]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+      Note from the SYN/SYN-ACK exchange, the RTT is over 600 msec.
+      However, from the elapsed time between the third and fourth lines
+      (the first packet being sent and then retransmitted), it is
+      apparent the RTO was initialized to under 200 msec.  The next line
+      shows that this value has doubled to 400 msec (correct exponential
+      backoff of RTO), but that still does not suffice to avoid an
+      unnecessary retransmission.
+
+      Finally, an ACK from B arrives for the first segment.  Later two
+      more duplicate ACKs for 513 arrive, indicating that both the
+      original and the two retransmissions arrived at B.  (Indeed, a
+      concurrent trace at B showed that no packets were lost during the
+      entire connection).  This ACK opens the congestion window to two
+      packets, which are sent back-to-back, but at 07:52:41.703732 RTO
+      again expires after a little over 200 msec, leading to an
+      unnecessary retransmission, and the pattern repeats.  By the end
+      of the trace excerpt above, 1536 bytes have been successfully
+      transmitted from A to B, over an interval of more than 2 seconds,
+      reflecting terrible performance.
+
+   Trace file demonstrating correct behavior
+      The following trace file was taken using tcpdump at host C, the
+      data sender.  The advertised window and SYN options have been
+      omitted for clarity.
+
+   17:30:32.090299 C > D: S 2031744000:2031744000(0)
+   17:30:32.900325 D > C: S 262737964:262737964(0) ack 2031744001
+   17:30:32.900326 C > D: . ack 1
+   17:30:32.910326 C > D: . 1:513(512) ack 1
+   17:30:34.150355 D > C: . ack 513
+   17:30:34.150356 C > D: . 513:1025(512) ack 1
+   17:30:34.150357 C > D: . 1025:1537(512) ack 1
+   17:30:35.170384 D > C: . ack 1025
+   17:30:35.170385 C > D: . 1537:2049(512) ack 1
+   17:30:35.170386 C > D: . 2049:2561(512) ack 1
+   17:30:35.320385 D > C: . ack 1537
+   17:30:35.320386 C > D: . 2561:3073(512) ack 1
+   17:30:35.320387 C > D: . 3073:3585(512) ack 1
+   17:30:35.730384 D > C: . ack 2049
+
+      The initial SYN/SYN-ACK exchange shows that RTT is more than 800
+      msec, and for some subsequent packets it rises above 1 second, but
+      C's retransmit timer does not ever expire.
+
+   References
+      This problem is documented in [Paxson97].
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 25]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   How to detect
+      This problem is readily detected by inspecting a packet trace of
+      the startup of a TCP connection made over a long-delay path.  It
+      can be diagnosed from either a sender-side or receiver-side trace.
+      Long-delay paths can often be found by locating remote sites on
+      other continents.
+
+   How to fix
+      As this problem arises from a faulty initialization, one hopes
+      fixing it requires a one-line change to the TCP source code.
+
+2.8.
+
+   Name of Problem
+      Failure of window deflation after loss recovery
+
+   Classification
+      Congestion control / performance
+
+   Description
+      The fast recovery algorithm allows TCP senders to continue to
+      transmit new segments during loss recovery.  First, fast
+      retransmission is initiated after a TCP sender receives three
+      duplicate ACKs.  At this point, a retransmission is sent and cwnd
+      is halved.  The fast recovery algorithm then allows additional
+      segments to be sent when sufficient additional duplicate ACKs
+      arrive.  Some implementations of fast recovery compute when to
+      send additional segments by artificially incrementing cwnd, first
+      by three segments to account for the three duplicate ACKs that
+      triggered fast retransmission, and subsequently by 1 MSS for each
+      new duplicate ACK that arrives.  When cwnd allows, the sender
+      transmits new data segments.
+
+      When an ACK arrives that covers new data, cwnd is to be reduced by
+      the amount by which it was artificially increased.  However, some
+      TCP implementations fail to "deflate" the window, causing an
+      inappropriate amount of data to be sent into the network after
+      recovery.  One cause of this problem is the "header prediction"
+      code, which is used to handle incoming segments that require
+      little work.  In some implementations of TCP, the header
+      prediction code does not check to make sure cwnd has not been
+      artificially inflated, and therefore does not reduce the
+      artificially increased cwnd when appropriate.
+
+   Significance
+      TCP senders that exhibit this problem will transmit a burst of
+      data immediately after recovery, which can degrade performance, as
+      well as network stability.  Effectively, the sender does not
+
+
+
+Paxson, et. al.              Informational                     [Page 26]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+      reduce the size of cwnd as much as it should (to half its value
+      when loss was detected), if at all.  This can harm the performance
+      of the TCP connection itself, as well as competing TCP flows.
+
+   Implications
+      A TCP sender exhibiting this problem does not reduce cwnd
+      appropriately in times of congestion, and therefore may contribute
+      to congestive collapse.
+
+   Relevant RFCs
+      RFC 2001 outlines the fast retransmit/fast recovery algorithms.
+      [Brakmo95] outlines this implementation problem and offers a fix.
+
+   Trace file demonstrating it
+      The following trace file was taken using tcpdump at host A, the
+      data sender.  The advertised window (which never changed) has been
+      omitted for clarity, except for the first packet sent by each
+      host.
+
+   08:22:56.825635 A.7505 > B.7505: . 29697:30209(512) ack 1 win 4608
+   08:22:57.038794 B.7505 > A.7505: . ack 27649 win 4096
+   08:22:57.039279 A.7505 > B.7505: . 30209:30721(512) ack 1
+   08:22:57.321876 B.7505 > A.7505: . ack 28161
+   08:22:57.322356 A.7505 > B.7505: . 30721:31233(512) ack 1
+   08:22:57.347128 B.7505 > A.7505: . ack 28673
+   08:22:57.347572 A.7505 > B.7505: . 31233:31745(512) ack 1
+   08:22:57.347782 A.7505 > B.7505: . 31745:32257(512) ack 1
+   08:22:57.936393 B.7505 > A.7505: . ack 29185
+   08:22:57.936864 A.7505 > B.7505: . 32257:32769(512) ack 1
+   08:22:57.950802 B.7505 > A.7505: . ack 29697 win 4096
+   08:22:57.951246 A.7505 > B.7505: . 32769:33281(512) ack 1
+   08:22:58.169422 B.7505 > A.7505: . ack 29697
+   08:22:58.638222 B.7505 > A.7505: . ack 29697
+   08:22:58.643312 B.7505 > A.7505: . ack 29697
+   08:22:58.643669 A.7505 > B.7505: . 29697:30209(512) ack 1
+   08:22:58.936436 B.7505 > A.7505: . ack 29697
+   08:22:59.002614 B.7505 > A.7505: . ack 29697
+   08:22:59.003026 A.7505 > B.7505: . 33281:33793(512) ack 1
+   08:22:59.682902 B.7505 > A.7505: . ack 33281
+   08:22:59.683391 A.7505 > B.7505: P 33793:34305(512) ack 1
+   08:22:59.683748 A.7505 > B.7505: P 34305:34817(512) ack 1 ***
+   08:22:59.684043 A.7505 > B.7505: P 34817:35329(512) ack 1
+   08:22:59.684266 A.7505 > B.7505: P 35329:35841(512) ack 1
+   08:22:59.684567 A.7505 > B.7505: P 35841:36353(512) ack 1
+   08:22:59.684810 A.7505 > B.7505: P 36353:36865(512) ack 1
+   08:22:59.685094 A.7505 > B.7505: P 36865:37377(512) ack 1
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 27]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+      The first 12 lines of the trace show incoming ACKs clocking out a
+      window of data segments.  At this point in the transfer, cwnd is 7
+      segments.  The next 4 lines of the trace show 3 duplicate ACKs
+      arriving from the receiver, followed by a retransmission from the
+      sender.  At this point, cwnd is halved (to 3 segments) and
+      artificially incremented by the three duplicate ACKs that have
+      arrived, making cwnd 6 segments.  The next two lines show 2 more
+      duplicate ACKs arriving, each of which increases cwnd by 1
+      segment.  So, after these two duplicate ACKs arrive the cwnd is 8
+      segments and the sender has permission to send 1 new segment
+      (since there are 7 segments outstanding).  The next line in the
+      trace shows this new segment being transmitted.  The next packet
+      shown in the trace is an ACK from host B that covers the first 7
+      outstanding segments (all but the new segment sent during
+      recovery).  This should cause cwnd to be reduced to 3 segments and
+      2 segments to be transmitted (since there is already 1 outstanding
+      segment in the network).  However, as shown by the last 7 lines of
+      the trace, cwnd is not reduced, causing a line-rate burst of 7 new
+      segments.
+
+   Trace file demonstrating correct behavior
+      The trace would appear identical to the one above, only it would
+      stop after the line marked "***", because at this point host A
+      would correctly reduce cwnd after recovery, allowing only 2
+      segments to be transmitted, rather than producing a burst of 7
+      segments.
+
+   References
+      This problem is documented and the performance implications
+      analyzed in [Brakmo95].
+
+   How to detect
+      Failure of window deflation after loss recovery can be found by
+      examining sender-side packet traces recorded during periods of
+      moderate loss (so cwnd can grow large enough to allow for fast
+      recovery when loss occurs).
+
+   How to fix
+      When this bug is caused by incorrect header prediction, the fix is
+      to add a predicate to the header prediction test that checks to
+      see whether cwnd is inflated; if so, the header prediction test
+      fails and the usual ACK processing occurs, which (in this case)
+      takes care to deflate the window.  See [Brakmo95] for details.
+
+2.9.
+
+   Name of Problem
+      Excessively short keepalive connection timeout
+
+
+
+Paxson, et. al.              Informational                     [Page 28]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   Classification
+      Reliability
+
+   Description
+      Keep-alive is a mechanism for checking whether an idle connection
+      is still alive.  According to RFC 1122, keepalive should only be
+      invoked in server applications that might otherwise hang
+      indefinitely and consume resources unnecessarily if a client
+      crashes or aborts a connection during a network failure.
+
+      RFC 1122 also specifies that if a keep-alive mechanism is
+      implemented it MUST NOT interpret failure to respond to any
+      specific probe as a dead connection.  The RFC does not specify a
+      particular mechanism for timing out a connection when no response
+      is received for keepalive probes.  However, if the mechanism does
+      not allow ample time for recovery from network congestion or
+      delay, connections may be timed out unnecessarily.
+
+   Significance
+      In congested networks, can lead to unwarranted termination of
+      connections.
+
+   Implications
+      It is possible for the network connection between two peer
+      machines to become congested or to exhibit packet loss at the time
+      that a keep-alive probe is sent on a connection.  If the keep-
+      alive mechanism does not allow sufficient time before dropping
+      connections in the face of unacknowledged probes, connections may
+      be dropped even when both peers of a connection are still alive.
+
+   Relevant RFCs
+      RFC 1122 specifies that the keep-alive mechanism may be provided.
+      It does not specify a mechanism for determining dead connections
+      when keepalive probes are not acknowledged.
+
+   Trace file demonstrating it
+      Made using the Orchestra tool at the peer of the machine using
+      keep-alive.  After connection establishment, incoming keep-alives
+      were dropped by Orchestra to simulate a dead connection.
+
+   22:11:12.040000 A > B: 22666019:0 win 8192 datasz 4 SYN
+   22:11:12.060000 B > A: 2496001:22666020 win 4096 datasz 4 SYN ACK
+   22:11:12.130000 A > B: 22666020:2496002 win 8760 datasz 0 ACK
+   (more than two hours elapse)
+   00:23:00.680000 A > B: 22666019:2496002 win 8760 datasz 1 ACK
+   00:23:01.770000 A > B: 22666019:2496002 win 8760 datasz 1 ACK
+   00:23:02.870000 A > B: 22666019:2496002 win 8760 datasz 1 ACK
+   00:23:03.970000 A > B: 22666019:2496002 win 8760 datasz 1 ACK
+
+
+
+Paxson, et. al.              Informational                     [Page 29]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   00:23:05.070000 A > B: 22666019:2496002 win 8760 datasz 1 ACK
+
+      The initial three packets are the SYN exchange for connection
+      setup.  About two hours later, the keepalive timer fires because
+      the connection has been idle.  Keepalive probes are transmitted a
+      total of 5 times, with a 1 second spacing between probes, after
+      which the connection is dropped.  This is problematic because a 5
+      second network outage at the time of the first probe results in
+      the connection being killed.
+
+   Trace file demonstrating correct behavior
+      Made using the Orchestra tool at the peer of the machine using
+      keep-alive.  After connection establishment, incoming keep-alives
+      were dropped by Orchestra to simulate a dead connection.
+
+   16:01:52.130000 A > B: 1804412929:0 win 4096 datasz 4 SYN
+   16:01:52.360000 B > A: 16512001:1804412930 win 4096 datasz 4 SYN ACK
+   16:01:52.410000 A > B: 1804412930:16512002 win 4096 datasz 0 ACK
+   (two hours elapse)
+   18:01:57.170000 A > B: 1804412929:16512002 win 4096 datasz 0 ACK
+   18:03:12.220000 A > B: 1804412929:16512002 win 4096 datasz 0 ACK
+   18:04:27.270000 A > B: 1804412929:16512002 win 4096 datasz 0 ACK
+   18:05:42.320000 A > B: 1804412929:16512002 win 4096 datasz 0 ACK
+   18:06:57.370000 A > B: 1804412929:16512002 win 4096 datasz 0 ACK
+   18:08:12.420000 A > B: 1804412929:16512002 win 4096 datasz 0 ACK
+   18:09:27.480000 A > B: 1804412929:16512002 win 4096 datasz 0 ACK
+   18:10:43.290000 A > B: 1804412929:16512002 win 4096 datasz 0 ACK
+   18:11:57.580000 A > B: 1804412929:16512002 win 4096 datasz 0 ACK
+   18:13:12.630000 A > B: 1804412929:16512002 win 4096 datasz 0 RST ACK
+
+      In this trace, when the keep-alive timer expires, 9 keepalive
+      probes are sent at 75 second intervals.  75 seconds after the last
+      probe is sent, a final RST segment is sent indicating that the
+      connection has been closed.  This implementation waits about 11
+      minutes before timing out the connection, while the first
+      implementation shown allows only 5 seconds.
+
+   References
+      This problem is documented in [Dawson97].
+
+   How to detect
+      For implementations manifesting this problem, it shows up on a
+      packet trace after the keepalive timer fires if the peer machine
+      receiving the keepalive does not respond.  Usually the keepalive
+      timer will fire at least two hours after keepalive is turned on,
+      but it may be sooner if the timer value has been configured lower,
+      or if the keepalive mechanism violates the specification (see
+      Insufficient interval between keepalives problem).  In this
+
+
+
+Paxson, et. al.              Informational                     [Page 30]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+      example, suppressing the response of the peer to keepalive probes
+      was accomplished using the Orchestra toolkit, which can be
+      configured to drop packets.  It could also have been done by
+      creating a connection, turning on keepalive, and disconnecting the
+      network connection at the receiver machine.
+
+   How to fix
+      This problem can be fixed by using a different method for timing
+      out keepalives that allows a longer period of time to elapse
+      before dropping the connection.  For example, the algorithm for
+      timing out on dropped data could be used.  Another possibility is
+      an algorithm such as the one shown in the trace above, which sends
+      9 probes at 75 second intervals and then waits an additional 75
+      seconds for a response before closing the connection.
+
+2.10.
+
+   Name of Problem
+      Failure to back off retransmission timeout
+
+   Classification
+      Congestion control / reliability
+
+   Description
+      The retransmission timeout is used to determine when a packet has
+      been dropped in the network.  When this timeout has expired
+      without the arrival of an ACK, the segment is retransmitted. Each
+      time a segment is retransmitted, the timeout is adjusted according
+      to an exponential backoff algorithm, doubling each time.  If a TCP
+      fails to receive an ACK after numerous attempts at retransmitting
+      the same segment, it terminates the connection.  A TCP that fails
+      to double its retransmission timeout upon repeated timeouts is
+      said to exhibit "Failure to back off retransmission timeout".
+
+   Significance
+      Backing off the retransmission timer is a cornerstone of network
+      stability in the presence of congestion.  Consequently, this bug
+      can have severe adverse effects in congested networks.  It also
+      affects TCP reliability in congested networks, as discussed in the
+      next section.
+
+   Implications
+      It is possible for the network connection between two TCP peers to
+      become congested or to exhibit packet loss at the time that a
+      retransmission is sent on a connection.  If the retransmission
+      mechanism does not allow sufficient time before dropping
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 31]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+      connections in the face of unacknowledged segments, connections
+      may be dropped even when, by waiting longer, the connection could
+      have continued.
+
+   Relevant RFCs
+      RFC 1122 specifies mandatory exponential backoff of the
+      retransmission timeout, and the termination of connections after
+      some period of time (at least 100 seconds).
+
+   Trace file demonstrating it
+      Made using tcpdump on an intermediate host:
+
+   16:51:12.671727 A > B: S 510878852:510878852(0) win 16384
+   16:51:12.672479 B > A: S 2392143687:2392143687(0)
+                            ack 510878853 win 16384
+   16:51:12.672581 A > B: . ack 1 win 16384
+   16:51:15.244171 A > B: P 1:3(2) ack 1 win 16384
+   16:51:15.244933 B > A: . ack 3 win 17518  (DF)
+
+   <receiving host disconnected>
+
+   16:51:19.381176 A > B: P 3:5(2) ack 1 win 16384
+   16:51:20.162016 A > B: P 3:5(2) ack 1 win 16384
+   16:51:21.161936 A > B: P 3:5(2) ack 1 win 16384
+   16:51:22.161914 A > B: P 3:5(2) ack 1 win 16384
+   16:51:23.161914 A > B: P 3:5(2) ack 1 win 16384
+   16:51:24.161879 A > B: P 3:5(2) ack 1 win 16384
+   16:51:25.161857 A > B: P 3:5(2) ack 1 win 16384
+   16:51:26.161836 A > B: P 3:5(2) ack 1 win 16384
+   16:51:27.161814 A > B: P 3:5(2) ack 1 win 16384
+   16:51:28.161791 A > B: P 3:5(2) ack 1 win 16384
+   16:51:29.161769 A > B: P 3:5(2) ack 1 win 16384
+   16:51:30.161750 A > B: P 3:5(2) ack 1 win 16384
+   16:51:31.161727 A > B: P 3:5(2) ack 1 win 16384
+
+   16:51:32.161701 A > B: R 5:5(0) ack 1 win 16384
+
+      The initial three packets are the SYN exchange for connection
+      setup, then a single data packet, to verify that data can be
+      transferred.  Then the connection to the destination host was
+      disconnected, and more data sent.  Retransmissions occur every
+      second for 12 seconds, and then the connection is terminated with
+      a RST.  This is problematic because a 12 second pause in
+      connectivity could result in the termination of a connection.
+
+   Trace file demonstrating correct behavior
+      Again, a tcpdump taken from a third host:
+
+
+
+
+Paxson, et. al.              Informational                     [Page 32]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   16:59:05.398301 A > B: S 2503324757:2503324757(0) win 16384
+   16:59:05.399673 B > A: S 2492674648:2492674648(0)
+                           ack 2503324758 win 16384
+   16:59:05.399866 A > B: . ack 1 win 17520
+   16:59:06.538107 A > B: P 1:3(2) ack 1 win 17520
+   16:59:06.540977 B > A: . ack 3 win 17518  (DF)
+
+   <receiving host disconnected>
+
+   16:59:13.121542 A > B: P 3:5(2) ack 1 win 17520
+   16:59:14.010928 A > B: P 3:5(2) ack 1 win 17520
+   16:59:16.010979 A > B: P 3:5(2) ack 1 win 17520
+   16:59:20.011229 A > B: P 3:5(2) ack 1 win 17520
+   16:59:28.011896 A > B: P 3:5(2) ack 1 win 17520
+   16:59:44.013200 A > B: P 3:5(2) ack 1 win 17520
+   17:00:16.015766 A > B: P 3:5(2) ack 1 win 17520
+   17:01:20.021308 A > B: P 3:5(2) ack 1 win 17520
+   17:02:24.027752 A > B: P 3:5(2) ack 1 win 17520
+   17:03:28.034569 A > B: P 3:5(2) ack 1 win 17520
+   17:04:32.041567 A > B: P 3:5(2) ack 1 win 17520
+   17:05:36.048264 A > B: P 3:5(2) ack 1 win 17520
+   17:06:40.054900 A > B: P 3:5(2) ack 1 win 17520
+
+   17:07:44.061306 A > B: R 5:5(0) ack 1 win 17520
+
+      In this trace, when the retransmission timer expires, 12
+      retransmissions are sent at exponentially-increasing intervals,
+      until the interval value reaches 64 seconds, at which time the
+      interval stops growing.  64 seconds after the last retransmission,
+      a final RST segment is sent indicating that the connection has
+      been closed.  This implementation waits about 9 minutes before
+      timing out the connection, while the first implementation shown
+      allows only 12 seconds.
+
+   References
+      None known.
+
+   How to detect
+      A simple transfer can be easily interrupted by disconnecting the
+      receiving host from the network.  tcpdump or another appropriate
+      tool should show the retransmissions being sent.  Several trials
+      in a low-rtt environment may be required to demonstrate the bug.
+
+   How to fix
+      For one of the implementations studied, this problem seemed to be
+      the result of an error introduced with the addition of the
+      Brakmo-Peterson RTO algorithm [Brakmo95], which can return a value
+      of zero where the older Jacobson algorithm always returns a
+
+
+
+Paxson, et. al.              Informational                     [Page 33]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+      positive value.  Brakmo and Peterson specified an additional step
+      of min(rtt + 2, RTO) to avoid problems with this.  Unfortunately,
+      in the implementation this step was omitted when calculating the
+      exponential backoff for the RTO.  This results in an RTO of 0
+      seconds being multiplied by the backoff, yielding again zero, and
+      then being subjected to a later MAX operation that increases it to
+      1 second, regardless of the backoff factor.
+
+      A similar TCP persist failure has the same cause.
+
+2.11.
+
+   Name of Problem
+      Insufficient interval between keepalives
+
+   Classification
+      Reliability
+
+   Description
+      Keep-alive is a mechanism for checking whether an idle connection
+      is still alive.  According to RFC 1122, keep-alive may be included
+      in an implementation.  If it is included, the interval between
+      keep-alive packets MUST be configurable, and MUST default to no
+      less than two hours.
+
+   Significance
+      In congested networks, this problem can lead to unwarranted
+      termination of connections.
+
+   Implications
+      According to RFC 1122, keep-alive is not required of
+      implementations because it could: (1) cause perfectly good
+      connections to break during transient Internet failures; (2)
+      consume unnecessary bandwidth ("if no one is using the connection,
+      who cares if it is still good?"); and (3) cost money for an
+      Internet path that charges for packets.  Regarding this last
+      point, we note that in addition the presence of dial-on-demand
+      links in the route can greatly magnify the cost penalty of excess
+      keepalives, potentially forcing a full-time connection on a link
+      that would otherwise only be connected a few minutes a day.
+
+      If keepalive is provided, the RFC states that the required inter-
+      keepalive distance MUST default to no less than two hours.  If it
+      does not, the probability of connections breaking increases, the
+      bandwidth used due to keepalives increases, and cost increases
+      over paths which charge per packet.
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 34]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   Relevant RFCs
+      RFC 1122 specifies that the keep-alive mechanism may be provided.
+      It also specifies the two hour minimum for the default interval
+      between keepalive probes.
+
+   Trace file demonstrating it
+      Made using the Orchestra tool at the peer of the machine using
+      keep-alive.  Machine A was configured to use default settings for
+      the keepalive timer.
+
+   11:36:32.910000 A > B: 3288354305:0      win 28672 datasz 4 SYN
+   11:36:32.930000 B > A: 896001:3288354306 win 4096  datasz 4 SYN ACK
+   11:36:32.950000 A > B: 3288354306:896002 win 28672 datasz 0 ACK
+
+   11:50:01.190000 A > B: 3288354305:896002 win 28672 datasz 0 ACK
+   11:50:01.210000 B > A: 896002:3288354306 win 4096  datasz 0 ACK
+
+   12:03:29.410000 A > B: 3288354305:896002 win 28672 datasz 0 ACK
+   12:03:29.430000 B > A: 896002:3288354306 win 4096  datasz 0 ACK
+
+   12:16:57.630000 A > B: 3288354305:896002 win 28672 datasz 0 ACK
+   12:16:57.650000 B > A: 896002:3288354306 win 4096  datasz 0 ACK
+
+   12:30:25.850000 A > B: 3288354305:896002 win 28672 datasz 0 ACK
+   12:30:25.870000 B > A: 896002:3288354306 win 4096  datasz 0 ACK
+
+   12:43:54.070000 A > B: 3288354305:896002 win 28672 datasz 0 ACK
+   12:43:54.090000 B > A: 896002:3288354306 win 4096  datasz 0 ACK
+
+      The initial three packets are the SYN exchange for connection
+      setup.  About 13 minutes later, the keepalive timer fires because
+      the connection is idle.  The keepalive is acknowledged, and the
+      timer fires again in about 13 more minutes.  This behavior
+      continues indefinitely until the connection is closed, and is a
+      violation of the specification.
+
+   Trace file demonstrating correct behavior
+      Made using the Orchestra tool at the peer of the machine using
+      keep-alive.  Machine A was configured to use default settings for
+      the keepalive timer.
+
+   17:37:20.500000 A > B: 34155521:0       win 4096 datasz 4 SYN
+   17:37:20.520000 B > A: 6272001:34155522 win 4096 datasz 4 SYN ACK
+   17:37:20.540000 A > B: 34155522:6272002 win 4096 datasz 0 ACK
+
+   19:37:25.430000 A > B: 34155521:6272002 win 4096 datasz 0 ACK
+   19:37:25.450000 B > A: 6272002:34155522 win 4096 datasz 0 ACK
+
+
+
+
+Paxson, et. al.              Informational                     [Page 35]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   21:37:30.560000 A > B: 34155521:6272002 win 4096 datasz 0 ACK
+   21:37:30.570000 B > A: 6272002:34155522 win 4096 datasz 0 ACK
+
+   23:37:35.580000 A > B: 34155521:6272002 win 4096 datasz 0 ACK
+   23:37:35.600000 B > A: 6272002:34155522 win 4096 datasz 0 ACK
+
+   01:37:40.620000 A > B: 34155521:6272002 win 4096 datasz 0 ACK
+   01:37:40.640000 B > A: 6272002:34155522 win 4096 datasz 0 ACK
+
+   03:37:45.590000 A > B: 34155521:6272002 win 4096 datasz 0 ACK
+   03:37:45.610000 B > A: 6272002:34155522 win 4096 datasz 0 ACK
+
+      The initial three packets are the SYN exchange for connection
+      setup.  Just over two hours later, the keepalive timer fires
+      because the connection is idle.  The keepalive is acknowledged,
+      and the timer fires again just over two hours later.  This
+      behavior continues indefinitely until the connection is closed.
+
+   References
+      This problem is documented in [Dawson97].
+
+   How to detect
+      For implementations manifesting this problem, it shows up on a
+      packet trace.  If the connection is left idle, the keepalive
+      probes will arrive closer together than the two hour minimum.
+
+2.12.
+
+   Name of Problem
+      Window probe deadlock
+
+   Classification
+      Reliability
+
+   Description
+      When an application reads a single byte from a full window, the
+      window should not be updated, in order to avoid Silly Window
+      Syndrome (SWS; see [RFC813]).  If the remote peer uses a single
+      byte of data to probe the window, that byte can be accepted into
+      the buffer.  In some implementations, at this point a negative
+      argument to a signed comparison causes all further new data to be
+      considered outside the window; consequently, it is discarded
+      (after sending an ACK to resynchronize).  These discards include
+      the ACKs for the data packets sent by the local TCP, so the TCP
+      will consider the data unacknowledged.
+
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 36]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+      Consequently, the application may be unable to complete sending
+      new data to the remote peer, because it has exhausted the transmit
+      buffer available to its local TCP, and buffer space is never being
+      freed because incoming ACKs that would do so are being discarded.
+      If the application does not read any more data, which may happen
+      due to its failure to complete such sends, then deadlock results.
+
+   Significance
+      It's relatively rare for applications to use TCP in a manner that
+      can exercise this problem.  Most applications only transmit bulk
+      data if they know the other end is prepared to receive the data.
+      However, if a client fails to consume data, putting the server in
+      persist mode, and then consumes a small amount of data, it can
+      mistakenly compute a negative window.  At this point the client
+      will discard all further packets from the server, including ACKs
+      of the client's own data, since they are not inside the
+      (impossibly-sized) window.  If subsequently the client consumes
+      enough data to then send a window update to the server, the
+      situation will be rectified.  That is, this situation can only
+      happen if the client consumes 1 < N < MSS bytes, so as not to
+      cause a window update, and then starts its own transmission
+      towards the server of more than a window's worth of data.
+
+   Implications
+      TCP connections will hang and eventually time out.
+
+   Relevant RFCs
+      RFC 793 describes zero window probing.  RFC 813 describes Silly
+      Window Syndrome.
+
+   Trace file demonstrating it
+      Trace made from a version of tcpdump modified to print out the
+      sequence number attached to an ACK even if it's dataless.  An
+      unmodified tcpdump would not print seq:seq(0); however, for this
+      bug, the sequence number in the ACK is important for unambiguously
+      determining how the TCP is behaving.
+
+   [ Normal connection startup and data transmission from B to A.
+     Options, including MSS of 16344 in both directions, omitted
+     for clarity. ]
+   16:07:32.327616 A > B: S 65360807:65360807(0) win 8192
+   16:07:32.327304 B > A: S 65488807:65488807(0) ack 65360808 win 57344
+   16:07:32.327425 A > B: . 1:1(0) ack 1 win 57344
+   16:07:32.345732 B > A: P 1:2049(2048) ack 1 win 57344
+   16:07:32.347013 B > A: P 2049:16385(14336) ack 1 win 57344
+   16:07:32.347550 B > A: P 16385:30721(14336) ack 1 win 57344
+   16:07:32.348683 B > A: P 30721:45057(14336) ack 1 win 57344
+   16:07:32.467286 A > B: . 1:1(0) ack 45057 win 12288
+
+
+
+Paxson, et. al.              Informational                     [Page 37]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   16:07:32.467854 B > A: P 45057:57345(12288) ack 1 win 57344
+
+   [ B fills up A's offered window ]
+   16:07:32.667276 A > B: . 1:1(0) ack 57345 win 0
+
+   [ B probes A's window with a single byte ]
+   16:07:37.467438 B > A: . 57345:57346(1) ack 1 win 57344
+
+   [ A resynchronizes without accepting the byte ]
+   16:07:37.467678 A > B: . 1:1(0) ack 57345 win 0
+
+   [ B probes A's window again ]
+   16:07:45.467438 B > A: . 57345:57346(1) ack 1 win 57344
+
+   [ A resynchronizes and accepts the byte (per the ack field) ]
+   16:07:45.667250 A > B: . 1:1(0) ack 57346 win 0
+
+   [ The application on A has started generating data.  The first
+     packet A sends is small due to a memory allocation bug. ]
+   16:07:51.358459 A > B: P 1:2049(2048) ack 57346 win 0
+
+   [ B acks A's first packet ]
+   16:07:51.467239 B > A: . 57346:57346(0) ack 2049 win 57344
+
+   [ This looks as though A accepted B's ACK and is sending
+     another packet in response to it.  In fact, A is trying
+     to resynchronize with B, and happens to have data to send
+     and can send it because the first small packet didn't use
+     up cwnd. ]
+   16:07:51.467698 A > B: . 2049:14337(12288) ack 57346 win 0
+
+   [ B acks all of the data that A has sent ]
+   16:07:51.667283 B > A: . 57346:57346(0) ack 14337 win 57344
+
+   [ A tries to resynchronize.  Notice that by the packets
+     seen on the network, A and B *are* in fact synchronized;
+     A only thinks that they aren't. ]
+   16:07:51.667477 A > B: . 14337:14337(0) ack 57346 win 0
+
+   [ A's retransmit timer fires, and B acks all of the data.
+     A once again tries to resynchronize. ]
+   16:07:52.467682 A > B: . 1:14337(14336) ack 57346 win 0
+   16:07:52.468166 B > A: . 57346:57346(0) ack 14337 win 57344
+   16:07:52.468248 A > B: . 14337:14337(0) ack 57346 win 0
+
+   [ A's retransmit timer fires again, and B acks all of the data.
+     A once again tries to resynchronize. ]
+   16:07:55.467684 A > B: . 1:14337(14336) ack 57346 win 0
+
+
+
+Paxson, et. al.              Informational                     [Page 38]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   16:07:55.468172 B > A: . 57346:57346(0) ack 14337 win 57344
+   16:07:55.468254 A > B: . 14337:14337(0) ack 57346 win 0
+
+   Trace file demonstrating correct behavior
+      Made between the same two hosts after applying the bug fix
+      mentioned below (and using the same modified tcpdump).
+
+   [ Connection starts up with data transmission from B to A.
+     Note that due to a separate bug (the fact that A and B
+     are communicating over a loopback driver), B erroneously
+     skips slow start. ]
+   17:38:09.510854 A > B: S 3110066585:3110066585(0) win 16384
+   17:38:09.510926 B > A: S 3110174850:3110174850(0)
+                            ack 3110066586 win 57344
+   17:38:09.510953 A > B: . 1:1(0) ack 1 win 57344
+   17:38:09.512956 B > A: P 1:2049(2048) ack 1 win 57344
+   17:38:09.513222 B > A: P 2049:16385(14336) ack 1 win 57344
+   17:38:09.513428 B > A: P 16385:30721(14336) ack 1 win 57344
+   17:38:09.513638 B > A: P 30721:45057(14336) ack 1 win 57344
+   17:38:09.519531 A > B: . 1:1(0) ack 45057 win 12288
+   17:38:09.519638 B > A: P 45057:57345(12288) ack 1 win 57344
+
+   [ B fills up A's offered window ]
+   17:38:09.719526 A > B: . 1:1(0) ack 57345 win 0
+
+   [ B probes A's window with a single byte.  A resynchronizes
+     without accepting the byte ]
+   17:38:14.499661 B > A: . 57345:57346(1) ack 1 win 57344
+   17:38:14.499724 A > B: . 1:1(0) ack 57345 win 0
+
+   [ B probes A's window again.  A resynchronizes and accepts
+     the byte, as indicated by the ack field ]
+   17:38:19.499764 B > A: . 57345:57346(1) ack 1 win 57344
+   17:38:19.519731 A > B: . 1:1(0) ack 57346 win 0
+
+   [ B probes A's window with a single byte.  A resynchronizes
+     without accepting the byte ]
+   17:38:24.499865 B > A: . 57346:57347(1) ack 1 win 57344
+   17:38:24.499934 A > B: . 1:1(0) ack 57346 win 0
+
+   [ The application on A has started generating data.
+     B acks A's data and A accepts the ACKs and the
+     data transfer continues ]
+   17:38:28.530265 A > B: P 1:2049(2048) ack 57346 win 0
+   17:38:28.719914 B > A: . 57346:57346(0) ack 2049 win 57344
+
+   17:38:28.720023 A > B: . 2049:16385(14336) ack 57346 win 0
+   17:38:28.720089 A > B: . 16385:30721(14336) ack 57346 win 0
+
+
+
+Paxson, et. al.              Informational                     [Page 39]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   17:38:28.720370 B > A: . 57346:57346(0) ack 30721 win 57344
+
+   17:38:28.720462 A > B: . 30721:45057(14336) ack 57346 win 0
+   17:38:28.720526 A > B: P 45057:59393(14336) ack 57346 win 0
+   17:38:28.720824 A > B: P 59393:73729(14336) ack 57346 win 0
+   17:38:28.721124 B > A: . 57346:57346(0) ack 73729 win 47104
+
+   17:38:28.721198 A > B: P 73729:88065(14336) ack 57346 win 0
+   17:38:28.721379 A > B: P 88065:102401(14336) ack 57346 win 0
+
+   17:38:28.721557 A > B: P 102401:116737(14336) ack 57346 win 0
+   17:38:28.721863 B > A: . 57346:57346(0) ack 116737 win 36864
+
+   References
+      None known.
+
+   How to detect
+      Initiate a connection from a client to a server.  Have the server
+      continuously send data until its buffers have been full for long
+      enough to exhaust the window.  Next, have the client read 1 byte
+      and then delay for long enough that the server TCP sends a window
+      probe.  Now have the client start sending data.  At this point, if
+      it ignores the server's ACKs, then the client's TCP suffers from
+      the problem.
+
+   How to fix
+      In one implementation known to exhibit the problem (derived from
+      4.3-Reno), the problem was introduced when the macro MAX() was
+      replaced by the function call max() for computing the amount of
+      space in the receive window:
+
+          tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt));
+
+      When data has been received into a window beyond what has been
+      advertised to the other side, rcv_nxt > rcv_adv, making this
+      negative.  It's clear from the (int) cast that this is intended,
+      but the unsigned max() function sign-extends so the negative
+      number is "larger".  The fix is to change max() to imax():
+
+          tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
+
+      4.3-Tahoe and before did not have this bug, since it used the
+      macro MAX() for this calculation.
+
+2.13.
+
+   Name of Problem
+      Stretch ACK violation
+
+
+
+Paxson, et. al.              Informational                     [Page 40]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   Classification
+      Congestion Control/Performance
+
+   Description
+      To improve efficiency (both computer and network) a data receiver
+      may refrain from sending an ACK for each incoming segment,
+      according to [RFC1122].  However, an ACK should not be delayed an
+      inordinate amount of time.  Specifically, ACKs SHOULD be sent for
+      every second full-sized segment that arrives.  If a second full-
+      sized segment does not arrive within a given timeout (of no more
+      than 0.5 seconds), an ACK should be transmitted, according to
+      [RFC1122].  A TCP receiver which does not generate an ACK for
+      every second full-sized segment exhibits a "Stretch ACK
+      Violation".
+
+   Significance
+      TCP receivers exhibiting this behavior will cause TCP senders to
+      generate burstier traffic, which can degrade performance in
+      congested environments.  In addition, generating fewer ACKs
+      increases the amount of time needed by the slow start algorithm to
+      open the congestion window to an appropriate point, which
+      diminishes performance in environments with large bandwidth-delay
+      products.  Finally, generating fewer ACKs may cause needless
+      retransmission timeouts in lossy environments, as it increases the
+      possibility that an entire window of ACKs is lost, forcing a
+      retransmission timeout.
+
+   Implications
+      When not in loss recovery, every ACK received by a TCP sender
+      triggers the transmission of new data segments.  The burst size is
+      determined by the number of previously unacknowledged segments
+      each ACK covers.  Therefore, a TCP receiver ack'ing more than 2
+      segments at a time causes the sending TCP to generate a larger
+      burst of traffic upon receipt of the ACK.  This large burst of
+      traffic can overwhelm an intervening gateway, leading to higher
+      drop rates for both the connection and other connections passing
+      through the congested gateway.
+
+      In addition, the TCP slow start algorithm increases the congestion
+      window by 1 segment for each ACK received.  Therefore, increasing
+      the ACK interval (thus decreasing the rate at which ACKs are
+      transmitted) increases the amount of time it takes slow start to
+      increase the congestion window to an appropriate operating point,
+      and the connection consequently suffers from reduced performance.
+      This is especially true for connections using large windows.
+
+   Relevant RFCs
+      RFC 1122 outlines delayed ACKs as a recommended mechanism.
+
+
+
+Paxson, et. al.              Informational                     [Page 41]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   Trace file demonstrating it
+      Trace file taken using tcpdump at host B, the data receiver (and
+      ACK originator).  The advertised window (which never changed) and
+      timestamp options have been omitted for clarity, except for the
+      first packet sent by A:
+
+   12:09:24.820187 A.1174 > B.3999: . 2049:3497(1448) ack 1
+       win 33580 <nop,nop,timestamp 2249877 2249914> [tos 0x8]
+   12:09:24.824147 A.1174 > B.3999: . 3497:4945(1448) ack 1
+   12:09:24.832034 A.1174 > B.3999: . 4945:6393(1448) ack 1
+   12:09:24.832222 B.3999 > A.1174: . ack 6393
+   12:09:24.934837 A.1174 > B.3999: . 6393:7841(1448) ack 1
+   12:09:24.942721 A.1174 > B.3999: . 7841:9289(1448) ack 1
+   12:09:24.950605 A.1174 > B.3999: . 9289:10737(1448) ack 1
+   12:09:24.950797 B.3999 > A.1174: . ack 10737
+   12:09:24.958488 A.1174 > B.3999: . 10737:12185(1448) ack 1
+   12:09:25.052330 A.1174 > B.3999: . 12185:13633(1448) ack 1
+   12:09:25.060216 A.1174 > B.3999: . 13633:15081(1448) ack 1
+   12:09:25.060405 B.3999 > A.1174: . ack 15081
+
+      This portion of the trace clearly shows that the receiver (host B)
+      sends an ACK for every third full sized packet received.  Further
+      investigation of this implementation found that the cause of the
+      increased ACK interval was the TCP options being used.  The
+      implementation sent an ACK after it was holding 2*MSS worth of
+      unacknowledged data.  In the above case, the MSS is 1460 bytes so
+      the receiver transmits an ACK after it is holding at least 2920
+      bytes of unacknowledged data.  However, the length of the TCP
+      options being used [RFC1323] took 12 bytes away from the data
+      portion of each packet.  This produced packets containing 1448
+      bytes of data.  But the additional bytes used by the options in
+      the header were not taken into account when determining when to
+      trigger an ACK.  Therefore, it took 3 data segments before the
+      data receiver was holding enough unacknowledged data (>= 2*MSS, or
+      2920 bytes in the above example) to transmit an ACK.
+
+   Trace file demonstrating correct behavior
+      Trace file taken using tcpdump at host B, the data receiver (and
+      ACK originator), again with window and timestamp information
+      omitted except for the first packet:
+
+   12:06:53.627320 A.1172 > B.3999: . 1449:2897(1448) ack 1
+       win 33580 <nop,nop,timestamp 2249575 2249612> [tos 0x8]
+   12:06:53.634773 A.1172 > B.3999: . 2897:4345(1448) ack 1
+   12:06:53.634961 B.3999 > A.1172: . ack 4345
+   12:06:53.737326 A.1172 > B.3999: . 4345:5793(1448) ack 1
+   12:06:53.744401 A.1172 > B.3999: . 5793:7241(1448) ack 1
+   12:06:53.744592 B.3999 > A.1172: . ack 7241
+
+
+
+Paxson, et. al.              Informational                     [Page 42]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   12:06:53.752287 A.1172 > B.3999: . 7241:8689(1448) ack 1
+   12:06:53.847332 A.1172 > B.3999: . 8689:10137(1448) ack 1
+   12:06:53.847525 B.3999 > A.1172: . ack 10137
+
+      This trace shows the TCP receiver (host B) ack'ing every second
+      full-sized packet, according to [RFC1122].  This is the same
+      implementation shown above, with slight modifications that allow
+      the receiver to take the length of the options into account when
+      deciding when to transmit an ACK.
+
+   References
+      This problem is documented in [Allman97] and [Paxson97].
+
+   How to detect
+      Stretch ACK violations show up immediately in receiver-side packet
+      traces of bulk transfers, as shown above.  However, packet traces
+      made on the sender side of the TCP connection may lead to
+      ambiguities when diagnosing this problem due to the possibility of
+      lost ACKs.
+
+2.14.
+
+   Name of Problem
+      Retransmission sends multiple packets
+
+   Classification
+      Congestion control
+
+   Description
+      When a TCP retransmits a segment due to a timeout expiration or
+      beginning a fast retransmission sequence, it should only transmit
+      a single segment.  A TCP that transmits more than one segment
+      exhibits "Retransmission Sends Multiple Packets".
+
+      Instances of this problem have been known to occur due to
+      miscomputations involving the use of TCP options.  TCP options
+      increase the TCP header beyond its usual size of 20 bytes.  The
+      total size of the header must be taken into account when
+      retransmitting a packet.  If a TCP sender does not account for the
+      length of the TCP options when determining how much data to
+      retransmit, it will send too much data to fit into a single
+      packet.  In this case, the correct retransmission will be followed
+      by a short segment (tinygram) containing data that may not need to
+      be retransmitted.
+
+      A specific case is a TCP using the RFC 1323 timestamp option,
+      which adds 12 bytes to the standard 20-byte TCP header.  On
+      retransmission of a packet, the 12 byte option is incorrectly
+
+
+
+Paxson, et. al.              Informational                     [Page 43]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+      interpreted as part of the data portion of the segment.  A
+      standard TCP header and a new 12-byte option are added to the data,
+      which yields a transmission of 12 bytes more data than contained
+      in the original segment.  This overflow causes a smaller packet,
+      with 12 data bytes, to be transmitted.
+
+   Significance
+      This problem is somewhat serious for congested environments
+      because the TCP implementation injects more packets into the
+      network than is appropriate.  However, since a tinygram is only
+      sent in response to a fast retransmit or a timeout, it does not
+      affect the sustained sending rate.
+
+   Implications
+      A TCP exhibiting this behavior is stressing the network with more
+      traffic than appropriate, and stressing routers by increasing the
+      number of packets they must process.  The redundant tinygram will
+      also elicit a duplicate ACK from the receiver, resulting in yet
+      another unnecessary transmission.
+
+   Relevant RFCs
+      RFC 1122 requires use of slow start after loss; RFC 2001
+      explicates slow start; RFC 1323 describes the timestamp option
+      that has been observed to lead to some implementations exhibiting
+      this problem.
+
+   Trace file demonstrating it
+      Made using tcpdump recording at a machine on the same subnet as
+      Host A.  Host A is the sender and Host B is the receiver.  The
+      advertised window and timestamp options have been omitted for
+      clarity, except for the first segment sent by host A.  In
+      addition, portions of the trace file not pertaining to the packet
+      in question have been removed (missing packets are denoted by
+      "[...]" in the trace).
+
+   11:55:22.701668 A > B: . 7361:7821(460) ack 1
+       win 49324 <nop,nop,timestamp 3485348 3485113>
+   11:55:22.702109 A > B: . 7821:8281(460) ack 1
+   [...]
+
+   11:55:23.112405 B > A: . ack 7821
+   11:55:23.113069 A > B: . 12421:12881(460) ack 1
+   11:55:23.113511 A > B: . 12881:13341(460) ack 1
+   11:55:23.333077 B > A: . ack 7821
+   11:55:23.336860 B > A: . ack 7821
+   11:55:23.340638 B > A: . ack 7821
+   11:55:23.341290 A > B: . 7821:8281(460) ack 1
+   11:55:23.341317 A > B: . 8281:8293(12) ack 1
+
+
+
+Paxson, et. al.              Informational                     [Page 44]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   11:55:23.498242 B > A: . ack 7821
+   11:55:23.506850 B > A: . ack 7821
+   11:55:23.510630 B > A: . ack 7821
+
+   [...]
+
+   11:55:23.746649 B > A: . ack 10581
+
+      The second line of the above trace shows the original transmission
+      of a segment which is later dropped.  After 3 duplicate ACKs, line
+      9 of the trace shows the dropped packet (7821:8281), with a 460-
+      byte payload, being retransmitted.  Immediately following this
+      retransmission, a packet with a 12-byte payload is unnecessarily
+      sent.
+
+   Trace file demonstrating correct behavior
+      The trace file would be identical to the one above, with a single
+      line:
+
+      11:55:23.341317 A > B: . 8281:8293(12) ack 1
+
+      omitted.
+
+   References
+      [Brakmo95]
+
+   How to detect
+      This problem can be detected by examining a packet trace of the
+      TCP connections of a machine using TCP options, during which a
+      packet is retransmitted.
+
+2.15.
+
+   Name of Problem
+      Failure to send FIN notification promptly
+
+   Classification
+      Performance
+
+   Description
+      When an application closes a connection, the corresponding TCP
+      should send the FIN notification promptly to its peer (unless
+      prevented by the congestion window).  If a TCP implementation
+      delays in sending the FIN notification, for example due to waiting
+      until unacknowledged data has been acknowledged, then it is said
+      to exhibit "Failure to send FIN notification promptly".
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 45]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+      Also, while not strictly required, FIN segments should include the
+      PSH flag to ensure expedited delivery of any pending data at the
+      receiver.
+
+   Significance
+      The greatest impact occurs for short-lived connections, since for
+      these the additional time required to close the connection
+      introduces the greatest relative delay.
+
+      The additional time can be significant in the common case of the
+      sender waiting for an ACK that is delayed by the receiver.
+
+   Implications
+      Can diminish total throughput as seen at the application layer,
+      because connection termination takes longer to complete.
+
+   Relevant RFCs
+      RFC 793 indicates that a receiver should treat an incoming FIN
+      flag as implying the push function.
+
+   Trace file demonstrating it
+      Made using tcpdump (no losses reported by the packet filter).
+
+   10:04:38.68 A > B: S 1031850376:1031850376(0) win 4096
+                   <mss 1460,wscale 0,eol> (DF)
+   10:04:38.71 B > A: S 596916473:596916473(0) ack 1031850377
+                   win 8760 <mss 1460> (DF)
+   10:04:38.73 A > B: . ack 1 win 4096 (DF)
+   10:04:41.98 A > B: P 1:4(3) ack 1 win 4096 (DF)
+   10:04:42.15 B > A: . ack 4 win 8757 (DF)
+   10:04:42.23 A > B: P 4:7(3) ack 1 win 4096 (DF)
+   10:04:42.25 B > A: P 1:11(10) ack 7 win 8754 (DF)
+   10:04:42.32 A > B: . ack 11 win 4096 (DF)
+   10:04:42.33 B > A: P 11:51(40) ack 7 win 8754 (DF)
+   10:04:42.51 A > B: . ack 51 win 4096 (DF)
+   10:04:42.53 B > A: F 51:51(0) ack 7 win 8754 (DF)
+   10:04:42.56 A > B: FP 7:7(0) ack 52 win 4096 (DF)
+   10:04:42.58 B > A: . ack 8 win 8754 (DF)
+
+      Machine B in the trace above does not send out a FIN notification
+      promptly if there is any data outstanding.  It instead waits for
+      all unacknowledged data to be acknowledged before sending the FIN
+      segment.  The connection was closed at 10:04:42.33 after
+      requesting 40 bytes to be sent.  However, the FIN notification
+      isn't sent until 10:04:42.53, after the (delayed) acknowledgement
+      of the 40 bytes of data.
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 46]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   Trace file demonstrating correct behavior
+      Made using tcpdump (no losses reported by the packet filter).
+
+   10:27:53.85 C > D: S 419744533:419744533(0) win 4096
+                   <mss 1460,wscale 0,eol> (DF)
+   10:27:53.92 D > C: S 10082297:10082297(0) ack 419744534
+                   win 8760 <mss 1460> (DF)
+   10:27:53.95 C > D: . ack 1 win 4096 (DF)
+   10:27:54.42 C > D: P 1:4(3) ack 1 win 4096 (DF)
+   10:27:54.62 D > C: . ack 4 win 8757 (DF)
+   10:27:54.76 C > D: P 4:7(3) ack 1 win 4096 (DF)
+   10:27:54.89 D > C: P 1:11(10) ack 7 win 8754 (DF)
+   10:27:54.90 D > C: FP 11:51(40) ack 7 win 8754 (DF)
+   10:27:54.92 C > D: . ack 52 win 4096 (DF)
+   10:27:55.01 C > D: FP 7:7(0) ack 52 win 4096 (DF)
+   10:27:55.09 D > C: . ack 8 win 8754 (DF)
+
+      Here, Machine D sends a FIN with 40 bytes of data even before the
+      original 10 octets have been acknowledged. This is correct
+      behavior as it provides for the highest performance.
+
+   References
+      This problem is documented in [Dawson97].
+
+   How to detect
+      For implementations manifesting this problem, it shows up on a
+      packet trace.
+
+2.16.
+
+   Name of Problem
+      Failure to send a RST after Half Duplex Close
+
+   Classification
+      Resource management
+
+   Description
+      RFC 1122 4.2.2.13 states that a TCP SHOULD send a RST if data is
+      received after "half duplex close", i.e. if it cannot be delivered
+      to the application.  A TCP that fails to do so is said to exhibit
+      "Failure to send a RST after Half Duplex Close".
+
+   Significance
+      Potentially serious for TCP endpoints that manage large numbers of
+      connections, due to exhaustion of memory and/or process slots
+      available for managing connection state.
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 47]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   Implications
+      Failure to send the RST can lead to permanently hung TCP
+      connections.  This problem has been demonstrated when HTTP clients
+      abort connections, common when users move on to a new page before
+      the current page has finished downloading.  The HTTP client closes
+      by transmitting a FIN while the server is transmitting images,
+      text, etc.  The server TCP receives the FIN, but its application
+      does not close the connection until all data has been queued for
+      transmission.  Since the server will not transmit a FIN until all
+      the preceding data has been transmitted, deadlock results if the
+      client TCP does not consume the pending data or tear down the
+      connection: the window decreases to zero, since the client cannot
+      pass the data to the application, and the server sends probe
+      segments.  The client acknowledges the probe segments with a zero
+      window. As mandated in RFC1122 4.2.2.17, the probe segments are
+      transmitted forever.  Server connection state remains in
+      CLOSE_WAIT, and eventually server processes are exhausted.
+
+      Note that there are two bugs.  First, probe segments should be
+      ignored if the window can never subsequently increase.  Second, a
+      RST should be sent when data is received after half duplex close.
+      Fixing the first bug, but not the second, results in the probe
+      segments eventually timing out the connection, but the server
+      remains in CLOSE_WAIT for a significant and unnecessary period.
+
+   Relevant RFCs
+      RFC 1122 sections 4.2.2.13 and 4.2.2.17.
+
+   Trace file demonstrating it
+      Made using an unknown network analyzer.  No drop information
+      available.
+
+   client.1391 > server.8080: S 0:1(0) ack: 0 win: 2000 <mss: 5b4>
+   server.8080 > client.1391: SA 8c01:8c02(0) ack: 1 win: 8000 <mss:100>
+   client.1391 > server.8080: PA
+   client.1391 > server.8080: PA 1:1c2(1c1) ack: 8c02 win: 2000
+   server.8080 > client.1391: [DF] PA 8c02:8cde(dc) ack: 1c2 win: 8000
+   server.8080 > client.1391: [DF] A 8cde:9292(5b4) ack: 1c2 win: 8000
+   server.8080 > client.1391: [DF] A 9292:9846(5b4) ack: 1c2 win: 8000
+   server.8080 > client.1391: [DF] A 9846:9dfa(5b4) ack: 1c2 win: 8000
+   client.1391 > server.8080: PA
+   server.8080 > client.1391: [DF] A 9dfa:a3ae(5b4) ack: 1c2 win: 8000
+   server.8080 > client.1391: [DF] A a3ae:a962(5b4) ack: 1c2 win: 8000
+   server.8080 > client.1391: [DF] A a962:af16(5b4) ack: 1c2 win: 8000
+   server.8080 > client.1391: [DF] A af16:b4ca(5b4) ack: 1c2 win: 8000
+   client.1391 > server.8080: PA
+   server.8080 > client.1391: [DF] A b4ca:ba7e(5b4) ack: 1c2 win: 8000
+   server.8080 > client.1391: [DF] A b4ca:ba7e(5b4) ack: 1c2 win: 8000
+
+
+
+Paxson, et. al.              Informational                     [Page 48]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   client.1391 > server.8080: PA
+   server.8080 > client.1391: [DF] A ba7e:bdfa(37c) ack: 1c2 win: 8000
+   client.1391 > server.8080: PA
+   server.8080 > client.1391: [DF] A bdfa:bdfb(1) ack: 1c2 win: 8000
+   client.1391 > server.8080: PA
+
+   [ HTTP client aborts and enters FIN_WAIT_1 ]
+
+   client.1391 > server.8080: FPA
+
+   [ server ACKs the FIN and enters CLOSE_WAIT ]
+
+   server.8080 > client.1391: [DF] A
+
+   [ client enters FIN_WAIT_2 ]
+
+   server.8080 > client.1391: [DF] A bdfa:bdfb(1) ack: 1c3 win: 8000
+
+   [ server continues to try to send its data ]
+
+   client.1391 > server.8080: PA < window = 0 >
+   server.8080 > client.1391: [DF] A bdfa:bdfb(1) ack: 1c3 win: 8000
+   client.1391 > server.8080: PA < window = 0 >
+   server.8080 > client.1391: [DF] A bdfa:bdfb(1) ack: 1c3 win: 8000
+   client.1391 > server.8080: PA < window = 0 >
+   server.8080 > client.1391: [DF] A bdfa:bdfb(1) ack: 1c3 win: 8000
+   client.1391 > server.8080: PA < window = 0 >
+   server.8080 > client.1391: [DF] A bdfa:bdfb(1) ack: 1c3 win: 8000
+   client.1391 > server.8080: PA < window = 0 >
+
+   [ ... repeat ad exhaustium ... ]
+
+   Trace file demonstrating correct behavior
+      Made using an unknown network analyzer.  No drop information
+      available.
+
+   client > server D=80 S=59500 Syn Seq=337 Len=0 Win=8760
+   server > client D=59500 S=80 Syn Ack=338 Seq=80153 Len=0 Win=8760
+   client > server D=80 S=59500 Ack=80154 Seq=338 Len=0 Win=8760
+
+   [ ... normal data omitted ... ]
+
+   client > server D=80 S=59500 Ack=114559 Seq=596 Len=0 Win=8760
+   server > client D=59500 S=80 Ack=596 Seq=114559 Len=1460 Win=8760
+
+   [ client closes connection ]
+
+   client > server D=80 S=59500 Fin Seq=596 Len=0 Win=8760
+
+
+
+Paxson, et. al.              Informational                     [Page 49]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   server > client D=59500 S=80 Ack=597 Seq=116019 Len=1460 Win=8760
+
+   [ client sends RST (RFC1122 4.2.2.13) ]
+
+   client > server D=80 S=59500 Rst Seq=597 Len=0 Win=0
+   server > client D=59500 S=80 Ack=597 Seq=117479 Len=1460 Win=8760
+   client > server D=80 S=59500 Rst Seq=597 Len=0 Win=0
+   server > client D=59500 S=80 Ack=597 Seq=118939 Len=1460 Win=8760
+   client > server D=80 S=59500 Rst Seq=597 Len=0 Win=0
+   server > client D=59500 S=80 Ack=597 Seq=120399 Len=892 Win=8760
+   client > server D=80 S=59500 Rst Seq=597 Len=0 Win=0
+   server > client D=59500 S=80 Ack=597 Seq=121291 Len=1460 Win=8760
+   client > server D=80 S=59500 Rst Seq=597 Len=0 Win=0
+
+      "client" sends a number of RSTs, one in response to each incoming
+      packet from "server".  One might wonder why "server" keeps sending
+      data packets after it has received a RST from "client"; the
+      explanation is that "server" had already transmitted all five of
+      the data packets before receiving the first RST from "client", so
+      it is too late to avoid transmitting them.
+
+   How to detect
+      The problem can be detected by inspecting packet traces of a
+      large, interrupted bulk transfer.
+
+2.17.
+
+   Name of Problem
+      Failure to RST on close with data pending
+
+   Classification
+      Resource management
+
+   Description
+      When an application closes a connection in such a way that it can
+      no longer read any received data, the TCP SHOULD, per section
+      4.2.2.13 of RFC 1122, send a RST if there is any unread received
+      data, or if any new data is received. A TCP that fails to do so
+      exhibits "Failure to RST on close with data pending".
+
+      Note that, for some TCPs, this situation can be caused by an
+      application "crashing" while a peer is sending data.
+
+      We have observed a number of TCPs that exhibit this problem.  The
+      problem is less serious if any subsequent data sent to the now-
+      closed connection endpoint elicits a RST (see illustration below).
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 50]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   Significance
+      This problem is most significant for endpoints that engage in
+      large numbers of connections, as their ability to do so will be
+      curtailed as they leak away resources.
+
+   Implications
+      Failure to reset the connection can lead to permanently hung
+      connections, in which the remote endpoint takes no further action
+      to tear down the connection because it is waiting on the local TCP
+      to first take some action.  This is particularly the case if the
+      local TCP also allows the advertised window to go to zero, and
+      fails to tear down the connection when the remote TCP engages in
+      "persist" probes (see example below).
+
+   Relevant RFCs
+      RFC 1122 section 4.2.2.13.  Also, 4.2.2.17 for the zero-window
+      probing discussion below.
+
+   Trace file demonstrating it
+      Made using tcpdump.  No drop information available.
+
+   13:11:46.04 A > B: S 458659166:458659166(0) win 4096
+                       <mss 1460,wscale 0,eol> (DF)
+   13:11:46.04 B > A: S 792320000:792320000(0) ack 458659167
+                       win 4096
+   13:11:46.04 A > B: . ack 1 win 4096 (DF)
+   13:11:55.80 A > B: . 1:513(512) ack 1 win 4096 (DF)
+   13:11:55.80 A > B: . 513:1025(512) ack 1 win 4096 (DF)
+   13:11:55.83 B > A: . ack 1025 win 3072
+   13:11:55.84 A > B: . 1025:1537(512) ack 1 win 4096 (DF)
+   13:11:55.84 A > B: . 1537:2049(512) ack 1 win 4096 (DF)
+   13:11:55.85 A > B: . 2049:2561(512) ack 1 win 4096 (DF)
+   13:11:56.03 B > A: . ack 2561 win 1536
+   13:11:56.05 A > B: . 2561:3073(512) ack 1 win 4096 (DF)
+   13:11:56.06 A > B: . 3073:3585(512) ack 1 win 4096 (DF)
+   13:11:56.06 A > B: . 3585:4097(512) ack 1 win 4096 (DF)
+   13:11:56.23 B > A: . ack 4097 win 0
+   13:11:58.16 A > B: . 4096:4097(1) ack 1 win 4096 (DF)
+   13:11:58.16 B > A: . ack 4097 win 0
+   13:12:00.16 A > B: . 4096:4097(1) ack 1 win 4096 (DF)
+   13:12:00.16 B > A: . ack 4097 win 0
+   13:12:02.16 A > B: . 4096:4097(1) ack 1 win 4096 (DF)
+   13:12:02.16 B > A: . ack 4097 win 0
+   13:12:05.37 A > B: . 4096:4097(1) ack 1 win 4096 (DF)
+   13:12:05.37 B > A: . ack 4097 win 0
+   13:12:06.36 B > A: F 1:1(0) ack 4097 win 0
+   13:12:06.37 A > B: . ack 2 win 4096 (DF)
+   13:12:11.78 A > B: . 4096:4097(1) ack 2 win 4096 (DF)
+
+
+
+Paxson, et. al.              Informational                     [Page 51]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   13:12:11.78 B > A: . ack 4097 win 0
+   13:12:24.59 A > B: . 4096:4097(1) ack 2 win 4096 (DF)
+   13:12:24.60 B > A: . ack 4097 win 0
+   13:12:50.22 A > B: . 4096:4097(1) ack 2 win 4096 (DF)
+   13:12:50.22 B > A: . ack 4097 win 0
+
+      Machine B in the trace above does not drop received data when the
+      socket is "closed" by the application (in this case, the
+      application process was terminated). This occurred at
+      approximately 13:12:06.36 and resulted in the FIN being sent in
+      response to the close. However, because there is no longer an
+      application to deliver the data to, the TCP should have instead
+      sent a RST.
+
+      Note: Machine A's zero-window probing is also broken.  It is
+      resending old data, rather than new data. Section 3.7 in RFC 793
+      and Section 4.2.2.17 in RFC 1122 discuss zero-window probing.
+
+   Trace file demonstrating better behavior
+      Made using tcpdump.  No drop information available.
+
+      Better, but still not fully correct, behavior, per the discussion
+      below.  We show this behavior because it has been observed for a
+      number of different TCP implementations.
+
+   13:48:29.24 C > D: S 73445554:73445554(0) win 4096
+                       <mss 1460,wscale 0,eol> (DF)
+   13:48:29.24 D > C: S 36050296:36050296(0) ack 73445555
+                       win 4096 <mss 1460,wscale 0,eol> (DF)
+   13:48:29.25 C > D: . ack 1 win 4096 (DF)
+   13:48:30.78 C > D: . 1:1461(1460) ack 1 win 4096 (DF)
+   13:48:30.79 C > D: . 1461:2921(1460) ack 1 win 4096 (DF)
+   13:48:30.80 D > C: . ack 2921 win 1176 (DF)
+   13:48:32.75 C > D: . 2921:4097(1176) ack 1 win 4096 (DF)
+   13:48:32.82 D > C: . ack 4097 win 0 (DF)
+   13:48:34.76 C > D: . 4096:4097(1) ack 1 win 4096 (DF)
+   13:48:34.84 D > C: . ack 4097 win 0 (DF)
+   13:48:36.34 D > C: FP 1:1(0) ack 4097 win 4096 (DF)
+   13:48:36.34 C > D: . 4097:5557(1460) ack 2 win 4096 (DF)
+   13:48:36.34 D > C: R 36050298:36050298(0) win 24576
+   13:48:36.34 C > D: . 5557:7017(1460) ack 2 win 4096 (DF)
+   13:48:36.34 D > C: R 36050298:36050298(0) win 24576
+
+      In this trace, the application process is terminated on Machine D
+      at approximately 13:48:36.34.  Its TCP sends the FIN with the
+      window opened again (since it discarded the previously received
+      data).  Machine C promptly sends more data, causing Machine D to
+
+
+
+
+Paxson, et. al.              Informational                     [Page 52]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+      reset the connection since it cannot deliver the data to the
+      application. Ideally, Machine D SHOULD send a RST instead of
+      dropping the data and re-opening the receive window.
+
+      Note: Machine C's zero-window probing is broken, the same as in
+      the example above.
+
+   Trace file demonstrating correct behavior
+      Made using tcpdump.  No losses reported by the packet filter.
+
+   14:12:02.19 E > F: S 1143360000:1143360000(0) win 4096
+   14:12:02.19 F > E: S 1002988443:1002988443(0) ack 1143360001
+                       win 4096 <mss 1460> (DF)
+   14:12:02.19 E > F: . ack 1 win 4096
+   14:12:10.43 E > F: . 1:513(512) ack 1 win 4096
+   14:12:10.61 F > E: . ack 513 win 3584 (DF)
+   14:12:10.61 E > F: . 513:1025(512) ack 1 win 4096
+   14:12:10.61 E > F: . 1025:1537(512) ack 1 win 4096
+   14:12:10.81 F > E: . ack 1537 win 2560 (DF)
+   14:12:10.81 E > F: . 1537:2049(512) ack 1 win 4096
+   14:12:10.81 E > F: . 2049:2561(512) ack 1 win 4096
+   14:12:10.81 E > F: . 2561:3073(512) ack 1 win 4096
+   14:12:11.01 F > E: . ack 3073 win 1024 (DF)
+   14:12:11.01 E > F: . 3073:3585(512) ack 1 win 4096
+   14:12:11.01 E > F: . 3585:4097(512) ack 1 win 4096
+   14:12:11.21 F > E: . ack 4097 win 0 (DF)
+   14:12:15.88 E > F: . 4097:4098(1) ack 1 win 4096
+   14:12:16.06 F > E: . ack 4097 win 0 (DF)
+   14:12:20.88 E > F: . 4097:4098(1) ack 1 win 4096
+   14:12:20.91 F > E: . ack 4097 win 0 (DF)
+   14:12:21.94 F > E: R 1002988444:1002988444(0) win 4096
+
+      When the application terminates at 14:12:21.94, F immediately
+      sends a RST.
+
+      Note: Machine E's zero-window probing is (finally) correct.
+
+   How to detect
+      The problem can often be detected by inspecting packet traces of a
+      transfer in which the receiving application terminates abnormally.
+      When doing so, there can be an ambiguity (if only looking at the
+      trace) as to whether the receiving TCP did indeed have unread data
+      that it could now no longer deliver.  To provoke this to happen,
+      it may help to suspend the receiving application so that it fails
+      to consume any data, eventually exhausting the advertised window.
+      At this point, since the advertised window is zero, we know that
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 53]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+      the receiving TCP has undelivered data buffered up.  Terminating
+      the application process then should suffice to test the
+      correctness of the TCP's behavior.
+
+2.18.
+
+   Name of Problem
+      Options missing from TCP MSS calculation
+
+   Classification
+      Reliability / performance
+
+   Description
+      When a TCP determines how much data to send per packet, it
+      calculates a segment size based on the MTU of the path.  It must
+      then subtract from that MTU the size of the IP and TCP headers in
+      the packet.  If IP options and TCP options are not taken into
+      account correctly in this calculation, the resulting segment size
+      may be too large.  TCPs that do so are said to exhibit "Options
+      missing from TCP MSS calculation".
+
+   Significance
+      In some implementations, this causes the transmission of strangely
+      fragmented packets.  In some implementations with Path MTU (PMTU)
+      discovery [RFC1191], this problem can actually result in a total
+      failure to transmit any data at all, regardless of the environment
+      (see below).
+
+      Arguably, especially since the wide deployment of firewalls, IP
+      options appear only rarely in normal operations.
+
+   Implications
+      In implementations using PMTU discovery, this problem can result
+      in packets that are too large for the output interface, and that
+      have the DF (don't fragment) bit set in the IP header.  Thus, the
+      IP layer on the local machine is not allowed to fragment the
+      packet to send it out the interface.  It instead informs the TCP
+      layer of the correct MTU size of the interface; the TCP layer
+      again miscomputes the MSS by failing to take into account the size
+      of IP options; and the problem repeats, with no data flowing.
+
+   Relevant RFCs
+      RFC 1122 describes the calculation of the effective send MSS.  RFC
+      1191 describes Path MTU discovery.
+
+
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 54]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   Trace file demonstrating it
+      Trace file taken using tcpdump on host C.  The first trace
+      demonstrates the fragmentation that occurs without path MTU
+      discovery:
+
+   13:55:25.488728 A.65528 > C.discard:
+           P 567833:569273(1440) ack 1 win 17520
+           <nop,nop,timestamp 3839 1026342>
+           (frag 20828:1472@0+)
+           (ttl 62, optlen=8 LSRR{B#} NOP)
+
+   13:55:25.488943 A > C:
+           (frag 20828:8@1472)
+           (ttl 62, optlen=8 LSRR{B#} NOP)
+
+   13:55:25.489052 C.discard > A.65528:
+           . ack 566385 win 60816
+           <nop,nop,timestamp 1026345 3839> (DF)
+           (ttl 60, id 41266)
+
+      Host A repeatedly sends 1440-octet data segments, but these are
+      fragmented into two packets, one with 1432 octets of data, and
+      another with 8 octets of data.
+
+      The second trace demonstrates the failure to send any data
+      segments, sometimes seen with hosts doing path MTU discovery:
+
+   13:55:44.332219 A.65527 > C.discard:
+           S 1018235390:1018235390(0) win 16384
+           <mss 1460,nop,wscale 0,nop,nop,timestamp 3876 0> (DF)
+           (ttl 62, id 20912, optlen=8 LSRR{B#} NOP)
+
+   13:55:44.333015 C.discard > A.65527:
+           S 1271629000:1271629000(0) ack 1018235391 win 60816
+           <mss 1460,nop,wscale 0,nop,nop,timestamp 1026383 3876> (DF)
+           (ttl 60, id 41427)
+
+   13:55:44.333206 C.discard > A.65527:
+           S 1271629000:1271629000(0) ack 1018235391 win 60816
+           <mss 1460,nop,wscale 0,nop,nop,timestamp 1026383 3876> (DF)
+           (ttl 60, id 41427)
+
+      This is all of the activity seen on this connection.  Eventually
+      host C will time out attempting to establish the connection.
+
+   How to detect
+      The "netcat" utility [Hobbit96] is useful for generating source
+      routed packets:
+
+
+
+Paxson, et. al.              Informational                     [Page 55]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+      1% nc C discard
+      (interactive typing)
+      ^C
+      2% nc C discard < /dev/zero
+      ^C
+      3% nc -g B C discard
+      (interactive typing)
+      ^C
+      4% nc -g B C discard < /dev/zero
+      ^C
+
+      Lines 1 through 3 should generate appropriate packets, which can
+      be verified using tcpdump.  If the problem is present, line 4
+      should generate one of the two kinds of packet traces shown.
+
+   How to fix
+      The implementation should ensure that the effective send MSS
+      calculation includes a term for the IP and TCP options, as
+      mandated by RFC 1122.
+
+3. Security Considerations
+
+   This memo does not discuss any specific security-related TCP
+   implementation problems, as the working group decided to pursue
+   documenting those in a separate document.  Some of the implementation
+   problems discussed here, however, can be used for denial-of-service
+   attacks.  Those classified as congestion control present
+   opportunities to subvert TCPs used for legitimate data transfer into
+   excessively loading network elements.  Those classified as
+   "performance", "reliability" and "resource management" may be
+   exploitable for launching surreptitious denial-of-service attacks
+   against the user of the TCP.  Both of these types of attacks can be
+   extremely difficult to detect because in most respects they look
+   identical to legitimate network traffic.
+
+4. Acknowledgements
+
+   Thanks to numerous correspondents on the tcp-impl mailing list for
+   their input:  Steve Alexander, Larry Backman, Jerry Chu, Alan Cox,
+   Kevin Fall, Richard Fox, Jim Gettys, Rick Jones, Allison Mankin, Neal
+   McBurnett, Perry Metzger, der Mouse, Thomas Narten, Andras Olah,
+   Steve Parker, Francesco Potorti`, Luigi Rizzo, Allyn Romanow, Al
+   Smith, Jerry Toporek, Joe Touch, and Curtis Villamizar.
+
+   Thanks also to Josh Cohen for the traces documenting the "Failure to
+   send a RST after Half Duplex Close" problem; and to John Polstra, who
+   analyzed the "Window probe deadlock" problem.
+
+
+
+
+Paxson, et. al.              Informational                     [Page 56]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+5. References
+
+   [Allman97]   M. Allman, "Fixing Two BSD TCP Bugs," Technical Report
+                CR-204151, NASA Lewis Research Center, Oct. 1997.
+                http://roland.grc.nasa.gov/~mallman/papers/bug.ps
+
+   [RFC2414]    Allman, M., Floyd, S. and C. Partridge, "Increasing
+                TCP's Initial Window", RFC 2414, September 1998.
+
+   [RFC1122]    Braden, R., Editor, "Requirements for Internet Hosts --
+                Communication Layers", STD 3, RFC 1122, October 1989.
+
+   [RFC2119]    Bradner, S., "Key words for use in RFCs to Indicate
+                Requirement Levels", BCP 14, RFC 2119, March 1997.
+
+   [Brakmo95]   L. Brakmo and L. Peterson, "Performance Problems in
+                BSD4.4 TCP," ACM Computer Communication Review,
+                25(5):69-86, 1995.
+
+   [RFC813]     Clark, D., "Window and Acknowledgement Strategy in TCP,"
+                RFC 813, July 1982.
+
+   [Dawson97]   S. Dawson, F. Jahanian, and T. Mitton, "Experiments on
+                Six Commercial TCP Implementations Using a Software
+                Fault Injection Tool," to appear in Software Practice &
+                Experience, 1997.  A technical report version of this
+                paper can be obtained at
+                ftp://rtcl.eecs.umich.edu/outgoing/sdawson/CSE-TR-298-
+                96.ps.gz.
+
+   [Fall96]     K. Fall and S. Floyd, "Simulation-based Comparisons of
+                Tahoe, Reno, and SACK TCP," ACM Computer Communication
+                Review, 26(3):5-21, 1996.
+
+   [Hobbit96]   Hobbit, Avian Research, netcat, available via anonymous
+                ftp to ftp.avian.org, 1996.
+
+   [Hoe96]      J. Hoe, "Improving the Start-up Behavior of a Congestion
+                Control Scheme for TCP," Proc. SIGCOMM '96.
+
+   [Jacobson88] V. Jacobson, "Congestion Avoidance and Control," Proc.
+                SIGCOMM '88.  ftp://ftp.ee.lbl.gov/papers/congavoid.ps.Z
+
+   [Jacobson89] V. Jacobson, C. Leres, and S. McCanne, tcpdump,
+                available via anonymous ftp to ftp.ee.lbl.gov, Jun.
+                1989.
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 57]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   [RFC2018]    Mathis, M., Mahdavi, J., Floyd, S. and A. Romanow, "TCP
+                Selective Acknowledgement Options", RFC 2018, October
+                1996.
+
+   [RFC1191]    Mogul, J. and S. Deering, "Path MTU discovery", RFC
+                1191, November 1990.
+
+   [RFC896]     Nagle, J., "Congestion Control in IP/TCP Internetworks",
+                RFC 896, January 1984.
+
+   [Paxson97]   V. Paxson, "Automated Packet Trace Analysis of TCP
+                Implementations," Proc. SIGCOMM '97, available from
+                ftp://ftp.ee.lbl.gov/papers/vp-tcpanaly-sigcomm97.ps.Z.
+
+   [RFC793]     Postel, J., Editor, "Transmission Control Protocol," STD
+                7, RFC 793, September 1981.
+
+   [RFC2001]    Stevens, W., "TCP Slow Start, Congestion Avoidance, Fast
+                Retransmit, and Fast Recovery Algorithms", RFC 2001,
+                January 1997.
+
+   [Stevens94]  W. Stevens, "TCP/IP Illustrated, Volume 1", Addison-
+                Wesley Publishing Company, Reading, Massachusetts, 1994.
+
+   [Wright95]   G. Wright and W. Stevens, "TCP/IP Illustrated, Volume
+                2", Addison-Wesley Publishing Company, Reading
+                Massachusetts, 1995.
+
+6. Authors' Addresses
+
+   Vern Paxson
+   ACIRI / ICSI
+   1947 Center Street
+   Suite 600
+   Berkeley, CA 94704-1198
+
+   Phone: +1 510/642-4274 x302
+   EMail: vern@aciri.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 58]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   Mark Allman <mallman@grc.nasa.gov>
+   NASA Glenn Research Center/Sterling Software
+   Lewis Field
+   21000 Brookpark Road
+   MS 54-2
+   Cleveland, OH 44135
+   USA
+
+   Phone: +1 216/433-6586
+   Email: mallman@grc.nasa.gov
+
+   Scott Dawson
+   Real-Time Computing Laboratory
+   EECS Building
+   University of Michigan
+   Ann Arbor, MI  48109-2122
+   USA
+
+   Phone: +1 313/763-5363
+   EMail: sdawson@eecs.umich.edu
+
+
+   William C. Fenner
+   Xerox PARC
+   3333 Coyote Hill Road
+   Palo Alto, CA 94304
+   USA
+
+   Phone: +1 650/812-4816
+   EMail: fenner@parc.xerox.com
+
+
+   Jim Griner <jgriner@grc.nasa.gov>
+   NASA Glenn Research Center
+   Lewis Field
+   21000 Brookpark Road
+   MS 54-2
+   Cleveland, OH 44135
+   USA
+
+   Phone: +1 216/433-5787
+   EMail: jgriner@grc.nasa.gov
+
+
+
+
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 59]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+   Ian Heavens
+   Spider Software Ltd.
+   8 John's Place, Leith
+   Edinburgh EH6 7EL
+   UK
+
+   Phone: +44 131/475-7015
+   EMail: ian@spider.com
+
+   Kevin Lahey
+   NASA Ames Research Center/MRJ
+   MS 258-6
+   Moffett Field, CA 94035
+   USA
+
+   Phone: +1 650/604-4334
+   EMail: kml@nas.nasa.gov
+
+
+   Jeff Semke
+   Pittsburgh Supercomputing Center
+   4400 Fifth Ave
+   Pittsburgh, PA 15213
+   USA
+
+   Phone: +1 412/268-4960
+   EMail: semke@psc.edu
+
+
+   Bernie Volz
+   Process Software Corporation
+   959 Concord Street
+   Framingham, MA 01701
+   USA
+
+   Phone: +1 508/879-6994
+   EMail: volz@process.com
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 60]
+
+RFC 2525              TCP Implementation Problems             March 1999
+
+
+7.  Full Copyright Statement
+
+   Copyright (C) The Internet Society (1999).  All Rights Reserved.
+
+   This document and translations of it may be copied and furnished to
+   others, and derivative works that comment on or otherwise explain it
+   or assist in its implementation may be prepared, copied, published
+   and distributed, in whole or in part, without restriction of any
+   kind, provided that the above copyright notice and this paragraph are
+   included on all such copies and derivative works.  However, this
+   document itself may not be modified in any way, such as by removing
+   the copyright notice or references to the Internet Society or other
+   Internet organizations, except as needed for the purpose of
+   developing Internet standards in which case the procedures for
+   copyrights defined in the Internet Standards process must be
+   followed, or as required to translate it into languages other than
+   English.
+
+   The limited permissions granted above are perpetual and will not be
+   revoked by the Internet Society or its successors or assigns.
+
+   This document and the information contained herein is provided on an
+   "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
+   TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
+   BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
+   HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
+   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Paxson, et. al.              Informational                     [Page 61]
+
diff --git a/ext/picotcp/RFC/rfc2757.txt b/ext/picotcp/RFC/rfc2757.txt
new file mode 100644
index 0000000..e49f141
--- /dev/null
+++ b/ext/picotcp/RFC/rfc2757.txt
@@ -0,0 +1,2579 @@
+
+
+
+
+
+
+Network Working Group                                      G. Montenegro
+Request for Comments: 2757                        Sun Microsystems, Inc.
+Category: Informational                                       S. Dawkins
+                                                         Nortel Networks
+                                                                 M. Kojo
+                                                  University of Helsinki
+                                                               V. Magret
+                                                                 Alcatel
+                                                               N. Vaidya
+                                                    Texas A&M University
+                                                            January 2000
+
+
+                           Long Thin Networks
+
+Status of this Memo
+
+   This memo provides information for the Internet community.  It does
+   not specify an Internet standard of any kind.  Distribution of this
+   memo is unlimited.
+
+Copyright Notice
+
+   Copyright (C) The Internet Society (2000).  All Rights Reserved.
+
+Abstract
+
+   In view of the unpredictable and problematic nature of long thin
+   networks (for example, wireless WANs), arriving at an optimized
+   transport is a daunting task.  We have reviewed the existing
+   proposals along with future research items. Based on this overview,
+   we also recommend mechanisms for implementation in long thin
+   networks.
+
+   Our goal is to identify a TCP that works for all users, including
+   users of long thin networks. We started from the working
+   recommendations of the IETF TCP Over Satellite Links (tcpsat) working
+   group with this end in mind.
+
+   We recognize that not every tcpsat recommendation will be required
+   for long thin networks as well, and work toward a set of TCP
+   recommendations that are 'benign' in environments that do not require
+   them.
+
+
+
+
+
+
+
+
+Montenegro, et al.           Informational                      [Page 1]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+Table of Contents
+
+   1 Introduction .................................................    3
+      1.1 Network Architecture ....................................    5
+      1.2 Assumptions about the Radio Link ........................    6
+   2 Should it be IP or Not?  .....................................    7
+      2.1 Underlying Network Error Characteristics ................    7
+      2.2 Non-IP Alternatives .....................................    8
+         2.2.1 WAP ................................................    8
+         2.2.2 Deploying Non-IP Alternatives ......................    9
+      2.3 IP-based Considerations .................................    9
+         2.3.1 Choosing the MTU [Stevens94, RFC1144] ..............    9
+         2.3.2 Path MTU Discovery [RFC1191] .......................   10
+         2.3.3 Non-TCP Proposals ..................................   10
+   3 The Case for TCP .............................................   11
+   4 Candidate Optimizations ......................................   12
+      4.1 TCP: Current Mechanisms .................................   12
+         4.1.1 Slow Start and Congestion Avoidance ................   12
+         4.1.2 Fast Retransmit and Fast Recovery ..................   12
+      4.2 Connection Setup with T/TCP [RFC1397, RFC1644] ..........   14
+      4.3 Slow Start Proposals ....................................   14
+         4.3.1 Larger Initial Window ..............................   14
+         4.3.2 Growing the Window during Slow Start ...............   15
+            4.3.2.1 ACK Counting ..................................   15
+            4.3.2.2 ACK-every-segment .............................   16
+         4.3.3 Terminating Slow Start .............................   17
+         4.3.4 Generating ACKs during Slow Start ..................   17
+      4.4 ACK Spacing .............................................   17
+      4.5 Delayed Duplicate Acknowledgements ......................   18
+      4.6 Selective Acknowledgements [RFC2018] ....................   18
+      4.7 Detecting Corruption Loss ...............................   19
+         4.7.1 Without Explicit Notification ......................   19
+         4.7.2 With Explicit Notifications ........................   20
+      4.8 Active Queue Management .................................   21
+      4.9 Scheduling Algorithms ...................................   21
+      4.10 Split TCP and Performance-Enhancing Proxies (PEPs) .....   22
+         4.10.1 Split TCP Approaches ..............................   23
+         4.10.2 Application Level Proxies .........................   26
+         4.10.3 Snoop and its Derivatives .........................   27
+         4.10.4 PEPs to handle Periods of Disconnection ...........   29
+      4.11 Header Compression Alternatives ........................   30
+      4.12 Payload Compression ....................................   31
+      4.13 TCP Control Block Interdependence [Touch97] ............   32
+   5 Summary of Recommended Optimizations .........................   33
+   6 Conclusion ...................................................   35
+   7 Acknowledgements .............................................   35
+   8 Security Considerations ......................................   35
+
+
+
+
+Montenegro, et al.           Informational                      [Page 2]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   9 References ...................................................   36
+   Authors' Addresses .............................................   44
+   Full Copyright Statement .......................................   46
+
+1 Introduction
+
+   Optimized wireless networking is one of the major hurdles that Mobile
+   Computing must solve if it is to enable ubiquitous access to
+   networking resources. However, current data networking protocols have
+   been optimized primarily for wired networks.  Wireless environments
+   have very different characteristics in terms of latency, jitter, and
+   error rate as compared to wired networks.  Accordingly, traditional
+   protocols are ill-suited to this medium.
+
+   Mobile Wireless networks can be grouped in W-LANs (for example,
+   802.11 compliant networks) and W-WANs (for example, CDPD [CDPD],
+   Ricochet, CDMA [CDMA], PHS, DoCoMo, GSM [GSM] to name a few).  W-WANs
+   present the most serious challenge, given that the length of the
+   wireless link (expressed as the delay*bandwidth product) is typically
+   4 to 5 times as long as that of its W-LAN counterparts.  For example,
+   for an 802.11 network, assuming the delay (round-trip time) is about
+   3 ms and the bandwidth is 1.5 Mbps, the delay*bandwidth product is
+   4500 bits. For a W-WAN such as Ricochet, a typical round-trip time
+   may be around 500 ms. (the best is about 230 ms.), and the sustained
+   bandwidth is about 24 Kbps. This yields a delay*bandwidth product
+   roughly equal to 1.5 KB. In the near future, 3rd Generation wireless
+   services will offer 384Kbps and more.  Assuming a 200 ms round-trip,
+   the delay*bandwidth product in this case is 76.8 Kbits (9.6 KB). This
+   value is larger than the default 8KB buffer space used by many TCP
+   implementations. This means that, whereas for W-LANs the default
+   buffer space is enough, future W-WANs will operate inefficiently
+   (that is, they will not be able to fill the pipe) unless they
+   override the default value. A 3rd Generation wireless service
+   offering 2 Mbps with 200-millisecond latency requires a 50 KB buffer.
+
+   Most importantly, latency across a link adversely affects
+   throughput. For example, [MSMO97] derives an upper bound on TCP
+   throughput. Indeed, the resultant expression is inversely related to
+   the round-trip time.
+
+   The long latencies also push the limits (and commonly transgress
+   them) for what is acceptable to users of interactive applications.
+
+   As a quick glance at our list of references will reveal, there is a
+   wealth of proposals that attempt to solve the wireless networking
+   problem. In this document, we survey the different solutions
+   available or under investigation, and issue the corresponding
+   recommendations.
+
+
+
+Montenegro, et al.           Informational                      [Page 3]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   There is a large body of work on the subject of improving TCP
+   performance over satellite links. The documents under development by
+   the tcpsat working group of the IETF [AGS98, ADGGHOSSTT98] are very
+   relevant. In both cases, it is essential to start by improving the
+   characteristics of the medium by using forward error correction (FEC)
+   at the link layer to reduce the BER (bit error rate) from values as
+   high as 10^-3 to 10^-6 or better. This makes the BER manageable. Once
+   in this realm, retransmission schemes like ARQ (automatic repeat
+   request) may be used to bring it down even further. Notice that
+   sometimes it may be desirable to forego ARQ because of the additional
+   delay it implies.  In particular, time sensitive traffic (video,
+   audio) must be delivered within a certain time limit beyond which the
+   data is obsolete. Exhaustive retransmissions in this case merely
+   succeed in wasting time in order to deliver data that will be
+   discarded once it arrives at its destination.  This indicates the
+   desirability of augmenting the protocol stack implementation on
+   devices such that the upper protocol layers can inform the link and
+   MAC layer when to avoid such costly retransmission schemes.
+
+   Networks that include satellite links are examples of "long fat
+   networks" (LFNs or "elephants"). They are "long" networks because
+   their round-trip time is quite high (for example, 0.5 sec and higher
+   for geosynchronous satellites). Not all satellite links fall within
+   the LFN regime. In particular, round-trip times in a low-earth
+   orbiting (LEO) satellite network may be as little as a few
+   milliseconds (and never extend beyond 160 to 200 ms). W-WANs share
+   the "L" with LFNs. However, satellite networks are also "fat" in the
+   sense that they may have high bandwidth. Satellite networks may often
+   have a delay*bandwidth product above 64 KBytes, in which case they
+   pose additional problems to TCP [TCPHP]. W-WANs do not generally
+   exhibit this behavior. Accordingly, this document only deals with
+   links that are "long thin pipes", and the networks that contain them:
+   "long thin networks". We call these "LTNs".
+
+   This document does not give an overview of the API used to access the
+   underlying transport. We believe this is an orthogonal issue, even
+   though some of the proposals below have been put forth assuming a
+   given interface.  It is possible, for example, to support the
+   traditional socket semantics without fully relying on TCP/IP
+   transport [MOWGLI].
+
+   Our focus is on the on-the-wire protocols. We try to include the most
+   relevant ones and briefly (given that we provide the references
+   needed for further study) mention their most salient points.
+
+
+
+
+
+
+
+Montenegro, et al.           Informational                      [Page 4]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+1.1 Network Architecture
+
+   One significant difference between LFNs and LTNs is that we assume
+   the W-WAN link is the last hop to the end user. This allows us to
+   assume that a single intermediate node sees all packets transferred
+   between the wireless mobile device and the rest of the Internet.
+   This is only one of the topologies considered by the TCP Satellite
+   community.
+
+   Given our focus on mobile wireless applications, we only consider a
+   very specific architecture that includes:
+
+      -  a wireless mobile device, connected via
+
+      -  a wireless link (which may, in fact, comprise several hops at
+         the link layer), to
+
+      -  an intermediate node (sometimes referred to as a base station)
+         connected via
+
+      -  a wireline link, which in turn interfaces with
+
+      -  the landline Internet and millions of legacy servers and web
+         sites.
+
+   Specifically, we are not as concerned with paths that include two
+   wireless segments separated by a wired one. This may occur, for
+   example, if one mobile device connects across its immediate wireless
+   segment via an intermediate node to the Internet, and then via a
+   second wireless segment to another mobile device.  Quite often,
+   mobile devices connect to a legacy server on the wired Internet.
+
+   Typically, the endpoints of the wireless segment are the intermediate
+   node and the mobile device. However, the latter may be a wireless
+   router to a mobile network. This is also important and has
+   applications in, for example, disaster recovery.
+
+   Our target architecture has implications which concern the
+   deployability of candidate solutions. In particular, an important
+   requirement is that we cannot alter the networking stack on the
+   legacy servers. It would be preferable to only change the networking
+   stack at the intermediate node, although changing it at the mobile
+   devices is certainly an option and perhaps a necessity.
+
+   We envision mobile devices that can use the wireless medium very
+   efficiently, but overcome some of its traditional constraints.  That
+   is, full mobility implies that the devices have the flexibility and
+   agility to use whichever happens to be the best network connection
+
+
+
+Montenegro, et al.           Informational                      [Page 5]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   available at any given point in time or space.  Accordingly, devices
+   could switch from a wired office LAN and hand over their ongoing
+   connections to continue on, say, a wireless WAN. This type of agility
+   also requires Mobile IP [RFC2002].
+
+1.2 Assumptions about the Radio Link
+
+   The system architecture described above assumes at most one wireless
+   link (perhaps comprising more than one wireless hop).  However, this
+   is not enough to characterize a wireless link.  Additional
+   considerations are:
+
+      -  What are the error characteristics of the wireless medium?  The
+         link may present a higher BER than a wireline network due to
+         burst errors and disconnections. The techniques below usually
+         do not address all the types of errors. Accordingly, a complete
+         solution should combine the best of all the proposals.
+         Nevertheless, in this document we are more concerned with (and
+         give preference to solving) the most typical case: (1) higher
+         BER due to random errors (which implies longer and more
+         variable delays due to link-layer error corrections and
+         retransmissions) rather than (2) an interruption in service due
+         to a handoff or a disconnection.  The latter are also important
+         and we do include relevant proposals in this survey.
+
+      -  Is the wireless service datagram oriented, or is it a virtual
+         circuit?  Currently, switched virtual circuits are more common,
+         but packet networks are starting to appear, for example,
+         Metricom's Starmode [CB96], CDPD [CDPD] and General Packet
+         Radio Service (GPRS) [GPRS],[BW97] in GSM.
+
+      -  What kind of reliability does the link provide? Wireless
+         services typically retransmit a packet (frame) until it has
+         been acknowledged by the target. They may allow the user to
+         turn off this behavior. For example, GSM allows RLP [RLP]
+         (Radio Link Protocol)  to be turned off.  Metricom has a
+         similar "lightweight" mode. In GSM RLP, a frame is
+         retransmitted until the maximum number of retransmissions
+         (protocol parameter) is reached. What happens when this limit
+         is reached is determined by the telecom operator:  the physical
+         link connection is either disconnected or a link reset is
+         enforced where the sequence numbers are resynchronized and the
+         transmit and receive buffers are flushed resulting in lost
+         data. Some wireless services, like CDMA IS95-RLP [CDMA,
+         Karn93], limit the latency on the wireless link by
+         retransmitting a frame only a couple of times. This decreases
+         the residual frame error rate significantly, but does not
+         provide fully reliable link service.
+
+
+
+Montenegro, et al.           Informational                      [Page 6]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+      -  Does the mobile device transmit and receive at the same time?
+         Doing so increases the cost of the electronics on the mobile
+         device. Typically, this is not the case. We assume in this
+         document that mobile devices do not transmit and receive
+         simultaneously.
+
+      -  Does the mobile device directly address more than one peer on
+         the wireless link? Packets to each different peer may traverse
+         spatially distinct wireless paths. Accordingly, the path to
+         each peer may exhibit very different characteristics.  Quite
+         commonly, the mobile device addresses only one peer (the
+         intermediate node) at any given point in time.  When this is
+         not the case, techniques such as Channel-State Dependent Packet
+         Scheduling come into play (see section 4.9, "Scheduling
+         Algorithms", below).
+
+2 Should it be IP or Not?
+
+   The first decision is whether to use IP as the underlying network
+   protocol or not. In particular, some data protocols evolved from
+   wireless telephony are not always -- though at times they may be --
+   layered on top of IP [MOWGLI, WAP]. These proposals are based on the
+   concept of proxies that provide adaptation services between the
+   wireless and wireline segments.
+
+   This is a reasonable model for mobile devices that always communicate
+   through the proxy. However, we expect many wireless mobile devices to
+   utilize wireline networks whenever they are available. This model
+   closely follows current laptop usage patterns: devices typically
+   utilize LANs, and only resort to dial-up access when "out of the
+   office."
+
+   For these devices, an architecture that assumes IP is the best
+   approach, because it will be required for communications that do not
+   traverse the intermediate node (for example, upon reconnection to a
+   W-LAN or a 10BaseT network at the office).
+
+2.1 Underlying Network Error Characteristics
+
+   Using IP as the underlying network protocol requires a certain (low)
+   level of link robustness that is expected of wireless links.
+
+   IP, and the protocols that are carried in IP packets, are protected
+   end-to-end by checksums that are relatively weak [Stevens94,
+   Paxson97] (and, in some cases, optional). For much of the Internet,
+   these checksums are sufficient; in wireless environments, the error
+   characteristics of the raw wireless link are much less robust than
+   the rest of the end-to-end path.  Hence for paths that include
+
+
+
+Montenegro, et al.           Informational                      [Page 7]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   wireless links, exclusively relying on end-to-end mechanisms to
+   detect and correct transmission errors is undesirable. These should
+   be complemented by local link-level mechanisms. Otherwise, damaged IP
+   packets are propagated through the network only to be discarded at
+   the destination host. For example, intermediate routers are required
+   to check the IP header checksum, but not the UDP or TCP checksums.
+   Accordingly, when the payload of an IP packet is corrupted, this is
+   not detected until the packet arrives at its ultimate destination.
+
+   A better approach is to use link-layer mechanisms such as FEC,
+   retransmissions, and so on in order to improve the characteristics of
+   the wireless link and present a much more reliable service to IP.
+   This approach has been taken by CDPD, Ricochet and CDMA.
+
+   This approach is roughly analogous to the successful deployment of
+   Point-to-Point Protocol (PPP), with robust framing and 16-bit
+   checksumming, on wireline networks as a replacement for the Serial
+   Line Interface Protocol (SLIP), with only a single framing byte and
+   no checksumming.
+
+   [AGS98] recommends the use of FEC in satellite environments.
+
+   Notice that the link-layer could adapt its frame size to the
+   prevalent BER.  It would perform its own fragmentation and reassembly
+   so that IP could still enjoy a large enough MTU size [LS98].
+
+   A common concern for using IP as a transport is the header overhead
+   it implies. Typically, the underlying link-layer appears as PPP
+   [RFC1661] to the IP layer above. This allows for header compression
+   schemes [IPHC, IPHC-RTP, IPHC-PPP] which greatly alleviate the
+   problem.
+
+2.2 Non-IP Alternatives
+
+   A number of non-IP alternatives aimed at wireless environments have
+   been proposed. One representative proposal is discussed here.
+
+2.2.1 WAP
+
+   The Wireless Application Protocol (WAP) specifies an application
+   framework and network protocols for wireless devices such as mobile
+   telephones, pagers, and PDAs [WAP]. The architecture requires a proxy
+   between the mobile device and the server. The WAP protocol stack is
+   layered over a datagram transport service.  Such a service is
+   provided by most wireless networks; for example, IS-136, GSM
+   SMS/USSD, and UDP in IP networks like CDPD and GSM GPRS. The core of
+
+
+
+
+
+Montenegro, et al.           Informational                      [Page 8]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   the WAP protocols is a binary HTTP/1.1 protocol with additional
+   features such as header caching between requests and a shared state
+   between client and server.
+
+2.2.2 Deploying Non-IP Alternatives
+
+   IP is such a fundamental element of the Internet that non-IP
+   alternatives face substantial obstacles to deployment, because they
+   do not exploit the IP infrastructure. Any non-IP alternative that is
+   used to provide gatewayed access to the Internet must map between IP
+   addresses and non-IP addresses, must terminate IP-level security at a
+   gateway, and cannot use IP-oriented discovery protocols (Dynamic Host
+   Configuration Protocol, Domain Name Services, Lightweight Directory
+   Access Protocol, Service Location Protocol, etc.) without translation
+   at a gateway.
+
+   A further complexity occurs when a device supports both wireless and
+   wireline operation. If the device uses IP for wireless operation,
+   uninterrupted operation when the device is connected to a wireline
+   network is possible (using Mobile IP). If a non-IP alternative is
+   used, this switchover is more difficult to accomplish.
+
+   Non-IP alternatives face the burden of proof that IP is so ill-suited
+   to a wireless environment that it is not a viable technology.
+
+2.3 IP-based Considerations
+
+   Given its worldwide deployment, IP is an obvious choice for the
+   underlying network technology. Optimizations implemented at this
+   level benefit traditional Internet application protocols as well as
+   new ones layered on top of IP or UDP.
+
+2.3.1 Choosing the MTU [Stevens94, RFC1144]
+
+   In slow networks, the time required to transmit the largest possible
+   packet may be considerable.  Interactive response time should not
+   exceed the well-known human factors limit of 100 to 200 ms. This
+   should be considered the maximum time budget to (1) send a packet and
+   (2) obtain a response. In most networking stack implementations, (1)
+   is highly dependent on the maximum transmission unit (MTU). In the
+   worst case, a small packet from an interactive application may have
+   to wait for a large packet from a bulk transfer application before
+   being sent. Hence, a good rule of thumb is to choose an MTU such that
+   its transmission time is less than (or not much larger than) 200 ms.
+
+
+
+
+
+
+
+Montenegro, et al.           Informational                      [Page 9]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   Of course, compression and type-of-service queuing (whereby
+   interactive data packets are given a higher priority) may alleviate
+   this problem. In particular, the latter may reduce the average wait
+   time to about half the MTU's transmission time.
+
+2.3.2 Path MTU Discovery [RFC1191]
+
+   Path MTU discovery benefits any protocol built on top of IP. It
+   allows a sender to determine what the maximum end-to-end transmission
+   unit is to a given destination. Without Path MTU discovery, the
+   default IPv4 MTU size is 576. The benefits of using a larger MTU are:
+
+      -  Smaller ratio of header overhead to data
+
+      -  Allows TCP to grow its congestion window faster, since it
+         increases in units of segments.
+
+   Of course, for a given BER, a larger MTU has a correspondingly larger
+   probability of error within any given segment. The BER may be reduced
+   using lower level techniques like FEC and link-layer retransmissions.
+   The issue is that now delays may become a problem due to the
+   additional retransmissions, and the fact that packet transmission
+   time increases with a larger MTU.
+
+   Recommendation: Path MTU discovery is recommended. [AGS98] already
+   recommends its use in satellite environments.
+
+2.3.3 Non-TCP Proposals
+
+   Other proposals assume an underlying IP datagram service, and
+   implement an optimized transport either directly on top of IP
+   [NETBLT] or on top of UDP [MNCP]. Not relying on TCP is a bold move,
+   given the wealth of experience and research related to it.  It could
+   be argued that the Internet has not collapsed because its main
+   protocol, TCP, is very careful in how it uses the network, and
+   generally treats it as a black box assuming all packet losses are due
+   to congestion and prudently backing off. This avoids further
+   congestion.
+
+   However, in the wireless medium, packet losses may also result from
+   corruption caused by high BER, fading, and so on. Here, the right
+   approach is to try harder, instead of backing off. Alternative
+   transport protocols are:
+
+      -  NETBLT [NETBLT, RFC1986, RFC1030]
+
+      -  MNCP [MNCP]
+
+
+
+
+Montenegro, et al.           Informational                     [Page 10]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+      -  ESRO [RFC2188]
+
+      -  RDP [RFC908, RFC1151]
+
+      -  VMTP [VMTP]
+
+3 The Case for TCP
+
+   This is one of the most hotly debated issues in the wireless arena.
+   Here are some arguments against it:
+
+      -  It is generally recognized that TCP does not perform well in
+         the presence of significant levels of non-congestion loss.  TCP
+         detractors argue that the wireless medium is one such case, and
+         that it is hard enough to fix TCP. They argue that it is easier
+         to start from scratch.
+
+      -  TCP has too much header overhead.
+
+      -  By the time the mechanisms are in place to fix it, TCP is very
+         heavy, and ill-suited for use by lightweight, portable devices.
+
+   and here are some in support of TCP:
+
+      -  It is preferable to continue using the same protocol that the
+         rest of the Internet uses for compatibility reasons. Any
+         extensions specific to the wireless link may be negotiated.
+
+      -  Legacy mechanisms may be reused (for example three-way
+         handshake).
+
+      -  Link-layer FEC and ARQ can reduce the BER such that any losses
+         TCP does see are, in fact, caused by congestion (or a sustained
+         interruption of link connectivity). Modern W-WAN technologies
+         do this (CDPD, US-TDMA, CDMA, GSM), thus improving TCP
+         throughput.
+
+      -  Handoffs among different technologies are made possible by
+         Mobile IP [RFC2002], but only if the same protocols, namely
+         TCP/IP, are used throughout.
+
+      -  Given TCP's wealth of research and experience, alternative
+         protocols are relatively immature, and the full implications of
+         their widespread deployment not clearly understood.
+
+   Overall, we feel that the performance of TCP over long-thin networks
+   can be improved significantly. Mechanisms to do so are discussed in
+   the next sections.
+
+
+
+Montenegro, et al.           Informational                     [Page 11]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+4 Candidate Optimizations
+
+   There is a large volume of work on the subject of optimizing TCP for
+   operation over wireless media. Even though satellite networks
+   generally fall in the LFN regime, our current LTN focus has much to
+   benefit from it.  For example, the work of the TCP-over-Satellite
+   working group of the IETF has been extremely helpful in preparing
+   this section [AGS98, ADGGHOSSTT98].
+
+4.1 TCP: Current Mechanisms
+
+   A TCP sender adapts its use of bandwidth based on feedback from the
+   receiver. The high latency characteristic of LTNs implies that TCP's
+   adaptation is correspondingly slower than on networks with shorter
+   delays.  Similarly, delayed ACKs exacerbate the perceived latency on
+   the link. Given that TCP grows its congestion window in units of
+   segments, small MTUs may slow adaptation even further.
+
+4.1.1 Slow Start and Congestion Avoidance
+
+   Slow Start and Congestion Avoidance [RFC2581] are essential to the
+   Internet's stability.  However, there are three reasons why the
+   wireless medium adversely affects them:
+
+      -  Whenever TCP's retransmission timer expires, the sender assumes
+         that the network is congested and invokes slow start. This is
+         why it is important to minimize the losses caused by
+         corruption, leaving only those caused by congestion (as
+         expected by TCP).
+
+      -  The sender increases its window based on the number of ACKs
+         received. Their rate of arrival, of course, is dependent on the
+         RTT (round-trip-time) between sender and receiver, which
+         implies long ramp-up times in high latency links like LTNs. The
+         dependency lasts until the pipe is filled.
+
+      -  During slow start, the sender increases its window in units of
+         segments. This is why it is important to use an appropriately
+         large MTU which, in turn, requires link layers with
+         low loss.
+
+4.1.2 Fast Retransmit and Fast Recovery
+
+   When a TCP sender receives several duplicate ACKs, fast retransmit
+   [RFC2581] allows it to infer that a segment was lost.  The sender
+   retransmits what it considers to be this lost segment without waiting
+   for the full timeout, thus saving time.
+
+
+
+
+Montenegro, et al.           Informational                     [Page 12]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   After a fast retransmit, a sender invokes the fast recovery [RFC2581]
+   algorithm. Fast recovery allows the sender to transmit at half its
+   previous rate (regulating the growth of its window based on
+   congestion avoidance), rather than having to begin a slow start. This
+   also saves time.
+
+   In general, TCP can increase its window beyond the delay-bandwidth
+   product. However, in LTN links the congestion window may remain
+   rather small, less than four segments, for long periods of time due
+   to any of the following reasons:
+
+      1. Typical "file size" to be transferred over a connection is
+         relatively small (Web requests, Web document objects, email
+         messages, files, etc.) In particular, users of LTNs are not
+         very willing to carry out large transfers as the response time
+         is so long.
+
+      2. If the link has high BER, the congestion window tends to stay
+         small.
+
+      3. When an LTN is combined with a highly congested wireline
+         Internet path, congestion losses on the Internet have the same
+         effect as 2.
+
+      4. Commonly, ISPs/operators configure only a small number of
+         buffers (even as few as for 3 packets) per user in their dial-
+         up routers.
+
+      5. Often small socket buffers are recommended with LTNs in order
+         to prevent the RTO from inflating and to diminish the amount of
+         packets with competing traffic.
+
+   A small window effectively prevents the sender from taking advantage
+   of Fast Retransmits. Moreover, efficient recovery from multiple
+   losses within a single window requires adoption of new proposals
+   (NewReno [RFC2582]). In addition, on slow paths with no packet
+   reordering waiting for three duplicate ACKs to arrive postpones
+   retransmission unnecessarily.
+
+   Recommendation: Implement Fast Retransmit and Fast Recovery at this
+   time. This is a widely-implemented optimization and is currently at
+   Proposed Standard level. [AGS98] recommends implementation of Fast
+   Retransmit/Fast Recovery in satellite environments.  NewReno
+   [RFC2582] apparently does help a sender better handle partial ACKs
+   and multiple losses in a single window, but at this point is not
+   recommended due to its experimental nature.  Instead, SACK [RFC2018]
+   is the preferred mechanism.
+
+
+
+
+Montenegro, et al.           Informational                     [Page 13]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+4.2 Connection Setup with T/TCP [RFC1397, RFC1644]
+
+   TCP engages in a "three-way handshake" whenever a new connection is
+   set up.  Data transfer is only possible after this phase has
+   completed successfully.  T/TCP allows data to be exchanged in
+   parallel with the connection set up, saving valuable time for short
+   transactions on long-latency networks.
+
+   Recommendation: T/TCP is not recommended, for these reasons:
+
+   -  It is an Experimental RFC.
+
+   -  It is not widely deployed, and it has to be deployed at both ends
+      of a connection.
+
+   -  Security concerns have been raised that T/TCP is more vulnerable
+      to address-spoofing attacks than TCP itself.
+
+   -  At least some of the benefits of T/TCP (eliminating three-way
+      handshake on subsequent query-response transactions, for instance)
+      are also available with persistent connections on HTTP/1.1, which
+      is more widely deployed.
+
+   [ADGGHOSSTT98] does not have a recommendation on T/TCP in satellite
+   environments.
+
+4.3 Slow Start Proposals
+
+   Because slow start dominates the network response seen by interactive
+   users at the beginning of a TCP connection, a number of proposals
+   have been made to modify or eliminate slow start in long latency
+   environments.
+
+   Stability of the Internet is paramount, so these proposals must
+   demonstrate that they will not adversely affect Internet congestion
+   levels in significant ways.
+
+4.3.1 Larger Initial Window
+
+   Traditional slow start, with an initial window of one segment, is a
+   time-consuming bandwidth adaptation procedure over LTNs. Studies on
+   an initial window larger than one segment [RFC2414, AHO98] resulted
+   in the TCP standard supporting a maximum value of 2 [RFC2581]. Higher
+   values are still experimental in nature.
+
+
+
+
+
+
+
+Montenegro, et al.           Informational                     [Page 14]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   In simulations with an increased initial window of three packets
+   [RFC2415], this proposal does not contribute significantly to packet
+   drop rates, and it has the added benefit of improving initial
+   response times when the peer device delays acknowledgements during
+   slow start (see next proposal).
+
+   [RFC2416] addresses situations where the initial window exceeds the
+   number of buffers available to TCP and indicates that this situation
+   is no different from the case where the congestion window grows
+   beyond the number of buffers available.
+
+   [RFC2581] now allows an initial congestion window of two segments. A
+   larger initial window, perhaps as many as four segments, might be
+   allowed in the future in environments where this significantly
+   improves performance (LFNs and LTNs).
+
+   Recommendation: Implement this on devices now. The research on this
+   optimization indicates that 3 segments is a safe initial setting, and
+   is centering on choosing between 2, 3, and 4. For now, use 2
+   (following RFC2581), which at least allows clients running query-
+   response applications to get an initial ACK from unmodified servers
+   without waiting for a typical delayed ACK timeout of 200
+   milliseconds, and saves two round-trips. An initial window of 3
+   [RFC2415] looks promising and may be adopted in the future pending
+   further research and experience.
+
+4.3.2 Growing the Window during Slow Start
+
+   The sender increases its window based on the flow of ACKs coming back
+   from the receiver. Particularly during slow start, this flow is very
+   important.  A couple of the proposals that have been studied are (1)
+   ACK counting and (2) ACK-every-segment.
+
+4.3.2.1 ACK Counting
+
+   The main idea behind ACK counting is:
+
+      -  Make each ACK count to its fullest by growing the window based
+         on the data being acknowledged (byte counting) instead of the
+         number of ACKs (ACK counting). Byte counting has been shown to
+         cause bursts which lead to congestion. [Allman98] shows that
+         Limited Byte Counting (LBC), in which the window growth is
+         limited to 2 segments, does not lead to as much burstiness, and
+         offers some performance gains.
+
+   Recommendation: Unlimited byte counting is not recommended.  Van
+   Jacobson cautions against byte counting [TCPSATMIN] because it leads
+   to burstiness, and recommends ACK spacing [ACKSPACING] instead.
+
+
+
+Montenegro, et al.           Informational                     [Page 15]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   ACK spacing requires ACKs to consistently pass through a single ACK-
+   spacing router.  This requirement works well for W-WAN environments
+   if the ACK-spacing router is also the intermediate node.
+
+   Limited byte counting warrants further investigation before we can
+   recommend this proposal, but it shows promise.
+
+4.3.2.2 ACK-every-segment
+
+   The main idea behind ACK-every-segment is:
+
+      -  Keep a constant stream of ACKs coming back by turning off
+         delayed ACKs [RFC1122] during slow start. ACK-every-segment
+         must be limited to slow start, in order to avoid penalizing
+         asymmetric-bandwidth configurations. For instance, a low
+         bandwidth link carrying acknowledgements back to the sender,
+         hinders the growth of the congestion window, even if the link
+         toward the client has a greater bandwidth [BPK99].
+
+   Even though simulations confirm its promise (it allows receivers to
+   receive the second segment from unmodified senders without waiting
+   for a typical delayed ACK timeout of 200 milliseconds), for this
+   technique to be practical the receiver must acknowledge every segment
+   only when the sender is in slow start.  Continuing to do so when the
+   sender is in congestion avoidance may have adverse effects on the
+   mobile device's battery consumption and on traffic in the network.
+
+   This violates a SHOULD in [RFC2581]:  delayed acknowledgements SHOULD
+   be used by a TCP receiver.
+
+   "Disabling Delayed ACKs During Slow Start" is technically
+   unimplementable, as the receiver has no way of knowing when the
+   sender crosses ssthresh (the "slow start threshold") and begins using
+   the congestion avoidance algorithm.  If receivers follow
+   recommendations for increased initial windows, disabling delayed ACKs
+   during an increased initial window would open the TCP window more
+   rapidly without doubling ACK traffic in general.  However, this
+   scheme might double ACK traffic if most connections remain in slow-
+   start.
+
+   Recommendation: ACK only the first segment on a new connection with
+   no delay.
+
+
+
+
+
+
+
+
+
+Montenegro, et al.           Informational                     [Page 16]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+4.3.3 Terminating Slow Start
+
+   New mechanisms [ADGGHOSSTT98] are being proposed to improve TCP's
+   adaptive properties such that the available bandwidth is better
+   utilized while reducing the possibility of congesting the network.
+   This results in the closing of the congestion window to 1 segment
+   (which precludes fast retransmit), and the subsequent slow start
+   phase.
+
+   Theoretically, an optimum value for slow-start threshold (ssthresh)
+   allows connection bandwidth utilization to ramp up as aggressively as
+   possible without "overshoot" (using so much bandwidth that packets
+   are lost and congestion avoidance procedures are invoked).
+
+   Recommendation: Estimating the slow start threshold is not
+   recommended.  Although this would be helpful if we knew how to do it,
+   rough consensus on the tcp-impl and tcp-sat mailing lists is that in
+   non-trivial operational networks there is no reliable method to probe
+   during TCP startup and estimate the bandwidth available.
+
+4.3.4 Generating ACKs during Slow Start
+
+   Mitigations that inject additional ACKs (whether "ACK-first-segment"
+   or "ACK-every-segment-during-slow-start") beyond what today's
+   conformant TCPs inject are only applicable during the slow-start
+   phases of a connection. After an initial exchange, the connection
+   usually completes slow-start, so TCPs only inject additional ACKs
+   when (1) the connection is closed, and a new connection is opened, or
+   (2) the TCPs handle idle connection restart correctly by performing
+   slow start.
+
+   Item (1) is typical when using HTTP/1.0, in which each request-
+   response transaction requires a new connection.  Persistent
+   connections in HTTP/1.1 help in maintaining a connection in
+   congestion avoidance instead of constantly reverting to slow-start.
+   Because of this, these optimizations which are only enabled during
+   slow-start do not get as much of a chance to act. Item (2), of
+   course, is independent of HTTP version.
+
+4.4 ACK Spacing
+
+   During slow start, the sender responds to the incoming ACK stream by
+   transmitting N+1 segments for each ACK, where N is the number of new
+   segments acknowledged by the incoming ACK.  This results in data
+   being sent at twice the speed at which it can be processed by the
+   network.  Accordingly, queues will form, and due to insufficient
+   buffering at the bottleneck router, packets may get dropped before
+   the link's capacity is full.
+
+
+
+Montenegro, et al.           Informational                     [Page 17]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   Spacing out the ACKs effectively controls the rate at which the
+   sender will transmit into the network, and may result in little or no
+   queueing at the bottleneck router [ACKSPACING].  Furthermore, ACK
+   spacing reduces the size of the bursts.
+
+   Recommendation: No recommendation at this time. Continue monitoring
+   research in this area.
+
+4.5 Delayed Duplicate Acknowledgements
+
+   As was mentioned above, link-layer retransmissions may decrease the
+   BER enough that congestion accounts for most of packet losses; still,
+   nothing can be done about interruptions due to handoffs, moving
+   beyond wireless coverage, etc. In this scenario, it is imperative to
+   prevent interaction between link-layer retransmission and TCP
+   retransmission as these layers duplicate each other's efforts. In
+   such an environment it may make sense to delay TCP's efforts so as to
+   give the link-layer a chance to recover. With this in mind, the
+   Delayed Dupacks [MV97, Vaidya99] scheme selectively delays duplicate
+   acknowledgements at the receiver.  It is preferable to allow a local
+   mechanism to resolve a local problem, instead of invoking TCP's end-
+   to-end mechanism and incurring the associated costs, both in terms of
+   wasted bandwidth and in terms of its effect on TCP's window behavior.
+
+   The Delayed Dupacks scheme can be used despite IP encryption since
+   the intermediate node does not need to examine the TCP headers.
+
+   Currently, it is not well understood how long the receiver should
+   delay the duplicate acknowledgments. In particular, the impact of
+   wireless medium access control (MAC) protocol on the choice of delay
+   parameter needs to be studied. The MAC protocol may affect the
+   ability to choose the appropriate delay (either statically or
+   dynamically). In general, significant variabilities in link-level
+   retransmission times can have an adverse impact on the performance of
+   the Delayed Dupacks scheme. Furthermore, as discussed later in
+   section 4.10.3, Delayed Dupacks and some other schemes (such as Snoop
+   [SNOOP]) are only beneficial in certain types of network links.
+
+   Recommendation: Delaying duplicate acknowledgements may be useful in
+   specific network topologies, but a general recommendation requires
+   further research and experience.
+
+4.6 Selective Acknowledgements [RFC2018]
+
+   SACK may not be useful in many LTNs, according to Section 1.1 of
+   [TCPHP].  In particular, SACK is more useful in the LFN regime,
+   especially if large windows are being used, because there is a
+
+
+
+
+Montenegro, et al.           Informational                     [Page 18]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   considerable probability of multiple segment losses per window. In
+   the LTN regime, TCP windows are much smaller, and burst errors must
+   be much longer in duration in order to damage multiple segments.
+
+   Accordingly, the complexity of SACK may not be justifiable, unless
+   there is a high probability of burst errors and congestion on the
+   wireless link. A desire for compatibility with TCP recommendations
+   for non-LTN environments may dictate LTN support for SACK anyway.
+
+   [AGS98] recommends use of SACK with Large TCP Windows in satellite
+   environments, and notes that this implies support for PAWS
+   (Protection Against Wrapped Sequence space) and RTTM (Round Trip Time
+   Measurement) as well.
+
+   Berkeley's SNOOP protocol research [SNOOP] indicates that SACK does
+   improve throughput for SNOOP when multiple segments are lost per
+   window [BPSK96]. SACK allows SNOOP to recover from multi-segment
+   losses in one round-trip. In this case, the mobile device needs to
+   implement some form of selective acknowledgements.  If SACK is not
+   used, TCP may enter congestion avoidance as the time needed to
+   retransmit the lost segments may be greater than the retransmission
+   timer.
+
+   Recommendation: Implement SACK now for compatibility with other TCPs
+   and improved performance with SNOOP.
+
+4.7 Detecting Corruption Loss
+
+4.7.1 Without Explicit Notification
+
+   In the absence of explicit notification from the network, some
+   researchers have suggested statistical methods for congestion
+   avoidance [Jain89, WC91, VEGAS]. A natural extension of these
+   heuristics would enable a sender to distinguish between losses caused
+   by congestion and other causes.  The research results on the
+   reliability of sender-based heuristics are unfavorable [BV97, BV98].
+   [BV98a] reports better results in constrained environments using
+   packet inter-arrival times measured at the receiver, but highly-
+   variable delay - of the type encountered in wireless environments
+   during intercell handoff - confounds these heuristics.
+
+   Recommendation: No recommendation at this time - continue to monitor
+   research results.
+
+
+
+
+
+
+
+
+Montenegro, et al.           Informational                     [Page 19]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+4.7.2 With Explicit Notifications
+
+   With explicit notification from the network it is possible to
+   determine when a loss is due to congestion. Several proposals along
+   these lines include:
+
+      -  Explicit Loss Notification (ELN) [BPSK96]
+
+      -  Explicit Bad State Notification (EBSN) [BBKVP96]
+
+      -  Explicit Loss Notification to the Receiver (ELNR), and Explicit
+         Delayed Dupack Activation Notification (EDDAN) (notifications
+         to mobile receiver) [MV97]
+
+      -  Explicit Congestion Notification (ECN) [ECN]
+
+   Of these proposals, Explicit Congestion Notification (ECN) seems
+   closest to deployment on the Internet, and will provide some benefit
+   for TCP connections on long thin networks (as well as for all other
+   TCP connections).
+
+   Recommendation: No recommendation at this time. Schemes like ELNR and
+   EDDAN [MV97], in which  the only systems that need to be modified are
+   the intermediate node and the mobile device, are slated for adoption
+   pending further research.  However, this solution has some
+   limitations. Since the intermediate node must have access to the TCP
+   headers, the IP payload must not be encrypted.
+
+   ECN uses the TOS byte in the IP header to carry congestion
+   information (ECN-capable and Congestion-encountered).  This byte is
+   not encrypted in IPSEC, so ECN can be used on TCP connections that
+   are encrypted using IPSEC.
+
+   Recommendation: Implement ECN. In spite of this, mechanisms for
+   explicit corruption notification are still relevant and should be
+   tracked.
+
+   Note: ECN provides useful information to avoid deteriorating further
+   a bad situation, but has some limitations for wireless applications.
+   Absence of packets marked with ECN should not be interpreted by ECN-
+   capable TCP connections as a green light for aggressive
+   retransmissions. On the contrary, during periods of extreme network
+   congestion routers may drop packets marked with explicit notification
+   because their buffers are exhausted - exactly the wrong time for a
+   host to begin retransmitting aggressively.
+
+
+
+
+
+
+Montenegro, et al.           Informational                     [Page 20]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+4.8 Active Queue Management
+
+   As has been pointed out above, TCP responds to congestion by closing
+   down the window and invoking slow start. Long-delay networks take a
+   particularly long time to recover from this condition. Accordingly,
+   it is imperative to avoid congestion in LTNs. To remedy this, active
+   queue management techniques have been proposed as enhancements to
+   routers throughout the Internet [RED].  The primary motivation for
+   deployment of these mechanisms is to prevent "congestion collapse" (a
+   severe degradation in service) by controlling the average queue size
+   at the routers. As the average queue length grows, Random Early
+   Detection [RED] increases the possibility of dropping packets.
+
+   The benefits are:
+
+      -  Reduce packet drops in routers. By dropping a few packets
+         before severe congestion sets in, RED avoids dropping bursts of
+         packets. In other words, the objective is to drop m packets
+         early to prevent n drops later on, where m is less than n.
+
+      -  Provide lower delays. This follows from the smaller queue
+         sizes, and is particularly important for interactive
+         applications, for which the inherent delays of wireless links
+         already push the user experience to the limits of the non-
+         acceptable.
+
+      -  Avoid lock-outs. Lack of resources in a router (and the
+         resultant packet drops) may, in effect, obliterate throughput
+         on certain connections.  Because of active queue management, it
+         is more probable for an incoming packet to find available
+         buffer space at the router.
+
+   Active Queue Management has two components: (1) routers detect
+   congestion before exhausting their resources, and (2) they provide
+   some form of congestion indication. Dropping packets via RED is only
+   one example of the latter.  Another way to indicate congestion is to
+   use ECN [ECN] as discussed above under "Detecting Corruption Loss:
+   With Explicit Notifications."
+
+   Recommendation: RED is currently being deployed in the Internet, and
+   LTNs should follow suit. ECN deployment should complement RED's.
+
+4.9 Scheduling Algorithms
+
+   Active queue management helps control the length of the queues.
+   Additionally, a general solution requires replacing FIFO with other
+   scheduling algorithms that improve:
+
+
+
+
+Montenegro, et al.           Informational                     [Page 21]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+      1. Fairness (by policing how different packet streams utilize the
+         available bandwidth), and
+
+      2. Throughput (by improving the transmitter's radio channel
+         utilization).
+
+   For example, fairness is necessary for interactive applications (like
+   telnet or web browsing) to coexist with bulk transfer sessions.
+   Proposals here include:
+
+      - Fair Queueing (FQ) [Demers90]
+
+      - Class-based Queueing (CBQ) [Floyd95]
+
+   Even if they are only implemented over the wireless link portion of
+   the communication path, these proposals are attractive in wireless
+   LTN environments, because new connections for interactive
+   applications can have difficulty starting when a bulk TCP transfer
+   has already stabilized using all available bandwidth.
+
+   In our base architecture described above, the mobile device typically
+   communicates directly with only one wireless peer at a given time:
+   the intermediate node. In some W-WANs, it is possible to directly
+   address other mobiles within the same cell.  Direct communication
+   with each such wireless peer may traverse a spatially distinct path,
+   each of which may exhibit statistically independent radio link
+   characteristics. Channel State Dependent Packet Scheduling (CSDP)
+   [BBKT96] tracks the state of the various radio links (as defined by
+   the target devices), and gives preferential treatment to packets
+   destined for radio links in a "good" state. This avoids attempting to
+   transmit to (and expect acknowledgements from) a peer on a "bad"
+   radio link, thus improving throughput.
+
+   A further refinement of this idea suggests that both fairness and
+   throughput can be improved by combining a wireless-enhanced CBQ with
+   CSDP [FSS98].
+
+   Recommendation: No recommendation at this time, pending further
+   study.
+
+4.10 Split TCP and Performance-Enhancing Proxies (PEPs)
+
+   Given the dramatic differences between the wired and the wireless
+   links, a very common approach is to provide some impedance matching
+   where the two different technologies meet: at the intermediate node.
+
+
+
+
+
+
+Montenegro, et al.           Informational                     [Page 22]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   The idea is to replace an end-to-end TCP connection with two clearly
+   distinct connections: one across the wireless link, the other across
+   its wireline counterpart. Each of the two resulting TCP sessions
+   operates under very different networking characteristics, and may
+   adopt the policies best suited to its particular medium.  For
+   example, in a specific LTN topology it may be desirable to modify TCP
+   Fast Retransmit to resend after the first duplicate ack and Fast
+   Recovery to not shrink the congestion window if the LTN link has an
+   extremely long RTT, is known to not reorder packets, and is not
+   subject to congestion. Moreover, on a long-delay link or on a link
+   with a relatively high bandwidth-delay product it may be desirable to
+   "slow-start" with a relatively large initial window, even larger than
+   four segments.  While these kinds of TCP modifications can be
+   negotiated to be employed over the LTN link, they would not be
+   deployed end-to-end over the global Internet. In LTN topologies where
+   the underlying link characteristics are known, various similar
+   types of performance enhancements can be employed without endangering
+   operations over the global Internet.
+
+   In some proposals, in addition to a PEP mechanism at the intermediate
+   node, custom protocols are used on the wireless link (for example,
+   [WAP], [YB94] or [MOWGLI]).
+
+   Even if the gains from using non-TCP protocols are moderate or
+   better, the wealth of research on optimizing TCP for wireless, and
+   compatibility with the Internet are compelling reasons to adopt TCP
+   on the wireless link (enhanced as suggested in section 5 below).
+
+4.10.1 Split TCP Approaches
+
+   Split-TCP proposals include schemes like I-TCP [ITCP] and MTCP [YB94]
+   which achieve performance improvements by abandoning end-to-end
+   semantics.
+
+   The Mowgli architecture [MOWGLI] proposes a split approach with
+   support for various enhancements at all the protocol layers, not only
+   at the transport layer. Mowgli provides an option to replace the
+   TCP/IP core protocols on the LTN link with a custom protocol that is
+   tuned for LTN links [KRLKA97].  In addition, the protocol provides
+   various features that are useful with LTNs. For example, it provides
+   priority-based multiplexing of concurrent connections together with
+   shared flow control, thus offering link capacity to interactive
+   applications in a timely manner even if there are bandwidth-intensive
+   background transfers.  Also with this option, Mowgli preserves the
+   socket semantics on the mobile device so that legacy applications can
+   be run unmodified.
+
+
+
+
+
+Montenegro, et al.           Informational                     [Page 23]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   Employing split TCP approaches has several benefits as well as
+   drawbacks. Benefits related to split TCP approaches include the
+   following:
+
+   -  Splitting the end-to-end TCP connection into two parts is a
+      straightforward way to shield the problems of the wireless link
+      from the wireline Internet path, and vice versa. Thus, a split TCP
+      approach enables applying local solutions to the local problems on
+      the wireless link.  For example, it automatically solves the
+      problem of distinguishing congestion related packet losses on the
+      wireline Internet and packet losses due to transmission error on
+      the wireless link as these occur on separate TCP connections.
+      Even if both segments experience congestion, it may be of a
+      different nature and may be treated as such.  Moreover, temporary
+      disconnections of the wireless link can be effectively shielded
+      from the wireline Internet.
+
+   -  When one of the TCP connections crosses only a single hop wireless
+      link or a very limited number of hops, some or all link
+      characteristics for the wireless TCP path are known. For example,
+      with a particular link we may know that the link provides reliable
+      delivery of packets, packets are not delivered out of order, or
+      the link is not subject to congestion. Having this information for
+      the TCP path one could expect that defining the TCP mitigations to
+      be employed becomes a significantly easier task. In addition,
+      several mitigations that cannot be employed safely over the global
+      Internet, can be successfully employed over the wireless link.
+
+   -  Splitting one TCP connection into two separate ones allows much
+      earlier deployment of various recent proposals to improve TCP
+      performance over wireless links; only the TCP implementations of
+      the mobile device and intermediate node need to be modified, thus
+      allowing the vast number of Internet hosts to continue running the
+      legacy TCP implementations unmodified. Any mitigations that would
+      require modification of TCP in these wireline hosts may take far
+      too long to become widely deployed.
+
+   -  Allows exploitation of various application level enhancements
+      which may give significant performance gains (see section 4.10.2).
+
+   Drawbacks related to split TCP approaches include the following:
+
+   -  One of the main criticisms against the split TCP approaches is
+      that they break TCP end-to-end semantics. This has various
+      drawbacks some of which are more severe than others. The most
+      detrimental drawback is probably that splitting the TCP connection
+      disables end-to-end usage of IP layer security mechanisms,
+      precluding the application of IPSec to achieve end-to-end
+
+
+
+Montenegro, et al.           Informational                     [Page 24]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+      security. Still, IPSec could be employed separately in each of the
+      two parts, thus requiring the intermediate node to become a party
+      to the security association between the mobile device and the
+      remote host. This, however, is an undesirable or unacceptable
+      alternative in most cases. Other security mechanisms above the
+      transport layer, like TLS [RFC2246] or SOCKS [RFC1928], should be
+      employed for end-to-end security.
+
+   -  Another drawback of breaking end-to-end semantics is that crashes
+      of the intermediate node become unrecoverable resulting in
+      termination of the TCP connections. Whether this should be
+      considered a severe problem depends on the expected frequency of
+      such crashes.
+
+   -  On many occasions claims have been made that if TCP end-to-end
+      semantics is broken, applications relying on TCP to provide
+      reliable data delivery become more vulnerable. This, however, is
+      an overstatement as a well-designed application should never fully
+      rely on TCP in achieving end-to-end reliability at the application
+      level. First, current APIs to TCP, such as the Berkeley socket
+      interface, do not allow applications to know when a TCP
+      acknowledgement for previously sent user data arrives at the TCP
+      sender.  Second, even if the application is informed of the TCP
+      acknowledgements, the sending application cannot know whether the
+      receiving application has received the data: it only knows that
+      the data reached the TCP receive buffer at the receiving end.
+      Finally, in order to achieve end-to-end reliability at the
+      application level an application level acknowledgement is required
+      to confirm that the receiver has taken the appropriate actions on
+      the data it received.
+
+   -  When a mobile device moves, it is subject to handovers by the
+      serving base station. If the base station acts as the intermediate
+      node for the split TCP connection, the state of both TCP endpoints
+      on the previous intermediate node must be transferred to the new
+      intermediate node to ensure continued operation over the split TCP
+      connection. This requires extra work and causes overhead. However,
+      in most of the W-WAN wireless networks, unlike in W-LANs, the W-
+      WAN base station does not provide the mobile device with the
+      connection point to the wireline Internet (such base stations may
+      not even have an IP stack).  Instead, the W-WAN network takes care
+      of the mobility and retains the connection point to the wireline
+      Internet unchanged while the mobile device moves.  Thus, TCP state
+      handover is not required in most W-WANs.
+
+   -  The packets traversing through all the protocol layers up to
+      transport layer and again down to the link layer result in extra
+      overhead at the intermediate node. In case of LTNs with low
+
+
+
+Montenegro, et al.           Informational                     [Page 25]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+      bandwidth, this extra overhead does not cause serious additional
+      performance problems unlike with W-LANs that typically have much
+      higher bandwidth.
+
+   -  Split TCP proposals are not applicable to networks with asymmetric
+      routing. Deploying a split TCP approach requires that traffic to
+      and from the mobile device be routed through the intermediate
+      node. With some networks, this cannot be accomplished, or it
+      requires that the intermediate node is located several hops away
+      from the wireless network edge which in turn is impractical in
+      many cases and may result in non-optimal routing.
+
+   -  Split TCP, as the name implies, does not address problems related
+      to UDP.
+
+   It should be noted that using split TCP does not necessarily exclude
+   simultaneous usage of IP for end-to-end connectivity.  Correct usage
+   of split TCP should be managed per application or per connection and
+   should be under the end-user control so that the user can decide
+   whether a particular TCP connection or application makes use of split
+   TCP or whether it operates end-to-end directly over IP.
+
+   Recommendation: Split TCP proposals that alter TCP semantics are not
+   recommended. Deploying custom protocols on the wireless link, such as
+   MOWGLI proposes, is not recommended, because this note gives
+   preference to (1) improving TCP instead of designing a custom
+   protocol and (2) allowing end-to-end sessions at all times.
+
+4.10.2 Application Level Proxies
+
+   Nowadays, application level proxies are widely used in the Internet.
+   Such proxies include Web proxy caches, relay MTAs (Mail Transfer
+   Agents), and secure transport proxies (e.g., SOCKS). In effect,
+   employing an application level proxy results in a "split TCP
+   connection" with the proxy as the intermediary.  Hence, some of the
+   problems present with wireless links, such as combining of a
+   congested wide-area Internet path with a wireless LTN link, are
+   automatically alleviated to some extent.
+
+   The application protocols often employ plenty of (unnecessary) round
+   trips, lots of headers and inefficient encoding. Even unnecessary
+   data may get delivered over the wireless link in regular application
+   protocol operation. In many cases a significant amount of this
+   overhead can be reduced by simply running an application level proxy
+   on the intermediate node.  With LTN links, significant additional
+   improvement can be achieved by introducing application level proxies
+   with application-specific enhancements. Such a proxy may employ an
+   enhanced version of the application protocol over the wireless link.
+
+
+
+Montenegro, et al.           Informational                     [Page 26]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   In an LTN environment enhancements at the application layer may
+   provide much more notable performance improvements than any transport
+   level enhancements.
+
+   The Mowgli system provides full support for adding application level
+   agent-proxy pairs between the client and the server, the agent on the
+   mobile device and the proxy on the intermediate node. Such a pair may
+   be either explicit or fully transparent to the applications, but it
+   is, at all times, under the end-user control. Good examples of
+   enhancements achieved with application-specific proxies include
+   Mowgli WWW [LAKLR95], [LHKR96] and WebExpress [HL96], [CTCSM97].
+
+   Recommendation: Usage of application level proxies is conditionally
+   recommended: an application must be proxy enabled and the decision of
+   employing a proxy for an application must be under the user control
+   at all times.
+
+4.10.3 Snoop and its Derivatives
+
+   Berkeley's SNOOP protocol [SNOOP] is a hybrid scheme mixing link-
+   layer reliability mechanisms with the split connection approach. It
+   is an improvement over split TCP approaches in that end-to-end
+   semantics are retained. SNOOP does two things:
+
+      1. Locally (on the wireless link) retransmit lost packets, instead
+         of allowing TCP to do so end-to-end.
+
+      2. Suppress the duplicate acks on their way from the receiver back
+         to the sender, thus avoiding fast retransmit and congestion
+         avoidance at the latter.
+
+   Thus, the Snoop protocol is designed to avoid unnecessary fast
+   retransmits by the TCP sender, when the wireless link layer
+   retransmits a packet locally. Consider a system that does not use the
+   Snoop agent. Consider a TCP sender S that sends packets to receiver R
+   via an intermediate node IN. Assume that the sender sends packet A,
+   B, C, D, E (in that order) which are forwarded by IN to the wireless
+   receiver R. Assume that the intermediate node then retransmits B
+   subsequently, because the first transmission of packet B is lost due
+   to errors on the wireless link. In this case, receiver R receives
+   packets A, C, D, E and B (in that order). Receipt of packets C, D and
+   E triggers duplicate acknowledgements. When the TCP sender receives
+   three duplicate acknowledgements, it triggers fast retransmit (which
+   results in a retransmission, as well as reduction of congestion
+   window).  The fast retransmit occurs despite the link level
+   retransmit on the wireless link, degrading throughput.
+
+
+
+
+
+Montenegro, et al.           Informational                     [Page 27]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   SNOOP [SNOOP] deals with this problem by dropping TCP dupacks
+   appropriately (at the intermediate node). The Delayed Dupacks scheme
+   (see section 4.5) attempts to approximate Snoop without requiring
+   modifications at the intermediate node.  Such schemes are needed only
+   if the possibility of a fast retransmit due to wireless errors is
+   non-negligible. In particular, if the wireless link uses a stop-and-
+   go protocol (or otherwise delivers packets in-order), then these
+   schemes are not very beneficial.  Also, if the bandwidth-delay
+   product of the wireless link is smaller than four segments, the
+   probability that the intermediate node will have an opportunity to
+   send three new packets before a lost packet is retransmitted is
+   small.  Since at least three dupacks are needed to trigger a fast
+   retransmit, with a wireless bandwidth-delay product less than four
+   packets, schemes such as Snoop and Delayed Dupacks would not be
+   necessary (unless the link layer is not designed properly).
+   Conversely, when the wireless bandwidth-delay product is large
+   enough, Snoop can provide significant performance improvement
+   (compared with standard TCP). For further discussion on these topics,
+   please refer to [Vaidya99].
+
+   The Delayed Dupacks scheme tends to provide performance benefit in
+   environments where Snoop performs well. In general, performance
+   improvement achieved by the Delayed Dupacks scheme is a function of
+   packet loss rates due to congestion and transmission errors. When
+   congestion-related losses occur, the Delayed Dupacks scheme
+   unnecessarily delays retransmission.  Thus, in the presence of
+   congestion losses, the Delayed Dupacks scheme cannot achieve the same
+   performance improvement as Snoop.  However, simulation results
+   [Vaidya99] indicate that the Delayed Dupacks can achieve a
+   significant improvement in performance despite moderate congestion
+   losses.
+
+   WTCP [WTCP] is similar to SNOOP in that it preserves end-to-end
+   semantics.  In WTCP, the intermediate node uses a complex scheme to
+   hide the time it spends recovering from local errors across the
+   wireless link (this typically includes retransmissions due to error
+   recovery, but may also include time spent dealing with congestion).
+   The idea is for the sender to derive a smooth estimate of round-trip
+   time.  In order to work effectively, it assumes that the TCP
+   endpoints implement the Timestamps option in RFC 1323 [TCPHP].
+   Unfortunately, support for RFC 1323 in TCP implementations is not yet
+   widespread. Beyond this, WTCP requires changes only at the
+   intermediate node.
+
+   SNOOP and WTCP require the intermediate node to examine and operate
+   on the traffic between the portable wireless device and the TCP
+   server on the wired Internet. SNOOP and WTCP do not work if the IP
+   traffic is encrypted, unless, of course, the intermediate node shares
+
+
+
+Montenegro, et al.           Informational                     [Page 28]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   the security association between the mobile device and its end-to-end
+   peer.  They also require that both the data and the corresponding
+   ACKs traverse the same intermediate node.  Furthermore, if the
+   intermediate node retransmits packets at the transport layer across
+   the wireless link, this may duplicate efforts by the link-layer.
+   SNOOP has been described by its designers as a TCP-aware link-layer.
+   This is the right approach:  the link and network layers can be much
+   more aware of each other than traditional OSI layering suggests.
+
+   Encryption of IP packets via IPSEC's ESP header (in either transport
+   or tunnel mode) renders the TCP header and payload unintelligible to
+   the intermediate node. This precludes SNOOP (and WTCP) from working,
+   because it needs to examine the TCP headers in both directions.
+   Possible solutions involve:
+
+   -  making the SNOOP (or WTCP) intermediate node a party to the
+      security association between the client and the server
+
+   -  IPSEC tunneling mode, terminated at the SNOOPing intermediate node
+
+   However, these techniques require that users trust intermediate
+   nodes.  Users valuing both privacy and performance should use SSL or
+   SOCKS for end-to-end security. These, however, are implemented above
+   the transport layer, and are not as resistant to some security
+   attacks (for example, those based on guessing TCP sequence numbers)
+   as IPSEC.
+
+   Recommendation: Implement SNOOP on intermediate nodes now.  Research
+   results are encouraging, and it is an "invisible" optimization in
+   that neither the client nor the server needs to change, only the
+   intermediate node (for basic SNOOP without SACK). However, as
+   discussed above there is little or no benefit from implementing SNOOP
+   if:
+
+      1. The wireless link provides reliable, in-order packet delivery,
+         or,
+
+      2. The bandwidth-delay product of the wireless link is smaller
+         than four segments.
+
+4.10.4 PEPs to handle Periods of Disconnection
+
+   Periods of disconnection are very common in wireless networks, either
+   during handoff, due to lack of resources (dropped connections) or
+   natural obstacles. During these periods, a TCP sender does not
+   receive the expected acknowledgements.  Upon expiration of the
+   retransmit timer, this causes TCP to close its congestion window
+   with all the related drawbacks.  Re-transmitting packets is useless
+
+
+
+Montenegro, et al.           Informational                     [Page 29]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   since the connection is broken. [M-TCP] aims at enabling TCP to
+   better handle handoffs and periods of disconnection, while preserving
+   end-to-end semantics.  M-TCP adds an element: a supervisor host (SH-
+   TCP) at the edge of the wireless network.
+
+   This intermediate node monitors the traffic coming from the sender to
+   the mobile device. It does not break end-to-end semantics because the
+   ACKs sent from the intermediate node to the sender are effectively
+   the ones sent by the mobile node. The principle is to generally leave
+   the last byte unacknowledged.  Hence, SH-TCP could shut down the
+   sender's window by sending the ACK for the last byte with a window
+   set to zero. Thus the sender will go to persist mode.
+
+   The second optimization is done on both the intermediate node and the
+   mobile host. On the latter, TCP is aware of the current state of the
+   connection. In the event of a disconnection, it is capable of
+   freezing all timers. Upon reconnection, the mobile sends a specially
+   marked ACK with the number of the highest byte received.  The
+   intermediate node assumes that the mobile is disconnected because it
+   monitors the flow on the wireless link, so in the absence of
+   acknowledgments from the mobile, it will inform SH-TCP, which will
+   send the ACK closing the sender window as described in the previous
+   paragraph. The intermediate node learns that the mobile is again
+   connected when it receives a duplicate acknowledgment marked as
+   reconnected.  At this point it sends a duplicate ACK to the sender
+   and grows the window.  The sender exits persist mode and resumes
+   transmitting at the same rate as before. It begins by retransmitting
+   any data previously unacknowledged by the mobile node. Non-
+   overlapping or non-soft handoffs are lightweight because the previous
+   intermediate system can shrink the window, and the new one modifies
+   it as soon as it has received an indication from the mobile.
+
+   Recommendation: M-TCP is not slated for adoption at this moment,
+   because of the highly experimental nature of the proposal, and the
+   uncertainty that TCP/IP implementations handle zero window updates
+   correctly. Continue tracking developments in this space.
+
+4.11 Header Compression Alternatives
+
+   Because Long Thin Networks are bandwidth-constrained, compressing
+   every byte out of over-the-air segments is worthwhile.
+
+   Mechanisms for TCP and IP header compression defined in [RFC1144,
+   IPHC, IPHC-RTP, IPHC-PPP] provide the following benefits:
+
+   -  Improve interactive response time
+
+   -  Allow using small packets for bulk data with good line efficiency
+
+
+
+Montenegro, et al.           Informational                     [Page 30]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+      -  Allow using small packets for delay sensitive low data-rate
+         traffic
+
+      -  Decrease header overhead (for a common TCP segment size of 512
+         the header overhead of IPv4/TCP within a Mobile IP tunnel can
+         decrease from 11.7 to less than 1 per cent).
+
+      -  Reduce packet loss rate over lossy links (because of the
+         smaller cross-section of compressed packets).
+
+   Van Jacobson (VJ) header compression [RFC1144] describes a Proposed
+   Standard for TCP Header compression that is widely deployed.  It uses
+   TCP timeouts to detect a loss of synchronization between the
+   compressor and decompressor. [IPHC] includes an explicit request for
+   transmission of uncompressed headers to allow resynchronization
+   without waiting for a TCP timeout (and executing congestion avoidance
+   procedures).
+
+   Recommendation: Implement [IPHC], in particular as it relates to IP-
+   in-IP [RFC2003] and Minimal Encapsulation [RFC2004] for Mobile IP, as
+   well as TCP header compression for lossy links and links that
+   reorder packets. PPP capable devices should implement [IPHC-PPP].  VJ
+   header compression may optionally be implemented as it is a widely
+   deployed Proposed Standard.  However, it should only be enabled when
+   operating over reliable LTNs, because even a single bit error most
+   probably would result in a full TCP window being dropped, followed by
+   a costly recovery via slow-start.
+
+4.12 Payload Compression
+
+   Compression of IP payloads is also desirable. "IP Payload Compression
+   Protocol (IPComp)" [IPPCP] defines a framework where common
+   compression algorithms can be applied to arbitrary IP segment
+   payloads. IP payload compression is something of a niche
+   optimization. It is necessary because IP-level security converts IP
+   payloads to random bitstreams, defeating commonly-deployed link-layer
+   compression mechanisms which are faced with payloads that have no
+   redundant "information" that can be more compactly represented.
+
+   However, many IP payloads are already compressed (images, audio,
+   video, "zipped" files being FTPed), or are already encrypted above
+   the IP layer (SSL/TLS, etc.). These payloads will not "compress"
+   further, limiting the benefit of this optimization.
+
+   HTTP/1.1 already supports compression of the message body.  For
+   example, to use zlib compression the relevant directives are:
+   "Content-Encoding: deflate" and "Accept-Encoding:  deflate" [HTTP-
+   PERF].
+
+
+
+Montenegro, et al.           Informational                     [Page 31]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   HTTP-NG is considering supporting compression of resources at the
+   HTTP level, which would provide equivalent benefits for common
+   compressible MIME types like text/html. This will reduce the need for
+   IPComp. If IPComp is deployed more rapidly than HTTP-NG, IPComp
+   compression of HTML and MIME headers would be beneficial.
+
+   In general, application-level compression can often outperform
+   IPComp, because of the opportunity to use compression dictionaries
+   based on knowledge of the specific data being compressed.
+
+   Recommendation: IPComp may optionally be implemented. Track HTTP-NG
+   standardization and deployment for now. Implementing HTTP/1.1
+   compression using zlib is recommended.
+
+4.13 TCP Control Block Interdependence [Touch97]
+
+   TCP maintains per-connection information such as connection state,
+   current round-trip time, congestion control or maximum segment size.
+   Sharing information between two consecutive connections or when
+   creating a new connection while the first is still active to the same
+   host may improve performance of the latter connection.  The principle
+   could easily be extended to sharing information amongst systems in a
+   LAN not just within a given system.  [Touch97] describes cache update
+   for both cases.
+
+   Users of W-WAN devices frequently request connections to the same
+   servers or set of servers. For example, in order to read their email
+   or to initiate connections to other servers, the devices may be
+   configured to always use the same email server or WWW proxy.  The
+   main advantage of this proposal is that it relieves the application
+   of the burden of optimizing the transport layer. In order to improve
+   the performance of TCP connections, this mechanism only requires
+   changes at the wireless device.
+
+   In general, this scheme should improve the dynamism of connection
+   setup without increasing the cost of the implementation.
+
+   Recommendation: This mechanism is recommended, although HTTP/1.1 with
+   its persistent connections may partially achieve the same effect
+   without it. Other applications (even HTTP/1.0) may find it useful.
+   Continue monitoring research on this. In particular, work on a
+   "Congestion Manager" [CM] may generalize this concept of sharing
+   information among protocols and applications with a view to making
+   them more adaptable to network conditions.
+
+
+
+
+
+
+
+Montenegro, et al.           Informational                     [Page 32]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+5 Summary of Recommended Optimizations
+
+   The table below summarizes our recommendations with regard to the
+   main proposals mentioned above.
+
+   The first column, "Stability of the Proposal," refers to the maturity
+   of the mechanism in question.  Some proposals are being pursued
+   within the IETF in a somewhat open fashion. An IETF proposal is
+   either an Internet Draft (I-D) or a Request for Comments (RFC). The
+   former is a preliminary version. There are several types of RFCs.  A
+   Draft Standard (DS) is standards track, and carries more weight than
+   a Proposed Standard (PS), which may still undergo revisions.
+   Informational or Experimental RFCs do not specify a standard. Other
+   proposals are isolated efforts with little or no public review, and
+   unknown chances of garnering industry backing.
+
+   "Implemented at" indicates which participant in a TCP session must be
+   modified to implement the proposal. Legacy servers typically cannot
+   be modified, so this column indicates whether implementation happens
+   at either or both of the two nodes under some control: mobile device
+   and intermediate node. The symbols used are: WS (wireless sender,
+   that is, the mobile device's TCP send operation must be modified), WR
+   (wireless receiver, that is, the mobile device's TCP receive
+   operation must be modified), WD (wireless device, that is,
+   modifications at the mobile device are not specific to either TCP
+   send or receive), IN (intermediate node) and NI (network
+   infrastructure). These entities are to be understood within the
+   context of Section 1.1 ("Network Architecture"). NA simply means "not
+   applicable."
+
+   The "Recommendation" column captures our suggestions.  Some
+   mechanisms are endorsed for immediate adoption, others need more
+   evidence and research, and others are not recommended.
+
+Name                   Stability of     Implemented   Recommendation
+                       the Proposal     at
+====================   =============    ===========   =================
+
+Increased Initial      RFC 2581 (PS)    WS            Yes
+Window                                                (initial_window=2)
+
+Disable delayed ACKs   NA               WR            When stable
+during slow start
+
+Byte counting          NA               WS            No
+instead of ACK
+counting
+
+
+
+
+Montenegro, et al.           Informational                     [Page 33]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+TCP Header             RFC 1144 (PS)    WD            Yes
+compression for PPP                     IN            (see 4.11)
+
+IP Payload             RFC 2393 (PS)    WD            Yes
+Compression                             (simultaneously
+(IPComp)                                needed on Server)
+
+Header                 RFC 2507 (PS),   WD            Yes
+Compression            RFC 2509 (PS)    IN            (For IPv4, TCP and
+                                                      Mobile IP, PPP)
+
+SNOOP plus SACK        In limited use   IN            Yes
+                                        WD (for SACK)
+
+Fast retransmit/fast   RFC 2581 (PS)    WD            Yes (should be
+recovery                                              there already)
+
+Transaction/TCP        RFC 1644         WD            No
+                       (Experimental)   (simultaneously
+                                        needed on Server)
+
+Estimating Slow        NA               WS            No
+Start Threshold
+(ssthresh)
+
+Delayed Duplicate      Not stable       WR            When stable
+Acknowledgements                        IN (for
+                                        notifications)
+
+Class-based Queuing    NA               WD            When stable
+on End Systems
+
+Explicit Congestion    RFC 2481 (EXP)   WD            Yes
+Notification                            NI
+
+
+TCP Control Block      RFC 2140         WD            Yes
+Interdependence        (Informational)                (Track research)
+
+
+   Of all the optimizations in the table above, only SNOOP plus SACK and
+   Delayed Duplicate Acknowledgements are currently being proposed solely
+   for wireless networks. The others are being considered even for non-
+   wireless applications. Their more general applicability attracts more
+   attention and analysis from the research community.
+
+   Of the above mechanisms, only Header Compression (for IP and TCP) and
+   "SNOOP plus SACK" cease to work in the presence of IPSec.
+
+
+
+Montenegro, et al.           Informational                     [Page 34]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+6 Conclusion
+
+   In view of the unpredictable and problematic nature of long thin
+   networks, arriving at an optimized transport is a daunting task. We
+   have reviewed the existing proposals along with future research
+   items. Based on this overview, we also recommend mechanisms for
+   implementation in long thin networks (LTNs).
+
+7 Acknowledgements
+
+   The authors are deeply indebted to the IETF tcpsat and tcpimpl
+   working groups. The following individuals have also provided valuable
+   feedback: Mark Allman (NASA), Vern Paxson (ACIRI), Raphi Rom
+   (Technion/Sun), Charlie Perkins (Nokia), Peter Stark (Phone.com).
+
+8 Security Considerations
+
+   The mechanisms discussed and recommended in this document have been
+   proposed in previous publications. The security considerations
+   outlined in the original discussions apply here as well.  Several
+   security issues are also discussed throughout this document.
+   Additionally, we present below a non-exhaustive list of the most
+   salient issues concerning our recommended mechanisms:
+
+   -  Larger Initial TCP Window Size
+
+      No known security issues [RFC2414, RFC2581].
+
+   -  Header Compression
+
+      May be open to some denial of service attacks. But any attacker in
+      a position to launch these attacks would have much stronger
+      attacks at his disposal [IPHC, IPHC-RTP].
+
+   -  Congestion Control, Fast Retransmit/Fast Recovery
+
+      An attacker may force TCP connections to grind to a halt, or, more
+      dangerously, behave more aggressively. The latter possibility may
+      lead to congestion collapse, at least in some regions of the
+      network [RFC2581].
+
+   -  Explicit Congestion Notification
+
+      It does not appear to increase the vulnerabilities in the network.
+      On the contrary, it may reduce them by aiding in the
+      identification of flows unresponsive to or non-compliant with TCP
+      congestion control [ECN].
+
+
+
+
+Montenegro, et al.           Informational                     [Page 35]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   -  Sharing of Network Performance Information (TCP Control Block
+      Sharing and Congestion Manager module)
+
+      Some information should not be shared. For example, TCP sequence
+      numbers are used to protect against spoofing attacks.  Even
+      limiting the sharing to performance values leaves open the
+      possibility of denial-of-service attacks [Touch97].
+
+   -  Performance Enhancing Proxies
+
+      These systems are men-in-the-middle from the point of view of
+      their security vulnerabilities. Accordingly, they must be used
+      with extreme care so as to prevent their being hijacked and
+      misused.
+
+   This last point is not to be underestimated: there is a general
+   security concern whenever an intermediate node performs operations
+   different from those carried out on an end-to-end basis. This is not
+   specific to performance-enhancing proxies.  In particular, there may
+   be a tendency to forego IPSEC-based privacy in order to allow, for
+   example, a SNOOP module, header compression (TCP, UDP, RTP, etc), or
+   HTTP proxies to work.
+
+   Adding end-to-end security at higher layers (for example via RTP
+   encryption, or via TLS encryption of the TCP payload) alleviates the
+   problem. However, this still leaves protocol headers in the clear,
+   and these may be exploited for traffic analysis and denial-of-service
+   attacks.
+
+9 References
+
+   [ACKSPACING]   Partridge, C., "ACK Spacing for High Delay-Bandwidth
+                  Paths with Insufficient Buffering", Work in Progress.
+
+   [ADGGHOSSTT98] Allman, M., Dawkins, S., Glover, D., Griner, J.,
+                  Henderson, T., Heidemann, J., Kruse, H., Ostermann, S.,
+                  Scott, K., Semke, J., Touch, J. and D. Tran, "Ongoing
+                  TCP Research Related to Satellites", Work in Progress.
+
+   [AGS98]        Allman, M., Glover, D. and L. Sanchez, "Enhancing TCP
+                  Over Satellite Channels using Standard Mechanisms",
+                  BCP 28, RFC 2488, January 1999.
+
+
+
+
+
+
+
+
+
+Montenegro, et al.           Informational                     [Page 36]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   [Allman98]     Mark Allman. On the Generation and Use of TCP
+                  Acknowledgments. ACM Computer Communication Review,
+                  28(5), October 1998.
+
+   [AHO98]        Allman, M., Hayes, C., Ostermann, S., "An Evaluation
+                  of TCP with Larger Initial Windows," Computer
+                  Communication Review, 28(3), July 1998.
+
+   [BBKT96]       Bhagwat, P., Bhattacharya, P., Krishna, A., Tripathi,
+                  S., "Enhancing Throughput over Wireless LANs Using
+                  Channel State Dependent Packet Scheduling," in Proc.
+                  IEEE INFOCOM'96, pp. 1133-40, March 1996.
+
+   [BBKVP96]      Bakshi, B., Krishna, P., Vaidya, N., Pradhan,
+                  D.K., "Improving Performance of TCP over Wireless
+                  Networks," Technical Report 96-014, Texas A&M
+                  University, 1996.
+
+   [BPSK96]       Balakrishnan, H., Padmanabhan, V., Seshan, S., Katz,
+                  R., "A Comparison of Mechanisms for Improving TCP
+                  Performance over Wireless Links," in ACM SIGCOMM,
+                  Stanford, California, August 1996.
+
+   [BPK99]        Balakrishnan, H., Padmanabhan, V., Katz, R., "The
+                  effects of asymmetry on TCP performance," ACM Mobile
+                  Networks and Applications (MONET), Vol. 4, No. 3,
+                  1999, pp. 219-241.
+
+   [BV97]         S. Biaz and N. H. Vaidya, "Distinguishing Congestion
+                  Losses from Wireless Transmission Losses: A Negative
+                  Result," Seventh International Conference on Computer
+                  Communications and Networks (IC3N), New Orleans,
+                  October 1998.
+
+   [BV98]         Biaz, S., Vaidya, N., "Sender-Based heuristics for
+                  Distinguishing Congestion Losses from Wireless
+                  Transmission Losses," Texas A&M University, Technical
+                  Report 98-013, June 1998.
+
+   [BV98a]        Biaz, S., Vaidya, N., "Discriminating Congestion
+                  Losses from Wireless Losses using Inter-Arrival Times
+                  at the Receiver," Texas A&M University, Technical
+                  Report 98-014, June 1998.
+
+   [BW97]         Brasche, G., Walke, B., "Concepts, Services, and
+                  Protocols of the New GSM Phase 2+ General Packet Radio
+                  Service," IEEE Communications Magazine, Vol. 35, No.
+                  8, August 1997.
+
+
+
+Montenegro, et al.           Informational                     [Page 37]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   [CB96]         Cheshire, S., Baker, M., "Experiences with a Wireless
+                  Network in MosquitoNet," IEEE Micro, February 1996.
+                  Available online as:
+                  http://rescomp.stanford.edu/~cheshire/papers
+                  /wireless.ps.
+
+   [CDMA]         Electronic Industry Alliance (EIA)/Telecommunications
+                  Industry Association (TIA), IS-95: Mobile Station-Base
+                  Station Compatibility Standard for Dual-Mode Wideband
+                  Spread Spectrum Cellular System, 1993.
+
+   [CDPD]         Wireless Data Forum, CDPD System Specification,
+                  Release 1.1, 1995.
+
+   [CM]           Hari Balakrishnan and Srinivasan Seshan, "The
+                  Congestion Manager," Work in Progress.
+
+   [CTCSM97]      Chang, H., Tait, C., Cohen, N., Shapiro, M.,
+                  Mastrianni, S., Floyd, R., Housel, B., Lindquist, D.,
+                  "Web Browsing in a Wireless Environment: Disconnected
+                  and Asynchronous Operation in ARTour Web Express," in
+                  Proc. MobiCom'97, Budapest, Hungary, September 1997.
+
+   [Demers90]     Demers, A., Keshav, S., and Shenker, S., Analysis and
+                  Simulation of a Fair Queueing Algorithm,
+                  Internetworking: Research and Experience, Vol. 1,
+                  1990, pp. 3-26.
+
+   [ECN]          Ramakrishnan, K. and S. Floyd, "A Proposal to add
+                  Explicit Congestion Notification (ECN) to IP", RFC
+                  2481, January 1999.
+
+   [Floyd95]      Floyd, S., and Jacobson, V., Link-sharing and Resource
+                  Management Models for Packet Networks. IEEE/ACM
+                  Transactions on Networking, Vol. 3 No. 4, pp. 365-386,
+                  August 1995.
+
+   [FSS98]        Fragouli, C., Sivaraman, V., Srivastava, M.,
+                  "Controlled Multimedia Wireless Link Sharing via
+                  Enhanced Class-Based Queueing with Channel-State-
+                  Dependent Packet Scheduling," Proc. IEEE INFOCOM'98,
+                  April 1998.
+
+   [GPRS]         ETSI, "General Packet Radio Service (GPRS): Service
+                  Description, Stage 2," GSM03.60, v.6.1.1 August 1998.
+
+
+
+
+
+
+Montenegro, et al.           Informational                     [Page 38]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   [GSM]          Rahnema, M., "Overview of the GSM system and protocol
+                  architecture," IEEE Communications Magazine, vol. 31,
+                  pp 92-100, April 1993.
+
+   [HL96]         Housel, B., Lindquist, D., "WebExpress: A System for
+                  Optimizing Web Browsing in a Wireless Environment," in
+                  Proc.  MobiCom'96, Rye, New York, USA, November 1996.
+
+   [HTTP-PERF]    Henrik Frystyk Nielsen (W3C, MIT), Jim Gettys (W3C,
+                  Digital), Anselm Baird-Smith (W3C, INRIA), Eric
+                  Prud'hommeaux (W3C, MIT), Hon Lie (W3C, INRIA), Chris
+                  Lilley (W3C, INRIA), "Network Performance Effects of
+                  HTTP/1.1, CSS1, and PNG," ACM SIGCOMM '97, Cannes,
+                  France, September 1997.  Available at:
+                  http://www.w3.org/Protocols/HTTP/Performance
+                  /Pipeline.html
+
+   [IPPCP]        Shacham, A., Monsour, R., Pereira, R. and M. Thomas,
+                  "IP Payload Compression Protocol (IPComp)", RFC 2393,
+                  December 1998.
+
+   [IPHC]         Degermark, M., Nordgren, B. and S. Pink, "IP Header
+                  Compression", RFC 2507, February 1999.
+
+   [IPHC-RTP]     Casner, S. and  V. Jacobson, "Compressing IP/UDP/RTP
+                  Headers for Low-Speed Serial Links", RFC 2508,
+                  February 1999.
+
+   [IPHC-PPP]     Engan, M., Casner, S. and C. Bormann, "IP Header
+                  Compression over PPP", RFC 2509, February 1999.
+
+   [ITCP]         Bakre, A., Badrinath, B.R., "Handoff and Systems
+                  Support for Indirect TCP/IP," in Proceedings of the
+                  Second USENIX Symposium on Mobile and Location-
+                  Independent Computing, Ann Arbor, Michigan, April 10-
+                  11, 1995.
+
+   [Jain89]       Jain, R., "A Delay-Based Approach for Congestion
+                  Avoidance in Interconnected Heterogeneous Computer
+                  Networks," Digital Equipment Corporation, Technical
+                  Report DEC-TR-566, April 1989.
+
+   [Karn93]       Karn, P., "The Qualcomm CDMA Digital Cellular System,"
+                  Proc. USENIX Mobile and Location-Independent Computing
+                  Symposium, USENIX Association, August 1993.
+
+
+
+
+
+
+Montenegro, et al.           Informational                     [Page 39]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   [KRLKA97]      Kojo, M., Raatikainen, K., Liljeberg,  M., Kiiskinen,
+                  J., Alanko, T., "An Efficient Transport Service for
+                  Slow Wireless Telephone Links," in IEEE Journal on
+                  Selected Areas of Communication, volume 15, number 7,
+                  September 1997.
+
+   [LAKLR95]      Liljeberg, M., Alanko, T., Kojo, M., Laamanen, H.,
+                  Raatikainen, K., "Optimizing World-Wide Web for
+                  Weakly-Connected Mobile Workstations: An Indirect
+                  Approach," in Proc. 2nd Int.  Workshop on Services in
+                  Distributed and Networked Environments, Whistler,
+                  Canada, pp. 132-139, June 1995.
+
+   [LHKR96]       Liljeberg, M., Helin, H., Kojo, M., Raatikainen, K.,
+                  "Mowgli WWW Software: Improved Usability of WWW in
+                  Mobile WAN Environments," in Proc.  IEEE Global
+                  Internet 1996 Conference, London, UK, November 1996.
+
+   [LS98]         Lettieri, P., Srivastava, M., "Adaptive Frame Length
+                  Control for Improving Wireless Link Throughput, Range,
+                  and Energy Efficiency," Proc.  IEEE INFOCOM'98, April
+                  1998.
+
+   [MNCP]         Piscitello, D., Phifer, L., Wang, Y., Hovey, R.,
+                  "Mobile Network Computing Protocol (MNCP)", Work in
+                  Progress.
+
+   [MOWGLI]       Kojo, M., Raatikainen, K., Alanko, T., "Connecting
+                  Mobile Workstations to the Internet over a Digital
+                  Cellular Telephone Network," in Proc. Workshop on
+                  Mobile and Wireless Information Systems (MOBIDATA),
+                  Rutgers University, NJ, November 1994.  Available at:
+                  http://www.cs.Helsinki.FI/research/mowgli/. Revised
+                  version published in Mobile Computing, pp. 253-270,
+                  Kluwer, 1996.
+
+   [MSMO97]       Mathis, M., Semke, J., Mahdavi, J., Ott, T., "The
+                  Macroscopic Behavior of the TCP Congestion Avoidance
+                  Algorithm," in Computer Communications Review, a
+                  publication of ACM SIGCOMM, volume 27, number 3, July
+                  1997.
+
+   [MTCP]         Brown, K., Singh, S., "A Network Architecture for
+                  Mobile Computing," Proc. IEEE INFOCOM'96, pp. 1388-
+                  1396, March 1996.  Available at
+                  ftp://ftp.ece.orst.edu/pub/singh/papers
+                  /transport.ps.gz
+
+
+
+
+Montenegro, et al.           Informational                     [Page 40]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   [M-TCP]        Brown, K., Singh, S., "M-TCP: TCP for Mobile Cellular
+                  Networks," ACM Computer Communications Review Vol.
+                  27(5), 1997.  Available at
+                  ftp://ftp.ece.orst.edu/pub/singh/papers/mtcp.ps.gz
+
+   [MV97]         Mehta, M., Vaidya, N., "Delayed Duplicate-
+                  Acknowledgements:  A Proposal to Improve Performance
+                  of TCP on Wireless Links," Texas A&M University,
+                  December 24, 1997.  Available at
+                  http://www.cs.tamu.edu/faculty/vaidya/mobile.html
+
+   [NETBLT]       White, J., "NETBLT (Network Block Transfer Protocol)",
+                  Work in Progress.
+
+   [Paxson97]     V. Paxson, "End-to-End Internet Packet Dynamics,"
+                  Proc. SIGCOMM '97.  Available at
+                  ftp://ftp.ee.lbl.gov/papers/vp-pkt-dyn-sigcomm97.ps.Z
+
+   [RED]          Braden, B., Clark, D., Crowcroft, J., Davie, B.,
+                  Deering, S., Estrin, D., Floyd, S., Jacobson, V.,
+                  Minshall, G., Partridge, C., Peterson, L.,
+                  Ramakrishnan, K., Shenker, S., Wroclawski, J. and L.
+                  Zhang, "Recommendations on Queue Management and
+                  Congestion Avoidance in the Internet", RFC 2309, April
+                  1998.
+
+   [RLP]          ETSI, "Radio Link Protocol for Data and Telematic
+                  Services on the Mobile Station - Base Station System
+                  (MS-BSS) interface and the Base Station System -
+                  Mobile Switching Center (BSS-MSC) interface," GSM
+                  Specification 04.22, Version 3.7.0, February 1992.
+
+   [RFC908]       Velten, D., Hinden, R. and J. Sax, "Reliable Data
+                  Protocol", RFC 908, July 1984.
+
+   [RFC1030]      Lambert, M., "On Testing the NETBLT Protocol over
+                  Divers Networks", RFC 1030, November 1987.
+
+   [RFC1122]      Braden, R., "Requirements for Internet Hosts --
+                  Communication Layers", STD 3, RFC 1122, October 1989.
+
+   [RFC1144]      Jacobson, V., "Compressing TCP/IP Headers for Low-
+                  Speed Serial Links", RFC 1144, February 1990.
+
+   [RFC1151]      Partridge, C., Hinden, R., "Version 2 of the Reliable
+                  Data Protocol (RDP)", RFC 1151, April 1990.
+
+
+
+
+
+Montenegro, et al.           Informational                     [Page 41]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   [RFC1191]      Mogul, J. and S. Deering, "Path MTU Discovery", RFC
+                  1191, November 1990.
+
+   [RFC1397]      Braden, R., "Extending TCP for Transactions --
+                  Concepts", RFC 1397, November 1992.
+
+   [RFC1644]      Braden, R., "T/TCP -- TCP Extensions for Transactions
+                  Functional Specification", RFC 1644, July 1994.
+
+   [RFC1661]      Simpson, W., "The Point-To-Point Protocol (PPP)", STD
+                  51, RFC 1661, July 1994.
+
+   [RFC1928]      Leech, M., Ganis, M., Lee, Y., Kuris, R., Koblas, D.
+                  and L. Jones, "SOCKS Protocol Version 5", RFC 1928,
+                  March 1996.
+
+   [RFC1986]      Polites, W., Wollman, W., Woo, D. and R. Langan,
+                  "Experiments with a Simple File Transfer Protocol for
+                  Radio Links using Enhanced Trivial File Transfer
+                  Protocol (ETFTP)", RFC 1986, August 1996.
+
+   [RFC2002]      Perkins, C., "IP Mobility Support", RFC 2002, October
+                  1996.
+
+   [RFC2003]      Perkins, C., "IP Encapsulation within IP", RFC 2003,
+                  October 1996.
+
+   [RFC2004]      Perkins, C., "Minimal Encapsulation within IP", RFC
+                  2004, October 1996.
+
+   [RFC2018]      Mathis, M., Mahdavi, J., Floyd, S. and A. Romanow,
+                  "TCP Selective Acknowledgment Options", RFC 2018,
+                  October 1996.
+
+   [RFC2188]      Banan, M., Taylor, M. and J. Cheng, "AT&T/Neda's
+                  Efficient Short Remote Operations (ESRO) Protocol
+                  Specification Version 1.2", RFC 2188, September 1997.
+
+   [RFC2246]      Dierks, T. and C. Allen, "The TLS Protocol Version 1.0",
+                  RFC 2246, January 1999.
+
+   [RFC2414]      Allman, M., Floyd, S. and C. Partridge. "Increasing
+                  TCP's Initial Window", RFC 2414, September 1998.
+
+   [RFC2415]      Poduri, K. and K. Nichols, "Simulation Studies of
+                  Increased Initial TCP Window Size", RFC 2415,
+                  September 1998.
+
+
+
+
+Montenegro, et al.           Informational                     [Page 42]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   [RFC2416]      Shepard, T. and C. Partridge, "When TCP Starts Up With
+                  Four Packets Into Only Three Buffers", RFC 2416,
+                  September 1998.
+
+   [RFC2581]      Allman, M., Paxson, V. and W. Stevens, "TCP Congestion
+                  Control", RFC 2581, April 1999.
+
+   [RFC2582]      Floyd, S. and T. Henderson, "The NewReno Modification
+                  to TCP's Fast Recovery Algorithm", RFC 2582, April
+                  1999.
+
+   [SNOOP]        Balakrishnan, H., Seshan, S., Amir, E., Katz, R.,
+                  "Improving TCP/IP Performance over Wireless Networks,"
+                  Proc. 1st ACM Conf. on Mobile Computing and Networking
+                  (Mobicom), Berkeley, CA, November 1995.
+
+   [Stevens94]    R. Stevens, "TCP/IP Illustrated, Volume 1," Addison-
+                  Wesley, 1994 (section 2.10 for MTU size considerations
+                  and section 11.3 for weak checksums).
+
+   [TCPHP]        Jacobson, V., Braden, R. and D. Borman, "TCP
+                  Extensions for High Performance", RFC 1323, May 1992.
+
+   [TCPSATMIN]    TCPSAT Minutes, August, 1997. Available at:
+                  http://tcpsat.lerc.nasa.gov/tcpsat/meetings/munich-
+                  minutes.txt.
+
+   [Touch97]      Touch, J., "TCP Control Block Interdependence", RFC
+                  2140, April 1997.
+
+   [Vaidya99]     N. H. Vaidya, M. Mehta, C. Perkins, G. Montenegro,
+                  "Delayed Duplicate Acknowledgements: A TCP-Unaware
+                  Approach to Improve Performance of TCP over Wireless,"
+                  Technical Report 99-003, Computer Science Dept., Texas
+                  A&M University, February 1999.
+
+   [VEGAS]        Brakmo, L., O'Malley, S., "TCP Vegas, New Techniques
+                  for Congestion Detection and Avoidance," SIGCOMM'94,
+                  London, pp 24-35, October 1994.
+
+   [VMTP]         Cheriton, D., "VMTP: Versatile Message Transaction
+                  Protocol", RFC 1045, February 1988.
+
+   [WAP]          Wireless Application Protocol Forum.
+                  http://www.wapforum.org/
+
+
+
+
+
+
+Montenegro, et al.           Informational                     [Page 43]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   [WC91]         Wang, Z., Crowcroft, J., "A New Congestion Control
+                  Scheme:  Slow Start and Search," ACM Computer
+                  Communication Review, vol 21, pp 32-43, January 1991.
+
+   [WTCP]         Ratnam, K., Matta, I., "WTCP: An Efficient
+                  Transmission Control Protocol for Networks with
+                  Wireless Links," Technical Report NU-CCS-97-11,
+                  Northeastern University, July 1997. Available at:
+                  http://www.ece.neu.edu/personal/karu/papers/WTCP-
+                  NU.ps.gz
+
+   [YB94]         Yavatkar, R., Bhagawat, N., "Improving End-to-End
+                  Performance of TCP over Mobile Internetworks," Proc.
+                  Workshop on Mobile Computing Systems and Applications,
+                  IEEE Computer Society Press, Los Alamitos, California,
+                  1994.
+
+Authors' Addresses
+
+   Questions about this document may be directed to:
+
+   Gabriel E. Montenegro
+   Sun Labs Networking and Security Group
+   Sun Microsystems, Inc.
+   901 San Antonio Road
+   Mailstop UMPK 15-214
+   Mountain View, California 94303
+
+   Phone: +1-650-786-6288
+   Fax:   +1-650-786-6445
+   EMail: gab@sun.com
+
+
+   Spencer Dawkins
+   Nortel Networks
+   P.O. Box 833805
+   Richardson, Texas 75083-3805
+
+   Phone: +1-972-684-4827
+   Fax:   +1-972-685-3292
+   EMail: sdawkins@nortel.com
+
+
+
+
+
+
+
+
+
+
+Montenegro, et al.           Informational                     [Page 44]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+   Markku Kojo
+   Department of Computer Science
+   University of Helsinki
+   P.O. Box 26 (Teollisuuskatu 23)
+   FIN-00014 HELSINKI
+   Finland
+
+   Phone: +358-9-1914-4179
+   Fax:   +358-9-1914-4441
+   EMail: kojo@cs.helsinki.fi
+
+
+   Vincent Magret
+   Corporate Research Center
+   Alcatel Network Systems, Inc
+   1201 Campbell
+   Mail stop 446-310
+   Richardson Texas 75081 USA
+   M/S 446-310
+
+   Phone: +1-972-996-2625
+   Fax:   +1-972-996-5902
+   EMail: vincent.magret@aud.alcatel.com
+
+
+   Nitin Vaidya
+   Dept. of Computer Science
+   Texas A&M University
+   College Station, TX 77843-3112
+
+   Phone: 979-845-0512
+   Fax: 979-847-8578
+   EMail: vaidya@cs.tamu.edu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Montenegro, et al.           Informational                     [Page 45]
+
+RFC 2757                   Long Thin Networks               January 2000
+
+
+Full Copyright Statement
+
+   Copyright (C) The Internet Society (2000).  All Rights Reserved.
+
+   This document and translations of it may be copied and furnished to
+   others, and derivative works that comment on or otherwise explain it
+   or assist in its implementation may be prepared, copied, published
+   and distributed, in whole or in part, without restriction of any
+   kind, provided that the above copyright notice and this paragraph are
+   included on all such copies and derivative works.  However, this
+   document itself may not be modified in any way, such as by removing
+   the copyright notice or references to the Internet Society or other
+   Internet organizations, except as needed for the purpose of
+   developing Internet standards in which case the procedures for
+   copyrights defined in the Internet Standards process must be
+   followed, or as required to translate it into languages other than
+   English.
+
+   The limited permissions granted above are perpetual and will not be
+   revoked by the Internet Society or its successors or assigns.
+
+   This document and the information contained herein is provided on an
+   "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
+   TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
+   BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
+   HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
+   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Acknowledgement
+
+   Funding for the RFC Editor function is currently provided by the
+   Internet Society.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Montenegro, et al.           Informational                     [Page 46]
+
diff --git a/ext/picotcp/RFC/rfc2760.txt b/ext/picotcp/RFC/rfc2760.txt
new file mode 100644
index 0000000..5779d8d
--- /dev/null
+++ b/ext/picotcp/RFC/rfc2760.txt
@@ -0,0 +1,2579 @@
+
+
+
+
+
+
+Network Working Group                                  M. Allman, Editor
+Request for Comments: 2760   NASA Glenn Research Center/BBN Technologies
+Category: Informational                                       S. Dawkins
+                                                                  Nortel
+                                                               D. Glover
+                                                               J. Griner
+                                                                 D. Tran
+                                              NASA Glenn Research Center
+                                                            T. Henderson
+                                    University of California at Berkeley
+                                                            J. Heidemann
+                                                                J. Touch
+                                   University of Southern California/ISI
+                                                                H. Kruse
+                                                            S. Ostermann
+                                                         Ohio University
+                                                                K. Scott
+                                                   The MITRE Corporation
+                                                                J. Semke
+                                        Pittsburgh Supercomputing Center
+                                                           February 2000
+
+
+               Ongoing TCP Research Related to Satellites
+
+
+Status of this Memo
+
+   This memo provides information for the Internet community.  It does
+   not specify an Internet standard of any kind.  Distribution of this
+   memo is unlimited.
+
+Copyright Notice
+
+   Copyright (C) The Internet Society (2000).  All Rights Reserved.
+
+Abstract
+
+   This document outlines possible TCP enhancements that may allow TCP
+   to better utilize the available bandwidth provided by networks
+   containing satellite links.  The algorithms and mechanisms outlined
+   have not been judged to be mature enough to be recommended by the
+   IETF.  The goal of this document is to educate researchers as to the
+   current work and progress being done in TCP research related to
+   satellite networks.
+
+
+
+
+
+
+Allman, et al.               Informational                      [Page 1]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+Table of Contents
+
+   1         Introduction. . . . . . . . . . . . . . . . . . . .  2
+   2         Satellite Architectures . . . . . . . . . . . . . .  3
+   2.1       Asymmetric Satellite Networks . . . . . . . . . . .  3
+   2.2       Satellite Link as Last Hop. . . . . . . . . . . . .  3
+   2.3       Hybrid Satellite Networks     . . . . . . . . . . .  4
+   2.4       Point-to-Point Satellite Networks . . . . . . . . .  4
+   2.5       Multiple Satellite Hops . . . . . . . . . . . . . .  4
+   3         Mitigations . . . . . . . . . . . . . . . . . . . .  4
+   3.1       TCP For Transactions. . . . . . . . . . . . . . . .  4
+   3.2       Slow Start. . . . . . . . . . . . . . . . . . . . .  5
+   3.2.1     Larger Initial Window . . . . . . . . . . . . . . .  6
+   3.2.2     Byte Counting . . . . . . . . . . . . . . . . . . .  7
+   3.2.3     Delayed ACKs After Slow Start . . . . . . . . . . .  9
+   3.2.4     Terminating Slow Start. . . . . . . . . . . . . . . 11
+   3.3       Loss Recovery . . . . . . . . . . . . . . . . . . . 12
+   3.3.1     Non-SACK Based Mechanisms . . . . . . . . . . . . . 12
+   3.3.2     SACK Based Mechanisms . . . . . . . . . . . . . . . 13
+   3.3.3     Explicit Congestion Notification. . . . . . . . . . 16
+   3.3.4     Detecting Corruption Loss . . . . . . . . . . . . . 18
+   3.4       Congestion Avoidance. . . . . . . . . . . . . . . . 21
+   3.5       Multiple Data Connections . . . . . . . . . . . . . 22
+   3.6       Pacing TCP Segments . . . . . . . . . . . . . . . . 24
+   3.7       TCP Header Compression. . . . . . . . . . . . . . . 26
+   3.8       Sharing TCP State Among Similar Connections . . . . 29
+   3.9       ACK Congestion Control. . . . . . . . . . . . . . . 32
+   3.10      ACK Filtering . . . . . . . . . . . . . . . . . . . 34
+   4         Conclusions . . . . . . . . . . . . . . . . . . . . 36
+   5         Security Considerations . . . . . . . . . . . . . . 36
+   6         Acknowledgments . . . . . . . . . . . . . . . . . . 37
+   7         References. . . . . . . . . . . . . . . . . . . . . 37
+   8         Authors' Addresses. . . . . . . . . . . . . . . . . 43
+   9         Full Copyright Statement. . . . . . . . . . . . . . 46
+
+1   Introduction
+
+   This document outlines mechanisms that may help the Transmission
+   Control Protocol (TCP) [Pos81] better utilize the bandwidth provided
+   by long-delay satellite environments.  These mechanisms may also help
+   in other environments or for other protocols.  The proposals outlined
+   in this document are currently being studied throughout the research
+   community.  Therefore, these mechanisms are not mature enough to be
+   recommended for wide-spread use by the IETF.  However, some of these
+   mechanisms may be safely used today.  It is hoped that this document
+   will stimulate further study into the described mechanisms.  If, at
+
+
+
+
+
+Allman, et al.               Informational                      [Page 2]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   some point, the mechanisms discussed in this memo prove to be safe
+   and appropriate to be recommended for general use, the appropriate
+   IETF documents will be written.
+
+   It should be noted that non-TCP mechanisms that help performance over
+   satellite links do exist (e.g., application-level changes, queueing
+   disciplines, etc.).  However, outlining these non-TCP mitigations is
+   beyond the scope of this document and therefore is left as future
+   work.  Additionally, there are a number of mitigations to TCP's
+   performance problems that involve very active intervention by
+   gateways along the end-to-end path from the sender to the receiver.
+   Documenting the pros and cons of such solutions is also left as
+   future work.
+
+2   Satellite Architectures
+
+   Specific characteristics of satellite links and the impact these
+   characteristics have on TCP are presented in RFC 2488 [AGS99].  This
+   section discusses several possible topologies where satellite links
+   may be integrated into the global Internet.  Each mitigation outlined
+   in section 3 will include a discussion of which environment the
+   mechanism is expected to benefit.
+
+2.1 Asymmetric Satellite Networks
+
+   Some satellite networks exhibit a bandwidth asymmetry, a larger data
+   rate in one direction than the reverse direction, because of limits
+   on the transmission power and the antenna size at one end of the
+   link.  Meanwhile, some other satellite systems are unidirectional and
+   use a non-satellite return path (such as a dialup modem link).  The
+   nature of most TCP traffic is asymmetric with data flowing in one
+   direction and acknowledgments in the opposite direction.  However,
+   the term asymmetric in this document refers to different physical
+   capacities in the forward and return links.  Asymmetry has been shown
+   to be a problem for TCP [BPK97,BPK98].
+
+2.2 Satellite Link as Last Hop
+
+   Satellite links that provide service directly to end users, as
+   opposed to satellite links located in the middle of a network, may
+   allow for specialized design of protocols used over the last hop.
+   Some satellite providers use the satellite link as a shared high
+   speed downlink to users with a lower speed, non-shared terrestrial
+   link that is used as a return link for requests and acknowledgments.
+   Many times this creates an asymmetric network, as discussed above.
+
+
+
+
+
+
+Allman, et al.               Informational                      [Page 3]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+2.3 Hybrid Satellite Networks
+
+   In the more general case, satellite links may be located at any point
+   in the network topology.  In this case, the satellite link acts as
+   just another link between two gateways.  In this environment, a given
+   connection may be sent over terrestrial links (including terrestrial
+   wireless), as well as satellite links.  On the other hand, a
+   connection could also travel over only the terrestrial network or
+   only over the satellite portion of the network.
+
+2.4 Point-to-Point Satellite Networks
+
+   In point-to-point satellite networks, the only hop in the network is
+   over the satellite link.  This pure satellite environment exhibits
+   only the problems associated with the satellite links, as outlined in
+   [AGS99].  Since this is a private network, some mitigations that are
+   not appropriate for shared networks can be considered.
+
+2.5 Multiple Satellite Hops
+
+   In some situations, network traffic may traverse multiple satellite
+   hops between the source and the destination.  Such an environment
+   aggravates the satellite characteristics described in [AGS99].
+
+3   Mitigations
+
+   The following sections will discuss various techniques for mitigating
+   the problems TCP faces in the satellite environment.  Each of the
+   following sections will be organized as follows: First, each
+   mitigation will be briefly outlined.  Next, research work involving
+   the mechanism in question will be briefly discussed.  Next the
+   implementation issues of the mechanism will be presented (including
+   whether or not the particular mechanism presents any dangers to
+   shared networks).  Then a discussion of the mechanism's potential
+   with regard to the topologies outlined above is given.  Finally, the
+   relationships and possible interactions with other TCP mechanisms are
+   outlined.  The reader is expected to be familiar with the TCP
+   terminology used in [AGS99].
+
+3.1 TCP For Transactions
+
+3.1.1 Mitigation Description
+
+   TCP uses a three-way handshake to set up a connection between two
+   hosts [Pos81].  This connection setup requires 1-1.5 round-trip times
+   (RTTs), depending upon whether the data sender started the connection
+   actively or passively.  This startup time can be eliminated by using
+   TCP extensions for transactions (T/TCP) [Bra94].  After the first
+
+
+
+Allman, et al.               Informational                      [Page 4]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   connection between a pair of hosts is established, T/TCP is able to
+   bypass the three-way handshake, allowing the data sender to begin
+   transmitting data in the first segment sent (along with the SYN).
+   This is especially helpful for short request/response traffic, as it
+   saves a potentially long setup phase when no useful data is being
+   transmitted.
+
+3.1.2 Research
+
+   T/TCP is outlined and analyzed in [Bra92,Bra94].
+
+3.1.3 Implementation Issues
+
+   T/TCP requires changes in the TCP stacks of both the data sender and
+   the data receiver.  While T/TCP is safe to implement in shared
+   networks from a congestion control perspective, several security
+   implications of sending data in the first data segment have been
+   identified [ddKI99].
+
+3.1.4 Topology Considerations
+
+   It is expected that T/TCP will be equally beneficial in all
+   environments outlined in section 2.
+
+3.1.5 Possible Interaction and Relationships with Other Research
+
+   T/TCP allows data transfer to start more rapidly, much like using a
+   larger initial congestion window (see section 3.2.1), delayed ACKs
+   after slow start (section 3.2.3) or byte counting (section 3.2.2).
+
+3.2 Slow Start
+
+   The slow start algorithm is used to gradually increase the size of
+   TCP's congestion window (cwnd) [Jac88,Ste97,APS99].  The algorithm is
+   an important safe-guard against transmitting an inappropriate amount
+   of data into the network when the connection starts up.  However,
+   slow start can also waste available network capacity, especially in
+   long-delay networks [All97a,Hay97].  Slow start is particularly
+   inefficient for transfers that are short compared to the
+   delay*bandwidth product of the network (e.g., WWW transfers).
+
+   Delayed ACKs are another source of wasted capacity during the slow
+   start phase.  RFC 1122 [Bra89] suggests data receivers refrain from
+   ACKing every incoming data segment.  However, every second full-sized
+   segment should be ACKed.  If a second full-sized segment does not
+   arrive within a given timeout, an ACK must be generated (this timeout
+   cannot exceed 500 ms).  Since the data sender increases the size of
+   cwnd based on the number of arriving ACKs, reducing the number of
+
+
+
+Allman, et al.               Informational                      [Page 5]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   ACKs slows the cwnd growth rate.  In addition, when TCP starts
+   sending, it sends 1 segment.  When using delayed ACKs a second
+   segment must arrive before an ACK is sent.  Therefore, the receiver
+   is always forced to wait for the delayed ACK timer to expire before
+   ACKing the first segment, which also increases the transfer time.
+
+   Several proposals have suggested ways to make slow start less time
+   consuming.  These proposals are briefly outlined below and references
+   to the research work are given.
+
+3.2.1 Larger Initial Window
+
+3.2.1.1 Mitigation Description
+
+   One method that will reduce the amount of time required by slow start
+   (and therefore, the amount of wasted capacity) is to increase the
+   initial value of cwnd.  An experimental TCP extension outlined in
+   [AFP98] allows the initial size of cwnd to be increased from 1
+   segment to that given in equation (1).
+
+               min (4*MSS, max (2*MSS, 4380 bytes))               (1)
+
+   By increasing the initial value of cwnd, more packets are sent during
+   the first RTT of data transmission, which will trigger more ACKs,
+   allowing the congestion window to open more rapidly.  In addition, by
+   sending at least 2 segments initially, the first segment does not
+   need to wait for the delayed ACK timer to expire as is the case when
+   the initial size of cwnd is 1 segment (as discussed above).
+   Therefore, the value of cwnd given in equation 1 saves up to 3 RTTs
+   and a delayed ACK timeout when compared to an initial cwnd of 1
+   segment.
+
+   Also, we note that RFC 2581 [APS99], a standards-track document,
+   allows a TCP to use an initial cwnd of up to 2 segments.  This change
+   is highly recommended for satellite networks.
+
+3.2.1.2 Research
+
+   Several researchers have studied the use of a larger initial window
+   in various environments.  [Nic97] and [KAGT98] show a reduction in
+   WWW page transfer time over hybrid fiber coax (HFC) and satellite
+   links respectively.  Furthermore, it has been shown that using an
+   initial cwnd of 4 segments does not negatively impact overall
+   performance over dialup modem links with a small number of buffers
+   [SP98].  [AHO98] shows an improvement in transfer time for 16 KB
+   files across the Internet and dialup modem links when using a larger
+   initial value for cwnd.  However, a slight increase in dropped
+
+
+
+
+Allman, et al.               Informational                      [Page 6]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   segments was also shown.  Finally, [PN98] shows improved transfer
+   time for WWW traffic in simulations with competing traffic, in
+   addition to a small increase in the drop rate.
+
+3.2.1.3 Implementation Issues
+
+   The use of a larger initial cwnd value requires changes to the
+   sender's TCP stack.  Using an initial congestion window of 2 segments
+   is allowed by RFC 2581 [APS99].  Using an initial congestion window
+   of 3 or 4 segments is not expected to present any danger of
+   congestion collapse [AFP98]; however, it may degrade performance in
+   some networks.
+
+3.2.1.4 Topology Considerations
+
+   It is expected that the use of a large initial window would be
+   equally beneficial to all network architectures outlined in section
+   2.
+
+3.2.1.5 Possible Interaction and Relationships with Other Research
+
+   Using a fixed larger initial congestion window decreases the impact
+   of a long RTT on transfer time (especially for short transfers) at
+   the cost of bursting data into a network with unknown conditions.  A
+   mechanism that mitigates bursts may make the use of a larger initial
+   congestion window more appropriate (e.g., limiting the size of line-
+   rate bursts [FF96] or pacing the segments in a burst [VH97a]).
+
+   Also, using delayed ACKs only after slow start (as outlined in
+   section 3.2.3) offers an alternative way to immediately ACK the first
+   segment of a transfer and open the congestion window more rapidly.
+   Finally, using some form of TCP state sharing among a number of
+   connections (as discussed in 3.8) may provide an alternative to using
+   a fixed larger initial window.
+
+3.2.2 Byte Counting
+
+3.2.2.1 Mitigation Description
+
+   As discussed above, the wide-spread use of delayed ACKs increases the
+   time needed by a TCP sender to increase the size of the congestion
+   window during slow start.  This is especially harmful to flows
+   traversing long-delay GEO satellite links.  One mechanism that has
+   been suggested to mitigate the problems caused by delayed ACKs is the
+   use of "byte counting", rather than standard ACK counting
+   [All97a,All98].  Using standard ACK counting, the congestion window
+   is increased by 1 segment for each ACK received during slow start.
+   However, using byte counting the congestion window increase is based
+
+
+
+Allman, et al.               Informational                      [Page 7]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   on the number of previously unacknowledged bytes covered by each
+   incoming ACK, rather than on the number of ACKs received.  This makes
+   the increase relative to the amount of data transmitted, rather than
+   being dependent on the ACK interval used by the receiver.
+
+   Two forms of byte counting are studied in [All98].  The first is
+   unlimited byte counting (UBC).  This mechanism simply uses the number
+   of previously unacknowledged bytes to increase the congestion window
+   each time an ACK arrives.  The second form is limited byte counting
+   (LBC).  LBC limits the amount of cwnd increase to 2 segments.  This
+   limit throttles the size of the burst of data sent in response to a
+   "stretch ACK" [Pax97].  Stretch ACKs are acknowledgments that cover
+   more than 2 segments of previously unacknowledged data.  Stretch ACKs
+   can occur by design [Joh95] (although this is not standard), due to
+   implementation bugs [All97b,PADHV99] or due to ACK loss.  [All98]
+   shows that LBC prevents large line-rate bursts when compared to UBC,
+   and therefore offers fewer dropped segments and better performance.
+   In addition, UBC causes large bursts during slow start based loss
+   recovery due to the large cumulative ACKs that can arrive during loss
+   recovery.  The behavior of UBC during loss recovery can cause large
+   decreases in performance and [All98] strongly recommends UBC not be
+   deployed without further study into mitigating the large bursts.
+
+   Note: The standards track RFC 2581 [APS99] allows a TCP to use byte
+   counting to increase cwnd during congestion avoidance, however not
+   during slow start.
+
+3.2.2.2 Research
+
+   Using byte counting, as opposed to standard ACK counting, has been
+   shown to reduce the amount of time needed to increase the value of
+   cwnd to an appropriate size in satellite networks [All97a].  In
+   addition, [All98] presents a simulation comparison of byte counting
+   and the standard cwnd increase algorithm in uncongested networks and
+   networks with competing traffic.  This study found that the limited
+   form of byte counting outlined above can improve performance, while
+   also increasing the drop rate slightly.
+
+   [BPK97,BPK98] also investigated unlimited byte counting in
+   conjunction with various ACK filtering algorithms (discussed in
+   section 3.10) in asymmetric networks.
+
+
+
+
+
+
+
+
+
+
+Allman, et al.               Informational                      [Page 8]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+3.2.2.3 Implementation Issues
+
+   Changing from ACK counting to byte counting requires changes to the
+   data sender's TCP stack.  Byte counting violates the algorithm for
+   increasing the congestion window outlined in RFC 2581 [APS99] (by
+   making congestion window growth more aggressive during slow start)
+   and therefore should not be used in shared networks.
+
+3.2.2.4 Topology Considerations
+
+   It has been suggested by some (and roundly criticized by others) that
+   byte counting will allow TCP to provide uniform cwnd increase,
+   regardless of the ACKing behavior of the receiver.  In addition, byte
+   counting also mitigates the retarded window growth provided by
+   receivers that generate stretch ACKs because of the capacity of the
+   return link, as discussed in [BPK97,BPK98].  Therefore, this change
+   is expected to be especially beneficial to asymmetric networks.
+
+3.2.2.5 Possible Interaction and Relationships with Other Research
+
+   Unlimited byte counting should not be used without a method to
+   mitigate the potentially large line-rate bursts the algorithm can
+   cause.  Also, LBC may send bursts that are too large for the given
+   network conditions.  In this case, LBC may also benefit from some
+   algorithm that would lessen the impact of line-rate bursts of
+   segments.  Also note that using delayed ACKs only after slow start
+   (as outlined in section 3.2.3) negates the limited byte counting
+   algorithm because each ACK covers only one segment during slow start.
+   Therefore, both ACK counting and byte counting yield the same
+   increase in the congestion window at this point (in the first RTT).
+
+3.2.3 Delayed ACKs After Slow Start
+
+3.2.3.1 Mitigation Description
+
+   As discussed above, TCP senders use the number of incoming ACKs to
+   increase the congestion window during slow start.  And, since delayed
+   ACKs reduce the number of ACKs returned by the receiver by roughly
+   half, the rate of growth of the congestion window is reduced.  One
+   proposed solution to this problem is to use delayed ACKs only after
+   the slow start (DAASS) phase.  This provides more ACKs while TCP is
+   aggressively increasing the congestion window and fewer ACKs while
+   TCP is in steady state, which conserves network resources.
+
+
+
+
+
+
+
+
+Allman, et al.               Informational                      [Page 9]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+3.2.3.2 Research
+
+   [All98] shows that in simulation, using delayed ACKs after slow start
+   (DAASS) improves transfer time when compared to a receiver that
+   always generates delayed ACKs.  However, DAASS also slightly
+   increases the loss rate due to the increased rate of cwnd growth.
+
+3.2.3.3 Implementation Issues
+
+   The major problem with DAASS is in the implementation.  The receiver
+   has to somehow know when the sender is using the slow start
+   algorithm.  The receiver could implement a heuristic that attempts to
+   watch the change in the amount of data being received and change the
+   ACKing behavior accordingly.  Or, the sender could send a message (a
+   flipped bit in the TCP header, perhaps) indicating that it was using
+   slow start.  The implementation of DAASS is, therefore, an open
+   issue.
+
+   Using DAASS does not violate the TCP congestion control specification
+   [APS99].  However, the standards (RFC 2581 [APS99]) currently
+   recommend using delayed acknowledgments and DAASS goes (partially)
+   against this recommendation.
+
+3.2.3.4 Topology Considerations
+
+   DAASS should work equally well in all scenarios presented in section
+   2.  However, in asymmetric networks it may aggravate ACK congestion
+   in the return link, due to the increased number of ACKs (see sections
+   3.9 and 3.10 for a more detailed discussion of ACK congestion).
+
+3.2.3.5 Possible Interaction and Relationships with Other Research
+
+   DAASS has several possible interactions with other proposals made in
+   the research community.  DAASS can aggravate congestion on the path
+   between the data receiver and the data sender due to the increased
+   number of returning acknowledgments.  This can have an especially
+   adverse effect on asymmetric networks that are prone to experiencing
+   ACK congestion.  As outlined in sections 3.9 and 3.10, several
+   mitigations have been proposed to reduce the number of ACKs that are
+   passed over a low-bandwidth return link.  Using DAASS will increase
+   the number of ACKs sent by the receiver.  The interaction between
+   DAASS and the methods for reducing the number of ACKs is an open
+   research question.  Also, as noted in section 3.2.1.5 above, DAASS
+   provides some of the same benefits as using a larger initial
+   congestion window and therefore it may not be desirable to use both
+   mechanisms together.  However, this remains an open question.
+   Finally, DAASS and limited byte counting are both used to increase
+
+
+
+
+Allman, et al.               Informational                     [Page 10]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   the rate at which the congestion window is opened.  The DAASS
+   algorithm substantially reduces the impact limited byte counting has
+   on the rate of congestion window increase.
+
+3.2.4 Terminating Slow Start
+
+3.2.4.1 Mitigation Description
+
+   The initial slow start phase is used by TCP to determine an
+   appropriate congestion window size for the given network conditions
+   [Jac88].  Slow start is terminated when TCP detects congestion, or
+   when the size of cwnd reaches the size of the receiver's advertised
+   window.  Slow start is also terminated if cwnd grows beyond a certain
+   size.  The threshold at which TCP ends slow start and begins using
+   the congestion avoidance algorithm is called "ssthresh" [Jac88].  In
+   most implementations, the initial value for ssthresh is the
+   receiver's advertised window.  During slow start, TCP roughly doubles
+   the size of cwnd every RTT and therefore can overwhelm the network
+   with at most twice as many segments as the network can handle.  By
+   setting ssthresh to a value less than the receiver's advertised
+   window initially, the sender may avoid overwhelming the network with
+   twice the appropriate number of segments.  Hoe [Hoe96] proposes using
+   the packet-pair algorithm [Kes91] and the measured RTT to determine a
+   more appropriate value for ssthresh.  The algorithm observes the
+   spacing between the first few returning ACKs to determine the
+   bandwidth of the bottleneck link.  Together with the measured RTT,
+   the delay*bandwidth product is determined and ssthresh is set to this
+   value.  When TCP's cwnd reaches this reduced ssthresh, slow start is
+   terminated and transmission continues using congestion avoidance,
+   which is a more conservative algorithm for increasing the size of the
+   congestion window.
+
+3.2.4.2 Research
+
+   It has been shown that estimating ssthresh can improve performance
+   and decrease packet loss in simulations [Hoe96].  However, obtaining
+   an accurate estimate of the available bandwidth in a dynamic network
+   is very challenging, especially attempting to do so on the sending
+   side of the TCP connection [AP99].  Therefore, before this mechanism
+   is widely deployed, bandwidth estimation must be studied in more
+   detail.
+
+3.2.4.3 Implementation Issues
+
+   As outlined in [Hoe96], estimating ssthresh requires changes to the
+   data sender's TCP stack.  As suggested in [AP99], bandwidth estimates
+   may be more accurate when taken by the TCP receiver, and therefore
+   both sender and receiver changes would be required.  Estimating
+
+
+
+Allman, et al.               Informational                     [Page 11]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   ssthresh is safe to implement in production networks from a
+   congestion control perspective, as it can only make TCP more
+   conservative than outlined in RFC 2581 [APS99] (assuming the TCP
+   implementation is using an initial ssthresh of infinity as allowed by
+   [APS99]).
+
+3.2.4.4 Topology Considerations
+
+   It is expected that this mechanism will work equally well in all
+   symmetric topologies outlined in section 2.  However, asymmetric
+   links pose a special problem, as the rate of the returning ACKs may
+   not be the bottleneck bandwidth in the forward direction.  This can
+   lead to the sender setting ssthresh too low.  Premature termination
+   of slow start can hurt performance, as congestion avoidance opens
+   cwnd more conservatively.  Receiver-based bandwidth estimators do not
+   suffer from this problem.
+
+3.2.4.5 Possible Interaction and Relationships with Other Research
+
+   Terminating slow start at the right time is useful to avoid multiple
+   dropped segments.  However, using a selective acknowledgment-based
+   loss recovery scheme (as outlined in section 3.3.2) can drastically
+   improve TCP's ability to quickly recover from multiple lost segments.
+   Therefore, it may not be as important to terminate slow start before
+   a large loss event occurs.  [AP99] shows that using delayed
+   acknowledgments [Bra89] reduces the effectiveness of sender-side
+   bandwidth estimation.  Therefore, using delayed ACKs only after slow
+   start (as outlined in section 3.2.3) may make bandwidth estimation
+   more feasible.
+
+3.3 Loss Recovery
+
+3.3.1 Non-SACK Based Mechanisms
+
+3.3.1.1 Mitigation Description
+
+   Several similar algorithms have been developed and studied that
+   improve TCP's ability to recover from multiple lost segments in a
+   window of data without relying on the (often long) retransmission
+   timeout.  These sender-side algorithms, known as NewReno TCP, do not
+   depend on the availability of selective acknowledgments (SACKs)
+   [MMFR96].
+
+   These algorithms generally work by updating the fast recovery
+   algorithm to use information provided by "partial ACKs" to trigger
+   retransmissions.  A partial ACK covers some new data, but not all
+   data outstanding when a particular loss event starts.  For instance,
+   consider the case when segment N is retransmitted using the fast
+
+
+
+Allman, et al.               Informational                     [Page 12]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   retransmit algorithm and segment M is the last segment sent when
+   segment N is resent.  If segment N is the only segment lost, the ACK
+   elicited by the retransmission of segment N would be for segment M.
+   If, however, segment N+1 was also lost, the ACK elicited by the
+   retransmission of segment N will be N+1.  This can be taken as an
+   indication that segment N+1 was lost and used to trigger a
+   retransmission.
+
+3.3.1.2 Research
+
+   Hoe [Hoe95,Hoe96] introduced the idea of using partial ACKs to
+   trigger retransmissions and showed that doing so could improve
+   performance.  [FF96] shows that in some cases using partial ACKs to
+   trigger retransmissions reduces the time required to recover from
+   multiple lost segments.  However, [FF96] also shows that in some
+   cases (many lost segments) relying on the RTO timer can improve
+   performance over simply using partial ACKs to trigger all
+   retransmissions.  [HK99] shows that using partial ACKs to trigger
+   retransmissions, in conjunction with SACK, improves performance when
+   compared to TCP using fast retransmit/fast recovery in a satellite
+   environment.  Finally, [FH99] describes several slightly different
+   variants of NewReno.
+
+3.3.1.3 Implementation Issues
+
+   Implementing these fast recovery enhancements requires changes to the
+   sender-side TCP stack.  These changes can safely be implemented in
+   production networks and are allowed by RFC 2581 [APS99].
+
+3.3.1.4 Topology Considerations
+
+   It is expected that these changes will work well in all environments
+   outlined in section 2.
+
+3.3.1.5 Possible Interaction and Relationships with Other Research
+
+   See section 3.3.2.2.5.
+
+3.3.2 SACK Based Mechanisms
+
+3.3.2.1 Fast Recovery with SACK
+
+3.3.2.1.1 Mitigation Description
+
+   Fall and Floyd [FF96] describe a conservative extension to the fast
+   recovery algorithm that takes into account information provided by
+   selective acknowledgments (SACKs) [MMFR96] sent by the receiver.  The
+   algorithm starts after fast retransmit triggers the resending of a
+
+
+
+Allman, et al.               Informational                     [Page 13]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   segment.  As with fast retransmit, the algorithm cuts cwnd in half
+   when a loss is detected.  The algorithm keeps a variable called
+   "pipe", which is an estimate of the number of outstanding segments in
+   the network.  The pipe variable is decremented by 1 segment for each
+   duplicate ACK that arrives with new SACK information.  The pipe
+   variable is incremented by 1 for each new or retransmitted segment
+   sent.  A segment may be sent when the value of pipe is less than cwnd
+   (this segment is either a retransmission per the SACK information or
+   a new segment if the SACK information indicates that no more
+   retransmits are needed).
+
+   This algorithm generally allows TCP to recover from multiple segment
+   losses in a window of data within one RTT of loss detection.  Like
+   the forward acknowledgment (FACK) algorithm described below, the SACK
+   information allows the pipe algorithm to decouple the choice of when
+   to send a segment from the choice of what segment to send.
+
+   [APS99] allows the use of this algorithm, as it is consistent with
+   the spirit of the fast recovery algorithm.
+
+3.3.2.1.2 Research
+
+   [FF96] shows that the above described SACK algorithm performs better
+   than several non-SACK based recovery algorithms when 1--4 segments
+   are lost from a window of data.  [AHKO97] shows that the algorithm
+   improves performance over satellite links.  Hayes [Hay97] shows that
+   in certain circumstances, the SACK algorithm can hurt performance by
+   generating a large line-rate burst of data at the end of loss
+   recovery, which causes further loss.
+
+3.3.2.1.3 Implementation Issues
+
+   This algorithm is implemented in the sender's TCP stack.  However, it
+   relies on SACK information generated by the receiver.  This algorithm
+   is safe for shared networks and is allowed by RFC 2581 [APS99].
+
+3.3.2.1.4 Topology Considerations
+
+   It is expected that the pipe algorithm will work equally well in all
+   scenarios presented in section 2.
+
+3.3.2.1.5 Possible Interaction and Relationships with Other Research
+
+   See section 3.3.2.2.5.
+
+
+
+
+
+
+
+Allman, et al.               Informational                     [Page 14]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+3.3.2.2 Forward Acknowledgments
+
+3.3.2.2.1 Mitigation Description
+
+   The Forward Acknowledgment (FACK) algorithm [MM96a,MM96b] was
+   developed to improve TCP congestion control during loss recovery.
+   FACK uses TCP SACK options to glean additional information about the
+   congestion state, adding more precise control to the injection of
+   data into the network during recovery.  FACK decouples the congestion
+   control algorithms from the data recovery algorithms to provide a
+   simple and direct way to use SACK to improve congestion control.  Due
+   to the separation of these two algorithms, new data may be sent
+   during recovery to sustain TCP's self-clock when there is no further
+   data to retransmit.
+
+   The most recent version of FACK is Rate-Halving [MM96b], in which one
+   packet is sent for every two ACKs received during recovery.
+   Transmitting a segment for every-other ACK has the result of reducing
+   the congestion window in one round trip to half of the number of
+   packets that were successfully handled by the network (so when cwnd
+   is too large by more than a factor of two it still gets reduced to
+   half of what the network can sustain).  Another important aspect of
+   FACK with Rate-Halving is that it sustains the ACK self-clock during
+   recovery because transmitting a packet for every-other ACK does not
+   require half a cwnd of data to drain from the network before
+   transmitting, as required by the fast recovery algorithm
+   [Ste97,APS99].
+
+   In addition, the FACK with Rate-Halving implementation provides
+   Thresholded Retransmission to each lost segment.  "Tcprexmtthresh" is
+   the number of duplicate ACKs required by TCP to trigger a fast
+   retransmit and enter recovery.  FACK applies thresholded
+   retransmission to all segments by waiting until tcprexmtthresh SACK
+   blocks indicate that a given segment is missing before resending the
+   segment.  This allows reasonable behavior on links that reorder
+   segments.  As described above, FACK sends a segment for every second
+   ACK received during recovery.  New segments are transmitted except
+   when tcprexmtthresh SACK blocks have been observed for a dropped
+   segment, at which point the dropped segment is retransmitted.
+
+   [APS99] allows the use of this algorithm, as it is consistent with
+   the spirit of the fast recovery algorithm.
+
+3.3.2.2.2 Research
+
+   The original FACK algorithm is outlined in [MM96a].  The algorithm
+   was later enhanced to include Rate-Halving [MM96b].  The real-world
+   performance of FACK with Rate-Halving was shown to be much closer to
+
+
+
+Allman, et al.               Informational                     [Page 15]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   the theoretical maximum for TCP than either TCP Reno or the SACK-
+   based extensions to fast recovery outlined in section 3.3.2.1
+   [MSMO97].
+
+3.3.2.2.3 Implementation Issues
+
+   In order to use FACK, the sender's TCP stack must be modified.  In
+   addition, the receiver must be able to generate SACK options to
+   obtain the full benefit of using FACK.  The FACK algorithm is safe
+   for shared networks and is allowed by RFC 2581 [APS99].
+
+3.3.2.2.4 Topology Considerations
+
+   FACK is expected to improve performance in all environments outlined
+   in section 2.  Since it is better able to sustain its self-clock than
+   TCP Reno, it may be considerably more attractive over long delay
+   paths.
+
+3.3.2.2.5 Possible Interaction and Relationships with Other Research
+
+   Both SACK based loss recovery algorithms described above (the fast
+   recovery enhancement and the FACK algorithm) are similar in that they
+   attempt to effectively repair multiple lost segments from a window of
+   data.  Which of the SACK-based loss recovery algorithms to use is
+   still an open research question.  In addition, these algorithms are
+   similar to the non-SACK NewReno algorithm described in section 3.3.1,
+   in that they attempt to recover from multiple lost segments without
+   reverting to using the retransmission timer.  As has been shown, the
+   above SACK based algorithms are more robust than the NewReno
+   algorithm.  However, the SACK algorithm requires a cooperating TCP
+   receiver, which the NewReno algorithm does not.  A reasonable TCP
+   implementation might include both a SACK-based and a NewReno-based
+   loss recovery algorithm such that the sender can use the most
+   appropriate loss recovery algorithm based on whether or not the
+   receiver supports SACKs.  Finally, both SACK-based and non-SACK-based
+   versions of fast recovery have been shown to transmit a large burst
+   of data upon leaving loss recovery, in some cases [Hay97].
+   Therefore, the algorithms may benefit from some burst suppression
+   algorithm.
+
+3.3.3 Explicit Congestion Notification
+
+3.3.3.1 Mitigation Description
+
+   Explicit congestion notification (ECN) allows routers to inform TCP
+   senders about imminent congestion without dropping segments.  Two
+   major forms of ECN have been studied.  A router employing backward
+   ECN (BECN) transmits messages directly to the data originator
+
+
+
+Allman, et al.               Informational                     [Page 16]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   informing it of congestion.  IP routers can accomplish this with an
+   ICMP Source Quench message.  The arrival of a BECN signal may or may
+   not mean that a TCP data segment has been dropped, but it is a clear
+   indication that the TCP sender should reduce its sending rate (i.e.,
+   the value of cwnd).  The second major form of congestion notification
+   is forward ECN (FECN).  FECN routers mark data segments with a
+   special tag when congestion is imminent, but forward the data
+   segment.  The data receiver then echoes the congestion information
+   back to the sender in the ACK packet.  A description of a FECN
+   mechanism for TCP/IP is given in [RF99].
+
+   As described in [RF99], senders transmit segments with an "ECN-
+   Capable Transport" bit set in the IP header of each packet.  If a
+   router employing an active queueing strategy, such as Random Early
+   Detection (RED) [FJ93,BCC+98], would otherwise drop this segment, a
+   "Congestion Experienced" bit in the IP header is set instead.  Upon
+   reception, the information is echoed back to TCP senders using a bit
+   in the TCP header.  The TCP sender adjusts the congestion window just
+   as it would if a segment was dropped.
+
+   The implementation of ECN as specified in [RF99] requires the
+   deployment of active queue management mechanisms in the affected
+   routers.  This allows the routers to signal congestion by sending TCP
+   a small number of "congestion signals" (segment drops or ECN
+   messages), rather than discarding a large number of segments, as can
+   happen when TCP overwhelms a drop-tail router queue.
+
+   Since satellite networks generally have higher bit-error rates than
+   terrestrial networks, determining whether a segment was lost due to
+   congestion or corruption may allow TCP to achieve better performance
+   in high BER environments than currently possible (due to TCP's
+   assumption that all loss is due to congestion).  While not a solution
+   to this problem, adding an ECN mechanism to TCP may be a part of a
+   mechanism that will help achieve this goal.  See section 3.3.4 for a
+   more detailed discussion of differentiating between corruption and
+   congestion based losses.
+
+3.3.3.2 Research
+
+   [Flo94] shows that ECN is effective in reducing the segment loss rate
+   which yields better performance especially for short and interactive
+   TCP connections.  Furthermore, [Flo94] also shows that ECN avoids
+   some unnecessary and costly TCP retransmission timeouts.  Finally,
+   [Flo94] also considers some of the advantages and disadvantages of
+   various forms of explicit congestion notification.
+
+
+
+
+
+
+Allman, et al.               Informational                     [Page 17]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+3.3.3.3 Implementation Issues
+
+   Deployment of ECN requires changes to the TCP implementation on both
+   sender and receiver.  Additionally, deployment of ECN requires
+   deployment of some active queue management infrastructure in routers.
+   RED is assumed in most ECN discussions, because RED is already
+   identifying segments to drop, even before its buffer space is
+   exhausted.  ECN simply allows the delivery of "marked" segments while
+   still notifying the end nodes that congestion is occurring along the
+   path.  ECN is safe (from a congestion control perspective) for shared
+   networks, as it maintains the same TCP congestion control principles
+   as are used when congestion is detected via segment drops.
+
+3.3.3.4 Topology Considerations
+
+   It is expected that none of the environments outlined in section 2
+   will present a bias towards or against ECN traffic.
+
+3.3.3.5 Possible Interaction and Relationships with Other Research
+
+   Note that some form of active queueing is necessary to use ECN (e.g.,
+   RED queueing).
+
+3.3.4 Detecting Corruption Loss
+
+   Differentiating between congestion (loss of segments due to router
+   buffer overflow or imminent buffer overflow) and corruption (loss of
+   segments due to damaged bits) is a difficult problem for TCP.  This
+   differentiation is particularly important because the action that TCP
+   should take in the two cases is entirely different.  In the case of
+   corruption, TCP should merely retransmit the damaged segment as soon
+   as its loss is detected; there is no need for TCP to adjust its
+   congestion window.  On the other hand, as has been widely discussed
+   above, when the TCP sender detects congestion, it should immediately
+   reduce its congestion window to avoid making the congestion worse.
+
+   TCP's defined behavior, as motivated by [Jac88,Jac90] and defined in
+   [Bra89,Ste97,APS99], is to assume that all loss is due to congestion
+   and to trigger the congestion control algorithms, as defined in
+   [Ste97,APS99].  The loss may be detected using the fast retransmit
+   algorithm, or in the worst case is detected by the expiration of
+   TCP's retransmission timer.
+
+   TCP's assumption that loss is due to congestion rather than
+   corruption is a conservative mechanism that prevents congestion
+   collapse [Jac88,FF98].  Over satellite networks, however, as in many
+   wireless environments, loss due to corruption is more common than on
+   terrestrial networks.  One common partial solution to this problem is
+
+
+
+Allman, et al.               Informational                     [Page 18]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   to add Forward Error Correction (FEC) to the data that's sent over
+   the satellite/wireless link.  A more complete discussion of the
+   benefits of FEC can be found in [AGS99].  However, given that FEC
+   does not always work or cannot be universally applied, other
+   mechanisms have been studied to attempt to make TCP able to
+   differentiate between congestion-based and corruption-based loss.
+
+   TCP segments that have been corrupted are most often dropped by
+   intervening routers when link-level checksum mechanisms detect that
+   an incoming frame has errors.  Occasionally, a TCP segment containing
+   an error may survive without detection until it arrives at the TCP
+   receiving host, at which point it will almost always either fail the
+   IP header checksum or the TCP checksum and be discarded as in the
+   link-level error case.  Unfortunately, in either of these cases, it's
+   not generally safe for the node detecting the corruption to return
+   information about the corrupt packet to the TCP sender because the
+   sending address itself might have been corrupted.
+
+3.3.4.1 Mitigation Description
+
+   Because the probability of link errors on a satellite link is
+   relatively greater than on a hardwired link, it is particularly
+   important that the TCP sender retransmit these lost segments without
+   reducing its congestion window.  Because corrupt segments do not
+   indicate congestion, there is no need for the TCP sender to enter a
+   congestion avoidance phase, which may waste available bandwidth.
+   Simulations performed in [SF98] show a performance improvement when
+   TCP can properly differentiate between corruption and
+   congestion of wireless links.
+
+   Perhaps the greatest research challenge in detecting corruption is
+   getting TCP (a transport-layer protocol) to receive appropriate
+   information from either the network layer (IP) or the link layer.
+   Much of the work done to date has involved link-layer mechanisms that
+   retransmit damaged segments.  The challenge seems to be to get these
+   mechanisms to make repairs in such a way that TCP understands what
+   happened and can respond appropriately.
+
+3.3.4.2 Research
+
+   Research into corruption detection to date has focused primarily on
+   making the link level detect errors and then perform link-level
+   retransmissions.  This work is summarized in [BKVP97,BPSK96].  One of
+   the problems with this promising technique is that it causes an
+   effective reordering of the segments from the TCP receiver's point of
+   view.  As a simple example, if segments A B C D are sent across a
+   noisy link and segment B is corrupted, segments C and D may have
+   already crossed the link before B can be retransmitted at the link
+
+
+
+Allman, et al.               Informational                     [Page 19]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   level, causing them to arrive at the TCP receiver in the order A C D
+   B.  This segment reordering would cause the TCP receiver to generate
+   duplicate ACKs upon the arrival of segments C and D.  If the
+   reordering was bad enough, this would trigger the fast
+   retransmit algorithm in the TCP sender, in response to the duplicate
+   ACKs.  Research presented in [MV98] proposes the idea of suppressing
+   or delaying the duplicate ACKs in the reverse direction to counteract
+   this behavior.  Alternatively, proposals that make TCP more robust in
+   the face of re-ordered segment arrivals [Flo99] may reduce the side
+   effects of the re-ordering caused by link-layer retransmissions.
+
+   A more high-level approach, outlined in [DMT96], uses a new
+   "corruption experienced" ICMP error message generated by routers that
+   detect corruption.  These messages are sent in the forward direction,
+   toward the packet's destination, rather than in the reverse direction
+   as is done with ICMP Source Quench messages.  Sending the error
+   messages in the forward direction allows this feedback to work over
+   asymmetric paths.  As noted above, generating an error message in
+   response to a damaged packet is problematic because the source and
+   destination addresses may not be valid.  The mechanism outlined in
+   [DMT96] gets around this problem by having the routers maintain a
+   small cache of recent packet destinations; when the router
+   experiences an error rate above some threshold, it sends an ICMP
+   corruption-experienced message to all of the destinations in its
+   cache.  Each TCP receiver then must return this information to its
+   respective TCP sender (through a TCP option).  Upon receiving an ACK
+   with this "corruption-experienced" option, the TCP sender assumes
+   that packet loss is due to corruption rather than congestion for two
+   round trip times (RTT) or until it receives additional link state
+   information (such as "link down", source quench, or additional
+   "corruption experienced" messages).  Note that in shared networks,
+   ignoring segment loss for 2 RTTs may aggravate congestion by making
+   TCP unresponsive.
+
+3.3.4.3 Implementation Issues
+
+   All of the techniques discussed above require changes to at least the
+   TCP sending and receiving stacks, as well as intermediate routers.
+   Due to the concerns over possibly ignoring congestion signals (i.e.,
+   segment drops), the above algorithm is not recommended for use in
+   shared networks.
+
+3.3.4.4 Topology Considerations
+
+   It is expected that corruption detection, in general, would be
+   beneficial in all environments outlined in section 2.  It would be
+   particularly beneficial in the satellite/wireless environment over
+   which these errors may be more prevalent.
+
+
+
+Allman, et al.               Informational                     [Page 20]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+3.3.4.5 Possible Interaction and Relationships with Other Research
+
+   SACK-based loss recovery algorithms (as described in 3.3.2) may
+   reduce the impact of corrupted segments on mostly clean links because
+   recovery will be able to happen more rapidly (and without relying on
+   the retransmission timer).  Note that while SACK-based loss recovery
+   helps, throughput will still suffer in the face of non-congestion
+   related packet loss.
+
+3.4 Congestion Avoidance
+
+3.4.1  Mitigation Description
+
+   During congestion avoidance, in the absence of loss, the TCP sender
+   adds approximately one segment to its congestion window during each
+   RTT [Jac88,Ste97,APS99].  Several researchers have observed that this
+   policy leads to unfair sharing of bandwidth when multiple connections
+   with different RTTs traverse the same bottleneck link, with the long
+   RTT connections obtaining only a small fraction of their fair share
+   of the bandwidth.
+
+   One effective solution to this problem is to deploy fair queueing and
+   TCP-friendly buffer management in network routers [Sut98].  However,
+   in the absence of help from the network, other researchers have
+   investigated changes to the congestion avoidance policy at the TCP
+   sender, as described in [Flo91,HK98].
+
+3.4.2 Research
+
+   The "Constant-Rate" increase policy has been studied in [Flo91,HK98].
+   It attempts to equalize the rate at which TCP senders increase their
+   sending rate during congestion avoidance.  Both [Flo91] and [HK98]
+   illustrate cases in which the "Constant-Rate" policy largely corrects
+   the bias against long RTT connections, although [HK98] presents some
+   evidence that such a policy may be difficult to incrementally deploy
+   in an operational network.  The proper selection of a constant (for
+   the constant rate of increase) is an open issue.
+
+   The "Increase-by-K" policy can be selectively used by long RTT
+   connections in a heterogeneous environment.  This policy simply
+   changes the slope of the linear increase, with connections over a
+   given RTT threshold adding "K" segments to the congestion window
+   every RTT, instead of one.  [HK98] presents evidence that this
+   policy, when used with small values of "K", may be successful in
+   reducing the unfairness while keeping the link utilization high, when
+   a small number of connections share a bottleneck link.  The selection
+   of the constant "K," the RTT threshold to invoke this policy, and
+   performance under a large number of flows are all open issues.
+
+
+
+Allman, et al.               Informational                     [Page 21]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+3.4.3 Implementation Issues
+
+   Implementation of either the "Constant-Rate" or "Increase-by-K"
+   policies requires a change to the congestion avoidance mechanism at
+   the TCP sender.  In the case of "Constant-Rate," such a change must
+   be implemented globally.  Additionally, the TCP sender must have a
+   reasonably accurate estimate of the RTT of the connection.  The
+   algorithms outlined above violate the congestion avoidance algorithm
+   as outlined in RFC 2581 [APS99] and therefore should not be
+   implemented in shared networks at this time.
+
+3.4.4 Topology Considerations
+
+   These solutions are applicable to all satellite networks that are
+   integrated with a terrestrial network, in which satellite connections
+   may be competing with terrestrial connections for the same bottleneck
+   link.
+
+3.4.5 Possible Interaction and Relationships with Other Research
+
+   As shown in [PADHV99], increasing the congestion window by multiple
+   segments per RTT can cause TCP to drop multiple segments and force a
+   retransmission timeout in some versions of TCP.  Therefore, the above
+   changes to the congestion avoidance algorithm may need to be
+   accompanied by a SACK-based loss recovery algorithm that can quickly
+   repair multiple dropped segments.
+
+3.5 Multiple Data Connections
+
+3.5.1 Mitigation Description
+
+   One method that has been used to overcome TCP's inefficiencies in the
+   satellite environment is to use multiple TCP flows to transfer a
+   given file.  The use of N TCP connections makes the sender N times
+   more aggressive and therefore can improve throughput in some
+   situations.  Using N multiple TCP connections can impact the transfer
+   and the network in a number of ways, which are listed below.
+
+   1. The transfer is able to start transmission using an effective
+      congestion window of N segments, rather than a single segment as
+      one TCP flow uses.  This allows the transfer to more quickly
+      increase the effective cwnd size to an appropriate size for the
+      given network.  However, in some circumstances an initial window
+      of N segments is inappropriate for the network conditions.  In
+      this case, a transfer utilizing more than one connection may
+      aggravate congestion.
+
+
+
+
+
+Allman, et al.               Informational                     [Page 22]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   2. During the congestion avoidance phase, the transfer increases the
+      effective cwnd by N segments per RTT, rather than the one segment
+      per RTT increase that a single TCP connection provides.  Again,
+      this can aid the transfer by more rapidly increasing the effective
+      cwnd to an appropriate point.  However, this rate of increase can
+      also be too aggressive for the network conditions.  In this case,
+      the use of multiple data connections can aggravate congestion in
+      the network.
+
+   3. Using multiple connections can provide a very large overall
+      congestion window.  This can be an advantage for TCP
+      implementations that do not support the TCP window scaling
+      extension [JBB92].  However, the aggregate cwnd size across all N
+      connections is equivalent to using a TCP implementation that
+      supports large windows.
+
+   4. The overall cwnd decrease in the face of dropped segments is
+      reduced when using N parallel connections.  A single TCP
+      connection reduces the effective size of cwnd to half when a
+      single segment loss is detected.  When utilizing N connections
+      each using a window of W bytes, a single drop reduces the window
+      to:
+
+        (N * W) - (W / 2)
+
+   Clearly this is a less dramatic reduction in the effective cwnd size
+   than when using a single TCP connection.  And, the amount by which
+   the cwnd is decreased is further reduced by increasing N.
+
+   The use of multiple data connections can increase the ability of
+   non-SACK TCP implementations to quickly recover from multiple dropped
+   segments without resorting to a timeout, assuming the dropped
+   segments cross connections.
+
+   The use of multiple parallel connections makes TCP overly aggressive
+   for many environments and can contribute to congestive collapse in
+   shared networks [FF99].  The advantages provided by using multiple
+   TCP connections are now largely provided by TCP extensions (larger
+   windows, SACKs, etc.).  Therefore, the use of a single TCP connection
+   is more "network friendly" than using multiple parallel connections.
+   However, using multiple parallel TCP connections may provide
+   performance improvement in private networks.
+
+
+
+
+
+
+
+
+
+Allman, et al.               Informational                     [Page 23]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+3.5.2 Research
+
+   Research on the use of multiple parallel TCP connections shows
+   improved performance [IL92,Hah94,AOK95,AKO96].  In addition, research
+   has shown that multiple TCP connections can outperform a single
+   modern TCP connection (with large windows and SACK) [AHKO97].
+   However, these studies did not consider the impact of using multiple
+   TCP connections on competing traffic.  [FF99] argues that using
+   multiple simultaneous connections to transfer a given file may lead
+   to congestive collapse in shared networks.
+
+3.5.3 Implementation Issues
+
+   To utilize multiple parallel TCP connections a client application and
+   the corresponding server must be customized.  As outlined in [FF99]
+   using multiple parallel TCP connections is not safe (from a
+   congestion control perspective) in shared networks and should not be
+   used.
+
+3.5.4 Topological Considerations
+
+   As stated above, [FF99] outlines that the use of multiple parallel
+   connections in a shared network, such as the Internet, may lead to
+   congestive collapse.  However, the use of multiple connections may be
+   safe and beneficial in private networks.  The specific topology being
+   used will dictate the number of parallel connections required.  Some
+   work has been done to determine the appropriate number of connections
+   on the fly [AKO96], but such a mechanism is far from complete.
+
+3.5.5 Possible Interaction and Relationships with Other Research
+
+   Using multiple concurrent TCP connections enables use of a large
+   congestion window, much like the TCP window scaling option [JBB92].
+   In addition, a larger initial congestion window is achieved, similar
+   to using [AFP98] or TCB sharing (see section 3.8).
+
+3.6 Pacing TCP Segments
+
+3.6.1 Mitigation Description
+
+   Slow-start takes several round trips to fully open the TCP congestion
+   window over routes with high bandwidth-delay products.  For short TCP
+   connections (such as WWW traffic with HTTP/1.0), the slow-start
+   overhead can preclude effective use of the high-bandwidth satellite
+   links.  When senders implement slow-start restart after a TCP
+   connection goes idle (suggested by Jacobson and Karels [JK92]),
+
+
+
+
+
+Allman, et al.               Informational                     [Page 24]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   performance is reduced in long-lived (but bursty) connections (such
+   as HTTP/1.1, which uses persistent TCP connections to transfer
+   multiple WWW page elements) [Hei97a].
+
+   Rate-based pacing (RBP) is a technique, used in the absence of
+   incoming ACKs, where the data sender temporarily paces TCP segments
+   at a given rate to restart the ACK clock.  Upon receipt of the first
+   ACK, pacing is discontinued and normal TCP ACK clocking resumes.  The
+   pacing rate may either be known from recent traffic estimates (when
+   restarting an idle connection or from recent prior connections), or
+   may be known through external means (perhaps in a point-to-point or
+   point-to-multipoint satellite network where available bandwidth can
+   be assumed to be large).
+
+   In addition, pacing data during the first RTT of a transfer may allow
+   TCP to make effective use of high bandwidth-delay links even for
+   short transfers.  However, in order to pace segments during the first
+   RTT a TCP will have to be using a non-standard initial congestion
+   window and a new mechanism to pace outgoing segments rather than send
+   them back-to-back.  Determining an appropriate size for the initial
+   cwnd is an open research question.  Pacing can also be used to reduce
+   bursts in general (due to buggy TCPs or byte counting, see section
+   3.2.2 for a discussion on byte counting).
+
+3.6.2 Research
+
+   Simulation studies of rate-based pacing for WWW-like traffic have
+   shown reductions in router congestion and drop rates [VH97a].  In
+   this environment, RBP substantially improves performance compared to
+   slow-start-after-idle for intermittent senders, and it slightly
+   improves performance over burst-full-cwnd-after-idle (because of
+   drops) [VH98].  More recently, pacing has been suggested to eliminate
+   burstiness in networks with ACK filtering [BPK97].
+
+3.6.3 Implementation Issues
+
+   RBP requires only sender-side changes to TCP.  Prototype
+   implementations of RBP are available [VH97b].  RBP requires an
+   additional sender timer for pacing.  The overhead of timer-driven
+   data transfer is often considered too high for practical use.
+   Preliminary experiments suggest that in RBP this overhead is minimal
+   because RBP only requires this timer for one RTT of transmission
+   [VH98].  RBP is expected to make TCP more conservative in sending
+   bursts of data after an idle period in hosts that do not revert to
+   slow start after an idle period.  On the other hand, RBP makes TCP
+   more aggressive if the sender uses the slow start algorithm to start
+   the ACK clock after a long idle period.
+
+
+
+
+Allman, et al.               Informational                     [Page 25]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+3.6.4  Topology Considerations
+
+   RBP could be used to restart idle TCP connections for all topologies
+   in Section 2.  Use at the beginning of new connections would be
+   restricted to topologies where available bandwidth can be estimated
+   out-of-band.
+
+3.6.5 Possible Interaction and Relationships with Other Research
+
+   Pacing segments may benefit from sharing state amongst various flows
+   between two hosts, due to the time required to determine the needed
+   information.  Additionally, pacing segments, rather than sending
+   back-to-back segments, may make estimating the available bandwidth
+   (as outlined in section 3.2.4) more difficult.
+
+3.7 TCP Header Compression
+
+   The TCP and IP header information needed to reliably deliver packets
+   to a remote site across the Internet can add significant overhead,
+   especially for interactive applications.  Telnet packets, for
+   example, typically carry only a few bytes of data per packet, and
+   standard IPv4/TCP headers add at least 40 bytes to this; IPv6/TCP
+   headers add at least 60 bytes.  Much of this information remains
+   relatively constant over the course of a session and so can be
+   replaced by a short session identifier.
+
+3.7.1 Mitigation Description
+
+   Many fields in the TCP and IP headers either remain constant during
+   the course of a session, change very infrequently, or can be inferred
+   from other sources.  For example, the source and destination
+   addresses, as well as the IP version, protocol, and port fields
+   generally do not change during a session.  Packet length can be
+   deduced from the length field of the underlying link layer protocol
+   provided that the link layer packet is not padded.  Packet sequence
+   numbers in a forward data stream generally change with every packet,
+   but increase in a predictable manner.
+
+   The TCP/IP header compression methods described in
+   [DNP99,DENP97,Jac90] reduce the overhead of TCP sessions by replacing
+   the data in the TCP and IP headers that remains constant, changes
+   slowly, or changes in a predictable manner with a short "connection
+   number".  Using this method, the sender first sends a full TCP/IP
+   header, including in it a connection number that the sender will use
+   to reference the connection.  The receiver stores the full header and
+   uses it as a template, filling in some fields from the limited
+
+
+
+
+
+Allman, et al.               Informational                     [Page 26]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   information contained in later, compressed headers.  This compression
+   can reduce the size of an IPv4/TCP header from 40 to as few as 3 to
+   5 bytes (3 bytes for some common cases, 5 bytes in general).
+
+   Compression and decompression generally happen below the IP layer, at
+   the end-points of a given physical link (such as at two routers
+   connected by a serial line).  The hosts on either side of the
+   physical link must maintain some state about the TCP connections that
+   are using the link.
+
+   The decompresser must pass complete, uncompressed packets to the IP
+   layer.  Thus header compression is transparent to routing, for
+   example, since an incoming packet with compressed headers is expanded
+   before being passed to the IP layer.
+
+   A variety of methods can be used by the compressor/decompressor to
+   negotiate the use of header compression.  For example, the PPP serial
+   line protocol allows for an option exchange, during which time the
+   compressor/decompressor agree on whether or not to use header
+   compression.  For older SLIP implementations, [Jac90] describes a
+   mechanism that uses the first bit in the IP packet as a flag.
+
+   The reduction in overhead is especially useful when the link is
+   bandwidth-limited such as terrestrial wireless and mobile satellite
+   links, where the overhead associated with transmitting the header
+   bits is nontrivial.  Header compression has the added advantage that
+   for the case of uniformly distributed bit errors, compressing TCP/IP
+   headers can provide a better quality of service by decreasing the
+   packet error probability.  The shorter, compressed packets are less
+   likely to be corrupted, and the reduction in errors increases the
+   connection's throughput.
+
+   Extra space is saved by encoding changes in fields that change
+   relatively slowly by sending only their difference from their values
+   in the previous packet instead of their absolute values.  In order to
+   decode headers compressed this way, the receiver keeps a copy of each
+   full, reconstructed TCP header after it is decoded, and applies the
+   delta values from the next decoded compressed header to the
+   reconstructed full header template.
+
+   A disadvantage to using this delta encoding scheme where values are
+   encoded as deltas from their values in the previous packet is that if
+   a single compressed packet is lost, subsequent packets with
+   compressed headers can become garbled if they contain fields which
+   depend on the lost packet.  Consider a forward data stream of packets
+   with compressed headers and increasing sequence numbers.  If packet N
+   is lost, the full header of packet N+1 will be reconstructed at the
+   receiver using packet N-1's full header as a template.  Thus the
+
+
+
+Allman, et al.               Informational                     [Page 27]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   sequence number, which should have been calculated from packet N's
+   header, will be wrong, the checksum will fail, and the packet will be
+   discarded.  When the sending TCP times out and retransmits, a packet
+   with a full header is forwarded to re-synchronize the decompresser.
+
+   It is important to note that the compressor does not maintain any
+   timers, nor does the decompresser know when an error occurred (only
+   the receiving TCP knows this, when the TCP checksum fails).  A single
+   bit error will cause the decompresser to lose sync, and subsequent
+   packets with compressed headers will be dropped by the receiving TCP,
+   since they will all fail the TCP checksum. When this happens, no
+   duplicate acknowledgments will be generated, and the decompresser can
+   only re-synchronize when it receives a packet with an uncompressed
+   header.  This means that when header compression is being used, both
+   fast retransmit and selective acknowledgments will not be able to
+   correct packets lost on a compressed link.  The "twice" algorithm,
+   described below, may be a partial solution to this problem.
+
+   [DNP99] and [DENP97] describe TCP/IPv4 and TCP/IPv6 compression
+   algorithms including compressing the various IPv6 extension headers
+   as well as methods for compressing non-TCP streams.  [DENP97] also
+   augments TCP header compression by introducing the "twice" algorithm.
+   If a particular packet fails to decompress properly, the twice
+   algorithm modifies its assumptions about the inferred fields in the
+   compressed header, assuming that a packet identical to the current
+   one was dropped between the last correctly decoded packet and the
+   current one.  Twice then tries to decompress the received packet
+   under the new assumptions and, if the checksum passes, the packet is
+   passed to IP and the decompresser state has been re-synchronized.
+   This procedure can be extended to three or more decoding attempts.
+   Additional robustness can be achieved by caching full copies of
+   packets which don't decompress properly in the hopes that later
+   arrivals will fix the problem.  Finally, the performance improvement
+   if the decompresser can explicitly request a full header is
+   discussed.  Simulation results show that twice, in conjunction with
+   the full header request mechanism, can improve throughput over
+   uncompressed streams.
+
+3.7.2 Research
+
+   [Jac90] outlines a simple header compression scheme for TCP/IP.
+
+   In [DENP97] the authors present the results of simulations showing
+   that header compression is advantageous for both low and medium
+   bandwidth links.  Simulations show that the twice algorithm, combined
+   with an explicit header request mechanism, improved throughput by
+   10-15% over uncompressed sessions across a wide range of bit error
+   rates.
+
+
+
+Allman, et al.               Informational                     [Page 28]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   Much of this improvement may have been due to the twice algorithm
+   quickly re-synchronizing the decompresser when a packet is lost.
+   This is because the twice algorithm, applied one or two times when
+   the decompresser becomes unsynchronized, will re-sync the
+   decompresser in between 83% and 99% of the cases examined.  This
+   means that packets received correctly after twice has resynchronized
+   the decompresser will cause duplicate acknowledgments.  This re-
+   enables the use of both fast retransmit and SACK in conjunction with
+   header compression.
+
+3.7.3 Implementation Issues
+
+   Implementing TCP/IP header compression requires changes at both the
+   sending (compressor) and receiving (decompresser) ends of each link
+   that uses compression.  The twice algorithm requires very little
+   extra machinery over and above header compression, while the explicit
+   header request mechanism of [DENP97] requires more extensive
+   modifications to the sending and receiving ends of each link that
+   employs header compression.  Header compression does not violate
+   TCP's congestion control mechanisms and therefore can be safely
+   implemented in shared networks.
+
+3.7.4 Topology Considerations
+
+   TCP/IP header compression is applicable to all of the environments
+   discussed in section 2, but will provide relatively more improvement
+   in situations where packet sizes are small (i.e., overhead is large)
+   and there is medium to low bandwidth and/or higher BER. When TCP's
+   congestion window size is large, implementing the explicit header
+   request mechanism, the twice algorithm, and caching packets which
+   fail to decompress properly becomes more critical.
+
+3.7.5 Possible Interaction and Relationships with Other Research
+
+   As discussed above, losing synchronization between a sender and
+   receiver can cause many packet drops.  The frequency of losing
+   synchronization and the effectiveness of the twice algorithm may
+   point to using a SACK-based loss recovery algorithm to reduce the
+   impact of multiple lost segments.  However, even very robust SACK-
+   based algorithms may not work well if too many segments are lost.
+
+3.8 Sharing TCP State Among Similar Connections
+
+
+
+
+
+
+
+
+
+Allman, et al.               Informational                     [Page 29]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+3.8.1 Mitigation Description
+
+   Persistent TCP state information can be used to overcome limitations
+   in the configuration of the initial state, and to automatically tune
+   TCP to environments using satellite links and to coordinate multiple
+   TCP connections sharing a satellite link.
+
+   TCP includes a variety of parameters, many of which are set to
+   initial values which can severely affect the performance of TCP
+   connections traversing satellite links, even though most TCP
+   parameters are adjusted later after the connection is established.
+   These parameters include initial size of cwnd and initial MSS size.
+   Various suggestions have been made to change these initial
+   conditions, to more effectively support satellite links.  However, it
+   is difficult to select any single set of parameters which is
+   effective for all environments.
+
+   An alternative to attempting to select these parameters a-priori is
+   sharing state across TCP connections and using this state when
+   initializing a new connection.  For example, if all connections to a
+   subnet result in extended congestion windows of 1 megabyte, it is
+   probably more efficient to start new connections with this value,
+   than to rediscover it by requiring the cwnd to increase using slow
+   start over a period of dozens of round-trip times.
+
+3.8.2 Research
+
+   Sharing state among connections brings up a number of questions such
+   as what information to share, with whom to share, how to share it,
+   and how to age shared information.  First, what information is to be
+   shared must be determined.  Some information may be appropriate to
+   share among TCP connections, while some information sharing may be
+   inappropriate or not useful.  Next, we need to determine with whom to
+   share information.  Sharing may be appropriate for TCP connections
+   sharing a common path to a given host.  Information may be shared
+   among connections within a host, or even among connections between
+   different hosts, such as hosts on the same LAN.  However, sharing
+   information between connections not traversing the same network may
+   not be appropriate.  Given the state to share and the parties that
+   share it, a mechanism for the sharing is required.  Simple state,
+   like MSS and RTT, is easy to share, but congestion window information
+   can be shared in a variety of ways. The sharing mechanism determines
+   priorities among the sharing connections, and a variety of fairness
+   criteria need to be considered.  Also, the mechanisms by which
+   information is aged require further study.  See RFC 2140 for a
+   discussion of the security issues in both sharing state within a
+   single host and sharing state among hosts on a subnet.  Finally, the
+   security concerns associated with sharing a piece of information need
+
+
+
+Allman, et al.               Informational                     [Page 30]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   to be carefully considered before introducing such a mechanism.  Many
+   of these open research questions must be answered before state
+   sharing can be widely deployed.
+
+   The opportunity for such sharing, both among a sequence of
+   connections, as well as among concurrent connections, is described in
+   more detail in [Tou97].  The state management itself is largely an
+   implementation issue, however what information should be shared and
+   the specific ways in which the information should be shared is an
+   open question.
+
+   Sharing parts of the TCB state was originally documented in T/TCP
+   [Bra92], and is used there to aggregate RTT values across connection
+   instances, to provide meaningful average RTTs, even though most
+   connections are expected to persist for only one RTT.  T/TCP also
+   shares a connection identifier, a sequence number separate from the
+   window number and address/port pairs by which TCP connections are
+   typically distinguished. As a result of this shared state, T/TCP
+   allows a receiver to pass data in the SYN segment to the receiving
+   application, prior to the completion of the three-way handshake,
+   without compromising the integrity of the connection. In effect, this
+   shared state caches a partial handshake from the previous connection,
+   which is a variant of the more general issue of TCB sharing.
+
+   Sharing state among connections (including transfers using non-TCP
+   protocols) is further investigated in [BRS99].
+
+3.8.3 Implementation Issues
+
+   Sharing TCP state across connections requires changes to the sender's
+   TCP stack, and possibly the receiver's TCP stack (as in the case of
+   T/TCP, for example).  Sharing TCP state may make a particular TCP
+   connection more aggressive.  However, the aggregate traffic should be
+   more conservative than a group of independent TCP connections.
+   Therefore, sharing TCP state should be safe for use in shared
+   networks.  Note that state sharing does not present any new security
+   problems within multiuser hosts.  In such a situation, users can
+   steal network resources from one another with or without state
+   sharing.
+
+3.8.4 Topology Considerations
+
+   It is expected that sharing state across TCP connections may be
+   useful in all network environments presented in section 2.
+
+
+
+
+
+
+
+Allman, et al.               Informational                     [Page 31]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+3.8.5 Possible Interaction and Relationships with Other Research
+
+   The state sharing outlined above is very similar to the Congestion
+   Manager proposal [BRS99] that attempts to share congestion control
+   information among both TCP and UDP flows between a pair of hosts.
+
+3.9 ACK Congestion Control
+
+   In highly asymmetric networks, a low-speed return link can restrict
+   the performance of the data flow on a high-speed forward link by
+   limiting the flow of acknowledgments returned to the data sender.
+   For example, if the data sender uses 1500 byte segments, and the
+   receiver generates 40 byte acknowledgments (IPv4, TCP without
+   options), the reverse link will congest with ACKs for asymmetries of
+   more than 75:1 if delayed ACKs are used, and 37:1 if every segment is
+   acknowledged.  For a 1.5 Mb/second data link, ACK congestion will
+   occur for reverse link speeds below 20 kilobits/sec.  These levels of
+   asymmetry will readily occur if the reverse link is shared among
+   multiple satellite receivers, as is common in many VSAT satellite
+   networks.  If a terrestrial modem link is used as a reverse link, ACK
+   congestion is also likely, especially as the speed of the forward
+   link is increased.  Current congestion control mechanisms are aimed
+   at controlling the flow of data segments, but do not affect the flow
+   of ACKs.
+
+   In [KVR98] the authors point out that the flow of acknowledgments can
+   be restricted on the low-speed link not only by the bandwidth of the
+   link, but also by the queue length of the router.  The router may
+   limit its queue length by counting packets, not bytes, and therefore
+   begin discarding ACKs even if there is enough bandwidth to forward
+   them.
+
+3.9.1 Mitigation Description
+
+   ACK Congestion Control extends the concept of flow control for data
+   segments to acknowledgment segments.  In the method described in
+   [BPK97], any intermediate router can mark an acknowledgment with an
+   Explicit Congestion Notification (ECN) bit once the queue occupancy
+   in the router exceeds a given threshold.  The data sender (which
+   receives the acknowledgment) must "echo" the ECN bit back to the data
+   receiver (see section 3.3.3 for a more detailed discussion of ECN).
+   The proposed algorithm for marking ACK segments with an ECN bit is
+   Random Early Detection (RED) [FJ93].  In response to the receipt of
+   ECN marked data segments, the receiver will dynamically reduce the
+   rate of acknowledgments using a multiplicative backoff.  Once
+   segments without ECN are received, the data receiver speeds up
+   acknowledgments using a linear increase, up to a rate of either 1 (no
+
+
+
+
+Allman, et al.               Informational                     [Page 32]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   delayed ACKs) or 2 (normal delayed ACKs) data segments per ACK.  The
+   authors suggest that an ACK be generated at least once per window,
+   and ideally a few times per window.
+
+   As in the RED congestion control mechanism for data flow, the
+   bottleneck gateway can randomly discard acknowledgments, rather than
+   marking them with an ECN bit, once the queue fills beyond a given
+   threshold.
+
+3.9.2 Research
+
+   [BPK97] analyze the effect of ACK Congestion Control (ACC) on the
+   performance of an asymmetric network.  They note that the use of ACC,
+   and indeed the use of any scheme which reduces the frequency of
+   acknowledgments, has potential unwanted side effects.  Since each ACK
+   will acknowledge more than the usual one or two data segments, the
+   likelihood of segment bursts from the data sender is increased.  In
+   addition, congestion window growth may be impeded if the data sender
+   grows the window by counting received ACKs, as mandated by
+   [Ste97,APS99].  The authors therefore combine ACC with a series of
+   modifications to the data sender, referred to as TCP Sender
+   Adaptation (SA).  SA imposes a limit on the number of segments sent
+   in a burst, regardless of window size.  In addition, byte counting
+   (as opposed to ACK counting) is employed for window growth.  Note
+   that byte counting has been studied elsewhere and can introduce
+   side-effects, as well [All98].
+
+   The results presented in [BPK97] indicate that using ACC and SA will
+   reduce the bursts produced by ACK losses in unmodified (Reno) TCP.
+   In cases where these bursts would lead to data loss at an
+   intermediate router, the ACC and SA modifications significantly
+   improve the throughput for a single data transfer.  The results
+   further suggest that the use of ACC and SA significantly improves
+   fairness between two simultaneous transfers.
+
+   ACC is further reported to prevent the increase in round trip time
+   (RTT) that occurs when an unmodified TCP fills the reverse router
+   queue with acknowledgments.
+
+   In networks where the forward direction is expected to suffer losses
+   in one of the gateways, due to queue limitations, the authors report
+   at best a very slight improvement in performance for ACC and SA,
+   compared to unmodified Reno TCP.
+
+
+
+
+
+
+
+
+Allman, et al.               Informational                     [Page 33]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+3.9.3 Implementation Issues
+
+   Both ACC and SA require modification of the sending and receiving
+   hosts, as well as the bottleneck gateway.  The current research
+   suggests that implementing ACC without the SA modifications results
+   in a data sender which generates potentially disruptive segment
+   bursts.  It should be noted that ACC does require host modifications
+   if it is implemented in the way proposed in [BPK97].  The authors
+   note that ACC can be implemented by discarding ACKs (which requires
+   only a gateway modification, but no changes in the hosts), as opposed
+   to marking them with ECN.  Such an implementation may, however,
+   produce bursty data senders if it is not combined with a burst
+   mitigation technique.  ACC requires changes to the standard ACKing
+   behavior of a receiving TCP and therefore is not recommended for use
+   in shared networks.
+
+3.9.4 Topology Considerations
+
+   Neither ACC nor SA requires the storage of state in the gateway.
+   These schemes should therefore be applicable for all topologies,
+   provided that the hosts using the satellite or hybrid network can be
+   modified.  However, these changes are expected to be especially
+   beneficial to networks containing asymmetric satellite links.
+
+3.9.5 Possible Interaction and Relationships with Other Research
+
+   Note that ECN is a pre-condition for using ACK congestion control.
+   Additionally, the ACK Filtering algorithm discussed in the next
+   section attempts to solve the same problem as ACC.  Choosing between
+   the two algorithms (or another mechanism) is currently an open
+   research question.
+
+3.10 ACK Filtering
+
+   ACK Filtering (AF) is designed to address the same ACK congestion
+   effects described in 3.9.  Contrary to ACC, however, AF is designed
+   to operate without host modifications.
+
+3.10.1 Mitigation Description
+
+   AF takes advantage of the cumulative acknowledgment structure of TCP.
+   The bottleneck router in the reverse direction (the low speed link)
+   must be modified to implement AF.  Upon receipt of a segment which
+   represents a TCP acknowledgment, the router scans the queue for
+   redundant ACKs for the same connection, i.e. ACKs which acknowledge
+   portions of the window which are included in the most recent ACK.
+   All of these "earlier" ACKs are removed from the queue and discarded.
+
+
+
+
+Allman, et al.               Informational                     [Page 34]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   The router does not store state information, but does need to
+   implement the additional processing required to find and remove
+   segments from the queue upon receipt of an ACK.
+
+3.10.2  Research
+
+   [BPK97] analyzes the effects of AF.  As is the case in ACC, the use
+   of ACK filtering alone would produce significant sender bursts, since
+   the ACKs will be acknowledging more previously-unacknowledged data.
+   The SA modifications described in 3.9.2 could be used to prevent
+   those bursts, at the cost of requiring host modifications.  To
+   prevent the need for modifications in the TCP stack, AF is more
+   likely to be paired with the ACK Reconstruction (AR) technique, which
+   can be implemented at the router where segments exit the slow reverse
+   link.
+
+   AR inspects ACKs exiting the link, and if it detects large "gaps" in
+   the ACK sequence, it generates additional ACKs to reconstruct an
+   acknowledgment flow which more closely resembles what the data sender
+   would have seen had ACK Filtering not been introduced.  AR requires
+   two parameters; one parameter is the desired ACK frequency, while the
+   second controls the spacing, in time, between the release of
+   consecutive reconstructed ACKs.
+
+   In [BPK97], the authors show the combination of AF and AR to increase
+   throughput, in the networks studied, over both unmodified TCP and the
+   ACC/SA modifications.  Their results also strongly suggest that the
+   use of AF alone, in networks where congestion losses are expected,
+   decreases performance (even below the level of unmodified TCP Reno)
+   due to sender bursting.
+
+   AF delays acknowledgments from arriving at the receiver by dropping
+   earlier ACKs in favor of later ACKs.  This process can cause a slight
+   hiccup in the transmission of new data by the TCP sender.
+
+3.10.3 Implementation Issues
+
+   Both ACK Filtering and ACK Reconstruction require only router
+   modification.  However, the implementation of AR requires some
+   storage of state information in the exit router.  While AF does not
+   require storage of state information, its use without AR (or SA)
+   could produce undesired side effects.  Furthermore, more research is
+   required regarding appropriate ranges for the parameters needed in
+   AR.
+
+
+
+
+
+
+
+Allman, et al.               Informational                     [Page 35]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+3.10.4 Topology Considerations
+
+   AF and AR appear applicable to all topologies, assuming that the
+   storage of state information in AR does not prove to be prohibitive
+   for routers which handle large numbers of flows.  The fact that TCP
+   stack modifications are not required for AF/AR makes this approach
+   attractive for hybrid networks and networks with diverse types of
+   hosts.  These modifications, however, are expected to be most
+   beneficial in asymmetric network paths.
+
+   On the other hand, the implementation of AF/AR requires the routers
+   to examine the TCP header, which prohibits their use in secure
+   networks where IPSEC is deployed.  In such networks, AF/AR can be
+   effective only inside the security perimeter of a private, or virtual
+   private network, or in private networks where the satellite link is
+   protected only by link-layer encryption (as opposed to IPSEC).  ACK
+   Filtering is safe to use in shared networks (from a congestion
+   control point-of-view), as the number of ACKs can only be reduced,
+   which makes TCP less aggressive.  However, note that while TCP is
+   less aggressive, the delays that AF induces (outlined above) can lead
+   to larger bursts than would otherwise occur.
+
+3.10.5 Possible Interaction and Relationships with Other Research
+
+   ACK Filtering attempts to solve the same problem as ACK Congestion
+   Control (as outlined in section 3.9).  Which of the two algorithms is
+   more appropriate is currently an open research question.
+
+4   Conclusions
+
+   This document outlines TCP items that may be able to mitigate the
+   performance problems associated with using TCP in networks containing
+   satellite links.  These mitigations are not IETF standards track
+   mechanisms and require more study before being recommended by the
+   IETF.  The research community is encouraged to examine the above
+   mitigations in an effort to determine which are safe for use in
+   shared networks such as the Internet.
+
+5   Security Considerations
+
+   Several of the above sections noted specific security concerns which
+   a given mitigation aggravates.
+
+   Additionally, any form of wireless communication link is more
+   susceptible to eavesdropping security attacks than standard wire-
+   based links due to the relative ease with which an attacker can watch
+   the network and the difficulty in finding attackers monitoring the
+   network.
+
+
+
+Allman, et al.               Informational                     [Page 36]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+6   Acknowledgments
+
+   Our thanks to Aaron Falk and Sally Floyd, who provided very helpful
+   comments on drafts of this document.
+
+7   References
+
+   [AFP98]   Allman, M., Floyd, S. and C. Partridge, "Increasing TCP's
+             Initial Window", RFC 2414, September 1998.
+
+   [AGS99]   Allman, M., Glover, D. and L. Sanchez, "Enhancing TCP Over
+             Satellite Channels using Standard Mechanisms", BCP 28, RFC
+             2488, January 1999.
+
+   [AHKO97]  Mark Allman, Chris Hayes, Hans Kruse, Shawn Ostermann.  TCP
+             Performance Over Satellite Links.  In Proceedings of the
+             5th International Conference on Telecommunication Systems,
+             March 1997.
+
+   [AHO98]   Mark Allman, Chris Hayes, Shawn Ostermann.  An Evaluation
+             of TCP with Larger Initial Windows.  Computer Communication
+             Review, 28(3), July 1998.
+
+   [AKO96]   Mark Allman, Hans Kruse, Shawn Ostermann.  An Application-
+             Level Solution to TCP's Satellite Inefficiencies.  In
+             Proceedings of the First International Workshop on
+             Satellite-based Information Services (WOSBIS), November
+             1996.
+
+   [All97a]  Mark Allman.  Improving TCP Performance Over Satellite
+             Channels.  Master's thesis, Ohio University, June 1997.
+
+   [All97b]  Mark Allman.  Fixing Two BSD TCP Bugs.  Technical Report
+             CR-204151, NASA Lewis Research Center, October 1997.
+
+   [All98]   Mark Allman. On the Generation and Use of TCP
+             Acknowledgments.  ACM Computer Communication Review, 28(5),
+             October 1998.
+
+   [AOK95]   Mark Allman, Shawn Ostermann, Hans Kruse.  Data Transfer
+             Efficiency Over Satellite Circuits Using a Multi-Socket
+             Extension to the File Transfer Protocol (FTP).  In
+             Proceedings of the ACTS Results Conference, NASA Lewis
+             Research Center, September 1995.
+
+   [AP99]    Mark Allman, Vern Paxson.  On Estimating End-to-End Network
+             Path Properties. ACM SIGCOMM, September 1999.
+
+
+
+
+Allman, et al.               Informational                     [Page 37]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   [APS99]   Allman, M., Paxson, V. and W. Richard Stevens, "TCP
+             Congestion Control", RFC 2581, April 1999.
+
+   [BCC+98]  Braden, B., Clark, D., Crowcroft, J., Davie, B., Deering,
+             S., Estrin, D., Floyd, S., Jacobson, V., Minshall, G.,
+             Partridge, C., Peterson, L., Ramakrishnan, K., Shenker, S.,
+             Wroclawski, J. and L. Zhang, "Recommendations on Queue
+             Management and Congestion Avoidance in the Internet", RFC
+             2309, April 1998.
+
+   [BKVP97]  B. Bakshi and P. Krishna and N. Vaidya and D. Pradhan,
+             "Improving Performance of TCP over Wireless Networks", 17th
+             International Conference on Distributed Computing Systems
+             (ICDCS), May 1997.
+
+   [BPK97]   Hari Balakrishnan, Venkata N. Padmanabhan, and Randy H.
+             Katz.  The Effects of Asymmetry on TCP Performance.  In
+             Proceedings of the ACM/IEEE Mobicom, Budapest, Hungary,
+             ACM.  September, 1997.
+
+   [BPK98]   Hari Balakrishnan, Venkata Padmanabhan, Randy H. Katz.  The
+             Effects of Asymmetry on TCP Performance.  ACM Mobile
+             Networks and Applications (MONET), 1998 (to appear).
+
+   [BPSK96]  H. Balakrishnan and V. Padmanabhan and S. Seshan and R.
+             Katz, "A Comparison of Mechanisms for Improving TCP
+             Performance over Wireless Links", ACM SIGCOMM, August 1996.
+
+   [Bra89]   Braden, R., "Requirements for Internet Hosts --
+             Communication Layers", STD 3, RFC 1122, October 1989.
+
+   [Bra92]   Braden, R., "Transaction TCP -- Concepts", RFC 1379,
+             September 1992.
+
+   [Bra94]   Braden, R., "T/TCP -- TCP Extensions for Transactions:
+             Functional Specification", RFC 1644, July 1994.
+
+   [BRS99]   Hari Balakrishnan, Hariharan Rahul, and Srinivasan Seshan.
+             An Integrated Congestion Management Architecture for
+             Internet Hosts.  ACM SIGCOMM, September 1999.
+
+   [ddKI99]  M. deVivo, G.O. deVivo, R. Koeneke, G. Isern.  Internet
+             Vulnerabilities Related to TCP/IP and T/TCP.  Computer
+             Communication Review, 29(1), January 1999.
+
+
+
+
+
+
+
+Allman, et al.               Informational                     [Page 38]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   [DENP97]  Mikael Degermark, Mathias Engan, Bjorn Nordgren, Stephen
+             Pink.  Low-Loss TCP/IP Header Compression for Wireless
+             Networks.  ACM/Baltzer Journal on Wireless Networks, vol.3,
+             no.5, p. 375-87.
+
+   [DMT96]   R. C. Durst and G. J. Miller and E. J. Travis, "TCP
+             Extensions for Space Communications", Mobicom 96, ACM, USA,
+             1996.
+
+   [DNP99]   Degermark, M., Nordgren, B. and S. Pink, "IP Header
+             Compression", RFC 2507, February 1999.
+
+   [FF96]    Kevin Fall, Sally Floyd.  Simulation-based Comparisons of
+             Tahoe, Reno, and SACK TCP.  Computer Communication Review,
+             V. 26 N. 3, July 1996, pp. 5-21.
+
+   [FF99]    Sally Floyd, Kevin Fall.  Promoting the Use of End-to-End
+             Congestion Control in the Internet, IEEE/ACM Transactions
+             on Networking, August 1999.
+
+   [FH99]    Floyd, S. and T. Henderson, "The NewReno Modification to
+             TCP's Fast Recovery Algorithm", RFC 2582, April 1999.
+
+   [FJ93]    Sally Floyd and Van Jacobson.  Random Early Detection
+             Gateways for Congestion Avoidance, IEEE/ACM Transactions on
+             Networking, V. 1 N. 4, August 1993.
+
+   [Flo91]   Sally Floyd.  Connections with Multiple Congested Gateways
+             in Packet-Switched Networks, Part 1: One-way Traffic.  ACM
+             Computer Communications Review, V. 21, N. 5, October 1991.
+
+   [Flo94]   Sally Floyd.  TCP and Explicit Congestion Notification, ACM
+             Computer Communication Review, V. 24 N. 5, October 1994.
+
+   [Flo99]   Sally Floyd.  "Re: TCP and out-of-order delivery", email to
+             end2end-interest mailing list, February, 1999.
+
+   [Hah94]   Jonathan Hahn.  MFTP: Recent Enhancements and Performance
+             Measurements.  Technical Report RND-94-006, NASA Ames
+             Research Center, June 1994.
+
+   [Hay97]   Chris Hayes.  Analyzing the Performance of New TCP
+             Extensions Over Satellite Links.  Master's Thesis, Ohio
+             University, August 1997.
+
+   [HK98]    Tom Henderson, Randy Katz.  On Improving the Fairness of
+             TCP Congestion Avoidance.  Proceedings of IEEE Globecom `98
+             Conference, 1998.
+
+
+
+Allman, et al.               Informational                     [Page 39]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   [HK99]    Tom Henderson, Randy Katz.  Transport Protocols for
+             Internet-Compatible Satellite Networks, IEEE Journal on
+             Selected Areas of Communications, February, 1999.
+
+   [Hoe95]   J. Hoe, Startup Dynamics of TCP's Congestion Control and
+             Avoidance Schemes. Master's Thesis, MIT, 1995.
+
+   [Hoe96]   Janey Hoe.  Improving the Startup Behavior of a Congestion
+             Control Scheme for TCP.  In ACM SIGCOMM, August 1996.
+
+   [IL92]    David Iannucci and John Lakashman.  MFTP: Virtual TCP
+             Window Scaling Using Multiple Connections.  Technical
+             Report RND-92-002, NASA Ames Research Center, January 1992.
+
+   [Jac88]   Van Jacobson.  Congestion Avoidance and Control.  In
+             Proceedings of the SIGCOMM '88, ACM.  August, 1988.
+
+   [Jac90]   Jacobson, V., "Compressing TCP/IP Headers", RFC 1144,
+             February 1990.
+
+   [JBB92]   Jacobson, V., Braden, R. and D. Borman, "TCP Extensions for
+             High Performance", RFC 1323, May 1992.
+
+   [JK92]    Van Jacobson and Mike Karels.  Congestion Avoidance and
+             Control.  Originally appearing in the proceedings of
+             SIGCOMM '88 by Jacobson only, this revised version includes
+             an additional appendix.  The revised version is available
+             at ftp://ftp.ee.lbl.gov/papers/congavoid.ps.Z.  1992.
+
+   [Joh95]   Stacy Johnson.  Increasing TCP Throughput by Using an
+             Extended Acknowledgment Interval.  Master's Thesis, Ohio
+             University, June 1995.
+
+   [KAGT98]  Hans Kruse, Mark Allman, Jim Griner, Diepchi Tran.  HTTP
+             Page Transfer Rates Over Geo-Stationary Satellite Links.
+             March 1998. Proceedings of the Sixth International
+             Conference on Telecommunication Systems.
+
+   [Kes91]   Srinivasan Keshav.  A Control Theoretic Approach to Flow
+             Control.  In ACM SIGCOMM, September 1991.
+
+   [KM97]    S. Keshav, S. Morgan. SMART Retransmission: Performance
+             with Overload and Random Losses. Proceedings of Infocom.
+             1997.
+
+
+
+
+
+
+
+Allman, et al.               Informational                     [Page 40]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   [KVR98]   Lampros Kalampoukas, Anujan Varma, and K. K. Ramakrishnan.
+             Improving TCP Throughput Over Two-Way Asymmetric Links:
+             Analysis and Solutions.  Measurement and Modeling of
+             Computer Systems, 1998, Pages 78-89.
+
+   [MM96a]   M. Mathis, J. Mahdavi, "Forward Acknowledgment: Refining
+             TCP Congestion Control," Proceedings of SIGCOMM'96, August,
+             1996, Stanford, CA.  Available from
+             http://www.psc.edu/networking/papers/papers.html
+
+   [MM96b]   M. Mathis, J. Mahdavi, "TCP Rate-Halving with Bounding
+             Parameters" Available from
+             http://www.psc.edu/networking/papers/FACKnotes/current.
+
+   [MMFR96]  Mathis, M., Mahdavi, J., Floyd, S. and A. Romanow, "TCP
+             Selective Acknowledgment Options", RFC 2018, October 1996.
+
+   [MSMO97]  M. Mathis, J. Semke, J. Mahdavi, T. Ott, "The Macroscopic
+             Behavior of the TCP Congestion Avoidance
+             Algorithm", Computer Communication Review, volume 27,
+             number 3, July 1997.  Available from
+             http://www.psc.edu/networking/papers/papers.html
+
+   [MV98]    Miten N. Mehta and Nitin H. Vaidya.  Delayed Duplicate-
+             Acknowledgments: A Proposal to Improve Performance of TCP
+             on Wireless Links.  Technical Report 98-006, Department of
+             Computer Science, Texas A&M University, February 1998.
+
+   [Nic97]   Kathleen Nichols.  Improving Network Simulation with
+             Feedback.  Com21, Inc. Technical Report.  Available from
+             http://www.com21.com/pages/papers/068.pdf.
+
+   [PADHV99] Paxson, V., Allman, M., Dawson, S., Heavens, I. and B.
+             Volz, "Known TCP Implementation Problems", RFC 2525, March
+             1999.
+
+   [Pax97]   Vern Paxson.  Automated Packet Trace Analysis of TCP
+             Implementations.  In Proceedings of ACM SIGCOMM, September
+             1997.
+
+   [PN98]    Poduri, K. and K. Nichols, "Simulation Studies of Increased
+             Initial TCP Window Size", RFC 2415, September 1998.
+
+   [Pos81]   Postel, J., "Transmission Control Protocol", STD 7, RFC
+             793, September 1981.
+
+
+
+
+
+
+Allman, et al.               Informational                     [Page 41]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   [RF99]    Ramakrishnan, K. and S. Floyd, "A Proposal to add Explicit
+             Congestion Notification (ECN) to IP", RFC 2481, January
+             1999.
+
+   [SF98]    Nihal K. G. Samaraweera and Godred Fairhurst,
+             "Reinforcement of TCP error Recovery for Wireless
+             Communication", Computer Communication Review, volume 28,
+             number 2, April 1998.
+
+   [SP98]    Shepard, T. and C. Partridge, "When TCP Starts Up With Four
+             Packets Into Only Three Buffers", RFC 2416, September 1998.
+
+   [Ste97]   Stevens, W., "TCP Slow Start, Congestion Avoidance, Fast
+             Retransmit, and Fast Recovery Algorithms", RFC 2001,
+             January 1997.
+
+   [Sut98]   B. Suter, T. Lakshman, D. Stiliadis, and A. Choudhury.
+             Design Considerations for Supporting TCP with Per-flow
+             Queueing.  Proceedings of IEEE Infocom `98 Conference,
+             1998.
+
+   [Tou97]   Touch, J., "TCP Control Block Interdependence", RFC 2140,
+             April 1997.
+
+   [VH97a]   Vikram Visweswaraiah and John Heidemann.  Improving Restart
+             of Idle TCP Connections.  Technical Report 97-661,
+             University of Southern California, 1997.
+
+   [VH97b]   Vikram Visweswaraiah and John Heidemann.  Rate-based pacing
+             Source Code Distribution, Web page:
+             http://www.isi.edu/lsam/publications/rate_based_pacing/README.html
+             November, 1997.
+
+   [VH98]    Vikram Visweswaraiah and John Heidemann.  Improving Restart
+             of Idle TCP Connections (revised).  Submitted for
+             publication.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Allman, et al.               Informational                     [Page 42]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+8   Authors' Addresses
+
+   Mark Allman
+   NASA Glenn Research Center/BBN Technologies
+   Lewis Field
+   21000 Brookpark Rd.  MS 54-2
+   Cleveland, OH  44135
+
+   EMail: mallman@grc.nasa.gov
+   http://roland.grc.nasa.gov/~mallman
+
+
+   Spencer Dawkins
+   Nortel
+   P.O.Box 833805
+   Richardson, TX 75083-3805
+
+   EMail: Spencer.Dawkins.sdawkins@nt.com
+
+
+   Dan Glover
+   NASA Glenn Research Center
+   Lewis Field
+   21000 Brookpark Rd.  MS 3-6
+   Cleveland, OH  44135
+
+   EMail: Daniel.R.Glover@grc.nasa.gov
+   http://roland.grc.nasa.gov/~dglover
+
+
+   Jim Griner
+   NASA Glenn Research Center
+   Lewis Field
+   21000 Brookpark Rd.  MS 54-2
+   Cleveland, OH  44135
+
+   EMail: jgriner@grc.nasa.gov
+   http://roland.grc.nasa.gov/~jgriner
+
+
+   Diepchi Tran
+   NASA Glenn Research Center
+   Lewis Field
+   21000 Brookpark Rd.  MS 54-2
+   Cleveland, OH  44135
+
+   EMail: dtran@grc.nasa.gov
+
+
+
+
+Allman, et al.               Informational                     [Page 43]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   Tom Henderson
+   University of California at Berkeley
+   Phone: +1 (510) 642-8919
+
+   EMail: tomh@cs.berkeley.edu
+   URL: http://www.cs.berkeley.edu/~tomh/
+
+
+   John Heidemann
+   University of Southern California/Information Sciences Institute
+   4676 Admiralty Way
+   Marina del Rey, CA 90292-6695
+
+   EMail: johnh@isi.edu
+
+
+   Joe Touch
+   University of Southern California/Information Sciences Institute
+   4676 Admiralty Way
+   Marina del Rey, CA 90292-6601
+   USA
+
+   Phone: +1 310-448-9151
+   Fax:   +1 310-823-6714
+   URL:   http://www.isi.edu/touch
+   EMail: touch@isi.edu
+
+
+   Hans Kruse
+   J. Warren McClure School of Communication Systems Management
+   Ohio University
+   9 S. College Street
+   Athens, OH 45701
+
+   Phone: 740-593-4891
+   Fax: 740-593-4889
+   EMail: hkruse1@ohiou.edu
+   http://www.csm.ohiou.edu/kruse
+
+
+   Shawn Ostermann
+   School of Electrical Engineering and Computer Science
+   Ohio University
+   416 Morton Hall
+   Athens, OH  45701
+
+   Phone: (740) 593-1234
+   EMail: ostermann@cs.ohiou.edu
+
+
+
+Allman, et al.               Informational                     [Page 44]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+   Keith Scott
+   The MITRE Corporation
+   M/S W650
+   1820 Dolley Madison Blvd.
+   McLean VA 22102-3481
+
+   EMail: kscott@mitre.org
+
+
+   Jeffrey Semke
+   Pittsburgh Supercomputing Center
+   4400 Fifth Ave.
+   Pittsburgh, PA  15213
+
+   EMail: semke@psc.edu
+   http://www.psc.edu/~semke
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Allman, et al.               Informational                     [Page 45]
+
+RFC 2760       Ongoing TCP Research Related to Satellites  February 2000
+
+
+9  Full Copyright Statement
+
+   Copyright (C) The Internet Society (2000).  All Rights Reserved.
+
+   This document and translations of it may be copied and furnished to
+   others, and derivative works that comment on or otherwise explain it
+   or assist in its implementation may be prepared, copied, published
+   and distributed, in whole or in part, without restriction of any
+   kind, provided that the above copyright notice and this paragraph are
+   included on all such copies and derivative works.  However, this
+   document itself may not be modified in any way, such as by removing
+   the copyright notice or references to the Internet Society or other
+   Internet organizations, except as needed for the purpose of
+   developing Internet standards in which case the procedures for
+   copyrights defined in the Internet Standards process must be
+   followed, or as required to translate it into languages other than
+   English.
+
+   The limited permissions granted above are perpetual and will not be
+   revoked by the Internet Society or its successors or assigns.
+
+   This document and the information contained herein is provided on an
+   "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
+   TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
+   BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
+   HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
+   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Acknowledgement
+
+   Funding for the RFC Editor function is currently provided by the
+   Internet Society.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Allman, et al.               Informational                     [Page 46]
+
diff --git a/ext/picotcp/RFC/rfc3135.txt b/ext/picotcp/RFC/rfc3135.txt
new file mode 100644
index 0000000..1138e09
--- /dev/null
+++ b/ext/picotcp/RFC/rfc3135.txt
@@ -0,0 +1,2523 @@
+
+
+
+
+
+
+Network Working Group                                          J. Border
+Request for Comments: 3135                        Hughes Network Systems
+Category: Informational                                          M. Kojo
+                                                  University of Helsinki
+                                                               J. Griner
+                                              NASA Glenn Research Center
+                                                           G. Montenegro
+                                                  Sun Microsystems, Inc.
+                                                               Z. Shelby
+                                                      University of Oulu
+                                                               June 2001
+
+
+    Performance Enhancing Proxies Intended to Mitigate Link-Related
+                              Degradations
+
+Status of this Memo
+
+   This memo provides information for the Internet community.  It does
+   not specify an Internet standard of any kind.  Distribution of this
+   memo is unlimited.
+
+Copyright Notice
+
+   Copyright (C) The Internet Society (2001).  All Rights Reserved.
+
+Abstract
+
+   This document is a survey of Performance Enhancing Proxies (PEPs)
+   often employed to improve degraded TCP performance caused by
+   characteristics of specific link environments, for example, in
+   satellite, wireless WAN, and wireless LAN environments.  Different
+   types of Performance Enhancing Proxies are described as well as the
+   mechanisms used to improve performance.  Emphasis is put on proxies
+   operating with TCP.  In addition, motivations for their development
+   and use are described along with some of the consequences of using
+   them, especially in the context of the Internet.
+
+Table of Contents
+
+   1. Introduction  . . . . . . . . . . . . . . . . . . . . . . . . .  3
+   2. Types of Performance Enhancing Proxies  . . . . . . . . . . . .  4
+   2.1 Layering . . . . . . . . . . . . . . . . . . . . . . . . . . .  4
+   2.1.1 Transport Layer PEPs . . . . . . . . . . . . . . . . . . . .  5
+   2.1.2 Application Layer PEPs . . . . . . . . . . . . . . . . . . .  5
+   2.2 Distribution . . . . . . . . . . . . . . . . . . . . . . . . .  6
+   2.3 Implementation Symmetry  . . . . . . . . . . . . . . . . . . .  6
+   2.4 Split Connections  . . . . . . . . . . . . . . . . . . . . . .  7
+
+
+
+Border, et al.               Informational                      [Page 1]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   2.5 Transparency . . . . . . . . . . . . . . . . . . . . . . . . .  8
+   3. PEP Mechanisms  . . . . . . . . . . . . . . . . . . . . . . . .  9
+   3.1 TCP ACK Handling . . . . . . . . . . . . . . . . . . . . . . .  9
+   3.1.1 TCP ACK Spacing  . . . . . . . . . . . . . . . . . . . . . .  9
+   3.1.2 Local TCP Acknowledgements . . . . . . . . . . . . . . . . .  9
+   3.1.3 Local TCP Retransmissions  . . . . . . . . . . . . . . . . .  9
+   3.1.4 TCP ACK Filtering and Reconstruction . . . . . . . . . . . . 10
+   3.2 Tunneling  . . . . . . . . . . . . . . . . . . . . . . . . . . 10
+   3.3 Compression  . . . . . . . . . . . . . . . . . . . . . . . . . 10
+   3.4 Handling Periods of Link Disconnection with TCP  . . . . . . . 11
+   3.5 Priority-based Multiplexing  . . . . . . . . . . . . . . . . . 12
+   3.6 Protocol Booster Mechanisms  . . . . . . . . . . . . . . . . . 13
+   4. Implications of Using PEPs  . . . . . . . . . . . . . . . . . . 14
+   4.1 The End-to-end Argument  . . . . . . . . . . . . . . . . . . . 14
+   4.1.1 Security . . . . . . . . . . . . . . . . . . . . . . . . . . 14
+   4.1.1.1 Security Implications  . . . . . . . . . . . . . . . . . . 15
+   4.1.1.2 Security Implication Mitigations . . . . . . . . . . . . . 16
+   4.1.1.3 Security Research Related to PEPs  . . . . . . . . . . . . 16
+   4.1.2 Fate Sharing . . . . . . . . . . . . . . . . . . . . . . . . 16
+   4.1.3 End-to-end Reliability . . . . . . . . . . . . . . . . . . . 17
+   4.1.4 End-to-end Failure Diagnostics . . . . . . . . . . . . . . . 19
+   4.2 Asymmetric Routing . . . . . . . . . . . . . . . . . . . . . . 19
+   4.3 Mobile Hosts . . . . . . . . . . . . . . . . . . . . . . . . . 20
+   4.4 Scalability  . . . . . . . . . . . . . . . . . . . . . . . . . 20
+   4.5 Other Implications of Using PEPs . . . . . . . . . . . . . . . 21
+   5. PEP Environment Examples  . . . . . . . . . . . . . . . . . . . 21
+   5.1 VSAT Environments  . . . . . . . . . . . . . . . . . . . . . . 21
+   5.1.1 VSAT Network Characteristics . . . . . . . . . . . . . . . . 22
+   5.1.2 VSAT Network PEP Implementations . . . . . . . . . . . . . . 23
+   5.1.3 VSAT Network PEP Motivation  . . . . . . . . . . . . . . . . 24
+   5.2 W-WAN Environments . . . . . . . . . . . . . . . . . . . . . . 25
+   5.2.1 W-WAN Network Characteristics  . . . . . . . . . . . . . . . 25
+   5.2.2 W-WAN PEP Implementations  . . . . . . . . . . . . . . . . . 26
+   5.2.2.1 Mowgli System  . . . . . . . . . . . . . . . . . . . . . . 26
+   5.2.2.2 Wireless Application Protocol (WAP)  . . . . . . . . . . . 28
+   5.2.3 W-WAN PEP Motivation . . . . . . . . . . . . . . . . . . . . 29
+   5.3 W-LAN Environments . . . . . . . . . . . . . . . . . . . . . . 30
+   5.3.1 W-LAN Network Characteristics  . . . . . . . . . . . . . . . 30
+   5.3.2 W-LAN PEP Implementations: Snoop . . . . . . . . . . . . . . 31
+   5.3.3 W-LAN PEP Motivation . . . . . . . . . . . . . . . . . . . . 33
+   6. Security Considerations . . . . . . . . . . . . . . . . . . . . 34
+   7. IANA Considerations . . . . . . . . . . . . . . . . . . . . . . 34
+   8. Acknowledgements  . . . . . . . . . . . . . . . . . . . . . . . 34
+   9. References  . . . . . . . . . . . . . . . . . . . . . . . . . . 35
+   10. Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . 39
+   Appendix A - PEP Terminology Summary . . . . . . . . . . . . . . . 41
+   Full Copyright Statement . . . . . . . . . . . . . . . . . . . . . 45
+
+
+
+
+Border, et al.               Informational                      [Page 2]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+1. Introduction
+
+   The Transmission Control Protocol [RFC0793] (TCP) is used as the
+   transport layer protocol by many Internet and intranet applications.
+   However, in certain environments, TCP and other higher layer protocol
+   performance is limited by the link characteristics of the
+   environment.
+
+   This document is a survey of Performance Enhancing Proxy (PEP)
+   performance mitigation techniques.  A PEP is used to improve the
+   performance of the Internet protocols on network paths where native
+   performance suffers due to characteristics of a link or subnetwork on
+   the path.  This document is informational and does not make
+   recommendations about using PEPs or not using them.  Distinct
+   standards track recommendations for the performance mitigation of TCP
+   over links with high error rates, links with low bandwidth, and so
+   on, have been developed or are in development by the Performance
+   Implications of Link Characteristics WG (PILC) [PILCWEB].
+
+   Link design choices may have a significant influence on the
+   performance and efficiency of the Internet.  However, not all link
+   characteristics, for example, high latency, can be compensated for by
+   choices in the link layer design.  And, the cost of compensating for
+   some link characteristics may be prohibitive for some technologies.
+   The techniques surveyed here are applied to existing link
+   technologies.  When new link technologies are designed, they should
+   be designed so that these techniques are not required, if at all
+   possible.
+
+   This document does not advocate the use of PEPs in any general case.
+   On the contrary, we believe that the end-to-end principle in
+   designing Internet protocols should be retained as the prevailing
+   approach and PEPs should be used only in specific environments and
+   circumstances where end-to-end mechanisms providing similar
+   performance enhancements are not available.  In any environment where
+   one might consider employing a PEP for improved performance, an end
+   user (or, in some cases, the responsible network administrator)
+   should be aware of the PEP and the choice of employing PEP
+   functionality should be under the control of the end user, especially
+   if employing the PEP would interfere with end-to-end usage of IP
+   layer security mechanisms or otherwise have undesirable implications
+   in some circumstances.  This would allow the user to choose end-to-
+   end IP at all times but, of course, without the performance
+   enhancements that employing the PEP may yield.
+
+   This survey does not make recommendations, for or against, with
+   respect to using PEPs.  Standards track recommendations have been or
+   are being developed within the IETF for individual link
+
+
+
+Border, et al.               Informational                      [Page 3]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   characteristics, e.g., links with high error rates, links with low
+   bandwidth, links with asymmetric bandwidth, etc., by the Performance
+   Implications of Link Characteristics WG (PILC) [PILCWEB].
+
+   The remainder of this document is organized as follows.  Section 2
+   provides an overview of different kinds of PEP implementations.
+
+   Section 3 discusses some of the mechanisms which PEPs may employ in
+   order to improve performance.  Section 4 discusses some of the
+   implications with respect to using PEPs, especially in the context of
+   the global Internet.  Finally, Section 5 discusses some example
+   environments where PEPs are used: satellite very small aperture
+   terminal (VSAT) environments, mobile wireless WAN (W-WAN)
+   environments and wireless LAN (W-LAN) environments.  A summary of PEP
+   terminology is included in an appendix (Appendix A).
+
+2. Types of Performance Enhancing Proxies
+
+   There are many types of Performance Enhancing Proxies.  Different
+   types of PEPs are used in different environments to overcome
+   different link characteristics which affect protocol performance.
+   Note that enhancing performance is not necessarily limited in scope
+   to throughput.  Other performance related aspects, like usability of
+   a link, may also be addressed.  For example, [M-TCP] addresses the
+   issue of keeping TCP connections alive during periods of
+   disconnection in wireless networks.
+
+   The following sections describe some of the key characteristics which
+   differentiate different types of PEPs.
+
+2.1 Layering
+
+   In principle, a PEP implementation may function at any protocol layer
+   but typically it functions at one or two layers only.  In this
+   document we focus on PEP implementations that function at the
+   transport layer or at the application layer as such PEPs are most
+   commonly used to enhance performance over links with problematic
+   characteristics.  A PEP implementation may also operate below the
+   network layer, that is, at the link layer, but this document pays
+   only little attention to such PEPs as link layer mechanisms can be
+   and typically are implemented transparently to network and higher
+   layers, requiring no modifications to protocol operation above the
+   link layer.  It should also be noted that some PEP implementations
+   operate across several protocol layers by exploiting the protocol
+   information and possibly modifying the protocol operation at more
+   than one layer.  For such a PEP it may be difficult to define
+   exactly which layer(s) it operates on.
+
+
+
+
+Border, et al.               Informational                      [Page 4]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+2.1.1 Transport Layer PEPs
+
+   Transport layer PEPs operate at the transport level.  They may be
+   aware of the type of application being carried by the transport layer
+   but, at most, only use this information to influence their behavior
+   with respect to the transport protocol; they do not modify the
+   application protocol in any way, but let the application protocol
+   operate end-to-end.  Most transport layer PEP implementations
+   interact with TCP.  Such an implementation is called a TCP
+   Performance Enhancing Proxy (TCP PEP).  For example, in an
+   environment where ACKs may bunch together causing undesirable data
+   segment bursts, a TCP PEP may be used to simply modify the ACK
+   spacing in order to improve performance.  On the other hand, in an
+   environment with a large bandwidth*delay product, a TCP PEP may be
+   used to alter the behavior of the TCP connection by generating local
+   acknowledgments to TCP data segments in order to improve the
+   connection's throughput.
+
+   The term TCP spoofing is sometimes used synonymously for TCP PEP
+   functionality.  However, the term TCP spoofing more accurately
+   describes the characteristic of intercepting a TCP connection in the
+   middle and terminating the connection as if the interceptor is the
+   intended destination.  While this is a characteristic of many TCP PEP
+   implementations, it is not a characteristic of all TCP PEP
+   implementations.
+
+2.1.2 Application Layer PEPs
+
+   Application layer PEPs operate above the transport layer.  Today,
+   different kinds of application layer proxies are widely used in the
+   Internet.  Such proxies include Web caches and relay Mail Transfer
+   Agents (MTA) and they typically try to improve performance or service
+   availability and reliability in general and in a way which is
+   applicable in any environment but they do not necessarily include any
+   optimizations that are specific to certain link characteristics.
+
+   Application layer PEPs, on the other hand, can be implemented to
+   improve application protocol as well as transport layer performance
+   with respect to a particular application being used with a particular
+   type of link.  An application layer PEP may have the same
+   functionality as the corresponding regular proxy for the same
+   application (e.g., relay MTA or Web caching proxy) but extended with
+   link-specific optimizations of the application protocol operation.
+
+   Some application protocols employ extraneous round trips, overly
+   verbose headers and/or inefficient header encoding which may have a
+   significant impact on performance, in particular, with long delay and
+   slow links.  This unnecessary overhead can be reduced, in general or
+
+
+
+Border, et al.               Informational                      [Page 5]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   for a particular type of link, by using an application layer PEP in
+   an intermediate node.  Some examples of application layer PEPs which
+   have been shown to improve performance on slow wireless WAN links are
+   described in [LHKR96] and [CTC+97].
+
+2.2 Distribution
+
+   A PEP implementation may be integrated, i.e., it comprises a single
+   PEP component implemented within a single node, or distributed, i.e.,
+   it comprises two or more PEP components, typically implemented in
+   multiple nodes.  An integrated PEP implementation represents a single
+   point at which performance enhancement is applied.  For example, a
+   single PEP component might be implemented to provide impedance
+   matching at the point where wired and wireless links meet.
+
+   A distributed PEP implementation is generally used to surround a
+   particular link for which performance enhancement is desired.  For
+   example, a PEP implementation for a satellite connection may be
+   distributed between two PEPs located at each end of the satellite
+   link.
+
+2.3 Implementation Symmetry
+
+   A PEP implementation may be symmetric or asymmetric.  Symmetric PEPs
+   use identical behavior in both directions, i.e., the actions taken by
+   the PEP occur independently of which interface a packet is received.
+   Asymmetric PEPs operate differently in each direction.  The direction
+   can be defined in terms of the link (e.g., from a central site to a
+   remote site) or in terms of protocol traffic (e.g., the direction of
+   TCP data flow, often called the TCP data channel, or the direction of
+   TCP ACK flow, often called the TCP ACK channel).  An asymmetric PEP
+   implementation is generally used at a point where the characteristics
+   of the links on each side of the PEP differ or with asymmetric
+   protocol traffic.  For example, an asymmetric PEP might be placed at
+   the intersection of wired and wireless networks or an asymmetric
+   application layer PEP might be used for the request-reply type of
+   HTTP traffic.  A PEP implementation may also be both symmetric and
+   asymmetric at the same time with regard to different mechanisms it
+   employs.  (PEP mechanisms are described in Section 3.)
+
+   Whether a PEP implementation is symmetric or asymmetric is
+   independent of whether the PEP implementation is integrated or
+   distributed.  In other words, a distributed PEP implementation might
+   operate symmetrically at each end of a link (i.e., the two PEPs
+   function identically).  On the other hand, a distributed PEP
+   implementation might operate asymmetrically, with a different PEP
+   implementation at each end of the link.  Again, this usually is used
+   with asymmetric links.  For example, for a link with an asymmetric
+
+
+
+Border, et al.               Informational                      [Page 6]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   amount of bandwidth available in each direction, the PEP on the end
+   of the link forwarding traffic in the direction with a large amount
+   of bandwidth might focus on locally acknowledging TCP traffic in
+   order to use the available bandwidth.  At the same time, the PEP on
+   the end of the link forwarding traffic in the direction with very
+   little bandwidth might focus on reducing the amount of TCP
+   acknowledgement traffic being forwarded across the link (to keep the
+   link from congesting).
+
+2.4 Split Connections
+
+   A split connection TCP implementation terminates the TCP connection
+   received from an end system and establishes a corresponding TCP
+   connection to the other end system.  In a distributed PEP
+   implementation, this is typically done to allow the use of a third
+   connection between two PEPs optimized for the link.  This might be a
+   TCP connection optimized for the link or it might be another
+   protocol, for example, a proprietary protocol running on top of UDP.
+   Also, the distributed implementation might use a separate connection
+   between the proxies for each TCP connection or it might multiplex the
+   data from multiple TCP connections across a single connection between
+   the PEPs.
+
+   In an integrated PEP split connection TCP implementation, the PEP
+   again terminates the connection from one end system and originates a
+   separate connection to the other end system.  [I-TCP] documents an
+   example of a single PEP split connection implementation.
+
+   Many integrated PEPs use a split connection implementation in order
+   to address a mismatch in TCP capabilities between two end systems.
+   For example, the TCP window scaling option [RFC1323] can be used to
+   extend the maximum amount of TCP data which can be "in flight" (i.e.,
+   sent and awaiting acknowledgement).  This is useful for filling a
+   link which has a high bandwidth*delay product.  If one end system is
+   capable of using scaled TCP windows but the other is not, the end
+   system which is not capable can set up its connection with a PEP on
+   its side of the high bandwidth*delay link.  The split connection PEP
+   then sets up a TCP connection with window scaling over the link to
+   the other end system.
+
+   Split connection TCP implementations can effectively leverage TCP
+   performance enhancements optimal for a particular link but which
+   cannot necessarily be employed safely over the global Internet.
+
+   Note that using split connection PEPs does not necessarily exclude
+   simultaneous use of IP for end-to-end connectivity.  If a split
+   connection is managed per application or per connection and is under
+   the control of the end user, the user can decide whether a particular
+
+
+
+Border, et al.               Informational                      [Page 7]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   TCP connection or application makes use of the split connection PEP
+   or whether it operates end-to-end.  When a PEP is employed on a last
+   hop link, the end user control is relatively easy to implement.
+
+   In effect, application layer proxies for TCP-based applications are
+   split connection TCP implementations with end systems using PEPs as a
+   service related to a particular application.  Therefore, all
+   transport (TCP) layer enhancements that are available with split
+   connection TCP implementations can also be employed with application
+   layer PEPs in conjunction with application layer enhancements.
+
+2.5 Transparency
+
+   Another key characteristic of a PEP is its degree of transparency.
+   PEPs may operate totally transparently to the end systems, transport
+   endpoints, and/or applications involved (in a connection), requiring
+   no modifications to the end systems, transport endpoints, or
+   applications.
+
+   On the other hand, a PEP implementation may require modifications to
+   both ends in order to be used.  In between, a PEP implementation may
+   require modifications to only one of the ends involved.  Either of
+   these kinds of PEP implementations is non-transparent, at least to
+   the layer requiring modification.
+
+   It is sometimes useful to think of the degree of transparency of a
+   PEP implementation at four levels: transparency with respect to the
+   end systems (network-layer transparent PEP), transparency with
+   respect to the transport endpoints (transport-layer transparent PEP),
+   transparency with respect to the applications (application-layer
+   transparent PEP) and transparency with respect to the users.  For
+   example, a user who subscribes to a satellite Internet access service
+   may be aware that the satellite terminal is providing a performance
+   enhancing service even though the TCP/IP stack and the applications
+   in the user's PC are not aware of the PEP which implements it.
+
+   Note that the issue of transparency is not the same as the issue of
+   maintaining end-to-end semantics.  For example, a PEP implementation
+   which simply uses a TCP ACK spacing mechanism maintains the end-to-
+   end semantics of the TCP connection while a split connection TCP PEP
+   implementation may not.  Yet, both can be implemented transparently
+   to the transport endpoints at both ends.  The implications of not
+   maintaining the end-to-end semantics, in particular the end-to-end
+   semantics of TCP connections, are discussed in Section 4.
+
+
+
+
+
+
+
+Border, et al.               Informational                      [Page 8]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+3. PEP Mechanisms
+
+   An obvious key characteristic of a PEP implementation is the
+   mechanism(s) it uses to improve performance.  Some examples of PEP
+   mechanisms are described in the following subsections.  A PEP
+   implementation might implement more than one of these mechanisms.
+
+3.1 TCP ACK Handling
+
+   Many TCP PEP implementations are based on TCP ACK manipulation.  The
+   handling of TCP acknowledgments can differ significantly between
+   different TCP PEP implementations.  The following subsections
+   describe various TCP ACK handling mechanisms.  Many implementations
+   combine some of these mechanisms and possibly employ some additional
+   mechanisms as well.
+
+3.1.1 TCP ACK Spacing
+
+   In environments where ACKs tend to bunch together, ACK spacing is
+   used to smooth out the flow of TCP acknowledgments traversing a link.
+   This improves performance by eliminating bursts of TCP data segments
+   that the TCP sender would send due to back-to-back arriving TCP
+   acknowledgments [BPK97].
+
+3.1.2 Local TCP Acknowledgements
+
+   In some PEP implementations, TCP data segments received by the PEP
+   are locally acknowledged by the PEP.  This is very useful over
+   network paths with a large bandwidth*delay product as it speeds up
+   TCP slow start and allows the sending TCP to quickly open up its
+   congestion window.  Local (negative) acknowledgments are often also
+   employed to trigger local (and faster) error recovery on links with
+   significant error rates.  (See Section 3.1.3.)
+
+   Local acknowledgments are automatically employed with split
+   connection TCP implementations.  When local acknowledgments are used,
+   the burden falls upon the TCP PEP to recover any data which is
+   dropped after the PEP acknowledges it.
+
+3.1.3 Local TCP Retransmissions
+
+   A TCP PEP may locally retransmit data segments lost on the path
+   between the TCP PEP and the receiving end system, thus aiming at
+   faster recovery from lost data.  In order to achieve this the TCP PEP
+   may use acknowledgments arriving from the end system that receives
+   the TCP data segments, along with appropriate timeouts, to determine
+
+
+
+
+
+Border, et al.               Informational                      [Page 9]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   when to locally retransmit lost data.  TCP PEPs sending local
+   acknowledgments to the sending end system are required to employ
+   local retransmissions towards the receiving end system.
+
+   Some PEP implementations perform local retransmissions even though
+   they do not use local acknowledgments to alter TCP connection
+   performance.  Basic Snoop [SNOOP] is a well known example of such a
+   PEP implementation.  Snoop caches TCP data segments it receives and
+   forwards and then monitors the end-to-end acknowledgments coming from
+   the receiving TCP end system for duplicate acknowledgments (DUPACKs).
+   When DUPACKs are received, Snoop locally retransmits the lost TCP
+   data segments from its cache, suppressing the DUPACKs flowing to the
+   sending TCP end system until acknowledgments for new data are
+   received.  The Snoop system also implements an option to employ local
+   negative acknowledgments to trigger local TCP retransmissions.  This
+   can be achieved, for example, by applying TCP selective
+   acknowledgments locally on the error-prone link.  (See Section 5.3
+   for details.)
+
+3.1.4 TCP ACK Filtering and Reconstruction
+
+   On paths with highly asymmetric bandwidth the TCP ACKs flowing in the
+   low-speed direction may get congested if the asymmetry ratio is high
+   enough.  The ACK filtering and reconstruction mechanism addresses
+   this by filtering the ACKs on one side of the link and reconstructing
+   the deleted ACKs on the other side of the link.  The mechanism and
+   the issue of dealing with TCP ACK congestion with highly asymmetric
+   links are discussed in detail in [RFC2760] and in [BPK97].
+
+3.2 Tunneling
+
+   A Performance Enhancing Proxy may encapsulate messages to carry the
+   messages across a particular link or to force messages to traverse a
+   particular path.  A PEP at the other end of the encapsulation tunnel
+   removes the tunnel wrappers before final delivery to the receiving
+   end system.  A tunnel might be used by a distributed split connection
+   TCP implementation as the means for carrying the connection between
+   the distributed PEPs.  A tunnel might also be used to support forcing
+   TCP connections which use asymmetric routing to go through the end
+   points of a distributed PEP implementation.
+
+3.3 Compression
+
+   Many PEP implementations include support for one or more forms of
+   compression.  In some PEP implementations, compression may even be
+   the only mechanism used for performance improvement.  Compression
+   reduces the number of bytes which need to be sent across a link.
+   This is useful in general and can be very important for bandwidth
+
+
+
+Border, et al.               Informational                     [Page 10]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   limited links.  Benefits of using compression include improved link
+   efficiency and higher effective link utilization, reduced latency and
+   improved interactive response time, decreased overhead and reduced
+   packet loss rate over lossy links.
+
+   Where appropriate, link layer compression is used.  TCP and IP header
+   compression are also frequently used with PEP implementations.
+   [RFC1144] describes a widely deployed method for compressing TCP
+   headers.  Other header compression algorithms are described in
+   [RFC2507], [RFC2508] and [RFC2509].
+
+   Payload compression is also desirable and is increasing in importance
+   with today's increased emphasis on Internet security.  Network (IP)
+   layer (and above) security mechanisms convert IP payloads into random
+   bit streams which defeat applicable link layer compression mechanisms
+   by removing or hiding redundant "information."  Therefore,
+   compression of the payload needs to be applied before security
+   mechanisms are applied.  [RFC2393] defines a framework where common
+   compression algorithms can be applied to arbitrary IP segment
+   payloads.  However, [RFC2393] compression is not always applicable.
+   Many types of IP payloads (e.g., images, audio, video and "zipped"
+   files being transferred) are already compressed.  And, when security
+   mechanisms such as TLS [RFC2246] are applied above the network (IP)
+   layer, the data is already encrypted (and possibly also compressed),
+   again removing or hiding any redundancy in the payload.  The
+   resulting additional transport or network layer compression will
+   compact only headers, which are small, and possibly already covered
+   by separate compression algorithms of their own.
+
+   With application layer PEPs one can employ application-specific
+   compression.  Typically an application-specific (or content-specific)
+   compression mechanism is much more efficient than any generic
+   compression mechanism.  For example, a distributed Web PEP
+   implementation may implement more efficient binary encoding of HTTP
+   headers, or a PEP can employ lossy compression that reduces the image
+   quality of online-images on Web pages according to end user
+   instructions, thus reducing the number of bytes transferred over a
+   slow link and consequently the response time perceived by the user
+   [LHKR96].
+
+3.4 Handling Periods of Link Disconnection with TCP
+
+   Periods of link disconnection or link outages are very common with
+   some wireless links.  During these periods, a TCP sender does not
+   receive the expected acknowledgments.  Upon expiration of the
+   retransmit timer, this causes TCP to close its congestion window with
+   all of the related drawbacks.  A TCP PEP may monitor the traffic
+   coming from the TCP sender towards the TCP receiver behind the
+
+
+
+Border, et al.               Informational                     [Page 11]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   disconnected link.  The TCP PEP retains the last ACK, so that it can
+   shut down the TCP sender's window by sending the last ACK with a
+   window set to zero.  Thus, the TCP sender will go into persist mode.
+
+   To make this work in both directions with an integrated TCP PEP
+   implementation, the TCP receiver behind the disconnected link must be
+   aware of the current state of the connection and, in the event of a
+   disconnection, it must be capable of freezing all timers.  [M-TCP]
+   implements such operation.  Another possibility is that the
+   disconnected link is surrounded by a distributed PEP pair.
+
+   In split connection TCP implementations, a period of link
+   disconnection can easily be hidden from the end host on the other
+   side of the PEP thus precluding the TCP connection from breaking even
+   if the period of link disconnection lasts a very long time; if the
+   TCP PEP cannot forward data due to link disconnection, it stops
+   receiving data.  Normal TCP flow control then prevents the TCP sender
+   from sending more than the TCP advertised window allowed by the PEP.
+   Consequently, the PEP and its counterpart behind the disconnected
+   link can employ a modified TCP version which retains the state and
+   all unacknowledged data segments across the period of disconnection
+   and then performs local recovery as the link is reconnected.  The
+   period of link disconnection may or may not be hidden from the
+   application and user, depending upon what application the user is
+   using the TCP connection for.
+
+3.5 Priority-based Multiplexing
+
+   Implementing priority-based multiplexing of data over a slow and
+   expensive link may significantly improve the performance and
+   usability of the link for selected applications or connections.
+
+   A user behind a slow link would find the link more feasible to
+   use in case of simultaneous data transfers, if urgent data transfers
+   (e.g., interactive connections) could have shorter response time
+   (better performance) than less urgent background transfers.  If the
+   interactive connections transmit enough data to keep the slow link
+   fully utilized, it might be necessary to fully suspend the background
+   transfers for a while to ensure timely delivery for the interactive
+   connections.
+
+   In flight TCP segments of an end-to-end TCP connection (with low
+   priority) cannot be delayed for a long time.  Otherwise, the TCP
+   timer at the sending end would expire, resulting in suboptimal
+   performance.  However, this kind of operation can be controlled in
+   conjunction with a split connection TCP PEP by assigning different
+   priorities for different connections (or applications).  A split
+   connection PEP implementation allows the PEP in an intermediate node
+
+
+
+Border, et al.               Informational                     [Page 12]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   to delay the data delivery of a lower-priority TCP flow for an
+   unlimited period of time by simply rescheduling the order in which it
+   forwards data of different flows to the destination host behind the
+   slow link.  This does not have a negative impact on the delayed TCP
+   flow as normal TCP flow control takes care of suspending the flow
+   between the TCP sender and the PEP, when the PEP is not forwarding
+   data for the flow, and resumes it once the PEP decides to continue
+   forwarding data for the flow.  This can further be assisted, if the
+   protocol stacks on both sides of the slow link implement priority
+   based scheduling of connections.
+
+   With such a PEP implementation, along with user-controlled
+   priorities, the user can assign higher priority for selected
+   interactive connection(s) and have much shorter response time for the
+   selected connection(s), even if there are simultaneous low priority
+   bulk data transfers which in regular end-to-end operation would
+   otherwise eat the available bandwidth of the slow link almost
+   completely.  These low priority bulk data transfers would then
+   proceed nicely during the idle periods of interactive connections,
+   allowing the user to keep the slow and expensive link (e.g., wireless
+   WAN) fully utilized.
+
+   Other priority-based mechanisms may be applied on shared wireless
+   links with more than two terminals.  With shared wireless mediums
+   becoming a weak link in Internet QoS architectures, many may turn to
+   PEPs to provide extra priority levels across a shared wireless medium
+   [SHEL00].  These PEPs are distributed on all nodes of the shared
+   wireless medium.  For example, in an 802.11 WLAN this PEP is
+   implemented in the access point (base station) and each mobile host.
+   One PEP then uses distributed queuing techniques to coordinate
+   traffic classes of all nodes.  This is also sometimes called subnet
+   bandwidth management.  See [BBKT97] for an example of queuing
+   techniques which can be used to achieve this.  This technique can be
+   implemented either above or below the IP layer.  Priority treatment
+   can typically be specified either by the user or by marking the
+   (IPv4) ToS or (IPv6) Traffic Class IP header field.
+
+3.6 Protocol Booster Mechanisms
+
+   Work in [FMSBMR98] shows a range of other possible PEP mechanisms
+   called protocol boosters.  Some of these mechanisms are specific to
+   UDP flows.  For example, a PEP may apply asymmetrical methods such as
+   extra UDP error detection.  Since the 16 bit UDP checksum is
+   optional, it is typically not computed.  However, for links with
+   errors, the checksum could be beneficial.  This checksum can be added
+   to outgoing UDP packets by a PEP.
+
+
+
+
+
+Border, et al.               Informational                     [Page 13]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   Symmetrical mechanisms have also been developed.  A Forward Erasure
+   Correction (FZC) mechanism can be used with real-time and multicast
+   traffic.  The encoding PEP adds a parity packet over a block of
+   packets.  Upon reception, the parity is removed and missing data is
+   regenerated.  A jitter control mechanism can be implemented at the
+   expense of extra latency.  A sending PEP can add a timestamp to
+   outgoing packets.  The receiving PEP then delays packets in order to
+   reproduce the correct interval.
+
+4. Implications of Using PEPs
+
+   The following sections describe some of the implications of using
+   Performance Enhancing Proxies.
+
+4.1 The End-to-end Argument
+
+   As indicated in [RFC1958], the end-to-end argument [SRC84] is one of
+   the architectural principles of the Internet.  The basic argument is
+   that, as a first principle, certain required end-to-end functions can
+   only be correctly performed by the end systems themselves.  Most of
+   the potential negative implications associated with using PEPs are
+   related to the possibility of breaking the end-to-end semantics of
+   connections.  This is one of the main reasons why PEPs are not
+   recommended for general use.
+
+   As indicated in Section 2.5, not all PEP implementations break the
+   end-to-end semantics of connections.  Correctly designed PEPs do not
+   attempt to replace any application level end-to-end function, but
+   only attempt to add performance optimizations to a subpath of the
+   end-to-end path between the application endpoints.  Doing this can be
+   consistent with the end-to-end argument.  However, a user or network
+   administrator adding a PEP to his network configuration should be
+   aware of the potential end-to-end implications related to the
+   mechanisms being used by the particular PEP implementation.
+
+4.1.1 Security
+
+   In most cases, security applied above the transport layer can be used
+   with PEPs, especially transport layer PEPs.  However, today, only a
+   limited number of applications include support for the use of
+   transport (or higher) layer security.  Network (IP) layer security
+   (IPsec) [RFC2401], on the other hand, can generally be used by any
+   application, transparently to the application.
+
+
+
+
+
+
+
+
+Border, et al.               Informational                     [Page 14]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+4.1.1.1 Security Implications
+
+   The most detrimental negative implication of breaking the end-to-end
+   semantics of a connection is that it disables end-to-end use of
+   IPsec.  In general, a user or network administrator must choose
+   between using PEPs and using IPsec.  If IPsec is employed end-to-end,
+   PEPs that are implemented on intermediate nodes in the network cannot
+   examine the transport or application headers of IP packets because
+   encryption of IP packets via IPsec's ESP header (in either transport
+   or tunnel mode) renders the TCP header and payload unintelligible to
+   the PEPs.  Without being able to examine the transport or application
+   headers, a PEP may not function optimally or at all.
+
+   If a PEP implementation is non-transparent to the users and the users
+   trust the PEP in the middle, IPsec can be used separately between
+   each end system and PEP.  However, in most cases this is an
+   undesirable or unacceptable alternative as the end systems cannot
+   trust PEPs in general.  In addition, this is not as secure as end-
+   to-end security.  (For example, the traffic is exposed in the PEP
+   when it is decrypted to be processed.)  And, it can lead to
+   potentially misleading security level assumptions by the end systems.
+   If the two end systems negotiate different levels of security with
+   the PEP, the end system which negotiated the stronger level of
+   security may not be aware that a lower level of security is being
+   provided for part of the connection.  The PEP could be implemented to
+   prevent this from happening by being smart enough to force the same
+   level of security to each end system but this increases the
+   complexity of the PEP implementation (and still is not as secure as
+   end-to-end security).
+
+   With a transparent PEP implementation, it is difficult for the end
+   systems to trust the PEP because they may not be aware of its
+   existence.  Even if the user is aware of the PEP, setting up
+   acceptable security associations with the PEP while maintaining the
+   PEP's transparent nature is problematic (if not impossible).
+
+   Note that even when a PEP implementation does not break the end-to-
+   end semantics of a connection, the PEP implementation may not be able
+   to function in the presence of IPsec.  For example, it is difficult
+   to do ACK spacing if the PEP cannot reliably determine which IP
+   packets contain ACKs of interest.  In any case, the authors are
+   currently not aware of any PEP implementations, transparent or non-
+   transparent, which provide support for end-to-end IPsec, except in a
+   case where the PEPs are implemented on the end hosts.
+
+
+
+
+
+
+
+Border, et al.               Informational                     [Page 15]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+4.1.1.2 Security Implication Mitigations
+
+   There are some steps which can be taken to allow the use of IPsec and
+   PEPs to coexist.  If an end user can select the use of IPsec for some
+   traffic and not for other traffic, PEP processing can be applied to
+   the traffic sent without IPsec.  Of course, the user must then do
+   without security for this traffic or provide security for the traffic
+   via other means (for example, by using transport layer security).
+   However, even when this is possible, significant complexity may need
+   to be added to the configuration of the end system.
+
+   Another alternative is to implement IPsec between the two PEPs of a
+   distributed PEP implementation.  This at least protects the traffic
+   between the two PEPs.  (The issue of trusting the PEPs does not
+   change.)  In the case where the PEP implementation is not transparent
+   to the user, (assuming that the user trusts the PEPs,) the user can
+   configure his end system to use the PEPs as the end points of an
+   IPsec tunnel.  And, an IPsec tunnel could even potentially be used
+   between the end system and a PEP to protect traffic on this part of
+   the path.  But, all of this adds complexity.  And, it still does not
+   eliminate the risk of the traffic being exposed in the PEP itself as
+   the traffic is received from one IPsec tunnel, processed and then
+   forwarded (even if forwarded through another IPsec tunnel).
+
+4.1.1.3 Security Research Related to PEPs
+
+   There is research underway investigating the possibility of changing
+   the implementation of IPsec to be more friendly to the use of PEPs.
+   One approach being actively looked at is the use of multi-layer IP
+   security.  [Zhang00] describes a method which allows TCP headers to
+   be encrypted as one layer (with the PEPs in the path of the TCP
+   connections included in the security associations used to encrypt the
+   TCP headers) while the TCP payload is encrypted end-to-end as a
+   separate layer.  This still involves trusting the PEP, but to a much
+   lesser extent.  However, a drawback to this approach is that it adds
+   a significant amount of complexity to the IP security implementation.
+   Given the existing complexity of IPsec, this drawback is a serious
+   impediment to the standardization of the multi-layer IP security idea
+   and it is very unlikely that this approach will be adopted as a
+   standard any time soon.  Therefore, relying on this type of approach
+   will likely involve the use of non-standard protocols (and the
+   associated risk of doing so).
+
+4.1.2 Fate Sharing
+
+   Another important aspect of the end-to-end argument is fate sharing.
+   If a failure occurs in the network, the ability of the connection to
+   survive the failure depends upon how much state is being maintained
+
+
+
+Border, et al.               Informational                     [Page 16]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   on behalf of the connection in the network and whether the state is
+   self-healing.  If no connection specific state resides in the network
+   or such state is self-healing as in the case of regular end-to-end
+   operation, then a failure in the network will break the connection
+   only if there is no alternate path through the network between the
+   end systems.  And, if there is no path, both end systems can detect
+   this.  However, if the connection depends upon some state being
+   stored in the network (e.g., in a PEP), then a failure in the network
+   (e.g., the node containing a PEP crashes) causes this state to be
+   lost, forcing the connection to terminate even if an alternate path
+   through the network exists.
+
+   The importance of this aspect of the end-to-end argument with respect
+   to PEPs is dependent upon both the PEP implementation and upon the
+   types of applications being used.  Sometimes coincidentally but more
+   often by design, PEPs are used in environments where there is no
+   alternate path between the end systems and, therefore, a failure of
+   the intermediate node containing a PEP would result in the
+   termination of the connection in any case.  And, even when this is
+   not the case, the risk of losing the connection in the case of
+   regular end-to-end operation may exist as the connection could break
+   for some other reason, for example, a long enough link outage of a
+   last-hop wireless link to the end host.  Therefore, users may choose
+   to accept the risk of a PEP crashing in order to take advantage of
+   the performance gains offered by the PEP implementation.  The
+   important thing is that accepting the risk should be under the
+   control of the user (i.e., the user should always have the option to
+   choose end-to-end operation) and, if the user chooses to use the PEP,
+   the user should be aware of the implications that a PEP failure has
+   with respect to the applications being used.
+
+4.1.3 End-to-end Reliability
+
+   Another aspect of the end-to-end argument is that of acknowledging
+   the receipt of data end-to-end in order to achieve reliable end-to-
+   end delivery of data.  An application aiming at reliable end-to-end
+   delivery must implement an end-to-end check and recovery at the
+   application level.  According to the end-to-end argument, this is the
+   only possibility to correctly implement reliable end-to-end
+   operation.  Otherwise the application violates the end-to-end
+   argument.  This also means that a correctly designed application can
+   never fully rely on the transport layer (e.g., TCP) or any other
+   communication subsystem to provide reliable end-to-end delivery.
+
+   First, a TCP connection may break down for some reason and result in
+   lost data that must be recovered at the application level.  Second,
+   the checksum provided by TCP may be considered inadequate, resulting
+   in undetected (by TCP) data corruption [Pax99] and requiring an
+
+
+
+Border, et al.               Informational                     [Page 17]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   application level check for data corruption.  Third, a TCP
+   acknowledgement only indicates that data was delivered to the TCP
+   implementation on the other end system.  It does not guarantee that
+   the data was delivered to the application layer on the other end
+   system.  Therefore, a well designed application must use an
+   application layer acknowledgement to ensure end-to-end delivery of
+   application layer data.  Note that this does not diminish the value
+   of a reliable transport protocol (i.e., TCP) as such a protocol
+   allows efficient implementation of several essential functions (e.g.,
+   congestion control) for an application.
+
+   If a PEP implementation acknowledges application data prematurely
+   (before the PEP receives an application ACK from the other endpoint),
+   end-to-end reliability cannot be guaranteed.  Typically, application
+   layer PEPs do not acknowledge data prematurely, i.e., the PEP does
+   not send an application ACK to the sender until it receives an
+   application ACK from the receiver.  And, transport layer PEP
+   implementations, including TCP PEPs, generally do not interfere with
+   end-to-end application layer acknowledgments as they let applications
+   operate end-to-end.  However, the user and/or network administrator
+   employing the PEP must understand how it operates in order to
+   understand the risks related to end-to-end reliability.
+
+   Some Internet applications do not necessarily operate end-to-end in
+   their regular operation, thus abandoning any end-to-end reliability
+   guarantee.  For example, Internet email delivery often operates via
+   relay Mail Transfer Agents, that is, relay Simple Mail Transfer
+   Protocol (SMTP) servers.  An originating MTA (SMTP server) sends the
+   mail message to a relay MTA that receives the mail message, stores it
+   in non-volatile storage (e.g., on disk) and then sends an application
+   level acknowledgement.  The relay MTA then takes "full
+   responsibility" for delivering the mail message to the destination
+   SMTP server (maybe via another relay MTA); it tries to forward the
+   message for a relatively long time (typically around 5 days).  This
+   scheme does not give a 100% guarantee of email delivery, but
+   reliability is considered "good enough".
+
+   An application layer PEP for this kind of an application may
+   acknowledge application data (e.g., mail message) without essentially
+   decreasing reliability, as long as the PEP operates according to the
+   same procedure as the regular proxy (e.g., relay MTA).  Again, as
+   indicated above, the user and/or network administrator employing such
+   a PEP needs to understand how it operates in order to understand the
+   reliability risks associated with doing so.
+
+
+
+
+
+
+
+Border, et al.               Informational                     [Page 18]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+4.1.4 End-to-end Failure Diagnostics
+
+   Another aspect of the end-to-end argument is the ability to support
+   end-to-end failure diagnostics when problems are encountered.  If a
+   network problem occurs which breaks a connection, the end points of
+   the connection will detect the failure via timeouts.  However, the
+   existence of a PEP in between the two end points could delay
+   (sometimes significantly) the detection of the failure by one or both
+   of the end points.  (Of course, some PEPs are intentionally designed
+   to hide these types of failures as described in Section 3.4.)  The
+   implications of delayed detection of a failed connection depend on
+   the applications being used.  Possibilities range from no impact at
+   all (or just minor annoyance to the end user) all the way up to
+   impacting mission critical business functions by delaying switchovers
+   to alternate communications paths.
+
+   In addition, tools used to debug connection failures may be affected
+   by the use of a PEP.  For example, PING (described in [RFC792] and
+   [RFC2151]) is often used to test for connectivity.  But, because PING
+   is based on ICMP instead of TCP (i.e., it is implemented using ICMP
+   Echo and Reply commands at the network layer), it is possible that
+   the configuration of the network might route PING traffic around the
+   PEP.  Thus, PING could indicate that an end-to-end path exists
+   between two hosts when it does not actually exist for TCP traffic.
+   Even when the PING traffic does go through the PEP, the diagnostics
+   indications provided by the PING traffic are altered.  For example,
+   if the PING traffic goes transparently through the PEP, PING does not
+   provide any indication that the PEP exists and since the PING traffic
+   is not being subjected to the same processing as TCP traffic, it may
+   not necessarily provide an accurate indication of the network delay
+   being experienced by TCP traffic.  On the other hand, if the PEP
+   terminates the PING and responds to it on behalf of the end host,
+   then the PING provides information only on the connectivity to the
+   PEP.  Traceroute (also described in [RFC2151]) is similarly affected
+   by the presence of the PEP.
+
+4.2 Asymmetric Routing
+
+   Deploying a PEP implementation usually requires that traffic to and
+   from the end hosts is routed through the intermediate node(s) where
+   PEPs reside.  With some networks, this cannot be accomplished, or it
+   might require that the intermediate node is located several hops away
+   from the target link edge which in turn is impractical in many cases
+   and may result in non-optimal routing.
+
+
+
+
+
+
+
+Border, et al.               Informational                     [Page 19]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   Note that this restriction does not apply to all PEP implementations.
+   For example, a PEP which is simply doing ACK spacing only needs to
+   see one direction of the traffic flow (the direction in which the
+   ACKs are flowing).  ACK spacing can be done without seeing the actual
+   flow of data.
+
+4.3 Mobile Hosts
+
+   In environments where a PEP implementation is used to serve mobile
+   hosts, additional problems may be encountered because PEP related
+   state information may need to be transferred to a new PEP node during
+   a handoff.
+
+   When a mobile host moves, it is subject to handovers.  If the
+   intermediate node and home for the serving PEP changes due to
+   handover, any state information that the PEP maintains and is
+   required for continuous operation must be transferred to the new
+   intermediate node to ensure continued operation of the connection.
+   This requires extra work and overhead and may not be possible to
+   perform fast enough, especially if the host moves frequently over
+   cell boundaries of a wireless network.  If the mobile host moves to
+   another IP network, routing to and from the mobile host may need to
+   be changed to traverse a new PEP node.
+
+   Today, mobility implications with respect to using PEPs are more
+   significant to W-LAN networks than to W-WAN networks.  Currently, a
+   W-WAN base station typically does not provide the mobile host with
+   the connection point to the wireline Internet.  (A W-WAN base station
+   may not even have an IP stack.)  Instead, the W-WAN network takes
+   care of mobility with the connection point to the wireline Internet
+   remaining unchanged while the mobile host moves.  Thus, PEP state
+   handover is not currently required in most W-WAN networks when the
+   host moves.  However, this is generally not true in W-LAN networks
+   and, even in the case of W-WAN networks, the user and/or network
+   administrator using a PEP needs to be cognizant of how the W-WAN base
+   stations and the PEP work in case W-WAN PEP state handoff becomes
+   necessary in the future.
+
+4.4 Scalability
+
+   Because a PEP typically processes packet information above the IP
+   layer, a PEP requires more processing power per packet than a router.
+   Therefore, PEPs will always be (at least) one step behind routers in
+   terms of the total throughput they can support.  (Processing above
+   the IP layer is also more difficult to implement in hardware.)  In
+   addition, since most PEP implementations require per connection
+   state, PEP memory requirements are generally significantly higher
+
+
+
+
+Border, et al.               Informational                     [Page 20]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   than with a router.  Therefore, a PEP implementation may have a limit
+   on the number of connections which it can support whereas a router
+   has no such limitation.
+
+   Increased processing power and memory requirements introduce
+   scalability issues with respect to the use of PEPs.  Placement of a
+   PEP on a high speed link or a link which supports a large number of
+   connections may require network topology changes beyond just
+   inserting the PEP into the path of the traffic.  For example, if a
+   PEP can only handle half of the traffic on a link, multiple PEPs may
+   need to be used in parallel, adding complexity to the network
+   configuration to divide the traffic between the PEPs.
+
+4.5 Other Implications of Using PEPs
+
+   This document describes some significant implications with respect to
+   using Performance Enhancing Proxies.  However, the list of
+   implications provided in this document is not necessarily exhaustive.
+   Some examples of other potential implications related to using PEPs
+   include the use of PEPs in multi-homing environments and the use of
+   PEPs with respect to Quality of Service (QoS) transparency.  For
+   example, there may be potential interaction with the priority-based
+   multiplexing mechanism described in Section 3.5 and the use of
+   differentiated services [RFC2475].  Therefore, users and network
+   administrators who wish to deploy a PEP should look not only at the
+   implications described in this document but also at the overall
+   impact (positive and negative) that the PEP will have on their
+   applications and network infrastructure, both initially and in the
+   future when new applications are added and/or changes in the network
+   infrastructure are required.
+
+5. PEP Environment Examples
+
+   The following sections describe examples of environments where PEP is
+   currently used to improve performance.  The examples are provided to
+   illustrate the use of the various PEP types and PEP mechanisms
+   described earlier in the document and to help illustrate the
+   motivation for their development and use.
+
+5.1 VSAT Environments
+
+   Today, VSAT networks are implemented with geosynchronous satellites.
+   VSAT data networks are typically implemented using a star topology.
+   A large hub earth station is located at the center of the star with
+   VSATs used at the remote sites of the network.  Data is sent from the
+   hub to the remote sites via an outroute.  Data is sent from the
+   remote sites to the hub via one or more inroutes.  VSATs represent an
+   environment with highly asymmetric links, with an outroute typically
+
+
+
+Border, et al.               Informational                     [Page 21]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   much larger than an inroute.  (Multiple inroutes can be used with
+   each outroute but any particular VSAT only has access to a single
+   inroute at a time, making the link asymmetric.)
+
+   VSAT networks are generally used to implement private networks (i.e.,
+   intranets) for enterprises (e.g., corporations) with geographically
+   dispersed sites.  VSAT networks are rarely, if ever, used to
+   implement Internet connectivity except at the edge of the Internet
+   (i.e., as the last hop).  Connection to the Internet for the VSAT
+   network is usually implemented at the VSAT network hub site using
+   appropriate firewall and (when necessary) NAT [RFC2663] devices.
+
+5.1.1 VSAT Network Characteristics
+
+   With respect to TCP performance, VSAT networks exhibit the following
+   subset of the satellite characteristics documented in [RFC2488]:
+
+   Long feedback loops
+
+      Propagation delay from a sender to a receiver in a geosynchronous
+      satellite network can range from 240 to 280 milliseconds,
+      depending on where the sending and receiving sites are in the
+      satellite footprint.  This makes the round trip time just due to
+      propagation delay at least 480 milliseconds.  Queueing delay and
+      delay due to shared channel access methods can sometimes increase
+      the total delay to as much as a few seconds.
+
+   Large bandwidth*delay products
+
+      VSAT networks can support capacity ranging from a few kilobits per
+      second up to multiple megabits per second.  When combined with the
+      relatively long round trip time, TCP needs to keep a large number
+      of packets "in flight" in order to fully utilize the satellite
+      link.
+
+   Asymmetric capacity
+
+      As indicated above, the outroute of a VSAT network is usually
+      significantly larger than an inroute.  Even though multiple
+      inroutes can be used within a network, a given VSAT can only
+      access one inroute at a time.  Therefore, the incoming (outroute)
+      and outgoing (inroute) capacity for a VSAT is often very
+      asymmetric.  As outroute capacity has increased in recent years,
+      ratios of 400 to 1 or greater are becoming more and more common.
+      With a TCP maximum segment size of 1460 bytes and delayed
+      acknowledgments [RFC1122] in use, the ratio of IP packet bytes for
+      data to IP packet bytes for ACKs is only (3000 to 40) 75 to 1.
+
+
+
+
+Border, et al.               Informational                     [Page 22]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+      Thus, inroute capacity for carrying ACKs can have a significant
+      impact on TCP performance.  (The issue of asymmetric link impact
+      on TCP performance is described in more detail in [BPK97].)
+
+   With respect to the other satellite characteristics listed in
+   [RFC2488], VSAT networks typically do not suffer from intermittent
+   connectivity or variable round trip times.  Also, VSAT networks
+   generally include a significant amount of error correction coding.
+   This makes the bit error rate very low during clear sky conditions,
+   approaching the bit error rate of a typical terrestrial network.  In
+   severe weather, the bit error rate may increase significantly but
+   such conditions are rare (when looked at from an overall network
+   availability point of view) and VSAT networks are generally
+   engineered to work during these conditions but not to optimize
+   performance during these conditions.
+
+5.1.2 VSAT Network PEP Implementations
+
+   Performance Enhancing Proxies implemented for VSAT networks generally
+   focus on improving throughput (for applications such as FTP and HTTP
+   web page retrievals).  To a lesser degree, PEP implementations also
+   work to improve interactive response time for small transactions.
+
+   There is not a dominant PEP implementation used with VSAT networks.
+   Each VSAT network vendor tends to implement their own version of PEP
+   functionality, integrated with the other features of their VSAT
+   product.  [HNS] and [SPACENET] describe VSAT products with integrated
+   PEP capabilities.  There are also third party PEP implementations
+   designed to be used with VSAT networks.  These products run on nodes
+   external to the VSAT network at the hub and remote sites.  NettGain
+   [FLASH] and Venturi [FOURELLE] are examples of such products.  VSAT
+   network PEP implementations generally share the following
+   characteristics:
+
+      - They focus on improving TCP performance;
+
+      - They use an asymmetric distributed implementation;
+
+      - They use a split connection approach with local acknowledgments
+        and local retransmissions;
+
+      - They support some form of compression to reduce the amount of
+        bandwidth required (with emphasis on saving inroute bandwidth).
+
+   The key differentiators between VSAT network PEP implementations are:
+
+      - The maximum throughput they attempt to support (mainly a
+        function of the amount of buffer space they use);
+
+
+
+Border, et al.               Informational                     [Page 23]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+      - The protocol used over the satellite link.  Some implementations
+        use a modified version of TCP while others use a proprietary
+        protocol running on top of UDP;
+
+      - The type of compression used.  Third party VSAT network PEP
+        implementations generally focus on application (e.g., HTTP)
+        specific compression algorithms while PEP implementations
+        integrated into the VSAT network generally focus on link
+        specific compression.
+
+   PEP implementations integrated into a VSAT product are generally
+   transparent to the end systems.  Third party PEP implementations used
+   with VSAT networks usually require configuration changes in the
+   remote site end systems to route TCP packets to the remote site
+   proxies but do not require changes to the hub site end systems.  In
+   some cases, the PEP implementation is actually integrated
+   transparently into the end system node itself, using a "bump in the
+   stack" approach.  In all cases, the use of a PEP is non-transparent
+   to the user, i.e., the user is aware when a PEP implementation is
+   being used to boost performance.
+
+5.1.3 VSAT Network PEP Motivation
+
+   VSAT networks, since the early stages of their deployment, have
+   supported the use of local termination of a protocol (e.g., SDLC and
+   X.25) on each side of the satellite link to hide the satellite link
+   from the applications using the protocol.  Therefore, when LAN
+   capabilities were added to VSAT networks, VSAT customers expected
+   and, in fact, demanded, the use of similar techniques for improving
+   the performance of IP based traffic, in particular TCP traffic.
+
+   As indicated in Section 5.1, VSAT networks are primarily used to
+   implement intranets with Internet connectivity limited to and closely
+   controlled at the hub site of the VSAT network.  Therefore, VSAT
+   customers are not as affected (or at least perceive that they are not
+   as affected) by the Internet related implications of using PEPs as
+   are other technologies.  Instead, what is more important to VSAT
+   customers is the optimization of the network.  And, VSAT customers,
+   in general, prefer that the optimization of the network be done by
+   the network itself rather than by implementing changes (such as
+   enabling the TCP scaled window option) to their own equipment.  VSAT
+   customers prefer to optimize their end system configuration for local
+   communications related to their local mission critical functions and
+   let the VSAT network hide the presence of the satellite link as much
+   as possible.  VSAT network vendors have also been able to use PEP
+   functionality to provide value added "services" to their customers
+   such as extending the useful life of older equipment which
+   includes older, "non-modern" TCP stacks.
+
+
+
+Border, et al.               Informational                     [Page 24]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   Of course, as the line between intranets and the Internet continues
+   to fade, the implications of using PEPs start to become more
+   significant for VSAT networks.  For example, twelve years ago
+   security was not a major concern because the equipment cost related
+   to being able to intercept VSAT traffic was relatively high.  Now, as
+   technology has advanced, the cost is much less prohibitive.
+   Therefore, because the use of PEP functionality in VSAT networks
+   prevents the use of IPsec, customers must rely on the use of higher
+   layer security mechanisms such as TLS or on proprietary security
+   mechanisms implemented in the VSAT networks themselves (since
+   currently many applications are incapable of making (or simply don't
+   make) use of the standardized higher layer security mechanisms).
+   This, in turn, affects the cost of the VSAT network as well as
+   affects the ability of the customers to make use of Internet based
+   capabilities.
+
+5.2 W-WAN Environments
+
+   In mobile wireless WAN (W-WAN) environments the wireless link is
+   typically used as the last-hop link to the end user.  W-WANs include
+   such networks as GSM [GSM], GPRS [GPRS],[BW97], CDPD [CDPD], IS-95
+   [CDMA], RichoNet, and PHS.  Many of these networks, but not all, have
+   been designed to provide mobile telephone voice service in the first
+   place but include data services as well or they evolve from a mobile
+   telephone network.
+
+5.2.1 W-WAN Network Characteristics
+
+   W-WAN links typically exhibit some combination of the following link
+   characteristics:
+
+      -  low bandwidth (with some links the available bandwidth might be
+         as low as a few hundred bits/sec)
+
+      -  high latency (minimum round-trip delay close to one second is
+         not exceptional)
+
+      -  high BER resulting in frame or packet losses, or long variable
+         delays due to local link-layer error recovery
+
+      -  some W-WAN links have a lot of internal buffer space which tends
+         to accumulate data, thus resulting in increased round-trip
+         delay due to long (and variable) queuing delays
+
+      -  on some W-WAN links the users may share common channels for
+         their data packet delivery which, in turn, may cause unexpected
+         delays to the packet delivery of a user due to simultaneous use
+         of the same channel resources by the other users
+
+
+
+Border, et al.               Informational                     [Page 25]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+      -  unexpected link disconnections (or intermittent link outages)
+         may occur frequently and the period of disconnection may last a
+         very long time
+
+      -  (re)setting the link-connection up may take a long time
+         (several tens of seconds or even minutes)
+
+      -  the W-WAN network typically takes care of terminal mobility:
+         the connection point to the Internet is retained while the user
+         moves with the mobile host
+
+      -  the use of most W-WAN links is expensive.  Many of the service
+         providers apply time-based charging.
+
+5.2.2 W-WAN PEP Implementations
+
+   Performance Enhancing Proxies implemented for W-WAN environments
+   generally focus on improving the interactive response time but at the
+   same time aim at improving throughput, mainly by reducing the
+   transfer volume over the inherently slow link in various ways.  To
+   achieve this, typically enhancements are applied at almost all
+   protocol layers.
+
+5.2.2.1 Mowgli System
+
+   The Mowgli system [KRA94] is one of the early approaches to address
+   the challenges induced by the problematic characteristics of low
+   bandwidth W-WAN links.
+
+   The indirect approach used in Mowgli is not limited to a single layer
+   as in many other split connection approaches, but it involves all
+   protocol layers.  The basic architecture is based on split TCP (UDP
+   is also supported) together with full support for application layer
+   proxies with a distributed PEP approach.  An application layer proxy
+   pair may be added between a client and server, the agent (local
+   proxy) on a mobile host and the proxy on an intermediate node that
+   provides the mobile host with the connection to the wireline
+   Internet.  Such a pair may be either explicit or fully transparent to
+   the applications, but it is, at all times, under end-user control
+   thus allowing the user to select the traffic that traverses through
+   the PEP implementation and choose end-to-end IP for other traffic.
+
+   In order to allow running legacy applications unmodified and without
+   recompilation, the socket layer implementation on the mobile host is
+   slightly modified to connect the applications, which are configured
+   to traverse through the PEP, to a local agent while retaining the
+   original TCP/IP socket semantics.  Two types of application layer
+   agent-proxy pairs can be configured for mobile host application use.
+
+
+
+Border, et al.               Informational                     [Page 26]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   A generic pair can be used with any application and it simply
+   provides split transport service with some optional generic
+   enhancements like compression.  An application-specific pair can be
+   tailored for any application or a group of applications that are able
+   to leverage the same kind of enhancements.  A good example of
+   enhancements achieved with an application-specific proxy pair is the
+   Mowgli WWW system that improves significantly the user perceived
+   response time of Web browsing mainly by reducing the transfer volume
+   and the number of round trips over the wireless link [LAKLR95],
+   [LHKR96].
+
+   Mowgli also provides an option to replace the TCP/IP core protocols
+   on the last-hop link with a custom protocol that is tuned for low-
+   bandwidth W-WAN links [KRLKA97].  This protocol was designed to
+   provide the same transport service with similar semantics as regular
+   TCP and UDP provide, but use a different protocol implementation that
+   can freely apply any appropriate protocol mechanisms without being
+   constrained by the current TCP/IP packet format or protocol
+   operation.  As this protocol is required to operate over a single
+   logical link only, it could partially combine the protocol control
+   information and protocol operation of the link, network, and
+   transport layers.  In addition, the protocol can operate on top of
+   various link services, for example on top of different raw link
+   services, on top of PPP, on top of IP, or even on top of a single TCP
+   connection using it as a link service and implementing "TCP
+   multiplexing" over it.  In all other cases, except when the protocol
+   is configured to operate on top of raw (wireless) link service, IP
+   may co-exist with the custom protocol allowing simultaneous end-to-
+   end IP delivery for the traffic not traversing through the PEP
+   implementation.
+
+   Furthermore, the custom protocol can be run in different operation
+   modes which turn on or off certain protocol functions depending on
+   the underlying link service.  For example, if the underlying link
+   service provides reliable data delivery, the checksum and the
+   window-based error recovery can be turned off, thus reducing the
+   protocol overhead; only a very simple recovery mechanism is needed to
+   allow recovery from an unexpected link disconnection.  Therefore, the
+   protocol design was able to use extremely efficient header encoding
+   (only 1-3 bytes per packet in a typical case), reduce the number of
+   round trips significantly, and various features that are useful with
+   low-bandwidth W-WAN links were easy to add.  Such features include
+   suspending the protocol operation over the periods of link
+   disconnection or link outage together with fast start once the link
+   becomes operational again, priority-based multiplexing of user data
+   over the W-WAN link thus offering link capacity to interactive
+
+
+
+
+
+Border, et al.               Informational                     [Page 27]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   applications in a timely manner even in the presence of bandwidth-
+   intensive background transfers, and link-level flow control to
+   prevent data from accumulating into the W-WAN link internal buffers.
+
+   If desired, regular TCP/IP transport, possibly with corresponding
+   protocol modifications in TCP (and UDP) that would make it more
+   suitable for W-WAN links, can be employed on the last-hop link.
+
+5.2.2.2 Wireless Application Protocol (WAP)
+
+   The Mowgli system was designed to support mobile hosts that are
+   attached to the Internet over constrained links, but did not address
+   the specific challenges with low-end mobile devices.  Many mobile
+   wireless devices are power, memory, and processing constrained, and
+   the communication links to these devices have lower bandwidth and
+   less stable connections.  These limitations led designers to develop
+   the Wireless Application Protocol (WAP) that specifies an application
+   framework and network protocols intended to work across differing
+   narrowband wireless network technologies bringing Internet content
+   and advanced data services to low-end digital cellular phones and
+   other mobile wireless terminals, such as pagers and PDAs.
+
+   The WAP model consists of a WAP client (mobile terminal), a WAP
+   proxy, and an origin server.  It requires a WAP proxy between the WAP
+   client and the server on the Internet.  WAP uses a layered, scalable
+   architecture [WAPARCH], specifying the following five protocol layers
+   to be used between the terminal and the proxy: Application Layer
+   (WAE) [WAPWAE], Session Layer (WSP) [WAPWSP], Transaction Layer (WTP)
+   [WAPWTP], Security Layer (WTLS) [WAPWTLS], and Transport Layer (WDP)
+   [WAPWDP].  Standard Internet protocols are used between the proxy and
+   the origin server.  If the origin server includes WAP proxy
+   functionality, it is called a WAP Server.
+
+   In a typical scenario, a WAP client sends an encoded WAP request to a
+   WAP proxy.  The WAP proxy translates the WAP request into a WWW
+   (HTTP) request, performing the required protocol conversions, and
+   submits this request to a standard web server on the Internet.  After
+   the web server responds to the WAP proxy, the response is encoded
+   into a more compact binary format to decrease the size of the data
+   over the air.  This encoded response is forwarded to the WAP client
+   [WAPPROXY].
+
+   WAP operates over a variety of bearer datagram services.  When
+   communicating over these bearer services, the WAP transport layer
+   (WDP) is always used between the WAP client and WAP proxy and it
+   provides port addressed datagram service to the higher WAP layers.
+   If the bearer service supports IP (e.g., GSM-CSD, GSM-GPRS, IS-136,
+   CDPD), UDP is used as the datagram protocol.  However, if the bearer
+
+
+
+Border, et al.               Informational                     [Page 28]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   service does not support IP (e.g., GSM-SMS, GSM-USSD, GSM Cell
+   Broadcast, CDMA-SMS, TETRA-SDS), WDP implements the required datagram
+   protocol as an adaptation layer between the bearer network and the
+   protocol stack.
+
+   The use of the other layers depends on the port number.  WAP has
+   registered a set of well-known ports with IANA.  The port number
+   selected by the application for communication between a WAP client
+   and proxy defines the other layers to be used at each end.  The
+   security layer, WTLS, provides privacy, data integrity and
+   authentication.  Its functionality is similar to TLS 1.0 [RFC2246]
+   extended with datagram support, optimized handshake and dynamic key
+   refreshing.  If the origin server includes WAP proxy functionality,
+   it might be used to facilitate the end-to-end security solutions,
+   otherwise it provides security between the mobile terminal and the
+   proxy.
+
+   The transaction layer, WTP, is message based without connection
+   establishment and tear down.  It supports three types of transaction
+   classes: an unconfirmed request (unidirectional), a reliable
+   (confirmed) request (unidirectional), and a reliable (confirmed)
+   request-reply transaction.  Data is carried in the first packet and
+   3-way handshake is eliminated to reduce latencies.  In addition
+   acknowledgments, retransmission, and flow control are provided.  It
+   allows more than one outstanding transaction at a time.  It handles
+   the bearer dependence of a transfer, e.g., selects timeout values and
+   packet sizes according to the bearer.  Unfortunately, WTP uses fixed
+   retransmission timers and does not include congestion control, which
+   is a potential problem area as the use of WAP increases [RFC3002].
+
+   The session layer, WSP, supports binary encoded HTTP 1.1 with some
+   extensions such as long living session with suspend/resume facility
+   and state handling, header caching, and push facility.  On top of the
+   architecture is the application environment (WAE).
+
+5.2.3 W-WAN PEP Motivation
+
+   As indicated in Section 5.2.1, W-WAN networks typically offer very
+   low bandwidth connections with high latency and relatively frequent
+   periods of link disconnection and they usually are expensive to use.
+   Therefore, the transfer volume and extra round-trips, such as those
+   associated with TCP connection setup and teardown, must be reduced
+   and the slow W-WAN link should be efficiently shielded from excess
+   traffic and global (wired) Internet congestion to make Internet
+   access usable and economical.  Furthermore, interactive traffic must
+   be transmitted in a timely manner even if there are other
+   simultaneous bandwidth intensive (background) transfers and during
+   the periods with connectivity the link must be kept fully utilized
+
+
+
+Border, et al.               Informational                     [Page 29]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   due to expensive use.  In addition, the (long) periods of link
+   disconnection must not abort active (bulk data) transfers, if an
+   end-user so desires.
+
+   As (all) applications cannot be made mobility/W-WAN aware in short
+   time frame or maybe ever, support for mobile W-WAN use should be
+   implemented in a way which allows most applications, at least those
+   running on fixed Internet hosts, to continue their operation
+   unmodified.
+
+5.3 W-LAN Environments
+
+   Wireless LANs (W-LAN) are typically organized in a cellular topology
+   where an access point with a W-LAN transceiver controls a single
+   cell.  A cell is defined in terms of the coverage area of the base
+   station.  The access points are directly connected to the wired
+   network.  The access point in each of the cells is responsible for
+   forwarding packets to and from the hosts located in the cell.  Often
+   the hosts with W-LAN transceivers are mobile.  When such a mobile
+   host moves from one cell to another cell, the responsibility for
+   forwarding packets between the wired network and the mobile host must
+   be transferred to the access point of the new cell.  This is known as
+   a handoff.  Many W-LAN systems also support an operation mode
+   enabling ad-hoc networking.  In this mode access points are not
+   necessarily needed, but hosts with W-LAN transceiver can communicate
+   directly with the other hosts within the transceiver's transmission
+   range.
+
+5.3.1 W-LAN Network Characteristics
+
+   Current wireless LANs typically provide link bandwidth from 1 Mbps to
+   11 Mbps.  In the future, wide deployment of higher bandwidths up to
+   54 Mbps or even higher can be expected.  The round-trip delay with
+   wireless LANs is on the order of a few milliseconds or tens of
+   milliseconds.  Examples of W-LANs include IEEE 802.11, HomeRF, and
+   Hiperlan.  Wireless personal area networks (WPAN) such as Bluetooth
+   can use the same PEP techniques.
+
+   Wireless LANs are error-prone due to bit errors, collisions and link
+   outages.  In addition, consecutive packet losses may also occur
+   during handoffs.  Most W-LAN MAC protocols perform low level
+   retransmissions.  This feature shields upper layers from most losses.
+   However, unavoidable losses, retransmission latency and link outages
+   still affect upper layers.  TCP performance over W-LANs or a network
+   path involving a W-LAN link is likely to suffer from these effects.
+
+
+
+
+
+
+Border, et al.               Informational                     [Page 30]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   As TCP wrongly interprets these packet losses to be network
+   congestion, the TCP sender reduces its congestion window and is often
+   forced to timeout in order to recover from the consecutive losses.
+   The result is often unacceptably poor end-to-end performance.
+
+5.3.2 W-LAN PEP Implementations: Snoop
+
+   Berkeley's Snoop protocol [SNOOP] is a TCP-specific approach in which
+   a TCP-aware module, a Snoop agent, is deployed at the W-LAN base
+   station that acts as the last-hop router to the mobile host.  Snoop
+   aims at retaining the TCP end-to-end semantics.  The Snoop agent
+   monitors every packet that passes through the base station in either
+   direction and maintains soft state for each TCP connection.  The
+   Snoop agent is an asymmetric PEP implementation as it operates
+   differently on TCP data and ACK channels as well as on the uplink
+   (from the mobile host) and downlink (to the mobile host) TCP
+   segments.
+
+   For a data transfer to a mobile host, the Snoop agent caches
+   unacknowledged TCP data segments which it forwards to the TCP
+   receiver and monitors the corresponding ACKs.  It does two things:
+
+   1. Retransmits any lost data segments locally by using local timers
+      and TCP duplicate ACKs to identify packet loss, instead of waiting
+      for the TCP sender to do so end-to-end.
+
+   2. Suppresses the duplicate ACKs on their way from the mobile host
+      back to the sender, thus avoiding fast retransmit and congestion
+      avoidance at the latter.
+
+   Suppressing the duplicate ACKs is required to avoid unnecessary fast
+   retransmits by the TCP sender as the Snoop agent retransmits a packet
+   locally.  Consider a system that employs the Snoop agent and a TCP
+   sender S that sends packets to receiver R via a base station BS.
+   Assume that S sends packets A, B, C, D, E (in that order) which are
+   forwarded by BS to the wireless receiver R.  Assume the first
+   transmission of packet B is lost due to errors on the wireless link.
+   In this case, R receives packets A, C, D, E and B (in that order).
+   Receipt of packets C, D and E trigger duplicate ACKs.  When S
+   receives three duplicate ACKs, it triggers fast retransmit (which
+   results in a retransmission, as well as reduction of the congestion
+   window).  The Snoop agent also retransmits B locally, when it
+   receives three duplicate ACKs.  The fast retransmit at S occurs
+   despite the local retransmit on the wireless link, degrading
+   throughput.  Snoop deals with this problem by dropping TCP duplicate
+   ACKs appropriately at BS.
+
+
+
+
+
+Border, et al.               Informational                     [Page 31]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   For a data transfer from a mobile host, the Snoop agent detects the
+   packet losses on the wireless link by monitoring the data segments it
+   forwards.  It then employs either Negative Acknowledgements (NAK)
+   locally or Explicit Loss Notifications (ELN) to inform the mobile
+   sender that the packet loss was not related to congestion, thus
+   allowing the sender to retransmit without triggering normal
+   congestion control procedures.  To implement this, changes at the
+   mobile host are required.
+
+   When a Snoop agent uses NAKs to inform the TCP sender of the packet
+   losses on the wireless link, one possibility to implement them is
+   using the Selective Acknowledgment (SACK) option of TCP [RFC2018].
+   This requires enabling SACK processing at the mobile host.  The Snoop
+   agent sends a TCP SACK, when it detects a hole in the transmission
+   sequence from the mobile host or when it has not received any new
+   packets from the mobile host for a certain time period.  This
+   approach relies on the advisory nature of the SACKs: the mobile
+   sender is advised to retransmit the missing segments indicated by
+   SACK, but it must not assume successful end-to-end delivery of the
+   segments acknowledged with SACK as these segments might get lost
+   later in the path to the receiver.  Instead, the sender must wait for
+   a cumulative ACK to arrive.
+
+   When the ELN mechanism is used to inform the mobile sender of the
+   packet losses, Snoop uses one of the 'unreserved' bits in the TCP
+   header for ELN [SNOOPELN].  The Snoop agent keeps track of the holes
+   that correspond to segments lost over the wireless link.  When a
+   (duplicate) ACK corresponding to a hole in the sequence space arrives
+   from the TCP receiver, the Snoop agent sets the ELN bit on the ACK to
+   indicate that the loss is unrelated to congestion and then forwards
+   the ACK to the TCP sender.  When the sender receives a certain number
+   of (duplicate) ACKs with ELN (a configurable variable at the mobile
+   host, e.g., two), it retransmits the missing segment without
+   performing any congestion control measures.
+
+   The ELN mechanism using one of the six bits reserved for future use
+   in the TCP header is dangerous as it exercises checks that might not
+   be correctly implemented in TCP stacks, and may expose bugs.
+
+   A scheme such as Snoop is needed only if the possibility of a fast
+   retransmit due to wireless errors is non-negligible.  In particular,
+   if the wireless link uses link-layer recovery for lost data, then
+   this scheme is not beneficial.  Also, if the TCP window tends to stay
+   smaller than four segments, for example, due to congestion related
+   losses on the wired network, the probability that the Snoop agent
+   will have an opportunity to locally retransmit a lost packet is
+   small.  This is because at least three duplicate ACKs are needed to
+   trigger the local retransmission, but due to small window the Snoop
+
+
+
+Border, et al.               Informational                     [Page 32]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   agent may not be able to forward three new packets after the lost
+   packet and thus induce the required three duplicate ACKs.
+   Conversely, when the TCP window is large enough, Snoop can provide
+   significant performance improvement (compared with standard TCP).
+
+   In order to alleviate the problem with small TCP windows, Snoop
+   proposes a solution in which a TCP sender is allowed to transmit a
+   new data segment for each duplicate ACK it receives as long as the
+   number of duplicate ACKs is less than the threshold for TCP fast
+   retransmission (three duplicate ACKs).  If the new segment reaches
+   the receiver, it will generate another duplicate ACK which, in turn,
+   allows the sender to transmit yet another data segment.  This
+   continues until enough duplicate ACKs have accumulated to trigger TCP
+   fast retransmission.  This proposal is the same as the "Limited
+   Transmit" proposal [RFC3042] that has recently been forwarded to the
+   standards track.  However, to be able to benefit from this solution,
+   it needs to be deployed on TCP senders and therefore it is not ready
+   for use in a short time frame.
+
+   Snoop requires the intermediate node (base station) to examine and
+   operate on the traffic between the mobile host and the other end host
+   on the wired Internet.  Hence, Snoop does not work if the IP traffic
+   is encrypted.  Possible solutions involve:
+
+   - making the Snoop agent a party to the security association
+     between the client and the server;
+
+   - IPsec tunneling mode, terminated at the Snooping base station.
+
+   However, these techniques require that users trust base stations.
+
+   Snoop also requires that both the data and the corresponding ACKs
+   traverse the same base station.  Furthermore, the Snoop agent may
+   duplicate efforts by the link layer as it retransmits the TCP data
+   segments "at the transport layer" across the wireless link.  (Snoop
+   has been described by its designers as a TCP-aware link layer.  This
+   is the right approach: the link and network layers can be much more
+   aware of each other than strict layering suggests.)
+
+5.3.3 W-LAN PEP Motivation
+
+   Wireless LANs suffer from an error prone wireless channel.  Errors
+   can typically be considered bursty and channel conditions may change
+   rapidly from mobility and environmental changes.  Packets are dropped
+   from bit errors or during handovers.  Periods of link outage can also
+   be experienced.  Although the typical MAC performs retransmissions,
+   dropped packets, outages and retransmission latency still can have
+   serious performance implications for IP performance, especially TCP.
+
+
+
+Border, et al.               Informational                     [Page 33]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   PEPs can be used to alleviate problems caused by packet losses,
+   protect TCP from link outages, and to add priority multiplexing.
+   Techniques such as Snoop are integrally implemented in access points,
+   while priority and compression schemes are distributed across the W-
+   LAN.
+
+6. Security Considerations
+
+   The use of Performance Enhancing Proxies introduces several issues
+   which impact security.  First, (as described in detail in Section
+   4.1.1,) using PEPs and using IPsec is generally mutually exclusive.
+   Unless the PEP is also both capable and trusted to be the endpoint of
+   an IPsec tunnel (and the use of an IPsec tunnel is deemed good enough
+   security for the applicable threat model), a user or network
+   administrator must choose between improved performance and network
+   layer security.  In some cases, transport (or higher) layer security
+   can be used in conjunction with a PEP to mitigate the impact of not
+   having network layer security.  But, support by applications for the
+   use of transport (or higher) layer security is far from ubiquitous.
+
+   Additionally, the PEP itself needs to be protected from attack.
+   First, even when IPsec tunnels are used with the PEP, the PEP
+   represents a point in the network where traffic is exposed.  And, the
+   placement of a PEP in the network makes it an ideal platform from
+   which to launch a denial of service or man in the middle attack.
+   (Also, taking the PEP out of action is a potential denial of service
+   attack itself.)  Therefore, the PEP must be protected (e.g., by a
+   firewall) or must protect itself from improper access by an attacker
+   just like any other device which resides in a network.
+
+7. IANA Considerations
+
+   This document is an informational overview document and, as such,
+   does not introduce new nor modify existing name or number spaces
+   managed by IANA.
+
+8. Acknowledgements
+
+   This document grew out of the Internet-Draft "TCP Performance
+   Enhancing Proxy Terminology", RFC 2757 "Long Thin Networks", and work
+   done in the IETF TCPSAT working group.  The authors are indebted to
+   the active members of the PILC working group.  In particular, Joe
+   Touch and Mark Allman gave us invaluable feedback on various aspects
+   of the document and Magdolna Gerendai provided us with essential help
+   on the WAP example.
+
+
+
+
+
+
+Border, et al.               Informational                     [Page 34]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+9. References
+
+   [BBKT97]    P. Bhagwat, P. Bhattacharya, A. Krishna, S.K. Tripathi,
+               "Using channel state dependent packet scheduling to
+               improve TCP throughput over wireless LANs," ACM Wireless
+               Networks, March 1997, pp. 91 - 102.  Available at:
+               http://www.acm.org/pubs
+               /articles/journals/wireless/1997-3-1/p91-bhagwat/p91-
+               bhagwat.pdf
+
+   [BPK97]     H. Balakrishnan, V.N. Padmanabhan, R.H. Katz, "The
+               Effects of Asymmetry on TCP Performance," Proc. ACM/IEEE
+               Mobicom, Budapest, Hungary, September 1997.
+
+   [BW97]      G. Brasche, B. Walke, "Concepts, Services, and Protocols
+               of the New GSM Phase 2+ General Packet Radio Service,"
+               IEEE Communications Magazine, Vol. 35, No. 8, August
+               1997.
+
+   [CDMA]      Electronic Industry Alliance (EIA)/Telecommunications
+               Industry Association (TIA), IS-95: Mobile Station-Base
+               Station Compatibility Standard for Dual-Mode Wideband
+               Spread Spectrum Cellular System, 1993.
+
+   [CDPD]      Wireless Data Forum, CDPD System Specification, Release
+               1.1, 1995.
+
+   [CTC+97]    H. Chang, C. Tait, N. Cohen, M. Shapiro, S. Mastrianni,
+               R. Floyd, B. Housel, D. Lindquist, "Web Browsing in a
+               Wireless Environment: Disconnected and Asynchronous
+               Operation in ARTour Web Express," Proc. MobiCom'97,
+               Budapest, Hungary, September 1997.
+
+   [FMSBMR98]  D.C. Feldmeier, A.J. McAuley, J.M. Smith, D.S. Bakin,
+               W.S. Marcus, T.M. Raleigh, "Protocol Boosters," IEEE
+               Journal on Selected Areas of Communication, Vol. 16, No.
+               3, April 1998.
+
+   [FLASH]     Flash Networks Ltd., performance boosting products
+               technology vendor based in Holmdel, New Jersey.  Website
+               at http://www.flashnetworks.com.
+
+   [FOURELLE]  Fourelle Systems, performance boosting products
+               technology vendor based in Santa Clara, California.
+               Website at http://www.fourelle.com.
+
+   [GPRS]      ETSI, "General Packet Radio Service (GPRS): Service
+               Description, Stage 2," GSM03.60, v.6.1.1, August 1998.
+
+
+
+Border, et al.               Informational                     [Page 35]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   [GSM]       M. Rahnema, "Overview of the GSM system and protocol
+               architecture," IEEE Communications Magazine, Vol. 31, No.
+               4, pp. 92-100, April 1993.
+
+   [HNS]       Hughes Network Systems, Inc., VSAT technology vendor
+               based in Germantown, Maryland.  Website at
+               http://www.hns.com.
+
+   [I-TCP]     A. Bakre, B.R. Badrinath, "I-TCP: Indirect TCP for Mobile
+               Hosts," Proc. 15th International Conference on
+               Distributed Computing Systems (ICDCS), May 1995.
+
+   [KRA94]     M. Kojo, K. Raatikainen, T. Alanko, "Connecting Mobile
+               Workstations to the Internet over a Digital Cellular
+               Telephone Network," Proc. Workshop on Mobile and Wireless
+               Information Systems (MOBIDATA), Rutgers University, NJ,
+               November 1994.  Revised version published in Mobile
+               Computing, pp. 253-270, Kluwer, 1996.
+
+   [KRLKA97]   M. Kojo, K. Raatikainen, M. Liljeberg, J. Kiiskinen, T.
+               Alanko, "An Efficient Transport Service for Slow Wireless
+               Telephone Links," IEEE Journal on Selected Areas of
+               Communication, Vol. 15, No. 7, September 1997.
+
+   [LAKLR95]   M. Liljeberg, T. Alanko, M. Kojo, H. Laamanen, K.
+               Raatikainen, "Optimizing World-Wide Web for Weakly-
+               Connected Mobile Workstations: An Indirect Approach,"
+               Proc. of the 2nd Int. Workshop on Services in Distributed
+               and Networked Environments, Whistler, Canada, pp. 132-
+               139, June 1995.
+
+   [LHKR96]    M. Liljeberg, H. Helin, M. Kojo, K. Raatikainen, "Mowgli
+               WWW Software: Improved Usability of WWW in Mobile WAN
+               Environments," Proc. IEEE Global Internet 1996
+               Conference, London, UK, November 1996.
+
+   [M-TCP]     K. Brown, S. Singh, "M-TCP: TCP for Mobile Cellular
+               Networks," ACM Computer Communications Review Volume
+               27(5), 1997.  Available at
+               ftp://ftp.ece.orst.edu/pub/singh/papers/mtcp.ps.gz.
+
+   [Pax99]     V. Paxson, "End-to-End Internet Packet Dynamics,"
+               IEEE/ACM Transactions on Networking, Vol. 7, No. 3, 1999,
+               pp. 277-292.
+
+   [PILCWEB]   http://pilc.grc.nasa.gov.
+
+
+
+
+
+Border, et al.               Informational                     [Page 36]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   [RFC0792]   Postel, J., "Internet Control Message Protocol", STD 5,
+               RFC 792, September 1981.
+
+   [RFC0793]   Postel, J., "Transmission Control Protocol", STD 7, RFC
+               793, September 1981.
+
+   [RFC1122]   Braden, R., "Requirements for Internet Hosts --
+               Communications Layers", STD 3, RFC 1122, October 1989.
+
+   [RFC1144]   Jacobson, V., "Compressing TCP/IP Headers for Low-Speed
+               Serial Links", RFC 1144, February 1990.
+
+   [RFC1323]   Jacobson, V., Braden, R. and D. Borman, "TCP Extensions
+               for High Performance", RFC 1323, May 1992.
+
+   [RFC1958]   Carpenter, B., "Architectural Principles of the
+               Internet", RFC 1958, June 1996.
+
+   [RFC2018]   Mathis, M., Mahdavi, J., Floyd, S. and A. Romanow, "TCP
+               Selective Acknowledgment Options", RFC 2018, October
+               1996.
+
+   [RFC2151]   Kessler, G. and S. Shepard, "A Primer On Internet and
+               TCP/IP Tools and Utilities", FYI 30, RFC 2151, June 1997.
+
+   [RFC2246]   Dierks, T. and C. Allen, "The TLS Protocol Version 1.0",
+               RFC 2246, January 1999.
+
+   [RFC2393]   Shacham, A., Monsour, R., Pereira, R. and M. Thomas, "IP
+               Payload Compression Protocol (IPcomp)", RFC 2393,
+               December 1998.
+
+   [RFC2401]   Kent, S., and R. Atkinson, "Security Architecture for the
+               Internet Protocol", RFC 2401, November 1998.
+
+   [RFC2475]   Blake, S., Black, D., Carlson, M., Davies, E., Wang, Z.
+               and W. Weiss, "An Architecture for Differentiated
+               Services", RFC 2475, December 1998.
+
+   [RFC2488]   Allman, M., Glover, D. and L. Sanchez, "Enhancing TCP
+               Over Satellite Channels using Standard Mechanisms", BCP
+               28, RFC 2488, January 1999.
+
+   [RFC2507]   Degermark, M., Nordgren, B. and S. Pink, "IP Header
+               Compression", RFC 2507, February 1999.
+
+
+
+
+
+
+Border, et al.               Informational                     [Page 37]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   [RFC2508]   Casner, S. and V. Jacobson, "Compressing IP/UDP/RTP
+               Headers for Low-Speed Serial Links", RFC 2508, February
+               1999.
+
+   [RFC2509]   Engan, M., Casner, S. and C. Bormann, "IP Header
+               Compression over PPP", RFC 2509, February 1999.
+
+   [RFC2663]   Srisuresh, P. and Y. Holdrege, "IP Network Address
+               Translator (NAT) Terminology and Considerations", RFC
+               2663, August 1999.
+
+   [RFC2760]   Allman, M., Dawkins, S., Glover, D., Griner, J.,
+               Henderson, T., Heidemann, J., Kruse, H., Ostermann, S.,
+               Scott, K., Semke, J., Touch, J. and D. Tran, "Ongoing TCP
+               Research Related to Satellites", RFC 2760, February 2000.
+
+   [RFC3002]   Mitzel, D., "Overview of 2000 IAB Wireless
+               Internetworking Workshop", RFC 3002, December 2000.
+
+   [RFC3042]   Allman, M., Balakrishnan, H. and S. Floyd, "Enhancing
+               TCP's Loss Recovery Using Limited Transmit", RFC 3042,
+               January 2001.
+
+   [SHEL00]    Z. Shelby, T. Saarinen, P. Mahonen, D. Melpignano, A.
+               Marshall, L. Munoz, "Wireless IPv6 Networks - WINE," IST
+               Mobile Summit, Ireland, October 2000.
+
+   [SNOOP]     H. Balakrishnan, S. Seshan, E. Amir, R. Katz, "Improving
+               TCP/IP Performance over Wireless Networks," Proc. 1st ACM
+               Conference on Mobile Communications and Networking
+               (Mobicom), Berkeley, California, November 1995.
+
+   [SNOOPELN]  H. Balakrishnan, R. Katz, "Explicit Loss Notification and
+               Wireless Web Performance," Proc. IEEE Globecom 1998,
+               Internet Mini-Conference, Sydney, Australia, November
+               1998.
+
+   [SPACENET]  Spacenet, VSAT technology vendor based in Mclean,
+               Virginia.  Website at http://www.spacenet.com.
+
+   [SRC84]     J.H. Saltzer, D.P. Reed, D.D. Clark, "End-To-End
+               Arguments in System Design," ACM TOCS, Vol. 2, No. 4, pp.
+               277-288, November 1984.
+
+   [WAPARCH]   Wireless Application Protocol Architecture Specification,
+               April 1998, http://www.wapforum.org.
+
+
+
+
+
+Border, et al.               Informational                     [Page 38]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   [WAPPROXY]  Wireless Application Protocol Push Proxy Gateway Service
+               Specification, August 1999, http://www.wapforum.org.
+
+   [WAPWAE]    Wireless Application Protocol Wireless Application
+               Environment Overview, March 2000,
+               http://www.wapforum.org.
+
+   [WAPWDP]    Wireless Application Protocol Wireless Datagram Protocol
+               Specification, February 2000, http://www.wapforum.org.
+
+   [WAPWSP]    Wireless Application Protocol Wireless Session Protocol
+               Specification, May 2000, http://www.wapforum.org.
+
+   [WAPWTLS]   Wireless Application Protocol Wireless Transport Layer
+               Security Specification, February 2000,
+               http://www.wapforum.org.
+
+   [WAPWTP]    Wireless Application Protocol Wireless Transaction
+               Protocol Specification, February 2000,
+               http://www.wapforum.org.
+
+   [Zhang00]   Y. Zhang, B. Singh, "A Multi-Layer IPsec Protocol," Proc.
+               of the 9th USENIX Security Symposium, Denver,
+               Colorado, August 2000.  Available at
+               http://www.wins.hrl.com/people/ygz/papers/usenix00.html.
+
+10. Authors' Addresses
+
+   Questions about this document may be directed to:
+
+   John Border
+   Hughes Network Systems
+   11717 Exploration Lane
+   Germantown, Maryland  20876
+
+   Phone: +1-301-548-6819
+   Fax:   +1-301-548-1196
+   EMail: border@hns.com
+
+
+
+
+
+
+
+
+
+
+
+
+
+Border, et al.               Informational                     [Page 39]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   Markku Kojo
+   Department of Computer Science
+   University of Helsinki
+   P.O. Box 26 (Teollisuuskatu 23)
+   FIN-00014 HELSINKI
+   Finland
+
+   Phone: +358-9-1914-4179
+   Fax:   +358-9-1914-4441
+   EMail: kojo@cs.helsinki.fi
+
+
+   Jim Griner
+   NASA Glenn Research Center
+   MS: 54-5
+   21000 Brookpark Road
+   Cleveland, Ohio  44135-3191
+
+   Phone: +1-216-433-5787
+   Fax:   +1-216-433-8705
+   EMail: jgriner@grc.nasa.gov
+
+
+   Gabriel Montenegro
+   Sun Microsystems Laboratories, Europe
+   29, chemin du Vieux Chene
+   38240 Meylan, FRANCE
+
+   Phone: +33 476 18 80 45
+   EMail: gab@sun.com
+
+
+   Zach Shelby
+   University of Oulu
+   Center for Wireless Communications
+   PO Box 4500
+   FIN-90014
+   Finland
+
+   Phone: +358-40-779-6297
+   EMail: zach.shelby@ee.oulu.fi
+
+
+
+
+
+
+
+
+
+
+Border, et al.               Informational                     [Page 40]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+Appendix A - PEP Terminology Summary
+
+   This appendix provides a summary of terminology frequently used
+   during discussion of Performance Enhancing Proxies.  (In some cases,
+   these terms have different meanings from their non-PEP related
+   usage.)
+
+   ACK filtering
+
+      Removing acknowledgments to prevent congestion of a low speed
+      link, usually used with paths which include a highly asymmetric
+      link.  Sometimes also called ACK reduction.  See Section 3.1.4.
+
+   ACK spacing
+
+      Delayed forwarding of acknowledgments in order to space them
+      appropriately, for example, to help minimize the burstiness of
+      TCP data.  See Section 3.1.1.
+
+   application layer PEP
+
+      A Performance Enhancing Proxy operating above the transport
+      layer.  May be aimed at improving application or transport
+      protocol performance (or both).  Described in detail in Section
+      2.1.2.
+
+   asymmetric link
+
+      A link which has different rates for the forward channel (used for
+      data segments) and the back (or return) channel (used for ACKs).
+
+   available bandwidth
+
+      The total capacity of a link available to carry information at any
+      given time.  May be lower than the raw bandwidth due to competing
+      traffic.
+
+   bandwidth utilization
+
+      The actual amount of information delivered over a link in a given
+      period, usually expressed as a percent of the raw bandwidth of
+      the link.
+
+   gateway
+
+      Has several meanings with respect to PEPs, depending on context:
+
+         -  An access point to a particular link;
+
+
+
+Border, et al.               Informational                     [Page 41]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+         -  A device capable of initiating and terminating
+            connections on behalf of a user or end system (e.g., a
+            firewall or proxy).
+
+
+      Not necessarily, but could be, a router.
+
+   in flight (data)
+
+      Data sent but not yet acknowledged.  More precisely, data sent for
+      which the sender has not yet received the acknowledgement.
+
+   link layer PEP
+
+      A Performance Enhancing Proxy operating below the network layer.
+
+   local acknowledgement
+
+      The generation of acknowledgments by an entity in the path
+      between two end systems in order to allow the sending system to
+      transmit more data without waiting for end-to-end
+      acknowledgments.  Described (in the context of TCP) in Section
+      3.1.2.
+
+   performance enhancing proxy
+
+      An entity in the network acting on behalf of an end system or user
+      (with or without the knowledge of the end system or user) in order
+      to enhance protocol performance.  Section 2 describes various
+      types of performance enhancing proxies.  Section 3 describes the
+      mechanisms performance enhancing proxies use to improve
+      performance.
+
+   raw bandwidth
+
+      The total capacity of an unloaded link available to carry
+      information.
+
+   Snoop
+
+      A TCP-aware link layer developed for wireless packet radio and
+      cellular networks.  It works by caching segments at a wireless
+      base station.  If the base station sees duplicate acknowledgments
+      for a segment that it has cached, it retransmits the missing
+      segment while suppressing the duplicate acknowledgement stream
+      being forwarded back to the sender until the wireless receiver
+      starts to acknowledge new data.  Described in detail in Section
+      5.3.2 and [SNOOP].
+
+
+
+Border, et al.               Informational                     [Page 42]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   split connection
+
+      A connection that has been terminated before reaching the intended
+      destination end system in order to initiate another connection
+      towards the end system.  This allows the use of different
+      connection characteristics for different parts of the path of
+      the originally intended connection.  See Section 2.4.
+
+   TCP PEP
+
+      A Performance Enhancing Proxy operating at the transport layer
+      with TCP.  Aimed at improving TCP performance.
+
+   TCP splitting
+
+      Using one or more split TCP connections to improve TCP
+      performance.
+
+   TCP spoofing
+
+      Sometimes used as a synonym for TCP PEP.  More accurately, TCP
+      spoofing refers to using transparent (to the TCP stacks in the
+      end systems) mechanisms to improve TCP performance.  See Section
+      2.1.1.
+
+   transparent
+
+      In the context of a PEP, transparent refers to not requiring
+      changes to be made to the end systems, transport endpoints
+      and/or applications involved in a connection.  See Section 2.5
+      for a more detailed explanation.
+
+   transport layer PEP
+
+      A Performance Enhancing Proxy operating at the transport layer.
+      Described in detail in Section 2.1.1.
+
+   tunneling
+
+      In the context of PEPs, tunneling refers to the process of
+      wrapping a packet for transmission over a particular link
+      between two PEPs.  See Section 3.2.
+
+
+
+
+
+
+
+
+
+Border, et al.               Informational                     [Page 43]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+   WAP
+
+      The Wireless Application Protocol specifies an application
+      framework and network protocols intended to work across
+      differing narrow-band wireless network technologies.  See
+      Section 5.2.2.2.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Border, et al.               Informational                     [Page 44]
+
+RFC 3135          PILC - Performance Enhancing Proxies         June 2001
+
+
+Full Copyright Statement
+
+   Copyright (C) The Internet Society (2001).  All Rights Reserved.
+
+   This document and translations of it may be copied and furnished to
+   others, and derivative works that comment on or otherwise explain it
+   or assist in its implementation may be prepared, copied, published
+   and distributed, in whole or in part, without restriction of any
+   kind, provided that the above copyright notice and this paragraph are
+   included on all such copies and derivative works.  However, this
+   document itself may not be modified in any way, such as by removing
+   the copyright notice or references to the Internet Society or other
+   Internet organizations, except as needed for the purpose of
+   developing Internet standards in which case the procedures for
+   copyrights defined in the Internet Standards process must be
+   followed, or as required to translate it into languages other than
+   English.
+
+   The limited permissions granted above are perpetual and will not be
+   revoked by the Internet Society or its successors or assigns.
+
+   This document and the information contained herein is provided on an
+   "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
+   TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
+   BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
+   HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
+   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Acknowledgement
+
+   Funding for the RFC Editor function is currently provided by the
+   Internet Society.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Border, et al.               Informational                     [Page 45]
+
diff --git a/ext/picotcp/RFC/rfc3168.txt b/ext/picotcp/RFC/rfc3168.txt
new file mode 100644
index 0000000..30b05f7
--- /dev/null
+++ b/ext/picotcp/RFC/rfc3168.txt
@@ -0,0 +1,3531 @@
+
+
+
+
+
+
+Network Working Group                                    K. Ramakrishnan
+Request for Comments: 3168                            TeraOptic Networks
+Updates: 2474, 2401, 793                                        S. Floyd
+Obsoletes: 2481                                                    ACIRI
+Category: Standards Track                                       D. Black
+                                                                     EMC
+                                                          September 2001
+
+
+      The Addition of Explicit Congestion Notification (ECN) to IP
+
+Status of this Memo
+
+   This document specifies an Internet standards track protocol for the
+   Internet community, and requests discussion and suggestions for
+   improvements.  Please refer to the current edition of the "Internet
+   Official Protocol Standards" (STD 1) for the standardization state
+   and status of this protocol.  Distribution of this memo is unlimited.
+
+Copyright Notice
+
+   Copyright (C) The Internet Society (2001).  All Rights Reserved.
+
+Abstract
+
+   This memo specifies the incorporation of ECN (Explicit Congestion
+   Notification) to TCP and IP, including ECN's use of two bits in the
+   IP header.
+
+Table of Contents
+
+   1.  Introduction..................................................  3
+   2.  Conventions and Acronyms......................................  5
+   3.  Assumptions and General Principles............................  5
+   4.  Active Queue Management (AQM).................................  6
+   5.  Explicit Congestion Notification in IP........................  6
+   5.1.  ECN as an Indication of Persistent Congestion............... 10
+   5.2.  Dropped or Corrupted Packets................................ 11
+   5.3.  Fragmentation............................................... 11
+   6.  Support from the Transport Protocol........................... 12
+   6.1.  TCP......................................................... 13
+   6.1.1.  TCP Initialization........................................ 14
+   6.1.1.1.  Middlebox Issues........................................ 16
+   6.1.1.2.  Robust TCP Initialization with an Echoed Reserved Field. 17
+   6.1.2.  The TCP Sender............................................ 18
+   6.1.3.  The TCP Receiver.......................................... 19
+   6.1.4.  Congestion on the ACK-path................................ 20
+   6.1.5.  Retransmitted TCP packets................................. 20
+
+
+
+Ramakrishnan, et al.        Standards Track                     [Page 1]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   6.1.6.  TCP Window Probes......................................... 22
+   7.  Non-compliance by the End Nodes............................... 22
+   8.  Non-compliance in the Network................................. 24
+   8.1.  Complications Introduced by Split Paths..................... 25
+   9.  Encapsulated Packets.......................................... 25
+   9.1.  IP packets encapsulated in IP............................... 25
+   9.1.1.  The Limited-functionality and Full-functionality Options.. 27
+   9.1.2.  Changes to the ECN Field within an IP Tunnel.............. 28
+   9.2.  IPsec Tunnels............................................... 29
+   9.2.1.  Negotiation between Tunnel Endpoints...................... 31
+   9.2.1.1.  ECN Tunnel Security Association Database Field.......... 32
+   9.2.1.2.  ECN Tunnel Security Association Attribute............... 32
+   9.2.1.3.  Changes to IPsec Tunnel Header Processing............... 33
+   9.2.2.  Changes to the ECN Field within an IPsec Tunnel........... 35
+   9.2.3.  Comments for IPsec Support................................ 35
+   9.3.  IP packets encapsulated in non-IP Packet Headers............ 36
+   10.  Issues Raised by Monitoring and Policing Devices............. 36
+   11.  Evaluations of ECN........................................... 37
+   11.1.  Related Work Evaluating ECN................................ 37
+   11.2.  A Discussion of the ECN nonce.............................. 37
+   11.2.1.  The Incremental Deployment of ECT(1) in Routers.......... 38
+   12.  Summary of changes required in IP and TCP.................... 38
+   13.  Conclusions.................................................. 40
+   14.  Acknowledgements............................................. 41
+   15.  References................................................... 41
+   16.  Security Considerations...................................... 45
+   17.  IPv4 Header Checksum Recalculation........................... 45
+   18.  Possible Changes to the ECN Field in the Network............. 45
+   18.1.  Possible Changes to the IP Header.......................... 46
+   18.1.1.  Erasing the Congestion Indication........................ 46
+   18.1.2.  Falsely Reporting Congestion............................. 47
+   18.1.3.  Disabling ECN-Capability................................. 47
+   18.1.4.  Falsely Indicating ECN-Capability........................ 47
+   18.2.  Information carried in the Transport Header................ 48
+   18.3.  Split Paths................................................ 49
+   19.  Implications of Subverting End-to-End Congestion Control..... 50
+   19.1.  Implications for the Network and for Competing Flows....... 50
+   19.2.  Implications for the Subverted Flow........................ 53
+   19.3.  Non-ECN-Based Methods of Subverting End-to-end Congestion
+          Control.................................................... 54
+   20.  The Motivation for the ECT Codepoints........................ 54
+   20.1.  The Motivation for an ECT Codepoint........................ 54
+   20.2.  The Motivation for two ECT Codepoints...................... 55
+   21.  Why use Two Bits in the IP Header?........................... 57
+   22.  Historical Definitions for the IPv4 TOS Octet................ 58
+   23.  IANA Considerations.......................................... 60
+   23.1.  IPv4 TOS Byte and IPv6 Traffic Class Octet................. 60
+   23.2.  TCP Header Flags........................................... 61
+
+
+
+Ramakrishnan, et al.        Standards Track                     [Page 2]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   23.3. IPSEC Security Association Attributes....................... 62
+   24.  Authors' Addresses........................................... 62
+   25.  Full Copyright Statement..................................... 63
+
+1.  Introduction
+
+   We begin by describing TCP's use of packet drops as an indication of
+   congestion.  Next we explain that with the addition of active queue
+   management (e.g., RED) to the Internet infrastructure, where routers
+   detect congestion before the queue overflows, routers are no longer
+   limited to packet drops as an indication of congestion.  Routers can
+   instead set the Congestion Experienced (CE) codepoint in the IP
+   header of packets from ECN-capable transports.  We describe when the
+   CE codepoint is to be set in routers, and describe modifications
+   needed to TCP to make it ECN-capable.  Modifications to other
+   transport protocols (e.g., unreliable unicast or multicast, reliable
+   multicast, other reliable unicast transport protocols) could be
+   considered as those protocols are developed and advance through the
+   standards process.  We also describe in this document the issues
+   involving the use of ECN within IP tunnels, and within IPsec tunnels
+   in particular.
+
+   One of the guiding principles for this document is that, to the
+   extent possible, the mechanisms specified here be incrementally
+   deployable.  One challenge to the principle of incremental deployment
+   has been the prior existence of some IP tunnels that were not
+   compatible with the use of ECN.  As ECN becomes deployed, non-
+   compatible IP tunnels will have to be upgraded to conform to this
+   document.
+
+   This document obsoletes RFC 2481, "A Proposal to add Explicit
+   Congestion Notification (ECN) to IP", which defined ECN as an
+   Experimental Protocol for the Internet Community.  This document also
+   updates RFC 2474, "Definition of the Differentiated Services Field
+   (DS Field) in the IPv4 and IPv6 Headers", in defining the ECN field
+   in the IP header, RFC 2401, "Security Architecture for the Internet
+   Protocol" to change the handling of IPv4 TOS Byte and IPv6 Traffic
+   Class Octet in tunnel mode header construction to be compatible with
+   the use of ECN, and RFC 793, "Transmission Control Protocol", in
+   defining two new flags in the TCP header.
+
+   TCP's congestion control and avoidance algorithms are based on the
+   notion that the network is a black-box [Jacobson88, Jacobson90].  The
+   network's state of congestion or otherwise is determined by end-
+   systems probing for the network state, by gradually increasing the
+   load on the network (by increasing the window of packets that are
+   outstanding in the network) until the network becomes congested and a
+   packet is lost.  Treating the network as a "black-box" and treating
+
+
+
+Ramakrishnan, et al.        Standards Track                     [Page 3]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   loss as an indication of congestion in the network is appropriate for
+   pure best-effort data carried by TCP, with little or no sensitivity
+   to delay or loss of individual packets.  In addition, TCP's
+   congestion management algorithms have techniques built-in (such as
+   Fast Retransmit and Fast Recovery) to minimize the impact of losses,
+   from a throughput perspective.  However, these mechanisms are not
+   intended to help applications that are in fact sensitive to the delay
+   or loss of one or more individual packets.  Interactive traffic such
+   as telnet, web-browsing, and transfer of audio and video data can be
+   sensitive to packet losses (especially when using an unreliable data
+   delivery transport such as UDP) or to the increased latency of the
+   packet caused by the need to retransmit the packet after a loss (with
+   the reliable data delivery semantics provided by TCP).
+
+   Since TCP determines the appropriate congestion window to use by
+   gradually increasing the window size until it experiences a dropped
+   packet, this causes the queues at the bottleneck router to build up.
+   With most packet drop policies at the router that are not sensitive
+   to the load placed by each individual flow (e.g., tail-drop on queue
+   overflow), this means that some of the packets of latency-sensitive
+   flows may be dropped. In addition, such drop policies lead to
+   synchronization of loss across multiple flows.
+
+   Active queue management mechanisms detect congestion before the queue
+   overflows, and provide an indication of this congestion to the end
+   nodes.  Thus, active queue management can reduce unnecessary queuing
+   delay for all traffic sharing that queue.  The advantages of active
+   queue management are discussed in RFC 2309 [RFC2309].  Active queue
+   management avoids some of the bad properties of dropping on queue
+   overflow, including the undesirable synchronization of loss across
+   multiple flows.  More importantly, active queue management means that
+   transport protocols with mechanisms for congestion control (e.g.,
+   TCP) do not have to rely on buffer overflow as the only indication of
+   congestion.
+
+   Active queue management mechanisms may use one of several methods for
+   indicating congestion to end-nodes. One is to use packet drops, as is
+   currently done. However, active queue management allows the router to
+   separate policies of queuing or dropping packets from the policies
+   for indicating congestion. Thus, active queue management allows
+   routers to use the Congestion Experienced (CE) codepoint in a packet
+   header as an indication of congestion, instead of relying solely on
+   packet drops. This has the potential of reducing the impact of loss
+   on latency-sensitive flows.
+
+
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                     [Page 4]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   There exist some middleboxes (firewalls, load balancers, or intrusion
+   detection systems) in the Internet that either drop a TCP SYN packet
+   configured to negotiate ECN, or respond with a RST.  This document
+   specifies procedures that TCP implementations may use to provide
+   robust connectivity even in the presence of such equipment.
+
+2.  Conventions and Acronyms
+
+   The keywords MUST, MUST NOT, REQUIRED, SHALL, SHALL NOT, SHOULD,
+   SHOULD NOT, RECOMMENDED, MAY, and OPTIONAL, when they appear in this
+   document, are to be interpreted as described in [RFC2119].
+
+3.  Assumptions and General Principles
+
+   In this section, we describe some of the important design principles
+   and assumptions that guided the design choices in this proposal.
+
+      * Because ECN is likely to be adopted gradually, accommodating
+        migration is essential. Some routers may still only drop packets
+        to indicate congestion, and some end-systems may not be ECN-
+        capable. The most viable strategy is one that accommodates
+        incremental deployment without having to resort to "islands" of
+        ECN-capable and non-ECN-capable environments.
+
+      * New mechanisms for congestion control and avoidance need to co-
+        exist and cooperate with existing mechanisms for congestion
+        control.  In particular, new mechanisms have to co-exist with
+        TCP's current methods of adapting to congestion and with
+        routers' current practice of dropping packets in periods of
+        congestion.
+
+      * Congestion may persist over different time-scales. The time
+        scales that we are concerned with are congestion events that may
+        last longer than a round-trip time.
+
+      * The number of packets in an individual flow (e.g., TCP
+        connection or an exchange using UDP) may range from a small
+        number of packets to quite a large number. We are interested in
+        managing the congestion caused by flows that send enough packets
+        so that they are still active when network feedback reaches
+        them.
+
+      * Asymmetric routing is likely to be a normal occurrence in the
+        Internet. The path (sequence of links and routers) followed by
+        data packets may be different from the path followed by the
+        acknowledgment packets in the reverse direction.
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                     [Page 5]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+      * Many routers process the "regular" headers in IP packets more
+        efficiently than they process the header information in IP
+        options.  This suggests keeping congestion experienced
+        information in the regular headers of an IP packet.
+
+      * It must be recognized that not all end-systems will cooperate in
+        mechanisms for congestion control. However, new mechanisms
+        shouldn't make it easier for TCP applications to disable TCP
+        congestion control.  The benefit of lying about participating in
+        new mechanisms such as ECN-capability should be small.
+
+4.  Active Queue Management (AQM)
+
+   Random Early Detection (RED) is one mechanism for Active Queue
+   Management (AQM) that has been proposed to detect incipient
+   congestion [FJ93], and is currently being deployed in the Internet
+   [RFC2309].  AQM is meant to be a general mechanism using one of
+   several alternatives for congestion indication, but in the absence of
+   ECN, AQM is restricted to using packet drops as a mechanism for
+   congestion indication.  AQM drops packets based on the average queue
+   length exceeding a threshold, rather than only when the queue
+   overflows.  However, because AQM may drop packets before the queue
+   actually overflows, AQM is not always forced by memory limitations to
+   discard the packet.
+
+   AQM can set a Congestion Experienced (CE) codepoint in the packet
+   header instead of dropping the packet, when such a field is provided
+   in the IP header and understood by the transport protocol.  The use
+   of the CE codepoint with ECN allows the receiver(s) to receive the
+   packet, avoiding the potential for excessive delays due to
+   retransmissions after packet losses.  We use the term 'CE packet' to
+   denote a packet that has the CE codepoint set.
+
+5.  Explicit Congestion Notification in IP
+
+   This document specifies that the Internet provide a congestion
+   indication for incipient congestion (as in RED and earlier work
+   [RJ90]) where the notification can sometimes be through marking
+   packets rather than dropping them.  This uses an ECN field in the IP
+   header with two bits, making four ECN codepoints, '00' to '11'.  The
+   ECN-Capable Transport (ECT) codepoints '10' and '01' are set by the
+   data sender to indicate that the end-points of the transport protocol
+   are ECN-capable; we call them ECT(0) and ECT(1) respectively.  The
+   phrase "the ECT codepoint" in this document refers to either of the
+   two ECT codepoints.  Routers treat the ECT(0) and ECT(1) codepoints
+   as equivalent.  Senders are free to use either the ECT(0) or the
+   ECT(1) codepoint to indicate ECT, on a packet-by-packet basis.
+
+
+
+
+Ramakrishnan, et al.        Standards Track                     [Page 6]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   The use of the two codepoints for ECT, ECT(0) and ECT(1), is
+   motivated primarily by the desire to allow mechanisms for the data
+   sender to verify that network elements are not erasing the CE
+   codepoint, and that data receivers are properly reporting to the
+   sender the receipt of packets with the CE codepoint set, as required
+   by the transport protocol.  Guidelines for the senders and receivers
+   to differentiate between the ECT(0) and ECT(1) codepoints will be
+   addressed in separate documents, for each transport protocol.  In
+   particular, this document does not address mechanisms for TCP end-
+   nodes to differentiate between the ECT(0) and ECT(1) codepoints.
+   Protocols and senders that only require a single ECT codepoint SHOULD
+   use ECT(0).
+
+   The not-ECT codepoint '00' indicates a packet that is not using ECN.
+   The CE codepoint '11' is set by a router to indicate congestion to
+   the end nodes.  Routers that have a packet arriving at a full queue
+   drop the packet, just as they do in the absence of ECN.
+
+      +-----+-----+
+      | ECN FIELD |
+      +-----+-----+
+        ECT   CE         [Obsolete] RFC 2481 names for the ECN bits.
+         0     0         Not-ECT
+         0     1         ECT(1)
+         1     0         ECT(0)
+         1     1         CE
+
+      Figure 1: The ECN Field in IP.
+
+   The use of two ECT codepoints essentially gives a one-bit ECN nonce
+   in packet headers, and routers necessarily "erase" the nonce when
+   they set the CE codepoint [SCWA99].  For example, routers that erased
+   the CE codepoint would face additional difficulty in reconstructing
+   the original nonce, and thus repeated erasure of the CE codepoint
+   would be more likely to be detected by the end-nodes.  The ECN nonce
+   also can address the problem of misbehaving transport receivers lying
+   to the transport sender about whether or not the CE codepoint was set
+   in a packet.  The motivation for the use of two ECT codepoints is
+   discussed in more detail in Section 20, along with some discussion of
+   alternate possibilities for the fourth ECN codepoint (that is, the
+   codepoint '01').  Backwards compatibility with earlier ECN
+   implementations that do not understand the ECT(1) codepoint is
+   discussed in Section 11.
+
+   In RFC 2481 [RFC2481], the ECN field was divided into the ECN-Capable
+   Transport (ECT) bit and the CE bit.  The ECN field with only the
+   ECN-Capable Transport (ECT) bit set in RFC 2481 corresponds to the
+   ECT(0) codepoint in this document, and the ECN field with both the
+
+
+
+Ramakrishnan, et al.        Standards Track                     [Page 7]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   ECT and CE bits set in RFC 2481 corresponds to the CE codepoint in
+   this document.  The '01' codepoint was left undefined in RFC 2481,
+   and this is the reason for recommending the use of ECT(0) when only
+   a single ECT codepoint is needed.
+
+         0     1     2     3     4     5     6     7
+      +-----+-----+-----+-----+-----+-----+-----+-----+
+      |          DS FIELD, DSCP           | ECN FIELD |
+      +-----+-----+-----+-----+-----+-----+-----+-----+
+
+        DSCP: differentiated services codepoint
+        ECN:  Explicit Congestion Notification
+
+      Figure 2: The Differentiated Services and ECN Fields in IP.
+
+   Bits 6 and 7 in the IPv4 TOS octet are designated as the ECN field.
+   The IPv4 TOS octet corresponds to the Traffic Class octet in IPv6,
+   and the ECN field is defined identically in both cases.  The
+   definitions for the IPv4 TOS octet [RFC791] and the IPv6 Traffic
+   Class octet have been superseded by the six-bit DS (Differentiated
+   Services) Field [RFC2474, RFC2780].  Bits 6 and 7 are listed in
+   [RFC2474] as Currently Unused, and are specified in RFC 2780 as
+   approved for experimental use for ECN.  Section 22 gives a brief
+   history of the TOS octet.
+
+   Because of the unstable history of the TOS octet, the use of the ECN
+   field as specified in this document cannot be guaranteed to be
+   backwards compatible with those past uses of these two bits that
+   pre-date ECN.  The potential dangers of this lack of backwards
+   compatibility are discussed in Section 22.
+
+   Upon the receipt by an ECN-Capable transport of a single CE packet,
+   the congestion control algorithms followed at the end-systems MUST be
+   essentially the same as the congestion control response to a *single*
+   dropped packet.  For example, for ECN-Capable TCP the source TCP is
+   required to halve its congestion window for any window of data
+   containing either a packet drop or an ECN indication.
+
+   One reason for requiring that the congestion-control response to the
+   CE packet be essentially the same as the response to a dropped packet
+   is to accommodate the incremental deployment of ECN in both end-
+   systems and in routers.  Some routers may drop ECN-Capable packets
+   (e.g., using the same AQM policies for congestion detection) while
+   other routers set the CE codepoint, for equivalent levels of
+   congestion.  Similarly, a router might drop a non-ECN-Capable packet
+   but set the CE codepoint in an ECN-Capable packet, for equivalent
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                     [Page 8]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   levels of congestion.  If there were different congestion control
+   responses to a CE codepoint than to a packet drop, this could result
+   in unfair treatment for different flows.
+
+   An additional goal is that the end-systems should react to congestion
+   at most once per window of data (i.e., at most once per round-trip
+   time), to avoid reacting multiple times to multiple indications of
+   congestion within a round-trip time.
+
+   For a router, the CE codepoint of an ECN-Capable packet SHOULD only
+   be set if the router would otherwise have dropped the packet as an
+   indication of congestion to the end nodes. When the router's buffer
+   is not yet full and the router is prepared to drop a packet to inform
+   end nodes of incipient congestion, the router should first check to
+   see if the ECT codepoint is set in that packet's IP header.  If so,
+   then instead of dropping the packet, the router MAY instead set the
+   CE codepoint in the IP header.
+
+   An environment where all end nodes were ECN-Capable could allow new
+   criteria to be developed for setting the CE codepoint, and new
+   congestion control mechanisms for end-node reaction to CE packets.
+   However, this is a research issue, and as such is not addressed in
+   this document.
+
+   When a CE packet (i.e., a packet that has the CE codepoint set) is
+   received by a router, the CE codepoint is left unchanged, and the
+   packet is transmitted as usual. When severe congestion has occurred
+   and the router's queue is full, then the router has no choice but to
+   drop some packet when a new packet arrives.  We anticipate that such
+   packet losses will become relatively infrequent when a majority of
+   end-systems become ECN-Capable and participate in TCP or other
+   compatible congestion control mechanisms. In an ECN-Capable
+   environment that is adequately-provisioned, packet losses should
+   occur primarily during transients or in the presence of non-
+   cooperating sources.
+
+   The above discussion of when CE may be set instead of dropping a
+   packet applies by default to all Differentiated Services Per-Hop
+   Behaviors (PHBs) [RFC 2475].  Specifications for PHBs MAY provide
+   more specifics on how a compliant implementation is to choose between
+   setting CE and dropping a packet, but this is NOT REQUIRED.  A router
+   MUST NOT set CE instead of dropping a packet when the drop that would
+   occur is caused by reasons other than congestion or the desire to
+   indicate incipient congestion to end nodes (e.g., a diffserv edge
+   node may be configured to unconditionally drop certain classes of
+   traffic to prevent them from entering its diffserv domain).
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                     [Page 9]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   We expect that routers will set the CE codepoint in response to
+   incipient congestion as indicated by the average queue size, using
+   the RED algorithms suggested in [FJ93, RFC2309].  To the best of our
+   knowledge, this is the only proposal currently under discussion in
+   the IETF for routers to drop packets proactively, before the buffer
+   overflows.  However, this document does not attempt to specify a
+   particular mechanism for active queue management, leaving that
+   endeavor, if needed, to other areas of the IETF.  While ECN is
+   inextricably tied up with the need to have a reasonable active queue
+   management mechanism at the router, the reverse does not hold; active
+   queue management mechanisms have been developed and deployed
+   independent of ECN, using packet drops as indications of congestion
+   in the absence of ECN in the IP architecture.
+
+5.1.  ECN as an Indication of Persistent Congestion
+
+   We emphasize that a *single* packet with the CE codepoint set in an
+   IP packet causes the transport layer to respond, in terms of
+   congestion control, as it would to a packet drop.  The instantaneous
+   queue size is likely to see considerable variations even when the
+   router does not experience persistent congestion.  As such, it is
+   important that transient congestion at a router, reflected by the
+   instantaneous queue size reaching a threshold much smaller than the
+   capacity of the queue, not trigger a reaction at the transport layer.
+   Therefore, the CE codepoint should not be set by a router based on
+   the instantaneous queue size.
+
+   For example, since the ATM and Frame Relay mechanisms for congestion
+   indication have typically been defined without an associated notion
+   of average queue size as the basis for determining that an
+   intermediate node is congested, we believe that they provide a very
+   noisy signal. The TCP-sender reaction specified in this document for
+   ECN is NOT the appropriate reaction for such a noisy signal of
+   congestion notification.  However, if the routers that interface to
+   the ATM network have a way of maintaining the average queue at the
+   interface, and use it to come to a reliable determination that the
+   ATM subnet is congested, they may use the ECN notification that is
+   defined here.
+
+   We continue to encourage experiments in techniques at layer 2 (e.g.,
+   in ATM switches or Frame Relay switches) to take advantage of ECN.
+   For example, using a scheme such as RED (where packet marking is
+   based on the average queue length exceeding a threshold), layer 2
+   devices could provide a reasonably reliable indication of congestion.
+   When all the layer 2 devices in a path set that layer's own
+   Congestion Experienced codepoint (e.g., the EFCI bit for ATM, the
+   FECN bit in Frame Relay) in this reliable manner, then the interface
+   router to the layer 2 network could copy the state of that layer 2
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 10]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   Congestion Experienced codepoint into the CE codepoint in the IP
+   header.  We recognize that this is not the current practice, nor is
+   it in current standards. However, encouraging experimentation in this
+   manner may provide the information needed to enable evolution of
+   existing layer 2 mechanisms to provide a more reliable means of
+   congestion indication, when they use a single bit for indicating
+   congestion.
+
+5.2.  Dropped or Corrupted Packets
+
+   For the proposed use for ECN in this document (that is, for a
+   transport protocol such as TCP for which a dropped data packet is an
+   indication of congestion), end nodes detect dropped data packets, and
+   the congestion response of the end nodes to a dropped data packet is
+   at least as strong as the congestion response to a received CE
+   packet.  To ensure the reliable delivery of the congestion indication
+   of the CE codepoint, an ECT codepoint MUST NOT be set in a packet
+   unless the loss of that packet in the network would be detected by
+   the end nodes and interpreted as an indication of congestion.
+
+   Transport protocols such as TCP do not necessarily detect all packet
+   drops, such as the drop of a "pure" ACK packet; for example, TCP does
+   not reduce the arrival rate of subsequent ACK packets in response to
+   an earlier dropped ACK packet.  Any proposal for extending ECN-
+   Capability to such packets would have to address issues such as the
+   case of an ACK packet that was marked with the CE codepoint but was
+   later dropped in the network. We believe that this aspect is still
+   the subject of research, so this document specifies that at this
+   time, "pure" ACK packets MUST NOT indicate ECN-Capability.
+
+   Similarly, if a CE packet is dropped later in the network due to
+   corruption (bit errors), the end nodes should still invoke congestion
+   control, just as TCP would today in response to a dropped data
+   packet. This issue of corrupted CE packets would have to be
+   considered in any proposal for the network to distinguish between
+   packets dropped due to corruption, and packets dropped due to
+   congestion or buffer overflow.  In particular, the ubiquitous
+   deployment of ECN would not, in and of itself, be a sufficient
+   development to allow end-nodes to interpret packet drops as
+   indications of corruption rather than congestion.
+
+5.3.  Fragmentation
+
+   ECN-capable packets MAY have the DF (Don't Fragment) bit set.
+   Reassembly of a fragmented packet MUST NOT lose indications of
+   congestion.  In other words, if any fragment of an IP packet to be
+   reassembled has the CE codepoint set, then one of two actions MUST be
+   taken:
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 11]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+      * Set the CE codepoint on the reassembled packet.  However, this
+        MUST NOT occur if any of the other fragments contributing to
+        this reassembly carries the Not-ECT codepoint.
+
+      * The packet is dropped, instead of being reassembled, for any
+        other reason.
+
+   If both actions are applicable, either MAY be chosen.  Reassembly of
+   a fragmented packet MUST NOT change the ECN codepoint when all of the
+   fragments carry the same codepoint.
+
+   We would note that because RFC 2481 did not specify reassembly
+   behavior, older ECN implementations conformant with that Experimental
+   RFC do not necessarily perform reassembly correctly, in terms of
+   preserving the CE codepoint in a fragment.  The sender could avoid
+   the consequences of this behavior by setting the DF bit in ECN-
+   Capable packets.
+
+   Situations may arise in which the above reassembly specification is
+   insufficiently precise.  For example, if there is a malicious or
+   broken entity in the path at or after the fragmentation point, packet
+   fragments could carry a mixture of ECT(0), ECT(1), and/or Not-ECT
+   codepoints.  The reassembly specification above does not place
+   requirements on reassembly of fragments in this case.  In situations
+   where more precise reassembly behavior would be required, protocol
+   specifications SHOULD instead specify that DF MUST be set in all
+   ECN-capable packets sent by the protocol.
+
+6.  Support from the Transport Protocol
+
+   ECN requires support from the transport protocol, in addition to the
+   functionality given by the ECN field in the IP packet header. The
+   transport protocol might require negotiation between the endpoints
+   during setup to determine that all of the endpoints are ECN-capable,
+   so that the sender can set the ECT codepoint in transmitted packets.
+   Second, the transport protocol must be capable of reacting
+   appropriately to the receipt of CE packets.  This reaction could be
+   in the form of the data receiver informing the data sender of the
+   received CE packet (e.g., TCP), of the data receiver unsubscribing to
+   a layered multicast group (e.g., RLM [MJV96]), or of some other
+   action that ultimately reduces the arrival rate of that flow on that
+   congested link.  CE packets indicate persistent rather than transient
+   congestion (see Section 5.1), and hence reactions to the receipt of
+   CE packets should be those appropriate for persistent congestion.
+
+   This document only addresses the addition of ECN Capability to TCP,
+   leaving issues of ECN in other transport protocols to further
+   research.  For TCP, ECN requires three new pieces of functionality:
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 12]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   negotiation between the endpoints during connection setup to
+   determine if they are both ECN-capable; an ECN-Echo (ECE) flag in the
+   TCP header so that the data receiver can inform the data sender when
+   a CE packet has been received; and a Congestion Window Reduced (CWR)
+   flag in the TCP header so that the data sender can inform the data
+   receiver that the congestion window has been reduced. The support
+   required from other transport protocols is likely to be different,
+   particularly for unreliable or reliable multicast transport
+   protocols, and will have to be determined as other transport
+   protocols are brought to the IETF for standardization.
+
+   In a mild abuse of terminology, in this document we refer to `TCP
+   packets' instead of `TCP segments'.
+
+6.1.  TCP
+
+   The following sections describe in detail the proposed use of ECN in
+   TCP.  This proposal is described in essentially the same form in
+   [Floyd94]. We assume that the source TCP uses the standard congestion
+   control algorithms of Slow-start, Fast Retransmit and Fast Recovery
+   [RFC2581].
+
+   This proposal specifies two new flags in the Reserved field of the
+   TCP header.  The TCP mechanism for negotiating ECN-Capability uses
+   the ECN-Echo (ECE) flag in the TCP header.  Bit 9 in the Reserved
+   field of the TCP header is designated as the ECN-Echo flag.  The
+   location of the 6-bit Reserved field in the TCP header is shown in
+   Figure 4 of RFC 793 [RFC793] (and is reproduced below for
+   completeness).  This specification of the ECN Field leaves the
+   Reserved field as a 4-bit field using bits 4-7.
+
+   To enable the TCP receiver to determine when to stop setting the
+   ECN-Echo flag, we introduce a second new flag in the TCP header, the
+   CWR flag.  The CWR flag is assigned to Bit 8 in the Reserved field of
+   the TCP header.
+
+        0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
+      +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
+      |               |                       | U | A | P | R | S | F |
+      | Header Length |        Reserved       | R | C | S | S | Y | I |
+      |               |                       | G | K | H | T | N | N |
+      +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
+
+      Figure 3: The old definition of bytes 13 and 14 of the TCP
+                header.
+
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 13]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+        0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
+      +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
+      |               |               | C | E | U | A | P | R | S | F |
+      | Header Length |    Reserved   | W | C | R | C | S | S | Y | I |
+      |               |               | R | E | G | K | H | T | N | N |
+      +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
+
+      Figure 4: The new definition of bytes 13 and 14 of the TCP
+                Header.
+
+   Thus, ECN uses the ECT and CE flags in the IP header (as shown in
+   Figure 1) for signaling between routers and connection endpoints, and
+   uses the ECN-Echo and CWR flags in the TCP header (as shown in Figure
+   4) for TCP-endpoint to TCP-endpoint signaling.  For a TCP connection,
+   a typical sequence of events in an ECN-based reaction to congestion
+   is as follows:
+
+      * An ECT codepoint is set in packets transmitted by the sender to
+        indicate that ECN is supported by the transport entities for
+        these packets.
+
+      * An ECN-capable router detects impending congestion and detects
+        that an ECT codepoint is set in the packet it is about to drop.
+        Instead of dropping the packet, the router chooses to set the CE
+        codepoint in the IP header and forwards the packet.
+
+      * The receiver receives the packet with the CE codepoint set, and
+        sets the ECN-Echo flag in its next TCP ACK sent to the sender.
+
+      * The sender receives the TCP ACK with ECN-Echo set, and reacts to
+        the congestion as if a packet had been dropped.
+
+      * The sender sets the CWR flag in the TCP header of the next
+        packet sent to the receiver to acknowledge its receipt of and
+        reaction to the ECN-Echo flag.
+
+   The negotiation for using ECN by the TCP transport entities and the
+   use of the ECN-Echo and CWR flags is described in more detail in the
+   sections below.
+
+6.1.1  TCP Initialization
+
+   In the TCP connection setup phase, the source and destination TCPs
+   exchange information about their willingness to use ECN.  Subsequent
+   to the completion of this negotiation, the TCP sender sets an ECT
+   codepoint in the IP header of data packets to indicate to the network
+   that the transport is capable and willing to participate in ECN for
+   this packet. This indicates to the routers that they may mark this
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 14]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   packet with the CE codepoint, if they would like to use that as a
+   method of congestion notification. If the TCP connection does not
+   wish to use ECN notification for a particular packet, the sending TCP
+   sets the ECN codepoint to not-ECT, and the TCP receiver ignores the
+   CE codepoint in the received packet.
+
+   For this discussion, we designate the initiating host as Host A and
+   the responding host as Host B.  We call a SYN packet with the ECE and
+   CWR flags set an "ECN-setup SYN packet", and we call a SYN packet
+   with at least one of the ECE and CWR flags not set a "non-ECN-setup
+   SYN packet".  Similarly, we call a SYN-ACK packet with only the ECE
+   flag set but the CWR flag not set an "ECN-setup SYN-ACK packet", and
+   we call a SYN-ACK packet with any other configuration of the ECE and
+   CWR flags a "non-ECN-setup SYN-ACK packet".
+
+   Before a TCP connection can use ECN, Host A sends an ECN-setup SYN
+   packet, and Host B sends an ECN-setup SYN-ACK packet.  For a SYN
+   packet, the setting of both ECE and CWR in the ECN-setup SYN packet
+   is defined as an indication that the sending TCP is ECN-Capable,
+   rather than as an indication of congestion or of response to
+   congestion. More precisely, an ECN-setup SYN packet indicates that
+   the TCP implementation transmitting the SYN packet will participate
+   in ECN as both a sender and receiver.  Specifically, as a receiver,
+   it will respond to incoming data packets that have the CE codepoint
+   set in the IP header by setting ECE in outgoing TCP Acknowledgement
+   (ACK) packets.  As a sender, it will respond to incoming packets that
+   have ECE set by reducing the congestion window and setting CWR when
+   appropriate.  An ECN-setup SYN packet does not commit the TCP sender
+   to setting the ECT codepoint in any or all of the packets it may
+   transmit.  However, the commitment to respond appropriately to
+   incoming packets with the CE codepoint set remains even if the TCP
+   sender in a later transmission, within this TCP connection, sends a
+   SYN packet without ECE and CWR set.
+
+   When Host B sends an ECN-setup SYN-ACK packet, it sets the ECE flag
+   but not the CWR flag.  An ECN-setup SYN-ACK packet is defined as an
+   indication that the TCP transmitting the SYN-ACK packet is ECN-
+   Capable.  As with the SYN packet, an ECN-setup SYN-ACK packet does
+   not commit the TCP host to setting the ECT codepoint in transmitted
+   packets.
+
+   The following rules apply to the sending of ECN-setup packets within
+   a TCP connection, where a TCP connection is defined by the standard
+   rules for TCP connection establishment and termination.
+
+      * If a host has received an ECN-setup SYN packet, then it MAY send
+        an ECN-setup SYN-ACK packet.  Otherwise, it MUST NOT send an
+        ECN-setup SYN-ACK packet.
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 15]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+      * A host MUST NOT set ECT on data packets unless it has sent at
+        least one ECN-setup SYN or ECN-setup SYN-ACK packet, and has
+        received at least one ECN-setup SYN or ECN-setup SYN-ACK packet,
+        and has sent no non-ECN-setup SYN or non-ECN-setup SYN-ACK
+        packet.  If a host has received at least one non-ECN-setup SYN
+        or non-ECN-setup SYN-ACK packet, then it SHOULD NOT set ECT on
+        data packets.
+
+      * If a host ever sets the ECT codepoint on a data packet, then
+        that host MUST correctly set/clear the CWR TCP bit on all
+        subsequent packets in the connection.
+
+      * If a host has sent at least one ECN-setup SYN or ECN-setup SYN-
+        ACK packet, and has received no non-ECN-setup SYN or non-ECN-
+        setup SYN-ACK packet, then if that host receives TCP data
+        packets with ECT and CE codepoints set in the IP header, then
+        that host MUST process these packets as specified for an ECN-
+        capable connection.
+
+      * A host that is not willing to use ECN on a TCP connection SHOULD
+        clear both the ECE and CWR flags in all non-ECN-setup SYN and/or
+        SYN-ACK packets that it sends to indicate this unwillingness.
+        Receivers MUST correctly handle all forms of the non-ECN-setup
+        SYN and SYN-ACK packets.
+
+      * A host MUST NOT set ECT on SYN or SYN-ACK packets.
+
+   A TCP client enters TIME-WAIT state after receiving a FIN-ACK, and
+   transitions to CLOSED state after a timeout.  Many TCP
+   implementations create a new TCP connection if they receive an in-
+   window SYN packet during TIME-WAIT state.  When a TCP host enters
+   TIME-WAIT or CLOSED state, it should ignore any previous state about
+   the negotiation of ECN for that connection.
+
+6.1.1.1.  Middlebox Issues
+
+   ECN introduces the use of the ECN-Echo and CWR flags in the TCP
+   header (as shown in Figure 4) for initialization.  There exist some
+   faulty firewalls, load balancers, and intrusion detection systems in
+   the Internet that either drop an ECN-setup SYN packet or respond with
+   a RST, in the belief that such a packet (with these bits set) is a
+   signature for a port-scanning tool that could be used in a denial-
+   of-service attack.  Some of the offending equipment has been
+   identified, and a web page [FIXES] contains a list of non-compliant
+   products and the fixes posted by the vendors, where these are
+   available.  The TBIT web page [TBIT] lists some of the web servers
+   affected by this faulty equipment.  We mention this in this document
+   as a warning to the community of this problem.
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 16]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   To provide robust connectivity even in the presence of such faulty
+   equipment, a host that receives a RST in response to the transmission
+   of an ECN-setup SYN packet MAY resend a SYN with CWR and ECE cleared.
+   This could result in a TCP connection being established without using
+   ECN.
+
+   A host that receives no reply to an ECN-setup SYN within the normal
+   SYN retransmission timeout interval MAY resend the SYN and any
+   subsequent SYN retransmissions with CWR and ECE cleared.  To overcome
+   normal packet loss that results in the original SYN being lost, the
+   originating host may retransmit one or more ECN-setup SYN packets
+   before giving up and retransmitting the SYN with the CWR and ECE bits
+   cleared.
+
+   We note that in this case, the following example scenario is
+   possible:
+
+   (1) Host A: Sends an ECN-setup SYN.
+   (2) Host B: Sends an ECN-setup SYN-ACK, packet is dropped or delayed.
+   (3) Host A: Sends a non-ECN-setup SYN.
+   (4) Host B: Sends a non-ECN-setup SYN-ACK.
+
+   We note that in this case, following the procedures above, neither
+   Host A nor Host B may set the ECT bit on data packets.  Further, an
+   important consequence of the rules for ECN setup and usage in Section
+   6.1.1 is that a host is forbidden from using the reception of ECT
+   data packets as an implicit signal that the other host is ECN-
+   capable.
+
+6.1.1.2.  Robust TCP Initialization with an Echoed Reserved Field
+
+   There is the question of why we chose to have the TCP sending the SYN
+   set two ECN-related flags in the Reserved field of the TCP header for
+   the SYN packet, while the responding TCP sending the SYN-ACK sets
+   only one ECN-related flag in the SYN-ACK packet.  This asymmetry is
+   necessary for the robust negotiation of ECN-capability with some
+   deployed TCP implementations.  There exists at least one faulty TCP
+   implementation in which TCP receivers set the Reserved field of the
+   TCP header in ACK packets (and hence the SYN-ACK) simply to reflect
+   the Reserved field of the TCP header in the received data packet.
+   Because the TCP SYN packet sets the ECN-Echo and CWR flags to
+   indicate ECN-capability, while the SYN-ACK packet sets only the ECN-
+   Echo flag, the sending TCP correctly interprets a receiver's
+   reflection of its own flags in the Reserved field as an indication
+   that the receiver is not ECN-capable.  The sending TCP is not misled
+   by a faulty TCP implementation sending a SYN-ACK packet that simply
+   reflects the Reserved field of the incoming SYN packet.
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 17]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+6.1.2.  The TCP Sender
+
+   For a TCP connection using ECN, new data packets are transmitted with
+   an ECT codepoint set in the IP header.  When only one ECT codepoint
+   is needed by a sender for all packets sent on a TCP connection,
+   ECT(0) SHOULD be used.  If the sender receives an ECN-Echo (ECE) ACK
+   packet (that is, an ACK packet with the ECN-Echo flag set in the TCP
+   header), then the sender knows that congestion was encountered in the
+   network on the path from the sender to the receiver.  The indication
+   of congestion should be treated just as a congestion loss in non-
+   ECN-Capable TCP. That is, the TCP source halves the congestion window
+   "cwnd" and reduces the slow start threshold "ssthresh".  The sending
+   TCP SHOULD NOT increase the congestion window in response to the
+   receipt of an ECN-Echo ACK packet.
+
+   TCP should not react to congestion indications more than once every
+   window of data (or more loosely, more than once every round-trip
+   time). That is, the TCP sender's congestion window should be reduced
+   only once in response to a series of dropped and/or CE packets from a
+   single window of data.  In addition, the TCP source should not
+   decrease the slow-start threshold, ssthresh, if it has been decreased
+   within the last round trip time.  However, if any retransmitted
+   packets are dropped, then this is interpreted by the source TCP as a
+   new instance of congestion.
+
+   After the source TCP reduces its congestion window in response to a
+   CE packet, incoming acknowledgments that continue to arrive can
+   "clock out" outgoing packets as allowed by the reduced congestion
+   window.  If the congestion window consists of only one MSS (maximum
+   segment size), and the sending TCP receives an ECN-Echo ACK packet,
+   then the sending TCP should in principle still reduce its congestion
+   window in half. However, the value of the congestion window is
+   bounded below by a value of one MSS.  If the sending TCP were to
+   continue to send, using a congestion window of 1 MSS, this results in
+   the transmission of one packet per round-trip time.  It is necessary
+   to still reduce the sending rate of the TCP sender even further, on
+   receipt of an ECN-Echo packet when the congestion window is one.  We
+   use the retransmit timer as a means of reducing the rate further in
+   this circumstance.  Therefore, the sending TCP MUST reset the
+   retransmit timer on receiving the ECN-Echo packet when the congestion
+   window is one.  The sending TCP will then be able to send a new
+   packet only when the retransmit timer expires.
+
+   When an ECN-Capable TCP sender reduces its congestion window for any
+   reason (because of a retransmit timeout, a Fast Retransmit, or in
+   response to an ECN Notification), the TCP sender sets the CWR flag in
+   the TCP header of the first new data packet sent after the window
+   reduction.  If that data packet is dropped in the network, then the
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 18]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   sending TCP will have to reduce the congestion window again and
+   retransmit the dropped packet.
+
+   We ensure that the "Congestion Window Reduced" information is
+   reliably delivered to the TCP receiver.  This comes about from the
+   fact that if the new data packet carrying the CWR flag is dropped,
+   then the TCP sender will have to again reduce its congestion window,
+   and send another new data packet with the CWR flag set.  Thus, the
+   CWR bit in the TCP header SHOULD NOT be set on retransmitted packets.
+
+   When the TCP data sender is ready to set the CWR bit after reducing
+   the congestion window, it SHOULD set the CWR bit only on the first
+   new data packet that it transmits.
+
+   [Floyd94] discusses TCP's response to ECN in more detail.  [Floyd98]
+   discusses the validation test in the ns simulator, which illustrates
+   a wide range of ECN scenarios. These scenarios include the following:
+   an ECN followed by another ECN, a Fast Retransmit, or a Retransmit
+   Timeout; a Retransmit Timeout or a Fast Retransmit followed by an
+   ECN; and a congestion window of one packet followed by an ECN.
+
+   TCP follows existing algorithms for sending data packets in response
+   to incoming ACKs, multiple duplicate acknowledgments, or retransmit
+   timeouts [RFC2581].  TCP also follows the normal procedures for
+   increasing the congestion window when it receives ACK packets without
+   the ECN-Echo bit set [RFC2581].
+
+6.1.3.  The TCP Receiver
+
+   When TCP receives a CE data packet at the destination end-system, the
+   TCP data receiver sets the ECN-Echo flag in the TCP header of the
+   subsequent ACK packet.  If there is any ACK withholding implemented,
+   as in current "delayed-ACK" TCP implementations where the TCP
+   receiver can send an ACK for two arriving data packets, then the
+   ECN-Echo flag in the ACK packet will be set to '1' if the CE
+   codepoint is set in any of the data packets being acknowledged.  That
+   is, if any of the received data packets are CE packets, then the
+   returning ACK has the ECN-Echo flag set.
+
+   To provide robustness against the possibility of a dropped ACK packet
+   carrying an ECN-Echo flag, the TCP receiver sets the ECN-Echo flag in
+   a series of ACK packets sent subsequently.  The TCP receiver uses the
+   CWR flag received from the TCP sender to determine when to stop
+   setting the ECN-Echo flag.
+
+   After a TCP receiver sends an ACK packet with the ECN-Echo bit set,
+   that TCP receiver continues to set the ECN-Echo flag in all the ACK
+   packets it sends (whether they acknowledge CE data packets or non-CE
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 19]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   data packets) until it receives a CWR packet (a packet with the CWR
+   flag set).  After the receipt of the CWR packet, acknowledgments for
+   subsequent non-CE data packets do not have the ECN-Echo flag set. If
+   another CE packet is received by the data receiver, the receiver
+   would once again send ACK packets with the ECN-Echo flag set.  While
+   the receipt of a CWR packet does not guarantee that the data sender
+   received the ECN-Echo message, this does suggest that the data sender
+   reduced its congestion window at some point *after* it sent the data
+   packet for which the CE codepoint was set.
+
+   We have already specified that a TCP sender is not required to reduce
+   its congestion window more than once per window of data.  Some care
+   is required if the TCP sender is to avoid unnecessary reductions of
+   the congestion window when a window of data includes both dropped
+   packets and (marked) CE packets.  This is illustrated in [Floyd98].
+
+6.1.4.  Congestion on the ACK-path
+
+   For the current generation of TCP congestion control algorithms, pure
+   acknowledgement packets (i.e., packets that do not contain any
+   accompanying data) MUST be sent with the not-ECT codepoint.  Current
+   TCP receivers have no mechanisms for reducing traffic on the ACK-path
+   in response to congestion notification.  Mechanisms for responding to
+   congestion on the ACK-path are areas for current and future research.
+   (One simple possibility would be for the sender to reduce its
+   congestion window when it receives a pure ACK packet with the CE
+   codepoint set). For current TCP implementations, a single dropped ACK
+   generally has only a very small effect on the TCP's sending rate.
+
+6.1.5.  Retransmitted TCP packets
+
+   This document specifies ECN-capable TCP implementations MUST NOT set
+   either ECT codepoint (ECT(0) or ECT(1)) in the IP header for
+   retransmitted data packets, and that the TCP data receiver SHOULD
+   ignore the ECN field on arriving data packets that are outside of the
+   receiver's current window.  This is for greater security against
+   denial-of-service attacks, as well as for robustness of the ECN
+   congestion indication with packets that are dropped later in the
+   network.
+
+   First, we note that if the TCP sender were to set an ECT codepoint on
+   a retransmitted packet, then if an unnecessarily-retransmitted packet
+   was later dropped in the network, the end nodes would never receive
+   the indication of congestion from the router setting the CE
+   codepoint.  Thus, setting an ECT codepoint on retransmitted data
+   packets is not consistent with the robust delivery of the congestion
+   indication even for packets that are later dropped in the network.
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 20]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   In addition, an attacker capable of spoofing the IP source address of
+   the TCP sender could send data packets with arbitrary sequence
+   numbers, with the CE codepoint set in the IP header.  On receiving
+   this spoofed data packet, the TCP data receiver would determine that
+   the data does not lie in the current receive window, and return a
+   duplicate acknowledgement.  We define an out-of-window packet at the
+   TCP data receiver as a data packet that lies outside the receiver's
+   current window.  On receiving an out-of-window packet, the TCP data
+   receiver has to decide whether or not to treat the CE codepoint in
+   the packet header as a valid indication of congestion, and therefore
+   whether to return ECN-Echo indications to the TCP data sender.  If
+   the TCP data receiver ignored the CE codepoint in an out-of-window
+   packet, then the TCP data sender would not receive this possibly-
+   legitimate indication of congestion from the network, resulting in a
+   violation of end-to-end congestion control.  On the other hand, if
+   the TCP data receiver honors the CE indication in the out-of-window
+   packet, and reports the indication of congestion to the TCP data
+   sender, then the malicious node that created the spoofed, out-of-
+   window packet has successfully "attacked" the TCP connection by
+   forcing the data sender to unnecessarily reduce (halve) its
+   congestion window.  To prevent such a denial-of-service attack, we
+   specify that a legitimate TCP data sender MUST NOT set an ECT
+   codepoint on retransmitted data packets, and that the TCP data
+   receiver SHOULD ignore the CE codepoint on out-of-window packets.
+
+   One drawback of not setting ECT(0) or ECT(1) on retransmitted packets
+   is that it denies ECN protection for retransmitted packets.  However,
+   for an ECN-capable TCP connection in a fully-ECN-capable environment
+   with mild congestion, packets should rarely be dropped due to
+   congestion in the first place, and so instances of retransmitted
+   packets should rarely arise.  If packets are being retransmitted,
+   then there are already packet losses (from corruption or from
+   congestion) that ECN has been unable to prevent.
+
+   We note that if the router sets the CE codepoint for an ECN-capable
+   data packet within a TCP connection, then the TCP connection is
+   guaranteed to receive that indication of congestion, or to receive
+   some other indication of congestion within the same window of data,
+   even if this packet is dropped or reordered in the network.  We
+   consider two cases, when the packet is later retransmitted, and when
+   the packet is not later retransmitted.
+
+   In the first case, if the packet is either dropped or delayed, and at
+   some point retransmitted by the data sender, then the retransmission
+   is a result of a Fast Retransmit or a Retransmit Timeout for either
+   that packet or for some prior packet in the same window of data.  In
+   this case, because the data sender already has retransmitted this
+   packet, we know that the data sender has already responded to an
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 21]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   indication of congestion for some packet within the same window of
+   data as the original packet.  Thus, even if the first transmission of
+   the packet is dropped in the network, or is delayed, if it had the CE
+   codepoint set, and is later ignored by the data receiver as an out-
+   of-window packet, this is not a problem, because the sender has
+   already responded to an indication of congestion for that window of
+   data.
+
+   In the second case, if the packet is never retransmitted by the data
+   sender, then this data packet is the only copy of this data received
+   by the data receiver, and therefore arrives at the data receiver as
+   an in-window packet, regardless of how much the packet might be
+   delayed or reordered.  In this case, if the CE codepoint is set on
+   the packet within the network, this will be treated by the data
+   receiver as a valid indication of congestion.
+
+6.1.6.  TCP Window Probes.
+
+   When the TCP data receiver advertises a zero window, the TCP data
+   sender sends window probes to determine if the receiver's window has
+   increased.  Window probe packets do not contain any user data except
+   for the sequence number, which is a byte.  If a window probe packet
+   is dropped in the network, this loss is not detected by the receiver.
+   Therefore, the TCP data sender MUST NOT set either an ECT codepoint
+   or the CWR bit on window probe packets.
+
+   However, because window probes use exact sequence numbers, they
+   cannot be easily spoofed in denial-of-service attacks.  Therefore, if
+   a window probe arrives with the CE codepoint set, then the receiver
+   SHOULD respond to the ECN indications.
+
+7.  Non-compliance by the End Nodes
+
+   This section discusses concerns about the vulnerability of ECN to
+   non-compliant end-nodes (i.e., end nodes that set the ECT codepoint
+   in transmitted packets but do not respond to received CE packets).
+   We argue that the addition of ECN to the IP architecture will not
+   significantly increase the current vulnerability of the architecture
+   to unresponsive flows.
+
+   Even for non-ECN environments, there are serious concerns about the
+   damage that can be done by non-compliant or unresponsive flows (that
+   is, flows that do not respond to congestion control indications by
+   reducing their arrival rate at the congested link).  For example, an
+   end-node could "turn off congestion control" by not reducing its
+   congestion window in response to packet drops. This is a concern for
+   the current Internet.  It has been argued that routers will have to
+   deploy mechanisms to detect and differentially treat packets from
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 22]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   non-compliant flows [RFC2309,FF99].  It has also been suggested that
+   techniques such as end-to-end per-flow scheduling and isolation of
+   one flow from another, differentiated services, or end-to-end
+   reservations could remove some of the more damaging effects of
+   unresponsive flows.
+
+   It might seem that dropping packets in itself is an adequate
+   deterrent for non-compliance, and that the use of ECN removes this
+   deterrent.  We would argue in response that (1) ECN-capable routers
+   preserve packet-dropping behavior in times of high congestion; and
+   (2) even in times of high congestion, dropping packets in itself is
+   not an adequate deterrent for non-compliance.
+
+   First, ECN-Capable routers will only mark packets (as opposed to
+   dropping them) when the packet marking rate is reasonably low. During
+   periods where the average queue size exceeds an upper threshold, and
+   therefore the potential packet marking rate would be high, our
+   recommendation is that routers drop packets rather than set the CE
+   codepoint in packet headers.
+
+   During the periods of low or moderate packet marking rates when ECN
+   would be deployed, there would be little deterrent effect on
+   unresponsive flows of dropping rather than marking those packets. For
+   example, delay-insensitive flows using reliable delivery might have
+   an incentive to increase rather than to decrease their sending rate
+   in the presence of dropped packets.  Similarly, delay-sensitive flows
+   using unreliable delivery might increase their use of FEC in response
+   to an increased packet drop rate, increasing rather than decreasing
+   their sending rate.  For the same reasons, we do not believe that
+   packet dropping itself is an effective deterrent for non-compliance
+   even in an environment of high packet drop rates, when all flows are
+   sharing the same packet drop rate.
+
+   Several methods have been proposed to identify and restrict non-
+   compliant or unresponsive flows. The addition of ECN to the network
+   environment would not in any way increase the difficulty of designing
+   and deploying such mechanisms. If anything, the addition of ECN to
+   the architecture would make the job of identifying unresponsive flows
+   slightly easier.  For example, in an ECN-Capable environment routers
+   are not limited to information about packets that are dropped or have
+   the CE codepoint set at that router itself; in such an environment,
+   routers could also take note of arriving CE packets that indicate
+   congestion encountered by that packet earlier in the path.
+
+
+
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 23]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+8.  Non-compliance in the Network
+
+   This section considers the issues when a router is operating,
+   possibly maliciously, to modify either of the bits in the ECN field.
+   We note that in IPv4, the IP header is protected from bit errors by a
+   header checksum;  this is not the case in IPv6.  Thus for IPv6 the
+   ECN field can be accidentally modified by bit errors on links or in
+   routers without being detected by an IP header checksum.
+
+   By tampering with the bits in the ECN field, an adversary (or a
+   broken router) could do one or more of the following: falsely report
+   congestion, disable ECN-Capability for an individual packet, erase
+   the ECN congestion indication, or falsely indicate ECN-Capability.
+   Section 18 systematically examines the various cases by which the ECN
+   field could be modified.  The important criterion considered in
+   determining the consequences of such modifications is whether it is
+   likely to lead to poorer behavior in any dimension (throughput,
+   delay, fairness or functionality) than if a router were to drop a
+   packet.
+
+   The first two possible changes, falsely reporting congestion or
+   disabling ECN-Capability for an individual packet, are no worse than
+   if the router were to simply drop the packet.  From a congestion
+   control point of view, setting the CE codepoint in the absence of
+   congestion by a non-compliant router would be no worse than a router
+   dropping a packet unnecessarily. By "erasing" an ECT codepoint of a
+   packet that is later dropped in the network, a router's actions could
+   result in an unnecessary packet drop for that packet later in the
+   network.
+
+   However, as discussed in Section 18, a router that erases the ECN
+   congestion indication or falsely indicates ECN-Capability could
+   potentially do more damage to the flow than if it has simply dropped
+   the packet.  A rogue or broken router that "erased" the CE codepoint
+   in arriving CE packets would prevent that indication of congestion
+   from reaching downstream receivers.  This could result in the failure
+   of congestion control for that flow and a resulting increase in
+   congestion in the network, ultimately resulting in subsequent packets
+   dropped for this flow as the average queue size increased at the
+   congested gateway.
+
+   Section 19 considers the potential repercussions of subverting end-
+   to-end congestion control by either falsely indicating ECN-
+   Capability, or by erasing the congestion indication in ECN (the CE-
+   codepoint).  We observe in Section 19 that the consequence of
+   subverting ECN-based congestion control may lead to potential
+   unfairness, but this is likely to be no worse than the subversion of
+   either ECN-based or packet-based congestion control by the end nodes.
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 24]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+8.1.  Complications Introduced by Split Paths
+
+   If a router or other network element has access to all of the packets
+   of a flow, then that router could do no more damage to a flow by
+   altering the ECN field than it could by simply dropping all of the
+   packets from that flow.  However, in some cases, a malicious or
+   broken router might have access to only a subset of the packets from
+   a flow.  The question is as follows:  can this router, by altering
+   the ECN field in this subset of the packets, do more damage to that
+   flow than if it has simply dropped that set of the packets?
+
+   This is also discussed in detail in Section 18, which concludes as
+   follows:  It is true that the adversary that has access only to a
+   subset of packets in an aggregate might, by subverting ECN-based
+   congestion control, be able to deny the benefits of ECN to the other
+   packets in the aggregate.  While this is undesirable, this is not a
+   sufficient concern to result in disabling ECN.
+
+9.  Encapsulated Packets
+
+9.1.  IP packets encapsulated in IP
+
+   The encapsulation of IP packet headers in tunnels is used in many
+   places, including IPsec and IP in IP [RFC2003].  This section
+   considers issues related to interactions between ECN and IP tunnels,
+   and specifies two alternative solutions.  This discussion is
+   complemented by RFC 2983's discussion of interactions between
+   Differentiated Services and IP tunnels of various forms [RFC2983],
+   as Differentiated Services uses the remaining six bits of the IP
+   header octet that is used by ECN (see Figure 2 in Section 5).
+
+
+   Some IP tunnel modes are based on adding a new "outer" IP header that
+   encapsulates the original, or "inner" IP header and its associated
+   packet.  In many cases, the new "outer" IP header may be added and
+   removed at intermediate points along a connection, enabling the
+   network to establish a tunnel without requiring endpoint
+   participation.  We denote tunnels that specify that the outer header
+   be discarded at tunnel egress as "simple tunnels".
+
+   ECN uses the ECN field in the IP header for signaling between routers
+   and connection endpoints.  ECN interacts with IP tunnels based on the
+   treatment of the ECN field in the IP header.  In simple IP tunnels
+   the octet containing the ECN field is copied or mapped from the inner
+   IP header to the outer IP header at IP tunnel ingress, and the outer
+   header's copy of this field is discarded at IP tunnel egress.  If the
+   outer header were to be simply discarded without taking care to deal
+   with the ECN field, and an ECN-capable router were to set the CE
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 25]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   (Congestion Experienced) codepoint within a packet in a simple IP
+   tunnel, this indication would be discarded at tunnel egress, losing
+   the indication of congestion.
+
+   Thus, the use of ECN over simple IP tunnels would result in routers
+   attempting to use the outer IP header to signal congestion to
+   endpoints, but those congestion warnings never arriving because the
+   outer header is discarded at the tunnel egress point.  This problem
+   was encountered with ECN and IPsec in tunnel mode, and RFC 2481
+   recommended that ECN not be used with the older simple IPsec tunnels
+   in order to avoid this behavior and its consequences.  When ECN
+   becomes widely deployed, then simple tunnels likely to carry ECN-
+   capable traffic will have to be changed.  If ECN-capable traffic is
+   carried by a simple tunnel through a congested, ECN-capable router,
+   this could result in subsequent packets being dropped for this flow
+   as the average queue size increases at the congested router, as
+   discussed in Section 8 above.
+
+   From a security point of view, the use of ECN in the outer header of
+   an IP tunnel might raise security concerns because an adversary could
+   tamper with the ECN information that propagates beyond the tunnel
+   endpoint.  Based on an analysis in Sections 18 and 19 of these
+   concerns and the resultant risks, our overall approach is to make
+   support for ECN an option for IP tunnels, so that an IP tunnel can be
+   specified or configured either to use ECN or not to use ECN in the
+   outer header of the tunnel.  Thus, in environments or tunneling
+   protocols where the risks of using ECN are judged to outweigh its
+   benefits, the tunnel can simply not use ECN in the outer header.
+   Then the only indication of congestion experienced at routers within
+   the tunnel would be through packet loss.
+
+   The result is that there are two viable options for the behavior of
+   ECN-capable connections over an IP tunnel, including IPsec tunnels:
+
+      * A limited-functionality option in which ECN is preserved in the
+        inner header, but disabled in the outer header.  The only
+        mechanism available for signaling congestion occurring within
+        the tunnel in this case is dropped packets.
+
+      * A full-functionality option that supports ECN in both the inner
+        and outer headers, and propagates congestion warnings from nodes
+        within the tunnel to endpoints.
+
+   Support for these options requires varying amounts of changes to IP
+   header processing at tunnel ingress and egress.  A small subset of
+   these changes sufficient to support only the limited-functionality
+   option would be sufficient to eliminate any incompatibility between
+   ECN and IP tunnels.
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 26]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   One goal of this document is to give guidance about the tradeoffs
+   between the limited-functionality and full-functionality options.  A
+   full discussion of the potential effects of an adversary's
+   modifications of the ECN field is given in Sections 18 and 19.
+
+9.1.1.  The Limited-functionality and Full-functionality Options
+
+   The limited-functionality option for ECN encapsulation in IP tunnels
+   is for the not-ECT codepoint to be set in the outside (encapsulating)
+   header regardless of the value of the ECN field in the inside
+   (encapsulated) header.  With this option, the ECN field in the inner
+   header is not altered upon de-capsulation.  The disadvantage of this
+   approach is that the flow does not have ECN support for that part of
+   the path that is using IP tunneling, even if the encapsulated packet
+   (from the original TCP sender) is ECN-Capable.  That is, if the
+   encapsulated packet arrives at a congested router that is ECN-
+   capable, and the router can decide to drop or mark the packet as an
+   indication of congestion to the end nodes, the router will not be
+   permitted to set the CE codepoint in the packet header, but instead
+   will have to drop the packet.
+
+   The full-functionality option for ECN encapsulation is to copy the
+   ECN codepoint of the inside header to the outside header on
+   encapsulation if the inside header is not-ECT or ECT, and to set the
+   ECN codepoint of the outside header to ECT(0) if the ECN codepoint of
+   the inside header is CE.  On decapsulation, if the CE codepoint is
+   set on the outside header, then the CE codepoint is also set in the
+   inner header.  Otherwise, the ECN codepoint on the inner header is
+   left unchanged.  That is, for full ECN support the encapsulation and
+   decapsulation processing involves the following:  At tunnel ingress,
+   the full-functionality option sets the ECN codepoint in the outer
+   header.  If the ECN codepoint in the inner header is not-ECT or ECT,
+   then it is copied to the ECN codepoint in the outer header.  If the
+   ECN codepoint in the inner header is CE, then the ECN codepoint in
+   the outer header is set to ECT(0).  Upon decapsulation at the tunnel
+   egress, the full-functionality option sets the CE codepoint in the
+   inner header if the CE codepoint is set in the outer header.
+   Otherwise, no change is made to this field of the inner header.
+
+   With the full-functionality option, a flow can take advantage of ECN
+   in those parts of the path that might use IP tunneling.  The
+   disadvantage of the full-functionality option from a security
+   perspective is that the IP tunnel cannot protect the flow from
+   certain modifications to the ECN bits in the IP header within the
+   tunnel.  The potential dangers from modifications to the ECN bits in
+   the IP header are described in detail in Sections 18 and 19.
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 27]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+      (1) An IP tunnel MUST modify the handling of the DS field octet at
+      IP tunnel endpoints by implementing either the limited-
+      functionality or the full-functionality option.
+
+      (2) Optionally, an IP tunnel MAY enable the endpoints of an IP
+      tunnel to negotiate the choice between the limited-functionality
+      and the full-functionality option for ECN in the tunnel.
+
+   The minimum required to make ECN usable with IP tunnels is the
+   limited-functionality option, which prevents ECN from being enabled
+   in the outer header of the tunnel.  Full support for ECN requires the
+   use of the full-functionality option.  If there are no optional
+   mechanisms for the tunnel endpoints to negotiate a choice between the
+   limited-functionality or full-functionality option, there can be a
+   pre-existing agreement between the tunnel endpoints about whether to
+   support the limited-functionality or the full-functionality ECN
+   option.
+
+   All IP tunnels MUST implement the limited-functionality option, and
+   SHOULD support the full-functionality option.
+
+   In addition, it is RECOMMENDED that packets with the CE codepoint in
+   the outer header be dropped if they arrive at the tunnel egress point
+   for a tunnel that uses the limited-functionality option, or for a
+   tunnel that uses the full-functionality option but for which the
+   not-ECT codepoint is set in the inner header.  This is motivated by
+   backwards compatibility and to ensure that no unauthorized
+   modifications of the ECN field take place, and is discussed further
+   in the next Section (9.1.2).
+
+9.1.2.  Changes to the ECN Field within an IP Tunnel.
+
+   The presence of a copy of the ECN field in the inner header of an IP
+   tunnel mode packet provides an opportunity for detection of
+   unauthorized modifications to the ECN field in the outer header.
+   Comparison of the ECT fields in the inner and outer headers falls
+   into two categories for implementations that conform to this
+   document:
+
+      * If the IP tunnel uses the full-functionality option, then the
+        not-ECT codepoint should be set in the outer header if and only
+        if it is also set in the inner header.
+
+      * If the tunnel uses the limited-functionality option, then the
+        not-ECT codepoint should be set in the outer header.
+
+   Receipt of a packet not satisfying the appropriate condition could be
+   a cause of concern.
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 28]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   Consider the case of an IP tunnel where the tunnel ingress point has
+   not been updated to this document's requirements, while the tunnel
+   egress point has been updated to support ECN.  In this case, the IP
+   tunnel is not explicitly configured to support the full-functionality
+   ECN option. However, the tunnel ingress point is behaving identically
+   to a tunnel ingress point that supports the full-functionality
+   option.  If packets from an ECN-capable connection use this tunnel,
+   the ECT codepoint will be set in the outer header at the tunnel
+   ingress point.  Congestion within the tunnel may then result in ECN-
+   capable routers setting CE in the outer header.  Because the tunnel
+   has not been explicitly configured to support the full-functionality
+   option, the tunnel egress point expects the not-ECT codepoint to be
+   set in the outer header.  When an ECN-capable tunnel egress point
+   receives a packet with the ECT or CE codepoint in the outer header,
+   in a tunnel that has not been configured to support the full-
+   functionality option, that packet should be processed, according to
+   whether the CE codepoint was set, as follows.  It is RECOMMENDED that
+   on a tunnel that has not been configured to support the full-
+   functionality option, packets should be dropped at the egress point
+   if the CE codepoint is set in the outer header but not in the inner
+   header, and should be forwarded otherwise.
+
+   An IP tunnel cannot provide protection against erasure of congestion
+   indications based on changing the ECN codepoint from CE to ECT.  The
+   erasure of congestion indications may impact the network and other
+   flows in ways that would not be possible in the absence of ECN.  It
+   is important to note that erasure of congestion indications can only
+   be performed to congestion indications placed by nodes within the
+   tunnel; the copy of the ECN field in the inner header preserves
+   congestion notifications from nodes upstream of the tunnel ingress
+   (unless the inner header is also erased).  If erasure of congestion
+   notifications is judged to be a security risk that exceeds the
+   congestion management benefits of ECN, then tunnels could be
+   specified or configured to use the limited-functionality option.
+
+9.2.  IPsec Tunnels
+
+   IPsec supports secure communication over potentially insecure network
+   components such as intermediate routers.  IPsec protocols support two
+   operating modes, transport mode and tunnel mode, that span a wide
+   range of security requirements and operating environments.  Transport
+   mode security protocol header(s) are inserted between the IP (IPv4 or
+   IPv6) header and higher layer protocol headers (e.g., TCP), and hence
+   transport mode can only be used for end-to-end security on a
+   connection.  IPsec tunnel mode is based on adding a new "outer" IP
+   header that encapsulates the original, or "inner" IP header and its
+   associated packet.  Tunnel mode security headers are inserted between
+   these two IP headers.  In contrast to transport mode, the new "outer"
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 29]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   IP header and tunnel mode security headers can be added and removed
+   at intermediate points along a connection, enabling security gateways
+   to secure vulnerable portions of a connection without requiring
+   endpoint participation in the security protocols.  An important
+   aspect of tunnel mode security is that in the original specification,
+   the outer header is discarded at tunnel egress, ensuring that
+   security threats based on modifying the IP header do not propagate
+   beyond that tunnel endpoint.  Further discussion of IPsec can be
+   found in [RFC2401].
+
+   The IPsec protocol as originally defined in [ESP, AH] required that
+   the inner header's ECN field not be changed by IPsec decapsulation
+   processing at a tunnel egress node; this would have ruled out the
+   possibility of full-functionality mode for ECN.  At the same time,
+   this would ensure that an adversary's modifications to the ECN field
+   cannot be used to launch theft- or denial-of-service attacks across
+   an IPsec tunnel endpoint, as any such modifications will be discarded
+   at the tunnel endpoint.
+
+   In principle, permitting the use of ECN functionality in the outer
+   header of an IPsec tunnel raises security concerns because an
+   adversary could tamper with the information that propagates beyond
+   the tunnel endpoint.  Based on an analysis (included in Sections 18
+   and 19) of these concerns and the associated risks, our overall
+   approach has been to provide configuration support for IPsec changes
+   to remove the conflict with ECN.
+
+   In particular, in tunnel mode the IPsec tunnel MUST support the
+   limited-functionality option outlined in Section 9.1.1, and SHOULD
+   support the full-functionality option outlined in Section 9.1.1.
+
+   This makes permission to use ECN functionality in the outer header of
+   an IPsec tunnel a configurable part of the corresponding IPsec
+   Security Association (SA), so that it can be disabled in situations
+   where the risks are judged to outweigh the benefits.  The result is
+   that an IPsec security administrator is presented with two
+   alternatives for the behavior of ECN-capable connections within an
+   IPsec tunnel, the limited-functionality alternative and full-
+   functionality alternative described earlier.
+
+   In addition, this document specifies how the endpoints of an IPsec
+   tunnel could negotiate enabling ECN functionality in the outer
+   headers of that tunnel based on security policy.  The ability to
+   negotiate ECN usage between tunnel endpoints would enable a security
+   administrator to disable ECN in situations where she believes the
+   risks (e.g., of lost congestion notifications) outweigh the benefits
+   of ECN.
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 30]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   The IPsec protocol, as defined in [ESP, AH], does not include the IP
+   header's ECN field in any of its cryptographic calculations (in the
+   case of tunnel mode, the outer IP header's ECN field is not
+   included).  Hence modification of the ECN field by a network node has
+   no effect on IPsec's end-to-end security, because it cannot cause any
+   IPsec integrity check to fail.  As a consequence, IPsec does not
+   provide any defense against an adversary's modification of the ECN
+   field (i.e., a man-in-the-middle attack), as the adversary's
+   modification will also have no effect on IPsec's end-to-end security.
+   In some environments, the ability to modify the ECN field without
+   affecting IPsec integrity checks may constitute a covert channel; if
+   it is necessary to eliminate such a channel or reduce its bandwidth,
+   then the IPsec tunnel should be run in limited-functionality mode.
+
+9.2.1.  Negotiation between Tunnel Endpoints
+
+   This section describes the detailed changes to enable usage of ECN
+   over IPsec tunnels, including the negotiation of ECN support between
+   tunnel endpoints.  This is supported by three changes to IPsec:
+
+      * An optional Security Association Database (SAD) field indicating
+        whether tunnel encapsulation and decapsulation processing allows
+        or forbids ECN usage in the outer IP header.
+
+      * An optional Security Association Attribute that enables
+        negotiation of this SAD field between the two endpoints of an SA
+        that supports tunnel mode.
+
+      * Changes to tunnel mode encapsulation and decapsulation
+        processing to allow or forbid ECN usage in the outer IP header
+        based on the value of the SAD field.  When ECN usage is allowed
+        in the outer IP header, the ECT codepoint is set in the outer
+        header for ECN-capable connections and congestion notifications
+        (indicated by the CE codepoint) from such connections are
+        propagated to the inner header at tunnel egress.
+
+   If negotiation of ECN usage is implemented, then the SAD field SHOULD
+   also be implemented.  On the other hand, negotiation of ECN usage is
+   OPTIONAL in all cases, even for implementations that support the SAD
+   field.  The encapsulation and decapsulation processing changes are
+   REQUIRED, but MAY be implemented without the other two changes by
+   assuming that ECN usage is always forbidden.  The full-functionality
+   alternative for ECN usage over IPsec tunnels consists of the SAD
+   field and the full version of encapsulation and decapsulation
+   processing changes, with or without the OPTIONAL negotiation support.
+   The limited-functionality alternative consists of a subset of the
+   encapsulation and decapsulation changes that always forbids ECN
+   usage.
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 31]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   These changes are covered further in the following three subsections.
+
+9.2.1.1.  ECN Tunnel Security Association Database Field
+
+   Full ECN functionality adds a new field to the SAD (see [RFC2401]):
+
+      ECN Tunnel: allowed or forbidden.
+
+      Indicates whether ECN-capable connections using this SA in tunnel
+      mode are permitted to receive ECN congestion notifications for
+      congestion occurring within the tunnel.  The allowed value enables
+      ECN congestion notifications.  The forbidden value disables such
+      notifications, causing all congestion to be indicated via dropped
+      packets.
+
+      [OPTIONAL.  The value of this field SHOULD be assumed to be
+      "forbidden" in implementations that do not support it.]
+
+   If this attribute is implemented, then the SA specification in a
+   Security Policy Database (SPD) entry MUST support a corresponding
+   attribute, and this SPD attribute MUST be covered by the SPD
+   administrative interface (currently described in Section 4.4.1 of
+   [RFC2401]).
+
+9.2.1.2.  ECN Tunnel Security Association Attribute
+
+   A new IPsec Security Association Attribute is defined to enable the
+   support for ECN congestion notifications based on the outer IP header
+   to be negotiated for IPsec tunnels (see [RFC2407]).  This attribute
+   is OPTIONAL, although implementations that support it SHOULD also
+   support the SAD field defined in Section 9.2.1.1.
+
+   Attribute Type
+
+           class               value           type
+     -------------------------------------------------
+     ECN Tunnel                 10             Basic
+
+   The IPsec SA Attribute value 10 has been allocated by IANA to
+   indicate that the ECN Tunnel SA Attribute is being negotiated; the
+   type of this attribute is Basic (see Section 4.5 of [RFC2407]).  The
+   Class Values are used to conduct the negotiation.  See [RFC2407,
+   RFC2408, RFC2409] for further information including encoding formats
+   and requirements for negotiating this SA attribute.
+
+
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 32]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   Class Values
+
+   ECN Tunnel
+
+   Specifies whether ECN functionality is allowed to be used with Tunnel
+   Encapsulation Mode.  This affects tunnel encapsulation and
+   decapsulation processing - see Section 9.2.1.3.
+
+   RESERVED          0
+   Allowed           1
+   Forbidden         2
+
+   Values 3-61439 are reserved to IANA.  Values 61440-65535 are for
+   private use.
+
+   If unspecified, the default shall be assumed to be Forbidden.
+
+   ECN Tunnel is a new SA attribute, and hence initiators that use it
+   can expect to encounter responders that do not understand it, and
+   therefore reject proposals containing it.  For backwards
+   compatibility with such implementations initiators SHOULD always also
+   include a proposal without the ECN Tunnel attribute to enable such a
+   responder to select a transform or proposal that does not contain the
+   ECN Tunnel attribute.  RFC 2407 currently requires responders to
+   reject all proposals if any proposal contains an unknown attribute;
+   this requirement is expected to be changed to require a responder not
+   to select proposals or transforms containing unknown attributes.
+
+9.2.1.3.  Changes to IPsec Tunnel Header Processing
+
+   For full ECN support, the encapsulation and decapsulation processing
+   for the IPv4 TOS field and the IPv6 Traffic Class field are changed
+   from that specified in [RFC2401] to the following:
+
+                        <-- How Outer Hdr Relates to Inner Hdr -->
+                        Outer Hdr at                 Inner Hdr at
+   IPv4                 Encapsulator                 Decapsulator
+     Header fields:     --------------------         ------------
+       DS Field         copied from inner hdr (5)    no change
+       ECN Field        constructed (7)              constructed (8)
+
+   IPv6
+     Header fields:
+       DS Field         copied from inner hdr (6)    no change
+       ECN Field        constructed (7)              constructed (8)
+
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 33]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+      (5)(6) If the packet will immediately enter a domain for which the
+      DSCP value in the outer header is not appropriate, that value MUST
+      be mapped to an appropriate value for the domain [RFC2474].  Also
+      see [RFC2475] for further information.
+
+      (7) If the value of the ECN Tunnel field in the SAD entry for this
+      SA is "allowed" and the ECN field in the inner header is set to
+      any value other than CE, copy this ECN field to the outer header.
+      If the ECN field in the inner header is set to CE, then set the
+      ECN field in the outer header to ECT(0).
+
+      (8) If the value of the ECN tunnel field in the SAD entry for this
+      SA is "allowed" and the ECN field in the inner header is set to
+      ECT(0) or ECT(1) and the ECN field in the outer header is set to
+      CE, then copy the ECN field from the outer header to the inner
+      header.  Otherwise, make no change to the ECN field in the inner
+      header.
+
+      (5) and (6) are identical here; both numbers are retained to match
+      the usage in [RFC2401], where the two notes differ.
+
+   The above description applies to implementations that support the ECN
+   Tunnel field in the SAD; such implementations MUST implement this
+   processing instead of the processing of the IPv4 TOS octet and IPv6
+   Traffic Class octet defined in [RFC2401].  This constitutes the
+   full-functionality alternative for ECN usage with IPsec tunnels.
+
+   An implementation that does not support the ECN Tunnel field in the
+   SAD MUST implement this processing by assuming that the value of the
+   ECN Tunnel field of the SAD is "forbidden" for every SA.  In this
+   case, the processing of the ECN field reduces to:
+
+      (7) Set the ECN field to not-ECT in the outer header.
+      (8) Make no change to the ECN field in the inner header.
+
+   This constitutes the limited functionality alternative for ECN usage
+   with IPsec tunnels.
+
+   For backwards compatibility, packets with the CE codepoint set in the
+   outer header SHOULD be dropped if they arrive on an SA that is using
+   the limited-functionality option, or that is using the full-
+   functionality option with the not-ECT codepoint set in the inner
+   header.
+
+
+
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 34]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+9.2.2.  Changes to the ECN Field within an IPsec Tunnel.
+
+   If the ECN Field is changed inappropriately within an IPsec tunnel,
+   and this change is detected at the tunnel egress, then the receipt of
+   a packet not satisfying the appropriate condition for its SA is an
+   auditable event.  An implementation MAY create audit records with
+   per-SA counts of incorrect packets over some time period rather than
+   creating an audit record for each erroneous packet.  Any such audit
+   record SHOULD contain the headers from at least one erroneous packet,
+   but need not contain the headers from every packet represented by the
+   entry.
+
+9.2.3.  Comments for IPsec Support
+
+   Substantial comments were received on two areas of this document
+   during review by the IPsec working group.  This section describes
+   these comments and explains why the proposed changes were not
+   incorporated.
+
+   The first comment indicated that per-node configuration is easier to
+   implement than per-SA configuration.  After serious thought and
+   despite some initial encouragement of per-node configuration, it no
+   longer seems to be a good idea. The concern is that as ECN-awareness
+   is progressively deployed in IPsec, many ECN-aware IPsec
+   implementations will find themselves communicating with a mixture of
+   ECN-aware and ECN-unaware IPsec tunnel endpoints.  In such an
+   environment with per-node configuration, the only reasonable thing to
+   do is forbid ECN usage for all IPsec tunnels, which is not the
+   desired outcome.
+
+   In the second area, several reviewers noted that SA negotiation is
+   complex, and adding to it is non-trivial.  One reviewer suggested
+   using ICMP after tunnel setup as a possible alternative.  The
+   addition to SA negotiation in this document is OPTIONAL and will
+   remain so; implementers are free to ignore it.  The authors believe
+   that the assurance it provides can be useful in a number of
+   situations.  In practice, if this is not implemented, it can be
+   deleted at a subsequent stage in the standards process.  Extending
+   ICMP to negotiate ECN after tunnel setup is more complex than
+   extending SA attribute negotiation.  Some tunnels do not permit
+   traffic to be addressed to the tunnel egress endpoint, hence the ICMP
+   packet would have to be addressed to somewhere else, scanned for by
+   the egress endpoint, and discarded there or at its actual
+   destination.  In addition, ICMP delivery is unreliable, and hence
+   there is a possibility of an ICMP packet being dropped, entailing the
+   invention of yet another ack/retransmit mechanism.  It seems better
+   simply to specify an OPTIONAL extension to the existing SA
+   negotiation mechanism.
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 35]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+9.3.  IP packets encapsulated in non-IP Packet Headers.
+
+   A different set of issues is raised, relative to ECN, when IP
+   packets are encapsulated in tunnels with non-IP packet headers.  This
+   occurs with MPLS [MPLS], GRE [GRE], L2TP [L2TP], and PPTP [PPTP].
+   For these protocols, there is no conflict with ECN; it is just that
+   ECN cannot be used within the tunnel unless an ECN codepoint can be
+   specified for the header of the encapsulating protocol.  Earlier work
+   considered a preliminary proposal for incorporating ECN into MPLS,
+   and proposals for incorporating ECN into GRE, L2TP, or PPTP will be
+   considered as the need arises.
+
+10.  Issues Raised by Monitoring and Policing Devices
+
+   One possibility is that monitoring and policing devices (or more
+   informally, "penalty boxes") will be installed in the network to
+   monitor whether best-effort flows are appropriately responding to
+   congestion, and to preferentially drop packets from flows determined
+   not to be using adequate end-to-end congestion control procedures.
+
+   We recommend that any "penalty box" that detects a flow or an
+   aggregate of flows that is not responding to end-to-end congestion
+   control first change from marking to dropping packets from that flow,
+   before taking any additional action to restrict the bandwidth
+   available to that flow.  Thus, initially, the router may drop packets
+   in which the router would otherwise have set the CE codepoint.
+   This could include dropping those arriving packets for that flow that
+   are ECN-Capable and that already have the CE codepoint set.  In this
+   way, any congestion indications seen by that router for that flow
+   will be guaranteed to also be seen by the end nodes, even in the
+   presence of malicious or broken routers elsewhere in the path.  If we
+   assume that the first action taken at any "penalty box" for an ECN-
+   capable flow will be to drop packets instead of marking them, then
+   there is no way that an adversary that subverts ECN-based end-to-end
+   congestion control can cause a flow to be characterized as being
+   non-cooperative and placed into a more severe action within the
+   "penalty box".
+
+   The monitoring and policing devices that are actually deployed could
+   fall short of the `ideal' monitoring device described above, in that
+   the monitoring is applied not to a single flow, but to an aggregate
+   of flows (e.g., those sharing a single IPsec tunnel).  In this case,
+   the switch from marking to dropping would apply to all of the flows
+   in that aggregate, denying the benefits of ECN to the other flows in
+   the aggregate also.  At the highest level of aggregation, another
+   form of the disabling of ECN happens even in the absence of
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 36]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   monitoring and policing devices, when ECN-Capable RED queues switch
+   from marking to dropping packets as an indication of congestion when
+   the average queue size has exceeded some threshold.
+
+11.  Evaluations of ECN
+
+11.1.  Related Work Evaluating ECN
+
+   This section discusses some of the related work evaluating the use of
+   ECN.  The ECN Web Page [ECN] has pointers to other papers, as well as
+   to implementations of ECN.
+
+   [Floyd94] considers the advantages and drawbacks of adding ECN to the
+   TCP/IP architecture.  As shown in the simulation-based comparisons,
+   one advantage of ECN is to avoid unnecessary packet drops for short
+   or delay-sensitive TCP connections.  A second advantage of ECN is in
+   avoiding some unnecessary retransmit timeouts in TCP.  This paper
+   discusses in detail the integration of ECN into TCP's congestion
+   control mechanisms.  The possible disadvantages of ECN discussed in
+   the paper are that a non-compliant TCP connection could falsely
+   advertise itself as ECN-capable, and that a TCP ACK packet carrying
+   an ECN-Echo message could itself be dropped in the network.  The
+   first of these two issues is discussed in the appendix of this
+   document, and the second is addressed by the addition of the CWR flag
+   in the TCP header.
+
+   Experimental evaluations of ECN include [RFC2884,K98].  The
+   conclusions of [K98] and [RFC2884] are that ECN TCP gets moderately
+   better throughput than non-ECN TCP; that ECN TCP flows are fair
+   towards non-ECN TCP flows; and that ECN TCP is robust with two-way
+   traffic (with congestion in both directions) and with multiple
+   congested gateways.  Experiments with many short web transfers show
+   that, while most of the short connections have similar transfer times
+   with or without ECN, a small percentage of the short connections have
+   very long transfer times for the non-ECN experiments as compared to
+   the ECN experiments.
+
+11.2.  A Discussion of the ECN nonce.
+
+   The use of two ECT codepoints, ECT(0) and ECT(1), can provide a one-
+   bit ECN nonce in packet headers [SCWA99].  The primary motivation for
+   this is the desire to allow mechanisms for the data sender to verify
+   that network elements are not erasing the CE codepoint, and that data
+   receivers are properly reporting to the sender the receipt of packets
+   with the CE codepoint set, as required by the transport protocol.
+   This section discusses issues of backwards compatibility with IP ECN
+   implementations in routers conformant with RFC 2481, in which only
+   one ECT codepoint was defined.  We do not believe that the
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 37]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   incremental deployment of ECN implementations that understand the
+   ECT(1) codepoint will cause significant operational problems.  This
+   is particularly likely to be the case when the deployment of the
+   ECT(1) codepoint begins with routers, before the ECT(1) codepoint
+   starts to be used by end-nodes.
+
+11.2.1.  The Incremental Deployment of ECT(1) in Routers.
+
+   ECN has been an Experimental standard since January 1999, and there
+   are already implementations of ECN in routers that do not understand
+   the ECT(1) codepoint.  When the use of the ECT(1) codepoint is
+   standardized for TCP or for other transport protocols, this could
+   mean that a data sender is using the ECT(1) codepoint, but that this
+   codepoint is not understood by a congested router on the path.
+
+   If allowed by the transport protocol, a data sender would be free not
+   to make use of ECT(1) at all, and to send all ECN-capable packets
+   with the codepoint ECT(0).  However, if an ECN-capable sender is
+   using ECT(1), and the congested router on the path did not understand
+   the ECT(1) codepoint, then the router would end up marking some of
+   the ECT(0) packets, and dropping some of the ECT(1) packets, as
+   indications of congestion.  Since TCP is required to react to both
+   marked and dropped packets, this behavior of dropping packets that
+   could have been marked poses no significant threat to the network,
+   and is consistent with the overall approach to ECN that allows
+   routers to determine when and whether to mark packets as they see fit
+   (see Section 5).
+
+12.  Summary of changes required in IP and TCP
+
+   This document specified two bits in the IP header to be used for ECN.
+   The not-ECT codepoint indicates that the transport protocol will
+   ignore the CE codepoint.  This is the default value for the ECN
+   codepoint.  The ECT codepoints indicate that the transport protocol
+   is willing and able to participate in ECN.
+
+   The router sets the CE codepoint to indicate congestion to the end
+   nodes.  The CE codepoint in a packet header MUST NOT be reset by a
+   router.
+
+   TCP requires three changes for ECN: a setup phase and two new flags
+   in the TCP header. The ECN-Echo flag is used by the data receiver to
+   inform the data sender of a received CE packet.  The Congestion
+   Window Reduced (CWR) flag is used by the data sender to inform the
+   data receiver that the congestion window has been reduced.
+
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 38]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   When ECN (Explicit Congestion Notification) is used, it is required
+   that congestion indications generated within an IP tunnel not be lost
+   at the tunnel egress.  We specified a minor modification to the IP
+   protocol's handling of the ECN field during encapsulation and de-
+   capsulation to allow flows that will undergo IP tunneling to use ECN.
+
+   Two options for ECN in tunnels were specified:
+
+   1) A limited-functionality option that does not use ECN inside the IP
+   tunnel, by setting the ECN field in the outer header to not-ECT, and
+   not altering the inner header at the time of decapsulation.
+
+   2) The full-functionality option, which sets the ECN field in the
+   outer header to either not-ECT or to one of the ECT codepoints,
+   depending on the ECN field in the inner header.  At decapsulation, if
+   the CE codepoint is set in the outer header, and the inner header is
+   set to one of the ECT codepoints, then the CE codepoint is copied to
+   the inner header.
+
+   For IPsec tunnels, this document also defines an optional IPsec
+   Security Association (SA) attribute that enables negotiation of ECN
+   usage within IPsec tunnels and an optional field in the Security
+   Association Database to indicate whether ECN is permitted in tunnel
+   mode on a SA.  The required changes to IPsec tunnels for ECN usage
+   modify RFC 2401 [RFC2401], which defines the IPsec architecture and
+   specifies some aspects of its implementation.  The new IPsec SA
+   attribute is in addition to those already defined in Section 4.5 of
+   [RFC2407].
+
+   This document obsoletes RFC 2481, "A Proposal to add Explicit
+   Congestion Notification (ECN) to IP", which defined ECN as an
+   Experimental Protocol for the Internet Community.  The rest of this
+   section describes the relationship between this document and its
+   predecessor.
+
+   RFC 2481 included a brief discussion of the use of ECN with
+   encapsulated packets, and noted that for the IPsec specifications at
+   the time (January 1999), flows could not safely use ECN if they were
+   to traverse IPsec tunnels.  RFC 2481 also described the changes that
+   could be made to IPsec tunnel specifications to make them compatible
+   with ECN.
+
+   This document also incorporates work that was done after RFC 2481.
+   First was to describe the changes to IPsec tunnels in detail, and
+   extensively discuss the security implications of ECN (now included as
+   Sections 18 and 19 of this document).  Second was to extend the
+   discussion of IPsec tunnels to include all IP tunnels.  Because older
+   IP tunnels are not compatible with a flow's use of ECN, the
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 39]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   deployment of ECN in the Internet will create strong pressure for
+   older IP tunnels to be updated to an ECN-compatible version, using
+   either the limited-functionality or the full-functionality option.
+
+   This document does not address the issue of including ECN in non-IP
+   tunnels such as MPLS, GRE, L2TP, or PPTP.  An earlier preliminary
+   document about adding ECN support to MPLS was not advanced.
+
+   A third new piece of work after RFC 2481 was to describe the ECN
+   procedure with retransmitted data packets, that an ECT codepoint
+   should not be set on retransmitted data packets.  The motivation for
+   this additional specification is to eliminate a possible avenue for
+   denial-of-service attacks on an existing TCP connection.  Some prior
+   deployments of ECN-capable TCP might not conform to the (new)
+   requirement not to set an ECT codepoint on retransmitted packets; we
+   do not believe this will cause significant problems in practice.
+
+   This document also expands slightly on the specification of the use
+   of SYN packets for the negotiation of ECN.  While some prior
+   deployments of ECN-capable TCP might not conform to the requirements
+   specified in this document, we do not believe that this will lead to
+   any performance or compatibility problems for TCP connections with a
+   combination of TCP implementations at the endpoints.
+
+   This document also includes the specification of the ECT(1)
+   codepoint, which may be used by TCP as part of the implementation of
+   an ECN nonce.
+
+13.  Conclusions
+
+   Given the current effort to implement AQM, we believe this is the
+   right time to deploy congestion avoidance mechanisms that do not
+   depend on packet drops alone.  With the increased deployment of
+   applications and transports sensitive to the delay and loss of a
+   single packet (e.g., realtime traffic, short web transfers),
+   depending on packet loss as a normal congestion notification
+   mechanism appears to be insufficient (or at the very least, non-
+   optimal).
+
+   We examined the consequence of modifications of the ECN field within
+   the network, analyzing all the opportunities for an adversary to
+   change the ECN field.  In many cases, the change to the ECN field is
+   no worse than dropping a packet. However, we noted that some changes
+   have the more serious consequence of subverting end-to-end congestion
+   control.  However, we point out that even then the potential damage
+   is limited, and is similar to the threat posed by end-systems
+   intentionally failing to cooperate with end-to-end congestion
+   control.
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 40]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+14.  Acknowledgements
+
+   Many people have made contributions to this work and this document,
+   including many that we have not managed to directly acknowledge in
+   this document.  In addition, we would like to thank Kenjiro Cho for
+   the proposal for the TCP mechanism for negotiating ECN-Capability,
+   Kevin Fall for the proposal of the CWR bit, Steve Blake for material
+   on IPv4 Header Checksum Recalculation, Jamal Hadi-Salim for
+   discussions of ECN issues, and Steve Bellovin, Jim Bound, Brian
+   Carpenter, Paul Ferguson, Stephen Kent, Greg Minshall, and Vern
+   Paxson for discussions of security issues.  We also thank the
+   Internet End-to-End Research Group for ongoing discussions of these
+   issues.
+
+   Email discussions with a number of people, including Dax Kelson,
+   Alexey Kuznetsov, Jamal Hadi-Salim, and Venkat Venkatsubra, have
+   addressed the issues raised by non-conformant equipment in the
+   Internet that does not respond to TCP SYN packets with the ECE and
+   CWR flags set.  We thank Mark Handley, Jitendra Padhye, and others
+   for discussions on the TCP initialization procedures.
+
+   The discussion of ECN and IP tunnel considerations draws heavily on
+   related discussions and documents from the Differentiated Services
+   Working Group.  We thank Tabassum Bint Haque from Dhaka, Bangladesh,
+   for feedback on IP tunnels.  We thank Derrell Piper and Tero Kivinen
+   for proposing modifications to RFC 2407 that improve the usability of
+   negotiating the ECN Tunnel SA attribute.
+
+   We thank David Wetherall, David Ely, and Neil Spring for the proposal
+   for the ECN nonce.  We also thank Stefan Savage for discussions on
+   this issue.  We thank Bob Briscoe and Jon Crowcroft for raising the
+   issue of fragmentation in IP, on alternate semantics for the fourth
+   ECN codepoint, and several other topics.  We thank Richard Wendland
+   for feedback on several issues in the document.
+
+   We also thank the IESG, and in particular the Transport Area
+   Directors over the years, for their feedback and their work towards
+   the standardization of ECN.
+
+15.  References
+
+   [AH]         Kent, S. and R. Atkinson, "IP Authentication Header",
+                RFC 2402, November 1998.
+
+   [ECN]       "The ECN Web Page", URL
+                "http://www.aciri.org/floyd/ecn.html".  Reference for
+                informational purposes only.
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 41]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   [ESP]        Kent, S. and R. Atkinson, "IP Encapsulating Security
+                Payload", RFC 2406, November 1998.
+
+   [FIXES]      ECN-under-Linux Unofficial Vendor Support Page, URL
+                "http://gtf.org/garzik/ecn/".  Reference for
+                informational purposes only.
+
+   [FJ93]       Floyd, S., and Jacobson, V., "Random Early Detection
+                gateways for Congestion Avoidance", IEEE/ACM
+                Transactions on Networking, V.1 N.4, August 1993, p.
+                397-413.
+
+   [Floyd94]    Floyd, S., "TCP and Explicit Congestion Notification",
+                ACM Computer Communication Review, V. 24 N. 5, October
+                1994, p. 10-23.
+
+   [Floyd98]    Floyd, S., "The ECN Validation Test in the NS
+                Simulator", URL "http://www-mash.cs.berkeley.edu/ns/",
+                test tcl/test/test-all-ecn.  Reference for
+                informational purposes only.
+
+   [FF99]       Floyd, S., and Fall, K., "Promoting the Use of End-to-
+                End Congestion Control in the Internet", IEEE/ACM
+                Transactions on Networking, August 1999.
+
+   [FRED]       Lin, D., and Morris, R., "Dynamics of Random Early
+                Detection", SIGCOMM '97, September 1997.
+
+   [GRE]        Hanks, S., Li, T., Farinacci, D. and P. Traina, "Generic
+                Routing Encapsulation (GRE)", RFC 1701, October 1994.
+
+   [Jacobson88] V. Jacobson, "Congestion Avoidance and Control", Proc.
+                ACM SIGCOMM '88, pp. 314-329.
+
+   [Jacobson90] V. Jacobson, "Modified TCP Congestion Avoidance
+                Algorithm", Message to end2end-interest mailing list,
+                April 1990. URL
+                "ftp://ftp.ee.lbl.gov/email/vanj.90apr30.txt".
+
+   [K98]        Krishnan, H., "Analyzing Explicit Congestion
+                Notification (ECN) benefits for TCP", Master's thesis,
+                UCLA, 1998.  Citation for acknowledgement purposes only.
+
+   [L2TP]       Townsley, W., Valencia, A., Rubens, A., Pall, G., Zorn,
+                G. and B. Palter, "Layer Two Tunneling Protocol "L2TP"",
+                RFC 2661, August 1999.
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 42]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   [MJV96]      S. McCanne, V. Jacobson, and M. Vetterli, "Receiver-
+                driven Layered Multicast", SIGCOMM '96, August 1996, pp.
+                117-130.
+
+   [MPLS]       Awduche, D., Malcolm, J., Agogbua, J., O'Dell, M. and J.
+                McManus, "Requirements for Traffic Engineering Over
+                MPLS", RFC 2702, September 1999.
+
+   [PPTP]       Hamzeh, K., Pall, G., Verthein, W., Taarud, J., Little,
+                W.  and G. Zorn, "Point-to-Point Tunneling Protocol
+                (PPTP)", RFC 2637, July 1999.
+
+   [RFC791]     Postel, J., "Internet Protocol", STD 5, RFC 791,
+                September 1981.
+
+   [RFC793]     Postel, J., "Transmission Control Protocol", STD 7, RFC
+                793, September 1981.
+
+   [RFC1141]    Mallory, T. and A. Kullberg, "Incremental Updating of
+                the Internet Checksum", RFC 1141, January 1990.
+
+   [RFC1349]    Almquist, P., "Type of Service in the Internet Protocol
+                Suite", RFC 1349, July 1992.
+
+   [RFC1455]    Eastlake, D., "Physical Link Security Type of Service",
+                RFC 1455, May 1993.
+
+   [RFC1701]    Hanks, S., Li, T., Farinacci, D. and P. Traina, "Generic
+                Routing Encapsulation (GRE)", RFC 1701, October 1994.
+
+   [RFC1702]    Hanks, S., Li, T., Farinacci, D. and P. Traina, "Generic
+                Routing Encapsulation over IPv4 networks", RFC 1702,
+                October 1994.
+
+   [RFC2003]    Perkins, C., "IP Encapsulation within IP", RFC 2003,
+                October 1996.
+
+   [RFC2119]    Bradner, S., "Key words for use in RFCs to Indicate
+                Requirement Levels", BCP 14, RFC 2119, March 1997.
+
+   [RFC2309]    Braden, B., et al., "Recommendations on Queue Management
+                and Congestion Avoidance in the Internet", RFC 2309,
+                April 1998.
+
+   [RFC2401]    Kent, S. and R. Atkinson, Security Architecture for the
+                Internet Protocol, RFC 2401, November 1998.
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 43]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   [RFC2407]    Piper, D., "The Internet IP Security Domain of
+                Interpretation for ISAKMP", RFC 2407, November 1998.
+
+   [RFC2408]    Maughan, D., Schertler, M., Schneider, M. and J. Turner,
+                "Internet Security Association and Key Management
+                Protocol (ISAKMP)", RFC 2408, November 1998.
+
+   [RFC2409]    Harkins, D. and D. Carrel, "The Internet Key Exchange
+                (IKE)", RFC 2409, November 1998.
+
+   [RFC2474]    Nichols, K., Blake, S., Baker, F. and D. Black,
+                "Definition of the Differentiated Services Field (DS
+                Field) in the IPv4 and IPv6 Headers", RFC 2474, December
+                1998.
+
+   [RFC2475]    Blake, S., Black, D., Carlson, M., Davies, E., Wang, Z.
+                and W. Weiss, "An Architecture for Differentiated
+                Services", RFC 2475, December 1998.
+
+   [RFC2481]    Ramakrishnan, K. and S. Floyd, "A Proposal to add
+                Explicit Congestion Notification (ECN) to IP", RFC 2481,
+                January 1999.
+
+   [RFC2581]    Allman, M., Paxson, V. and W. Stevens, "TCP Congestion
+                Control", RFC 2581, April 1999.
+
+   [RFC2884]    Hadi Salim, J. and U. Ahmed, "Performance Evaluation of
+                Explicit Congestion Notification (ECN) in IP Networks",
+                RFC 2884, July 2000.
+
+   [RFC2983]    Black, D., "Differentiated Services and Tunnels",
+                RFC 2983, October 2000.
+
+   [RFC2780]    Bradner, S. and V. Paxson, "IANA Allocation Guidelines
+                For Values In the Internet Protocol and Related
+                Headers", BCP 37, RFC 2780, March 2000.
+
+   [RJ90]       K. K. Ramakrishnan and Raj Jain, "A Binary Feedback
+                Scheme for Congestion Avoidance in Computer Networks",
+                ACM Transactions on Computer Systems, Vol.8, No.2, pp.
+                158-181, May 1990.
+
+   [SCWA99]     Stefan Savage, Neal Cardwell, David Wetherall, and Tom
+                Anderson, TCP Congestion Control with a Misbehaving
+                Receiver, ACM Computer Communications Review, October
+                1999.
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 44]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   [TBIT]       Jitendra Padhye and Sally Floyd, "Identifying the TCP
+                Behavior of Web Servers", ICSI TR-01-002, February 2001.
+                URL "http://www.aciri.org/tbit/".
+
+16.  Security Considerations
+
+   Security considerations have been discussed in Sections 7, 8, 18, and
+   19.
+
+17.  IPv4 Header Checksum Recalculation
+
+   IPv4 header checksum recalculation is an issue with some high-end
+   router architectures using an output-buffered switch, since most if
+   not all of the header manipulation is performed on the input side of
+   the switch, while the ECN decision would need to be made local to the
+   output buffer. This is not an issue for IPv6, since there is no IPv6
+   header checksum. The IPv4 TOS octet is the last byte of a 16-bit
+   half-word.
+
+   RFC 1141 [RFC1141] discusses the incremental updating of the IPv4
+   checksum after the TTL field is decremented.  The incremental
+   updating of the IPv4 checksum after the CE codepoint was set would
+   work as follows: Let HC be the original header checksum for an ECT(0)
+   packet, and let HC' be the new header checksum after the CE bit has
+   been set.  That is, the ECN field has changed from '10' to '11'.
+   Then for header checksums calculated with one's complement
+   subtraction, HC' would be recalculated as follows:
+
+        HC' = { HC - 1     HC > 1
+              { 0x0000     HC = 1
+
+   For header checksums calculated on two's complement machines, HC'
+   would be recalculated as follows after the CE bit was set:
+
+        HC' = { HC - 1     HC > 0
+              { 0xFFFE     HC = 0
+
+   A similar incremental updating of the IPv4 checksum can be carried
+   out when the ECN field is changed from ECT(1) to CE, that is, from
+   '01' to '11'.
+
+18.  Possible Changes to the ECN Field in the Network
+
+   This section discusses in detail possible changes to the ECN field in
+   the network, such as falsely reporting congestion, disabling ECN-
+   Capability for an individual packet, erasing the ECN congestion
+   indication, or falsely indicating ECN-Capability.
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 45]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+18.1.  Possible Changes to the IP Header
+
+18.1.1.  Erasing the Congestion Indication
+
+   First, we consider the changes that a router could make that would
+   result in effectively erasing the congestion indication after it had
+   been set by a router upstream.  The convention followed is:  ECN
+   codepoint of received packet -> ECN codepoint of packet transmitted.
+
+   Replacing the CE codepoint with the ECT(0) or ECT(1) codepoint
+   effectively erases the congestion indication.  However, with the use
+   of two ECT codepoints, a router erasing the CE codepoint has no way
+   to know whether the original ECT codepoint was ECT(0) or ECT(1).
+   Thus, it is possible for the transport protocol to deploy mechanisms
+   to detect such erasures of the CE codepoint.
+
+   The consequence of the erasure of the CE codepoint for the upstream
+   router is that there is a potential for congestion to build for a
+   time, because the congestion indication does not reach the source.
+   However, the packet would be received and acknowledged.
+
+   The potential effect of erasing the congestion indication is complex,
+   and is discussed in depth in Section 19 below.  Note that the effect
+   of erasing the congestion indication is different from dropping a
+   packet in the network.  When a data packet is dropped, the drop is
+   detected by the TCP sender, and interpreted as an indication of
+   congestion.  Similarly, if a sufficient number of consecutive
+   acknowledgement packets are dropped, causing the cumulative
+   acknowledgement field not to be advanced at the sender, the sender is
+   limited by the congestion window from sending additional packets, and
+   ultimately the retransmit timer expires.
+
+   In contrast, a systematic erasure of the CE bit by a downstream
+   router can have the effect of causing a queue buildup at an upstream
+   router, including the possible loss of packets due to buffer
+   overflow.  There is a potential of unfairness in that another flow
+   that goes through the congested router could react to the CE bit set
+   while the flow that has the CE bit erased could see better
+   performance.  The limitations on this potential unfairness are
+   discussed in more detail in Section 19 below.
+
+   The last of the three changes is to replace the CE codepoint with the
+   not-ECT codepoint, thus erasing the congestion indication and
+   disabling ECN-Capability at the same time.
+
+   The `erasure' of the congestion indication is only effective if the
+   packet does not end up being marked or dropped again by a downstream
+   router.  If the CE codepoint is replaced by an ECT codepoint, the
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 46]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   packet remains ECN-Capable, and could be either marked or dropped by
+   a downstream router as an indication of congestion.  If the CE
+   codepoint is replaced by the not-ECT codepoint, the packet is no
+   longer ECN-capable, and can therefore be dropped but not marked by a
+   downstream router as an indication of congestion.
+
+18.1.2.  Falsely Reporting Congestion
+
+   This change is to set the CE codepoint when an ECT codepoint was
+   already set, even though there was no congestion.  This change does
+   not affect the treatment of that packet along the rest of the path.
+   In particular, a router does not examine the CE codepoint in deciding
+   whether to drop or mark an arriving packet.
+
+   However, this could result in the application unnecessarily invoking
+   end-to-end congestion control, and reducing its arrival rate.  By
+   itself, this is no worse (for the application or for the network)
+   than if the tampering router had actually dropped the packet.
+
+18.1.3.  Disabling ECN-Capability
+
+   This change is to turn off the ECT codepoint of a packet.  This means
+   that if the packet later encounters congestion (e.g., by arriving to
+   a RED queue with a moderate average queue size), it will be dropped
+   instead of being marked.  By itself, this is no worse (for the
+   application) than if the tampering router had actually dropped the
+   packet.  The saving grace in this particular case is that there is no
+   congested router upstream expecting a reaction from setting the CE
+   bit.
+
+18.1.4.  Falsely Indicating ECN-Capability
+
+   This change would incorrectly label a packet as ECN-Capable. The
+   packet may have been sent either by an ECN-Capable transport or a
+   transport that is not ECN-Capable.
+
+   If the packet later encounters moderate congestion at an ECN-Capable
+   router, the router could set the CE codepoint instead of dropping the
+   packet.  If the transport protocol in fact is not ECN-Capable, then
+   the transport will never receive this indication of congestion, and
+   will not reduce its sending rate in response.  The potential
+   consequences of falsely indicating ECN-capability are discussed
+   further in Section 19 below.
+
+   If the packet never later encounters congestion at an ECN-Capable
+   router, then the first of these two changes would have no effect,
+   other than possibly interfering with the use of the ECN nonce by the
+   transport protocol.  The last change, however, would have the effect
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 47]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   of giving false reports of congestion to a monitoring device along
+   the path.  If the transport protocol is ECN-Capable, then this change
+   could also have an effect at the transport level, by combining
+   falsely indicating ECN-Capability with falsely reporting congestion.
+   For an ECN-capable transport, this would cause the transport to
+   unnecessarily react to congestion.  In this particular case, the
+   router that is incorrectly changing the ECN field could have dropped
+   the packet. Thus for this case of an ECN-capable transport, the
+   consequence of this change to the ECN field is no worse than dropping
+   the packet.
+
+18.2.  Information carried in the Transport Header
+
+   For TCP, an ECN-capable TCP receiver informs its TCP peer that it is
+   ECN-capable at the TCP level, conveying this information in the TCP
+   header at the time the connection is set up.  This document does not
+   consider potential dangers introduced by changes in the transport
+   header within the network.  We note that when IPsec is used, the
+   transport header is protected both in tunnel and transport modes
+   [ESP, AH].
+
+   Another issue concerns TCP packets with a spoofed IP source address
+   carrying invalid ECN information in the transport header.  For
+   completeness, we examine here some possible ways that a node spoofing
+   the IP source address of another node could use the two ECN flags in
+   the TCP header to launch a denial-of-service attack. However, these
+   attacks would require an ability for the attacker to use valid TCP
+   sequence numbers, and any attacker with this ability and with the
+   ability to spoof IP source addresses could damage the TCP connection
+   without using the ECN flags.  Therefore, ECN does not add any new
+   vulnerabilities in this respect.
+
+   An acknowledgement packet with a spoofed IP source address of the TCP
+   data receiver could include the ECE bit set.  If accepted by the TCP
+   data sender as a valid packet, this spoofed acknowledgement packet
+   could result in the TCP data sender unnecessarily halving its
+   congestion window.  However, to be accepted by the data sender, such
+   a spoofed acknowledgement packet would have to have the correct 32-
+   bit sequence number as well as a valid acknowledgement number.  An
+   attacker that could successfully send such a spoofed acknowledgement
+   packet could also send a spoofed RST packet, or do other equally
+   damaging operations to the TCP connection.
+
+   Packets with a spoofed IP source address of the TCP data sender could
+   include the CWR bit set.  Again, to be accepted, such a packet would
+   have to have a valid sequence number.  In addition, such a spoofed
+   packet would have a limited performance impact.  Spoofing a data
+   packet with the CWR bit set could result in the TCP data receiver
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 48]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   sending fewer ECE packets than it would otherwise, if the data
+   receiver was sending ECE packets when it received the spoofed CWR
+   packet.
+
+18.3.  Split Paths
+
+   In some cases, a malicious or broken router might have access to only
+   a subset of the packets from a flow.  The question is as follows:
+   can this router, by altering the ECN field in this subset of the
+   packets, do more damage to that flow than if it had simply dropped
+   that set of packets?
+
+   We will classify the packets in the flow as A packets and B packets,
+   and assume that the adversary only has access to A packets.  Assume
+   that the adversary is subverting end-to-end congestion control along
+   the path traveled by A packets only, by either falsely indicating
+   ECN-Capability upstream of the point where congestion occurs, or
+   erasing the congestion indication downstream.  Consider also that
+   there exists a monitoring device that sees both the A and B packets,
+   and will "punish" both the A and B packets if the total flow is
+   determined not to be properly responding to indications of
+   congestion.  Another key characteristic that we believe is likely to
+   be true is that the monitoring device, before `punishing' the A&B
+   flow, will first drop packets instead of setting the CE codepoint,
+   and will drop arriving packets of that flow that already have the CE
+   codepoint set.  If the end nodes are in fact using end-to-end
+   congestion control, they will see all of the indications of
+   congestion seen by the monitoring device, and will begin to respond
+   to these indications of congestion. Thus, the monitoring device is
+   successful in providing the indications to the flow at an early
+   stage.
+
+   It is true that the adversary that has access only to the A packets
+   might, by subverting ECN-based congestion control, be able to deny
+   the benefits of ECN to the other packets in the A&B aggregate.  While
+   this is unfortunate, this is not a reason to disable ECN.
+
+   A variant of falsely reporting congestion occurs when there are two
+   adversaries along a path, where the first adversary falsely reports
+   congestion, and the second adversary `erases' those reports. (Unlike
+   packet drops, ECN congestion reports can be `reversed' later in the
+   network by a malicious or broken router.  However, the use of the ECN
+   nonce could help the transport to detect this behavior.)  While this
+   would be transparent to the end node, it is possible that a
+   monitoring device between the first and second adversaries would see
+   the false indications of congestion.  Keep in mind our recommendation
+   in this document, that before `punishing' a flow for not responding
+   appropriately to congestion, the router will first switch to dropping
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 49]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   rather than marking as an indication of congestion, for that flow.
+   When this includes dropping arriving packets from that flow that have
+   the CE codepoint set, this ensures that these indications of
+   congestion are being seen by the end nodes.  Thus, there is no
+   additional harm that we are able to postulate as a result of multiple
+   conflicting adversaries.
+
+19.  Implications of Subverting End-to-End Congestion Control
+
+   This section focuses on the potential repercussions of subverting
+   end-to-end congestion control by either falsely indicating ECN-
+   Capability, or by erasing the congestion indication in ECN (the CE
+   codepoint).  Subverting end-to-end congestion control by either of
+   these two methods can have consequences both for the application and
+   for the network.  We discuss these separately below.
+
+   The first method to subvert end-to-end congestion control, that of
+   falsely indicating ECN-Capability, effectively subverts end-to-end
+   congestion control only if the packet later encounters congestion
+   that results in the setting of the CE codepoint.  In this case, the
+   transport protocol (which may not be ECN-capable) does not receive
+   the indication of congestion from these downstream congested routers.
+
+   The second method to subvert end-to-end congestion control, `erasing'
+   the CE codepoint in a packet, effectively subverts end-to-end
+   congestion control only when the CE codepoint in the packet was set
+   earlier by a congested router.  In this case, the transport protocol
+   does not receive the indication of congestion from the upstream
+   congested routers.
+
+   Either of these two methods of subverting end-to-end congestion
+   control can potentially introduce more damage to the network (and
+   possibly to the flow itself) than if the adversary had simply dropped
+   packets from that flow.  However, as we discuss later in this section
+   and in Section 7, this potential damage is limited.
+
+19.1.  Implications for the Network and for Competing Flows
+
+   The CE codepoint of the ECN field is only used by routers as an
+   indication of congestion during periods of *moderate* congestion.
+   ECN-capable routers should drop rather than mark packets during heavy
+   congestion even if the router's queue is not yet full.  For example,
+   for routers using active queue management based on RED, the router
+   should drop rather than mark packets that arrive while the average
+   queue sizes exceed the RED queue's maximum threshold.
+
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 50]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   One consequence for the network of subverting end-to-end congestion
+   control is that flows that do not receive the congestion indications
+   from the network might increase their sending rate until they drive
+   the network into heavier congestion.  Then, the congested router
+   could begin to drop rather than mark arriving packets.  For flows
+   that are not isolated by some form of per-flow scheduling or other
+   per-flow mechanisms, but are instead aggregated with other flows in a
+   single queue in an undifferentiated fashion, this packet-dropping at
+   the congested router would apply to all flows that share that queue.
+   Thus, the consequences would be to increase the level of congestion
+   in the network.
+
+   In some cases, the increase in the level of congestion will lead to a
+   substantial buffer buildup at the congested queue that will be
+   sufficient to drive the congested queue from the packet-marking to
+   the packet-dropping regime.  This transition could occur either
+   because of buffer overflow, or because of the active queue management
+   policy described above that drops packets when the average queue is
+   above RED's maximum threshold.  At this point, all flows, including
+   the subverted flow, will begin to see packet drops instead of packet
+   marks, and a malicious or broken router will no longer be able to
+   `erase' these indications of congestion in the network.  If the end
+   nodes are deploying appropriate end-to-end congestion control, then
+   the subverted flow will reduce its arrival rate in response to
+   congestion.  When the level of congestion is sufficiently reduced,
+   the congested queue can return from the packet-dropping regime to the
+   packet-marking regime.  The steady-state pattern could be one of the
+   congested queue oscillating between these two regimes.
+
+   In other cases, the consequences of subverting end-to-end congestion
+   control will not be severe enough to drive the congested link into
+   sufficiently-heavy congestion that packets are dropped instead of
+   being marked.  In this case, the implications for competing flows in
+   the network will be a slightly-increased rate of packet marking or
+   dropping, and a corresponding decrease in the bandwidth available to
+   those flows.  This can be a stable state if the arrival rate of the
+   subverted flow is sufficiently small, relative to the link bandwidth,
+   that the average queue size at the congested router remains under
+   control.  In particular, the subverted flow could have a limited
+   bandwidth demand on the link at this router, while still getting more
+   than its "fair" share of the link.  This limited demand could be due
+   to a limited demand from the data source; a limitation from the TCP
+   advertised window; a lower-bandwidth access pipe; or other factors.
+   Thus the subversion of ECN-based congestion control can still lead to
+   unfairness, which we believe is appropriate to note here.
+
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 51]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   The threat to the network posed by the subversion of ECN-based
+   congestion control in the network is essentially the same as the
+   threat posed by an end-system that intentionally fails to cooperate
+   with end-to-end congestion control.  The deployment of mechanisms in
+   routers to address this threat is an open research question, and is
+   discussed further in Section 10.
+
+   Let us take the example described in Section 18.1.1, where the CE
+   codepoint that was set in a packet is erased: {'11' -> '10' or '11'
+   -> '01'}.  The consequence for the congested upstream router that set
+   the CE codepoint is that this congestion indication does not reach
+   the end nodes for that flow. The source (even one which is completely
+   cooperative and not malicious) is thus allowed to continue to
+   increase its sending rate (if it is a TCP flow, by increasing its
+   congestion window).  The flow potentially achieves better throughput
+   than the other flows that also share the congested router, especially
+   if there are no policing mechanisms or per-flow queuing mechanisms at
+   that router.  Consider the behavior of the other flows, especially if
+   they are cooperative: that is, the flows that do not experience
+   subverted end-to-end congestion control.  They are likely to reduce
+   their load (e.g., by reducing their window size) on the congested
+   router, thus benefiting our subverted flow. This results in
+   unfairness.  As we discussed above, this unfairness could either be
+   transient (because the congested queue is driven into the packet-
+   marking regime), oscillatory (because the congested queue oscillates
+   between the packet marking and the packet dropping regime), or more
+   moderate but a persistent stable state (because the congested queue
+   is never driven to the packet dropping regime).
+
+   The results would be similar if the subverted flow was intentionally
+   avoiding end-to-end congestion control.  One difference is that a
+   flow that is intentionally avoiding end-to-end congestion control at
+   the end nodes can avoid end-to-end congestion control even when the
+   congested queue is in packet-dropping mode, by refusing to reduce its
+   sending rate in response to packet drops in the network.  Thus the
+   problems for the network from the subversion of ECN-based congestion
+   control are less severe than the problems caused by the intentional
+   avoidance of end-to-end congestion control in the end nodes.  It is
+   also the case that it is considerably more difficult to control the
+   behavior of the end nodes than it is to control the behavior of the
+   infrastructure itself.  This is not to say that the problems for the
+   network posed by the network's subversion of ECN-based congestion
+   control are small; just that they are dwarfed by the problems for the
+   network posed by the subversion of either ECN-based or other
+   currently known packet-based congestion control mechanisms by the end
+   nodes.
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 52]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+19.2.  Implications for the Subverted Flow
+
+   When a source indicates that it is ECN-capable, there is an
+   expectation that the routers in the network that are capable of
+   participating in ECN will use the CE codepoint for indication of
+   congestion. There is the potential benefit of using ECN in reducing
+   the amount of packet loss (in addition to the reduced queuing delays
+   because of active queue management policies).  When the packet flows
+   through an IPsec tunnel where the nodes that the tunneled packets
+   traverse are untrusted in some way, the expectation is that IPsec
+   will protect the flow from subversion that results in undesirable
+   consequences.
+
+   In many cases, a subverted flow will benefit from the subversion of
+   end-to-end congestion control for that flow in the network, by
+   receiving more bandwidth than it would have otherwise, relative to
+   competing non-subverted flows.  If the congested queue reaches the
+   packet-dropping stage, then the subversion of end-to-end congestion
+   control might or might not be of overall benefit to the subverted
+   flow, depending on that flow's relative tradeoffs between throughput,
+   loss, and delay.
+
+   One form of subverting end-to-end congestion control is to falsely
+   indicate ECN-capability by setting the ECT codepoint.  This has the
+   consequence of downstream congested routers setting the CE codepoint
+   in vain.  However, as described in Section 9.1.2, if an ECT codepoint
+   is changed in an IP tunnel, this can be detected at the egress point
+   of the tunnel, as long as the inner header was not changed within the
+   tunnel.
+
+   The second form of subverting end-to-end congestion control is to
+   erase the congestion indication by erasing the CE codepoint.  In this
+   case, it is the upstream congested routers that set the CE codepoint
+   in vain.
+
+   If an ECT codepoint is erased within an IP tunnel, then this can be
+   detected at the egress point of the tunnel, as long as the inner
+   header was not changed within the tunnel.  If the CE codepoint is set
+   upstream of the IP tunnel, then any erasure of the outer header's CE
+   codepoint within the tunnel will have no effect because the inner
+   header preserves the set value of the CE codepoint.  However, if the
+   CE codepoint is set within the tunnel, and erased either within or
+   downstream of the tunnel, this is not necessarily detected at the
+   egress point of the tunnel.
+
+   With this subversion of end-to-end congestion control, an end-system
+   transport does not respond to the congestion indication.  Along with
+   the increased unfairness for the non-subverted flows described in the
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 53]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   previous section, the congested router's queue could continue to
+   build, resulting in packet loss at the congested router - which is a
+   means for indicating congestion to the transport in any case.  In the
+   interim, the flow might experience higher queuing delays, possibly
+   along with an increased bandwidth relative to other non-subverted
+   flows.  But transports do not inherently make assumptions of
+   consistently experiencing carefully managed queuing in the path.  We
+   believe that these forms of subverting end-to-end congestion control
+   are no worse for the subverted flow than if the adversary had simply
+   dropped the packets of that flow itself.
+
+19.3.  Non-ECN-Based Methods of Subverting End-to-end Congestion Control
+
+   We have shown that, in many cases, a malicious or broken router that
+   is able to change the bits in the ECN field can do no more damage
+   than if it had simply dropped the packet in question.  However, this
+   is not true in all cases, in particular in the cases where the broken
+   router subverted end-to-end congestion control by either falsely
+   indicating ECN-Capability or by erasing the ECN congestion indication
+   (in the CE codepoint).  While there are many ways that a router can
+   harm a flow by dropping packets, a router cannot subvert end-to-end
+   congestion control by dropping packets.  As an example, a router
+   cannot subvert TCP congestion control by dropping data packets,
+   acknowledgement packets, or control packets.
+
+   Even though packet-dropping cannot be used to subvert end-to-end
+   congestion control, there *are* non-ECN-based methods for subverting
+   end-to-end congestion control that a broken or malicious router could
+   use.  For example, a broken router could duplicate data packets, thus
+   effectively negating the effects of end-to-end congestion control
+   along some portion of the path.  (For a router that duplicated
+   packets within an IPsec tunnel, the security administrator can cause
+   the duplicate packets to be discarded by configuring anti-replay
+   protection for the tunnel.)  This duplication of packets within the
+   network would have similar implications for the network and for the
+   subverted flow as those described in Sections 18.1.1 and 18.1.4
+   above.
+
+20.  The Motivation for the ECT Codepoints.
+
+20.1.  The Motivation for an ECT Codepoint.
+
+   The need for an ECT codepoint is motivated by the fact that ECN will
+   be deployed incrementally in an Internet where some transport
+   protocols and routers understand ECN and some do not. With an ECT
+   codepoint, the router can drop packets from flows that are not ECN-
+   capable, but can *instead* set the CE codepoint in packets that *are*
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 54]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   ECN-capable. Because an ECT codepoint allows an end node to have the
+   CE codepoint set in a packet *instead* of having the packet dropped,
+   an end node might have some incentive to deploy ECN.
+
+   If there was no ECT codepoint, then the router would have to set the
+   CE codepoint for packets from both ECN-capable and non-ECN-capable
+   flows.  In this case, there would be no incentive for end-nodes to
+   deploy ECN, and no viable path of incremental deployment from a non-
+   ECN world to an ECN-capable world.  Consider the first stages of such
+   an incremental deployment, where a subset of the flows are ECN-
+   capable.  At the onset of congestion, when the packet
+   dropping/marking rate would be low, routers would only set CE
+   codepoints, rather than dropping packets.  However, only those flows
+   that are ECN-capable would understand and respond to CE packets. The
+   result is that the ECN-capable flows would back off, and the non-
+   ECN-capable flows would be unaware of the ECN signals and would
+   continue to open their congestion windows.
+
+   In this case, there are two possible outcomes: (1) the ECN-capable
+   flows back off, the non-ECN-capable flows get all of the bandwidth,
+   and congestion remains mild, or (2) the ECN-capable flows back off,
+   the non-ECN-capable flows don't, and congestion increases until the
+   router transitions from setting the CE codepoint to dropping packets.
+   While this second outcome evens out the fairness, the ECN-capable
+   flows would still receive little benefit from being ECN-capable,
+   because the increased congestion would drive the router to packet-
+   dropping behavior.
+
+   A flow that advertised itself as ECN-Capable but does not respond to
+   CE codepoints is functionally equivalent to a flow that turns off
+   congestion control, as discussed earlier in this document.
+
+   Thus, in a world where a subset of the flows are ECN-capable, but
+   where ECN-capable flows have no mechanism for indicating that fact to
+   the routers, there would be less effective and less fair congestion
+   control in the Internet, resulting in a strong incentive for end
+   nodes not to deploy ECN.
+
+20.2.  The Motivation for two ECT Codepoints.
+
+   The primary motivation for the two ECT codepoints is to provide a
+   one-bit ECN nonce.  The ECN nonce allows the development of
+   mechanisms for the sender to probabilistically verify that network
+   elements are not erasing the CE codepoint, and that data receivers
+   are properly reporting to the sender the receipt of packets with the
+   CE codepoint set.
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 55]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   Another possibility for senders to detect misbehaving network
+   elements or receivers would be for the data sender to occasionally
+   send a data packet with the CE codepoint set, to see if the receiver
+   reports receiving the CE codepoint.  Of course, if these packets
+   encountered congestion in the network, the router might make no
+   change in the packets, because the CE codepoint would already be set.
+   Thus, for packets sent with the CE codepoint set, the TCP end-nodes
+   could not determine if some router intended to set the CE codepoint
+   in these packets.  For this reason, sending packets with the CE
+   codepoint would have to be done sparingly, and would be a less
+   effective check against misbehaving network elements and receivers
+   than would be the ECN nonce.
+
+   The assignment of the fourth ECN codepoint to ECT(1) precludes the
+   use of this codepoint for some other purposes.  For clarity, we
+   briefly list other possible purposes here.
+
+   One possibility might have been for the data sender to use the fourth
+   ECN codepoint to indicate an alternate semantics for ECN.  However,
+   this seems to us more appropriate to be signaled using a
+   differentiated services codepoint in the DS field.
+
+   A second possible use for the fourth ECN codepoint would have been to
+   give the router two separate codepoints for the indication of
+   congestion, CE(0) and CE(1), for mild and severe congestion
+   respectively.  While this could be useful in some cases, this
+   certainly does not seem a compelling requirement at this point.  If
+   there was judged to be a compelling need for this, the complications
+   of incremental deployment would most likely necessitate more than
+   just one codepoint for this function.
+
+   A third use that has been informally proposed for the ECN codepoint
+   is for use in some forms of multicast congestion control, based on
+   randomized procedures for duplicating marked packets at routers.
+   Some proposed multicast packet duplication procedures are based on a
+   new ECN codepoint that (1) conveys the fact that congestion occurred
+   upstream of the duplication point that marked the packet with this
+   codepoint and (2) can detect congestion downstream of that
+   duplication point.  ECT(1) can serve this purpose because it is both
+   distinct from ECT(0) and is replaced by CE when ECN marking occurs in
+   response to congestion or incipient congestion.  Explanation of how
+   this enhanced version of ECN would be used by multicast congestion
+   control is beyond the scope of this document, as are ECN-aware
+   multicast packet duplication procedures and the processing of the ECN
+   field at multicast receivers in all cases (i.e., irrespective of the
+   multicast packet duplication procedure(s) used).
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 56]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   The specification of IP tunnel modifications for ECN in this document
+   assumes that the only change made to the outer IP header's ECN field
+   between tunnel endpoints is to set the CE codepoint to indicate
+   congestion.  This is not consistent with some of the proposed uses of
+   ECT(1) by the multicast duplication procedures in the previous
+   paragraph, and such procedures SHOULD NOT be deployed unless this
+   inconsistency between multicast duplication procedures and IP tunnels
+   with full ECN functionality is resolved.  Limited ECN functionality
+   may be used instead, although in practice many tunnel protocols
+   (including IPsec) will not work correctly if multicast traffic
+   duplication occurs within the tunnel.
+
+21.  Why use Two Bits in the IP Header?
+
+   Given the need for an ECT indication in the IP header, there still
+   remains the question of whether the ECT (ECN-Capable Transport) and
+   CE (Congestion Experienced) codepoints should have been overloaded on
+   a single bit.  This overloaded-one-bit alternative, explored in
+   [Floyd94], would have involved a single bit with two values.  One
+   value, "ECT and not CE", would represent an ECN-Capable Transport,
+   and the other value, "CE or not ECT", would represent either
+   Congestion Experienced or a non-ECN-Capable transport.
+
+   One difference between the one-bit and two-bit implementations
+   concerns packets that traverse multiple congested routers.  Consider
+   a CE packet that arrives at a second congested router, and is
+   selected by the active queue management at that router for either
+   marking or dropping.  In the one-bit implementation, the second
+   congested router has no choice but to drop the CE packet, because it
+   cannot distinguish between a CE packet and a non-ECT packet.  In the
+   two-bit implementation, the second congested router has the choice of
+   either dropping the CE packet, or of leaving it alone with the CE
+   codepoint set.
+
+   Another difference between the one-bit and two-bit implementations
+   comes from the fact that with the one-bit implementation, receivers
+   in a single flow cannot distinguish between CE and non-ECT packets.
+   Thus, in the one-bit implementation an ECN-capable data sender would
+   have to unambiguously indicate to the receiver or receivers whether
+   each packet had been sent as ECN-Capable or as non-ECN-Capable.  One
+   possibility would be for the sender to indicate in the transport
+   header whether the packet was sent as ECN-Capable.  A second
+   possibility that would involve a functional limitation for the one-
+   bit implementation would be for the sender to unambiguously indicate
+   that it was going to send *all* of its packets as ECN-Capable or as
+   non-ECN-Capable.  For a multicast transport protocol, this
+   unambiguous indication would have to be apparent to receivers joining
+   an on-going multicast session.
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 57]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   Another concern that was described earlier (and recommended in this
+   document) is that transports (particularly TCP) should not mark pure
+   ACK packets or retransmitted packets as being ECN-Capable.  A pure
+   ACK packet from a non-ECN-capable transport could be dropped, without
+   necessarily having an impact on the transport from a congestion
+   control perspective (because subsequent ACKs are cumulative).  An
+   ECN-capable transport reacting to the CE codepoint in a pure ACK
+   packet by reducing the window would be at a disadvantage in
+   comparison to a non-ECN-capable transport. For this reason (and for
+   reasons described earlier in relation to retransmitted packets), it
+   is desirable to have the ECT codepoint set on a per-packet basis.
+
+   Another advantage of the two-bit approach is that it is somewhat more
+   robust.  The most critical issue, discussed in Section 8, is that the
+   default indication should be that of a non-ECN-Capable transport.  In
+   a two-bit implementation, this requirement for the default value
+   simply means that the not-ECT codepoint should be the default.  In
+   the one-bit implementation, this means that the single overloaded bit
+   should by default be in the "CE or not ECT" position.  This is less
+   clear and straightforward, and possibly more open to incorrect
+   implementations either in the end nodes or in the routers.
+
+   In summary, while the one-bit implementation could be a possible
+   implementation, it has the following significant limitations relative
+   to the two-bit implementation.  First, the one-bit implementation has
+   more limited functionality for the treatment of CE packets at a
+   second congested router.  Second, the one-bit implementation requires
+   either that extra information be carried in the transport header of
+   packets from ECN-Capable flows (to convey the functionality of the
+   second bit elsewhere, namely in the transport header), or that
+   senders in ECN-Capable flows accept the limitation that receivers
+   must be able to determine a priori which packets are ECN-Capable and
+   which are not ECN-Capable. Third, the one-bit implementation is
+   possibly more open to errors from faulty implementations that choose
+   the wrong default value for the ECN bit.  We believe that the use of
+   the extra bit in the IP header for the ECT-bit is extremely valuable
+   to overcome these limitations.
+
+22.  Historical Definitions for the IPv4 TOS Octet
+
+   RFC 791 [RFC791] defined the ToS (Type of Service) octet in the IP
+   header.  In RFC 791, bits 6 and 7 of the ToS octet are listed as
+   "Reserved for Future Use", and are shown set to zero.  The first two
+   fields of the ToS octet were defined as the Precedence and Type of
+   Service (TOS) fields.
+
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 58]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+             0     1     2     3     4     5     6     7
+          +-----+-----+-----+-----+-----+-----+-----+-----+
+          |   PRECEDENCE    |       TOS       |  0  |  0  |  RFC 791
+          +-----+-----+-----+-----+-----+-----+-----+-----+
+
+   RFC 1122 included bits 6 and 7 in the TOS field, though it did not
+   discuss any specific use for those two bits:
+
+             0     1     2     3     4     5     6     7
+          +-----+-----+-----+-----+-----+-----+-----+-----+
+          |   PRECEDENCE    |       TOS                   |  RFC 1122
+          +-----+-----+-----+-----+-----+-----+-----+-----+
+
+   The IPv4 TOS octet was redefined in RFC 1349 [RFC1349] as follows:
+
+             0     1     2     3     4     5     6     7
+          +-----+-----+-----+-----+-----+-----+-----+-----+
+          |   PRECEDENCE    |       TOS             | MBZ |  RFC 1349
+          +-----+-----+-----+-----+-----+-----+-----+-----+
+
+   Bit 6 in the TOS field was defined in RFC 1349 for "Minimize Monetary
+   Cost".  In addition to the Precedence and Type of Service (TOS)
+   fields, the last field, MBZ (for "must be zero") was defined as
+   currently unused.  RFC 1349 stated that "The originator of a datagram
+   sets [the MBZ] field to zero (unless participating in an Internet
+   protocol experiment which makes use of that bit)."
+
+   RFC 1455 [RFC1455] defined an experimental standard that used all
+   four bits in the TOS field to request a guaranteed level of link
+   security.
+
+   RFC 1349 and RFC 1455 have been obsoleted by "Definition of the
+   Differentiated Services Field (DS Field) in the IPv4 and IPv6
+   Headers" [RFC2474] in which bits 6 and 7 of the DS field are listed
+   as Currently Unused (CU).  RFC 2780 [RFC2780] specified ECN as an
+   experimental use of the two-bit CU field.  RFC 2780 updated the
+   definition of the DS Field to only encompass the first six bits of
+   this octet rather than all eight bits; these first six bits are
+   defined as the Differentiated Services CodePoint (DSCP):
+
+            0     1     2     3     4     5     6     7
+         +-----+-----+-----+-----+-----+-----+-----+-----+
+         |               DSCP                |    CU     |  RFCs 2474,
+         +-----+-----+-----+-----+-----+-----+-----+-----+    2780
+
+   Because of this unstable history, the definition of the ECN field in
+   this document cannot be guaranteed to be backwards compatible with
+   all past uses of these two bits.
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 59]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   Prior to RFC 2474, routers were not permitted to modify bits in
+   either the DSCP or ECN field of packets forwarded through them, and
+   hence routers that comply only with RFCs prior to 2474 should have no
+   effect on ECN.  For end nodes, bit 7 (the second ECN bit) must be
+   transmitted as zero for any implementation compliant only with RFCs
+   prior to 2474.  Such nodes may transmit bit 6 (the first ECN bit) as
+   one for the "Minimize Monetary Cost" provision of RFC 1349 or the
+   experiment authorized by RFC 1455; neither this aspect of RFC 1349
+   nor the experiment in RFC 1455 were widely implemented or used.  The
+   damage that could be done by a broken, non-conformant router would
+   include "erasing" the CE codepoint for an ECN-capable packet that
+   arrived at the router with the CE codepoint set, or setting the CE
+   codepoint even in the absence of congestion.  This has been discussed
+   in the section on "Non-compliance in the Network".
+
+   The damage that could be done in an ECN-capable environment by a
+   non-ECN-capable end-node transmitting packets with the ECT codepoint
+   set has been discussed in the section on "Non-compliance by the End
+   Nodes".
+
+23.  IANA Considerations
+
+   This section contains the namespaces that have either been created in
+   this specification, or the values assigned in existing namespaces
+   managed by IANA.
+
+23.1.  IPv4 TOS Byte and IPv6 Traffic Class Octet
+
+   The codepoints for the ECN Field of the IP header are specified by
+   the Standards Action of this RFC, as is required by RFC 2780.
+
+   When this document is published as an RFC, IANA should create a new
+   registry, "IPv4 TOS Byte and IPv6 Traffic Class Octet", with the
+   namespace as follows:
+
+   IPv4 TOS Byte and IPv6 Traffic Class Octet
+
+   Description:  The registrations are identical for IPv4 and IPv6.
+
+   Bits 0-5:  see Differentiated Services Field Codepoints Registry
+           (http://www.iana.org/assignments/dscp-registry)
+
+
+
+
+
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 60]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   Bits 6-7, ECN Field:
+
+   Binary  Keyword                                  References
+   ------  -------                                  ----------
+     00     Not-ECT (Not ECN-Capable Transport)     [RFC 3168]
+     01     ECT(1) (ECN-Capable Transport(1))       [RFC 3168]
+     10     ECT(0) (ECN-Capable Transport(0))       [RFC 3168]
+     11     CE (Congestion Experienced)             [RFC 3168]
+
+23.2.  TCP Header Flags
+
+   The codepoints for the CWR and ECE flags in the TCP header are
+   specified by the Standards Action of this RFC, as is required by RFC
+   2780.
+
+   When this document is published as an RFC, IANA should create a new
+   registry, "TCP Header Flags", with the namespace as follows:
+
+   TCP Header Flags
+
+   The Transmission Control Protocol (TCP) included a 6-bit Reserved
+   field defined in RFC 793, reserved for future use, in bytes 13 and 14
+   of the TCP header, as illustrated below.  The other six Control bits
+   are defined separately by RFC 793.
+
+     0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
+   +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
+   |               |                       | U | A | P | R | S | F |
+   | Header Length |        Reserved       | R | C | S | S | Y | I |
+   |               |                       | G | K | H | T | N | N |
+   +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
+
+   RFC 3168 defines two of the six bits from the Reserved field to be
+   used for ECN, as follows:
+
+     0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
+   +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
+   |               |               | C | E | U | A | P | R | S | F |
+   | Header Length |    Reserved   | W | C | R | C | S | S | Y | I |
+   |               |               | R | E | G | K | H | T | N | N |
+   +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
+
+
+
+
+
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 61]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+   TCP Header Flags
+
+   Bit      Name                                    Reference
+   ---      ----                                    ---------
+    8        CWR (Congestion Window Reduced)        [RFC 3168]
+    9        ECE (ECN-Echo)                         [RFC 3168]
+
+23.3. IPSEC Security Association Attributes
+
+   IANA allocated the IPSEC Security Association Attribute value 10 for
+   the ECN Tunnel use described in Section 9.2.1.2 above at the request
+   of David Black in November 1999.  The IANA has changed the Reference
+   for this allocation from David Black's request to this RFC.
+
+24.  Authors' Addresses
+
+   K. K. Ramakrishnan
+   TeraOptic Networks, Inc.
+
+   Phone: +1 (408) 666-8650
+   EMail: kk@teraoptic.com
+
+
+   Sally Floyd
+   ACIRI
+
+   Phone: +1 (510) 666-2989
+   EMail: floyd@aciri.org
+   URL: http://www.aciri.org/floyd/
+
+
+   David L. Black
+   EMC Corporation
+   42 South St.
+   Hopkinton, MA  01748
+
+   Phone:  +1 (508) 435-1000 x75140
+   EMail: black_david@emc.com
+
+
+
+
+
+
+
+
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 62]
+
+RFC 3168               The Addition of ECN to IP          September 2001
+
+
+25.  Full Copyright Statement
+
+   Copyright (C) The Internet Society (2001).  All Rights Reserved.
+
+   This document and translations of it may be copied and furnished to
+   others, and derivative works that comment on or otherwise explain it
+   or assist in its implementation may be prepared, copied, published
+   and distributed, in whole or in part, without restriction of any
+   kind, provided that the above copyright notice and this paragraph are
+   included on all such copies and derivative works.  However, this
+   document itself may not be modified in any way, such as by removing
+   the copyright notice or references to the Internet Society or other
+   Internet organizations, except as needed for the purpose of
+   developing Internet standards in which case the procedures for
+   copyrights defined in the Internet Standards process must be
+   followed, or as required to translate it into languages other than
+   English.
+
+   The limited permissions granted above are perpetual and will not be
+   revoked by the Internet Society or its successors or assigns.
+
+   This document and the information contained herein is provided on an
+   "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
+   TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
+   BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
+   HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
+   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Acknowledgement
+
+   Funding for the RFC Editor function is currently provided by the
+   Internet Society.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Ramakrishnan, et al.        Standards Track                    [Page 63]
+
diff --git a/ext/picotcp/RFC/rfc3449.txt b/ext/picotcp/RFC/rfc3449.txt
new file mode 100644
index 0000000..46936b0
--- /dev/null
+++ b/ext/picotcp/RFC/rfc3449.txt
@@ -0,0 +1,2299 @@
+
+
+
+
+
+
+Network Working Group                                    H. Balakrishnan
+Request for Comments: 3449                                       MIT LCS
+BCP: 69                                                V. N. Padmanabhan
+Category: Best Current Practice                       Microsoft Research
+                                                            G. Fairhurst
+                                                       M. Sooriyabandara
+                                            University of Aberdeen, U.K.
+                                                           December 2002
+
+
+                     TCP Performance Implications
+                       of Network Path Asymmetry
+
+Status of this Memo
+
+   This document specifies an Internet Best Current Practices for the
+   Internet Community, and requests discussion and suggestions for
+   improvements.  Distribution of this memo is unlimited.
+
+Copyright Notice
+
+   Copyright (C) The Internet Society (2002).  All Rights Reserved.
+
+Abstract
+
+   This document describes TCP performance problems that arise because
+   of asymmetric effects.  These problems arise in several access
+   networks, including bandwidth-asymmetric networks and packet radio
+   subnetworks, for different underlying reasons.  However, the end
+   result on TCP performance is the same in both cases: performance
+   often degrades significantly because of imperfection and variability
+   in the ACK feedback from the receiver to the sender.
+
+   The document details several mitigations to these effects, which have
+   either been proposed or evaluated in the literature, or are currently
+   deployed in networks.  These solutions use a combination of local
+   link-layer techniques, subnetwork, and end-to-end mechanisms,
+   consisting of: (i) techniques to manage the channel used for the
+   upstream bottleneck link carrying the ACKs, typically using header
+   compression or reducing the frequency of TCP ACKs, (ii) techniques to
+   handle this reduced ACK frequency to retain the TCP sender's
+   acknowledgment-triggered self-clocking and (iii) techniques to
+   schedule the data and ACK packets in the reverse direction to improve
+   performance in the presence of two-way traffic.  Each technique is
+   described, together with known issues, and recommendations for use.
+   A summary of the recommendations is provided at the end of the
+   document.
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                  [Page 1]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+Table of Contents
+
+   1. Conventions used in this Document ...............................3
+   2. Motivation ......................................................4
+     2.1 Asymmetry due to Differences in Transmit
+         and Receive Capacity .........................................4
+     2.2 Asymmetry due to Shared Media in the Reverse Direction .......5
+     2.3 The General Problem ..........................................5
+   3. How does Asymmetry Degrade TCP Performance? .....................5
+     3.1 Asymmetric Capacity ..........................................5
+     3.2 MAC Protocol Interactions ....................................7
+     3.3 Bidirectional Traffic ........................................8
+     3.4 Loss in Asymmetric Network Paths ............................10
+   4. Improving TCP Performance using Host Mitigations ...............10
+     4.1 Modified Delayed ACKs .......................................11
+     4.2 Use of Large MSS ............................................12
+     4.3 ACK Congestion Control ......................................13
+     4.4 Window Prediction Mechanism .................................14
+     4.5 Acknowledgement based on Cwnd Estimation. ...................14
+     4.6 TCP Sender Pacing ...........................................14
+     4.7 TCP Byte Counting ...........................................15
+     4.8 Backpressure ................................................16
+   5. Improving TCP performance using Transparent Modifications ......17
+     5.1 TYPE 0: Header Compression ..................................18
+       5.1.1 TCP Header Compression ..................................18
+       5.1.2 Alternate Robust Header Compression Algorithms ..........19
+     5.2 TYPE 1: Reverse Link Bandwidth Management ...................19
+       5.2.1 ACK Filtering ...........................................20
+       5.2.2 ACK Decimation ..........................................21
+     5.3 TYPE 2: Handling Infrequent ACKs ............................22
+       5.3.1 ACK Reconstruction ......................................23
+       5.3.2 ACK Compaction and Companding ...........................25
+       5.3.3 Mitigating TCP packet bursts generated by
+             Infrequent ACKs .........................................26
+     5.4 TYPE 3: Upstream Link Scheduling ............................27
+       5.4.1 Per-Flow queuing at the Upstream Bottleneck Link ........27
+       5.4.2 ACKs-first Scheduling ...................................28
+   6. Security Considerations ........................................29
+   7. Summary ........................................................30
+   8. Acknowledgments ................................................32
+   9. References .....................................................32
+   10. IANA Considerations ...........................................37
+   Appendix: Examples of Subnetworks Exhibiting Network Path
+             Asymmetry ...............................................38
+   Authors' Addresses ................................................40
+   Full Copyright Statement ..........................................41
+
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                  [Page 2]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+1. Conventions used in this Document
+
+   FORWARD DIRECTION: The dominant direction of data transfer over an
+   asymmetric network path.  It corresponds to the direction with better
+   characteristics in terms of capacity, latency, error rate, etc.  Data
+   transfer in the forward direction is called "forward transfer".
+   Packets travelling in the forward direction follow the forward path
+   through the IP network.
+
+   REVERSE DIRECTION: The direction in which acknowledgments of a
+   forward TCP transfer flow.  Data transfer could also happen in this
+   direction (and is termed "reverse transfer"), but it is typically
+   less voluminous than that in the forward direction.  The reverse
+   direction typically exhibits worse characteristics than the forward
+   direction.  Packets travelling in the reverse direction follow the
+   reverse path through the IP network.
+
+   UPSTREAM LINK: The specific bottleneck link that normally has much
+   less capability than the corresponding downstream link.  Congestion
+   is not confined to this link alone, and may also occur at any point
+   along the forward and reverse directions (e.g., due to sharing with
+   other traffic flows).
+
+   DOWNSTREAM LINK: A link on the forward path, corresponding to the
+   upstream link.
+
+   ACK: A cumulative TCP acknowledgment [RFC791].  In this document,
+   this term refers to a TCP segment that carries a cumulative
+   acknowledgement (ACK), but no data.
+
+   DELAYED ACK FACTOR, d: The number of TCP data segments acknowledged
+   by a TCP ACK.  The minimum value of d is 1, since at most one ACK
+   should be sent for each data packet [RFC1122, RFC2581].
+
+   STRETCH ACK: Stretch ACKs are acknowledgements that cover more than 2
+   segments of previously unacknowledged data (d>2) [RFC2581].  Stretch
+   ACKs can occur by design (although this is not standard), due to
+   implementation bugs [All97b, RFC2525], or due to ACK loss [RFC2760].
+
+   NORMALIZED BANDWIDTH RATIO, k:  The ratio of the raw bandwidth
+   (capacity) of the forward direction to the return direction, divided
+   by the ratio of the packet sizes used in the two directions [LMS97].
+
+   SOFTSTATE: Per-flow state established in a network device that is
+   used by the protocol [Cla88].  The state expires after a period of
+   time (i.e., is not required to be explicitly deleted when a session
+
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                  [Page 3]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   expires), and is continuously refreshed while a flow continues (i.e.,
+   lost state may be reconstructed without needing to exchange
+   additional control messages).
+
+2. Motivation
+
+   Asymmetric characteristics are exhibited by several network
+   technologies, including cable data networks, (e.g., DOCSIS cable TV
+   networks [DS00, DS01]), direct broadcast satellite (e.g., an IP
+   service using Digital Video Broadcast, DVB, [EN97] with an
+   interactive return channel), Very Small Aperture satellite Terminals
+   (VSAT), Asymmetric Digital Subscriber Line (ADSL) [ITU02, ANS01], and
+   several packet radio networks.  These networks are increasingly being
+   deployed as high-speed Internet access networks, and it is therefore
+   highly desirable to achieve good TCP performance.  However, the
+   asymmetry of the network paths often makes this challenging.
+   Examples of some networks that exhibit asymmetry are provided in the
+   Appendix.
+
+   Asymmetry may manifest itself as a difference in transmit and receive
+   capacity, an imbalance in the packet loss rate, or differences
+   between the transmit and receive paths [RFC3077].  For example, when
+   capacity is asymmetric, such that there is reduced capacity on the
+   reverse path used by TCP ACKs, slow or infrequent ACK feedback
+   degrades TCP performance in the forward direction.  Similarly,
+   asymmetry in the underlying Medium Access Control (MAC) and Physical
+   (PHY) protocols could make it expensive to transmit TCP ACKs
+   (disproportionately to their size), even when capacity is symmetric.
+
+2.1  Asymmetry due to Differences in Transmit and Receive Capacity
+
+   Network paths may be asymmetric because the upstream and downstream
+   links operate at different rates and/or are implemented using
+   different technologies.
+
+   The asymmetry in capacity may be substantially increased when best
+   effort IP flows carrying TCP ACKs share the available upstream
+   capacity with other traffic flows, e.g., telephony, especially flows
+   that have reserved upstream capacity.  This includes service
+   guarantees at the IP layer (e.g., the Guaranteed Service [RFC2212])
+   or at the subnet layer (e.g., support of Voice over IP [ITU01] using
+   the Unsolicited Grant service in DOCSIS [DS01], or CBR virtual
+   connections in ATM over ADSL [ITU02, ANS01]).
+
+   When multiple upstream links exist the asymmetry may be reduced by
+   dividing upstream traffic between a number of available upstream
+   links.
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                  [Page 4]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+2.2 Asymmetry due to Shared Media in the Reverse Direction
+
+   In networks employing centralized multiple access control, asymmetry
+   may be a fundamental consequence of the hub-and-spokes architecture
+   of the network (i.e., a single base node communicating with multiple
+   downstream nodes).  The central node often incurs less transmission
+   overhead and does not incur latency in scheduling its own downstream
+   transmissions.  In contrast, upstream transmission is subject to
+   additional overhead and latency (e.g., due to guard times between
+   transmission bursts, and contention intervals).  This can produce
+   significant network path asymmetry.
+
+   Upstream capacity may be further limited by the requirement that each
+   node must first request per-packet bandwidth using a contention MAC
+   protocol (e.g., DOCSIS 1.0 MAC restricts each node to sending at most
+   a single packet in each upstream time-division interval [DS00]).  A
+   satellite network employing dynamic Bandwidth on Demand (BoD) also
+   consumes MAC resources for each packet sent (e.g., [EN00]).  In these
+   schemes, the available uplink capacity is a function of the MAC
+   algorithm.  The MAC and PHY schemes also introduce overhead per
+   upstream transmission which could be so significant that transmitting
+   short packets (including TCP ACKs) becomes as costly as transmitting
+   MTU-sized data packets.
+
+2.3 The General Problem
+
+   Despite the technological differences between capacity-dependent and
+   MAC-dependent asymmetries, both kinds of network path suffer reduced
+   TCP performance for the same fundamental reason: the imperfection and
+   variability of ACK feedback.  This document discusses the problem in
+   detail and describes several techniques that may reduce or eliminate
+   the constraints.
+
+3. How does Asymmetry Degrade TCP Performance?
+
+   This section describes the implications of network path asymmetry on
+   TCP performance.  The reader is referred to [BPK99, Bal98, Pad98,
+   FSS01, Sam99] for more details and experimental results.
+
+3.1 Asymmetric Capacity
+
+   The problems that degrade unidirectional transfer performance when
+   the forward and return paths have very different capacities depend on
+   the characteristics of the upstream link.  Two types of situations
+   arise for unidirectional traffic over such network paths: when the
+   upstream bottleneck link has sufficient queuing to prevent packet
+   (ACK) losses, and when the upstream bottleneck link has a small
+   buffer.  Each is considered in turn.
+
+
+
+Balakrishnan et. al.     Best Current Practice                  [Page 5]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   If the upstream bottleneck link has deep queues, so that this does
+   not drop ACKs in the reverse direction, then performance is a strong
+   function of the normalized bandwidth ratio, k.  For example, for a 10
+   Mbps downstream link and a 50 Kbps upstream link, the raw capacity
+   ratio is 200.  With 1000-byte data packets and 40-byte ACKs, the
+   ratio of the packet sizes is 25.  This implies that k is 200/25 = 8.
+   Thus, if the receiver acknowledges more frequently than one ACK every
+   8 (k) data packets, the upstream link will become saturated before
+   the downstream link, limiting the throughput in the forward
+   direction.  Note that the achieved TCP throughput is determined by
+   the minimum of the receiver advertised window and the TCP congestion
+   window, cwnd [RFC2581].
+
+   If ACKs are not dropped (at the upstream bottleneck link) and k > 1
+   or k > 0.5 when delayed ACKs are used [RFC1122], TCP ACK-clocking
+   breaks down.  Consider two data packets transmitted by the sender in
+   quick succession.  En route to the receiver, these packets get spaced
+   apart according to the capacity of the smallest bottleneck link in
+   the forward direction.  The principle of ACK clocking is that the
+   ACKs generated in response to receiving these data packets reflect
+   this temporal spacing all the way back to the sender, enabling it to
+   transmit new data packets that maintain the same spacing [Jac88]. ACK
+   clocking with delayed ACKs reflects the spacing between data packets
+   that actually trigger ACKs.  However, the limited upstream capacity
+   and queuing at the upstream bottleneck router alter the inter-ACK
+   spacing of the reverse path, and hence that observed at the sender.
+   When ACKs arrive at the upstream bottleneck link at a faster rate
+   than the link can support, they get queued behind one another.  The
+   spacing between them when they emerge from the link is dilated with
+   respect to their original spacing, and is a function of the upstream
+   bottleneck capacity.  Thus the TCP sender clocks out new data packets
+   at a slower rate than if there had been no queuing of ACKs.  The
+   performance of the connection is no longer dependent on the
+   downstream bottleneck link alone; instead, it is throttled by the
+   rate of arriving ACKs.  As a side effect, the sender's rate of cwnd
+   growth also slows down.
+
+   A second side effect arises when the upstream bottleneck link on the
+   reverse path is saturated.  The saturated link causes persistent
+   queuing of packets, leading to an increasing path Round Trip Time
+   (RTT) [RFC2988] observed by all end hosts using the bottleneck link.
+   This can impact the protocol control loops, and may also trigger
+   false timeouts (underestimation of the path RTT by the sending host).
+
+   A different situation arises when the upstream bottleneck link has a
+   relatively small amount of buffer space to accommodate ACKs.  As the
+   transmission window grows, this queue fills, and ACKs are dropped. If
+   the receiver were to acknowledge every packet, only one of every k
+
+
+
+Balakrishnan et. al.     Best Current Practice                  [Page 6]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   ACKs would get through to the sender, and the remaining (k-1) are
+   dropped due to buffer overflow at the upstream link buffer (here k is
+   the normalized bandwidth ratio as before).  In this case, the reverse
+   bottleneck link capacity and slow ACK arrival rate are not directly
+   responsible for any degraded performance.  However, the infrequency
+   of ACKs leads to three reasons for degraded performance:
+
+   1. The sender transmits data in large bursts of packets, limited only
+      by the available cwnd.  If the sender receives only one ACK in k,
+      it transmits data in bursts of k (or more) packets because each
+      ACK shifts the sliding window by at least k (acknowledged) data
+      packets (TCP data segments).  This increases the likelihood of
+      data packet loss along the forward path especially when k is
+      large, because routers do not handle large bursts of packets well.
+
+   2. Current TCP sender implementations increase their cwnd by counting
+      the number of ACKs they receive and not by how much data is
+      actually acknowledged by each ACK.  The latter approach, also known
+      as byte counting (section 4.7), is a standard implementation
+      option for cwnd increase during the congestion avoidance period
+      [RFC2581].  Thus fewer ACKs imply a slower rate of growth of the
+      cwnd, which degrades performance over long-delay connections.
+
+   3. The sender TCP's Fast Retransmission and Fast Recovery algorithms
+      [RFC2581] are less effective when ACKs are lost.  The sender may
+      possibly not receive the threshold number of duplicate ACKs even
+      if the receiver transmits more than the DupACK threshold (> 3
+      DupACKs) [RFC2581].  Furthermore, the sender may possibly not
+      receive enough duplicate ACKs to adequately inflate its cwnd
+      during Fast Recovery.
+
+3.2 MAC Protocol Interactions
+
+   The interaction of TCP with MAC protocols may degrade end-to-end
+   performance.  Variable round-trip delays and ACK queuing are the main
+   symptoms of this problem.
+
+   One example is the impact on terrestrial wireless networks [Bal98]. A
+   high per-packet overhead may arise from the need for communicating
+   link nodes to first synchronise (e.g., via a Ready To Send / Clear to
+   Send (RTS/CTS) protocol) before communication and the significant
+   turn-around time for the wireless channel.  This overhead is
+   variable, since the RTS/CTS exchange may need to back-off
+   exponentially when the remote node is busy (e.g., engaged in a
+   conversation with a different node).  This leads to large and
+   variable communication latencies in packet-radio networks.
+
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                  [Page 7]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   An asymmetric workload (more downstream than upstream traffic) may
+   cause ACKs to be queued in some wireless nodes (especially in the end
+   host modems), exacerbating the variable latency.  Queuing may also
+   occur in other shared media, e.g., cable modem uplinks, BoD access
+   systems often employed on shared satellite channels.
+
+   Variable latency and ACK queuing reduce the smoothness of the TCP
+   data flow.  In particular, ACK traffic can interfere with the flow of
+   data packets, increasing the traffic load of the system.
+
+   TCP measures the path RTT, and from this calculates a smoothed RTT
+   estimate (srtt) and a linear deviation, rttvar.  These are used to
+   estimate a path retransmission timeout (RTO) [RFC2988], set to srtt +
+   4*rttvar.  For most wired TCP connections, the srtt remains constant
+   or has a low linear deviation.  The RTO therefore tracks the path
+   RTT, and the TCP sender will respond promptly when multiple losses
+   occur in a window.  In contrast, some wireless networks exhibit a
+   high variability in RTT, causing the RTO to significantly increase
+   (e.g., on the order of 10 seconds).  Paths traversing multiple
+   wireless hops are especially vulnerable to this effect, because this
+   increases the probability that the intermediate nodes may already be
+   engaged in conversation with other nodes.  The overhead in most MAC
+   schemes is a function of both the number and size of packets.
+   However, the MAC contention problem is a significant function of the
+   number of packets (e.g., ACKs) transmitted rather than their size.
+   In other words, there is a significant cost to transmitting a packet
+   regardless of packet size.
+
+   Experiments conducted on the Ricochet packet radio network in 1996
+   and 1997 demonstrated the impact of radio turnarounds and the
+   corresponding increased RTT variability, resulting in degraded TCP
+   performance.  It was not uncommon for TCP connections to experience
+   timeouts of 9 - 12 seconds, with the result that many connections
+   were idle for a significant fraction of their lifetime (e.g.,
+   sometimes 35% of the total transfer time).  This leads to under-
+   utilization of the available capacity.  These effects may also occur
+   in other wireless subnetworks.
+
+3.3 Bidirectional Traffic
+
+   Bidirectional traffic arises when there are simultaneous TCP
+   transfers in the forward and reverse directions over an asymmetric
+   network path, e.g., a user who sends an e-mail message in the reverse
+   direction while simultaneously receiving a web page in the forward
+   direction.  To simplify the discussion, only one TCP connection in
+   each direction is considered.  In many practical cases, several
+   simultaneous connections need to share the available capacity,
+   increasing the level of congestion.
+
+
+
+Balakrishnan et. al.     Best Current Practice                  [Page 8]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   Bidirectional traffic makes the effects discussed in section 3.1 more
+   pronounced, because part of the upstream link bandwidth is consumed
+   by the reverse transfer.  This effectively increases the degree of
+   bandwidth asymmetry.  Other effects also arise due to the interaction
+   between data packets of the reverse transfer and ACKs of the forward
+   transfer.  Suppose at the time the forward TCP connection is
+   initiated, the reverse TCP connection has already saturated the
+   bottleneck upstream link with data packets.  There is then a high
+   probability that many ACKs of the new forward TCP connection will
+   encounter a full upstream link buffer and hence get dropped.  Even
+   after these initial problems, ACKs of the forward connection could
+   get queued behind large data packets of the reverse connection.  The
+   larger data packets may have correspondingly long transmission times
+   (e.g., it takes about 280 ms to transmit a 1 Kbyte data packet over a
+   28.8 kbps line).  This causes the forward transfer to stall for long
+   periods of time.  It is only at times when the reverse connection
+   loses packets (due to a buffer overflow at an intermediate router)
+   and slows down, that the forward connection gets the opportunity to
+   make rapid progress and build up its cwnd.
+
+   When ACKs are queued behind other traffic for appreciable periods of
+   time, the burst nature of TCP traffic and self-synchronizing effects
+   can result in an effect known as ACK Compression [ZSC91], which
+   reduces the throughput of TCP.  It occurs when a series of ACKs in
+   one direction are queued behind a burst of other packets (e.g., data
+   packets traveling in the same direction) and become compressed in
+   time.  This results in an intense burst of data packets in the other
+   direction, in response to the burst of compressed ACKs arriving at
+   the server.  This phenomenon has been investigated in detail for
+   bidirectional traffic, and recent analytical work [LMS97] has
+   predicted ACK Compression may also result from bi-directional
+   transmission with asymmetry, and was observed in practical asymmetric
+   satellite subnetworks [FSS01].  In the case of extreme asymmetry
+   (k>>1), the inter-ACK spacing can increase due to queuing (section
+   3.1), resulting in ACK dilation.
+
+   In summary, sharing of the upstream bottleneck link by multiple flows
+   (e.g., IP flows to the same end host, or flows to a number of end
+   hosts sharing a common upstream link) increases the level of ACK
+   Congestion.  The presence of bidirectional traffic exacerbates the
+   constraints introduced by bandwidth asymmetry because of the adverse
+   interaction between (large) data packets of a reverse direction
+   connection and the ACKs of a forward direction connection.
+
+
+
+
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                  [Page 9]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+3.4 Loss in Asymmetric Network Paths
+
+   Loss may occur in either the forward or reverse direction.  For data
+   transfer in the forward direction this results respectively in loss
+   of data packets and ACK packets.  Loss of ACKs is less significant
+   than loss of data packets, because it generally results in stretch
+   ACKs [CR98, FSS01].
+
+   In the case of long delay paths, a slow upstream link [RFC3150] can
+   lead to another complication when the end host uses TCP large windows
+   [RFC1323] to maximize throughput in the forward direction.  Loss of
+   data packets on the forward path, due to congestion, or link loss,
+   common for some wireless links, will generate a large number of
+   back-to-back duplicate ACKs (or TCP SACK packets [RFC2018]), for each
+   correctly received data packet following a loss.  The TCP sender
+   employs Fast Retransmission and Recovery [RFC2581] to recover from
+   the loss, but even if this is successful, the ACK to the
+   retransmitted data segment may be significantly delayed by other
+   duplicate ACKs still queued at the upstream link buffer.  This can
+   ultimately lead to a timeout [RFC2988] and a premature end to the TCP
+   Slow Start [RFC2581].  This results in poor forward path throughput.
+   Section 5.3 describes some mitigations to counter this.
+
+4. Improving TCP Performance using Host Mitigations
+
+   There are two key issues that need to be addressed to improve TCP
+   performance over asymmetric networks.  The first is to manage the
+   capacity of the upstream bottleneck link, used by ACKs and possibly
+   other traffic.  A number of techniques exist which work by reducing
+   the number of ACKs that flow in the reverse direction.  This has the
+   side effect of potentially destroying the desirable self-clocking
+   property of the TCP sender where transmission of new data packets is
+   triggered by incoming ACKs.  Thus, the second issue is to avoid any
+   adverse impact of infrequent ACKs.
+
+   Each of these issues can be handled by local link-layer solutions
+   and/or by end-to-end techniques.  This section discusses end-to-end
+   modifications.  Some techniques require TCP receiver changes
+   (sections 4.1, 4.4, 4.5), some require TCP sender changes (sections
+   4.6, 4.7), and a pair requires changes to both the TCP sender and
+   receiver (sections 4.2, 4.3).  One technique requires a sender
+   modification at the receiving host (section 4.8).  The techniques may
+   be used independently, however some sets of techniques are
+   complementary, e.g., pacing (section 4.6) and byte counting (section
+   4.7) which have been bundled into a single TCP Sender Adaptation
+   scheme [BPK99].
+
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 10]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   It is normally envisaged that these changes would occur in the end
+   hosts using the asymmetric path; however, they could be (and have
+   been) used in a middle-box or Protocol Enhancing Proxy (PEP)
+   [RFC3135] employing split TCP.  This document does not discuss the
+   issues concerning PEPs.  Section 5 describes several techniques that
+   do not require end-to-end changes.
+
+4.1 Modified Delayed ACKs
+
+   There are two standard methods that can be used by TCP receivers to
+   generate acknowledgments.  The method outlined in [RFC793] generates
+   an ACK for each incoming data segment (i.e., d=1).  [RFC1122] states
+   that hosts should use "delayed acknowledgments".  Using this
+   algorithm, an ACK is generated for at least every second full-sized
+   segment (d=2), or if a second full-sized segment does not arrive
+   within a given timeout (which must not exceed 500 ms [RFC1122],  and
+   is typically less than 200 ms).  Relaxing the latter constraint
+   (i.e., allowing d>2) may generate Stretch ACKs [RFC2760].  This
+   provides a possible mitigation, which reduces the rate at which ACKs
+   are returned by the receiver.  An implementer should only deviate
+   from this requirement after careful consideration of the implications
+   [RFC2581].
+
+   Reducing the number of ACKs per received data segment has a number of
+   undesirable effects including:
+
+   (i)    Increased path RTT
+   (ii)   Increased time for TCP to open the cwnd
+   (iii)  Increased TCP sender burst size, since cwnd opens in larger
+          steps
+
+   In addition, a TCP receiver is often unable to determine an optimum
+   setting for a large d, since it will normally be unaware of the
+   details of the properties of the links that form the path in the
+   reverse direction.
+
+   RECOMMENDATION: A TCP receiver must use the standard TCP algorithm
+   for sending ACKs as specified in [RFC2581].  That is, it may delay
+   sending an ACK after it receives a data segment [RFC1122].  When ACKs
+   are delayed, the receiver must generate an ACK within 500 ms and the
+   ACK should be generated for at least every second full sized segment
+   (MSS) of received data [RFC2581].  This will result in an ACK delay
+   factor (d) that does not exceed a value of 2.  Changing the algorithm
+   would require a host modification to the TCP receiver and awareness
+   by the receiving host that it is using a connection with an
+   asymmetric path.  Such a change has many drawbacks in the general
+   case and is currently not recommended for use within the Internet.
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 11]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+4.2 Use of Large MSS
+
+   A TCP sender that uses a large Maximum Segment Size (MSS) reduces the
+   number of ACKs generated per transmitted byte of data.
+
+   Although individual subnetworks may support a large MTU, the majority
+   of current Internet links employ an MTU of approx 1500 bytes (that of
+   Ethernet).  By setting the Don't Fragment (DF) bit in the IP header,
+   Path MTU (PMTU) discovery [RFC1191] may be used to determine the
+   maximum packet size (and hence MSS) a sender can use on a given
+   network path without being subjected to IP fragmentation, and
+   provides a way to automatically select a suitable MSS for a specific
+   path.  This also guarantees that routers will not perform IP
+   fragmentation of normal data packets.
+
+   By electing not to use PMTU Discovery, an end host may choose to use
+   IP fragmentation by routers along the path in the forward direction
+   [RFC793].  This allows an MSS larger than the smallest MTU along the
+   path.  However, this increases the unit of error recovery (TCP
+   segment) above the unit of transmission (IP packet).  This is not
+   recommended, since it can increase the number of retransmitted
+   packets following loss of a single IP packet, leading to reduced
+   efficiency, and potentially aggravating network congestion [Ken87].
+   Choosing an MSS larger than the forward path minimum MTU also permits
+   the sender to transmit more initial packets (a burst of IP fragments
+   for each TCP segment) when a session starts or following RTO expiry,
+   increasing the aggressiveness of the sender compared to standard TCP
+   [RFC2581].  This can adversely impact other standard TCP sessions
+   that share a network path.
+
+   RECOMMENDATION:
+
+   A larger forward path MTU is desirable for paths with bandwidth
+   asymmetry.  Network providers may use a large MTU on links in the
+   forward direction.  TCP end hosts using Path MTU discovery may be
+   able to take advantage of a large MTU by automatically selecting an
+   appropriate larger MSS, without requiring modification.  The use of
+   Path MTU discovery [RFC1191] is therefore recommended.
+
+   Increasing the unit of error recovery and congestion control (MSS)
+   above the unit of transmission and congestion loss (the IP packet) by
+   using a larger end host MSS and IP fragmentation in routers is not
+   recommended.
+
+
+
+
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 12]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+4.3 ACK Congestion Control
+
+   ACK Congestion Control (ACC) is an experimental technique that
+   operates end to end.  ACC extends congestion control to ACKs, since
+   they may make non-negligible demands on resources (e.g., packet
+   buffers, and MAC transmission overhead) at an upstream bottleneck
+   link.  It has two parts: (a) a network mechanism indicating to the
+   receiver that the ACK path is congested, and (b) the receiver's
+   response to such an indication.
+
+   A router feeding an upstream bottleneck link may detect incipient
+   congestion, e.g., using an algorithm based on RED (Random Early
+   Detection) [FJ93].  This may track the average queue size over a time
+   window in the recent past.  If the average exceeds a threshold, the
+   router may select a packet at random.  If the packet IP header has
+   the Explicit Congestion Notification Capable Transport (ECT) bit set,
+   the router may mark the packet, i.e., set the Explicit Congestion
+   Notification (ECN) [RFC3168] bit(s) in the IP header; otherwise the
+   packet is normally dropped.  The ECN notification received by the end
+   host is reflected back to the sending TCP end host, to trigger
+   congestion avoidance [RFC3168].  Note that routers implementing RED
+   with ECN, do not eliminate packet loss, and may drop a packet (even
+   when the ECT bit is set).  It is also possible to use an algorithm
+   other than RED to decide when to set the ECN bit.
+
+   ACC extends ECN so that both TCP data packets and ACKs set the ECT
+   bit and are thus candidates for being marked with an ECN bit.
+   Therefore, upon receiving an ACK with the ECN bit set [RFC3168], a
+   TCP receiver reduces the rate at which it sends ACKs.  It maintains a
+   dynamically varying delayed-ACK factor, d, and sends one ACK for
+   every d data packets received.  When it receives a packet with the
+   ECN bit set, it increases d multiplicatively, thereby
+   multiplicatively decreasing the frequency of ACKs.  For each
+   subsequent RTT (e.g., determined using the TCP RTTM option [RFC1323])
+   during which it does not receive an ECN, it linearly decreases the
+   factor d, increasing the frequency of ACKs.  Thus, the receiver
+   mimics the standard congestion control behavior of TCP senders in the
+   manner in which it sends ACKs.
+
+   The maximum value of d is determined by the TCP sender window size,
+   which could be conveyed to the receiver in a new (experimental) TCP
+   option.  The receiver should send at least one ACK (preferably more)
+   for each window of data from the sender (i.e., d < (cwnd/mss)) to
+   prevent the sender from stalling until the receiver's delayed ACK
+   timer triggers an ACK to be sent.
+
+
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 13]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   RECOMMENDATION: ACK Congestion Control (ACC) is an experimental
+   technique that requires TCP sender and receiver modifications.  There
+   is currently little experience of using such techniques in the
+   Internet.  Future versions of TCP may evolve to include this or
+   similar techniques.  These are the subject of ongoing research.  ACC
+   is not recommended for use within the Internet in its current form.
+
+4.4 Window Prediction Mechanism
+
+   The Window Prediction Mechanism (WPM) is a TCP receiver side
+   mechanism [CLP98] that uses a dynamic ACK delay factor (varying d)
+   resembling the ACC scheme (section 4.3).  The TCP receiver
+   reconstructs the congestion control behavior of the TCP sender by
+   predicting a cwnd value.  This value is used along with the allowed
+   window to adjust the receiver's value of d.  WPM accommodates for
+   unnecessary retransmissions resulting from losses due to link errors.
+
+   RECOMMENDATION: Window Prediction Mechanism (WPM) is an experimental
+   TCP receiver side modification.  There is currently little experience
+   of using such techniques in the Internet.  Future versions of TCP may
+   evolve to include this or similar techniques.  These are the subjects
+   of ongoing research.  WPM is not recommended for use within the
+   Internet in its current form.
+
+4.5 Acknowledgement based on Cwnd Estimation.
+
+   Acknowledgement based on Cwnd Estimation (ACE) [MJW00] attempts to
+   measure the cwnd at the TCP receiver and maintain a varying ACK delay
+   factor (d).  The cwnd is estimated by counting the number of packets
+   received during a path RTT.  The technique may improve accuracy of
+   prediction of a suitable cwnd.
+
+   RECOMMENDATION: Acknowledgement based on Cwnd Estimation (ACE) is an
+   experimental TCP receiver side modification.  There is currently
+   little experience of using such techniques in the Internet.  Future
+   versions of TCP may evolve to include this or similar techniques.
+   These are the subject of ongoing research.  ACE is not recommended
+   for use within the Internet in its current form.
+
+4.6 TCP Sender Pacing
+
+   Reducing the frequency of ACKs may alleviate congestion of the
+   upstream bottleneck link, but can lead to increased size of TCP
+   sender bursts (section 4.1).  This may slow the growth of cwnd, and
+   is undesirable when used over shared network paths since it may
+   significantly increase the maximum number of packets in the
+   bottleneck link buffer, potentially resulting in an increase in
+   network congestion.  This may also lead to ACK Compression [ZSC91].
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 14]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   TCP Pacing [AST00], generally referred to as TCP Sender pacing,
+   employs an adapted TCP sender to alleviate transmission burstiness.
+   A bound is placed on the maximum number of packets the TCP sender can
+   transmit back-to-back (at local line rate), even if the window(s)
+   allow the transmission of more data.  If necessary, more bursts of
+   data packets are scheduled for later points in time computed based on
+   the transmission rate of the TCP connection.  The transmission rate
+   may be estimated from the ratio cwnd/srtt.  Thus, large bursts of
+   data packets get broken up into smaller bursts spread over time.
+
+   A subnetwork may also provide pacing (e.g., Generic Traffic Shaping
+   (GTS)), but implies a significant increase in the per-packet
+   processing overhead and buffer requirement at the router where
+   shaping is performed (section 5.3.3).
+
+   RECOMMENDATIONS: TCP Sender Pacing requires a change to the
+   implementation of the TCP sender.  It may be beneficial in the
+   Internet and will significantly reduce the burst size of packets
+   transmitted by a host.  This successfully mitigates the impact of
+   receiving Stretch ACKs.  TCP Sender Pacing implies increased
+   processing cost per packet, and requires a prediction algorithm to
+   suggest a suitable transmission rate.  There are hence performance
+   trade-offs between end host cost and network performance.
+   Specification of efficient algorithms remains an area of ongoing
+   research.  Use of TCP Sender Pacing is not expected to introduce new
+   problems.  It is an experimental mitigation for TCP hosts that may
+   control the burstiness of transmission (e.g., resulting from Type 1
+   techniques, section 5.1.2), however it is not currently widely
+   deployed.  It is not recommended for use within the Internet in its
+   current form.
+
+4.7 TCP Byte Counting
+
+   The TCP sender can avoid slowing growth of cwnd by taking into
+   account the volume of data acknowledged by each ACK, rather than
+   opening the cwnd based on the number of received ACKs.  So, if an ACK
+   acknowledges d data packets (or TCP data segments), the cwnd would
+   grow as if d separate ACKs had been received.  This is called TCP
+   Byte Counting [RFC2581, RFC2760].  (One could treat the single ACK as
+   being equivalent to d/2, instead of d ACKs, to mimic the effect of
+   the TCP delayed ACK algorithm.)  This policy works because cwnd
+   growth is only tied to the available capacity in the forward
+   direction, so the number of ACKs is immaterial.
+
+   This may mitigate the impact of asymmetry when used in combination
+   with other techniques (e.g., a combination of TCP Pacing
+   (section 4.6), and ACC (section 4.3) associated with a duplicate ACK
+   threshold at the receiver.)
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 15]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   The main issue is that TCP byte counting may generate undesirable
+   long bursts of TCP packets at the sender host line rate.  An
+   implementation must also consider that data packets in the forward
+   direction and ACKs in the reverse direction may both travel over
+   network paths that perform some amount of packet reordering.
+   Reordering of IP packets is currently common, and may arise from
+   various causes [BPS00].
+
+   RECOMMENDATION: TCP Byte Counting requires a small TCP sender
+   modification.  In its simplest form, it can generate large bursts of
+   TCP data packets, particularly when Stretch ACKs are received.
+   Unlimited byte counting is therefore not allowed [RFC2581] for use
+   within the Internet.
+
+   It is therefore strongly recommended [RFC2581, RFC2760] that any byte
+   counting scheme should include a method to mitigate the potentially
+   large bursts of TCP data packets the algorithm can cause (e.g., TCP
+   Sender Pacing (section 4.6), ABC [abc-ID]).  If the burst size or
+   sending rate of the TCP sender can be controlled then the scheme may
+   be beneficial when Stretch ACKs are received.  Determining safe
+   algorithms remains an area of ongoing research.  Further
+   experimentation will then be required to assess the success of these
+   safeguards, before they can be recommended for use in the Internet.
+
+4.8 Backpressure
+
+   Backpressure is a technique to enhance the performance of
+   bidirectional traffic for end hosts directly connected to the
+   upstream bottleneck link [KVR98].  A limit is set on how many data
+   packets of upstream transfers can be enqueued at the upstream
+   bottleneck link.  In other words, the bottleneck link queue exerts
+   'backpressure' on the TCP (sender) layer.  This requires a modified
+   implementation, compared to that currently deployed in many TCP
+   stacks.  Backpressure ensures that ACKs of downstream connections do
+   not get starved at the upstream bottleneck, thereby improving
+   performance of the downstream connections.  Similar generic schemes
+   that may be implemented in hosts/routers are discussed in section
+   5.4.
+
+   Backpressure can be unfair to a reverse direction connection and make
+   its throughput highly sensitive to the dynamics of the forward
+   connection(s).
+
+   RECOMMENDATION: Backpressure requires an experimental modification to
+   the sender protocol stack of a host directly connected to an upstream
+   bottleneck link.  Use of backpressure is an implementation issue,
+   rather than a network protocol issue.  Where backpressure is
+   implemented, the optimizations described in this section could be
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 16]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   desirable and can benefit bidirectional traffic for hosts.
+   Specification of safe algorithms for providing backpressure is still
+   a subject of ongoing research.  The technique is not recommended for
+   use within the Internet in its current form.
+
+5. Improving TCP performance using Transparent Modifications
+
+   Various link and network layer techniques have been suggested to
+   mitigate the effect of an upstream bottleneck link.  These techniques
+   may provide benefit without modification to either the TCP sender or
+   receiver, or may alternately be used in conjunction with one or more
+   of the schemes identified in section 4.  In this document, these
+   techniques are known as "transparent" [RFC3135], because at the
+   transport layer, the TCP sender and receiver are not necessarily
+   aware of their existence.  This does not imply that they do not
+   modify the pattern and timing of packets as observed at the network
+   layer.  The techniques are classified here into three types based on
+   the point at which they are introduced.
+
+   Most techniques require the individual TCP connections passing over
+   the bottleneck link(s) to be separately identified and imply that
+   some per-flow state is maintained for active TCP connections.  A link
+   scheduler may also be employed (section 5.4).  The techniques (with
+   one exception, ACK Decimation (section 5.2.2)) require:
+
+   (i)   Visibility of an unencrypted IP and TCP packet header (e.g., no
+         use of IPSec with payload encryption [RFC2406]).
+   (ii)  Knowledge of IP/TCP options and ability to inspect packets with
+         tunnel encapsulations (e.g., [RFC2784]) or to suspend
+         processing of packets with unknown formats.
+   (iii) Ability to demultiplex flows (by using address/protocol/port
+         number, or an explicit flow-id).
+
+   [RFC3135] describes a class of network device that provides more than
+   forwarding of packets, and which is known as a Protocol Enhancing
+   Proxy (PEP).  A large spectrum of PEP devices exists, ranging from
+   simple devices (e.g., ACK filtering) to more sophisticated devices
+   (e.g., stateful devices that split a TCP connection into two separate
+   parts).  The techniques described in section 5 of this document
+   belong to the simpler type, and do not inspect or modify any TCP or
+   UDP payload data.  They also do not modify port numbers or link
+   addresses.  Many of the risks associated with more complex PEPs do
+   not exist for these schemes.  Further information about the operation
+   and the risks associated with using PEPs are described in [RFC3135].
+
+
+
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 17]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+5.1 TYPE 0: Header Compression
+
+   A client may reduce the volume of bits used to send a single ACK by
+   using compression [RFC3150, RFC3135].  Most modern dial-up modems
+   support ITU-T V.42 bulk compression.  In contrast to bulk
+   compression, header compression is known to be very effective at
+   reducing the number of bits sent on the upstream link [RFC1144]. This
+   relies on the observation that most TCP packet headers vary only in a
+   few bit positions between successive packets in a flow, and that the
+   variations can often be predicted.
+
+5.1.1 TCP Header Compression
+
+   TCP header compression [RFC1144] (sometimes known as V-J compression)
+   is a Proposed Standard describing use over low capacity links running
+   SLIP or PPP [RFC3150].  It greatly reduces the size of ACKs on the
+   reverse link when losses are infrequent (a situation that ensures
+   that the state of the compressor and decompressor are synchronized).
+   However, this alone does not address all of the asymmetry issues:
+
+   (i)   In some (e.g., wireless) subnetworks there is a significant
+         per-packet MAC overhead that is independent of packet size
+         (section 3.2).
+   (ii)  A reduction in the size of ACKs does not prevent adverse
+         interaction with large upstream data packets in the presence
+         of bidirectional traffic (section 3.3).
+   (iii) TCP header compression cannot be used with packets that have
+         IP or TCP options (including IPSec [RFC2402, RFC2406], TCP
+         RTTM [RFC1323], TCP SACK [RFC2018], etc.).
+   (iv)  The performance of header compression described by RFC1144 is
+         significantly degraded when compressed packets are lost.  An
+         improvement, which can still incur significant penalty on
+         long network paths is described in [RFC2507].  This suggests
+         it should only be used on links (or paths) that experience a
+         low level of packet loss [RFC3150].
+   (v)   The normal implementation of Header Compression inhibits
+         compression when IP is used to support tunneling (e.g., L2TP,
+         GRE [RFC2784], IP-in-IP).  The tunnel encapsulation
+         complicates locating the appropriate packet headers.  Although
+         GRE allows Header Compression on the inner (tunneled) IP
+         header [RFC2784], this is not recommended, since loss of a
+         packet (e.g., due to router congestion along the tunnel path)
+         will result in discard of all packets for one RTT [RFC1144].
+
+   RECOMMENDATION: TCP Header Compression is a transparent modification
+   performed at both ends of the upstream bottleneck link.  It offers no
+   benefit for flows employing IPSec [RFC2402, RFC2406], or when
+   additional protocol headers are present (e.g., IP or TCP options,
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 18]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   and/or tunnel encapsulation headers).  The scheme is widely
+   implemented and deployed and used over Internet links.  It is
+   recommended to improve TCP performance for paths that have a low-to-
+   medium bandwidth asymmetry (e.g., k<10).
+
+   In the form described in [RFC1144], TCP performance is degraded when
+   used over links (or paths) that may exhibit appreciable rates of
+   packet loss [RFC3150].  It may also not provide significant
+   improvement for upstream links with bidirectional traffic.  It is
+   therefore not desirable for paths that have a high bandwidth
+   asymmetry (e.g., k>10).
+
+5.1.2 Alternate Robust Header Compression Algorithms
+
+   TCP header compression [RFC1144] and IP header compression [RFC2507]
+   do not perform well when subject to packet loss.  Further, they do
+   not compress packets with TCP option fields (e.g., SACK [RFC2018] and
+   Timestamp (RTTM) [RFC1323]).  However, recent work on more robust
+   schemes suggests that a new generation of compression algorithms may
+   be developed which are much more robust.  The IETF ROHC working group
+   has specified compression techniques for UDP-based traffic [RFC3095]
+   and is examining a number of schemes that may provide improved TCP
+   header compression.  These could be beneficial for asymmetric network
+   paths.
+
+   RECOMMENDATION: Robust header compression is a transparent
+   modification that may be performed at both ends of an upstream
+   bottleneck link.  This class of techniques may also be suited to
+   Internet paths that suffer low levels of re-ordering.  The techniques
+   benefit paths with a low-to-medium bandwidth asymmetry (e.g., k<10)
+   and may be robust to packet loss.
+
+   Selection of suitable compression algorithms remains an area of
+   ongoing research.  It is possible that schemes may be derived which
+   support IPSec authentication, but not IPSec payload encryption. Such
+   schemes do not alone provide significant improvement in asymmetric
+   networks with a high asymmetry and/or bidirectional traffic.
+
+5.2 TYPE 1: Reverse Link Bandwidth Management
+
+   Techniques beyond Type 0 header compression are required to address
+   the performance problems caused by appreciable asymmetry (k>>1). One
+   set of techniques is implemented only at one point on the reverse
+   direction path, within the router/host connected to the upstream
+   bottleneck link.  These use flow class or per-flow queues at the
+   upstream link interface to manage the queue of packets waiting for
+   transmission on the bottleneck upstream link.
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 19]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   This type of technique bounds the upstream link buffer queue size,
+   and employs an algorithm to remove (discard) excess ACKs from each
+   queue.  This relies on the cumulative nature of ACKs (section 4.1).
+   Two approaches are described which employ this type of mitigation.
+
+5.2.1 ACK Filtering
+
+   ACK Filtering (AF) [DMT96, BPK99] (also known as ACK Suppression
+   [SF98, Sam99, FSS01]) is a TCP-aware link-layer technique that
+   reduces the number of ACKs sent on the upstream link.  This technique
+   has been deployed in specific production networks (e.g., asymmetric
+   satellite networks [ASB96]).  The challenge is to ensure that the
+   sender does not stall waiting for ACKs, which may happen if ACKs are
+   indiscriminately removed.
+
+   When an ACK from the receiver is about to be enqueued at an upstream
+   bottleneck link interface, the router or the end host link layer (if
+   the host is directly connected to the upstream bottleneck link)
+   checks the transmit queue(s) for older ACKs belonging to the same TCP
+   connection.  If ACKs are found, some (or all of them) are removed
+   from the queue, reducing the number of ACKs.
+
+   Some ACKs also have other functions in TCP [RFC1144], and should not
+   be deleted to ensure normal operation.  AF should therefore not
+   delete an ACK that has any data or TCP flags set (SYN, RST, URG, and
+   FIN).  In addition, it should avoid deleting a series of 3 duplicate
+   ACKs that indicate the need for Fast Retransmission [RFC2581] or ACKs
+   with the Selective ACK option (SACK) [RFC2018] from the queue to avoid
+   causing problems to TCP's data-driven loss recovery mechanisms.
+   Appropriate treatment is also needed to preserve correct operation of
+   ECN feedback (carried in the TCP header) [RFC3168].
+
+   A range of policies to filter ACKs may be used.  These may be either
+   deterministic or random (similar to a random-drop gateway, but should
+   take into consideration the semantics of the items in the queue).
+   Algorithms have also been suggested to ensure a minimum ACK rate to
+   guarantee the TCP sender window is updated [Sam99, FSS01], and to
+   limit the number of data packets (TCP segments) acknowledged by a
+   Stretch ACK.  Per-flow state needs to be maintained only for
+   connections with at least one packet in the queue (similar to FRED
+   [LM97]).  This state is soft [Cla88], and if necessary, can easily be
+   reconstructed from the contents of the queue.
+
+   The undesirable effect of delayed DupACKs (section 3.4) can be
+   reduced by deleting duplicate ACKs above a threshold value [MJW00,
+   CLP98] allowing Fast Retransmission, but avoiding early TCP timeouts,
+   which may otherwise result from excessive queuing of DupACKs.
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 20]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   Future schemes may include more advanced rules allowing removal of
+   selected SACKs [RFC2018].  Such a scheme could prevent the upstream
+   link queue from becoming filled by back-to-back ACKs with SACK
+   blocks.  Since a SACK packet is much larger than an ACK, it would
+   otherwise add significantly to the path delay in the reverse
+   direction.  Selection of suitable algorithms remains an ongoing area
+   of research.
+
+   RECOMMENDATION: ACK Filtering requires a modification to the upstream
+   link interface.  The scheme has been deployed in some networks where
+   the extra processing overhead (per ACK) may be compensated for by
+   avoiding the need to modify TCP.  ACK Filtering can generate Stretch
+   ACKs resulting in large bursts of TCP data packets.  Therefore on its
+   own, it is not recommended for use in the general Internet.
+
+   ACK Filtering when used in combination with a scheme to mitigate the
+   effect of Stretch ACKs (i.e., control TCP sender burst size) is
+   recommended for paths with appreciable asymmetry (k>1) and/or with
+   bidirectional traffic.  Suitable algorithms to support IPSec
+   authentication, SACK, and ECN remain areas of ongoing research.
+
+5.2.2 ACK Decimation
+
+   ACK Decimation is based on standard router mechanisms.  By using an
+   appropriate configuration of (small) per-flow queues and a chosen
+   dropping policy (e.g., Weighted Fair Queuing, WFQ) at the upstream
+   bottleneck link, a similar effect to AF (section 5.2.1) may be
+   obtained, but with less control of the actual packets which are
+   dropped.
+
+   In this scheme, the router/host at the bottleneck upstream link
+   maintains per-flow queues and services them fairly (or with
+   priorities) by queuing and scheduling of ACKs and data packets in the
+   reverse direction.  A small queue threshold is maintained to drop
+   excessive ACKs from the tail of each queue, in order to reduce ACK
+   Congestion.  The inability to identify special ACK packets (c.f., AF)
+   introduces some major drawbacks to this approach, such as the
+   possibility of losing DupACKs, FIN/ACK, RST packets, or packets
+   carrying ECN information [RFC3168].  Loss of these packets does not
+   significantly impact network congestion, but does adversely impact
+   the performance of the TCP session observing the loss.
+
+   A WFQ scheduler may assign a higher priority to interactive traffic
+   (providing it has a mechanism to identify such traffic) and provide a
+   fair share of the remaining capacity to the bulk traffic.  In the
+   presence of bidirectional traffic, and with a suitable scheduling
+   policy, this may ensure fairer sharing for ACK and data packets.  An
+   increased forward transmission rate is achieved over asymmetric links
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 21]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   by an increased ACK Decimation rate, leading to generation of Stretch
+   ACKs.  As in AF, TCP sender burst size increases when Stretch ACKs
+   are received unless other techniques are used in combination with
+   this technique.
+
+   This technique has been deployed in specific networks (e.g., a
+   network with high bandwidth asymmetry supporting high-speed data
+   services to in-transit mobile hosts [Seg00]).  Although not optimal,
+   it offered a potential mitigation applicable when the TCP header is
+   difficult to identify or not visible to the link layer (e.g., due to
+   IPSec encryption).
+
+   RECOMMENDATION: ACK Decimation uses standard router mechanisms at the
+   upstream link interface to constrain the rate at which ACKs are fed
+   to the upstream link.  The technique is beneficial with paths having
+   appreciable asymmetry (k>1).  It is however suboptimal, in that it
+   may lead to inefficient TCP error recovery (and hence in some cases
+   degraded TCP performance), and provides only crude control of link
+   behavior.  It is therefore recommended that where possible, ACK
+   Filtering should be used in preference to ACK Decimation.
+
+   When ACK Decimation is used on paths with an appreciable asymmetry
+   (k>1) (or with bidirectional traffic) it increases the burst size of
+   the TCP sender; use of a scheme to mitigate the effect of Stretch
+   ACKs or control burstiness is therefore strongly recommended.
+
+5.3 TYPE 2: Handling Infrequent ACKs
+
+   TYPE 2 mitigations perform TYPE 1 upstream link bandwidth management,
+   but also employ a second active element which mitigates the effect of
+   the reduced ACK rate and burstiness of ACK transmission.  This is
+   desirable when end hosts use standard TCP sender implementations
+   (e.g., those not implementing the techniques in sections 4.6, 4.7).
+
+   Consider a path where a TYPE 1 scheme forwards a Stretch ACK covering
+   d TCP packets (i.e., where the acknowledgement number is d*MSS larger
+   than the last ACK received by the TCP sender).  When the TCP sender
+   receives this ACK, it can send a burst of d (or d+1) TCP data
+   packets.  The sender is also constrained by the current cwnd.
+   Received ACKs also serve to increase cwnd (by at most one MSS).
+
+   A TYPE 2 scheme mitigates the impact of the reduced ACK frequency
+   resulting when a TYPE 1 scheme is used.  This is achieved by
+   interspersing additional ACKs before each received Stretch ACK.  The
+   additional ACKs, together with the original ACK, provide the TCP
+   sender with sufficient ACKs to allow the TCP cwnd to open in the same
+   way as if each of the original ACKs sent by the TCP receiver had been
+   forwarded by the reverse path.  In addition, by attempting to restore
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 22]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   the spacing between ACKs, such a scheme can also restore the TCP
+   self-clocking behavior, and reduce the TCP sender burst size.  Such
+   schemes need to ensure conservative behavior (i.e., should not
+   introduce more ACKs than were originally sent) and reduce the
+   probability of ACK Compression [ZSC91].
+
+   The action is performed at two points on the return path: the
+   upstream link interface (where excess ACKs are removed), and a point
+   further along the reverse path (after the bottleneck upstream
+   link(s)), where replacement ACKs are inserted.  This attempts to
+   reconstruct the ACK stream sent by the TCP receiver when used in
+   combination with AF (section 5.2.1), or ACK Decimation (section
+   5.2.2).
+
+   TYPE 2 mitigations may be performed locally at the receive interface
+   directly following the upstream bottleneck link, or may alternatively
+   be applied at any point further along the reverse path (this is not
+   necessarily on the forward path, since asymmetric routing may employ
+   different forward and reverse internet paths).  Since the techniques
+   may generate multiple ACKs upon reception of each individual Stretch
+   ACK, it is strongly recommended that the expander implements a scheme
+   to prevent exploitation as a "packet amplifier" in a Denial-of-
+   Service (DoS) attack (e.g., to verify the originator of the ACK).
+   Identification of the sender could be accomplished by appropriately
+   configured packet filters and/or by tunnel authentication procedures
+   (e.g., [RFC2402, RFC2406]).  A limit on the number of reconstructed
+   ACKs that may be generated from a single packet may also be
+   desirable.
+
+5.3.1 ACK Reconstruction
+
+   ACK Reconstruction (AR) [BPK99] is used in conjunction with AF
+   (section 5.2.1).  AR deploys a soft-state [Cla88] agent called an ACK
+   Reconstructor on the reverse path following the upstream bottleneck
+   link.  The soft-state can be regenerated if lost, based on received
+   ACKs.  When a Stretch ACK is received, AR introduces additional ACKs
+   by filling gaps in the ACK sequence.  Some potential Denial-of-
+   Service vulnerabilities may arise (section 6) and need to be
+   addressed by appropriate security techniques.
+
+   The Reconstructor determines the number of additional ACKs, by
+   estimating the number of filtered ACKs.  This uses implicit
+   information present in the received ACK stream by observing the ACK
+   sequence number of each received ACK.  An example implementation
+   could set an ACK threshold, ackthresh, to twice the MSS (this assumes
+   the chosen MSS is known by the link).  The factor of two corresponds
+
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 23]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   to standard TCP delayed-ACK policy (d=2).  Thus, if successive ACKs
+   arrive separated by delta, the Reconstructor regenerates a maximum of
+   ((delta/ackthresh) - 2) ACKs.
+
+   To reduce the TCP sender burst size and allow the cwnd to increase at
+   a rate governed by the downstream link, the reconstructed ACKs must
+   be sent at a consistent rate (i.e., temporal spacing between
+   reconstructed ACKs).  One method is for the Reconstructor to measure
+   the arrival rate of ACKs using an exponentially weighted moving
+   average estimator.  This rate depends on the output rate from the
+   upstream link and on the presence of other traffic sharing the link.
+   The output of the estimator indicates the average temporal spacing
+   for the ACKs (and the average rate at which ACKs would reach the TCP
+   sender if there were no further losses or delays).  This may be used
+   by the Reconstructor to set the temporal spacing of reconstructed
+   ACKs.  The scheme may also be used in combination with TCP sender
+   adaptation (e.g., a combination of the techniques in sections 4.6 and
+   4.7).
+
+   The trade-off in AR is between obtaining less TCP sender burstiness,
+   and a better rate of cwnd increase, with a reduction in RTT
+   variation, versus a modest increase in the path RTT.  The technique
+   cannot perform reconstruction on connections using IPSec (AH
+   [RFC2402] or ESP [RFC2406]), since it is unable to generate
+   appropriate security information.  It also cannot regenerate other
+   packet header information (e.g., the exact pattern of bits carried in
+   the IP packet ECN field [RFC3168] or the TCP RTTM option [RFC1323]).
+
+   An ACK Reconstructor operates correctly (i.e., generates no spurious
+   ACKs and preserves the end-to-end semantics of TCP), providing:
+
+   (i)   the TCP receiver uses ACK Delay (d=2) [RFC2581]
+   (ii)  the Reconstructor receives only in-order ACKs
+   (iii) all ACKs are routed via the Reconstructor
+   (iv)  the Reconstructor correctly determines the TCP MSS used by
+         the session
+   (v)   the packets do not carry additional header information (e.g.,
+         TCP RTTM option [RFC1323], IPSec using AH [RFC2402] or ESP
+         [RFC2406]).
+
+   RECOMMENDATION: ACK Reconstruction is an experimental transparent
+   modification performed on the reverse path following the upstream
+   bottleneck link.  It is designed to be used in conjunction with a
+   TYPE 1 mitigation.  It reduces the burst size of TCP transmission in
+   the forward direction, which may otherwise increase when TYPE 1
+   schemes are used alone.  It requires modification of equipment after
+   the upstream link (including maintaining per-flow soft state).  The
+   scheme introduces implicit assumptions about the network path and has
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 24]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   potential Denial-of-Service vulnerabilities (i.e., acting as a packet
+   amplifier); these need to be better understood and addressed by
+   appropriate security techniques.
+
+   Selection of appropriate algorithms to pace the ACK traffic remains
+   an open research issue.  There is also currently little experience of
+   the implications of using such techniques in the Internet, and
+   therefore it is recommended that this technique should not be used
+   within the Internet in its current form.
+
+5.3.2 ACK Compaction and Companding
+
+   ACK Compaction and ACK Companding [Sam99, FSS01] are techniques that
+   operate at a point on the reverse path following the constrained ACK
+   bottleneck.  Like AR (section 5.3.1), ACK Compaction and ACK
+   Companding are both used in conjunction with an AF technique (section
+   5.2.1) and regenerate filtered ACKs, restoring the ACK stream.
+   However, they differ from AR in that they use a modified AF (known as
+   a compactor or compressor), in which explicit information is added to
+   all Stretch ACKs generated by the AF.  This is used to explicitly
+   synchronize the reconstruction operation (referred to here as
+   expansion).
+
+   The modified AF combines two modifications:  First, when the
+   compressor deletes an ACK from the upstream bottleneck link queue, it
+   appends explicit information (a prefix) to the remaining ACK (this
+   ACK is marked to ensure it is not subsequently deleted).  The
+   additional information contains details of the conditions under which
+   ACKs were previously filtered.  A variety of information may be
+   encoded in the prefix.  This includes the number of ACKs deleted by
+   the AF and the average number of bytes acknowledged.  This may
+   subsequently be used by an expander at the remote end of the tunnel.
+   Further timing information may also be added to control the pacing of
+   the regenerated ACKs [FSS01].  The temporal spacing of the filtered
+   ACKs may also be encoded.
+
+   To encode the prefix requires the subsequent expander to recognize a
+   modified ACK header.  This would normally limit the expander to
+   link-local operation (at the receive interface of the upstream
+   bottleneck link).  If remote expansion is needed further along the
+   reverse path, a tunnel may be used to pass the modified ACKs to the
+   remote expander.  The tunnel introduces extra overhead; however,
+   networks with asymmetric capacity and symmetric routing frequently
+   already employ such tunnels (e.g., in a UDLR network [RFC3077], the
+   expander may be co-located with the feed router).
+
+
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 25]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   ACK expansion uses a stateless algorithm to expand the ACK (i.e.,
+   each received packet is processed independently of previously
+   received packets).  It uses the prefix information together with the
+   acknowledgment field in the received ACK, to produce an equivalent
+   number of ACKs to those previously deleted by the compactor.  These
+   ACKs are forwarded to the original destination (i.e., the TCP
+   sender), preserving normal TCP ACK clocking.  In this way, ACK
+   Compaction, unlike AR, is not reliant on specific ACK policies, nor
+   must it see all ACKs associated with the reverse path (e.g., it may
+   be compatible with schemes such as DAASS [RFC2760]).
+
+   Some potential Denial-of-Service vulnerabilities may arise (section
+   6) and need to be addressed by appropriate security techniques.  The
+   technique cannot perform reconstruction on connections using IPSec,
+   since it is unable to regenerate appropriate security information.
+   It is possible to explicitly encode IPSec security information from
+   suppressed packets, allowing operation with IPSec AH; however, this
+   remains an open research issue, and implies an additional overhead
+   per ACK.
+
+   RECOMMENDATION: ACK Compaction and Companding are experimental
+   transparent modifications performed on the reverse path following the
+   upstream bottleneck link.  They are designed to be used in
+   conjunction with a modified TYPE 1 mitigation and reduce the burst
+   size of TCP transmission in the forward direction, which may
+   otherwise increase when TYPE 1 schemes are used alone.
+
+   The technique is desirable, but requires modification of equipment
+   after the upstream bottleneck link (including processing of a
+   modified ACK header).  Selection of appropriate algorithms to pace
+   the ACK traffic also remains an open research issue.  Some potential
+   Denial-of-Service vulnerabilities may arise with any device that may
+   act as a packet amplifier.  These need to be addressed by appropriate
+   security techniques.  There is little experience of using the scheme
+   over Internet paths.  This scheme is a subject of ongoing research
+   and is not recommended for use within the Internet in its current
+   form.
+
+5.3.3 Mitigating TCP packet bursts generated by Infrequent ACKs
+
+   The bursts of data packets generated when a Type 1 scheme is used on
+   the reverse direction path may be mitigated by introducing a router
+   supporting Generic Traffic Shaping (GTS) on the forward path [Seg00].
+   GTS is a standard router mechanism implemented in many deployed
+   routers.  This technique does not eliminate the bursts of data
+   generated by the TCP sender, but attempts to smooth out the bursts by
+   employing scheduling and queuing techniques, producing traffic which
+   resembles that when TCP Pacing is used (section 4.6).  These
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 26]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   techniques require maintaining per-flow soft-state in the router, and
+   increase per-packet processing overhead.  Some additional buffer
+   capacity is needed to queue packets being shaped.
+
+   To perform GTS, the router needs to select appropriate traffic
+   shaping parameters, which require knowledge of the network policy,
+   connection behavior and/or downstream bottleneck characteristics. GTS
+   may also be used to enforce other network policies and promote
+   fairness between competing TCP connections (and also UDP and
+   multicast flows).  It also reduces the probability of ACK Compression
+   [ZSC91].
+
+   The smoothing of packet bursts reduces the impact of the TCP
+   transmission bursts on routers and hosts following the point at which
+   GTS is performed.  It is therefore desirable to perform GTS near to
+   the sending host, or at least at a point before the first forward
+   path bottleneck router.
+
+   RECOMMENDATIONS: Generic Traffic Shaping (GTS) is a transparent
+   technique employed at a router on the forward path.  The algorithms
+   to implement GTS are available in widely deployed routers and may be
+   used on an Internet link, but do imply significant additional per-
+   packet processing cost.
+
+   Configuration of a GTS is a policy decision of a network service
+   provider.  When appropriately configured, the technique will reduce
+   the size of TCP data packet bursts, mitigating the effects of Type 1
+   techniques.  GTS is recommended for use in the Internet in
+   conjunction with Type 1 techniques such as ACK Filtering (section
+   5.2.1) and ACK Decimation (section 5.2.2).
+
+5.4 TYPE 3: Upstream Link Scheduling
+
+   Many of the above schemes imply using per flow queues (or per
+   connection queues in the case of TCP) at the upstream bottleneck
+   link.  Per-flow queuing (e.g., FQ, CBQ) offers benefit when used on
+   any slow link (where the time to transmit a packet forms an
+   appreciable part of the path RTT) [RFC3150].  Type 3 schemes offer
+   additional benefit when used with one of the above techniques.
+
+5.4.1 Per-Flow queuing at the Upstream Bottleneck Link
+
+   When bidirectional traffic exists in a bandwidth asymmetric network,
+   competing ACK and packet data flows along the return path may degrade
+   the performance of both upstream and downstream flows [KVR98].
+   Therefore, it is highly desirable to use a queuing strategy combined
+   with a scheduling mechanism at the upstream link.  This has also been
+   called priority-based multiplexing [RFC3135].
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 27]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   On a slow upstream link, appreciable jitter may be introduced by
+   sending large data packets ahead of ACKs [RFC3150].  A simple scheme
+   may be implemented using per-flow queuing with a fair scheduler
+   (e.g., round robin service to all flows, or priority scheduling).  A
+   modified scheduler [KVR98] could place a limit on the number of ACKs
+   a host is allowed to transmit upstream before transmitting a data
+   packet (assuming at least one data packet is waiting in the upstream
+   link queue).  This guarantees at least a certain minimum share of the
+   capacity to flows in the reverse direction, while enabling flows in
+   the forward direction to improve TCP throughput.
+
+   Bulk (payload) compression, a small MTU, link level transparent
+   fragmentation [RFC1991, RFC2686] or link level suspend/resume
+   capability (where higher priority frames may pre-empt transmission of
+   lower priority frames) may be used to mitigate the impact (jitter) of
+   bidirectional traffic on low speed links [RFC3150]. More advanced
+   schemes (e.g., WFQ) may also be used to improve the performance of
+   transfers with multiple ACK streams such as http [Seg00].
+
+   RECOMMENDATION: Per-flow queuing is a transparent modification
+   performed at the upstream bottleneck link.  Per-flow (or per-class)
+   scheduling does not impact the congestion behavior of the Internet,
+   and may be used on any Internet link.  The scheme has particular
+   benefits for slow links.  It is widely implemented and widely
+   deployed on links operating at less than 2 Mbps.  This is recommended
+   as a mitigation on its own or in combination with one of the other
+   described techniques.
+
+5.4.2 ACKs-first Scheduling
+
+   ACKs-first Scheduling is an experimental technique to improve
+   performance of bidirectional transfers.  In this case data packets
+   and ACKs compete for resources at the upstream bottleneck link
+   [RFC3150].  A single First-In First-Out, FIFO, queue for both data
+   packets and ACKs could impact the performance of forward transfers.
+   For example, if the upstream bottleneck link is a 28.8 kbps dialup
+   line, the transmission of a 1 Kbyte sized data packet would take
+   about 280 ms.  So even if just two such data packets get queued ahead
+   of ACKs (not an uncommon occurrence since data packets are sent out
+   in pairs during slow start), they would shut out ACKs for well over
+   half a second.  If more than two data packets are queued up ahead of
+   an ACK, the ACKs would be delayed by even more [RFC3150].
+
+   A possible approach to alleviating this is to schedule data and ACKs
+   differently from FIFO.  One algorithm, in particular, is ACKs-first
+   scheduling, which accords a higher priority to ACKs over data
+   packets.  The motivation for such scheduling is that it minimizes the
+   idle time for the forward connection by minimizing the time that ACKs
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 28]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   spend queued behind data packets at the upstream link.  At the same
+   time, with Type 0 techniques such as header compression [RFC1144],
+   the transmission time of ACKs becomes small enough that the impact on
+   subsequent data packets is minimal.  (Subnetworks in which the per-
+   packet overhead of the upstream link is large, e.g., packet radio
+   subnetworks, are an exception, section 3.2.)  This scheduling scheme
+   does not require the upstream bottleneck router/host to explicitly
+   identify or maintain state for individual TCP connections.
+
+   ACKs-first scheduling does not help avoid a delay due to a data
+   packet in transmission.  Link fragmentation or suspend/resume may be
+   beneficial in this case.
+
+   RECOMMENDATION: ACKs-first scheduling is an experimental transparent
+   modification performed at the upstream bottleneck link.  If it is
+   used without a mechanism (such as ACK Congestion Control (ACC),
+   section 4.3) to regulate the volume of ACKs, it could lead to
+   starvation of data packets.  This is a performance penalty
+   experienced by end hosts using the link and does not modify Internet
+   congestion behavior.  Experiments indicate that ACKs-first scheduling
+   in combination with ACC is promising.  However, there is little
+   experience of using the technique in the wider Internet. Further
+   development of the technique remains an open research issue, and
+   therefore the scheme is not currently recommended for use within the
+   Internet.
+
+6. Security Considerations
+
+   The recommendations contained in this document do not impact the
+   integrity of TCP, nor do they introduce new security implications
+   for the TCP protocol or for applications using TCP.
+
+   Some security considerations in the context of this document arise
+   from the implications of using IPSec by the end hosts or routers
+   operating along the return path.  Use of IPSec prevents, or
+   complicates, some of the mitigations.  For example:
+
+   (i)  When IPSec ESP [RFC2406] is used to encrypt the IP payload, the
+        TCP header can neither be read nor modified by intermediate
+        entities.  This rules out header compression, ACK Filtering, ACK
+        Reconstruction, and ACK Compaction.
+
+   (ii) The TCP header information may be visible, when some forms of
+        network layer security are used.  For example, using IPSec AH
+        [RFC2402], the TCP header may be read, but not modified, by
+        intermediaries.  This may in future allow extensions to support
+        ACK Filtering, but rules out the generation of new
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 29]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+        packets by intermediaries (e.g., ACK Reconstruction).  The
+        enhanced header compression scheme discussed in [RFC2507] would
+        also work with IPSec AH.
+
+   There are potential Denial-of-Service (DoS) implications when using
+   Type 2 schemes.  Unless additional security mechanisms are used, a
+   Reconstructor/expander could be exploited as a packet amplifier.  A
+   third party may inject unauthorized Stretch ACKs into the reverse
+   path, triggering the generation of additional ACKs.  These ACKs would
+   consume capacity on the return path and processing resources at the
+   systems along the path, including the destination host.  This
+   provides a potential platform for a DoS attack.  The usual
+   precautions must be taken to verify the correct tunnel end point, and
+   to ensure that applications cannot falsely inject packets that expand
+   to generate unwanted traffic.  Imposing a rate limit and bound on the
+   delayed ACK factor (d) would also lessen the impact of any undetected
+   exploitation.
+
+7. Summary
+
+   This document considers several TCP performance constraints that
+   arise from asymmetry in the properties of the forward and reverse
+   paths across an IP network.  Such performance constraints arise,
+   e.g., as a result of both bandwidth (capacity) asymmetry, asymmetric
+   shared media in the reverse direction, and interactions with Media
+   Access Control (MAC) protocols.  Asymmetric capacity may cause TCP
+   Acknowledgments (ACKs) to be lost or become inordinately delayed
+   (e.g., when a bottleneck link is shared between many flows, or when
+   there is bidirectional traffic).  This effect may be exacerbated with
+   media-access delays (e.g., in certain multi-hop radio subnetworks,
+   satellite Bandwidth on Demand access).  Asymmetry, and in particular
+   high asymmetry, raises a set of TCP performance issues.
+
+   A set of techniques providing performance improvement is surveyed.
+   These include techniques to alleviate ACK Congestion and techniques
+   that enable a TCP sender to cope with infrequent ACKs without
+   destroying TCP self-clocking.  These techniques include both end-to-
+   end, local link-layer, and subnetwork schemes.  Many of these
+   techniques have been evaluated in detail via analysis, simulation,
+   and/or implementation on asymmetric subnetworks forming part of the
+   Internet.  There is, however, as yet insufficient operational
+   experience for some techniques, and these therefore currently remain
+   items of on-going research and experimentation.
+
+
+
+
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 30]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   The following table summarizes the current recommendations.
+   Mechanisms are classified as recommended (REC), not recommended (NOT
+   REC) or experimental (EXP).  Experimental techniques may not be well
+   specified.  These techniques will require further operational
+   experience before they can be recommended for use in the public
+   Internet.
+
+   The recommendations for end-to-end host modifications are summarized
+   in table 1.  This lists each technique, the section in which each
+   technique is discussed, and where it is applied (S denotes the host
+   sending TCP data packets in the forward direction, R denotes the host
+   which receives these data packets).
+
+     +------------------------+-------------+------------+--------+
+     | Technique              |  Use        | Section    | Where  |
+     +------------------------+-------------+------------+--------+
+     | Modified Delayed ACKs  | NOT REC     | 4.1        | TCP R  |
+     | Large MSS  & NO FRAG   | REC         | 4.2        | TCP SR |
+     | Large MSS  & IP FRAG   | NOT REC     | 4.2        | TCP SR |
+     | ACK Congestion Control | EXP         | 4.3        | TCP SR |
+     | Window Pred. Mech (WPM)| NOT REC     | 4.4        | TCP R  |
+     | Window Cwnd. Est. (ACE)| NOT REC     | 4.5        | TCP R  |
+     | TCP Sender Pacing      | EXP *1      | 4.6        | TCP S  |
+     | Byte Counting          | NOT REC *2  | 4.7        | TCP S  |
+     | Backpressure           | EXP *1      | 4.8        | TCP R  |
+     +------------------------+-------------+------------+--------+
+
+         Table 1: Recommendations concerning host modifications.
+
+   *1 Implementation of the technique may require changes to the
+      internal design of the protocol stack in end hosts.
+   *2 Dependent on a scheme for preventing excessive TCP transmission
+      burst.
+
+   The recommendations for techniques that do not require the TCP sender
+   and receiver to be aware of their existence (i.e., transparent
+   techniques) are summarized in table 2.  Each technique is listed
+   along with the section in which each mechanism is discussed, and
+   where the technique is applied (S denotes the sending interface prior
+   to the upstream bottleneck link, R denotes receiving interface
+   following the upstream bottleneck link).
+
+
+
+
+
+
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 31]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+     +------------------------+-------------+------------+--------+
+     | Mechanism              |  Use        | Section    | Type   |
+     +------------------------+-------------+------------+--------+
+     | Header Compr. (V-J)    | REC *1      | 5.1.1      | 0 SR   |
+     | Header Compr. (ROHC)   | REC *1 *2   | 5.1.2      | 0 SR   |
+     +------------------------+-------------+------------+--------+
+     | ACK Filtering (AF)     | EXP *3      | 5.2.1      | 1 S    |
+     | ACK Decimation         | EXP *3      | 5.2.2      | 1 S    |
+     +------------------------+-------------+------------+--------+
+     | ACK Reconstruction (AR)| NOT REC     | 5.3.1      | 2   *4 |
+     | ACK Compaction/Compand.| EXP         | 5.3.2      | 2 S *4 |
+     | Gen. Traff. Shap. (GTS)| REC         | 5.3.3      | 2   *5 |
+     +------------------------+-------------+------------+--------+
+     | Fair Queueing (FQ)     | REC         | 5.4.1      | 3 S    |
+     | ACKs-First Scheduling  | NOT REC     | 5.4.2      | 3 S    |
+     +------------------------+-------------+------------+--------+
+
+      Table 2: Recommendations concerning transparent modifications.
+
+   *1 At high asymmetry these schemes may degrade TCP performance, but
+      are not considered harmful to the Internet.
+   *2 Standardisation of new TCP compression protocols is the subject of
+      ongoing work within the ROHC WG, refer to other IETF RFCs on the
+      use of these techniques.
+   *3 Use in the Internet is dependent on a scheme for preventing
+      excessive TCP transmission burst.
+   *4 Performed at a point along the reverse path after the upstream
+      bottleneck link.
+   *5 Performed at a point along the forward path.
+
+8. Acknowledgments
+
+   This document has benefited from comments from the members of the
+   Performance Implications of Links (PILC) Working Group.  In
+   particular, the authors would like to thank John Border, Spencer
+   Dawkins, Aaron Falk, Dan Grossman, Randy Katz, Jeff Mandin, Rod
+   Ragland, Ramon Segura, Joe Touch, and Lloyd Wood for their useful
+   comments.  They also acknowledge the data provided by Metricom Inc.,
+   concerning operation of their packet data network.
+
+9. References
+
+   References of the form RFCnnnn are Internet Request for Comments
+   (RFC) documents available online at http://www.rfc-editor.org/.
+
+
+
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 32]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+9.1 Normative References
+
+   [RFC793]  Postel, J., "Transmission Control Protocol", STD 7, RFC
+             793, September 1981.
+
+   [RFC1122] Braden, R., Ed., "Requirements for Internet Hosts -
+             Communication Layers", STD 3, RFC 1122, October 1989.
+
+   [RFC1144] Jacobson, V., "Compressing TCP/IP Headers for Low-Speed
+             Serial Links", RFC 1144, February 1990.
+
+   [RFC1191] Mogul, J. and S. Deering, "Path MTU Discovery", RFC 1191,
+             November 1990.
+
+   [RFC2581] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion
+             Control", RFC 2581, April 1999.
+
+   [RFC2784] Farinacci, D., Li, T., Hanks, S., Meyer, D. and P. Traina,
+             "Generic Routing Encapsulation (GRE)", RFC 2784, March
+             2000.
+
+   [RFC3135] Border, J., Kojo, M., Griner, J., Montenegro, G. and Z.
+             Shelby, "Performance Enhancing Proxies Intended to Mitigate
+             Link-Related Degradations", RFC 3135, June 2001.
+
+9.2 Informative References
+
+   [abc-ID]  Allman, M., "TCP Congestion Control with Appropriate Byte
+             Counting", Work in Progress.
+
+   [All97b]  Allman, M., "Fixing Two BSD TCP Bugs", Technical Report
+             CR-204151, NASA Lewis Research Center, October 1997.
+
+   [ANS01]   ANSI Standard T1.413, "Network to Customer Installation
+             Interfaces - Asymmetric Digital Subscriber Lines (ADSL)
+             Metallic Interface", November 1998.
+
+   [ASB96]   Arora, V., Suphasindhu, N., Baras, J.S. and D. Dillon,
+             "Asymmetric Internet Access over Satellite-Terrestrial
+             Networks", Proc. AIAA: 16th International Communications
+             Satellite Systems Conference and Exhibit, Part 1,
+             Washington, D.C., February 25-29, 1996, pp.476-482.
+
+   [AST00]   Aggarwal, A., Savage, S., and T. Anderson, "Understanding
+             the Performance of TCP Pacing", Proc. IEEE INFOCOM, Tel-
+             Aviv, Israel, V.3, March 2000, pp. 1157-1165.
+
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 33]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   [Bal98]   Balakrishnan, H., "Challenges to Reliable Data Transport
+             over Heterogeneous Wireless Networks", Ph.D. Thesis,
+             University of California at Berkeley, USA, August 1998.
+             http://nms.lcs.mit.edu/papers/hari-phd/
+
+   [BPK99]   Balakrishnan, H., Padmanabhan, V. N., and R. H. Katz, "The
+             Effects of Asymmetry on TCP Performance", ACM Mobile
+             Networks and Applications (MONET), Vol.4, No.3, 1999, pp.
+             219-241. An expanded version of a paper published at Proc.
+             ACM/IEEE Mobile Communications Conference (MOBICOM), 1997.
+
+   [BPS00]   Bennett, J. C., Partridge, C., and N. Schectman, "Packet
+             Reordering is Not Pathological Network Behaviour", IEEE/ACM
+             Transactions on Networking, Vol. 7, Issue. 6, 2000,
+             pp.789-798.
+
+   [Cla88]   Clark, D.D, "The Design Philosophy of the DARPA Internet
+             Protocols", ACM Computer Communications Review (CCR), Vol.
+             18, Issue 4, 1988, pp.106-114.
+
+   [CLC99]   Clausen, H., Linder, H., and B. Collini-Nocker, "Internet
+             over Broadcast Satellites", IEEE Communications Magazine,
+             Vol. 37, Issue. 6, 1999, pp.146-151.
+
+   [CLP98]   Calveras, A., Linares, J., and J. Paradells, "Window
+             Prediction Mechanism for Improving TCP in Wireless
+             Asymmetric Links". Proc. IEEE Global Communications
+             Conference (GLOBECOM), Sydney Australia, November 1998,
+             pp.533-538.
+
+   [CR98]    Cohen, R., and Ramanathan, S., "Tuning TCP for High
+             Performance in Hybrid Fiber Coaxial Broad-Band Access
+             Networks", IEEE/ACM Transactions on Networking, Vol.6,
+             No.1, 1998, pp.15-29.
+
+   [DS00]    Cable Television Laboratories, Inc., Data-Over-Cable
+             Service Interface Specifications---Radio Frequency
+             Interface Specification SP-RFIv1.1-I04-00407, 2000
+
+   [DS01]    Data-Over-Cable Service Interface Specifications, Radio
+             Frequency Interface Specification 1.0, SP-RFI-I05-991105,
+             Cable Television Laboratories, Inc., November 1999.
+
+   [DMT96]   Durst, R., Miller, G., and E. Travis, "TCP Extensions for
+             Space Communications", ACM/IEEE Mobile Communications
+             Conference (MOBICOM), New York, USA, November 1996, pp.15-
+             26.
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 34]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   [EN97]    "Digital Video Broadcasting (DVB); DVB Specification for
+             Data Broadcasting", European Standard (Telecommunications
+             series) EN 301 192, 1997.
+
+   [EN00]    "Digital Video Broadcasting (DVB); Interaction Channel for
+             Satellite Distribution Systems", Draft European Standard
+             (Telecommunications series) ETSI, Draft EN 301 790, v.1.2.1
+
+   [FJ93]    Floyd, S., and V. Jacobson, "Random Early Detection
+             gateways for Congestion Avoidance", IEEE/ACM Transactions
+             on Networking, Vol.1, No.4, 1993, pp.397-413.
+
+   [FSS01]   Fairhurst, G., Samaraweera, N.K.G, Sooriyabandara, M.,
+             Harun, H., Hodson, K., and R. Donadio, "Performance Issues
+             in Asymmetric Service Provision using Broadband Satellite",
+             IEE Proceedings on Communication, Vol.148, No.2, 2001,
+             pp.95-99.
+
+   [ITU01]   ITU-T Recommendation E.681, "Traffic Engineering Methods
+             For IP Access Networks Based on Hybrid Fiber/Coax System",
+             September 2001.
+
+   [ITU02]   ITU-T Recommendation G.992.1, "Asymmetrical Digital
+             Subscriber Line (ADSL) Transceivers", July 1999.
+
+   [Jac88]   Jacobson, V., "Congestion Avoidance and Control", Proc. ACM
+             SIGCOMM, Stanford, CA, ACM Computer Communications Review
+             (CCR), Vol.18, No.4, 1988, pp.314-329.
+
+   [Ken87]   Kent C.A., and J. C. Mogul, "Fragmentation Considered
+             Harmful", Proc. ACM SIGCOMM, USA, ACM Computer
+             Communications Review (CCR), Vol.17, No.5, 1988, pp.390-
+             401.
+
+   [KSG98]   Krout, T., Solsman, M., and J. Goldstein, "The Effects of
+             Asymmetric Satellite Networks on Protocols", Proc. IEEE
+             Military Communications Conference (MILCOM), Bradford, MA,
+             USA, Vol.3, 1998, pp.1072-1076.
+
+   [KVR98]   Kalampoukas, L., Varma, A., and Ramakrishnan, K.K.,
+             "Improving TCP Throughput over Two-Way Asymmetric Links:
+             Analysis and Solutions", Proc. ACM SIGMETRICS, Madison,
+             USA, 1998, pp.78-89.
+
+   [LM97]    Lin, D., and R. Morris, "Dynamics of Random Early
+             Detection", Proc. ACM SIGCOMM, Cannes, France, ACM Computer
+             Communications Review (CCR), Vol.27, No.4, 1997, pp.78-89.
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 35]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   [LMS97]   Lakshman, T.V., Madhow, U., and B. Suter, "Window-based
+             Error Recovery and Flow Control with a Slow Acknowledgement
+             Channel: A Study of TCP/IP Performance", Proc. IEEE
+             INFOCOM, Vol.3, Kobe, Japan, 1997, pp.1199-1209.
+
+   [MJW00]   Ming-Chit, I.T., Jinsong, D., and W. Wang, "Improving TCP
+             Performance Over Asymmetric Networks", ACM SIGCOMM, ACM
+             Computer Communications Review (CCR), Vol.30, No.3, 2000.
+
+   [Pad98]   Padmanabhan, V.N., "Addressing the Challenges of Web Data
+             Transport", Ph.D. Thesis, University of California at
+             Berkeley, USA, September 1998 (also Tech Report UCB/CSD-
+             98-1016). http://www.cs.berkeley.edu/~padmanab/phd-
+             thesis.html
+
+   [RFC1323] Jacobson, V., Braden, R. and D. Borman, "TCP Extensions for
+             High Performance", RFC 1323, May 1992.
+
+   [RFC2018] Mathis, M., Mahdavi, J., Floyd, S. and A. Romanow, "TCP
+             Selective Acknowledgment Options", RFC 2018, October 1996.
+
+   [RFC2402] Kent, S. and R. Atkinson, "IP Authentication Header", RFC
+             2402, November 1998.
+
+   [RFC2406] Kent, S. and R. Atkinson, "IP Encapsulating Security
+             Payload (ESP)", RFC 2406, November 1998.
+
+   [RFC2507] Degermark, M., Nordgren, B. and S. Pink, "IP Header
+             Compression", RFC 2507, February 1999.
+
+   [RFC2525] Paxson, V., Allman, M., Dawson, S., Heavens, I. and B.
+             Volz, "Known TCP Implementation Problems", RFC 2525, March
+             1999.
+
+   [RFC2686] Bormann, C., "The Multi-Class Extension to Multi-Link PPP",
+             RFC 2686, September 1999.
+
+   [RFC2760] Allman, M., Dawkins, S., Glover, D., Griner, J., Henderson,
+             T., Heidemann, J., Kruse, H., Ostermann, S., Scott, K.,
+             Semke, J., Touch, J. and D. Tran, "Ongoing TCP Research
+             Related to Satellites", RFC 2760, February 2000.
+
+   [RFC2988] Paxson, V. and M. Allman, "Computing TCP's Retransmission
+             Timer", RFC 2988, November 2000.
+
+   [RFC3077] Duros, E., Dabbous, W., Izumiyama, H., Fujii, N. and Y.
+             Zhang, "A link Layer tunneling mechanism for unidirectional
+             links", RFC 3077, March 2001.
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 36]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+   [RFC3095] Bormann, C., Burmeister, C., Degermark, M., Fukushima, H.,
+             Hannu, H., Jonsson, E., Hakenberg, R., Koren, T., Le, K.,
+             Liu, Z., Martensson, A., Miyazaki, A., Svanbro, K., Wiebke,
+             T., Yoshimura, T. and H. Zheng, "RObust Header Compression
+             (ROHC): Framework and four profiles: RTP, UDP, ESP, and
+             uncompressed", RFC 3095, July 2001.
+
+   [RFC3150] Dawkins, S., Montenegro, G., Kojo, M. and V. Magret, "End-
+             to-end Performance Implications of Slow Links", BCP 48, RFC
+             3150, July 2001.
+
+   [RFC3168] Ramakrishnan K., Floyd, S. and D. Black, "A Proposal to add
+             Explicit Congestion Notification (ECN) to IP", RFC 3168,
+             September 2001.
+
+   [Sam99]   Samaraweera, N.K.G, "Return Link Optimization for Internet
+             Service Provision Using DVB-S Networks", ACM Computer
+             Communications Review (CCR), Vol.29, No.3, 1999, pp.4-19.
+
+   [Seg00]   Segura R., "Asymmetric Networking Techniques For Hybrid
+             Satellite Communications", NC3A, The Hague, Netherlands,
+             NATO Technical Note 810, August 2000, pp.32-37.
+
+   [SF98]    Samaraweera, N.K.G., and G. Fairhurst, "High Speed Internet
+             Access using Satellite-based DVB Networks", Proc. IEEE
+             International Networks Conference (INC98), Plymouth, UK,
+             1998, pp.23-28.
+
+   [ZSC91]   Zhang, L., Shenker, S., and D. D. Clark, "Observations and
+             Dynamics of a Congestion Control Algorithm: The Effects of
+             Two-Way Traffic", Proc. ACM SIGCOMM, ACM Computer
+             Communications Review (CCR), Vol 21, No 4, 1991, pp.133-
+             147.
+
+10. IANA Considerations
+
+   There are no IANA considerations associated with this document.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 37]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+Appendix - Examples of Subnetworks Exhibiting Network Path Asymmetry
+
+   This appendix provides a list of some subnetworks which are known to
+   experience network path asymmetry.  The asymmetry in capacity of
+   these network paths can require mitigations to provide acceptable
+   overall performance.  Examples include the following:
+
+   -  IP service over some wide area and local area wireless networks.
+      In such networks, the predominant network path asymmetry arises
+      from the hub-and-spokes architecture of the network (e.g., a
+      single base station that communicates with multiple mobile
+      stations); this requires a Ready To Send / Clear To Send (RTS/CTS)
+      protocol and a Medium Access Control (MAC) protocol which needs to
+      accommodate the significant turn-around time for the radios.  A
+      high per-packet transmission overhead may lead to significant
+      network path asymmetry.
+
+   -  IP service over a forward satellite link utilizing Digital Video
+      Broadcast (DVB) transmission [EN97] (e.g., 38-45 Mbps), and a
+      slower upstream link using terrestrial network technology (e.g.,
+      dial-up modem, line of sight microwave, cellular radio) [CLC99].
+      Network path asymmetry arises from a difference in the upstream
+      and downstream link capacities.
+
+   -  Certain military networks [KSG98] providing Internet access to
+      in-transit or isolated hosts [Seg00] using a high capacity
+      downstream satellite link (e.g., 2-3 Mbps) with a narrowband
+      upstream link (e.g., 2.4-9.6 kbps) using either Demand Assigned
+      Multiple Access (DAMA) or fixed rate satellite links.  The main
+      factor contributing to network path asymmetry is the difference in
+      the upstream and downstream link capacities.  Some differences
+      between forward and reverse paths may arise from the way in which
+      upstream link capacity is allocated.
+
+   -  Most data over cable TV networks (e.g., DOCSIS [ITU01, DS00]),
+      where the analogue channels assigned for upstream communication
+      (i.e., in the reverse direction) are narrower and may be more
+      noisy than those assigned for the downstream link.  As a
+      consequence, the upstream and downstream links differ in their
+      transmission rate. For example, in DOCSIS 1.0 [DS00], the
+      downstream transmission rate is either 27 or 52 Mbps.  Upstream
+      transmission rates may be dynamically selected to be one of a
+      series of rates which range from 166 kbps to 9 Mbps.  Operators
+      may assign multiple upstream channels per downstream channel.
+      Physical layer (PHY) overhead (which accompanies upstream
+      transmissions, but is not present in the downstream link) can also
+      increase the network path asymmetry. The Best Effort service,
+      which is typically used to carry TCP, uses a
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 38]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+      contention/reservation MAC protocol.  A cable modem (CM) sending
+      an isolated packet (such as a TCP ACK) on the upstream link must
+      contend with other CMs to request capacity from the central cable
+      modem termination system (CMTS).  The CMTS then grants timeslots
+      to a CM for the upstream transmission.  The CM may "piggyback"
+      subsequent requests onto upstream packets, avoiding contention
+      cycles; as a result, spacing of TCP ACKs can be dramatically
+      altered due to minor variations in load of the cable data network
+      and inter-arrival times of TCP DATA packets.  Numerous other
+      complexities may add to, or mitigate, the asymmetry in rate and
+      access latency experienced by packets sent on the upstream link
+      relative to downstream packets in DOCSIS.  The asymmetry
+      experienced by end hosts may also change dynamically (e.g., with
+      network load), and when best effort services share capacity with
+      services that have symmetric reserved capacity (e.g., IP telephony
+      over the Unsolicited Grant service) [ITU01].
+
+   -  Asymmetric Digital Subscriber Line (ADSL), by definition, offers a
+      downstream link transmission rate that is higher than that of the
+      upstream link.  The available rates depend upon channel quality
+      and system configuration.  For example, one widely deployed ADSL
+      technology [ITU02, ANS01] operates at rates that are multiples of
+      32 kbps (up to 6.144 Mbps) in the downstream link, and up to 640
+      kbps for the upstream link.  The network path asymmetry
+      experienced by end hosts may be further increased when best effort
+      services, e.g., Internet access over ADSL, share the available
+      upstream capacity with reserved services (e.g., constant bit rate
+      voice telephony).
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 39]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+Authors' Addresses
+
+   Hari Balakrishnan
+   Laboratory for Computer Science
+   200 Technology Square
+   Massachusetts Institute of Technology
+   Cambridge, MA 02139
+   USA
+
+   Phone: +1-617-253-8713
+   EMail: hari@lcs.mit.edu
+   Web: http://nms.lcs.mit.edu/~hari/
+
+
+   Venkata N. Padmanabhan
+   Microsoft Research
+   One Microsoft Way
+   Redmond, WA 98052
+   USA
+
+   Phone: +1-425-705-2790
+   EMail: padmanab@microsoft.com
+   Web: http://www.research.microsoft.com/~padmanab/
+
+
+   Godred Fairhurst
+   Department of Engineering
+   Fraser Noble Building
+   University of Aberdeen
+   Aberdeen AB24 3UE
+   UK
+
+   EMail: gorry@erg.abdn.ac.uk
+   Web: http://www.erg.abdn.ac.uk/users/gorry
+
+
+   Mahesh Sooriyabandara
+   Department of Engineering
+   Fraser Noble Building
+   University of Aberdeen
+   Aberdeen AB24 3UE
+   UK
+
+   EMail: mahesh@erg.abdn.ac.uk
+   Web: http://www.erg.abdn.ac.uk/users/mahesh
+
+
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 40]
+
+RFC 3449                PILC - Asymmetric Links            December 2002
+
+
+Full Copyright Statement
+
+   Copyright (C) The Internet Society (2002).  All Rights Reserved.
+
+   This document and translations of it may be copied and furnished to
+   others, and derivative works that comment on or otherwise explain it
+   or assist in its implementation may be prepared, copied, published
+   and distributed, in whole or in part, without restriction of any
+   kind, provided that the above copyright notice and this paragraph are
+   included on all such copies and derivative works.  However, this
+   document itself may not be modified in any way, such as by removing
+   the copyright notice or references to the Internet Society or other
+   Internet organizations, except as needed for the purpose of
+   developing Internet standards in which case the procedures for
+   copyrights defined in the Internet Standards process must be
+   followed, or as required to translate it into languages other than
+   English.
+
+   The limited permissions granted above are perpetual and will not be
+   revoked by the Internet Society or its successors or assigns.
+
+   This document and the information contained herein is provided on an
+   "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
+   TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
+   BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
+   HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
+   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Acknowledgement
+
+   Funding for the RFC Editor function is currently provided by the
+   Internet Society.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Balakrishnan et. al.     Best Current Practice                 [Page 41]
+
diff --git a/ext/picotcp/RFC/rfc3493.txt b/ext/picotcp/RFC/rfc3493.txt
new file mode 100644
index 0000000..5fea6c1
--- /dev/null
+++ b/ext/picotcp/RFC/rfc3493.txt
@@ -0,0 +1,2187 @@
+
+
+
+
+
+
+Network Working Group                                        R. Gilligan
+Request for Comments: 3493                                Intransa, Inc.
+Obsoletes: 2553                                               S. Thomson
+Category: Informational                                            Cisco
+                                                                J. Bound
+                                                               J. McCann
+                                                         Hewlett-Packard
+                                                              W. Stevens
+                                                           February 2003
+
+
+               Basic Socket Interface Extensions for IPv6
+
+Status of this Memo
+
+   This memo provides information for the Internet community.  It does
+   not specify an Internet standard of any kind.  Distribution of this
+   memo is unlimited.
+
+Copyright Notice
+
+   Copyright (C) The Internet Society (2003).  All Rights Reserved.
+
+Abstract
+
+   The de facto standard Application Program Interface (API) for TCP/IP
+   applications is the "sockets" interface.  Although this API was
+   developed for Unix in the early 1980s it has also been implemented on
+   a wide variety of non-Unix systems.  TCP/IP applications written
+   using the sockets API have in the past enjoyed a high degree of
+   portability and we would like the same portability with IPv6
+   applications.  But changes are required to the sockets API to support
+   IPv6 and this memo describes these changes.  These include a new
+   socket address structure to carry IPv6 addresses, new address
+   conversion functions, and some new socket options.  These extensions
+   are designed to provide access to the basic IPv6 features required by
+   TCP and UDP applications, including multicasting, while introducing a
+   minimum of change into the system and providing complete
+   compatibility for existing IPv4 applications.  Additional extensions
+   for advanced IPv6 features (raw sockets and access to the IPv6
+   extension headers) are defined in another document.
+
+
+
+
+
+
+
+
+
+
+Gilligan, et al.             Informational                      [Page 1]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+Table of Contents
+
+   1.  Introduction................................................3
+   2.  Design Considerations.......................................4
+       2.1  What Needs to be Changed...............................4
+       2.2  Data Types.............................................6
+       2.3  Headers................................................6
+       2.4  Structures.............................................6
+   3.  Socket Interface............................................6
+       3.1  IPv6 Address Family and Protocol Family................6
+       3.2  IPv6 Address Structure.................................7
+       3.3  Socket Address Structure for 4.3BSD-Based Systems......7
+       3.4  Socket Address Structure for 4.4BSD-Based Systems......9
+       3.5  The Socket Functions...................................9
+       3.6  Compatibility with IPv4 Applications..................10
+       3.7  Compatibility with IPv4 Nodes.........................11
+       3.8  IPv6 Wildcard Address.................................11
+       3.9  IPv6 Loopback Address.................................13
+       3.10 Portability Additions.................................14
+   4.  Interface Identification...................................16
+       4.1  Name-to-Index.........................................17
+       4.2  Index-to-Name.........................................17
+       4.3  Return All Interface Names and Indexes................18
+       4.4  Free Memory...........................................18
+   5.  Socket Options.............................................18
+       5.1  Unicast Hop Limit.....................................19
+       5.2  Sending and Receiving Multicast Packets...............19
+       5.3  IPV6_V6ONLY option for AF_INET6 Sockets...............22
+   6.  Library Functions..........................................22
+       6.1  Protocol-Independent Nodename and
+            Service Name Translation..............................23
+       6.2  Socket Address Structure to Node Name
+            and Service Name......................................28
+       6.3  Address Conversion Functions..........................31
+       6.4  Address Testing Macros................................33
+   7.  Summary of New Definitions.................................33
+   8.  Security Considerations....................................35
+   9.  Changes from RFC 2553......................................35
+   10. Acknowledgments............................................36
+   11. References.................................................37
+   12. Authors' Addresses.........................................38
+   13. Full Copyright Statement...................................39
+
+
+
+
+
+
+
+
+
+Gilligan, et al.             Informational                      [Page 2]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+1. Introduction
+
+   While IPv4 addresses are 32 bits long, IPv6 addresses are 128 bits
+   long.  The socket interface makes the size of an IP address quite
+   visible to an application; virtually all TCP/IP applications for
+   BSD-based systems have knowledge of the size of an IP address.  Those
+   parts of the API that expose the addresses must be changed to
+   accommodate the larger IPv6 address size.  IPv6 also introduces new
+   features, some of which must be made visible to applications via the
+   API.  This memo defines a set of extensions to the socket interface
+   to support the larger address size and new features of IPv6.  It
+   defines "basic" extensions that are of use to a broad range of
+   applications.  A companion document, the "advanced" API [4], covers
+   extensions that are of use to more specialized applications, examples
+   of which include routing daemons, and the "ping" and "traceroute"
+   utilities.
+
+   The development of this API was started in 1994 in the IETF IPng
+   working group.  The API has evolved over the years, published first
+   in RFC 2133, then again in RFC 2553, and reaching its final form in
+   this document.
+
+   As the API matured and stabilized, it was incorporated into the Open
+   Group's Networking Services (XNS) specification, issue 5.2, which was
+   subsequently incorporated into a joint Open Group/IEEE/ISO standard
+   [3].
+
+   Effort has been made to ensure that this document and [3] contain the
+   same information with regard to the API definitions.  However, the
+   reader should note that this document is for informational purposes
+   only, and that the official standard specification of the sockets API
+   is [3].
+
+   It is expected that any future standardization work on this API would
+   be done by the Open Group Base Working Group [6].
+
+   It should also be noted that this document describes only those
+   portions of the API needed for IPv4 and IPv6 communications.  Other
+   potential uses of the API, for example the use of getaddrinfo() and
+   getnameinfo() with the AF_UNIX address family, are beyond the scope
+   of this document.
+
+
+
+
+
+
+
+
+
+
+Gilligan, et al.             Informational                      [Page 3]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+2. Design Considerations
+
+   There are a number of important considerations in designing changes
+   to this well-worn API:
+
+   -  The API changes should provide both source and binary
+      compatibility for programs written to the original API.  That is,
+      existing program binaries should continue to operate when run on a
+      system supporting the new API.  In addition, existing applications
+      that are re-compiled and run on a system supporting the new API
+      should continue to operate.  Simply put, the API changes for IPv6
+      should not break existing programs.  An additional mechanism for
+      implementations to verify this is to confirm the new symbols are
+      protected by Feature Test Macros as described in [3].  (Such
+      Feature Test Macros are not defined by this RFC.)
+
+   -  The changes to the API should be as small as possible in order to
+      simplify the task of converting existing IPv4 applications to
+      IPv6.
+
+   -  Where possible, applications should be able to use this API to
+      interoperate with both IPv6 and IPv4 hosts.  Applications should
+      not need to know which type of host they are communicating with.
+
+   -  IPv6 addresses carried in data structures should be 64-bit
+      aligned.  This is necessary in order to obtain optimum performance
+      on 64-bit machine architectures.
+
+   Because of the importance of providing IPv4 compatibility in the API,
+   these extensions are explicitly designed to operate on machines that
+   provide complete support for both IPv4 and IPv6.  A subset of this
+   API could probably be designed for operation on systems that support
+   only IPv6.  However, this is not addressed in this memo.
+
+2.1 What Needs to be Changed
+
+   The socket interface API consists of a few distinct components:
+
+   -  Core socket functions.
+
+   -  Address data structures.
+
+   -  Name-to-address translation functions.
+
+   -  Address conversion functions.
+
+
+
+
+
+
+Gilligan, et al.             Informational                      [Page 4]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+   The core socket functions -- those functions that deal with such
+   things as setting up and tearing down TCP connections, and sending
+   and receiving UDP packets -- were designed to be transport
+   independent.  Where protocol addresses are passed as function
+   arguments, they are carried via opaque pointers.  A protocol-specific
+   address data structure is defined for each protocol that the socket
+   functions support.  Applications must cast pointers to these
+   protocol-specific address structures into pointers to the generic
+   "sockaddr" address structure when using the socket functions.  These
+   functions need not change for IPv6, but a new IPv6-specific address
+   data structure is needed.
+
+   The "sockaddr_in" structure is the protocol-specific data structure
+   for IPv4.  This data structure actually includes 8 octets of unused
+   space, and it is tempting to try to use this space to adapt the
+   sockaddr_in structure to IPv6.  Unfortunately, the sockaddr_in
+   structure is not large enough to hold the 16-octet IPv6 address as
+   well as the other information (address family and port number) that
+   is needed.  So a new address data structure must be defined for IPv6.
+
+   IPv6 addresses are scoped [2] so they could be link-local, site,
+   organization, global, or other scopes at this time undefined.  To
+   support applications that want to be able to identify a set of
+   interfaces for a specific scope, the IPv6 sockaddr_in6 structure must
+   support a field that can be used by an implementation to identify a
+   set of interfaces identifying the scope for an IPv6 address.
+
+   The IPv4 name-to-address translation functions in the socket
+   interface are gethostbyname() and gethostbyaddr().  These are left as
+   is, and new functions are defined which support both IPv4 and IPv6.
+
+   The IPv4 address conversion functions -- inet_ntoa() and inet_addr()
+   -- convert IPv4 addresses between binary and printable form.  These
+   functions are quite specific to 32-bit IPv4 addresses.  We have
+   designed two analogous functions that convert both IPv4 and IPv6
+   addresses, and carry an address type parameter so that they can be
+   extended to other protocol families as well.
+
+   Finally, a few miscellaneous features are needed to support IPv6.  A
+   new interface is needed to support the IPv6 hop limit header field.
+   New socket options are needed to control the sending and receiving of
+   IPv6 multicast packets.
+
+   The socket interface will be enhanced in the future to provide access
+   to other IPv6 features.  Some of these extensions are described in
+   [4].
+
+
+
+
+
+Gilligan, et al.             Informational                      [Page 5]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+2.2 Data Types
+
+   The data types of the structure elements given in this memo are
+   intended to track the relevant standards.  uintN_t means an unsigned
+   integer of exactly N bits (e.g., uint16_t).  The sa_family_t and
+   in_port_t types are defined in [3].
+
+2.3 Headers
+
+   When function prototypes and structures are shown we show the headers
+   that must be #included to cause that item to be defined.
+
+2.4 Structures
+
+   When structures are described the members shown are the ones that
+   must appear in an implementation.  Additional, nonstandard members
+   may also be defined by an implementation.  As an additional
+   precaution nonstandard members could be verified by Feature Test
+   Macros as described in [3].  (Such Feature Test Macros are not
+   defined by this RFC.)
+
+   The ordering shown for the members of a structure is the recommended
+   ordering, given alignment considerations of multibyte members, but an
+   implementation may order the members differently.
+
+3. Socket Interface
+
+   This section specifies the socket interface changes for IPv6.
+
+3.1 IPv6 Address Family and Protocol Family
+
+   A new address family name, AF_INET6, is defined in <sys/socket.h>.
+   The AF_INET6 definition distinguishes between the original
+   sockaddr_in address data structure, and the new sockaddr_in6 data
+   structure.
+
+   A new protocol family name, PF_INET6, is defined in <sys/socket.h>.
+   Like most of the other protocol family names, this will usually be
+   defined to have the same value as the corresponding address family
+   name:
+
+      #define PF_INET6        AF_INET6
+
+   The AF_INET6 is used in the first argument to the socket() function
+   to indicate that an IPv6 socket is being created.
+
+
+
+
+
+
+Gilligan, et al.             Informational                      [Page 6]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+3.2 IPv6 Address Structure
+
+   A new in6_addr structure holds a single IPv6 address and is defined
+   as a result of including <netinet/in.h>:
+
+      struct in6_addr {
+          uint8_t  s6_addr[16];      /* IPv6 address */
+      };
+
+   This data structure contains an array of sixteen 8-bit elements,
+   which make up one 128-bit IPv6 address.  The IPv6 address is stored
+   in network byte order.
+
+   The structure in6_addr above is usually implemented with an embedded
+   union with extra fields that force the desired alignment level in a
+   manner similar to BSD implementations of "struct in_addr".  Those
+   additional implementation details are omitted here for simplicity.
+
+   An example is as follows:
+
+   struct in6_addr {
+        union {
+            uint8_t  _S6_u8[16];
+            uint32_t _S6_u32[4];
+            uint64_t _S6_u64[2];
+        } _S6_un;
+   };
+   #define s6_addr _S6_un._S6_u8
+
+3.3 Socket Address Structure for 4.3BSD-Based Systems
+
+   In the socket interface, a different protocol-specific data structure
+   is defined to carry the addresses for each protocol suite.  Each
+   protocol-specific data structure is designed so it can be cast into a
+   protocol-independent data structure -- the "sockaddr" structure.
+   Each has a "family" field that overlays the "sa_family" of the
+   sockaddr data structure.  This field identifies the type of the data
+   structure.
+
+   The sockaddr_in structure is the protocol-specific address data
+   structure for IPv4.  It is used to pass addresses between
+   applications and the system in the socket functions.  The following
+   sockaddr_in6 structure holds IPv6 addresses and is defined as a
+   result of including the <netinet/in.h> header:
+
+
+
+
+
+
+
+Gilligan, et al.             Informational                      [Page 7]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+struct sockaddr_in6 {
+    sa_family_t     sin6_family;    /* AF_INET6 */
+    in_port_t       sin6_port;      /* transport layer port # */
+    uint32_t        sin6_flowinfo;  /* IPv6 flow information */
+    struct in6_addr sin6_addr;      /* IPv6 address */
+    uint32_t        sin6_scope_id;  /* set of interfaces for a scope */
+};
+
+   This structure is designed to be compatible with the sockaddr data
+   structure used in the 4.3BSD release.
+
+   The sin6_family field identifies this as a sockaddr_in6 structure.
+   This field overlays the sa_family field when the buffer is cast to a
+   sockaddr data structure.  The value of this field must be AF_INET6.
+
+   The sin6_port field contains the 16-bit UDP or TCP port number.  This
+   field is used in the same way as the sin_port field of the
+   sockaddr_in structure.  The port number is stored in network byte
+   order.
+
+   The sin6_flowinfo field is a 32-bit field intended to contain flow-
+   related information.  The exact way this field is mapped to or from a
+   packet is not currently specified.  Until such time as its use is
+   specified, applications should set this field to zero when
+   constructing a sockaddr_in6, and ignore this field in a sockaddr_in6
+   structure constructed by the system.
+
+   The sin6_addr field is a single in6_addr structure (defined in the
+   previous section).  This field holds one 128-bit IPv6 address.  The
+   address is stored in network byte order.
+
+   The ordering of elements in this structure is specifically designed
+   so that when sin6_addr field is aligned on a 64-bit boundary, the
+   start of the structure will also be aligned on a 64-bit boundary.
+   This is done for optimum performance on 64-bit architectures.
+
+   The sin6_scope_id field is a 32-bit integer that identifies a set of
+   interfaces as appropriate for the scope [2] of the address carried in
+   the sin6_addr field.  The mapping of sin6_scope_id to an interface or
+   set of interfaces is left to implementation and future specifications
+   on the subject of scoped addresses.
+
+   Notice that the sockaddr_in6 structure will normally be larger than
+   the generic sockaddr structure.  On many existing implementations the
+   sizeof(struct sockaddr_in) equals sizeof(struct sockaddr), with both
+   being 16 bytes.  Any existing code that makes this assumption needs
+   to be examined carefully when converting to IPv6.
+
+
+
+
+Gilligan, et al.             Informational                      [Page 8]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+3.4 Socket Address Structure for 4.4BSD-Based Systems
+
+   The 4.4BSD release includes a small, but incompatible change to the
+   socket interface.  The "sa_family" field of the sockaddr data
+   structure was changed from a 16-bit value to an 8-bit value, and the
+   space saved used to hold a length field, named "sa_len".  The
+   sockaddr_in6 data structure given in the previous section cannot be
+   correctly cast into the newer sockaddr data structure.  For this
+   reason, the following alternative IPv6 address data structure is
+   provided to be used on systems based on 4.4BSD.  It is defined as a
+   result of including the <netinet/in.h> header.
+
+struct sockaddr_in6 {
+    uint8_t         sin6_len;       /* length of this struct */
+    sa_family_t     sin6_family;    /* AF_INET6 */
+    in_port_t       sin6_port;      /* transport layer port # */
+    uint32_t        sin6_flowinfo;  /* IPv6 flow information */
+    struct in6_addr sin6_addr;      /* IPv6 address */
+    uint32_t        sin6_scope_id;  /* set of interfaces for a scope */
+};
+
+   The only differences between this data structure and the 4.3BSD
+   variant are the inclusion of the length field, and the change of the
+   family field to an 8-bit data type.  The definitions of all the other
+   fields are identical to the structure defined in the previous
+   section.
+
+   Systems that provide this version of the sockaddr_in6 data structure
+   must also declare SIN6_LEN as a result of including the
+   <netinet/in.h> header.  This macro allows applications to determine
+   whether they are being built on a system that supports the 4.3BSD or
+   4.4BSD variants of the data structure.
+
+3.5 The Socket Functions
+
+   Applications call the socket() function to create a socket descriptor
+   that represents a communication endpoint.  The arguments to the
+   socket() function tell the system which protocol to use, and what
+   format address structure will be used in subsequent functions.  For
+   example, to create an IPv4/TCP socket, applications make the call:
+
+      s = socket(AF_INET, SOCK_STREAM, 0);
+
+   To create an IPv4/UDP socket, applications make the call:
+
+      s = socket(AF_INET, SOCK_DGRAM, 0);
+
+
+
+
+
+Gilligan, et al.             Informational                      [Page 9]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+   Applications may create IPv6/TCP and IPv6/UDP sockets (which may also
+   handle IPv4 communication as described in section 3.7) by simply
+   using the constant AF_INET6 instead of AF_INET in the first argument.
+   For example, to create an IPv6/TCP socket, applications make the
+   call:
+
+      s = socket(AF_INET6, SOCK_STREAM, 0);
+
+   To create an IPv6/UDP socket, applications make the call:
+
+      s = socket(AF_INET6, SOCK_DGRAM, 0);
+
+   Once the application has created an AF_INET6 socket, it must use the
+   sockaddr_in6 address structure when passing addresses in to the
+   system.  The functions that the application uses to pass addresses
+   into the system are:
+
+      bind()
+      connect()
+      sendmsg()
+      sendto()
+
+   The system will use the sockaddr_in6 address structure to return
+   addresses to applications that are using AF_INET6 sockets.  The
+   functions that return an address from the system to an application
+   are:
+
+      accept()
+      recvfrom()
+      recvmsg()
+      getpeername()
+      getsockname()
+
+   No changes to the syntax of the socket functions are needed to
+   support IPv6, since all of the "address carrying" functions use an
+   opaque address pointer, and carry an address length as a function
+   argument.
+
+3.6 Compatibility with IPv4 Applications
+
+   In order to support the large base of applications using the original
+   API, system implementations must provide complete source and binary
+   compatibility with the original API.  This means that systems must
+   continue to support AF_INET sockets and the sockaddr_in address
+   structure.  Applications must be able to create IPv4/TCP and IPv4/UDP
+   sockets using the AF_INET constant in the socket() function, as
+
+
+
+
+
+Gilligan, et al.             Informational                     [Page 10]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+   described in the previous section.  Applications should be able to
+   hold a combination of IPv4/TCP, IPv4/UDP, IPv6/TCP and IPv6/UDP
+   sockets simultaneously within the same process.
+
+   Applications using the original API should continue to operate as
+   they did on systems supporting only IPv4.  That is, they should
+   continue to interoperate with IPv4 nodes.
+
+3.7 Compatibility with IPv4 Nodes
+
+   The API also provides a different type of compatibility: the ability
+   for IPv6 applications to interoperate with IPv4 applications.  This
+   feature uses the IPv4-mapped IPv6 address format defined in the IPv6
+   addressing architecture specification [2].  This address format
+   allows the IPv4 address of an IPv4 node to be represented as an IPv6
+   address.  The IPv4 address is encoded into the low-order 32 bits of
+   the IPv6 address, and the high-order 96 bits hold the fixed prefix
+   0:0:0:0:0:FFFF.  IPv4-mapped addresses are written as follows:
+
+      ::FFFF:<IPv4-address>
+
+   These addresses can be generated automatically by the getaddrinfo()
+   function, as described in Section 6.1.
+
+   Applications may use AF_INET6 sockets to open TCP connections to IPv4
+   nodes, or send UDP packets to IPv4 nodes, by simply encoding the
+   destination's IPv4 address as an IPv4-mapped IPv6 address, and
+   passing that address, within a sockaddr_in6 structure, in the
+   connect() or sendto() call.  When applications use AF_INET6 sockets
+   to accept TCP connections from IPv4 nodes, or receive UDP packets
+   from IPv4 nodes, the system returns the peer's address to the
+   application in the accept(), recvfrom(), or getpeername() call using
+   a sockaddr_in6 structure encoded this way.
+
+   Few applications will likely need to know which type of node they are
+   interoperating with.  However, for those applications that do need to
+   know, the IN6_IS_ADDR_V4MAPPED() macro, defined in Section 6.4, is
+   provided.
+
+3.8 IPv6 Wildcard Address
+
+   While the bind() function allows applications to select the source IP
+   address of UDP packets and TCP connections, applications often want
+   the system to select the source address for them.  With IPv4, one
+   specifies the address as the symbolic constant INADDR_ANY (called the
+   "wildcard" address) in the bind() call, or simply omits the bind()
+   entirely.
+
+
+
+
+Gilligan, et al.             Informational                     [Page 11]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+   Since the IPv6 address type is a structure (struct in6_addr), a
+   symbolic constant can be used to initialize an IPv6 address variable,
+   but cannot be used in an assignment.  Therefore systems provide the
+   IPv6 wildcard address in two forms.
+
+   The first version is a global variable named "in6addr_any" that is an
+   in6_addr structure.  The extern declaration for this variable is
+   defined in <netinet/in.h>:
+
+      extern const struct in6_addr in6addr_any;
+
+   Applications use in6addr_any similarly to the way they use INADDR_ANY
+   in IPv4.  For example, to bind a socket to port number 23, but let
+   the system select the source address, an application could use the
+   following code:
+
+      struct sockaddr_in6 sin6;
+       . . .
+      sin6.sin6_family = AF_INET6;
+      sin6.sin6_flowinfo = 0;
+      sin6.sin6_port = htons(23);
+      sin6.sin6_addr = in6addr_any;  /* structure assignment */
+       . . .
+      if (bind(s, (struct sockaddr *) &sin6, sizeof(sin6)) == -1)
+              . . .
+
+   The other version is a symbolic constant named IN6ADDR_ANY_INIT and
+   is defined in <netinet/in.h>.  This constant can be used to
+   initialize an in6_addr structure:
+
+      struct in6_addr anyaddr = IN6ADDR_ANY_INIT;
+
+   Note that this constant can be used ONLY at declaration time.  It can
+   not be used to assign a previously declared in6_addr structure.  For
+   example, the following code will not work:
+
+      /* This is the WRONG way to assign an unspecified address */
+      struct sockaddr_in6 sin6;
+       . . .
+      sin6.sin6_addr = IN6ADDR_ANY_INIT; /* will NOT compile */
+
+   Be aware that the IPv4 INADDR_xxx constants are all defined in host
+   byte order but the IPv6 IN6ADDR_xxx constants and the IPv6
+   in6addr_xxx externals are defined in network byte order.
+
+
+
+
+
+
+
+Gilligan, et al.             Informational                     [Page 12]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+3.9 IPv6 Loopback Address
+
+   Applications may need to send UDP packets to, or originate TCP
+   connections to, services residing on the local node.  In IPv4, they
+   can do this by using the constant IPv4 address INADDR_LOOPBACK in
+   their connect(), sendto(), or sendmsg() call.
+
+   IPv6 also provides a loopback address to contact local TCP and UDP
+   services.  Like the unspecified address, the IPv6 loopback address is
+   provided in two forms -- a global variable and a symbolic constant.
+
+   The global variable is an in6_addr structure named
+   "in6addr_loopback."  The extern declaration for this variable is
+   defined in <netinet/in.h>:
+
+      extern const struct in6_addr in6addr_loopback;
+
+   Applications use in6addr_loopback as they would use INADDR_LOOPBACK
+   in IPv4 applications (but beware of the byte ordering difference
+   mentioned at the end of the previous section).  For example, to open
+   a TCP connection to the local telnet server, an application could use
+   the following code:
+
+   struct sockaddr_in6 sin6;
+    . . .
+   sin6.sin6_family = AF_INET6;
+   sin6.sin6_flowinfo = 0;
+   sin6.sin6_port = htons(23);
+   sin6.sin6_addr = in6addr_loopback;  /* structure assignment */
+    . . .
+   if (connect(s, (struct sockaddr *) &sin6, sizeof(sin6)) == -1)
+           . . .
+
+   The symbolic constant is named IN6ADDR_LOOPBACK_INIT and is defined
+   in <netinet/in.h>.  It can be used at declaration time ONLY; for
+   example:
+
+      struct in6_addr loopbackaddr = IN6ADDR_LOOPBACK_INIT;
+
+   Like IN6ADDR_ANY_INIT, this constant cannot be used in an assignment
+   to a previously declared IPv6 address variable.
+
+
+
+
+
+
+
+
+
+
+Gilligan, et al.             Informational                     [Page 13]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+3.10 Portability Additions
+
+   One simple addition to the sockets API that can help application
+   writers is the "struct sockaddr_storage".  This data structure can
+   simplify writing code that is portable across multiple address
+   families and platforms.  This data structure is designed with the
+   following goals.
+
+   - Large enough to accommodate all supported protocol-specific address
+      structures.
+
+   - Aligned at an appropriate boundary so that pointers to it can be
+      cast as pointers to protocol specific address structures and used
+      to access the fields of those structures without alignment
+      problems.
+
+   The sockaddr_storage structure contains field ss_family which is of
+   type sa_family_t.  When a sockaddr_storage structure is cast to a
+   sockaddr structure, the ss_family field of the sockaddr_storage
+   structure maps onto the sa_family field of the sockaddr structure.
+   When a sockaddr_storage structure is cast as a protocol specific
+   address structure, the ss_family field maps onto a field of that
+   structure that is of type sa_family_t and that identifies the
+   protocol's address family.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Gilligan, et al.             Informational                     [Page 14]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+   An example implementation design of such a data structure would be as
+   follows.
+
+/*
+ * Desired design of maximum size and alignment
+ */
+#define _SS_MAXSIZE    128  /* Implementation specific max size */
+#define _SS_ALIGNSIZE  (sizeof (int64_t))
+                         /* Implementation specific desired alignment */
+/*
+ * Definitions used for sockaddr_storage structure paddings design.
+ */
+#define _SS_PAD1SIZE   (_SS_ALIGNSIZE - sizeof (sa_family_t))
+#define _SS_PAD2SIZE   (_SS_MAXSIZE - (sizeof (sa_family_t) + \
+                              _SS_PAD1SIZE + _SS_ALIGNSIZE))
+struct sockaddr_storage {
+    sa_family_t  ss_family;     /* address family */
+    /* Following fields are implementation specific */
+    char      __ss_pad1[_SS_PAD1SIZE];
+              /* 6 byte pad, this is to make implementation */
+              /* specific pad up to alignment field that */
+              /* follows explicit in the data structure */
+    int64_t   __ss_align;     /* field to force desired structure */
+               /* storage alignment */
+    char      __ss_pad2[_SS_PAD2SIZE];
+              /* 112 byte pad to achieve desired size, */
+              /* _SS_MAXSIZE value minus size of ss_family */
+              /* __ss_pad1, __ss_align fields is 112 */
+};
+
+   The above example implementation illustrates a data structure which
+   will align on a 64-bit boundary.  An implementation-specific field
+   "__ss_align" along with "__ss_pad1" is used to force a 64-bit
+   alignment which covers proper alignment good enough for the needs of
+   sockaddr_in6 (IPv6), sockaddr_in (IPv4) address data structures.  The
+   size of padding field __ss_pad1 depends on the chosen alignment
+   boundary.  The size of padding field __ss_pad2 depends on the value
+   of overall size chosen for the total size of the structure.  This
+   size and alignment are represented in the above example by
+   implementation specific (not required) constants _SS_MAXSIZE (chosen
+   value 128) and _SS_ALIGNSIZE (with chosen value 8).  Constants
+   _SS_PAD1SIZE (derived value 6) and _SS_PAD2SIZE (derived value 112)
+   are also for illustration and not required.  The derived values
+   assume sa_family_t is 2 bytes.  The implementation specific
+   definitions and structure field names above start with an underscore
+   to denote implementation private namespace.  Portable code is not
+   expected to access or reference those fields or constants.
+
+
+
+
+Gilligan, et al.             Informational                     [Page 15]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+   On implementations where the sockaddr data structure includes a
+   "sa_len" field this data structure would look like this:
+
+/*
+ * Definitions used for sockaddr_storage structure paddings design.
+ */
+#define _SS_PAD1SIZE (_SS_ALIGNSIZE - \
+                            (sizeof (uint8_t) + sizeof (sa_family_t)))
+#define _SS_PAD2SIZE (_SS_MAXSIZE - \
+                            (sizeof (uint8_t) + sizeof (sa_family_t) + \
+                             _SS_PAD1SIZE + _SS_ALIGNSIZE))
+struct sockaddr_storage {
+    uint8_t      ss_len;        /* address length */
+    sa_family_t  ss_family;     /* address family */
+    /* Following fields are implementation specific */
+    char         __ss_pad1[_SS_PAD1SIZE];
+                  /* 6 byte pad, this is to make implementation */
+                  /* specific pad up to alignment field that */
+                  /* follows explicit in the data structure */
+    int64_t      __ss_align;  /* field to force desired structure */
+                  /* storage alignment */
+    char         __ss_pad2[_SS_PAD2SIZE];
+                  /* 112 byte pad to achieve desired size, */
+                  /* _SS_MAXSIZE value minus size of ss_len, */
+                  /* ss_family, __ss_pad1, __ss_align fields is 112 */
+};
+
+4. Interface Identification
+
+   This API uses an interface index (a small positive integer) to
+   identify the local interface on which a multicast group is joined
+   (Section 5.2).  Additionally, the advanced API [4] uses these same
+   interface indexes to identify the interface on which a datagram is
+   received, or to specify the interface on which a datagram is to be
+   sent.
+
+   Interfaces are normally known by names such as "le0", "sl1", "ppp2",
+   and the like.  On Berkeley-derived implementations, when an interface
+   is made known to the system, the kernel assigns a unique positive
+   integer value (called the interface index) to that interface.  These
+   are small positive integers that start at 1.  (Note that 0 is never
+   used for an interface index.)  There may be gaps so that there is no
+   current interface for a particular positive interface index.
+
+   This API defines two functions that map between an interface name and
+   index, a third function that returns all the interface names and
+   indexes, and a fourth function to return the dynamic memory allocated
+   by the previous function.  How these functions are implemented is
+
+
+
+Gilligan, et al.             Informational                     [Page 16]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+   left up to the implementation.  4.4BSD implementations can implement
+   these functions using the existing sysctl() function with the
+   NET_RT_IFLIST command.  Other implementations may wish to use ioctl()
+   for this purpose.
+
+4.1 Name-to-Index
+
+   The first function maps an interface name into its corresponding
+   index.
+
+      #include <net/if.h>
+
+      unsigned int  if_nametoindex(const char *ifname);
+
+   If ifname is the name of an interface, the if_nametoindex() function
+   shall return the interface index corresponding to name ifname;
+   otherwise, it shall return zero.  No errors are defined.
+
+4.2 Index-to-Name
+
+   The second function maps an interface index into its corresponding
+   name.
+
+      #include <net/if.h>
+
+      char  *if_indextoname(unsigned int ifindex, char *ifname);
+
+   When this function is called, the ifname argument shall point to a
+   buffer of at least IF_NAMESIZE bytes.  The function shall place in
+   this buffer the name of the interface with index ifindex.
+   (IF_NAMESIZE is also defined in <net/if.h> and its value includes a
+   terminating null byte at the end of the interface name.)  If ifindex
+   is an interface index, then the function shall return the value
+   supplied in ifname, which points to a buffer now containing the
+   interface name.  Otherwise, the function shall return a NULL pointer
+   and set errno to indicate the error.  If there is no interface
+   corresponding to the specified index, errno is set to ENXIO.  If
+   there was a system error (such as running out of memory), errno would
+   be set to the proper value (e.g., ENOMEM).
+
+
+
+
+
+
+
+
+
+
+
+
+Gilligan, et al.             Informational                     [Page 17]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+4.3 Return All Interface Names and Indexes
+
+   The if_nameindex structure holds the information about a single
+   interface and is defined as a result of including the <net/if.h>
+   header.
+
+   struct if_nameindex {
+     unsigned int   if_index;  /* 1, 2, ... */
+     char          *if_name;   /* null terminated name: "le0", ... */
+   };
+
+   The final function returns an array of if_nameindex structures, one
+   structure per interface.
+
+      #include <net/if.h>
+
+      struct if_nameindex  *if_nameindex(void);
+
+   The end of the array of structures is indicated by a structure with
+   an if_index of 0 and an if_name of NULL.  The function returns a NULL
+   pointer upon an error, and would set errno to the appropriate value.
+
+   The memory used for this array of structures along with the interface
+   names pointed to by the if_name members is obtained dynamically.
+   This memory is freed by the next function.
+
+4.4 Free Memory
+
+   The following function frees the dynamic memory that was allocated by
+   if_nameindex().
+
+      #include <net/if.h>
+
+      void  if_freenameindex(struct if_nameindex *ptr);
+
+   The ptr argument shall be a pointer that was returned by
+   if_nameindex().  After if_freenameindex() has been called, the
+   application shall not use the array of which ptr is the address.
+
+5. Socket Options
+
+   A number of new socket options are defined for IPv6.  All of these
+   new options are at the IPPROTO_IPV6 level.  That is, the "level"
+   parameter in the getsockopt() and setsockopt() calls is IPPROTO_IPV6
+   when using these options.  The constant name prefix IPV6_ is used in
+   all of the new socket options.  This serves to clearly identify these
+   options as applying to IPv6.
+
+
+
+
+Gilligan, et al.             Informational                     [Page 18]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+   The declaration for IPPROTO_IPV6, the new IPv6 socket options, and
+   related constants defined in this section are obtained by including
+   the header <netinet/in.h>.
+
+5.1 Unicast Hop Limit
+
+   A new setsockopt() option controls the hop limit used in outgoing
+   unicast IPv6 packets.  The name of this option is IPV6_UNICAST_HOPS,
+   and it is used at the IPPROTO_IPV6 layer.  The following example
+   illustrates how it is used:
+
+   int  hoplimit = 10;
+
+   if (setsockopt(s, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
+                  (char *) &hoplimit, sizeof(hoplimit)) == -1)
+       perror("setsockopt IPV6_UNICAST_HOPS");
+
+   When the IPV6_UNICAST_HOPS option is set with setsockopt(), the
+   option value given is used as the hop limit for all subsequent
+   unicast packets sent via that socket.  If the option is not set, the
+   system selects a default value.  The integer hop limit value (called
+   x) is interpreted as follows:
+
+      x < -1:        return an error of EINVAL
+      x == -1:       use kernel default
+      0 <= x <= 255: use x
+      x >= 256:      return an error of EINVAL
+
+   The IPV6_UNICAST_HOPS option may be used with getsockopt() to
+   determine the hop limit value that the system will use for subsequent
+   unicast packets sent via that socket.  For example:
+
+      int  hoplimit;
+      socklen_t  len = sizeof(hoplimit);
+
+      if (getsockopt(s, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
+                     (char *) &hoplimit, &len) == -1)
+          perror("getsockopt IPV6_UNICAST_HOPS");
+      else
+          printf("Using %d for hop limit.\n", hoplimit);
+
+5.2 Sending and Receiving Multicast Packets
+
+   IPv6 applications may send multicast packets by simply specifying an
+   IPv6 multicast address as the destination address, for example in the
+   destination address argument of the sendto() function.
+
+
+
+
+
+Gilligan, et al.             Informational                     [Page 19]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+   Three socket options at the IPPROTO_IPV6 layer control some of the
+   parameters for sending multicast packets.  Setting these options is
+   not required: applications may send multicast packets without using
+   these options.  The setsockopt() options for controlling the sending
+   of multicast packets are summarized below.  These three options can
+   also be used with getsockopt().
+
+      IPV6_MULTICAST_IF
+
+         Set the interface to use for outgoing multicast packets.  The
+         argument is the index of the interface to use.  If the
+         interface index is specified as zero, the system selects the
+         interface (for example, by looking up the address in a routing
+         table and using the resulting interface).
+
+         Argument type: unsigned int
+
+      IPV6_MULTICAST_HOPS
+
+         Set the hop limit to use for outgoing multicast packets.  (Note
+         a separate option - IPV6_UNICAST_HOPS - is provided to set the
+         hop limit to use for outgoing unicast packets.)
+
+         The interpretation of the argument is the same as for the
+         IPV6_UNICAST_HOPS option:
+
+            x < -1:        return an error of EINVAL
+            x == -1:       use kernel default
+            0 <= x <= 255: use x
+            x >= 256:      return an error of EINVAL
+
+            If IPV6_MULTICAST_HOPS is not set, the default is 1
+            (same as IPv4 today)
+
+         Argument type: int
+
+      IPV6_MULTICAST_LOOP
+
+         If a multicast datagram is sent to a group to which the sending
+         host itself belongs (on the outgoing interface), a copy of the
+         datagram is looped back by the IP layer for local delivery if
+         this option is set to 1.  If this option is set to 0 a copy is
+         not looped back.  Other option values return an error of
+         EINVAL.
+
+
+
+
+
+
+
+Gilligan, et al.             Informational                     [Page 20]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+         If IPV6_MULTICAST_LOOP is not set, the default is 1 (loopback;
+         same as IPv4 today).
+
+         Argument type: unsigned int
+
+   The reception of multicast packets is controlled by the two
+   setsockopt() options summarized below.  An error of EOPNOTSUPP is
+   returned if these two options are used with getsockopt().
+
+      IPV6_JOIN_GROUP
+
+         Join a multicast group on a specified local interface.
+         If the interface index is specified as 0,
+         the kernel chooses the local interface.
+         For example, some kernels look up the multicast group
+         in the normal IPv6 routing table and use the resulting
+         interface.
+
+         Argument type: struct ipv6_mreq
+
+      IPV6_LEAVE_GROUP
+
+         Leave a multicast group on a specified interface.
+         If the interface index is specified as 0, the system
+         may choose a multicast group membership to drop by
+         matching the multicast address only.
+
+         Argument type: struct ipv6_mreq
+
+   The argument type of both of these options is the ipv6_mreq
+   structure, defined as a result of including the <netinet/in.h>
+   header:
+
+   struct ipv6_mreq {
+       struct in6_addr ipv6mr_multiaddr; /* IPv6 multicast addr */
+       unsigned int    ipv6mr_interface; /* interface index */
+   };
+
+   Note that to receive multicast datagrams a process must join the
+   multicast group to which datagrams will be sent.  UDP applications
+   must also bind the UDP port to which datagrams will be sent.  Some
+   processes also bind the multicast group address to the socket, in
+   addition to the port, to prevent other datagrams destined to that
+   same port from being delivered to the socket.
+
+
+
+
+
+
+
+Gilligan, et al.             Informational                     [Page 21]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+5.3 IPV6_V6ONLY option for AF_INET6 Sockets
+
+   This socket option restricts AF_INET6 sockets to IPv6 communications
+   only.  As stated in section <3.7 Compatibility with IPv4 Nodes>,
+   AF_INET6 sockets may be used for both IPv4 and IPv6 communications.
+   Some applications may want to restrict their use of an AF_INET6
+   socket to IPv6 communications only.  For these applications the
+   IPV6_V6ONLY socket option is defined.  When this option is turned on,
+   the socket can be used to send and receive IPv6 packets only.  This
+   is an IPPROTO_IPV6 level option.  This option takes an int value.
+   This is a boolean option.  By default this option is turned off.
+
+   Here is an example of setting this option:
+
+      int on = 1;
+
+      if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
+                     (char *)&on, sizeof(on)) == -1)
+          perror("setsockopt IPV6_V6ONLY");
+      else
+          printf("IPV6_V6ONLY set\n");
+
+   Note - This option has no effect on the use of IPv4 Mapped addresses
+   which enter a node as valid IPv6 addresses for IPv6 communications
+   as defined by Stateless IP/ICMP Translation Algorithm (SIIT) [5].
+
+   An example use of this option is to allow two versions of the same
+   server process to run on the same port, one providing service over
+   IPv6, the other providing the same service over IPv4.
+
+6. Library Functions
+
+   New library functions are needed to perform a variety of operations
+   with IPv6 addresses.  Functions are needed to look up IPv6 addresses
+   in the Domain Name System (DNS).  Both forward lookup (nodename-to-
+   address translation) and reverse lookup (address-to-nodename
+   translation) need to be supported.  Functions are also needed to
+   convert IPv6 addresses between their binary and textual form.
+
+   We note that the two existing functions, gethostbyname() and
+   gethostbyaddr(), are left as-is.  New functions are defined to handle
+   both IPv4 and IPv6 addresses.
+
+   The commonly used function gethostbyname() is inadequate for many
+   applications, first because it provides no way for the caller to
+   specify anything about the types of addresses desired (IPv4 only,
+   IPv6 only, IPv4-mapped IPv6 are OK, etc.), and second because many
+   implementations of this function are not thread safe.  RFC 2133
+
+
+
+Gilligan, et al.             Informational                     [Page 22]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+   defined a function named gethostbyname2() but this function was also
+   inadequate, first because its use required setting a global option
+   (RES_USE_INET6) when IPv6 addresses were required, and second because
+   a flag argument is needed to provide the caller with additional
+   control over the types of addresses required.  The gethostbyname2()
+   function was deprecated in RFC 2553 and is no longer part of the
+   basic API.
+
+6.1 Protocol-Independent Nodename and Service Name Translation
+
+   Nodename-to-address translation is done in a protocol-independent
+   fashion using the getaddrinfo() function.
+
+#include <sys/socket.h>
+#include <netdb.h>
+
+
+int getaddrinfo(const char *nodename, const char *servname,
+                const struct addrinfo *hints, struct addrinfo **res);
+
+void freeaddrinfo(struct addrinfo *ai);
+
+struct addrinfo {
+  int     ai_flags;     /* AI_PASSIVE, AI_CANONNAME,
+                           AI_NUMERICHOST, .. */
+  int     ai_family;    /* AF_xxx */
+  int     ai_socktype;  /* SOCK_xxx */
+  int     ai_protocol;  /* 0 or IPPROTO_xxx for IPv4 and IPv6 */
+  socklen_t  ai_addrlen;   /* length of ai_addr */
+  char   *ai_canonname; /* canonical name for nodename */
+  struct sockaddr  *ai_addr; /* binary address */
+  struct addrinfo  *ai_next; /* next structure in linked list */
+};
+
+   The getaddrinfo() function translates the name of a service location
+   (for example, a host name) and/or a service name and returns a set of
+   socket addresses and associated information to be used in creating a
+   socket with which to address the specified service.
+
+   The nodename and servname arguments are either null pointers or
+   pointers to null-terminated strings.  One or both of these two
+   arguments must be a non-null pointer.
+
+   The format of a valid name depends on the address family or families.
+   If a specific family is not given and the name could be interpreted
+   as valid within multiple supported families, the implementation will
+   attempt to resolve the name in all supported families and, in absence
+   of errors, one or more results shall be returned.
+
+
+
+Gilligan, et al.             Informational                     [Page 23]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+   If the nodename argument is not null, it can be a descriptive name or
+   can be an address string.  If the specified address family is
+   AF_INET, AF_INET6, or AF_UNSPEC, valid descriptive names include host
+   names. If the specified address family is AF_INET or AF_UNSPEC,
+   address strings using Internet standard dot notation as specified in
+   inet_addr() are valid.  If the specified address family is AF_INET6
+   or AF_UNSPEC, standard IPv6 text forms described in inet_pton() are
+   valid.
+
+   If nodename is not null, the requested service location is named by
+   nodename; otherwise, the requested service location is local to the
+   caller.
+
+   If servname is null, the call shall return network-level addresses
+   for the specified nodename.  If servname is not null, it is a null-
+   terminated character string identifying the requested service.  This
+   can be either a descriptive name or a numeric representation suitable
+   for use with the address family or families.  If the specified
+   address family is AF_INET, AF_INET6 or AF_UNSPEC, the service can be
+   specified as a string specifying a decimal port number.
+
+   If the argument hints is not null, it refers to a structure
+   containing input values that may direct the operation by providing
+   options and by limiting the returned information to a specific socket
+   type, address family and/or protocol.  In this hints structure every
+   member other than ai_flags, ai_family, ai_socktype and ai_protocol
+   shall be set to zero or a null pointer.  A value of AF_UNSPEC for
+   ai_family means that the caller shall accept any address family.  A
+   value of zero for ai_socktype means that the caller shall accept any
+   socket type.  A value of zero for ai_protocol means that the caller
+   shall accept any protocol.  If hints is a null pointer, the behavior
+   shall be as if it referred to a structure containing the value zero
+   for the ai_flags, ai_socktype and ai_protocol fields, and AF_UNSPEC
+   for the ai_family field.
+
+   Note:
+
+   1. If the caller handles only TCP and not UDP, for example, then the
+      ai_protocol member of the hints structure should be set to
+      IPPROTO_TCP when getaddrinfo() is called.
+
+   2. If the caller handles only IPv4 and not IPv6, then the ai_family
+      member of the hints structure should be set to AF_INET when
+      getaddrinfo() is called.
+
+
+
+
+
+
+
+Gilligan, et al.             Informational                     [Page 24]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+   The ai_flags field to which hints parameter points shall be set to
+   zero or be the bitwise-inclusive OR of one or more of the values
+   AI_PASSIVE, AI_CANONNAME, AI_NUMERICHOST, AI_NUMERICSERV,
+   AI_V4MAPPED, AI_ALL, and AI_ADDRCONFIG.
+
+   If the AI_PASSIVE flag is specified, the returned address information
+   shall be suitable for use in binding a socket for accepting incoming
+   connections for the specified service (i.e., a call to bind()).  In
+   this case, if the nodename argument is null, then the IP address
+   portion of the socket address structure shall be set to INADDR_ANY
+   for an IPv4 address or IN6ADDR_ANY_INIT for an IPv6 address.  If the
+   AI_PASSIVE flag is not specified, the returned address information
+   shall be suitable for a call to connect() (for a connection-mode
+   protocol) or for a call to connect(), sendto() or sendmsg() (for a
+   connectionless protocol).  In this case, if the nodename argument is
+   null, then the IP address portion of the socket address structure
+   shall be set to the loopback address.  This flag is ignored if the
+   nodename argument is not null.
+
+   If the AI_CANONNAME flag is specified and the nodename argument is
+   not null, the function shall attempt to determine the canonical name
+   corresponding to nodename (for example, if nodename is an alias or
+   shorthand notation for a complete name).
+
+   If the AI_NUMERICHOST flag is specified, then a non-null nodename
+   string supplied shall be a numeric host address string.  Otherwise,
+   an [EAI_NONAME] error is returned.  This flag shall prevent any type
+   of name resolution service (for example, the DNS) from being invoked.
+
+   If the AI_NUMERICSERV flag is specified, then a non-null servname
+   string supplied shall be a numeric port string.  Otherwise, an
+   [EAI_NONAME] error shall be returned.  This flag shall prevent any
+   type of name resolution service (for example, NIS+) from being
+   invoked.
+
+   If the AI_V4MAPPED flag is specified along with an ai_family of
+   AF_INET6, then getaddrinfo() shall return IPv4-mapped IPv6 addresses
+   on finding no matching IPv6 addresses (ai_addrlen shall be 16).
+
+      For example, when using the DNS, if no AAAA records are found then
+      a query is made for A records and any found are returned as IPv4-
+      mapped IPv6 addresses.
+
+   The AI_V4MAPPED flag shall be ignored unless ai_family equals
+   AF_INET6.
+
+   If the AI_ALL flag is used with the AI_V4MAPPED flag, then
+   getaddrinfo() shall return all matching IPv6 and IPv4 addresses.
+
+
+
+Gilligan, et al.             Informational                     [Page 25]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+      For example, when using the DNS, queries are made for both AAAA
+      records and A records, and getaddrinfo() returns the combined
+      results of both queries.  Any IPv4 addresses found are returned as
+      IPv4-mapped IPv6 addresses.
+
+   The AI_ALL flag without the AI_V4MAPPED flag is ignored.
+
+      Note:
+
+      When ai_family is not specified (AF_UNSPEC), AI_V4MAPPED and
+      AI_ALL flags will only be used if AF_INET6 is supported.
+
+   If the AI_ADDRCONFIG flag is specified, IPv4 addresses shall be
+   returned only if an IPv4 address is configured on the local system,
+   and IPv6 addresses shall be returned only if an IPv6 address is
+   configured on the local system.  For this purpose, the loopback
+   address is not considered a valid configured address.
+
+      For example, when using the DNS, a query for AAAA records should
+      occur only if the node has at least one IPv6 address configured
+      (other than IPv6 loopback) and a query for A records should occur
+      only if the node has at least one IPv4 address configured (other
+      than the IPv4 loopback).
+
+   The ai_socktype field to which argument hints points specifies the
+   socket type for the service, as defined for socket().  If a specific
+   socket type is not given (for example, a value of zero) and the
+   service name could be interpreted as valid with multiple supported
+   socket types, the implementation shall attempt to resolve the service
+   name for all supported socket types and, in the absence of errors,
+   all possible results shall be returned.  A non-zero socket type value
+   shall limit the returned information to values with the specified
+   socket type.
+
+   If the ai_family field to which hints points has the value AF_UNSPEC,
+   addresses shall be returned for use with any address family that can
+   be used with the specified nodename and/or servname.  Otherwise,
+   addresses shall be returned for use only with the specified address
+   family.  If ai_family is not AF_UNSPEC and ai_protocol is not zero,
+   then addresses are returned for use only with the specified address
+   family and protocol; the value of ai_protocol shall be interpreted as
+   in a call to the socket() function with the corresponding values of
+   ai_family and ai_protocol.
+
+   The freeaddrinfo() function frees one or more addrinfo structures
+   returned by getaddrinfo(), along with any additional storage
+   associated with those structures (for example, storage pointed to by
+   the ai_canonname and ai_addr fields; an application must not
+
+
+
+Gilligan, et al.             Informational                     [Page 26]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+   reference this storage after the associated addrinfo structure has
+   been freed).  If the ai_next field of the structure is not null, the
+   entire list of structures is freed.  The freeaddrinfo() function must
+   support the freeing of arbitrary sublists of an addrinfo list
+   originally returned by getaddrinfo().
+
+   Functions getaddrinfo() and freeaddrinfo() must be thread-safe.
+
+   A zero return value for getaddrinfo() indicates successful
+   completion; a non-zero return value indicates failure.  The possible
+   values for the failures are listed below under Error Return Values.
+
+   Upon successful return of getaddrinfo(), the location to which res
+   points shall refer to a linked list of addrinfo structures, each of
+   which shall specify a socket address and information for use in
+   creating a socket with which to use that socket address.  The list
+   shall include at least one addrinfo structure.  The ai_next field of
+   each structure contains a pointer to the next structure on the list,
+   or a null pointer if it is the last structure on the list.  Each
+   structure on the list shall include values for use with a call to the
+   socket() function, and a socket address for use with the connect()
+   function or, if the AI_PASSIVE flag was specified, for use with the
+   bind() function.  The fields ai_family, ai_socktype, and ai_protocol
+   shall be usable as the arguments to the socket() function to create a
+   socket suitable for use with the returned address.  The fields
+   ai_addr and ai_addrlen are usable as the arguments to the connect()
+   or bind() functions with such a socket, according to the AI_PASSIVE
+   flag.
+
+   If nodename is not null, and if requested by the AI_CANONNAME flag,
+   the ai_canonname field of the first returned addrinfo structure shall
+   point to a null-terminated string containing the canonical name
+   corresponding to the input nodename; if the canonical name is not
+   available, then ai_canonname shall refer to the nodename argument or
+   a string with the same contents.  The contents of the ai_flags field
+   of the returned structures are undefined.
+
+   All fields in socket address structures returned by getaddrinfo()
+   that are not filled in through an explicit argument (for example,
+   sin6_flowinfo) shall be set to zero.
+
+   Note: This makes it easier to compare socket address structures.
+
+
+
+
+
+
+
+
+
+Gilligan, et al.             Informational                     [Page 27]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+   Error Return Values:
+
+   The getaddrinfo() function shall fail and return the corresponding
+   value if:
+
+   [EAI_AGAIN]     The name could not be resolved at this time.  Future
+                   attempts may succeed.
+
+   [EAI_BADFLAGS]  The flags parameter had an invalid value.
+
+   [EAI_FAIL]      A non-recoverable error occurred when attempting to
+                   resolve the name.
+
+   [EAI_FAMILY]    The address family was not recognized.
+
+   [EAI_MEMORY]    There was a memory allocation failure when trying to
+                   allocate storage for the return value.
+
+   [EAI_NONAME]    The name does not resolve for the supplied
+                   parameters, or neither nodename nor servname was
+                   supplied.  At least one of these must be supplied.
+
+   [EAI_SERVICE]   The service passed was not recognized for the
+                   specified socket type.
+
+   [EAI_SOCKTYPE]  The intended socket type was not recognized.
+
+   [EAI_SYSTEM]    A system error occurred; the error code can be found
+                   in errno.
+
+   The gai_strerror() function provides a descriptive text string
+   corresponding to an EAI_xxx error value.
+
+      #include <netdb.h>
+
+      const char *gai_strerror(int ecode);
+
+   The argument is one of the EAI_xxx values defined for the
+   getaddrinfo() and getnameinfo() functions.  The return value points
+   to a string describing the error.  If the argument is not one of the
+   EAI_xxx values, the function still returns a pointer to a string
+   whose contents indicate an unknown error.
+
+6.2 Socket Address Structure to Node Name and Service Name
+
+   The getnameinfo() function is used to translate the contents of a
+   socket address structure to a node name and/or service name.
+
+
+
+
+Gilligan, et al.             Informational                     [Page 28]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+   #include <sys/socket.h>
+   #include <netdb.h>
+
+   int getnameinfo(const struct sockaddr *sa, socklen_t salen,
+                       char *node, socklen_t nodelen,
+                       char *service, socklen_t servicelen,
+                         int flags);
+
+   The getnameinfo() function shall translate a socket address to a node
+   name and service location, all of which are defined as in
+   getaddrinfo().
+
+   The sa argument points to a socket address structure to be
+   translated.
+
+   The salen argument holds the size of the socket address structure
+   pointed to by sa.
+
+   If the socket address structure contains an IPv4-mapped IPv6 address
+   or an IPv4-compatible IPv6 address, the implementation shall extract
+   the embedded IPv4 address and look up the node name for that IPv4
+   address.
+
+      Note: The IPv6 unspecified address ("::") and the IPv6 loopback
+      address ("::1") are not IPv4-compatible addresses.  If the address
+      is the IPv6 unspecified address ("::"), a lookup is not performed,
+      and the [EAI_NONAME] error is returned.
+
+   If the node argument is non-NULL and the nodelen argument is nonzero,
+   then the node argument points to a buffer able to contain up to
+   nodelen characters that receives the node name as a null-terminated
+   string.  If the node argument is NULL or the nodelen argument is
+   zero, the node name shall not be returned.  If the node's name cannot
+   be located, the numeric form of the node's address is returned
+   instead of its name.
+
+   If the service argument is non-NULL and the servicelen argument is
+   non-zero, then the service argument points to a buffer able to
+   contain up to servicelen bytes that receives the service name as a
+   null-terminated string.  If the service argument is NULL or the
+   servicelen argument is zero, the service name shall not be returned.
+   If the service's name cannot be located, the numeric form of the
+   service address (for example, its port number) shall be returned
+   instead of its name.
+
+   The arguments node and service cannot both be NULL.
+
+
+
+
+
+Gilligan, et al.             Informational                     [Page 29]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+   The flags argument is a flag that changes the default actions of the
+   function.  By default the fully-qualified domain name (FQDN) for the
+   host shall be returned, but:
+
+   -  If the flag bit NI_NOFQDN is set, only the node name portion of
+      the FQDN shall be returned for local hosts.
+
+   -  If the flag bit NI_NUMERICHOST is set, the numeric form of the
+      host's address shall be returned instead of its name, under all
+      circumstances.
+
+   -  If the flag bit NI_NAMEREQD is set, an error shall be returned if
+      the host's name cannot be located.
+
+   -  If the flag bit NI_NUMERICSERV is set, the numeric form of the
+      service address shall be returned (for example, its port number)
+      instead of its name, under all circumstances.
+
+   -  If the flag bit NI_DGRAM is set, this indicates that the service
+      is a datagram service (SOCK_DGRAM).  The default behavior shall
+      assume that the service is a stream service (SOCK_STREAM).
+
+   Note:
+
+   1. The NI_NUMERICxxx flags are required to support the "-n" flags
+      that many commands provide.
+
+   2. The NI_DGRAM flag is required for the few AF_INET and AF_INET6
+      port numbers (for example, [512,514]) that represent different
+      services for UDP and TCP.
+
+   The getnameinfo() function shall be thread safe.
+
+   A zero return value for getnameinfo() indicates successful
+   completion; a non-zero return value indicates failure.
+
+   Upon successful completion, getnameinfo() shall return the node and
+   service names, if requested, in the buffers provided.  The returned
+   names are always null-terminated strings.
+
+
+
+
+
+
+
+
+
+
+
+
+Gilligan, et al.             Informational                     [Page 30]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+   Error Return Values:
+
+   The getnameinfo() function shall fail and return the corresponding
+   value if:
+
+   [EAI_AGAIN]    The name could not be resolved at this time.
+                  Future attempts may succeed.
+
+   [EAI_BADFLAGS] The flags had an invalid value.
+
+   [EAI_FAIL]     A non-recoverable error occurred.
+
+   [EAI_FAMILY]   The address family was not recognized or the address
+                  length was invalid for the specified family.
+
+   [EAI_MEMORY]   There was a memory allocation failure.
+
+   [EAI_NONAME]   The name does not resolve for the supplied parameters.
+                  NI_NAMEREQD is set and the host's name cannot be
+                  located, or both node and service were null.
+
+   [EAI_OVERFLOW] An argument buffer overflowed.
+
+   [EAI_SYSTEM]   A system error occurred.  The error code can be found
+                  in errno.
+
+6.3 Address Conversion Functions
+
+   The two IPv4 functions inet_addr() and inet_ntoa() convert an IPv4
+   address between binary and text form.  IPv6 applications need similar
+   functions.  The following two functions convert both IPv6 and IPv4
+   addresses:
+
+   #include <arpa/inet.h>
+
+   int inet_pton(int af, const char *src, void *dst);
+
+   const char *inet_ntop(int af, const void *src,
+                            char *dst, socklen_t size);
+
+   The inet_pton() function shall convert an address in its standard
+   text presentation form into its numeric binary form.  The af argument
+   shall specify the family of the address.  The AF_INET and AF_INET6
+   address families shall be supported.  The src argument points to the
+   string being passed in.  The dst argument points to a buffer into
+   which the function stores the numeric address; this shall be large
+   enough to hold the numeric address (32 bits for AF_INET, 128 bits for
+   AF_INET6).  The inet_pton() function shall return 1 if the conversion
+
+
+
+Gilligan, et al.             Informational                     [Page 31]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+   succeeds, with the address pointed to by dst in network byte order.
+   It shall return 0 if the input is not a valid IPv4 dotted-decimal
+   string or a valid IPv6 address string, or -1 with errno set to
+   EAFNOSUPPORT if the af argument is unknown.
+
+   If the af argument of inet_pton() is AF_INET, the src string shall be
+   in the standard IPv4 dotted-decimal form:
+
+      ddd.ddd.ddd.ddd
+
+   where "ddd" is a one- to three-digit decimal number between 0 and 255.
+   The inet_pton() function does not accept other formats (such as the
+   octal numbers, hexadecimal numbers, and fewer than four numbers that
+   inet_addr() accepts).
+
+   If the af argument of inet_pton() is AF_INET6, the src string shall
+   be in one of the standard IPv6 text forms defined in Section 2.2 of
+   the addressing architecture specification [2].
+
+   The inet_ntop() function shall convert a numeric address into a text
+   string suitable for presentation.  The af argument shall specify the
+   family of the address.  This can be AF_INET or AF_INET6.  The src
+   argument points to a buffer holding an IPv4 address if the af
+   argument is AF_INET, or an IPv6 address if the af argument is
+   AF_INET6; the address must be in network byte order.  The dst
+   argument points to a buffer where the function stores the resulting
+   text string; it shall not be NULL.  The size argument specifies the
+   size of this buffer, which shall be large enough to hold the text
+   string (INET_ADDRSTRLEN characters for IPv4, INET6_ADDRSTRLEN
+   characters for IPv6).
+
+   In order to allow applications to easily declare buffers of the
+   proper size to store IPv4 and IPv6 addresses in string form, the
+   following two constants are defined in <netinet/in.h>:
+
+      #define INET_ADDRSTRLEN    16
+      #define INET6_ADDRSTRLEN   46
+
+   The inet_ntop() function shall return a pointer to the buffer
+   containing the text string if the conversion succeeds, and NULL
+   otherwise.  Upon failure, errno is set to EAFNOSUPPORT if the af
+   argument is invalid or ENOSPC if the size of the result buffer is
+   inadequate.
+
+
+
+
+
+
+
+
+Gilligan, et al.             Informational                     [Page 32]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+6.4 Address Testing Macros
+
+   The following macros can be used to test for special IPv6 addresses.
+
+   #include <netinet/in.h>
+
+   int  IN6_IS_ADDR_UNSPECIFIED (const struct in6_addr *);
+   int  IN6_IS_ADDR_LOOPBACK    (const struct in6_addr *);
+   int  IN6_IS_ADDR_MULTICAST   (const struct in6_addr *);
+   int  IN6_IS_ADDR_LINKLOCAL   (const struct in6_addr *);
+   int  IN6_IS_ADDR_SITELOCAL   (const struct in6_addr *);
+   int  IN6_IS_ADDR_V4MAPPED    (const struct in6_addr *);
+   int  IN6_IS_ADDR_V4COMPAT    (const struct in6_addr *);
+
+   int  IN6_IS_ADDR_MC_NODELOCAL(const struct in6_addr *);
+   int  IN6_IS_ADDR_MC_LINKLOCAL(const struct in6_addr *);
+   int  IN6_IS_ADDR_MC_SITELOCAL(const struct in6_addr *);
+   int  IN6_IS_ADDR_MC_ORGLOCAL (const struct in6_addr *);
+   int  IN6_IS_ADDR_MC_GLOBAL   (const struct in6_addr *);
+
+   The first seven macros return true if the address is of the specified
+   type, or false otherwise.  The last five test the scope of a
+   multicast address and return true if the address is a multicast
+   address of the specified scope or false if the address is either not
+   a multicast address or not of the specified scope.
+
+   Note that IN6_IS_ADDR_LINKLOCAL and IN6_IS_ADDR_SITELOCAL return true
+   only for the two types of local-use IPv6 unicast addresses (Link-
+   Local and Site-Local) defined in [2], and that by this definition,
+   the IN6_IS_ADDR_LINKLOCAL macro returns false for the IPv6 loopback
+   address (::1).  These two macros do not return true for IPv6
+   multicast addresses of either link-local scope or site-local scope.
+
+7. Summary of New Definitions
+
+   The following list summarizes the constants, structure, and extern
+   definitions discussed in this memo, sorted by header.
+
+<net/if.h>      IF_NAMESIZE
+<net/if.h>      struct if_nameindex{};
+
+<netdb.h>       AI_ADDRCONFIG
+<netdb.h>       AI_ALL
+<netdb.h>       AI_CANONNAME
+<netdb.h>       AI_NUMERICHOST
+<netdb.h>       AI_NUMERICSERV
+<netdb.h>       AI_PASSIVE
+<netdb.h>       AI_V4MAPPED
+
+
+
+Gilligan, et al.             Informational                     [Page 33]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+<netdb.h>       EAI_AGAIN
+<netdb.h>       EAI_BADFLAGS
+<netdb.h>       EAI_FAIL
+<netdb.h>       EAI_FAMILY
+<netdb.h>       EAI_MEMORY
+<netdb.h>       EAI_NONAME
+<netdb.h>       EAI_OVERFLOW
+<netdb.h>       EAI_SERVICE
+<netdb.h>       EAI_SOCKTYPE
+<netdb.h>       EAI_SYSTEM
+<netdb.h>       NI_DGRAM
+<netdb.h>       NI_NAMEREQD
+<netdb.h>       NI_NOFQDN
+<netdb.h>       NI_NUMERICHOST
+<netdb.h>       NI_NUMERICSERV
+<netdb.h>       struct addrinfo{};
+
+<netinet/in.h>  IN6ADDR_ANY_INIT
+<netinet/in.h>  IN6ADDR_LOOPBACK_INIT
+<netinet/in.h>  INET6_ADDRSTRLEN
+<netinet/in.h>  INET_ADDRSTRLEN
+<netinet/in.h>  IPPROTO_IPV6
+<netinet/in.h>  IPV6_JOIN_GROUP
+<netinet/in.h>  IPV6_LEAVE_GROUP
+<netinet/in.h>  IPV6_MULTICAST_HOPS
+<netinet/in.h>  IPV6_MULTICAST_IF
+<netinet/in.h>  IPV6_MULTICAST_LOOP
+<netinet/in.h>  IPV6_UNICAST_HOPS
+<netinet/in.h>  IPV6_V6ONLY
+<netinet/in.h>  SIN6_LEN
+<netinet/in.h>  extern const struct in6_addr in6addr_any;
+<netinet/in.h>  extern const struct in6_addr in6addr_loopback;
+<netinet/in.h>  struct in6_addr{};
+<netinet/in.h>  struct ipv6_mreq{};
+<netinet/in.h>  struct sockaddr_in6{};
+
+<sys/socket.h>  AF_INET6
+<sys/socket.h>  PF_INET6
+<sys/socket.h>  struct sockaddr_storage;
+
+   The following list summarizes the function and macro prototypes
+   discussed in this memo, sorted by header.
+
+<arpa/inet.h>   int inet_pton(int, const char *, void *);
+<arpa/inet.h>   const char *inet_ntop(int, const void *,
+                               char *, socklen_t);
+
+
+
+
+
+Gilligan, et al.             Informational                     [Page 34]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+<net/if.h>      char *if_indextoname(unsigned int, char *);
+<net/if.h>      unsigned int if_nametoindex(const char *);
+<net/if.h>      void if_freenameindex(struct if_nameindex *);
+<net/if.h>      struct if_nameindex *if_nameindex(void);
+
+<netdb.h>       int getaddrinfo(const char *, const char *,
+                                const struct addrinfo *,
+                                struct addrinfo **);
+<netdb.h>       int getnameinfo(const struct sockaddr *, socklen_t,
+                  char *, socklen_t, char *, socklen_t, int);
+<netdb.h>       void freeaddrinfo(struct addrinfo *);
+<netdb.h>       const char *gai_strerror(int);
+
+<netinet/in.h>  int IN6_IS_ADDR_LINKLOCAL(const struct in6_addr *);
+<netinet/in.h>  int IN6_IS_ADDR_LOOPBACK(const struct in6_addr *);
+<netinet/in.h>  int IN6_IS_ADDR_MC_GLOBAL(const struct in6_addr *);
+<netinet/in.h>  int IN6_IS_ADDR_MC_LINKLOCAL(const struct in6_addr *);
+<netinet/in.h>  int IN6_IS_ADDR_MC_NODELOCAL(const struct in6_addr *);
+<netinet/in.h>  int IN6_IS_ADDR_MC_ORGLOCAL(const struct in6_addr *);
+<netinet/in.h>  int IN6_IS_ADDR_MC_SITELOCAL(const struct in6_addr *);
+<netinet/in.h>  int IN6_IS_ADDR_MULTICAST(const struct in6_addr *);
+<netinet/in.h>  int IN6_IS_ADDR_SITELOCAL(const struct in6_addr *);
+<netinet/in.h>  int IN6_IS_ADDR_UNSPECIFIED(const struct in6_addr *);
+<netinet/in.h>  int IN6_IS_ADDR_V4COMPAT(const struct in6_addr *);
+<netinet/in.h>  int IN6_IS_ADDR_V4MAPPED(const struct in6_addr *);
+
+8. Security Considerations
+
+   IPv6 provides a number of new security mechanisms, many of which need
+   to be accessible to applications.  Companion memos detailing the
+   extensions to the socket interfaces to support IPv6 security are
+   being written.
+
+9. Changes from RFC 2553
+
+   1. Add brief description of the history of this API and its relation
+      to the Open Group/IEEE/ISO standards.
+
+   2. Alignments with [3].
+
+   3. Removed all references to getipnodebyname() and getipnodebyaddr(),
+      which are deprecated in favor of getaddrinfo() and getnameinfo().
+
+   4. Added IPV6_V6ONLY IP level socket option to permit nodes to not
+      process IPv4 packets as IPv4 Mapped addresses in implementations.
+
+   5. Added SIIT to references and added new contributors.
+
+
+
+
+Gilligan, et al.             Informational                     [Page 35]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+   6. In previous versions of this specification, the sin6_flowinfo
+      field was associated with the IPv6 traffic class and flow label,
+      but its usage was not completely specified.  The complete
+      definition of the sin6_flowinfo field, including its association
+      with the traffic class or flow label, is now deferred to a future
+      specification.
+
+10. Acknowledgments
+
+   This specification's evolution and completeness were significantly
+   influenced by the efforts of Richard Stevens, who has passed on.
+   Richard's wisdom and talent made the specification what it is today.
+   The co-authors will long think of Richard with great respect.
+
+   Thanks to the many people who made suggestions and provided feedback
+   to this document, including:
+
+   Werner Almesberger, Ran Atkinson, Fred Baker, Dave Borman, Andrew
+   Cherenson, Alex Conta, Alan Cox, Steve Deering, Richard Draves,
+   Francis Dupont, Robert Elz, Brian Haberman, Jun-ichiro itojun Hagino,
+   Marc Hasson, Tom Herbert, Bob Hinden, Wan-Yen Hsu, Christian Huitema,
+   Koji Imada, Markus Jork, Ron Lee, Alan Lloyd, Charles Lynn, Dan
+   McDonald, Dave Mitton, Finnbarr Murphy, Thomas Narten, Josh Osborne,
+   Craig Partridge, Jean-Luc Richier, Bill Sommerfield, Erik Scoredos,
+   Keith Sklower, JINMEI Tatuya, Dave Thaler, Matt Thomas, Harvey
+   Thompson, Dean D. Throop, Karen Tracey, Glenn Trewitt, Paul Vixie,
+   David Waitzman, Carl Williams, Kazu Yamamoto, Vlad Yasevich, Stig
+   Venaas, and Brian Zill.
+
+   The getaddrinfo() and getnameinfo() functions are taken from an
+   earlier document by Keith Sklower.  As noted in that document,
+   William Durst, Steven Wise, Michael Karels, and Eric Allman provided
+   many useful discussions on the subject of protocol-independent name-
+   to-address translation, and reviewed early versions of Keith
+   Sklower's original proposal.  Eric Allman implemented the first
+   prototype of getaddrinfo().  The observation that specifying the pair
+   of name and service would suffice for connecting to a service
+   independent of protocol details was made by Marshall Rose in a
+   proposal to X/Open for a "Uniform Network Interface".
+
+   Craig Metz, Jack McCann, Erik Nordmark, Tim Hartrick, and Mukesh
+   Kacker made many contributions to this document.  Ramesh Govindan
+   made a number of contributions and co-authored an earlier version of
+   this memo.
+
+
+
+
+
+
+
+Gilligan, et al.             Informational                     [Page 36]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+11. References
+
+   [1]  Deering, S. and R. Hinden, "Internet Protocol, Version 6 (IPv6)
+        Specification", RFC 2460, December 1998.
+
+   [2]  Hinden, R. and S. Deering, "IP Version 6 Addressing
+        Architecture", RFC 2373, July 1998.
+
+   [3]  IEEE Std. 1003.1-2001 Standard for Information Technology --
+        Portable Operating System Interface (POSIX). Open Group
+        Technical Standard: Base Specifications, Issue 6, December 2001.
+        ISO/IEC 9945:2002.  http://www.opengroup.org/austin
+
+   [4]  Stevens, W. and M. Thomas, "Advanced Sockets API for IPv6", RFC
+        2292, February 1998.
+
+   [5]  Nordmark, E., "Stateless IP/ICMP Translation Algorithm (SIIT)",
+        RFC 2765, February 2000.
+
+   [6]  The Open Group Base Working Group
+        http://www.opengroup.org/platform/base.html
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Gilligan, et al.             Informational                     [Page 37]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+12. Authors' Addresses
+
+   Bob Gilligan
+   Intransa, Inc.
+   2870 Zanker Rd.
+   San Jose, CA 95134
+
+   Phone: 408-678-8647
+   EMail: gilligan@intransa.com
+
+
+   Susan Thomson
+   Cisco Systems
+   499 Thornall Street, 8th floor
+   Edison, NJ 08837
+
+   Phone: 732-635-3086
+   EMail: sethomso@cisco.com
+
+
+   Jim Bound
+   Hewlett-Packard Company
+   110 Spitbrook Road ZKO3-3/W20
+   Nashua, NH 03062
+
+   Phone: 603-884-0062
+   EMail: Jim.Bound@hp.com
+
+
+   Jack McCann
+   Hewlett-Packard Company
+   110 Spitbrook Road ZKO3-3/W20
+   Nashua, NH 03062
+
+   Phone: 603-884-2608
+   EMail: Jack.McCann@hp.com
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Gilligan, et al.             Informational                     [Page 38]
+
+RFC 3493       Basic Socket Interface Extensions for IPv6  February 2003
+
+
+13. Full Copyright Statement
+
+   Copyright (C) The Internet Society (2003).  All Rights Reserved.
+
+   This document and translations of it may be copied and furnished to
+   others, and derivative works that comment on or otherwise explain it
+   or assist in its implementation may be prepared, copied, published
+   and distributed, in whole or in part, without restriction of any
+   kind, provided that the above copyright notice and this paragraph are
+   included on all such copies and derivative works.  However, this
+   document itself may not be modified in any way, such as by removing
+   the copyright notice or references to the Internet Society or other
+   Internet organizations, except as needed for the purpose of
+   developing Internet standards in which case the procedures for
+   copyrights defined in the Internet Standards process must be
+   followed, or as required to translate it into languages other than
+   English.
+
+   The limited permissions granted above are perpetual and will not be
+   revoked by the Internet Society or its successors or assigns.
+
+   This document and the information contained herein is provided on an
+   "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
+   TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
+   BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
+   HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
+   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Acknowledgement
+
+   Funding for the RFC Editor function is currently provided by the
+   Internet Society.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Gilligan, et al.             Informational                     [Page 39]
+
diff --git a/ext/picotcp/RFC/rfc3649.txt b/ext/picotcp/RFC/rfc3649.txt
new file mode 100644
index 0000000..6a20e0d
--- /dev/null
+++ b/ext/picotcp/RFC/rfc3649.txt
@@ -0,0 +1,1907 @@
+
+
+
+
+
+
+Network Working Group                                           S. Floyd
+Request for Comments: 3649                                          ICSI
+Category: Experimental                                     December 2003
+
+
+               HighSpeed TCP for Large Congestion Windows
+
+Status of this Memo
+
+   This memo defines an Experimental Protocol for the Internet
+   community.  It does not specify an Internet standard of any kind.
+   Discussion and suggestions for improvement are requested.
+   Distribution of this memo is unlimited.
+
+Copyright Notice
+
+   Copyright (C) The Internet Society (2003).  All Rights Reserved.
+
+Abstract
+
+   The proposals in this document are experimental.  While they may be
+   deployed in the current Internet, they do not represent a consensus
+   that this is the best method for high-speed congestion control.  In
+   particular, we note that alternative experimental proposals are
+   likely to be forthcoming, and it is not well understood how the
+   proposals in this document will interact with such alternative
+   proposals.
+
+   This document proposes HighSpeed TCP, a modification to TCP's
+   congestion control mechanism for use with TCP connections with large
+   congestion windows.  The congestion control mechanisms of the current
+   Standard TCP constrain the congestion windows that can be achieved
+   by TCP in realistic environments.  For example, for a Standard TCP
+   connection with 1500-byte packets and a 100 ms round-trip time,
+   achieving a steady-state throughput of 10 Gbps would require an
+   average congestion window of 83,333 segments, and a packet drop rate
+   of at most one congestion event every 5,000,000,000 packets (or
+   equivalently, at most one congestion event every 1 2/3 hours).  This
+   is widely acknowledged as an unrealistic constraint.  To address this
+   limitation of TCP, this document proposes HighSpeed TCP, and solicits
+   experimentation and feedback from the wider community.
+
+
+
+
+
+
+
+
+
+
+Floyd                         Experimental                      [Page 1]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+Table of Contents
+
+   1. Introduction. . . . . . . . . . . . . . . . . . . . . . . . . .  2
+   2. The Problem Description.. . . . . . . . . . . . . . . . . . . .  3
+   3. Design Guidelines.. . . . . . . . . . . . . . . . . . . . . . .  4
+   4. Non-Goals.. . . . . . . . . . . . . . . . . . . . . . . . . . .  5
+   5. Modifying the TCP Response Function.. . . . . . . . . . . . . .  6
+   6. Fairness Implications of the HighSpeed Response
+      Function. . . . . . . . . . . . . . . . . . . . . . . . . . . .  9
+   7. Translating the HighSpeed Response Function into
+      Congestion Control Parameters . . . . . . . . . . . . . . . . . 12
+   8. An alternate, linear response functions.. . . . . . . . . . . . 13
+   9. Tradeoffs for Choosing Congestion Control Parameters. . . . . . 16
+      9.1. The Number of Round-Trip Times between Loss Events . . . . 17
+      9.2. The Number of Packet Drops per Loss Event, with Drop-Tail. 17
+   10. Related Issues . . . . . . . . . . . . . . . . . . . . . . . . 18
+      10.1. Slow-Start. . . . . . . . . . . . . . . . . . . . . . . . 18
+      10.2. Limiting burstiness on short time scales. . . . . . . . . 19
+      10.3. Other limitations on window size. . . . . . . . . . . . . 19
+      10.4. Implementation issues.. . . . . . . . . . . . . . . . . . 19
+   11. Deployment issues. . . . . . . . . . . . . . . . . . . . . . . 20
+      11.1. Deployment issues of HighSpeed TCP. . . . . . . . . . . . 20
+      11.2. Deployment issues of Scalable TCP . . . . . . . . . . . . 22
+   12. Related Work in HighSpeed TCP. . . . . . . . . . . . . . . . . 23
+   13. Relationship to other Work.. . . . . . . . . . . . . . . . . . 25
+   14. Conclusions. . . . . . . . . . . . . . . . . . . . . . . . . . 25
+   15. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . . 25
+   16. Normative References . . . . . . . . . . . . . . . . . . . . . 26
+   17. Informative References . . . . . . . . . . . . . . . . . . . . 26
+   18. Security Considerations. . . . . . . . . . . . . . . . . . . . 28
+   19. IANA Considerations. . . . . . . . . . . . . . . . . . . . . . 28
+   A.  TCP's Loss Event Rate in Steady-State. . . . . . . . . . . . . 29
+   B.  A table for a(w) and b(w). . . . . . . . . . . . . . . . . . . 30
+   C.  Exploring the time to converge to fairness . . . . . . . . . . 32
+       Author's Address . . . . . . . . . . . . . . . . . . . . . . . 33
+       Full Copyright Statement . . . . . . . . . . . . . . . . . . . 34
+
+1.  Introduction
+
+   This document proposes HighSpeed TCP, a modification to TCP's
+   congestion control mechanism for use with TCP connections with large
+   congestion windows.  In a steady-state environment, with a packet
+   loss rate p, the current Standard TCP's average congestion window is
+   roughly 1.2/sqrt(p) segments.  This places a serious constraint on
+   the congestion windows that can be achieved by TCP in realistic
+   environments.  For example, for a Standard TCP connection with 1500-
+   byte packets and a 100 ms round-trip time, achieving a steady-state
+   throughput of 10 Gbps would require an average congestion window of
+
+
+
+Floyd                         Experimental                      [Page 2]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+   83,333 segments, and a packet drop rate of at most one congestion
+   event every 5,000,000,000 packets (or equivalently, at most one
+   congestion event every 1 2/3 hours).  The average packet drop rate of
+   at most 2*10^(-10) needed for full link utilization in this
+   environment corresponds to a bit error rate of at most 2*10^(-14),
+   and this is an unrealistic requirement for current networks.
+
+   To address this fundamental limitation of TCP and of the TCP response
+   function (the function mapping the steady-state packet drop rate to
+   TCP's average sending rate in packets per round-trip time), this
+   document describes a modified TCP response function for regimes with
+   higher congestion windows.  This document also solicits
+   experimentation and feedback on HighSpeed TCP from the wider
+   community.
+
+   Because HighSpeed TCP's modified response function would only take
+   effect with higher congestion windows, HighSpeed TCP does not modify
+   TCP behavior in environments with heavy congestion, and therefore
+   does not introduce any new dangers of congestion collapse.  However,
+   if relative fairness between HighSpeed TCP connections is to be
+   preserved, then in our view any modification to the TCP response
+   function should be addressed in the IETF, rather than made as ad hoc
+   decisions by individual implementors or TCP senders.  Modifications
+   to the TCP response function would also have implications for
+   transport protocols that use TFRC and other forms of equation-based
+   congestion control, as these congestion control mechanisms directly
+   use the TCP response function [RFC3448].
+
+   This proposal for HighSpeed TCP focuses specifically on a proposed
+   change to the TCP response function, and its implications for TCP.
+   This document does not address what we view as a separate fundamental
+   issue, of the mechanisms required to enable best-effort connections
+   to *start* with large initial windows.  In our view, while HighSpeed
+   TCP proposes a somewhat fundamental change to the TCP response
+   function, at the same time it is a relatively simple change to
+   implement in a single TCP sender, and presents no dangers in terms of
+   congestion collapse.  In contrast, in our view, the problem of
+   enabling connections to *start* with large initial windows is
+   inherently more risky and structurally more difficult, requiring some
+   form of explicit feedback from all of the routers along the path.
+   This is another reason why we would propose addressing the problem of
+   starting with large initial windows separately, and on a separate
+   timetable, from the problem of modifying the TCP response function.
+
+
+
+
+
+
+
+
+Floyd                         Experimental                      [Page 3]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+2.  The Problem Description
+
+   This section describes the number of round-trip times between
+   congestion events required for a Standard TCP flow to achieve an
+   average throughput of B bps, given packets of D bytes and a round-
+   trip time of R seconds.  A congestion event refers to a window of
+   data with one or more dropped or ECN-marked packets (where ECN stands
+   for Explicit Congestion Notification).
+
+   From Appendix A, achieving an average TCP throughput of B bps
+   requires a loss event at most every BR/(12D) round-trip times.  This
+   is illustrated in Table 1, for R = 0.1 seconds and D = 1500 bytes.
+   The table also gives the average congestion window W of BR/(8D), and
+   the steady-state packet drop rate P of 1.5/W^2.
+
+    TCP Throughput (Mbps)   RTTs Between Losses     W       P
+    ---------------------   -------------------   ----    -----
+              1                    5.5             8.3    0.02
+             10                   55.5            83.3    0.0002
+            100                  555.5           833.3    0.000002
+           1000                 5555.5          8333.3    0.00000002
+          10000                55555.5         83333.3    0.0000000002
+
+   Table 1: RTTs Between Congestion Events for Standard TCP, for
+   1500-Byte Packets and a Round-Trip Time of 0.1 Seconds.
+
+   This document proposes HighSpeed TCP, a minimal modification to TCP's
+   increase and decrease parameters, for TCP connections with larger
+   congestion windows, to allow TCP to achieve high throughput with more
+   realistic requirements for the steady-state packet drop rate.
+   Equivalently, HighSpeed TCP has more realistic requirements for the
+   number of round-trip times between loss events.
+
+3.  Design Guidelines
+
+   Our proposal for HighSpeed TCP is motivated by the following
+   requirements:
+
+   *  Achieve high per-connection throughput without requiring
+      unrealistically low packet loss rates.
+
+   *  Reach high throughput reasonably quickly when in slow-start.
+
+   *  Reach high throughput without overly long delays when recovering
+      from multiple retransmit timeouts, or when ramping-up from a
+      period with small congestion windows.
+
+
+
+
+
+Floyd                         Experimental                      [Page 4]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+   *  No additional feedback or support required from routers:
+
+   For example, the goal is for acceptable performance in both ECN-
+   capable and non-ECN-capable environments, and with Drop-Tail as well
+   as with Active Queue Management such as RED in the routers.
+
+   *  No additional feedback required from TCP receivers.
+
+   *  TCP-compatible performance in environments with moderate or high
+      congestion (e.g., packet drop rates of 1% or higher):
+
+   Equivalently, the requirement is that there be no additional load on
+   the network (in terms of increased packet drop rates) in environments
+   with moderate or high congestion.
+
+   *  Performance at least as good as Standard TCP in environments with
+      moderate or high congestion.
+
+   *  Acceptable transient performance, in terms of increases in the
+      congestion window in one round-trip time, responses to severe
+      congestion, and convergence times to fairness.
+
+   Currently, users wishing to achieve throughputs of 1 Gbps or more
+   typically open up multiple TCP connections in parallel, or use MulTCP
+   [CO98,GRK99], which behaves roughly like the aggregate of N virtual
+   TCP connections.  While this approach suffices for the occasional
+   user on well-provisioned links, it leaves the parameter N to be
+   determined by the user, and results in more aggressive performance
+   and higher steady-state packet drop rates if used in environments
+   with periods of moderate or high congestion.  We believe that a new
+   approach is needed that offers more flexibility, more effectively
+   scales to a wide range of available bandwidths, and competes more
+   fairly with Standard TCP in congested environments.
+
+4.  Non-Goals
+
+   The following are explicitly *not* goals of our work:
+
+   *  Non-goal: TCP-compatible performance in environments with very low
+      packet drop rates.
+
+   We note that our proposal does not require, or deliver, TCP-
+   compatible performance in environments with very low packet drop
+   rates, e.g., with packet loss rates of 10^-5 or 10^-6.  As we discuss
+   later in this document, we assume that Standard TCP is unable to make
+   effective use of the available bandwidth in environments with loss
+
+
+
+
+
+Floyd                         Experimental                      [Page 5]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+   rates of 10^-6 in any case, so that it is acceptable and appropriate
+   for HighSpeed TCP to perform more aggressively than Standard TCP in
+   such an environment.
+
+   *  Non-goal: Ramping-up more quickly than allowed by slow-start.
+
+   It is our belief that ramping-up more quickly than allowed by slow-
+   start would necessitate more explicit feedback from routers along the
+   path.  The proposal for HighSpeed TCP is focused on changes to TCP
+   that could be effectively deployed in the current Internet
+   environment.
+
+   *  Non-goal: Avoiding oscillations in environments with only one-way,
+      long-lived flows all with the same round-trip times.
+
+   While we agree that attention to oscillatory behavior is useful,
+   avoiding oscillations in aggregate throughput has not been our
+   primary consideration, particularly for simplified environments
+   limited to one-way, long-lived flows all with the same, large round-
+   trip times.  Our assessment is that some oscillatory behavior in
+   these extreme environments is an acceptable price to pay for the
+   other benefits of HighSpeed TCP.
+
+5.  Modifying the TCP Response Function
+
+   The TCP response function, w = 1.2/sqrt(p), gives TCP's average
+   congestion window w in MSS-sized segments, as a function of the
+   steady-state packet drop rate p [FF98].  This TCP response function
+   is a direct consequence of TCP's Additive Increase Multiplicative
+   Decrease (AIMD) mechanisms of increasing the congestion window by
+   roughly one segment per round-trip time in the absence of congestion,
+   and halving the congestion window in response to a round-trip time
+   with a congestion event.  This response function for Standard TCP is
+   reflected in the table below.  In this proposal we restrict our
+   attention to TCP performance in environments with packet loss rates
+   of at most 10^-2, and so we can ignore the more complex response
+   functions that are required to model TCP performance in more
+   congested environments with retransmit timeouts.  From Appendix A, an
+   average congestion window of W corresponds to an average of 2/3 W
+   round-trip times between loss events for Standard TCP (with the
+   congestion window varying from 2/3 W to 4/3 W).
+
+
+
+
+
+
+
+
+
+
+Floyd                         Experimental                      [Page 6]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+     Packet Drop Rate P   Congestion Window W    RTTs Between Losses
+     ------------------   -------------------    -------------------
+            10^-2                     12                8
+            10^-3                     38               25
+            10^-4                    120               80
+            10^-5                    379              252
+            10^-6                   1200              800
+            10^-7                   3795             2530
+            10^-8                  12000             8000
+            10^-9                  37948            25298
+            10^-10                120000            80000
+
+   Table 2: TCP Response Function for Standard TCP.  The average
+   congestion window W in MSS-sized segments is given as a function of
+   the packet drop rate P.
+
+   To specify a modified response function for HighSpeed TCP, we use
+   three parameters, Low_Window, High_Window, and High_P.  To ensure TCP
+   compatibility, the HighSpeed response function uses the same response
+   function as Standard TCP when the current congestion window is at
+   most Low_Window, and uses the HighSpeed response function when the
+   current congestion window is greater than Low_Window.  In this
+   document we set Low_Window to 38 MSS-sized segments, corresponding to
+   a packet drop rate of 10^-3 for TCP.
+
+   To specify the upper end of the HighSpeed response function, we
+   specify the packet drop rate needed in the HighSpeed response
+   function to achieve an average congestion window of 83000 segments.
+   This is roughly the window needed to sustain 10 Gbps throughput, for
+   a TCP connection with the default packet size and round-trip time
+   used earlier in this document.  For High_Window set to 83000, we
+   specify High_P of 10^-7; that is, with HighSpeed TCP a packet drop
+   rate of 10^-7 allows the HighSpeed TCP connection to achieve an
+   average congestion window of 83000 segments.  We believe that this
+   loss rate sets an achievable target for high-speed environments,
+   while still allowing acceptable fairness for the HighSpeed response
+   function when competing with Standard TCP in environments with packet
+   drop rates of 10^-4 or 10^-5.
+
+   For simplicity, for the HighSpeed response function we maintain the
+   property that the response function gives a straight line on a log-
+   log scale (as does the response function for Standard TCP, for low to
+   moderate congestion).  This results in the following response
+   function, for values of the average congestion window W greater than
+   Low_Window:
+
+     W = (p/Low_P)^S Low_Window,
+
+
+
+
+Floyd                         Experimental                      [Page 7]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+   for Low_P the packet drop rate corresponding to Low_Window, and for S
+   as the following constant [FRS02]:
+
+     S = (log High_Window - log Low_Window)/(log High_P - log Low_P).
+
+   (In this paper, "log x" refers to the log base 10.)  For example, for
+   Low_Window set to 38, we have Low_P of 10^-3 (for compatibility with
+   Standard TCP).  Thus, for High_Window set to 83000 and High_P set to
+   10^-7, we get the following response function:
+
+     W = 0.12/p^0.835.                                    (1)
+
+   This HighSpeed response function is illustrated in Table 3 below.
+   For HighSpeed TCP, the number of round-trip times between losses,
+   1/(pW), equals 12.7 W^0.2, for W > 38 segments.
+
+     Packet Drop Rate P   Congestion Window W    RTTs Between Losses
+     ------------------   -------------------    -------------------
+            10^-2                    12                   8
+            10^-3                    38                  25
+            10^-4                   263                  38
+            10^-5                  1795                  57
+            10^-6                 12279                  83
+            10^-7                 83981                 123
+            10^-8                574356                 180
+            10^-9               3928088                 264
+            10^-10             26864653                 388
+
+   Table 3: TCP Response Function for HighSpeed TCP.  The average
+   congestion window W in MSS-sized segments is given as a function of
+   the packet drop rate P.
+
+   We believe that the problem of backward compatibility with Standard
+   TCP requires a response function that is quite close to that of
+   Standard TCP for loss rates of 10^-1, 10^-2, or 10^-3.  We believe,
+   however, that such stringent TCP-compatibility is not required for
+   smaller loss rates, and that an appropriate response function is one
+   that gives a plausible packet drop rate for a connection throughput
+   of 10 Gbps.  This also gives a slowly increasing number of round-trip
+   times between loss events as a function of a decreasing packet drop
+   rate.
+
+   Another way to look at the HighSpeed response function is to consider
+   that HighSpeed TCP is roughly emulating the congestion control
+   response of N parallel TCP connections, where N is initially one, and
+   where N increases as a function of the HighSpeed TCP's congestion
+   window.  Thus for the HighSpeed response function in Equation (1)
+   above, the response function can be viewed as equivalent to that of
+
+
+
+Floyd                         Experimental                      [Page 8]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+   N(W) parallel TCP connections, where N(W) varies as a function of the
+   congestion window W.  Recall that for a single standard TCP
+   connection, the average congestion window equals 1.2/sqrt(p).  For N
+   parallel TCP connections, the aggregate congestion window for the N
+   connections equals N*1.2/sqrt(p).  From the HighSpeed response
+   function in Equation (1) and the relationship above, we can derive
+   the following:
+
+    N(W) = 0.23*W^(0.4)
+
+   for N(W) the number of parallel TCP connections emulated by the
+   HighSpeed TCP response function, and for N(W) >= 1.  This is shown in
+   Table 4 below.
+
+     Congestion Window W         Number N(W) of Parallel TCPs
+     -------------------         -------------------------
+              1                            1
+             10                            1
+            100                            1.4
+          1,000                            3.6
+         10,000                            9.2
+        100,000                           23.0
+
+   Table 4: Number N(W) of parallel TCP connections roughly emulated by
+   the HighSpeed TCP response function.
+
+   In this document, we do not attempt to seriously evaluate the
+   HighSpeed response function for congestion windows greater than
+   100,000 packets.  We believe that we will learn more about the
+   requirements for sustaining the throughput of best-effort connections
+   in that range as we gain more experience with HighSpeed TCP with
+   congestion windows of thousands and tens of thousands of packets.
+   There also might be limitations to the per-connection throughput that
+   can be realistically achieved for best-effort traffic, in terms of
+   congestion window of hundreds of thousands of packets or more, in the
+   absence of additional support or feedback from the routers along the
+   path.
+
+6.  Fairness Implications of the HighSpeed Response Function
+
+   The Standard and HighSpeed Response Functions can be used directly to
+   infer the relative fairness between flows using the two response
+   functions.  For example, given a packet drop rate P, assume that
+   Standard TCP has an average congestion window of W_Standard, and
+   HighSpeed TCP has a higher average congestion window of W_HighSpeed.
+
+
+
+
+
+
+Floyd                         Experimental                      [Page 9]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+   In this case, a single HighSpeed TCP connection is receiving
+   W_HighSpeed/W_Standard times the throughput of a single Standard TCP
+   connection competing in the same environment.
+
+   This relative fairness is illustrated below in Table 5, for the
+   parameters used for the HighSpeed response function in the section
+   above.  The second column gives the relative fairness, for the
+   steady-state packet drop rate specified in the first column.  To help
+   calibrate, the third column gives the aggregate average congestion
+   window for the two TCP connections, and the fourth column gives the
+   bandwidth that would be needed by the two connections to achieve that
+   aggregate window and packet drop rate, given 100 ms round-trip times
+   and 1500-byte packets.
+
+     Packet Drop Rate P   Fairness  Aggregate Window  Bandwidth
+     ------------------   --------  ----------------  ---------
+            10^-2            1.0              24        2.8 Mbps
+            10^-3            1.0              76        9.1 Mbps
+            10^-4            2.2             383       45.9 Mbps
+            10^-5            4.7            2174      260.8 Mbps
+            10^-6           10.2           13479        1.6 Gbps
+            10^-7           22.1           87776       10.5 Gbps
+
+   Table 5: Relative Fairness between the HighSpeed and Standard
+   Response Functions.
+
+   Thus, for packet drop rates of 10^-4, a flow with the HighSpeed
+   response function can expect to receive 2.2 times the throughput of a
+   flow using the Standard response function, given the same round-trip
+   times and packet sizes.  With packet drop rates of 10^-6 (or 10^-7),
+   the unfairness is more severe, and we have entered the regime where a
+   Standard TCP connection requires at most one congestion event every
+   800 (or 2530) round-trip times in order to make use of the available
+   bandwidth.  Our judgement would be that there are not a lot of TCP
+   connections effectively operating in this regime today, with
+   congestion windows of thousands of packets, and that therefore the
+   benefits of the HighSpeed response function would outweigh the
+   unfairness that would be experienced by Standard TCP in this regime.
+   However, one purpose of this document is to solicit feedback on this
+   issue.  The parameter Low_Window determines directly the point of
+   divergence between the Standard and HighSpeed Response Functions.
+
+   The third column of Table 5, the Aggregate Window, gives the
+   aggregate congestion window of the two competing TCP connections,
+   with HighSpeed and Standard TCP, given the packet drop rate specified
+   in the first column.  From Table 5, a HighSpeed TCP connection would
+   receive ten times the bandwidth of a Standard TCP in an environment
+   with a packet drop rate of 10^-6.  This would occur when the two
+
+
+
+Floyd                         Experimental                     [Page 10]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+   flows sharing a single pipe achieved an aggregate window of 13479
+   packets.  Given a round-trip time of 100 ms and a packet size of 1500
+   bytes, this would occur with an available bandwidth for the two
+   competing flows of 1.6 Gbps.
+
+   Next we consider the time that it takes a standard or HighSpeed TCP
+   flow to converge to fairness against a pre-existing HighSpeed TCP
+   flow.  The worst case for convergence to fairness occurs when a new
+   flow is starting up, competing against a high-bandwidth existing
+   flow, and the new flow suffers a packet drop and exits slow-start
+   while its window is still small.  In the worst case, consider that
+   the new flow has entered the congestion avoidance phase while its
+   window is only one packet.  A standard TCP flow in congestion
+   avoidance increases its window by at most one packet per round-trip
+   time, and after N round-trip times has only achieved a window of N
+   packets (when starting with a window of 1 in the first round-trip
+   time).  In contrast, a HighSpeed TCP flow increases much faster than
+   a standard TCP flow while in the congestion avoidance phase, and we
+   can expect its convergence to fairness to be much better.  This is
+   shown in Table 6 below.  The script used to generate this table is
+   given in Appendix C.
+
+     RTT  HS_Window Standard_TCP_Window
+     ---  --------- -------------------
+     100       131        100
+     200       475        200
+     300      1131        300
+     400      2160        400
+     500      3601        500
+     600      5477        600
+     700      7799        700
+     800     10567        800
+     900     13774        900
+    1000     17409       1000
+    1100     21455       1100
+    1200     25893       1200
+    1300     30701       1300
+    1400     35856       1400
+    1500     41336       1500
+    1600     47115       1600
+    1700     53170       1700
+    1800     59477       1800
+    1900     66013       1900
+    2000     72754       2000
+
+   Table 6:  For a HighSpeed and a Standard TCP connection, the
+   congestion window during congestion avoidance phase (starting with a
+   congestion window of 1 packet during RTT 1).
+
+
+
+Floyd                         Experimental                     [Page 11]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+   The classic paper on relative fairness is from Chiu and Jain [CJ89].
+   This paper shows that AIMD (Additive Increase Multiplicative
+   Decrease) converges to fairness in an environment with synchronized
+   congestion events.  From [CJ89], it is easy to see that MIMD and AIAD
+   do not converge to fairness in this environment.  However, the
+   results of [CJ89] do not apply to an asynchronous environment such as
+   that of the current Internet, where the frequency of congestion
+   feedback can be different for different flows.  For example, it has
+   been shown that MIMD converges to fair states in a model with
+   proportional instead of synchronous feedback in terms of packet drops
+   [GV02].  Thus, we are not concerned about abandoning a strict model
+   of AIMD for HighSpeed TCP.  However, we note that in an environment
+   with Drop-Tail queue management, there is likely to be some
+   synchronization of packet drops.  In this environment, the model of
+   completely synchronous feedback does not hold, but the model of
+   completely asynchronous feedback is not accurate either.  Fairness in
+   Drop-Tail environments is discussed in more detail in Sections 9 and
+   12.
+
+7.  Translating the HighSpeed Response Function into Congestion Control
+     Parameters
+
+   For equation-based congestion control such as TFRC, the HighSpeed
+   Response Function above could be used directly by the TFRC congestion
+   control mechanism.  However, for TCP the HighSpeed response function
+   has to be translated into additive increase and multiplicative
+   decrease parameters.  The HighSpeed response function cannot be
+   achieved by TCP with an additive increase of one segment per round-
+   trip time and a multiplicative decrease of halving the current
+   congestion window; HighSpeed TCP will have to modify either the
+   increase or the decrease parameter, or both.  We have concluded that
+   HighSpeed TCP is most likely to achieve an acceptable compromise
+   between moderate increases and timely decreases by modifying both the
+   increase and the decrease parameter.
+
+   That is, for HighSpeed TCP let the congestion window increase by a(w)
+   segments per round-trip time in the absence of congestion, and let
+   the congestion window decrease to w(1-b(w)) segments in response to a
+   round-trip time with one or more loss events.  Thus, in response to a
+   single acknowledgement HighSpeed TCP increases its congestion window
+   in segments as follows:
+
+    w <- w + a(w)/w.
+
+   In response to a congestion event, HighSpeed TCP decreases as
+   follows:
+
+    w <- (1-b(w))w.
+
+
+
+Floyd                         Experimental                     [Page 12]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+   For Standard TCP, a(w) = 1 and b(w) = 1/2, regardless of the value of
+   w.  HighSpeed TCP uses the same values of a(w) and b(w) for w <=
+   Low_Window.  This section specifies a(w) and b(w) for HighSpeed TCP
+   for larger values of w.
+
+   For w = High_Window, we have specified a loss rate of High_P.  From
+   [FRS02], or from elementary calculations, this requires the following
+   relationship between a(w) and b(w) for w = High_Window:
+
+    a(w) = High_Window^2 * High_P * 2 * b(w)/(2-b(w)).     (2)
+
+   We use the parameter High_Decrease to specify the decrease parameter
+   b(w) for w = High_Window, and use Equation (2) to derive the increase
+   parameter a(w) for w = High_Window.  Along with High_P = 10^-7 and
+   High_Window = 83000, for example, we specify High_Decrease = 0.1,
+   specifying that b(83000) = 0.1, giving a decrease of 10% after a
+   congestion event.  Equation (2) then gives a(83000) = 72, for an
+   increase of 72 segments, or just under 0.1%, within a round-trip
+   time, for w = 83000.
+
+   This moderate decrease strikes us as acceptable, particularly when
+   coupled with the role of TCP's ACK-clocking in limiting the sending
+   rate in response to more severe congestion [BBFS01].  A more severe
+   decrease would require a more aggressive increase in the congestion
+   window for a round-trip time without congestion.  In particular, a
+   decrease factor High_Decrease of 0.5, as in Standard TCP, would
+   require an increase of 459 segments per round-trip time when w =
+   83000.
+
+   Given decrease parameters of b(w) = 1/2 for w = Low_Window, and b(w)
+   = High_Decrease for w = High_Window, we are left to specify the value
+   of b(w) for other values of w > Low_Window.  From [FRS02], we let
+   b(w) vary linearly as the log of w, as follows:
+
+    b(w) = (High_Decrease - 0.5) (log(w)-log(W)) / (log(W_1)-log(W)) +
+   0.5,
+
+   for W = Low_Window and W_1 = High_Window.  The increase parameter
+   a(w) can then be computed as follows:
+
+    a(w) = w^2 * p(w) * 2 * b(w)/(2-b(w)),
+
+   for p(w) the packet drop rate for congestion window w.  From
+   inverting Equation (1), we get p(w) as follows:
+
+    p(w) = 0.078/w^1.2.
+
+
+
+
+
+Floyd                         Experimental                     [Page 13]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+   We assume that experimental implementations of HighSpeed TCP for
+   further investigation will use a pre-computed look-up table for
+   finding a(w) and b(w).  For example, the implementation from Tom
+   Dunigan adjusts the a(w) and b(w) parameters every 0.1 seconds.  In
+   the appendix we give such a table for our default values of
+   Low_Window = 38, High_Window = 83,000, High_P = 10^-7, and
+   High_Decrease = 0.1.  These are also the default values in the NS
+   simulator; example simulations in NS can be run with the command
+   "./test-all-tcpHighspeed" in the directory tcl/test.
+
+8.  An alternate, linear response function
+
+   In this section we explore an alternate, linear response function for
+   HighSpeed TCP that has been proposed by a number of other people, in
+   particular by Glenn Vinnicombe and Tom Kelly.  Similarly, it has been
+   suggested by others that a less "ad-hoc" guideline for a response
+   function for HighSpeed TCP would be to specify a constant value for
+   the number of round-trip times between congestion events.
+
+   Assume that we keep the value of Low_Window as 38 MSS-sized segments,
+   indicating when the HighSpeed response function diverges from the
+   current TCP response function, but that we modify the High_Window and
+   High_P parameters that specify the upper range of the HighSpeed
+   response function.  In particular, consider the response function
+   given by High_Window = 380,000 and High_P = 10^-7, with Low_Window =
+   38 and Low_P = 10^-3 as before.
+
+   Using the equations in Section 5, this would give the following
+   Linear response function, for w > Low_Window:
+
+     W = 0.038/p.
+
+   This Linear HighSpeed response function is illustrated in Table 7
+   below.  For HighSpeed TCP, the number of round-trip times between
+   losses, 1/(pW), equals 1/0.038, or equivalently, 26, for W > 38
+   segments.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Floyd                         Experimental                     [Page 14]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+     Packet Drop Rate P   Congestion Window W    RTTs Between Losses
+     ------------------   -------------------    -------------------
+            10^-2                    12                   8
+            10^-3                    38                  26
+            10^-4                   380                  26
+            10^-5                  3800                  26
+            10^-6                 38000                  26
+            10^-7                380000                  26
+            10^-8               3800000                  26
+            10^-9              38000000                  26
+            10^-10            380000000                  26
+
+   Table 7: An Alternate, Linear TCP Response Function for HighSpeed
+   TCP.  The average congestion window W in MSS-sized segments is given
+   as a function of the packet drop rate P.
+
+   Given a constant decrease b(w) of 1/2, this would give an increase
+   a(w) of w/Low_Window, or equivalently, a constant increase of
+   1/Low_Window packets per acknowledgement, for w > Low_Window.
+   Another possibility is Scalable TCP [K03], which uses a fixed
+   decrease b(w) of 1/8 and a fixed increase per acknowledgement of
+   0.01.  This gives an increase a(w) per window of 0.005 w, for a TCP
+   with delayed acknowledgements, for pure MIMD.
+
+   The relative fairness between the alternate Linear response function
+   and the standard TCP response function is illustrated below in Table
+   8.
+
+     Packet Drop Rate P   Fairness  Aggregate Window  Bandwidth
+     ------------------   --------  ----------------  ---------
+            10^-2            1.0              24        2.8 Mbps
+            10^-3            1.0              76        9.1 Mbps
+            10^-4            3.2             500       60.0 Mbps
+            10^-5           15.1            4179      501.4 Mbps
+            10^-6           31.6           39200        4.7 Gbps
+            10^-7          100.1          383795       46.0 Gbps
+
+   Table 8: Relative Fairness between the Linear HighSpeed and Standard
+   Response Functions.
+
+   One attraction of the linear response function is that it is scale-
+   invariant, with a fixed increase in the congestion window per
+   acknowledgement, and a fixed number of round-trip times between loss
+   events.  My own assumption would be that having a fixed length for
+   the congestion epoch in round-trip times, regardless of the packet
+   drop rate, would be a poor fit for an imprecise and imperfect world
+   with routers with a range of queue management mechanisms, such as the
+   Drop-Tail queue management that is common today.  For example, a
+
+
+
+Floyd                         Experimental                     [Page 15]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+   response function with a fixed length for the congestion epoch in
+   round-trip times might give less clearly-differentiated feedback in
+   an environment with steady-state background losses at fixed intervals
+   for all flows (as might occur with a wireless link with occasional
+   short error bursts, giving losses for all flows every N seconds
+   regardless of their sending rate).
+
+   While it is not a goal to have perfect fairness in an environment
+   with synchronized losses, it would be good to have moderately
+   acceptable performance in this regime.  This goal might argue against
+   a response function with a constant number of round-trip times
+   between congestion events.  However, this is a question that could
+   clearly use additional research and investigation.  In addition,
+   flows with different round-trip times would have different time
+   durations for congestion epochs even in the model with a linear
+   response function.
+
+   The third column of Table 8, the Aggregate Window, gives the
+   aggregate congestion window of two competing TCP connections, one
+   with Linear HighSpeed TCP and one with Standard TCP, given the packet
+   drop rate specified in the first column.  From Table 8, a Linear
+   HighSpeed TCP connection would receive fifteen times the bandwidth of
+   a Standard TCP in an environment with a packet drop rate of 10^-5.
+   This would occur when the two flows sharing a single pipe achieved an
+   aggregate window of 4179 packets.  Given a round-trip time of 100 ms
+   and a packet size of 1500 bytes, this would occur with an available
+   bandwidth for the two competing flows of 501 Mbps.  Thus, because the
+   Linear HighSpeed TCP is more aggressive than the HighSpeed TCP
+   proposed above, it also is less fair when competing with Standard TCP
+   in a high-bandwidth environment.
+
+9.  Tradeoffs for Choosing Congestion Control Parameters
+
+   A range of metrics can be used for evaluating choices for congestion
+   control parameters for HighSpeed TCP.  My assumption in this section
+   is that for a response function of the form w = c/p^d, for constant c
+   and exponent d, the only response functions that would be considered
+   are response functions with 1/2 <= d <= 1.  The two ends of this
+   spectrum are represented by current TCP, with d = 1/2, and by the
+   linear response function described in Section 8 above, with d = 1.
+   HighSpeed TCP lies somewhere in the middle of the spectrum, with d =
+   0.835.
+
+   Response functions with exponents less than 1/2 can be eliminated
+   from consideration because they would be even worse than standard TCP
+   in accommodating connections with high congestion windows.
+
+
+
+
+
+Floyd                         Experimental                     [Page 16]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+9.1.  The Number of Round-Trip Times between Loss Events
+
+   Response functions with exponents greater than 1 can be eliminated
+   from consideration because for these response functions, the number
+   of round-trip times between loss events decreases as congestion
+   decreases.  For a response function of w = c/p^d, with one loss event
+   or congestion event every 1/p packets, the number of round-trip times
+   between loss events is w^((1/d)-1)/c^(1/d).  Thus, for standard TCP
+   the number of round-trip times between loss events is linear in w.
+   In contrast, one attraction of the linear response function, as
+   described in Section 8 above, is that it is scale-invariant, in terms
+   of a fixed increase in the congestion window per acknowledgement, and
+   a fixed number of round-trip times between loss events.
+
+   However, for a response function with d > 1, the number of round-
+   trip times between loss events would be proportional to w^((1/d)-1),
+   for a negative exponent ((1/d)-1), getting smaller as w increases.
+   This would seem undesirable.
+
+9.2.  The Number of Packet Drops per Loss Event, with Drop-Tail
+
+   A TCP connection increases its sending rate by a(w) packets per
+   round-trip time, and in a Drop-Tail environment, this is likely to
+   result in a(w) dropped packets during a single loss event.  One
+   attraction of standard TCP is that it has a fixed increase per
+   round-trip time of one packet, minimizing the number of packets that
+   would be dropped in a Drop-Tail environment.  For an environment with
+   some form of Active Queue Management, and in particular for an
+   environment that uses ECN, the number of packets dropped in a single
+   congestion event would not be a problem.  However, even in these
+   environments, larger increases in the sending rate per round-trip
+   time result in larger stresses on the ability of the queues in the
+   router to absorb the fluctuations.
+
+   HighSpeed TCP plays a middle ground between the metrics of a moderate
+   number of round-trip times between loss events, and a moderate
+   increase in the sending rate per round-trip time.  As shown in
+   Appendix B, for a congestion window of 83,000 packets, HighSpeed TCP
+   increases its sending rate by 70 packets per round-trip time,
+   resulting in at most 70 packet drops when the buffer overflows in a
+   Drop-Tail environment.  This increased aggressiveness is the price
+   paid by HighSpeed TCP for its increased scalability.  A large number
+   of packets dropped per congestion event could result in synchronized
+   drops from multiple flows, with a possible loss of throughput as a
+   result.
+
+
+
+
+
+
+Floyd                         Experimental                     [Page 17]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+   Scalable TCP has an increase a(w) of 0.005 w packets per round-trip
+   time.  For a congestion window of 83,000 packets, this gives an
+   increase of 415 packets per round-trip time, resulting in roughly 415
+   packet drops per congestion event in a Drop-Tail environment.
+
+   Thus, HighSpeed TCP and its variants place increased demands on queue
+   management in routers, relative to Standard TCP.  (This is rather
+   similar to the increased demands on queue management that would
+   result from using N parallel TCP connections instead of a single
+   Standard TCP connection.)
+
+10.  Related Issues
+
+10.1.  Slow-Start
+
+   A companion internet-draft on "Limited Slow-Start for TCP with Large
+   Congestion Windows" [F02b] proposes a modification to TCP's slow-
+   start procedure that can significantly improve the performance of TCP
+   connections slow-starting up to large congestion windows.  For TCP
+   connections that are able to use congestion windows of thousands (or
+   tens of thousands) of MSS-sized segments (for MSS the sender's
+   MAXIMUM SEGMENT SIZE), the current slow-start procedure can result in
+   increasing the congestion window by thousands of segments in a single
+   round-trip time.  Such an increase can easily result in thousands of
+   packets being dropped in one round-trip time.  This is often
+   counter-productive for the TCP flow itself, and is also hard on the
+   rest of the traffic sharing the congested link.
+
+   [F02b] proposes Limited Slow-Start, limiting the number of segments
+   by which the congestion window is increased for one window of data
+   during slow-start, in order to improve performance for TCP
+   connections with large congestion windows.  We have separated out
+   Limited Slow-Start to a separate draft because it can be used both
+   with Standard and with HighSpeed TCP.
+
+   Limited Slow-Start is illustrated in the NS simulator, for snapshots
+   after May 1, 2002, in the tests "./test-all-tcpHighspeed tcp1A" and
+   "./test-all-tcpHighspeed tcpHighspeed1" in the subdirectory
+   "tcl/test".
+
+   In order for best-effort flows to safely start-up faster than slow-
+   start, e.g., in future high-bandwidth networks, we believe that it
+   would be necessary for the flow to have explicit feedback from the
+   routers along the path.  There are a number of proposals for this,
+   ranging from a minimal proposal for an IP option that allows TCP SYN
+   packets to collect information from routers along the path about the
+   allowed initial sending rate [J02], to proposals with more power that
+   require more fine-tuned and continuous feedback from routers.  These
+
+
+
+Floyd                         Experimental                     [Page 18]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+   proposals are all somewhat longer-term proposals than the HighSpeed
+   TCP proposal in this document, requiring longer lead times and more
+   coordination for deployment, and will be discussed in later
+   documents.
+
+10.2.  Limiting burstiness on short time scales
+
+   Because the congestion window achieved by a HighSpeed TCP connection
+   could be quite large, there is a possibility for the sender to send a
+   large burst of packets in response to a single acknowledgement.  This
+   could happen, for example, when there is congestion or reordering on
+   the reverse path, and the sender receives an acknowledgement
+   acknowledging hundreds or thousands of new packets.  Such a burst
+   would also result if the application was idle for a short period of
+   time less than a round-trip time, and then suddenly had lots of data
+   available to send.  In this case, it would be useful for the
+   HighSpeed TCP connection to have some method for limiting bursts.
+
+   In this document, we do not specify TCP mechanisms for reducing the
+   short-term burstiness.  One possible mechanism is to use some form of
+   rate-based pacing, and another possibility is to use maxburst, which
+   limits the number of packets that are sent in response to a single
+   acknowledgement.  We would caution, however, against a permanent
+   reduction in the congestion window as a mechanism for limiting
+   short-term bursts.  Such a mechanism has been deployed in some TCP
+   stacks, and our view would be that using permanent reductions of the
+   congestion window to reduce transient bursts would be a bad idea
+   [Fl03].
+
+10.3.  Other limitations on window size
+
+   The TCP header uses a 16-bit field to report the receive window size
+   to the sender.  Unmodified, this allows a window size of at most
+   2**16 = 65K bytes.  With window scaling, the maximum window size is
+   2**30 = 1073M bytes [RFC 1323].  Given 1500-byte packets, this allows
+   a window of up to 715,000 packets.
+
+10.4.  Implementation issues
+
+   One implementation issue that has been raised with HighSpeed TCP is
+   that with congestion windows of 4MB or more, the handling of
+   successive SACK packets after a packet is dropped becomes very time-
+   consuming at the TCP sender [S03].  Tom Kelly's Scalable TCP includes
+   a "SACK Fast Path" patch that addresses this problem.
+
+   The issues addressed in the Web100 project, the Net100 project, and
+   related projects about the tuning necessary to achieve high bandwidth
+   data rates with TCP apply to HighSpeed TCP as well [Net100, Web100].
+
+
+
+Floyd                         Experimental                     [Page 19]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+11.  Deployment issues
+
+11.1.  Deployment issues of HighSpeed TCP
+
+   We do not claim that the HighSpeed TCP modification to TCP described
+   in this paper is an optimal transport protocol for high-bandwidth
+   environments.  Based on our experiences with HighSpeed TCP in the NS
+   simulator [NS], on simulation studies [SA03], and on experimental
+   reports [ABLLS03,D02,CC03,F03], we believe that HighSpeed TCP
+   improves the performance of TCP in high-bandwidth environments, and
+   we are documenting it for the benefit of the IETF community.  We
+   encourage the use of HighSpeed TCP, and of its underlying response
+   function, and we further encourage feedback about operational
+   experiences with this or related modifications.
+
+   We note that in environments typical of much of the current Internet,
+   HighSpeed TCP behaves exactly as does Standard TCP today.  This is
+   the case any time the congestion window is less than 38 segments.
+
+    Bandwidth   Avg Cwnd w (pkts)    Increase a(w)   Decrease b(w)
+    ---------   -----------------    -------------   -------------
+      1.5 Mbps         12.5               1              0.50
+     10 Mbps           83                 1              0.50
+    100 Mbps          833                 6              0.35
+      1 Gbps         8333                26              0.22
+     10 Gbps        83333                70              0.10
+
+   Table 9: Performance of a HighSpeed TCP connection
+
+   To help calibrate, Table 9 considers a TCP connection with 1500-byte
+   packets, an RTT of 100 ms (including average queueing delay), and no
+   competing traffic, and shows the average congestion window if that
+   TCP connection had a pipe all to itself and fully used the link
+   bandwidth, for a range of bandwidths for the pipe.  This assumes that
+   the TCP connection would use Table 12 in determining its increase and
+   decrease parameters.  The first column of Table 9 gives the
+   bandwidth, and the second column gives the average congestion window
+   w needed to utilize that bandwidth.  The third column shows the
+   increase a(w) in segments per RTT for window w.  The fourth column
+   shows the decrease b(w) for that window w (where the TCP sender
+   decreases the congestion window from w to w(1-b(w)) segments after a
+   loss event).  When a loss occurs we note that the actual congestion
+   window is likely to be greater than the average congestion window w
+   in column 2, so the decrease parameter used could be slightly smaller
+   than the one given in column 4 of Table 9.
+
+   Table 9 shows that a HighSpeed TCP over a 10 Mbps link behaves
+   exactly the same as a Standard TCP connection, even in the absence of
+
+
+
+Floyd                         Experimental                     [Page 20]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+   competing traffic.  One can think of the congestion window staying
+   generally in the range of 55 to 110 segments, with the HighSpeed TCP
+   behavior being exactly the same as the behavior of Standard TCP.  (If
+   the congestion window is ever 118 segments or more, then the
+   HighSpeed TCP increases by two segments per RTT instead of by one,
+   and uses a decrease parameter of 0.44 instead of 0.50.)
+
+   Table 9 shows that for a HighSpeed TCP connection over a 100 Mbps
+   link, with no competing traffic, HighSpeed TCP behaves roughly as
+   aggressively as six parallel TCP connections, increasing its
+   congestion window by roughly six segments per round-trip time, and
+   with a decrease parameter of roughly 1/3 (corresponding to decreasing
+   down to 2/3-rds of its old congestion window, rather than to half, in
+   response to a loss event).
+
+   For a Standard TCP connection in this environment, the congestion
+   window could be thought of as generally varying in the range of 550
+   to 1100 segments, with an average packet drop rate of 2.2 * 10^-6
+   (corresponding to a bit error rate of 1.8 * 10^-10), or equivalently,
+   roughly 55 seconds between congestion events.  While a Standard TCP
+   connection could sustain such a low packet drop rate in a carefully
+   controlled environment with minimal competing traffic, we would
+   contend that in an uncontrolled best-effort environment with even a
+   small amount of competing traffic, the occasional congestion events
+   from smaller competing flows could easily be sufficient to prevent a
+   Standard TCP flow with no lower-speed bottlenecks from fully
+   utilizing the available bandwidth of the underutilized 100 Mbps link.
+
+   That is, we would contend that in the environment of 100 Mbps links
+   with a significant amount of available bandwidth, Standard TCP would
+   sometimes be unable to fully utilize the link bandwidth, and that
+   HighSpeed TCP would be an improvement in this regard.  We would
+   further contend that in this environment, the behavior of HighSpeed
+   TCP is sufficiently close to that of Standard TCP that HighSpeed TCP
+   would be safe to deploy in the current Internet.  We note that
+   HighSpeed TCP can only use high congestion windows if allowed by the
+   receiver's advertised window size.  As a result, even if HighSpeed
+   TCP was ubiquitously deployed in the Internet, the impact would be
+   limited to those TCP connections with an advertised window from the
+   receiver of 118 MSS or larger.
+
+   We do not believe that the deployment of HighSpeed TCP would serve as
+   a block to the possible deployment of alternate experimental
+   protocols for high-speed congestion control, such as Scalable TCP,
+   XCP [KHR02], or FAST TCP [JWL03].  In particular, we don't expect
+   HighSpeed TCP to interact any more poorly with alternative
+   experimental proposals than would the N parallel TCP connections
+   commonly used today in the absence of HighSpeed TCP.
+
+
+
+Floyd                         Experimental                     [Page 21]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+11.2.  Deployment issues of Scalable TCP
+
+   We believe that Scalable TCP and HighSpeed TCP have sufficiently
+   similar response functions that they could easily coexist in the
+   Internet.  However, we have not investigated Scalable TCP
+   sufficiently to be able to claim, in this document, that Scalable TCP
+   is safe for a widespread deployment in the current Internet.
+
+    Bandwidth   Avg Cwnd w (pkts)    Increase a(w)   Decrease b(w)
+    ---------   -----------------    -------------   -------------
+      1.5 Mbps         12.5               1              0.50
+     10 Mbps           83                 0.4            0.125
+    100 Mbps          833                 4.1            0.125
+      1 Gbps         8333                41.6            0.125
+     10 Gbps        83333               416.5            0.125
+
+   Table 10: Performance of a Scalable TCP connection.
+
+   Table 10 shows the performance of a Scalable TCP connection with
+   1500-byte packets, an RTT of 100 ms (including average queueing
+   delay), and no competing traffic.  The TCP connection is assumed to
+   use delayed acknowledgements.  The first column of Table 10 gives the
+   bandwidth, the second column gives the average congestion window
+   needed to utilize that bandwidth, and the third and fourth columns
+   give the increase and decrease parameters.
+
+   Note that even in an environment with a 10 Mbps link, Scalable TCP's
+   behavior is considerably different from that of Standard TCP.  The
+   increase parameter is smaller than that of Standard TCP, and the
+   decrease is smaller also, 1/8-th instead of 1/2.  That is, for 10
+   Mbps links, Scalable TCP increases less aggressively than Standard
+   TCP or HighSpeed TCP, but decreases less aggressively as well.
+
+   In an environment with a 100 Mbps link, Scalable TCP has an increase
+   parameter of roughly four segments per round-trip time, with the same
+   decrease parameter of 1/8-th.  A comparison of Tables 9 and 10 shows
+   that for this scenario of 100 Mbps links, HighSpeed TCP increases
+   more aggressively than Scalable TCP.
+
+   Next we consider the relative fairness between Standard TCP,
+   HighSpeed TCP and Scalable TCP.  The relative fairness between
+   HighSpeed TCP and Standard TCP was shown in Table 5 earlier in this
+   document, and the relative fairness between Scalable TCP and Standard
+   TCP was shown in Table 8.  Following the approach in Section 6, for a
+   given packet drop rate p, for p < 10^-3, we can estimate the relative
+   fairness between Scalable and HighSpeed TCP as
+   W_Scalable/W_HighSpeed.  This relative fairness is shown in Table 11
+   below.  The bandwidth in the last column of Table 11 is the aggregate
+
+
+
+Floyd                         Experimental                     [Page 22]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+   bandwidth of the two competing flows given 100 ms round-trip times
+   and 1500-byte packets.
+
+    Packet Drop Rate P   Fairness  Aggregate Window  Bandwidth
+    ------------------   --------  ----------------  ---------
+         10^-2            1.0              24        2.8 Mbps
+         10^-3            1.0              76        9.1 Mbps
+         10^-4            1.4             643       77.1 Mbps
+         10^-5            2.1            5595      671.4 Mbps
+         10^-6            3.1           50279        6.0 Gbps
+         10^-7            4.5          463981       55.7 Gbps
+
+   Table 11: Relative Fairness between the Scalable and HighSpeed
+   Response Functions.
+
+   The second row of Table 11 shows that for a Scalable TCP and a
+   HighSpeed TCP flow competing in an environment with 100 ms RTTs and a
+   10 Mbps pipe, the two flows would receive essentially the same
+   bandwidth.  The next row shows that for a Scalable TCP and a
+   HighSpeed TCP flow competing in an environment with 100 ms RTTs and a
+   100 Mbps pipe, the Scalable TCP flow would receive roughly 50% more
+   bandwidth than would HighSpeed TCP.  Table 11 shows the relative
+   fairness in higher-bandwidth environments as well.  This relative
+   fairness seems sufficient that there should be no problems with
+   Scalable TCP and HighSpeed TCP coexisting in the same environment as
+   Experimental variants of TCP.
+
+   We note that one question that requires more investigation with
+   Scalable TCP is that of convergence to fairness in environments with
+   Drop-Tail queue management.
+
+12.  Related Work in HighSpeed TCP
+
+   HighSpeed TCP has been separately investigated in simulations by
+   Sylvia Ratnasamy and by Evandro de Souza [SA03].  The simulations in
+   [SA03] verify the fairness properties of HighSpeed TCP when sharing a
+   link with Standard TCP.
+
+   These simulations explore the relative fairness of HighSpeed TCP
+   flows when competing with Standard TCP.  The simulation environment
+   includes background forward and reverse-path TCP traffic limited by
+   the TCP receive window, along with a small amount of forward and
+   reverse-path traffic from the web traffic generator.  Most of the
+   simulations so far explore performance on a simple dumbbell topology
+   with a 1 Gbps link with a propagation delay of 50 ms.  Simulations
+   have been run with Adaptive RED and with DropTail queue management.
+
+
+
+
+
+Floyd                         Experimental                     [Page 23]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+   The simulations in [SA03] explore performance with a varying number
+   of competing flows, with the competing traffic being all standard
+   TCP; all HighSpeed TCP; or a mix of standard and HighSpeed TCP.  For
+   the simulations in [SA03] with RED queue management, the relative
+   fairness between standard and HighSpeed TCP is consistent with the
+   relative fairness predicted in Table 5.  For the simulations with
+   Drop Tail queues, the relative fairness is more skewed, with the
+   HighSpeed TCP flows receiving an even larger share of the link
+   bandwidth.  This is not surprising; with Active Queue Management at
+   the congested link, the fraction of packet drops received by each
+   flow should be roughly proportional to that flow's share of the link
+   bandwidth, while this property no longer holds with Drop Tail queue
+   management.  We also note that relative fairness in simulations with
+   Drop Tail queue management can sometimes depend on small details of
+   the simulation scenario, and that Drop Tail simulations need special
+   care to avoid phase effects [F92].
+
+   [SA03] explores the bandwidth `stolen' by HighSpeed TCP from standard
+   TCP by exploring the fraction of the link bandwidth N standard TCP
+   flows receive when competing against N other standard TCP flows, and
+   comparing this to the fraction of the link bandwidth the N standard
+   TCP flows receive when competing against N HighSpeed TCP flows.  For
+   the 1 Gbps simulation scenarios dominated by long-lived traffic, a
+   small number of standard TCP flows are able to achieve high link
+   utilization, and the HighSpeed TCP flows can be viewed as stealing
+   bandwidth from the competing standard TCP flows, as predicted in
+   Section 6 on the Fairness Implications of the HighSpeed Response
+   Function.  However, [SA03] shows that when even a small fraction of
+   the link bandwidth is used by more bursty, short TCP connections, the
+   standard TCP flows are unable to achieve high link utilization, and
+   the HighSpeed TCP flows in this case are not `stealing' bandwidth
+   from the standard TCP flows, but instead are using bandwidth that
+   otherwise would not be utilized.
+
+   The conclusions of [SA03] are that "HighSpeed TCP behaved as forseen
+   by its response function, and appears to be a real and viable option
+   for use on high-speed wide area TCP connections."
+
+   Future work that could be explored in more detail includes
+   convergence times after new flows start-up; recovery time after a
+   transient outage; the response to sudden severe congestion, and
+   investigations of the potential for oscillations.  We invite
+   contributions from others in this work.
+
+
+
+
+
+
+
+
+Floyd                         Experimental                     [Page 24]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+13.  Relationship to other Work
+
+   Our assumption is that HighSpeed TCP will be used with the TCP SACK
+   option, and also with the increased Initial Window of three or four
+   segments, as allowed by [RFC3390].  For paths that have substantial
+   reordering, TCP performance would be greatly improved by some of the
+   mechanisms still in the research stages for robust performance in the
+   presence of reordered packets.
+
+   Our view is that HighSpeed TCP is largely orthogonal to proposals for
+   higher PMTU (Path MTU) values [M02].  Unlike changes to the PMTU,
+   HighSpeed TCP does not require any changes in the network or at the
+   TCP receiver, and works well in the current Internet.  Our assumption
+   is that HighSpeed TCP would be useful even with larger values for the
+   PMTU.  Unlike the current congestion window, the PMTU gives no
+   information about the bandwidth-delay product available to that
+   particular flow.
+
+   A related approach is that of a virtual MTU, where the actual MTU of
+   the path might be limited [VMSS,S02].  The virtual MTU approach has
+   not been fully investigated, and we do not explore the virtual MTU
+   approach further in this document.
+
+14.  Conclusions
+
+   This document has proposed HighSpeed TCP, a modification to TCP's
+   congestion control mechanism for use with TCP connections with large
+   congestion windows.  We have explored this proposal in simulations,
+   and others have explored HighSpeed TCP with experiments, and we
+   believe HighSpeed TCP to be safe to deploy on the current Internet.
+   We would welcome additional analysis, simulations, and particularly,
+   experimentation.  More information on simulations and experiments is
+   available from the HighSpeed TCP Web Page [HSTCP].  There are several
+   independent implementations of HighSpeed TCP [D02,F03] and of
+   Scalable TCP [K03] for further investigation.
+
+15.  Acknowledgements
+
+   The HighSpeed TCP proposal is from joint work with Sylvia Ratnasamy
+   and Scott Shenker (and was initiated by Scott Shenker).  Additional
+   investigations of HighSpeed TCP were joint work with Evandro de Souza
+   and Deb Agarwal.  We thank Tom Dunigan for the implementation in the
+   Linux 2.4.16 Web100 kernel, and for resulting experimentation with
+   HighSpeed TCP.  We are grateful to the End-to-End Research Group, the
+   members of the Transport Area Working Group, and to members of the
+   IPAM program in Large Scale Communication Networks for feedback.  We
+   thank Glenn Vinnicombe for framing the Linear response function in
+   the parameters of HighSpeed TCP.  We are also grateful for
+
+
+
+Floyd                         Experimental                     [Page 25]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+   contributions and feedback from the following individuals: Les
+   Cottrell, Mitchell Erblich, Jeffrey Hsu, Tom Kelly, Chuck Jackson,
+   Matt Mathis, Jitendra Padhye, Andrew Reiter, Stanislav Shalunov, Alex
+   Solan, Paul Sutter, Brian Tierney, Joe Touch.
+
+16.  Normative References
+
+   [RFC2581]  Allman, M., Paxson, V. and W. Stevens, "TCP Congestion
+              Control", RFC 2581, April 1999.
+
+17.  Informative References
+
+   [ABLLS03]  A. Antony, J. Blom, C. de Laat, J. Lee, and W. Sjouw,
+              "Microscopic Examination of TCP Flows over Transatlantic
+              Links", iGrid2002 special issue, Future Generation
+              Computer Systems, volume 19 issue 6 (2003), URL
+              "http://www.science.uva.nl/~delaat/techrep-2003-2-
+              tcp.pdf".
+
+   [BBFS01]   Deepak Bansal, Hari Balakrishnan, Sally Floyd, and Scott
+              Shenker, "Dynamic Behavior of Slowly-Responsive Congestion
+              Control Algorithms", SIGCOMM 2001, August 2001.
+
+   [CC03]     Fabrizio Coccetti and Les Cottrell, "TCP Stack
+              Measurements on Lightly Loaded Testbeds", 2003.  URL
+              "http://www-iepm.slac.stanford.edu/monitoring/bulk/fast/".
+
+   [CJ89]     D. Chiu and R. Jain, "Analysis of the Increase and
+              Decrease Algorithms for Congestion Avoidance in Computer
+              Networks", Computer Networks and ISDN Systems, Vol. 17,
+              pp. 1-14, 1989.
+
+   [CO98]     J. Crowcroft and P. Oechslin, "Differentiated End-to-end
+              Services using a Weighted Proportional Fair Share TCP",
+              Computer Communication Review, 28(3):53--69, 1998.
+
+   [D02]      Tom Dunigan, "Floyd's TCP slow-start and AIMD mods", URL
+              "http://www.csm.ornl.gov/~dunigan/net100/floyd.html".
+
+   [F03]      Gareth Fairey, "High-Speed TCP", 2003.  URL
+              "http://www.hep.man.ac.uk/u/garethf/hstcp/".
+
+   [F92]      S. Floyd and V. Jacobson, "On Traffic Phase Effects in
+              Packet-Switched Gateways, Internetworking: Research and
+              Experience", V.3 N.3, September 1992, p.115-156.  URL
+              "http://www.icir.org/floyd/papers.html".
+
+
+
+
+
+Floyd                         Experimental                     [Page 26]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+   [Fl03]     Sally Floyd, "Re: [Tsvwg] taking NewReno (RFC 2582) to
+              Proposed Standard", Email to the tsvwg mailing list, May
+              14, 2003.
+
+   URLs       "http://www1.ietf.org/mail-archive/working-
+              groups/tsvwg/current/msg04086.html" and
+              "http://www1.ietf.org/mail-archive/working-
+              groups/tsvwg/current/msg04087.html".
+
+   [FF98]     Floyd, S., and Fall, K., "Promoting the Use of End-to-End
+              Congestion Control in the Internet", IEEE/ACM Transactions
+              on Networking, August 1999.
+
+   [FRS02]    Sally Floyd, Sylvia Ratnasamy, and Scott Shenker,
+              "Modifying TCP's Congestion Control for High Speeds", May
+              2002.  URL "http://www.icir.org/floyd/notes.html".
+
+   [GRK99]    Panos Gevros, Fulvio Risso and Peter Kirstein, "Analysis
+              of a Method for Differential TCP Service".  In Proceedings
+              of the IEEE GLOBECOM'99, Symposium on Global Internet ,
+              December 1999, Rio de Janeiro, Brazil.
+
+   [GV02]     S. Gorinsky and H. Vin, "Extended Analysis of Binary
+              Adjustment Algorithms", Technical Report TR2002-39,
+              Department of Computer Sciences, The University of Texas
+              at Austin, August 2002.  URL
+              "http://www.cs.utexas.edu/users/gorinsky/pubs.html".
+
+   [HSTCP]    HighSpeed TCP Web Page, URL
+              "http://www.icir.org/floyd/hstcp.html".
+
+   [J02]      Amit Jain and Sally Floyd, "Quick-Start for TCP and IP",
+              Work in Progress, 2002.
+
+   [JWL03]    Cheng Jin, David X. Wei and Steven H. Low, "FAST TCP for
+              High-speed Long-distance Networks", Work in Progress, June
+              2003.
+
+   [K03]      Tom Kelly, "Scalable TCP: Improving Performance in
+              HighSpeed Wide Area Networks", February 2003.  URL
+              "http://www-lce.eng.cam.ac.uk/~ctk21/scalable/".
+
+   [KHR02]    Dina Katabi, Mark Handley, and Charlie Rohrs, "Congestion
+              Control for High Bandwidth-Delay Product Networks",
+              SIGCOMM 2002.
+
+   [M02]      Matt Mathis, "Raising the Internet MTU", Web Page, URL
+              "http://www.psc.edu/~mathis/MTU/".
+
+
+
+Floyd                         Experimental                     [Page 27]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+   [Net100]   The DOE/MICS Net100 project.  URL
+              "http://www.csm.ornl.gov/~dunigan/net100/".
+
+   [NS]       The NS Simulator, "http://www.isi.edu/nsnam/ns/".
+
+   [RFC 1323] Jacobson, V., Braden, R. and D. Borman, "TCP Extensions
+              for High Performance", RFC 1323, May 1992.
+
+   [RFC3390]  Allman, M., Floyd, S. and C. Partridge, "Increasing TCP's
+              Initial Window", RFC 3390, October 2002.
+
+   [RFC3448]  Handley, M., Padhye, J., Floyd, S. and J. Widmer, "TCP
+              Friendly Rate Control (TFRC): Protocol Specification", RFC
+              3448, January 2003.
+
+   [SA03]     Souza, E. and D.A. Agarwal, "A HighSpeed TCP Study:
+              Characteristics and Deployment Issues", LBNL Technical
+              Report LBNL-53215.  URL
+              "http://www.icir.org/floyd/hstcp.html".
+
+   [S02]      Stanislav Shalunov, "TCP Armonk", Work in Progress, 2002,
+              URL "http://www.internet2.edu/~shalunov/tcpar/".
+
+   [S03]      Alex Solan, private communication, 2003.
+
+   [VMSS]     "Web100 at ORNL", Web Page,
+              "http://www.csm.ornl.gov/~dunigan/netperf/web100.html".
+
+   [Web100]   The Web100 project.  URL "http://www.web100.org/".
+
+18.  Security Considerations
+
+   This proposal makes no changes to the underlying security of TCP.
+
+19.  IANA Considerations
+
+   There are no IANA considerations regarding this document.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Floyd                         Experimental                     [Page 28]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+A.  TCP's Loss Event Rate in Steady-State
+
+   This section gives the number of round-trip times between congestion
+   events for a TCP flow with D-byte packets, for D=1500, as a function
+   of the connection's average throughput B in bps.  To achieve this
+   average throughput B, a TCP connection with round-trip time R in
+   seconds requires an average congestion window w of BR/(8D) segments.
+
+   In steady-state, TCP's average congestion window w is roughly
+   1.2/sqrt(p) segments.  This is equivalent to a loss event at most
+   once every 1/p packets, or at most once every 1/(pw) = w/1.5 round-
+   trip times.  Substituting for w, this is a loss event at most every
+   (BR)/(12D) round-trip times.
+
+   As an example, for R = 0.1 seconds and D = 1500 bytes, this gives
+   B/180000 round-trip times between loss events.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Floyd                         Experimental                     [Page 29]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+B.  A table for a(w) and b(w).
+
+   This section gives a table for the increase and decrease parameters
+   a(w) and b(w) for HighSpeed TCP, for the default values of Low_Window
+   = 38, High_Window = 83000, High_P = 10^-7, and High_Decrease = 0.1.
+
+        w  a(w)  b(w)
+     ----  ----  ----
+       38     1  0.50
+      118     2  0.44
+      221     3  0.41
+      347     4  0.38
+      495     5  0.37
+      663     6  0.35
+      851     7  0.34
+     1058     8  0.33
+     1284     9  0.32
+     1529    10  0.31
+     1793    11  0.30
+     2076    12  0.29
+     2378    13  0.28
+     2699    14  0.28
+     3039    15  0.27
+     3399    16  0.27
+     3778    17  0.26
+     4177    18  0.26
+     4596    19  0.25
+     5036    20  0.25
+     5497    21  0.24
+     5979    22  0.24
+     6483    23  0.23
+     7009    24  0.23
+     7558    25  0.22
+     8130    26  0.22
+     8726    27  0.22
+     9346    28  0.21
+     9991    29  0.21
+    10661    30  0.21
+    11358    31  0.20
+    12082    32  0.20
+    12834    33  0.20
+    13614    34  0.19
+    14424    35  0.19
+    15265    36  0.19
+    16137    37  0.19
+    17042    38  0.18
+    17981    39  0.18
+    18955    40  0.18
+
+
+
+Floyd                         Experimental                     [Page 30]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+    19965    41  0.17
+    21013    42  0.17
+    22101    43  0.17
+    23230    44  0.17
+    24402    45  0.16
+    25618    46  0.16
+    26881    47  0.16
+    28193    48  0.16
+    29557    49  0.15
+    30975    50  0.15
+    32450    51  0.15
+    33986    52  0.15
+    35586    53  0.14
+    37253    54  0.14
+    38992    55  0.14
+    40808    56  0.14
+    42707    57  0.13
+    44694    58  0.13
+    46776    59  0.13
+    48961    60  0.13
+    51258    61  0.13
+    53677    62  0.12
+    56230    63  0.12
+    58932    64  0.12
+    61799    65  0.12
+    64851    66  0.11
+    68113    67  0.11
+    71617    68  0.11
+    75401    69  0.10
+    79517    70  0.10
+    84035    71  0.10
+    89053    72  0.10
+    94717    73  0.09
+
+   Table 12: Parameters for HighSpeed TCP.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Floyd                         Experimental                     [Page 31]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+   This table was computed with the following Perl program:
+
+    $top = 100000;
+    $num = 38;
+    if ($num == 38) {
+      print "     w  a(w)  b(w)\n";
+      print "  ----  ----  ----\n";
+      print "    38     1  0.50\n";
+      $oldb = 0.50;
+      $olda = 1;
+    }
+    while ($num < $top) {
+      $bw = (0.1 -0.5)*(log($num)-log(38))/(log(83000)-log(38))+0.5;
+      $aw = ($num**2*2.0*$bw) / ((2.0-$bw)*$num**1.2*12.8);
+      if ($aw > $olda + 1) {
+         printf "%6d %5d  %3.2f\n", $num, $aw, $bw;
+         $olda = $aw;
+      }
+      $num ++;
+    }
+
+   Table 13: Perl Program for computing parameters for HighSpeed TCP.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Floyd                         Experimental                     [Page 32]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+C.  Exploring the time to converge to fairness.
+
+   This section gives the Perl program used to compute the congestion
+   window growth during congestion avoidance.
+
+    $top = 2001;
+    $hswin = 1;
+    $regwin = 1;
+    $rtt = 1;
+    $lastrtt = 0;
+    $rttstep = 100;
+    if ($hswin == 1) {
+      print "  RTT  HS_Window Standard_TCP_Window\n";
+      print "  ---  --------- -------------------\n";
+    }
+    while ($rtt < $top) {
+      $bw = (0.1 -0.5)*(log($hswin)-log(38))/(log(83000)-log(38))+0.5;
+      $aw = ($hswin**2*2.0*$bw) / ((2.0-$bw)*$hswin**1.2*12.8);
+      if ($aw < 1) {
+          $aw = 1;
+      }
+      if ($rtt >= $lastrtt + $rttstep) {
+        printf "%5d %9d %10d\n", $rtt, $hswin, $regwin;
+        $lastrtt = $rtt;
+      }
+      $hswin += $aw;
+      $regwin += 1;
+      $rtt ++;
+    }
+
+   Table 14: Perl Program for computing the window in congestion
+   avoidance.
+
+Author's Address
+
+   Sally Floyd
+   ICIR (ICSI Center for Internet Research)
+
+   Phone: +1 (510) 666-2989
+   EMail: floyd@acm.org
+   URL: http://www.icir.org/floyd/
+
+
+
+
+
+
+
+
+
+
+Floyd                         Experimental                     [Page 33]
+
+RFC 3649                     HighSpeed TCP                 December 2003
+
+
+Full Copyright Statement
+
+   Copyright (C) The Internet Society (2003).  All Rights Reserved.
+
+   This document and translations of it may be copied and furnished to
+   others, and derivative works that comment on or otherwise explain it
+   or assist in its implementation may be prepared, copied, published
+   and distributed, in whole or in part, without restriction of any
+   kind, provided that the above copyright notice and this paragraph are
+   included on all such copies and derivative works.  However, this
+   document itself may not be modified in any way, such as by removing
+   the copyright notice or references to the Internet Society or other
+   Internet organizations, except as needed for the purpose of
+   developing Internet standards in which case the procedures for
+   copyrights defined in the Internet Standards process must be
+   followed, or as required to translate it into languages other than
+   English.
+
+   The limited permissions granted above are perpetual and will not be
+   revoked by the Internet Society or its successors or assignees.
+
+   This document and the information contained herein is provided on an
+   "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
+   TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
+   BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
+   HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
+   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Acknowledgement
+
+   Funding for the RFC Editor function is currently provided by the
+   Internet Society.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Floyd                         Experimental                     [Page 34]
+
diff --git a/ext/picotcp/RFC/rfc3819.txt b/ext/picotcp/RFC/rfc3819.txt
new file mode 100644
index 0000000..b4a5e8b
--- /dev/null
+++ b/ext/picotcp/RFC/rfc3819.txt
@@ -0,0 +1,3363 @@
+
+
+
+
+
+
+Network Working Group                                       P. Karn, Ed.
+Request for Comments: 3819                                      Qualcomm
+BCP: 89                                                       C. Bormann
+Category: Best Current Practice                  Universitaet Bremen TZI
+                                                            G. Fairhurst
+                                                  University of Aberdeen
+                                                             D. Grossman
+                                                          Motorola, Inc.
+                                                               R. Ludwig
+                                                       Ericsson Research
+                                                              J. Mahdavi
+                                                                  Novell
+                                                           G. Montenegro
+                                   Sun Microsystems Laboratories, Europe
+                                                                J. Touch
+                                                                 USC/ISI
+                                                                 L. Wood
+                                                           Cisco Systems
+                                                               July 2004
+
+
+                Advice for Internet Subnetwork Designers
+
+Status of this Memo
+
+   This document specifies an Internet Best Current Practices for the
+   Internet Community, and requests discussion and suggestions for
+   improvements.  Distribution of this memo is unlimited.
+
+Copyright Notice
+
+   Copyright (C) The Internet Society (2004).
+
+Abstract
+
+   This document provides advice to the designers of digital
+   communication equipment, link-layer protocols, and packet-switched
+   local networks (collectively referred to as subnetworks), who wish to
+   support the Internet protocols but may be unfamiliar with the
+   Internet architecture and the implications of their design choices on
+   the performance and efficiency of the Internet.
+
+
+
+
+
+
+
+
+
+
+Karn, et al.             Best Current Practice                  [Page 1]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+Table of Contents
+
+   1.  Introduction and Overview. . . . . . . . . . . . . . . . . . .  2
+   2.  Maximum Transmission Units (MTUs) and IP Fragmentation . . . .  4
+       2.1.  Choosing the MTU in Slow Networks. . . . . . . . . . . .  6
+   3.  Framing on Connection-Oriented Subnetworks . . . . . . . . . .  7
+   4.  Connection-Oriented Subnetworks. . . . . . . . . . . . . . . .  9
+   5.  Broadcasting and Discovery . . . . . . . . . . . . . . . . . . 10
+   6.  Multicasting . . . . . . . . . . . . . . . . . . . . . . . . . 11
+   7.  Bandwidth on Demand (BoD) Subnets. . . . . . . . . . . . . . . 13
+   8.  Reliability and Error Control. . . . . . . . . . . . . . . . . 14
+       8.1.  TCP vs Link-Layer Retransmission . . . . . . . . . . . . 14
+       8.2.  Recovery from Subnetwork Outages . . . . . . . . . . . . 17
+       8.3.  CRCs, Checksums and Error Detection. . . . . . . . . . . 18
+       8.4.  How TCP Works. . . . . . . . . . . . . . . . . . . . . . 20
+       8.5.  TCP Performance Characteristics. . . . . . . . . . . . . 22
+             8.5.1.  The Formulae . . . . . . . . . . . . . . . . . . 22
+             8.5.2.  Assumptions. . . . . . . . . . . . . . . . . . . 23
+             8.5.3.  Analysis of Link-Layer Effects on TCP
+                     Performance. . . . . . . . . . . . . . . . . . . 24
+   9.  Quality-of-Service (QoS) Considerations. . . . . . . . . . . . 26
+   10. Fairness vs Performance. . . . . . . . . . . . . . . . . . . . 29
+   11. Delay Characteristics. . . . . . . . . . . . . . . . . . . . . 30
+   12. Bandwidth Asymmetries. . . . . . . . . . . . . . . . . . . . . 31
+   13. Buffering, Flow and Congestion Control . . . . . . . . . . . . 31
+   14. Compression. . . . . . . . . . . . . . . . . . . . . . . . . . 34
+   15. Packet Reordering. . . . . . . . . . . . . . . . . . . . . . . 36
+   16. Mobility . . . . . . . . . . . . . . . . . . . . . . . . . . . 37
+   17. Routing. . . . . . . . . . . . . . . . . . . . . . . . . . . . 39
+   18. Security Considerations. . . . . . . . . . . . . . . . . . . . 41
+   19. Contributors . . . . . . . . . . . . . . . . . . . . . . . . . 44
+   20. Informative References . . . . . . . . . . . . . . . . . . . . 45
+   21. Contributors' Addresses. . . . . . . . . . . . . . . . . . . . 57
+   22. Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . 58
+   23. Full Copyright Statement . . . . . . . . . . . . . . . . . . . 60
+
+1.  Introduction and Overview
+
+   IP, the Internet Protocol [RFC791] [RFC2460], is the core protocol of
+   the Internet.  IP defines a simple "connectionless" packet-switched
+   network.  The success of the Internet is largely attributed to IP's
+   simplicity, the "end-to-end principle" [SRC81] on which the Internet
+   is based, and the resulting ease of carrying IP on a wide variety of
+   subnetworks, not necessarily designed with IP in mind.  A subnetwork
+   refers to any network operating immediately below the IP layer to
+   connect two or more systems using IP (i.e., end hosts or routers).
+   In its simplest form, this may be a direct connection between the IP
+   systems (e.g., using a length of cable or a wireless medium).
+
+
+
+Karn, et al.             Best Current Practice                  [Page 2]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   This document defines a subnetwork as a layer 2 network, which is a
+   network that does not rely upon the services of IP routers to forward
+   packets between parts of the subnetwork.  However, IP routers may
+   bridge frames at Layer 2 between parts of a subnetwork.  Sometimes,
+   it is convenient to aggregate a group of such subnetworks into a
+   single logical subnetwork.  IP routing protocols (e.g., OSPF, IS-IS,
+   and PIM) can be configured to support this aggregation, but typically
+   present a layer-3 subnetwork rather than a layer-2 subnetwork.  This
+   may also result in a specific packet passing several times over the
+   same layer-2 subnetwork via an intermediate layer-3 gateway (router).
+   Because that aggregation requires layer-3 components, issues thereof
+   are beyond the scope of this document.
+
+   However, while many subnetworks carry IP, they do not necessarily do
+   so with maximum efficiency, minimum complexity, or cost, nor do they
+   implement certain features to efficiently support newer Internet
+   features of increasing importance, such as multicasting or quality of
+   service.
+
+   With the explosive growth of the Internet, IP packets comprise an
+   increasingly large fraction of the traffic carried by the world's
+   telecommunications networks.  It therefore makes sense to optimize
+   both existing and new subnetwork technologies for IP as much as
+   possible.
+
+   Optimizing a subnetwork for IP involves three complementary
+   considerations:
+
+   1.  Providing functionality sufficient to carry IP.
+
+   2.  Eliminating unnecessary functions that increase cost or
+       complexity.
+
+   3.  Choosing subnetwork parameters that maximize the performance of
+       the Internet protocols.
+
+   Because IP is so simple, consideration 2 is more of an issue than
+   consideration 1.  That is to say, subnetwork designers make many more
+   errors of commission than errors of omission.  However, certain
+   enhancements to Internet features, such as multicasting and quality-
+   of-service, benefit significantly from support given by the
+   underlying subnetworks beyond that necessary to carry "traditional"
+   unicast, best-effort IP.
+
+
+
+
+
+
+
+
+Karn, et al.             Best Current Practice                  [Page 3]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   A major consideration in the efficient design of any layered
+   communication network is the appropriate layer(s) in which to
+   implement a given function.  This issue was first addressed in the
+   seminal paper, "End-to-End Arguments in System Design" [SRC81].  That
+   paper argued that many functions can be implemented properly *only*
+   on an end-to-end basis, i.e., at the highest protocol layers, outside
+   the subnetwork.  These functions include ensuring the reliable
+   delivery of data and the use of cryptography to provide
+   confidentiality and message integrity.
+
+   Such functions cannot be provided solely by the concatenation of
+   hop-by-hop services; duplicating these functions at the lower
+   protocol layers (i.e., within the subnetwork) can be needlessly
+   redundant or even harmful to cost and performance.
+
+   However, partial duplication of functionality in a lower layer can
+   *sometimes* be justified by performance, security, or availability
+   considerations.  Examples include link-layer retransmission to
+   improve the performance of an unusually lossy channel, e.g., mobile
+   radio, link-level encryption intended to thwart traffic analysis, and
+   redundant transmission links to improve availability, increase
+   throughput, or to guarantee performance for certain classes of
+   traffic.  Duplication of protocol functions should be done only with
+   an understanding of system-level implications, including possible
+   interactions with higher-layer mechanisms.
+
+   The original architecture of the Internet was influenced by the
+   end-to-end principle [SRC81], and has been, in our view, part of the
+   reason for the Internet's success.
+
+   The remainder of this document discusses the various subnetwork
+   design issues that the authors consider relevant to efficient IP
+   support.
+
+2.  Maximum Transmission Units (MTUs) and IP Fragmentation
+
+   IPv4 packets (datagrams) vary in size, from 20 bytes (the size of the
+   IPv4 header alone) to a maximum of 65535 bytes.  Subnetworks need not
+   support maximum-sized (64KB) IP packets, as IP provides a scheme that
+   breaks packets that are too large for a given subnetwork into
+   fragments that travel as independent IP packets and are reassembled
+   at the destination.  The maximum packet size supported by a
+   subnetwork is known as its Maximum Transmission Unit (MTU).
+
+   Subnetworks may, but are not required to, indicate the length of each
+   packet they carry.  One example is Ethernet with the widely used DIX
+   [DIX82] (not IEEE 802.3 [IEEE8023]) header, which lacks a length
+
+
+
+
+Karn, et al.             Best Current Practice                  [Page 4]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   field to indicate the true data length when the packet is padded to a
+   minimum of 60 bytes.  This is not a problem for uncompressed IP
+   because each IP packet carries its own length field.
+
+   If optional header compression [RFC1144] [RFC2507] [RFC2508]
+   [RFC3095] is used, however, it is required that the link framing
+   indicate frame length because that is needed for the reconstruction
+   of the original header.
+
+   In IP version 4 (the version now in widespread use), fragmentation
+   can occur at either the sending host or in an intermediate router,
+   and fragments can be further fragmented at subsequent routers if
+   necessary.
+
+   In IP version 6 [RFC2460], fragmentation can occur only at the
+   sending host; it cannot occur in a router (called "router
+   fragmentation" in this document).
+
+   Both IPv4 and IPv6 provide a "path MTU discovery" procedure [RFC1191]
+   [RFC1435] [RFC1981] that allows the sending host to avoid
+   fragmentation by discovering the minimum MTU along a given path and
+   reduce its packet sizes accordingly.  This procedure is optional in
+   IPv4 and IPv6.
+
+   Path MTU discovery is widely deployed, but it sometimes encounters
+   problems.  Some routers fail to generate the ICMP messages that
+   convey path MTU information to the sender, and sometimes the ICMP
+   messages are blocked by overly restrictive firewalls.  The result can
+   be a "Path MTU Black Hole" [RFC2923] [RFC1435].
+
+   The Path MTU Discovery procedure, the persistence of path MTU black
+   holes, and the deletion of router fragmentation in IPv6 reflect a
+   consensus of the Internet technical community that router
+   fragmentation is best avoided.  This requires that subnetworks
+   support MTUs that are "reasonably" large.  All IPv4 end hosts are
+   required to accept and reassemble IP packets of size 576 bytes
+   [RFC791], but such a small value would clearly be inefficient.
+   Because IPv6 omits fragmentation by routers, [RFC2460] specifies a
+   larger minimum MTU of 1280 bytes.  Any subnetwork with an internal
+   packet payload smaller than 1280 bytes must implement a mechanism
+   that performs fragmentation/reassembly of IP packets to/from
+   subnetwork frames if it is to support IPv6.
+
+   If a subnetwork cannot directly support a "reasonable" MTU with
+   native framing mechanisms, it should internally fragment.  That is,
+   it should transparently break IP packets into internal data elements
+   and reassemble them at the other end of the subnetwork.
+
+
+
+
+Karn, et al.             Best Current Practice                  [Page 5]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   This leaves the question of what is a "reasonable" MTU.  Ethernet (10
+   and 100 Mb/s) has an MTU of 1500 bytes, and because of the ubiquity
+   of Ethernet few Internet paths currently have MTUs larger than this
+   value.  This severely limits the utility of larger MTUs provided by
+   other subnetworks.  Meanwhile, larger MTUs are increasingly desirable
+   on high-speed subnetworks to reduce the per-packet processing
+   overhead in host computers, and implementers are encouraged to
+   provide them even though they may not be usable when Ethernet is also
+   in the path.
+
+   Various "tunneling" schemes, such as GRE [RFC2784] or IP Security in
+   tunnel mode [RFC2406], treat IP as a subnetwork for IP.  Since
+   tunneling adds header overhead, it can trigger fragmentation, even
+   when the same physical subnetworks (e.g., Ethernet) are used on both
+   sides of the host performing IPsec encapsulation.  Tunneling has made
+   it more difficult to avoid router fragmentation and has increased the
+   incidence of path MTU black holes [RFC2401] [RFC2923].  Larger
+   subnetwork MTUs may help to alleviate this problem.
+
+2.1.  Choosing the MTU in Slow Networks
+
+   In slow networks, the largest possible packet may take a considerable
+   amount of time to send.  This is known as channelisation or
+   serialisation delay.  Total end-to-end interactive response time
+   should not exceed the well-known human factors limit of 100 to 200
+   ms.  This includes all sources of delay: electromagnetic propagation
+   delay, queuing delay, serialisation delay, and the store-and-forward
+   time, i.e., the time to transmit a packet at link speed.
+
+   At low link speeds, store-and-forward delays can dominate total
+   end-to-end delay; these are in turn directly influenced by the
+   maximum transmission unit (MTU) size.  Even when an interactive
+   packet is given a higher queuing priority, it may have to wait for a
+   large bulk transfer packet to finish transmission.  This worst-case
+   wait can be set by an appropriate choice of MTU.
+
+   For example, if the MTU is set to 1500 bytes, then an MTU-sized
+   packet will take about 8 milliseconds to send on a T1 (1.536 Mb/s)
+   link.  But if the link speed is 19.2kb/s, then the transmission time
+   becomes 625 ms -- well above our 100-200ms limit.  A 256-byte MTU
+   would lower this delay to a little over 100 ms.  However, care should
+   be taken not to lower the MTU excessively, as this will increase
+   header overhead and trigger frequent router fragmentation (if Path
+   MTU discovery is not in use).  This is likely to be the case with
+   multicast, where Path MTU discovery is ineffective.
+
+   One way to limit delay for interactive traffic without imposing a
+   small MTU is to give priority to this traffic and to preempt (abort)
+
+
+
+Karn, et al.             Best Current Practice                  [Page 6]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   the transmission of a lower-priority packet when a higher priority
+   packet arrives in the queue.  However, the link resources used to
+   send the aborted packet are lost, and overall throughput will
+   decrease.
+
+   Another way to limit delay is to implement a link-level multiplexing
+   scheme that allows several packets to be in progress simultaneously,
+   with transmission priority given to segments of higher-priority IP
+   packets.  For links using the Point-To-Point Protocol (PPP)
+   [RFC1661], multi-class multilink [RFC2686] [RFC2687] [RFC2689]
+   provides such a facility.
+
+   ATM (asynchronous transfer mode), where SNDUs are fragmented and
+   interleaved across smaller 53-byte ATM cells, is another example of
+   this technique.  However, ATM is generally used on high-speed links
+   where the store-and-forward delays are already minimal, and it
+   introduces significant (~9%) increases in overhead due to the
+   addition of a 5-byte header to each 48-byte ATM cell payload.
+
+   A third example is the Data-Over-Cable Service Interface
+   Specification (DOCSIS) with typical upstream bandwidths of 2.56 Mb/s
+   or 5.12 Mb/s.  To reduce the impact of a 1500-byte MTU in DOCSIS 1.0
+   [DOCSIS1], a data link layer fragmentation mechanism is specified in
+   DOCSIS 1.1 [DOCSIS2].  To accommodate the installed base, DOCSIS 1.1
+   must be backward compatible with DOCSIS 1.0 cable modems, which
+   generally do not support fragmentation.  Under the co-existence of
+   DOCSIS 1.0 and DOCSIS 1.1, the unfragmented large data packets from
+   DOCSIS 1.0 cable modems may affect the quality of service for voice
+   packets from DOCSIS 1.1 cable modems.  In this case, it has been
+   shown in [DOCSIS3] that the use of bandwidth allocation algorithms
+   can mitigate this effect.
+
+   To summarize, there is a fundamental tradeoff between efficiency and
+   latency in the design of a subnetwork, and the designer should keep
+   this tradeoff in mind.
+
+3.  Framing on Connection-Oriented Subnetworks
+
+   IP requires that subnetworks mark the beginning and end of each
+   variable-length, asynchronous IP packet.  Some examples of links and
+   subnetworks that do not provide this as an intrinsic feature include:
+
+   1.  leased lines carrying a synchronous bit stream;
+
+   2.  ISDN B-channels carrying a synchronous octet stream;
+
+   3.  dialup telephone modems carrying an asynchronous octet stream;
+
+
+
+
+Karn, et al.             Best Current Practice                  [Page 7]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+       and
+
+   4.  Asynchronous Transfer Mode (ATM) networks carrying an
+       asynchronous stream of fixed-sized "cells".
+
+   The Internet community has defined packet framing methods for all
+   these subnetworks.  The Point-To-Point Protocol (PPP) [RFC1661],
+   which uses a variant of HDLC, is applicable to bit synchronous,
+   octet-synchronous, and octet asynchronous links (i.e., examples 1-3
+   above).  PPP is one preferred framing method for IP, since a large
+   number of systems interoperate with PPP.  ATM has its own framing
+   methods, described in [RFC2684] [RFC2364].
+
+   At high speeds, a subnetwork should provide a framed interface
+   capable of carrying asynchronous, variable-length IP datagrams.  The
+   maximum packet size supported by this interface is discussed above in
+   the MTU/Fragmentation section.  The subnetwork may implement this
+   facility in any convenient manner.
+
+   IP packet boundaries need not coincide with any framing or
+   synchronization mechanisms internal to the subnetwork.  When the
+   subnetwork implements variable sized data units, the most
+   straightforward approach is to place exactly one IP packet into each
+   subnetwork data unit (SNDU), and to rely on the subnetwork's existing
+   ability to delimit SNDUs to also delimit IP packets.  A good example
+   is Ethernet.  However, some subnetworks have SNDUs of one or more
+   fixed sizes, as dictated by switching, forward error correction
+   and/or interleaving considerations.  Examples of such subnetworks
+   include ATM, with a single cell payload size of 48 octets plus a 5-
+   octet header, and IS-95 digital cellular, with two "rate sets" of
+   four fixed frame sizes each that may be selected on 20 millisecond
+   boundaries.
+
+   Because IP packets are of variable length, they may not necessarily
+   fit into an integer multiple of fixed-sized SNDUs.  An "adaptation
+   layer" is needed to convert IP packets into SNDUs while marking the
+   boundary between each IP packet in some manner.
+
+   There are several approaches to this problem.  The first is to encode
+   each IP packet into one or more SNDUs with no SNDU containing pieces
+   of more than one IP packet, and to pad out the last SNDU of the
+   packet as needed.  Bits in a control header added to each SNDU
+   indicate where the data segment belongs in the IP packet.  If the
+   subnetwork provides in-order, at-most-once delivery, the header can
+   be as simple as a pair of bits indicating whether the SNDU is the
+   first and/or the last in the IP packet.  Alternatively, for
+   subnetworks that do not reorder the fragments of an SNDU, only the
+   last SNDU of the packet could be marked, as this would implicitly
+
+
+
+Karn, et al.             Best Current Practice                  [Page 8]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   indicate the next SNDU as the first in a new IP packet.  The AAL5
+   (ATM Adaptation Layer 5) scheme used with ATM is an example of this
+   approach, though it adds other features, including a payload length
+   field and a payload CRC.
+
+   In AAL5, the ATM User-User Indication, which is encoded in the
+   Payload Type field of an ATM cell, indicates the last cell of a
+   packet.  The packet trailer is located at the end of the SNDU and
+   contains the packet length and a CRC.
+
+   Another framing technique is to insert per-segment overhead to
+   indicate the presence of a segment option.  When present, the option
+   carries a pointer to the end of the packet.  This differs from AAL5
+   in that it permits another packet to follow within the same segment.
+   MPEG-2 Transport Streams [EN301192] [ISO13818] support this style of
+   fragmentation, and may either use padding (limiting each MPEG
+   transport stream packet to carry only part of one IP packet), or
+   allow a second IP packet to start in the same Transport Stream packet
+   (no padding).
+
+   A third approach is to insert a special flag sequence into the data
+   stream between each IP packet, and to pack the resulting data stream
+   into SNDUs without regard to SNDU boundaries.  This may have
+   implications when frames are lost.  The flag sequence can also pad
+   unused space at the end of an SNDU.  If the special flag appears in
+   the user data, it is escaped to an alternate sequence (usually larger
+   than a flag) to avoid being misinterpreted as a flag.  The HDLC-based
+   framing schemes used in PPP are all examples of this approach.
+
+   All three adaptation schemes introduce overhead; how much depends on
+   the distribution of IP packet sizes, the size(s) of the SNDUs, and in
+   the HDLC-like approaches, the content of the IP packet (since flag-
+   like sequences occurring in the packet must be escaped, which expands
+   them).  The designer must also weigh implementation complexity and
+   performance in the choice and design of an adaptation layer.
+
+4.  Connection-Oriented Subnetworks
+
+   IP has no notion of a "connection"; it is a purely connectionless
+   protocol.  When a connection is required by an application, it is
+   usually provided by TCP [RFC793], the Transmission Control Protocol,
+   running atop IP on an end-to-end basis.
+
+   Connection-oriented subnetworks can be (and are widely) used to carry
+   IP, but often with considerable complexity.  Subnetworks consisting
+   of few nodes can simply open a permanent connection between each pair
+   of nodes.  This is frequently done with ATM.  However, the number of
+   connections increases as the square of the number of nodes, so this
+
+
+
+Karn, et al.             Best Current Practice                  [Page 9]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   is clearly impractical for large subnetworks.  A "shim" layer between
+   IP and the subnetwork is therefore required to manage connections.
+   This is one of the most common functions of a Subnetwork Dependent
+   Convergence Function (SNDCF) sublayer between IP and a subnetwork.
+
+   SNDCFs typically open subnetwork connections as needed when an IP
+   packet is queued for transmission and close them after an idle
+   timeout.  There is no relation between subnetwork connections and any
+   connections that may exist at higher layers (e.g., TCP).
+
+   Because Internet traffic is typically bursty and transaction-
+   oriented, it is often difficult to pick an optimal idle timeout.  If
+   the timeout is too short, subnetwork connections are opened and
+   closed rapidly, possibly over-stressing the subnetwork connection
+   management system (especially if it was designed for voice traffic
+   call holding times).  If the timeout is too long, subnetwork
+   connections are idle much of the time, wasting any resources
+   dedicated to them by the subnetwork.
+
+   Purely connectionless subnets (such as Ethernet), which have no state
+   and dynamically share resources, are optimal for supporting best-
+   effort IP, which is stateless and dynamically shares resources.
+   Connection-oriented packet networks (such as ATM and Frame Relay),
+   which have state and dynamically share resources, are less optimal,
+   since best-effort IP does not benefit from the overhead of creating
+   and maintaining state.  Connection-oriented circuit-switched networks
+   (including the PSTN and ISDN) have state and statically allocate
+   resources for a call, and thus require state creation and maintenance
+   overhead, but do not benefit from the efficiencies of statistical
+   multiplexing (the sharing of capacity) inherent in IP.
+
+   In any event, if an SNDCF that opens and closes subnet connections is
+   used to support IP, care should be taken to make sure that connection
+   processing in the subnet can keep up with relatively short holding
+   times.
+
+5.  Broadcasting and Discovery
+
+   Subnetworks fall into two categories: point-to-point and shared.  A
+   point-to-point subnet has exactly two endpoint components (hosts or
+   routers); a shared link has more than two endpoint components, using
+   either an inherently broadcast medium (e.g., Ethernet, radio) or a
+   switching layer hidden from the network layer (e.g., switched
+   Ethernet, Myrinet [MYR95], ATM).  Switched subnetworks handle
+   broadcast by copying broadcast packets, providing each interface that
+   supports one, or more, systems (hosts or routers) with a copy of each
+   packet.
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 10]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   Several Internet protocols for IPv4 make use of broadcast
+   capabilities, including link-layer address lookup (ARP), auto-
+   configuration (RARP, BOOTP, DHCP), and routing (RIP).
+
+   A lack of broadcast capability can impede the performance of these
+   protocols, or render them inoperable (e.g., DHCP).  ARP-like link
+   address lookup can be provided by a centralized database, but at the
+   expense of potentially higher response latency and the need for nodes
+   to have explicit knowledge of the ARP server address.  Shared links
+   should support native, link-layer subnet broadcast.
+
+   A corresponding set of IPv6 protocols uses multicasting (see next
+   section) instead of broadcasting to provide similar functions with
+   improved scaling in large networks.
+
+6.  Multicasting
+
+   The Internet model includes "multicasting", where IP packets are sent
+   to all the members of a multicast group [RFC1112] [RFC3376]
+   [RFC2710].  Multicast is an option in IPv4, but a standard feature of
+   IPv6.  IPv4 multicast is currently used by multimedia,
+   teleconferencing, gaming, and file distribution (web, peer-to-peer
+   sharing) applications, as well as by some key network and host
+   protocols (e.g., RIPv2, OSPF, NTP).  IPv6 additionally relies on
+   multicast for network configuration (DHCP-like autoconfiguration) and
+   link-layer address discovery [RFC2461] (replacing ARP).  In the case
+   of IPv6, this can allow autoconfiguration and address discovery to
+   span across routers, whereas the IPv4 broadcast-based services cannot
+   without ad-hoc router support [RFC1812].
+
+   Multicast-enabled IP routers organize each multicast group into a
+   spanning tree, and route multicast packets by making copies of each
+   multicast packet and forwarding the copies to each output interface
+   that includes at least one downstream member of the multicast group.
+
+   Multicasting is considerably more efficient when a subnetwork
+   explicitly supports it.  For example, a router relaying a multicast
+   packet onto an Ethernet segment need send only one copy of the
+   packet, no matter how many members of the multicast group are
+   connected to the segment.  Without native multicast support, routers
+   and switches on shared links would need to use broadcast with
+   software filters, such that every multicast packet sent incurs
+   software overhead for every node on the subnetwork, even if a node is
+   not a member of the multicast group.  Alternatively, the router would
+   transmit a separate copy to every member of the multicast group on
+   the segment, as is done on multicast-incapable switched subnets.
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 11]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   Subnetworks using shared channels (e.g., radio LANs, Ethernets) are
+   especially suitable for native multicasting, and their designers
+   should make every effort to support it.  This involves designating a
+   section of the subnetwork's own address space for multicasting.  On
+   these networks, multicast is basically broadcast on the medium, with
+   Layer-2 receiver filters.
+
+   Subnet interfaces also need to be designed to accept packets
+   addressed to some number of multicast addresses, in addition to the
+   unicast packets specifically addressed to them.  The number of
+   multicast addresses that needs to be supported by a host depends on
+   the requirements of the associated host; at least several dozen will
+   meet most current needs.
+
+   On low-speed networks, the multicast address recognition function may
+   be readily implemented in host software, but on high-speed networks,
+   it should be implemented in subnetwork hardware.  This hardware need
+   not be complete; for example, many Ethernet interfaces implement a
+   "hashing" function where the IP layer receives all of the multicast
+   (and unicast) traffic to which the associated host subscribes, plus
+   some small fraction of multicast traffic to which the host does not
+   subscribe.  Host/router software then has to discard the unwanted
+   packets that pass the Layer-2 multicast address filter [RFC1112].
+
+   There does not need to be a one-to-one mapping between a Layer-2
+   multicast address and an IP multicast address.  An address overlap
+   may significantly degrade the filtering capability of a receiver's
+   hardware multicast address filter.  A subnetwork supporting only
+   broadcast should use this service for multicast and must rely on
+   software filtering.
+
+   Switched subnetworks must also provide a mechanism for copying
+   multicast packets to ensure the packets reach at least all members of
+   a multicast group.  One option is to "flood" multicast packets in the
+   same manner as broadcast.  This can lead to unnecessary transmissions
+   on some subnetwork links (notably non-multicast-aware Ethernet
+   switches).  Some subnetworks therefore allow multicast filter tables
+   to control which links receive packets belonging to a specific group.
+   To configure this automatically requires access to Layer-3 group
+   membership information (e.g., IGMP [RFC3376], or MLD [RFC2710]).
+   Various implementation options currently exist to provide a subnet
+   node with a list of mappings of multicast addresses to
+   ports/interfaces.  These employ a range of approaches, including
+   signaling from end hosts (e.g., IEEE 802 GARP/GMRP [802.1p]),
+   signaling from switches (e.g., CGMP [CGMP] and RGMP [RFC3488]),
+   interception and proxy of IP group membership packets (e.g., IGMP/MLD
+   Proxy [MAGMA-PROXY]), and enabling Layer-2 devices to
+   snoop/inspect/peek into forwarded Layer-3 protocol headers (e.g.,
+
+
+
+Karn, et al.             Best Current Practice                 [Page 12]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   IGMP, MLD, PIM) so that they may infer Layer-3 multicast group
+   membership [MAGMA-SNOOP].  These approaches differ in their
+   complexity, flexibility, and ability to support new protocols.
+
+7.  Bandwidth on Demand (BoD) Subnets
+
+   Some subnets allow a number of subnet nodes to share a channel
+   efficiently by assigning transmission opportunities dynamically.
+   Transmission opportunities are requested by a subnet node when it has
+   packets to send.  The subnet schedules and grants transmission
+   opportunities sufficient to allow the transmitting subnet node to
+   send one or more packets (or packet fragments).  We call these
+   subnets Bandwidth on Demand (BoD) subnets.  Examples of BoD subnets
+   include Demand Assignment Multiple Access (DAMA) satellite and
+   terrestrial wireless networks, IEEE 802.11 point coordination
+   function (PCF) mode, and DOCSIS.  A connection-oriented network (such
+   as the PSTN, ATM or Frame Relay) reserves resources on a much longer
+   timescale, and is therefore not a BoD subnet in our taxonomy.
+
+   The design parameters for BoD are similar to those in connection-
+   oriented subnetworks, although the implementations may vary
+   significantly.  In BoD, the user typically requests access to the
+   shared channel for some duration.  Access may be allocated for a
+   period of time at a specific rate, for a certain number of packets,
+   or until the user releases the channel.  Access may be coordinated
+   through a central management entity or with a distributed algorithm
+   amongst the users.  Examples of the resource that may be shared
+   include a terrestrial wireless hop, an upstream channel in a cable
+   television system, a satellite uplink, and an end-to-end satellite
+   channel.
+
+   Long-delay BoD subnets pose problems similar to connection-oriented
+   subnets in anticipating traffic.  While connection-oriented subnets
+   hold idle channels open expecting new data to arrive, BoD subnets
+   request channel access based on buffer occupancy (or expected buffer
+   occupancy) on the sending port.  Poor performance will likely result
+   if the sender does not anticipate additional traffic arriving at that
+   port during the time it takes to grant a transmission request.  It is
+   recommended that the algorithm have the capability to extend a hold
+   on the channel for data that has arrived after the original request
+   was generated (this may be done by piggybacking new requests on user
+   data).
+
+   There is a wide variety of BoD protocols available.  However, there
+   has been relatively little comprehensive research on the interactions
+   between BoD mechanisms and Internet protocol performance.  Research
+   on some specific mechanisms is available (e.g., [AR02]).  One item
+   that has been studied is TCP's retransmission timer [KY02].  BoD
+
+
+
+Karn, et al.             Best Current Practice                 [Page 13]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   systems can cause spurious timeouts when adjusting from a relatively
+   high data rate to a relatively low data rate.  In this case, TCP's
+   transmitted data takes longer to get through the network than
+   predicted by the TCP sender's computed retransmission timeout.
+   Therefore, the TCP sender is prone to resending a segment
+   prematurely.
+
+8.  Reliability and Error Control
+
+   In the Internet architecture, the ultimate responsibility for error
+   recovery is at the end points [SRC81].  The Internet may occasionally
+   drop, corrupt, duplicate, or reorder packets, and the transport
+   protocol (e.g., TCP) or application (e.g., if UDP is used as the
+   transport protocol) must recover from these errors on an end-to-end
+   basis [RFC3155].  Error recovery in the subnetwork is therefore
+   justifiable only to the extent that it can enhance overall
+   performance.  It is important to recognize that a subnetwork can go
+   too far in attempting to provide error recovery services in the
+   Internet environment.  Subnet reliability should be "lightweight",
+   i.e., it only has to be "good enough", *not* perfect.
+
+   In this section, we discuss how to analyze characteristics of a
+   subnetwork to determine what is "good enough".  The discussion below
+   focuses on TCP, which is the most widely-used transport protocol in
+   the Internet.  It is widely believed (and is a stated goal within the
+   IETF) that non-TCP transport protocols should attempt to be "TCP-
+   friendly" and have many of the same performance characteristics.
+   Thus, the discussion below should be applicable, even to portions of
+   the Internet where TCP may not be the predominant protocol.
+
+8.1.  TCP vs Link-Layer Retransmission
+
+   Error recovery involves the generation and transmission of redundant
+   information computed from user data.  Depending on how much redundant
+   information is sent and how it is generated, the receiver can use it
+   to reliably detect transmission errors, correct up to some maximum
+   number of transmission errors, or both.  The general approach is
+   known as Error Control Coding, or ECC.
+
+   The use of ECC to detect transmission errors so that retransmissions
+   (hopefully without errors) can be requested is widely known as "ARQ"
+   (Automatic Repeat Request).
+
+   When enough ECC information is available to permit the receiver to
+   correct some transmission errors without a retransmission, the
+   approach is known as Forward Error Correction (FEC).  Due to the
+   greater complexity of the required ECC and the need to tailor its
+   design to the characteristics of a specific modem and channel, FEC
+
+
+
+Karn, et al.             Best Current Practice                 [Page 14]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   has traditionally been implemented in special-purpose hardware
+   integral to a modem.  This effectively makes it part of the physical
+   layer.
+
+   Unlike ARQ, FEC was rarely used for telecommunications outside of
+   space links prior to the 1990s.  It is now nearly universal in
+   telephone, cable and DSL modems, digital satellite links, and digital
+   mobile telephones.  FEC is also heavily used in optical and magnetic
+   storage where "retransmissions" are not possible.
+
+   Some systems use hybrid combinations of ARQ layered atop FEC; V.90
+   dialup modems (in the upstream direction) with V.42 error control are
+   one example.  Most errors are corrected by the trellis (FEC) code
+   within the V.90 modem, and most remaining errors are detected and
+   corrected by the ARQ mechanisms in V.42.
+
+   Work is now underway to apply FEC above the physical layer, primarily
+   in connection with reliable multicasting [RFC3048] [RFC3450-RFC3453]
+   where conventional ARQ mechanisms are inefficient or difficult to
+   implement.  However, in this discussion, we will assume that if FEC
+   is present, it is implemented within the physical layer.
+
+   Depending on the layer in which it is implemented, error control can
+   operate on an end-to-end basis or over a shorter span, such as a
+   single link.  TCP is the most important example of an end-to-end
+   protocol that uses an ARQ strategy.
+
+   Many link-layer protocols use ARQ, usually some flavor of HDLC
+   [ISO3309].  Examples include the X.25 link layer, the AX.25 protocol
+   used in amateur packet radio, 802.11 wireless LANs, and the reliable
+   link layer specified in IEEE 802.2.
+
+   Only end-to-end error recovery can ensure reliable service to the
+   application (see Section 8).  However, some subnetworks (e.g., many
+   wireless links) also have link-layer error recovery as a performance
+   enhancement [RFC3366].  For example, many cellular links have small
+   physical frame sizes (< 100 bytes) and relatively high frame loss
+   rates.  Relying solely on end-to-end error recovery can clearly yield
+   a performance degradation, as retransmissions across the end-to-end
+   path take much longer to be received than when link layer
+   retransmissions are used.  Thus, link-layer error recovery can often
+   increase end-to-end performance.  As a result, link-layer and end-
+   to-end recovery often co-exist; this can lead to the possibility of
+   inefficient interactions between the two layers of ARQ protocols.
+
+   This inter-layer "competition" might lead to the following wasteful
+   situation.  When the link layer retransmits (parts of) a packet, the
+   link latency momentarily increases.  Since TCP bases its
+
+
+
+Karn, et al.             Best Current Practice                 [Page 15]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   retransmission timeout on prior measurements of total end-to-end
+   latency, including that of the link in question, this sudden increase
+   in latency may trigger an unnecessary retransmission by TCP of a
+   packet that the link layer is still retransmitting.  Such spurious
+   end-to-end retransmissions generate unnecessary load and reduce end-
+   to-end throughput.  As a result, the link layer may even have
+   multiple copies of the same packet in the same link queue at the same
+   time.  In general, one could say the competing error recovery is
+   caused by an inner control loop (link-layer error recovery) reacting
+   to the same signal as an outer control loop (end-to-end error
+   recovery) without any coordination between the loops.  Note that this
+   is solely an efficiency issue; TCP continues to provide reliable
+   end-to-end delivery over such links.
+
+   This raises the question of how persistent a link-layer sender should
+   be in performing retransmission [RFC3366].  We define the link-layer
+   (LL) ARQ persistency as the maximum time that a particular link will
+   spend trying to transfer a packet before it can be discarded.  This
+   deliberately simplified definition says nothing about the maximum
+   number of retransmissions, retransmission strategies, queue sizes,
+   queuing disciplines, transmission delays, or the like.  The reason we
+   use the term LL ARQ persistency, instead of a term such as "maximum
+   link-layer packet holding time," is that the definition closely
+   relates to link-layer error recovery.  For example, on links that
+   implement straightforward error recovery strategies, LL ARQ
+   persistency will often correspond to a maximum number of
+   retransmissions permitted per link-layer frame.
+
+   For link layers that do not or cannot differentiate between flows
+   (e.g., due to network layer encryption), the LL ARQ persistency
+   should be small.  This avoids any harmful effects or performance
+   degradation resulting from indiscriminate high persistence.  A
+   detailed discussion of these issues is provided in [RFC3366].
+
+   However, when a link layer can identify individual flows and apply
+   ARQ selectively [LKJK02], then the link ARQ persistency should be
+   high for a flow using reliable unicast transport protocols (e.g.,
+   TCP) and must be low for all other flows.  Setting the link ARQ
+   persistency larger than the largest link outage allows TCP to rapidly
+   restore transmission without needing to wait for a retransmission
+   time out.  This generally improves TCP performance in the face of
+   transient outages.  However, excessively high persistence may be
+   disadvantageous; a practical upper limit of 30-60 seconds may be
+   desirable.  Implementation of such schemes remains a research issue.
+   (See also Section 8.2, "Recovery from Subnetwork Outages".)
+
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 16]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   Many subnetwork designers have opportunities to reduce the
+   probability of packet loss, e.g., with FEC, ARQ, and interleaving, at
+   the cost of increased delay.  TCP performance improves with
+   decreasing loss but worsens with increasing end-to-end delay, so it
+   is important to find the proper balance through analysis and
+   simulation.
+
+8.2.  Recovery from Subnetwork Outages
+
+   Some types of subnetworks, particularly mobile radio, are subject to
+   frequent temporary outages.  For example, an active cellular data
+   user may drive or walk into an area (such as a tunnel) that is out of
+   range of any base station.  No packets will be delivered successfully
+   until the user returns to an area with coverage.
+
+   The Internet protocols currently provide no standard way for a
+   subnetwork to explicitly notify an upper layer protocol (e.g., TCP)
+   that it is experiencing an outage rather than severe congestion.
+
+   Under these circumstances TCP will, after each unsuccessful
+   retransmission, wait even longer before trying again; this is its
+   "exponential back-off" algorithm.  Furthermore, TCP will not discover
+   that the subnetwork outage has ended until its next retransmission
+   attempt.  If TCP has backed off, this may take some time.  This can
+   lead to extremely poor TCP performance over such subnetworks.
+
+   It is therefore highly desirable that a subnetwork subject to outages
+   does not silently discard packets during an outage.  Ideally, the
+   subnetwork should define an interface to the next higher layer (i.e.,
+   IP) that allows it to refuse packets during an outage, and to
+   automatically ask IP for new packets when it is again able to deliver
+   them.  If it cannot do this, then the subnetwork should hold onto at
+   least some of the packets it accepts during an outage and attempt to
+   deliver them when the outage ends.  When packets are discarded, IP
+   should be notified so that the appropriate ICMP messages can be sent.
+
+   Note that it is *not* necessary to completely avoid dropping packets
+   during an outage.  The purpose of holding onto a packet during an
+   outage, either in the subnetwork or at the IP layer, is so that its
+   eventual delivery will implicitly notify TCP that the subnetwork is
+   again operational.  This is to enhance performance, not to ensure
+   reliability -- reliability, as discussed earlier, can only be ensured
+   on an end-to-end basis.
+
+   Only a few packets per TCP connection, including ACKs, need be held
+   in this way to cause the TCP sender to recover from the additional
+   losses once the flow resumes [RFC3366].
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 17]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   Because it would be a layering violation (and possibly a performance
+   hit) for IP or a subnetwork layer to look at TCP headers (which would
+   in any event be impossible if IPsec encryption [RFC2401] is in use),
+   it would be reasonable for the IP or subnetwork layers to choose, as
+   a design parameter, some small number of packets that will be
+   retained during an outage.
+
+8.3.  CRCs, Checksums and Error Detection
+
+   The TCP [RFC793], UDP [RFC768], ICMP, and IPv4 [RFC791] protocols all
+   use the same simple 16-bit 1's complement checksum algorithm
+   [RFC1071] to detect corrupted packets.  The IPv4 header checksum
+   protects only the IPv4 header, while the TCP, ICMP, and UDP checksums
+   provide end-to-end error detection for both the transport pseudo
+   header (including network and transport layer information) and the
+   transport payload data.  Protection of the data is optional for
+   applications using UDP [RFC768] for IPv4, but is required for IPv6.
+
+   The Internet checksum is not very strong from a coding theory
+   standpoint, but it is easy to compute in software, and various
+   proposals to replace the Internet checksums with stronger checksums
+   have failed.  However, it is known that undetected errors can and do
+   occur in packets received by end hosts [SP2000].
+
+   To reduce processing costs, IPv6 has no IP header checksum.  The
+   destination host detects "important" errors in the IP header, such as
+   the delivery of the packet to the wrong destination.  This is done by
+   including the IP source and destination addresses (pseudo header) in
+   the computation of the checksum in the TCP or UDP header, a practice
+   already performed in IPv4.  Errors in other IPv6 header fields may go
+   undetected within the network; this was considered a reasonable price
+   to pay for a considerable reduction in the processing required by
+   each router, and it was assumed that subnetworks would use a strong
+   link CRC.
+
+   One way to provide additional protection for an IPv4 or IPv6 header
+   is by the authentication and packet integrity services of the IP
+   Security (IPsec) protocol [RFC2401].  However, this may not be a
+   choice available to the subnetwork designer.
+
+   Most subnetworks implement error detection just above the physical
+   layer.  Packets corrupted in transmission are detected and discarded
+   before delivery to the IP layer.  A 16-bit cyclic redundancy check
+   (CRC) is usually the minimum for error detection.  This is
+   significantly more robust against most patterns of errors than the
+   16-bit Internet checksum.  Note that the error detection properties
+   of a specific CRC code diminish with increasing frame size.  The
+   Point-to-Point Protocol [RFC1662] requires support of a 16-bit CRC
+
+
+
+Karn, et al.             Best Current Practice                 [Page 18]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   for each link frame, with a 32-bit CRC as an option.  (PPP is often
+   used in conjunction with a dialup modem, which provides its own error
+   control).  Other subnetworks, including 802.3/Ethernet, AAL5/ATM,
+   FDDI, Token Ring, and PPP over SONET/SDH all use a 32-bit CRC.  Many
+   subnetworks can also use other mechanisms to enhance the error
+   detection capability of the link CRC (e.g., FEC in dialup modems,
+   mobile radio and satellite channels).
+
+   Any new subnetwork designed to carry IP should therefore provide
+   error detection for each IP packet that is at least as strong as the
+   32-bit CRC specified in [ISO3309].  While this will achieve a very
+   low undetected packet error rate due to transmission errors, it will
+   not (and need not) achieve a very low packet loss rate as the
+   Internet protocols are better suited to dealing with lost packets
+   than to dealing with corrupted packets [SRC81].
+
+   Packet corruption may be, and is, also caused by bugs in host and
+   router hardware and software.  Even if every subnetwork implemented
+   strong error detection, it is still essential that end-to-end
+   checksums are used at the receiving end host [SP2000].
+
+   Designers of complex subnetworks consisting of internal links and
+   packet switches should consider implementing error detection on an
+   edge-to-edge basis to cover an entire SNDU (or IP packet).  A CRC
+   would be generated at the entry point to the subnetwork and checked
+   at the exit endpoint.  This may be used instead of, or in combination
+   with, error detection at the interface to each physical link.  An
+   edge-to-edge check has the significant advantage of protecting
+   against errors introduced anywhere within the subnetwork, not just
+   within its transmission links.  Examples of this approach include the
+   way in which the Ethernet CRC-32 is handled by LAN bridges [802.1D].
+   ATM AAL5 [ITU-I363] also uses an edge-to-edge CRC-32.
+
+   Some specific applications may be tolerant of residual errors in the
+   data they exchange, but removal of the link CRC may expose the
+   network to an undesirable increase in undetected errors in the IP and
+   transport headers.  Applications may also require a high level of
+   error protection for control information exchanged by protocols
+   acting above the transport layer.  One example is a voice codec,
+   which is robust against bit errors in the speech samples.  For such
+   mechanisms to work, the receiving application must be able to
+   tolerate receiving corrupted data.  This also requires that an
+   application uses a mechanism to signal that payload corruption is
+   permitted and to indicate the coverage (headers and data) required to
+   be protected by the subnetwork CRC.  The UDP-Lite protocol [RFC3828]
+   is the first Internet standards track transport protocol supporting
+   partial payload protection.  Receipt of corrupt data by arbitrary
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 19]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   application protocols carries a serious danger that a subnet delivers
+   data with errors that remain undetected by the application and hence
+   corrupt the communicated data [SRC81].
+
+8.4.  How TCP Works
+
+   One of TCP's functions is end-host based congestion control for the
+   Internet.  This is a critical part of the overall stability of the
+   Internet, so it is important that link-layer designers understand
+   TCP's congestion control algorithms.
+
+   TCP assumes that, at the most abstract level, the network consists of
+   links and queues.  Queues provide output-buffering on links that are
+   momentarily oversubscribed.  They smooth instantaneous traffic bursts
+   to fit the link bandwidth.  When demand exceeds link capacity long
+   enough to fill the queue, packets must be dropped.  The traditional
+   action of dropping the most recent packet ("tail dropping") is no
+   longer recommended [RFC2309] [RFC2914], but it is still widely
+   practiced.
+
+   TCP uses sequence numbering and acknowledgments (ACKs) on an
+   end-to-end basis to provide reliable, sequenced delivery.  TCP ACKs
+   are cumulative, i.e., each implicitly ACKs every segment received so
+   far.  If a packet with an unexpected sequence number is received, the
+   ACK field in the packets returned by the receiver will cease to
+   advance.  Using an optional enhancement, TCP can send selective
+   acknowledgments (SACKs) [RFC2018] to indicate which segments have
+   arrived at the receiver.
+
+   Since the most common cause of packet loss is congestion, TCP treats
+   packet loss as an indication of potential Internet congestion along
+   the path between TCP end hosts.  This happens automatically, and the
+   subnetwork need not know anything about IP or TCP.  A subnetwork node
+   simply drops packets whenever it must, though some packet-dropping
+   strategies (e.g., RED) are more fair to competing flows than others.
+
+   TCP recovers from packet losses in two different ways.  The most
+   important mechanism is the retransmission timeout.  If an ACK fails
+   to arrive after a certain period of time, TCP retransmits the oldest
+   unacked packet.  Taking this as a hint that the network is congested,
+   TCP waits for the retransmission to be ACKed before it continues, and
+   it gradually increases the number of packets in flight as long as a
+   timeout does not occur again.
+
+   A retransmission timeout can impose a significant performance
+   penalty, as the sender is idle during the timeout interval and
+   restarts with a congestion window of one TCP segment following the
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 20]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   timeout.  To allow faster recovery from the occasional lost packet in
+   a bulk transfer, an alternate scheme, known as "fast recovery", was
+   introduced [RFC2581] [RFC2582] [RFC2914] [TCPF98].
+
+   Fast recovery relies on the fact that when a single packet is lost in
+   a bulk transfer, the receiver continues to return ACKs to subsequent
+   data packets that do not actually acknowledge any newly-received
+   data.  These are known as "duplicate acknowledgments" or "dupacks".
+   The sending TCP can use dupacks as a hint that a packet has been lost
+   and retransmit it without waiting for a timeout.  Dupacks effectively
+   constitute a negative acknowledgment (NAK) for the packet sequence
+   number in the acknowledgment field.  TCP waits until a certain number
+   of dupacks (currently 3) are seen prior to assuming a loss has
+   occurred; this helps avoid an unnecessary retransmission during
+   out-of-sequence delivery.
+
+   A technique called "Explicit Congestion Notification" (ECN) [RFC3168]
+   allows routers to directly signal congestion to hosts without
+   dropping packets.  This is done by setting a bit in the IP header.
+   Since ECN support is likely to remain optional, the lack of an ECN
+   bit must *never* be interpreted as a lack of congestion.  Thus, for
+   the foreseeable future, TCP must interpret a lost packet as a signal
+   of congestion.
+
+   The TCP "congestion avoidance" [RFC2581] algorithm maintains a
+   congestion window (cwnd) controlling the amount of data TCP may have
+   in flight at any moment.  Reducing cwnd reduces the overall bandwidth
+   obtained by the connection; similarly, raising cwnd increases
+   performance, up to the limit of the available capacity.
+
+   TCP probes for available network capacity by initially setting cwnd
+   to one or two packets and then increasing cwnd by one packet for each
+   ACK returned from the receiver.  This is TCP's "slow start"
+   mechanism.  When a packet loss is detected (or congestion is signaled
+   by other mechanisms), cwnd is reset to one and the slow start process
+   is repeated until cwnd reaches one half of its previous setting
+   before the reset.  Cwnd continues to increase past this point, but at
+   a much slower rate than before.  If no further losses occur, cwnd
+   will ultimately reach the window size advertised by the receiver.
+
+   This is an "Additive Increase, Multiplicative Decrease" (AIMD)
+   algorithm.  The steep decrease of cwnd in response to congestion
+   provides for network stability; the AIMD algorithm also provides for
+   fairness between long running TCP connections sharing the same path.
+
+
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 21]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+8.5.  TCP Performance Characteristics
+
+   Caveat
+
+   Here we present a current "state-of-the-art" understanding of TCP
+   performance.  This analysis attempts to characterize the performance
+   of TCP connections over links of varying characteristics.
+
+   Link designers may wish to use the techniques in this section to
+   predict what performance TCP/IP may achieve over a new link-layer
+   design.  Such analysis is encouraged.  Because this is a relatively
+   new analysis, and the theory is based on single-stream TCP
+   connections under "ideal" conditions, it should be recognized that
+   the results of such analysis may differ from actual performance in
+   the Internet.  That being said, we have done our best to provide the
+   designers with helpful information to get an accurate picture of the
+   capabilities and limitations of TCP under various conditions.
+
+8.5.1.  The Formulae
+
+   The performance of TCP's AIMD Congestion Avoidance algorithm has been
+   extensively analyzed.  The current best formula for the performance
+   of the specific algorithms used by Reno TCP (i.e., the TCP specified
+   in [RFC2581]) is given by Padhye, et al. [PFTK98].  This formula is:
+
+                                         MSS
+           BW = --------------------------------------------------------
+                RTT*sqrt(1.33*p) + RTO*p*[1+32*p^2]*min[1,3*sqrt(.75*p)]
+
+   where
+
+           BW   is the maximum TCP throughput achievable by an
+                individual TCP flow
+           MSS  is the TCP segment size being used by the connection
+           RTT  is the end-to-end round trip time of the TCP connection
+           RTO  is the packet timeout (based on RTT)
+           p    is the packet loss rate for the path
+                (i.e., .01 if there is 1% packet loss)
+
+   Note that the speed of the links making up the Internet path does not
+   explicitly appear in this formula.  Attempting to send faster than
+   the slowest link in the path causes the queue to grow at the
+   transmitter driving the bottleneck.  This increases the RTT, which in
+   turn reduces the achievable throughput.
+
+   This is currently considered to be the best approximate formula for
+   Reno TCP performance.  A further simplification of this formula is
+   generally made by assuming that RTO is approximately 5*RTT.
+
+
+
+Karn, et al.             Best Current Practice                 [Page 22]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   TCP is constantly being improved.  A simpler formula, which gives an
+   upper bound on the performance of any AIMD algorithm which is likely
+   to be implemented in TCP in the future, was derived by Ott, et al.
+   [MSMO97].
+
+                     MSS   1
+           BW = C    --- -------
+                     RTT sqrt(p)
+
+   where C is 0.93.
+
+8.5.2.  Assumptions
+
+   Both formulae assume that the TCP Receiver Window is not limiting the
+   performance of the connection.  Because the receiver window is
+   entirely determined by end-hosts, we assume that hosts will maximize
+   the announced receiver window to maximize their network performance.
+
+   Both of these formulae allow BW to become infinite if there is no
+   loss.  However, an Internet path will drop packets at bottlenecked
+   queues if the load is too high.  Thus, a completely lossless TCP/IP
+   network can never occur (unless the network is being underutilized).
+
+   The RTT used is the arithmetic average, including queuing delays.
+
+   The formulae are for a single TCP connection.  If a path carries many
+   TCP connections, each will follow the formulae above independently.
+
+   The formulae assume long-running TCP connections.  For connections
+   that are extremely short (<10 packets) and don't lose any packets,
+   performance is driven by the TCP slow-start algorithm.  For
+   connections of medium length, where on average only a few segments
+   are lost, single connection performance will actually be slightly
+   better than given by the formulae above.
+
+   The difference between the simple and complex formulae above is that
+   the complex formula includes the effects of TCP retransmission
+   timeouts.  For very low levels of packet loss (significantly less
+   than 1%), timeouts are unlikely to occur, and the formulae lead to
+   very similar results.  At higher packet losses (1% and above), the
+   complex formula gives a more accurate estimate of performance (which
+   will always be significantly lower than the result from the simple
+   formula).
+
+   Note that these formulae break down as p approaches 100%.
+
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 23]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+8.5.3.  Analysis of Link-Layer Effects on TCP Performance
+
+   Consider the following example:
+
+   A designer invents a new wireless link layer which, on average, loses
+   1% of IP packets.  The link layer supports packets of up to 1040
+   bytes, and has a one-way delay of 20 msec.
+
+   If this link were to be used on an Internet path with a round trip
+   time greater than 80ms, the upper bound may be computed by:
+
+   For MSS, use 1000 bytes to exclude the 40 bytes of minimum IPv4 and
+   TCP headers.
+
+   For RTT, use 120 msec (80 msec for the Internet part, plus 20 msec
+   each way for the new wireless link).
+
+   For p, use .01.  For C, assume 1.
+
+   The simple formula gives:
+
+      BW = (1000 * 8 bits) / (.120 sec * sqrt(.01)) = 666 kbit/sec
+
+   The more complex formula gives:
+
+      BW = 402.9 kbit/sec
+
+   If this were a 2 Mb/s wireless LAN, the designers might be somewhat
+   disappointed.
+
+   Some observations on performance:
+
+   1.  We have assumed that the packet losses on the link layer are
+       interpreted as congestion by TCP.  This is a "fact of life" that
+       must be accepted.
+
+   2.  The equations for TCP performance are all expressed in terms of
+       packet loss, but many subnetwork designers think in terms of
+       bit-error ratio.  *If* channel bit errors are independent, then
+       the probability of a packet being corrupted is:
+
+         p = 1 - ([1 - BER]^[FRAME_SIZE*8])
+
+       Here we assume FRAME_SIZE is in bytes and "^" represents
+       exponentiation.  It includes the user data and all headers
+       (TCP, IP and subnetwork).  (Note: this analysis assumes the
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 24]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+       subnetwork does not perform ARQ or transparent fragmentation
+       [RFC3366].)  If the inequality
+
+         BER * [FRAME_SIZE*8] << 1
+
+       holds, the packet loss probability p can be approximated by:
+
+         p = BER * [FRAME_SIZE*8]
+
+       These equations can be used to apply BER to the performance
+       equations above.
+
+       Note that FRAME_SIZE can vary from one packet to the next.  Small
+       packets (such as TCP acks) generally have a smaller probability
+       of packet error than, say, a TCP packet carrying one MSS (maximum
+       segment size) of user data.  A flow of small TCP acks can be
+       expected to be slightly more reliable than a stream of larger TCP
+       data segments.
+
+       It bears repeating that the above analysis assumes that bit
+       errors are statistically independent.  Because this is not true
+       for many real links, our computation of p is actually an upper
+       bound, not the exact probability of packet loss.
+
+       There are many reasons why bit errors are not independent on real
+       links.  Many radio links are affected by propagation fading or by
+       interference that lasts over many bit times.  Also, links with
+       Forward Error Correction (FEC) generally have very non-uniform
+       bit error distributions that depend on the type of FEC, but in
+       general the uncorrected errors tend to occur in bursts even when
+       channel symbol errors are independent.  In all such cases, our
+       computation of p from BER can only place an upper limit on the
+       packet loss rate.
+
+       If the distribution of errors under the FEC scheme is known, one
+       could apply the same type of analysis as above, using the correct
+       distribution function for the BER.  It is more likely in these
+       FEC cases, however, that empirical methods are needed to
+       determine the actual packet loss rate.
+
+   3.  Note that the packet size plays an important role.  If the
+       subnetwork loss characteristics are such that large packets have
+       the same probability of loss as smaller packets, then larger
+       packets will yield improved performance.
+
+
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 25]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   4.  We have chosen a specific RTT that might occur on a wide-area
+       Internet path within the USA.  It is important to recognize that
+       a variety of RTT values are experienced in the Internet.
+
+       For example, RTTs are typically less than 10 msec in a wired LAN
+       environment when communicating with a local host.  International
+       connections may have RTTs of 200 msec or more.  Modems and other
+       low-capacity links can add considerable delay due to their long
+       packet transmission (serialisation) times.
+
+       Links over geostationary repeater satellites have one-way speed-
+       of-light delays of around 250ms, a minimum of 125ms propagation
+       delay up to the satellite and 125ms down.  The RTT of an end-to-
+       end TCP connection that includes such a link can be expected to
+       be greater than 250ms.
+
+       Queues on heavily-congested links may back up, increasing RTTs.
+       Finally, virtual private networks (VPNs) and other forms of
+       encryption and tunneling can add significant end-to-end delay to
+       network connections.
+
+9.  Quality-of-Service (QoS) Considerations
+
+   It is generally recognized that specific service guarantees are
+   needed to support real-time multimedia, toll-quality telephony, and
+   other performance-critical applications.  The provision of such
+   Quality of Service guarantees in the Internet is an active area of
+   research and standardization.  The IETF has not converged on a single
+   service model, set of services, or single mechanism that will offer
+   useful guarantees to applications and be scalable to the Internet.
+   Indeed, the IETF does not have a single definition of Quality of
+   Service.  [RFC2990] represents a current understanding of the
+   challenges in architecting QoS for the Internet.
+
+   There are presently two architectural approaches to providing
+   mechanisms for QoS support in the Internet.
+
+   IP Integrated Services (Intserv) [RFC1633] provides fine-grained
+   service guarantees to individual flows.  Flows are identified by a
+   flow specification (flowspec), which creates a stateful association
+   between individual packets by matching fields in the packet header.
+   Capacity is reserved for the flow, and appropriate traffic
+   conditioning and scheduling is installed in routers along the path.
+   The ReSerVation Protocol (RSVP) [RFC2205] [RFC2210] is usually, but
+   need not necessarily be, used to install the flow QoS state.  Intserv
+   defines two services, in addition to the Default (best effort)
+   service.
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 26]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   1.  Guaranteed Service (GS) [RFC2212] offers hard upper bounds on
+       delay to flows that conform to a traffic specification (TSpec).
+       It uses a fluid-flow model to relate the TSpec and reserved
+       bandwidth (RSpec) to variable delay.  Non-conforming packets are
+       forwarded on a best-effort basis.
+
+   2.  Controlled Load Service (CLS) [RFC2211] offers delay and packet
+       loss equivalent to that of an unloaded network to flows that
+       conform to a TSpec, but no hard bounds.  Non-conforming packets
+       are forwarded on a best-effort basis.
+
+   Intserv requires installation of state information in every
+   participating router.  Performance guarantees cannot be made unless
+   this state is present in every router along the path.  This, along
+   with RSVP processing and the need for usage-based accounting, is
+   believed to have scalability problems, particularly in the core of
+   the Internet [RFC2208].
+
+   IP Differentiated Services (Diffserv) [RFC2475] provides a "toolkit"
+   offering coarse-grained controls to aggregates of flows.  Diffserv in
+   itself does *not* provide QoS guarantees, but can be used to
+   construct services with QoS guarantees across a Diffserv domain.
+   Diffserv attempts to address the scaling issues associated with
+   Intserv by requiring state awareness only at the edge of a Diffserv
+   domain.  At the edge, packets are classified into flows, and the
+   flows are conditioned (marked, policed, or shaped) to a traffic
+   conditioning specification (TCS).  A Diffserv Codepoint (DSCP),
+   identifying a per-hop behavior (PHB), is set in each packet header.
+   The DSCP is carried in the DS-field, subsuming six bits of the former
+   Type-of-Service (ToS) byte [RFC791] of the IP header [RFC2474].   The
+   PHB denotes the forwarding behavior to be applied to the packet in
+   each node in the Diffserv domain.  Although there is a "recommended"
+   DSCP associated with each PHB, the mappings from DSCPs to PHBs are
+   defined by the DS-domain.  In fact, there can be several DSCPs
+   associated with the same PHB.  Diffserv presently defines three PHBs.
+
+   1.  The class selector PHB [RFC2474] replaces the IP precedence field
+       of the former ToS byte.  It offers relative forwarding
+       priorities.
+
+   2.  The Expedited Forwarding (EF) PHB [RFC3246] [RFC3248] guarantees
+       that packets will have a well-defined minimum departure rate
+       which, if not exceeded, ensures that the associated queues are
+       short or empty.  EF is intended to support services that offer
+       tightly-bounded loss, delay, and delay jitter.
+
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 27]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   3.  The Assured Forwarding (AF) PHB group [RFC2597] offers different
+       levels of forwarding assurance for each aggregated flow of
+       packets.  Each AF group is independently allocated forwarding
+       resources.  Packets are marked with one of three drop
+       precedences; those with the highest drop precedence are dropped
+       with higher probability than those marked with the lowest drop
+       precedence.  DSCPs are recommended for four independent AF
+       groups, although a DS domain can have more or fewer AF groups.
+
+   Ongoing work in the IETF is addressing ways to support Intserv with
+   Diffserv.  There is some belief (e.g., as expressed in [RFC2990])
+   that such an approach will allow individual flows to receive service
+   guarantees and scale to the global Internet.
+
+   The QoS guarantees that can be offered by the IP layer are a product
+   of two factors:
+
+   1.  the concatenation of the QoS guarantees offered by the subnets
+       along the path of a flow.  This implies that a subnet may wish to
+       offer multiple services (with different QoS guarantees) to the IP
+       layer, which can then determine which flows use which subnet
+       service.  To put it another way, forwarding behavior in the
+       subnet needs to be "clued" by the forwarding behavior (service or
+       PHB) at the IP layer, and
+
+   2.  the operation of a set of cooperating mechanisms, such as
+       bandwidth reservation and admission control, policy management,
+       traffic classification, traffic conditioning (marking, policing
+       and/or shaping), selective discard, queuing, and scheduling.
+       Note that support for QoS in subnets may require similar
+       mechanisms, especially when these subnets are general topology
+       subnets (e.g., ATM, frame relay, or MPLS) or shared media
+       subnets.
+
+   Many subnetwork designers face inherent tradeoffs between delay,
+   throughput, reliability, and cost.  Other subnetworks have parameters
+   that manage bandwidth, internal connection state, and the like.
+   Therefore, the following subnetwork capabilities may be desirable,
+   although some might be trivial or moot if the subnet is a dedicated
+   point-to-point link.
+
+   1.  The subnetwork should have the ability to reserve bandwidth for a
+       connection or flow and schedule packets accordingly.
+
+   2.  Bandwidth reservations should be based on a one- or two-token
+       bucket model, depending on whether the service is intended to
+       support constant-rate or bursty traffic.
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 28]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   3.  If a connection or flow does not use its reserved bandwidth at a
+       given time, the unused bandwidth should be available for other
+       flows.
+
+   4.  Packets in excess of a connection or flow's agreed rate should be
+       forwarded as best-effort or discarded, depending on the service
+       offered by the subnet to the IP layer.
+
+   5.  If a subnet contains error control mechanisms (retransmission
+       and/or FEC), it should be possible for the IP layer to influence
+       the inherent tradeoffs between uncorrected errors, packet losses,
+       and delay.  These capabilities at the subnet/IP layer service
+       boundary correspond to selection of more or less error control
+       and/or to selection of particular error control mechanisms within
+       the subnetwork.
+
+   6.  The subnet layer should know, and be able to inform the IP layer,
+       how much fixed delay and delay jitter it offers for a flow or
+       connection.  If the Intserv model is used, the delay jitter
+       component may be best expressed in terms of the TSpec/RSpec model
+       described in [RFC2212].
+
+   7.  Support of the Diffserv class selectors [RFC2474] suggests that
+       the subnet might consider mechanisms that support priorities.
+
+10.  Fairness vs Performance
+
+   Subnetwork designers should be aware of the tradeoffs between
+   fairness and efficiency inherent in many transmission scheduling
+   algorithms.  For example, many local area networks use contention
+   protocols to resolve access to a shared transmission channel.  These
+   protocols represent overhead.  While limiting the amount of data that
+   a subnet node may transmit per contention cycle helps assure timely
+   access to the channel for each subnet node, it also increases
+   contention overhead per unit of data sent.
+
+   In some mobile radio networks, capacity is limited by interference,
+   which in turn depends on average transmitter power.  Some receivers
+   may require considerably more transmitter power (generating more
+   interference and consuming more channel capacity) than others.
+
+   In each case, the scheduling algorithm designer must balance
+   competing objectives: providing a fair share of capacity to each
+   subnet node while maximizing the total capacity of the network.  One
+   approach for balancing performance and fairness is outlined in
+   [ES00].
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 29]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+11.  Delay Characteristics
+
+   The TCP sender bases its retransmission timeout (RTO) on measurements
+   of the round trip delay experienced by previous packets.  This allows
+   TCP to adapt automatically to the very wide range of delays found on
+   the Internet.  The recommended algorithms are described in [RFC2988].
+   Evaluations of TCP's retransmission timer can be found in [AP99] and
+   [LS00].
+
+   These algorithms model the delay along an Internet path as a
+   normally-distributed random variable with a slowly-varying mean and
+   standard deviation.  TCP estimates these two parameters by
+   exponentially smoothing individual delay measurements, and it sets
+   the RTO to the estimated mean delay plus some fixed number of
+   standard deviations.  (The algorithm actually uses mean deviation as
+   an approximation to standard deviation, because it is easier to
+   compute.)
+
+   The goal is to compute an RTO that is small enough to detect and
+   recover from packet losses while minimizing unnecessary ("spurious")
+   retransmissions when packets are unexpectedly delayed but not lost.
+   Although these goals conflict, the algorithm works well when the
+   delay variance along the Internet path is low, or the packet loss
+   rate is low.
+
+   If the path delay variance is high, TCP sets an RTO that is much
+   larger than the mean of the measured delays.  If the packet loss rate
+   is low, the large RTO is of little consequence, as timeouts occur
+   only rarely.  Conversely, if the path delay variance is low, then TCP
+   recovers quickly from lost packets; again, the algorithm works well.
+   However, when delay variance and the packet loss rate are both high,
+   these algorithms perform poorly, especially when the mean delay is
+   also high.
+
+   Because TCP uses returning acknowledgments as a "clock" to time the
+   transmission of additional data, excessively high delays (even if the
+   delay variance is low) also affect TCP's ability to fully utilize a
+   high-speed transmission pipe.  It also slows the recovery of lost
+   packets, even when delay variance is small.
+
+   Subnetwork designers should therefore minimize all three parameters
+   (delay, delay variance, and packet loss) as much as possible.
+
+   In many subnetworks, these parameters are inherently in conflict.
+   For example, on a mobile radio channel, the subnetwork designer can
+   use retransmission (ARQ) and/or forward error correction (FEC) to
+   trade off delay, delay variance, and packet loss in an effort to
+   improve TCP performance.  While ARQ increases delay variance, FEC
+
+
+
+Karn, et al.             Best Current Practice                 [Page 30]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   does not.  However, FEC (especially when combined with interleaving)
+   often increases mean delay, even on good channels where ARQ
+   retransmissions are not needed and ARQ would not increase either the
+   delay or the delay variance.
+
+   The tradeoffs among these error control mechanisms and their
+   interactions with TCP can be quite complex, and are the subject of
+   much ongoing research.  We therefore recommend that subnetwork
+   designers provide as much flexibility as possible in the
+   implementation of these mechanisms, and provide access to them as
+   discussed above in the section on Quality of Service.
+
+12.  Bandwidth Asymmetries
+
+   Some subnetworks may provide asymmetric bandwidth (or may cause TCP
+   packet flows to experience asymmetry in the capacity) and the
+   Internet protocol suite will generally still work fine.  However,
+   there is a case when such a scenario reduces TCP performance.  Since
+   TCP data segments are "clocked" out by returning acknowledgments, TCP
+   senders are limited by the rate at which ACKs can be returned
+   [BPK98].  Therefore, when the ratio of the available capacity of the
+   Internet path carrying the data to the bandwidth of the return path
+   of the acknowledgments is too large, the slow return of the ACKs
+   directly impacts performance.  Since ACKs are generally smaller than
+   data segments, TCP can tolerate some asymmetry, but as a general
+   rule, designers of subnetworks should be aware that subnetworks with
+   significant asymmetry can result in reduced performance, unless
+   steps are taken to mitigate this [RFC3449].
+
+   Several strategies have been identified for reducing the impact of
+   asymmetry of the network path between two TCP end hosts, e.g.,
+   [RFC3449].  These techniques attempt to reduce the number of ACKs
+   transmitted over the return path (low bandwidth channel) by changes
+   at the end host(s), and/or by modification of subnetwork packet
+   forwarding.  While these solutions may mitigate the performance
+   issues caused by asymmetric subnetworks, they do have associated cost
+   and may have other implications.  A fuller discussion of strategies
+   and their implications is provided in [RFC3449].
+
+13.  Buffering, flow and congestion control
+
+   Many subnets include multiple links with varying traffic demands and
+   possibly different transmission speeds.  At each link there must be a
+   queuing system, including buffering, scheduling, and a capability to
+   discard excess subnet packets.  These queues may also be part of a
+   subnet flow control or congestion control scheme.
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 31]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   For the purpose of this discussion, we talk about packets without
+   regard to whether they refer to a complete IP packet or a subnetwork
+   frame.  At each queue, a packet experiences a delay that depends on
+   competing traffic and the scheduling discipline, and is subjected to
+   a local discarding policy.
+
+   Some subnets may have flow or congestion control mechanisms in
+   addition to packet dropping.  Such mechanisms can operate on
+   components in the subnet layer, such as schedulers, shapers, or
+   discarders, and can affect the operation of IP forwarders at the
+   edges of the subnet.  However, with the exception of Explicit
+   Congestion Notification [RFC3168] (discussed below), IP has no way to
+   pass explicit congestion or flow control signals to TCP.
+
+   TCP traffic, especially aggregated TCP traffic, is bursty.  As a
+   result, instantaneous queue depths can vary dramatically, even in
+   nominally stable networks.  For optimal performance, packets should
+   be dropped in a controlled fashion, not just when buffer space is
+   unavailable.  How much buffer space should be supplied is still a
+   matter of debate, but as a rule of thumb, each node should have
+   enough buffering to hold one link_bandwidth*link_delay product's
+   worth of data for each TCP connection sharing the link.
+
+   This is often difficult to estimate, since it depends on parameters
+   beyond the subnetwork's control or knowledge.  Internet nodes
+   generally do not implement admission control policies, and cannot
+   limit the number of TCP connections that use them.  In general, it is
+   wise to err in favor of too much buffering rather than too little.
+   It may also be useful for subnets to incorporate mechanisms that
+   measure propagation delays to assist in buffer sizing calculations.
+
+   There is a rough consensus in the research community that active
+   queue management is important to improving fairness, link
+   utilization, and throughput [RFC2309].  Although there are questions
+   and concerns about the effectiveness of active queue management
+   (e.g., [MBDL99]), it is widely considered an improvement over tail-
+   drop discard policies.
+
+   One form of active queue management is the Random Early Detection
+   (RED) algorithm [RED93], a family of related algorithms.  In one
+   version of RED, an exponentially-weighted moving average of the queue
+   depth is maintained:
+
+      When this average queue depth is between a minimum threshold
+      min_th and a maximum threshold max_th, the probability that a
+      packet is dropped is proportional to the amount by which the
+      average queue depth exceeds min_th.
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 32]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+      When this average queue depth is equal to max_th, the drop
+      probability is equal to a configurable parameter max_p.
+
+      When this average queue depth is greater than max_th, packets are
+      always dropped.
+
+   Numerous variants on RED appear in the literature, and there are
+   other active queue management algorithms which claim various
+   advantages over RED [GM02].
+
+   With an active queue management algorithm, dropped packets become a
+   feedback signal to trigger more appropriate congestion behavior by
+   the TCPs in the end hosts.  Randomization of dropping tends to break
+   up the observed tendency of TCP windows belonging to different TCP
+   connections to become synchronized by correlated drops, and it also
+   imposes a degree of fairness on those connections that implement TCP
+   congestion avoidance properly.  Another important property of active
+   queue management algorithms is that they attempt to keep average
+   queue depths short while accommodating large short-term bursts.
+
+   Since TCP neither knows nor cares whether congestive packet loss
+   occurs at the IP layer or in a subnet, it may be advisable for
+   subnets that perform queuing and discarding to consider implementing
+   some form of active queue management.  This is especially true if
+   large aggregates of TCP connections are likely to share the same
+   queue.  However, active queue management may be less effective in the
+   case of many queues carrying smaller aggregates of TCP connections,
+   e.g., in an ATM switch that implements per-VC queuing.
+
+   Note that the performance of active queue management algorithms is
+   highly sensitive to settings of configurable parameters, and also to
+   factors such as RTT [MBB00] [FB00].
+
+   Some subnets, most notably ATM, perform segmentation and reassembly
+   at the subnetwork edges.  Care should be taken here in designing
+   discard policies.  If the subnet discards a fragment of an IP packet,
+   then the remaining fragments become an unproductive load on the
+   subnet that can markedly degrade end-to-end performance [RF95].
+   Subnetworks should therefore attempt to discard these extra fragments
+   whenever one of them must be discarded.  If the IP packet has already
+   been partially forwarded when discarding becomes necessary, then
+   every remaining fragment except the one marking the end of the IP
+   packet should also be discarded.  For ATM subnets, this specifically
+   means using Early Packet Discard and Partial Packet Discard [ATMFTM].
+
+   Some subnets include flow control mechanisms that effectively require
+   that the rate of traffic flows be shaped upon entry to the subnet.
+   One example of such a subnet mechanism is in the ATM Available Bit
+
+
+
+Karn, et al.             Best Current Practice                 [Page 33]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   Rate (ABR) service category [ATMFTM].  Such flow control mechanisms
+   have the effect of making the subnet nearly lossless by pushing
+   congestion into the IP routers at the edges of the subnet.  In such a
+   case, adequate buffering and discard policies are needed in these
+   routers to deal with a subnet that appears to have varying bandwidth.
+   Whether there is a benefit in this kind of flow control is
+   controversial; there are numerous simulation and analytical studies
+   that go both ways.  It appears that some of the issues leading to
+   such different results include sensitivity to ABR parameters, use of
+   binary rather than explicit rate feedback, use (or not) of per-VC
+   queuing, and the specific ATM switch algorithms selected for the
+   study.  Anecdotally, some large networks that used IP over ABR to
+   carry TCP traffic have claimed it to be successful, but have
+   published no results.
+
+   Another possible approach to flow control in the subnet would be to
+   work with TCP Explicit Congestion Notification (ECN) semantics
+   [RFC3168] through utilizing explicit congestion indicators in subnet
+   frames.  Routers at the edges of the subnet, rather than shaping,
+   would set the explicit congestion bit in those IP packets that are
+   received in subnet frames that have an ECN indication.  Nodes in the
+   subnet would need to implement an active queue management protocol
+   that marks subnet frames instead of dropping them.
+
+   ECN is currently a proposed standard, but it is not yet widely
+   deployed.
+
+14.  Compression
+
+   Application data compression is a function that can usually be
+   omitted in the subnetwork.  The endpoints typically have more CPU and
+   memory resources to run a compression algorithm and a better
+   understanding of what is being compressed.  End-to-end compression
+   benefits every network element in the path, while subnetwork-layer
+   compression, by definition, benefits only a single subnetwork.
+
+   Data presented to the subnetwork layer may already be in a compressed
+   format (e.g., a JPEG file), compressed at the application layer
+   (e.g., the optional "gzip", "compress", and "deflate" compression in
+   HTTP/1.1 [RFC2616]), or compressed at the IP layer (the IP Payload
+   Compression Protocol [RFC3173] supports DEFLATE [RFC2394] and LZS
+   [RFC2395]).  Compression at the subnetwork edges is of no benefit for
+   any of these cases.
+
+   The subnetwork may also process data that has been encrypted by the
+   application (OpenPGP [RFC2440] or S/MIME [RFC2633]), just above TCP
+   (SSL, TLS [RFC2246]), or just above IP (IPsec ESP [RFC2406]).
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 34]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   Ciphers generate high-entropy bit streams lacking any patterns that
+   can be exploited by a compression algorithm.
+
+   However, much data is still transmitted uncompressed over the
+   Internet, so subnetwork compression may be beneficial.  Any
+   subnetwork compression algorithm must not expand uncompressible data,
+   e.g., data that has already been compressed or encrypted.
+
+   We make a strong recommendation that subnetworks operating at low
+   speed or with small MTUs compress IP and transport-level headers (TCP
+   and UDP) using several header compression schemes developed within
+   the IETF [RFC3150].  An uncompressed 40-byte TCP/IP header takes
+   about 33 milliseconds to send at 9600 bps.  "VJ" TCP/IP header
+   compression [RFC1144] compresses most headers to 3-5 bytes, reducing
+   transmission time to several milliseconds on dialup modem links.
+   This is especially beneficial for small, latency-sensitive packets in
+   interactive sessions.
+
+   Similarly, RTP compression schemes, such as CRTP [RFC2508] and ROHC
+   [RFC3095], compress most IP/UDP/RTP headers to 1-4 bytes.  The
+   resulting savings are especially significant when audio packets are
+   kept small to minimize store-and-forward latency.
+
+   Designers should consider the effect of the subnetwork error rate on
+   the performance of header compression.  TCP ordinarily recovers from
+   lost packets by retransmitting only those packets that were actually
+   lost; packets arriving correctly after a packet loss are kept on a
+   resequencing queue and do not need to be retransmitted.  In VJ TCP/IP
+   [RFC1144] header compression, however, the receiver cannot explicitly
+   notify a sender of data corruption and subsequent loss of
+   synchronization between compressor and decompressor.  It relies
+   instead on TCP retransmission to re-synchronize the decompressor.
+   After a packet is lost, the decompressor must discard every
+   subsequent packet, even if the subnetwork makes no further errors,
+   until the sending TCP retransmits to re-synchronize the decompressor.
+   This effect can substantially magnify the effect of subnetwork packet
+   losses if the sending TCP window is large, as it will often be on a
+   path with a large bandwidth*delay product [LRKOJ99].
+
+   Alternate header compression schemes, such as those described in
+   [RFC2507], include an explicit request for retransmission of an
+   uncompressed packet to allow decompressor resynchronization without
+   waiting for a TCP retransmission.  However, these schemes are not yet
+   in widespread use.
+
+   Neither TCP header compression scheme compresses widely-used TCP
+   options such as selective acknowledgements (SACK).  Both fail to
+   compress TCP traffic that makes use of explicit congestion
+
+
+
+Karn, et al.             Best Current Practice                 [Page 35]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   notification (ECN).  Work is under way in the IETF ROHC WG to address
+   these shortcomings in a ROHC header compression scheme for TCP
+   [RFC3095] [RFC3096].
+
+   The subnetwork error rate also is important for RTP header
+   compression.  CRTP uses delta encoding, so a packet loss on the link
+   causes uncertainty about the subsequent packets, which often must be
+   discarded until the decompressor has notified the compressor and the
+   compressor has sent re-synchronizing information.  This typically
+   takes slightly more than the end-to-end path round-trip time.  For
+   links that combine significant error rates with latencies that
+   require multiple packets to be in flight at a time, this leads to
+   significant error propagation, i.e., subsequent losses caused by an
+   initial loss.
+
+   For links that are both high-latency (multiple packets in flight from
+   a typical RTP stream) and error-prone, RTP ROHC provides a more
+   robust way of RTP header compression, at a cost of higher complexity
+   at the compressor and decompressor.  For example, within a talk
+   spurt, only extended losses of (depending on the mode chosen) 12-64
+   packets typically cause error propagation.
+
+15.  Packet Reordering
+
+   The Internet architecture does not guarantee that packets will arrive
+   in the same order in which they were originally transmitted;
+   transport protocols like TCP must take this into account.
+
+   However, reordering does come at a cost with TCP as it is currently
+   defined.  Because TCP returns a cumulative acknowledgment (ACK)
+   indicating the last in-order segment that has arrived, out-of-order
+   segments cause a TCP receiver to transmit a duplicate acknowledgment.
+   When the TCP sender notices three duplicate acknowledgments, it
+   assumes that a segment was dropped by the network and uses the fast
+   retransmit algorithm [Jac90] [RFC2581] to resend the segment.  In
+   addition, the congestion window is reduced by half, effectively
+   halving TCP's sending rate.  If a subnetwork reorders segments
+   significantly such that three duplicate ACKs are generated, the TCP
+   sender needlessly reduces the congestion window and performance
+   suffers.
+
+   Packet reordering frequently occurs in parts of the Internet, and it
+   seems to be difficult or impossible to eliminate [BPS99].  For this
+   reason, research on improving TCP's behavior in the face of packet
+   reordering [LK00] [BA02] has begun.
+
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 36]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   [BPS99] cites reasons why it may even be undesirable to eliminate
+   reordering.  There are situations where average packet latency can be
+   reduced, link efficiency can be increased, and/or reliability can be
+   improved if reordering is permitted.  Examples include certain high
+   speed switches within the Internet backbone and the parallel links
+   used over many Internet paths for load splitting and redundancy.
+
+   This suggests that subnetwork implementers should try to avoid packet
+   reordering whenever possible, but not if doing so compromises
+   efficiency, impairs reliability, or increases average packet delay.
+
+   Note that every header compression scheme currently standardized for
+   the Internet requires in-order packet delivery on the link between
+   compressor and decompressor.  PPP is frequently used to carry
+   compressed TCP/IP packets; since it was originally designed for
+   point-to-point and dialup links, it is assumed to provide in-order
+   delivery.  For this reason, subnetwork implementers who provide PPP
+   interfaces to VPNs and other more complex subnetworks must also
+   maintain in-order delivery of PPP frames.
+
+16.  Mobility
+
+   Internet users are increasingly mobile.  Not only are many Internet
+   nodes laptop computers, but pocket organizers and mobile embedded
+   systems are also becoming nodes on the Internet.  These nodes may
+   connect to many different access points on the Internet over time,
+   and they expect this to be largely transparent to their activities.
+   Except when they are not connected to the Internet at all, and for
+   performance differences when they are connected, they expect that
+   everything will "just work" regardless of their current Internet
+   attachment point or local subnetwork technology.
+
+   Changing a host's Internet attachment point involves one or more of
+   the following steps.
+
+   First, if use of the local subnetwork is restricted, the user's
+   credentials must be verified and access granted.  There are many ways
+   to do this.  A trivial example would be an "Internet cafe" that
+   grants physical access to the subnetwork for a fee.  Subnetworks may
+   implement technical access controls of their own; one example is IEEE
+   802.11 Wired Equivalent Privacy [IEEE80211].  It is common
+   practice for both cellular telephone and Internet service providers
+   (ISPs) to agree to serve one another's users; RADIUS [RFC2865] is the
+   standard method for ISPs to exchange authorization information.
+
+   Second, the host may have to be reconfigured with IP parameters
+   appropriate for the local subnetwork.  This usually includes setting
+   an IP address, default router, and domain name system (DNS) servers.
+
+
+
+Karn, et al.             Best Current Practice                 [Page 37]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   On multiple-access networks, the Dynamic Host Configuration Protocol
+   (DHCP) [RFC2131] is almost universally used for this purpose.  On PPP
+   links, these functions are performed by the IP Control Protocol
+   (IPCP) [RFC1332].
+
+   Third, traffic destined for the mobile host must be routed to its
+   current location.  This roaming function is the most common meaning
+   of the term "Internet mobility".
+
+   Internet mobility can be provided at any of several layers in the
+   Internet protocol stack, and there is ongoing debate as to which is
+   the most appropriate and efficient.  Mobility is already a feature of
+   certain application layer protocols; the Post Office Protocol (POP)
+   [RFC1939] and the Internet Message Access Protocol (IMAP) [RFC3501]
+   were created specifically to provide mobility in the receipt of
+   electronic mail.
+
+   Mobility can also be provided at the IP layer [RFC3344].  This
+   mechanism provides greater transparency, viz., IP addresses that
+   remain fixed as the nodes move, but at the cost of potentially
+   significant network overhead and increased delay because of the sub-
+   optimal network routing and tunneling involved.
+
+   Some subnetworks may provide internal mobility, transparent to IP, as
+   a feature of their own internal routing mechanisms.  To the extent
+   that these simplify routing at the IP layer, reduce the need for
+   mechanisms like Mobile IP, or exploit mechanisms unique to the
+   subnetwork, this is generally desirable.  This is especially true
+   when the subnetwork covers a relatively small geographic area and the
+   users move rapidly between the attachment points within that area.
+   Examples of internal mobility schemes include Ethernet switching and
+   intra-system handoff in cellular telephony.
+
+   However, if the subnetwork is physically large and connects to other
+   parts of the Internet at multiple geographic points, care should be
+   taken to optimize the wide-area routing of packets between nodes on
+   the external Internet and nodes on the subnet.  This is generally
+   done with "nearest exit" routing strategies.  Because a given
+   subnetwork may be unaware of the actual physical location of a
+   destination on another subnetwork, it simply routes packets bound for
+   the other subnetwork to the nearest router between the two.  This
+   implies some awareness of IP addressing and routing within the
+   subnetwork.  The subnetwork may wish to use IP routing internally for
+   wide area routing and restrict subnetwork-specific routing to
+   constrained geographic areas where the effects of suboptimal routing
+   are minimized.
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 38]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+17.  Routing
+
+   Subnetworks connecting more than two systems must provide their own
+   internal Layer-2 forwarding mechanisms, either implicitly (e.g.,
+   broadcast) or explicitly (e.g., switched).  Since routing is the
+   major function of the Internet layer, the question naturally arises
+   as to the interaction between routing at the Internet layer and
+   routing in the subnet, and proper division of function between the
+   two.
+
+   Layer-2 subnetworks can be point-to-point, connecting two systems, or
+   multipoint.  Multipoint subnetworks can be broadcast (e.g., shared
+   media or emulated) or non-broadcast.  Generally, IP considers
+   multipoint subnetworks as broadcast, with shared-medium Ethernet as
+   the canonical (and historical) example, and point-to-point
+   subnetworks as a degenerate case.  Non-broadcast subnetworks may
+   require additional mechanisms, e.g., above IP at the routing layer
+   [RFC2328].
+
+   IP is ignorant of the topology of the subnetwork layer.  In
+   particular, reconfiguration of subnetwork paths is not tracked by the
+   IP layer.  IP is only affected by whether it can send/receive packets
+   sent to the remotely connected systems via the subnetwork interface
+   (i.e., the reachability from one router to another).  IP further
+   considers that subnetworks are largely static -- that both their
+   membership and existence are stable at routing timescales (tens of
+   seconds); changes to these are considered re-provisioning, rather
+   than routing.
+
+   Routing functionality in a subnetwork is related to addressing in
+   that subnetwork.  Resolution of addresses on subnetwork links is
+   required for forwarding IP packets across links (e.g., ARP for IPv4,
+   or ND for IPv6).  There is unlikely to be direct interaction between
+   subnetwork routing and IP routing.  Where broadcast is provided or
+   explicitly emulated, address resolution can be used directly; where
+   not provided, the link layer routing may interface to a protocol for
+   resolution, e.g., to the Next-Hop Resolution Protocol [RFC2332] to
+   provide context-dependent address resolution capabilities.
+
+   Subnetwork routing can either complement or compete with IP routing.
+   It complements IP when a subnetwork encapsulates its internal
+   routing, and where the effects of that routing are not visible at the
+   IP layer.  However, if different paths in the subnetwork have
+   characteristics that affect IP routing, it can affect or even inhibit
+   the convergence of IP routing.
+
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 39]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   Routing protocols generally consider Layer-2 subnetworks, i.e., with
+   subnet masks and no intermediate IP hops, to have uniform routing
+   metrics to all members.  Routing can break when a link's
+   characteristics do not match the routing metric, in this case, e.g.,
+   when some member pairs have different path characteristics.  Consider
+   a virtual Ethernet subnetwork that includes both nearby (sub-
+   millisecond latency) and remote (100's of milliseconds away) systems.
+   Presenting that group as a single subnetwork means that some routing
+   protocols will assume that all pairs have the same delay, and that
+   that delay is small.  Because this is not the case, the routing
+   tables constructed may be suboptimal or may even fail to converge.
+
+   When a subnetwork is used for transit between a set of routers, it
+   conventionally provides the equivalent of a full mesh of point-to-
+   point links.  Simplicity of the internal subnet structure can be used
+   (e.g., via NHRP [RFC2332]) to reduce the size of address resolution
+   tables, but routing exchanges will continue to reflect the full mesh
+   they emulate.  In general, subnetworks should not be used as a
+   transit among a set of routers where routing protocols would break if
+   a full mesh of equivalent point-to-point links were used.
+
+   Some subnetworks have special features that allow the use of more
+   effective or responsive routing mechanisms that cannot be implemented
+   in IP because of its need for generality.  One example is the self-
+   learning bridge algorithm widely used in Ethernet networks.  Learning
+   bridges perform Layer-2 subnetwork forwarding, avoiding the need for
+   dynamic routing at each subnetwork hop.  Another is the "handoff"
+   mechanism in cellular telephone networks, particularly the "soft
+   handoff" scheme in IS-95 CDMA.
+
+   Subnetworks that cover large geographic areas or include links of
+   widely-varying capabilities should be avoided.  IP routing generally
+   considers all multipoint subnets equivalent to a local, shared-medium
+   link with uniform metrics between any pair of systems, and ignores
+   internal subnetwork topology.  Where a subnetwork diverges from that
+   assumption, it is the obligation of subnetwork designers to provide
+   compensating mechanisms.  Not doing so can affect the scalability and
+   convergence of IP routing, as noted above.
+
+   The subnetwork designer who decides to implement internal routing
+   should consider whether a custom routing algorithm is warranted, or
+   if an existing Internet routing algorithm or protocol may suffice.
+   The designer should consider whether this decision is to reduce the
+   address resolution table size (possible, but with additional protocol
+   support required), or is trying to reduce routing table complexity.
+   The latter may be better achieved by partitioning the subnetwork,
+   either physically or logically, and using network-layer protocols to
+   support partitioning (e.g., AS's in BGP).  Protocols and routing
+
+
+
+Karn, et al.             Best Current Practice                 [Page 40]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   algorithms can be notoriously subtle, complex, and difficult to
+   implement correctly.  Much work can be avoided if existing protocols
+   or implementations can be readily reused.
+
+18.  Security Considerations
+
+   Security has become a high priority in the design and operation of
+   the Internet.  The Internet is vast, and countless organizations and
+   individuals own and operate its various components.  A consensus has
+   emerged for what might be called a "security placement principle": a
+   security mechanism is most effective when it is placed as close as
+   possible to, and under the direct control of the owner of the asset
+   that it protects.
+
+   A corollary of this principle is that end-to-end security (e.g.,
+   confidentiality, authentication, integrity, and access control)
+   cannot be ensured with subnetwork security mechanisms.  Not only are
+   end-to-end security mechanisms much more closely associated with the
+   end-user assets they protect, they are also much more comprehensive.
+   For example, end-to-end security mechanisms cover gaps that can
+   appear when otherwise good subnetwork mechanisms are concatenated.
+   This is an important application of the end-to-end principle [SRC81].
+
+   Several security mechanisms that can be used end-to-end have already
+   been deployed in the Internet and are enjoying increasing use.  The
+   most important are the Secure Sockets Layer (SSL) [SSL2] [SSL3] and
+   TLS [RFC2246] primarily used to protect web commerce, Pretty Good
+   Privacy (PGP) [RFC1991] and S/MIME [RFCs-2630-2634], primarily used
+   to protect and authenticate email and software distributions, the
+   Secure Shell (SSH), used for secure remote access and file transfer,
+   and IPsec [RFC2401], a general purpose encryption and authentication
+   mechanism that sits just above IP and can be used by any IP
+   application.  (IPsec can actually be used either on an end-to-end
+   basis or between security gateways that do not include either or both
+   end systems.)
+
+   Nonetheless, end-to-end security mechanisms are not used as widely as
+   might be desired.  However, the group could not reach consensus on
+   whether subnetwork designers should be actively encouraged to
+   implement mechanisms to protect user data.
+
+   The clear consensus of the working group held that subnetwork
+   security mechanisms, especially when weak or incorrectly implemented
+   [BGW01], may actually be counterproductive.  The argument is that
+   subnetwork security mechanisms can lull end users into a false sense
+   of security, diminish the incentive to deploy effective end-to-end
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 41]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   mechanisms, and encourage "risky" uses of the Internet that would not
+   be made if users understood the inherent limits of subnetwork
+   security mechanisms.
+
+   The other point of view encourages subnetwork security on the
+   principle that it is better than the default situation, which all too
+   often is no security at all.  Users of especially vulnerable subnets
+   (such as consumers who have wireless home networks and/or shared
+   media Internet access) often have control over at most one endpoint
+   -- usually a client -- and therefore cannot enforce the use of end-
+   to-end mechanisms.  However, subnet security can be entirely adequate
+   for protecting low-valued assets against the most likely threats.  In
+   any event, subnet mechanisms do not preclude the use of end-to-end
+   mechanisms, which are typically used to protect highly-valued assets.
+   This viewpoint recognizes that many security policies implicitly
+   assume that the entire end-to-end path is composed of a series of
+   concatenated links that are nominally physically secured.  That is,
+   these policies assume that all endpoints of all links are trusted,
+   and that access to the physical medium by attackers is difficult.  To
+   meet the assumptions of such policies, explicit mechanisms are needed
+   for links (especially shared medium links) that lack physical
+   protection.  This, for example, is the rationale that underlies Wired
+   Equivalent Privacy (WEP) in the IEEE 802.11 [IEEE80211] wireless LAN
+   standard, and the Baseline Privacy Interface in the DOCSIS [DOCSIS1]
+   [DOCSIS2] data over cable television networks standards.
+
+   We therefore recommend that subnetwork designers who choose to
+   implement security mechanisms to protect user data be as candid as
+   possible with the details of such security mechanisms and the
+   inherent limits of even the most secure mechanisms when implemented
+   in a subnetwork rather than on an end-to-end basis.
+
+   In keeping with the "placement principle", a clear consensus exists
+   for another subnetwork security role: the protection of the
+   subnetwork itself.  Possible threats to subnetwork assets include
+   theft of service and denial of service; shared media subnets tend to
+   be especially vulnerable to such attacks.  In some cases, mechanisms
+   that protect subnet assets can also improve (but cannot ensure) end-
+   to-end security.
+
+   One security service can be provided by the subnetwork that will aid
+   in the solution of an overall Internet problem: subnetwork security
+   should provide a mechanism to authenticate the source of a subnetwork
+   frame.  This function is missing in some current protocols, e.g., the
+   use of ARP [RFC826] to associate an IPv4 address with a MAC address.
+   The IPv6 Neighbor Discovery (ND) [RFC2461] performs a similar
+   function.
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 42]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   There are well-known security flaws with this address resolution
+   mechanism [Wilbur89].  However, the inclusion of subnetwork frame
+   source authentication will permit a secure subnetwork address.
+
+   Another potential role for subnetwork security is to protect users
+   against traffic analysis, i.e., identifying the communicating parties
+   and determining their communication patterns and volumes even when
+   their actual contents are protected by strong end-to-end security
+   mechanisms.  Lower-layer security can be more effective against
+   traffic analysis due to its inherent ability to aggregate the
+   communications of multiple parties sharing the same physical
+   facilities while obscuring higher-layer protocol information that
+   indicates specific end points, such as IP addresses and TCP/UDP port
+   numbers.
+
+   However, traffic analysis is a notoriously subtle and difficult
+   threat to understand and defeat, far more so than threats to
+   confidentiality and integrity.  We therefore urge extreme care in the
+   design of subnetwork security mechanisms specifically intended to
+   thwart traffic analysis.
+
+   Subnetwork designers must keep in mind that design and implementation
+   for security is difficult [Schneier00].  [Schneier95] describes
+   protocols and algorithms which are considered well-understood and
+   believed to be sound.
+
+   Poor design process, subtle design errors and flawed implementation
+   can result in gaping vulnerabilities.  In recent years, a number of
+   subnet standards have had problems exposed.  The following are
+   examples of mistakes that have been made:
+
+   1.  Use of weak and untested algorithms [Crypto9912] [BGW01].  For a
+       variety of reasons, algorithms were chosen which had subtle
+       flaws, making them vulnerable to a variety of attacks.
+
+   2.  Use of "security by obscurity" [Schneier00] [Crypto9912].  One
+       common mistake is to assume that keeping cryptographic algorithms
+       secret makes them more secure.  This is intuitive, but wrong.
+       Full public disclosure early in the design process attracts peer
+       review by knowledgeable cryptographers.  Exposure of flaws by
+       this review far outweighs any imagined benefit from forcing
+       attackers to reverse engineer security algorithms.
+
+   3.  Inclusion of trapdoors [Schneier00] [Crypto9912].  Trapdoors are
+       flaws surreptitiously left in an algorithm to allow it to be
+       broken.  This might be done to recover lost keys or to permit
+       surreptitious access by governmental agencies.  Trapdoors can be
+       discovered and exploited by malicious attackers.
+
+
+
+Karn, et al.             Best Current Practice                 [Page 43]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   4.  Sending passwords or other identifying information as clear text.
+       For many years, analog cellular telephones could be cloned and
+       used to steal service.  The cloners merely eavesdropped on the
+       registration protocols that exchanged everything in clear text.
+
+   5.  Keys which are common to all systems on a subnet [BGW01].
+
+   6.  Incorrect use of a sound mechanism.  For example [BGW01], one
+       subnet standard includes an initialization vector which is poorly
+       designed and poorly specified.  A determined attacker can easily
+       recover multiple ciphertexts encrypted with the same key stream
+       and perform statistical attacks to decipher them.
+
+   7.  Identifying information sent in clear text that can be resolved
+       to an individual, identifiable device.  This creates a
+       vulnerability to attacks targeted to that device (or its owner).
+
+   8.  Inability to renew and revoke shared secret information.
+
+   9.  Insufficient key length.
+
+   10. Failure to address "man-in-the-middle" attacks, e.g., with mutual
+       authentication.
+
+   11. Failure to provide a form of replay detection, e.g., to prevent a
+       receiver from accepting packets from an attacker that simply
+       resends previously captured network traffic.
+
+   12. Failure to provide integrity mechanisms when providing
+       confidentiality schemes [Bel98].
+
+   This list is by no means comprehensive.  Design problems are
+   difficult to avoid, but expert review is generally invaluable in
+   avoiding problems.
+
+   In addition, well-designed security protocols can be compromised by
+   implementation defects.  Examples of such defects include use of
+   predictable pseudo-random numbers [RFC1750], vulnerability to buffer
+   overflow attacks due to unsafe use of certain I/O system calls
+   [WFBA2000], and inadvertent exposure of secret data.
+
+19.  Contributors
+
+   This document represents a consensus of the members of the IETF
+   Performance Implications of Link Characteristics (PILC) working
+   group.
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 44]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   This document would not have been possible without the contributions
+   of a great number of people in the Performance Implications of Link
+   Characteristics Working Group.  In particular, the following people
+   provided major contributions of text, editing, and advice on this
+   document: Mark Allman provided the final editing to complete this
+   document.  Carsten Bormann provided text on robust header
+   compression.  Gorry Fairhurst provided text on broadcast and
+   multicast issues, routing, and many valuable comments on the entire
+   document.  Aaron Falk provided text on bandwidth on demand.  Dan
+   Grossman provided text on many facets of the document.  Reiner Ludwig
+   provided thorough document review and text on TCP vs. Link-Layer
+   Retransmission.  Jamshid Mahdavi provided text on TCP performance
+   calculations.  Saverio Mascolo provided feedback on the document.
+   Gabriel Montenegro provided feedback on the document.  Marie-Jose
+   Montpetit provided text on bandwidth on demand.  Joe Touch provided
+   text on multicast, broadcast, and routing, and Lloyd Wood provided
+   many valuable comments on versions of the document.
+
+20.  Informative References
+
+   References of the form RFCnnnn are Internet Request for Comments
+   (RFC) documents available online at www.rfc-editor.org.
+
+   [802.1D]      Information Technology Telecommunications and
+                 information exchange between systems Local and
+                 metropolitan area networks, Common specifications Media
+                 access control (MAC) bridges, IEEE 802.1D, 1998.  ISO
+                 15802-3.
+
+   [802.1p]      IEEE, 802.1p, Standard for Local and Metropolitan Area
+                 Networks - Supplement to Media Access Control (MAC)
+                 Bridges: Traffic Class Expediting and Multicast.
+
+   [AP99]        Allman, M. and V. Paxson, On Estimating End-to-End
+                 Network Path Properties, In Proceedings of ACM SIGCOMM
+                 99.
+
+   [AR02]        Acar, G. and C. Rosenberg, Weighted Fair Bandwidth-on-
+                 Demand (WFBoD) for Geo-Stationary Satellite Networks
+                 with On-Board Processing, Computer Networks, 39(1),
+                 2002.
+
+   [ATMFTM]      The ATM Forum, "Traffic Management Specification,
+                 Version 4.0", April 1996, document af-tm-0056.000.
+                 http://www.atmforum.com/
+
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 45]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   [BA02]        Blanton, E. and M. Allman, On Making TCP More Robust to
+                 Packet Reordering. ACM Computer Communication Review,
+                 32(1), January 2002.
+
+   [Bel98]       Bellovin, S., "Cryptography and the Internet", in
+                 Proceedings of CRYPTO '98, August 1998.
+                 http://www.research.att.com/~smb/papers/inet-crypto.pdf
+
+   [BGW01]       Borisov, N., Goldberg, I. and D. Wagner, "Intercepting
+                 Mobile Communications: The Insecurity of 802.11," In
+                 Proceedings of ACM MobiCom, July 2001.
+
+   [BPK98]       Balakrishnan, H., Padmanabhan, V. and R. Katz.  "The
+                 Effects of Asymmetry on TCP Performance."  ACM Mobile
+                 Networks and Applications (MONET), 1998.
+
+   [BPS99]       Bennett, J.C.R., Partridge, C. and N. Shectman, "Packet
+                 Reordering is Not Pathological Network Behavior",
+                 IEEE/ACM Transactions on Networking, Vol. 7, No. 6,
+                 December 1999.
+
+   [CGMP]        Farinacci D., Tweedly A. and T. Speakman, "Cisco Group
+                 Management Protocol (CGMP)", 1996/1997.
+                 ftp://ftpeng.cisco.com/ipmulticast/specs/cgmp.txt
+
+   [Crypto9912]  Schneier, B., "European Cellular Encryption Algorithms",
+                 Crypto-Gram, December 15, 1999.
+                 http://www.counterpane.com
+
+   [DIX82]       Digital Equipment Corp, Intel Corp, Xerox Corp,
+                 Ethernet Local Area Network Specification Version 2.0,
+                 November 1982.
+
+   [DOCSIS1]     Data-Over-Cable Service Interface Specifications, Radio
+                 Frequency Interface Specification 1.0, SP-RFI-I05-
+                 991105, November 1999, Cable Television Laboratories,
+                 Inc.
+
+   [DOCSIS2]     Data-Over-Cable Service Interface Specifications, Radio
+                 Frequency Interface Specification 1.1, SP-RFIv1.1-I05-
+                 000714, July 2000, Cable Television Laboratories, Inc.
+
+   [DOCSIS3]     Lai, W.S., "DOCSIS-Based Cable Networks: Impact of
+                 Large Data Packets on Upstream Capacity", 14th ITC
+                 Specialists Seminar on Access Networks and Systems,
+                 Barcelona, Spain, April 25-27, 2001.
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 46]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   [EN301192]    ETSI, European Broadcasting Union, Digital Video
+                 Broadcasting (DVB); DVB Specification for Data
+                 Broadcasting, European Standard (Telecommunications
+                 Series)  EN 301 192 v1.2.1(1999-06).
+
+   [ES00]        Eckhardt, D. and P. Steenkiste, "Effort-limited Fair
+                 (ELF) Scheduling for Wireless Networks", Proceedings of
+                 IEEE Infocom 2000.
+
+   [FB00]        Firoiu V. and M. Borden, "A Study of Active Queue
+                 Management for Congestion Control" to appear in Infocom
+                 2000.
+
+   [GM02]        Grieco, L. and S. Mascolo, "TCP Westwood and Easy RED
+                 to Improve Fairness in High-Speed Networks",
+                 Proceedings of the 7th International Workshop on
+                 Protocols for High-Speed Networks, April 2002.
+
+   [IEEE8023]    IEEE 802.3 CSMA/CD Access Method.
+                 http://standards.ieee.org/
+
+   [IEEE80211]   IEEE 802.11 Wireless LAN standard.
+                 http://standards.ieee.org/
+
+   [ISO3309]     ISO/IEC 3309:1991(E), "Information Technology -
+                 Telecommunications and information exchange between
+                 systems - High-level data link control (HDLC)
+                 procedures - Frame structure", International
+                 Organization For Standardization, Fourth edition 1991-
+                 06-01.
+
+   [ISO13818]    ISO/IEC, ISO/IEC 13818-1:2000(E)  Information
+                 Technology - Generic coding of moving pictures and
+                 associated audio information:  Systems, Second edition,
+                 2000-12-01 International Organization for
+                 Standardization and International Electrotechnical
+                 Commission.
+
+   [ITU-I363]    ITU-T I.363.5 B-ISDN ATM Adaptation Layer Specification
+                 Type AAL5, International Standards Organisation (ISO),
+                 1996.
+
+   [Jac90]       Jacobson, V., Modified TCP Congestion Avoidance
+                 Algorithm.  Email to the end2end-interest mailing list,
+                 April 1990.
+                 ftp://ftp.ee.lbl.gov/email/vanj.90apr30.txt
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 47]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   [KY02]        Khafizov, F. and M. Yavuz, Running TCP Over IS-2000,
+                 Proceedings of IEEE ICC, 2002.
+
+   [LK00]        Ludwig, R. and R. H. Katz, "The Eifel Algorithm: Making
+                 TCP Robust Against Spurious Retransmissions", ACM
+                 Computer Communication Review, Vol. 30, No. 1, January
+                 2000.
+
+   [LKJK02]      Ludwig, R., Konrad, A., Joseph, A. D. and R. H. Katz,
+                 "Optimizing the End-to-End Performance of Reliable
+                 Flows over Wireless Links", Kluwer/ACM Wireless
+                 Networks Journal, Vol. 8, Nos. 2/3, pp. 289-299,
+                 March-May 2002.
+
+   [LRKOJ99]     Ludwig, R., Rathonyi, B., Konrad, A., Oden, K. and A.
+                 Joseph, Multi-Layer Tracing of TCP over a Reliable
+                 Wireless Link, pp. 144-154, In Proceedings of ACM
+                 SIGMETRICS 99.
+
+   [LS00]        Ludwig, R. and K. Sklower, The Eifel Retransmission
+                 Timer, ACM Computer Communication Review, Vol. 30, No.
+                 3, July 2000.
+
+   [MAGMA-PROXY] Fenner, B., He, H., Haberman, B. and H. Sandick,
+                 "IGMP/MLD-based Multicast Forwarding ("IGMP/MLD
+                 Proxying")", Work in Progress.
+
+   [MAGMA-SNOOP] Christensen, M., Kimball, K. and F. Solensky,
+                 "Considerations for IGMP and MLD Snooping Switches",
+                 Work in Progress.
+
+   [MBB00]       May, M., Bonald, T. and J-C. Bolot, "Analytic
+                 Evaluation of RED Performance", INFOCOM 2000.
+
+   [MBDL99]      May, M., Bolot, J., Diot, C. and B. Lyles, "Reasons not
+                 to deploy RED", Proc. of 7th. International Workshop on
+                 Quality of Service (IWQoS'99), June 1999.
+
+   [MSMO97]      Mathis, M., Semke, J., Mahdavi, J. and T. Ott, "The
+                 Macroscopic Behavior of the TCP Congestion Avoidance
+                 Algorithm", Computer Communication Review, Vol. 27,
+                 number 3, July 1997.
+
+   [MYR95]       Boden, N., Cohen, D., Felderman, R., Kulawik, A.,
+                 Seitz, C., et al.  MYRINET: A Gigabit per Second Local
+                 Area Network, IEEE-Micro, Vol. 15, No.1, February 1995,
+                 pp. 29-36.
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 48]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   [PFTK98]      Padhye, J., Firoiu, V., Towsley, D. and J. Kurose,
+                 "Modeling TCP Throughput: a Simple Model and its
+                 Empirical Validation", UMASS CMPSCI Tech Report TR98-
+                 008, Feb. 1998.
+
+   [RED93]       Floyd, S. and V. Jacobson, "Random Early Detection
+                 gateways for Congestion Avoidance", IEEE/ACM
+                 Transactions in Networking, Vol. 1 No. 4, August 1993.
+                 http://www.aciri.org/floyd/papers/red/red.html
+
+   [RF95]        Romanow, A. and S. Floyd, "Dynamics of TCP Traffic over
+                 ATM Networks".  IEEE Journal of Selected Areas in
+                 Communication, Vol. 13, No. 4, May 1995, pp. 633-641.
+
+   [RFC791]      Postel, J., "Internet Protocol", STD 5, RFC 791,
+                 September 1981.
+
+   [RFC793]      Postel, J., "Transmission Control Protocol", STD 7, RFC
+                 793, September 1981.
+
+   [RFC768]      Postel, J., "User Datagram Protocol", STD 6, RFC 768,
+                 August 1980.
+
+   [RFC826]      Plummer, D.C., "Ethernet Address Resolution Protocol:
+                 Or converting network protocol addresses to 48-bit
+                 Ethernet address for transmission on Ethernet
+                 hardware", STD 37, RFC 826, November 1982.
+
+   [RFC1071]     Braden, R., Borman, D. and C. Partridge, "Computing the
+                 Internet checksum", RFC 1071, September 1988.
+
+   [RFC1112]     Deering, S., "Host Extensions for IP Multicasting", STD
+                 5, RFC 1112, August 1989.
+
+   [RFC1144]     Jacobson, V., "Compressing TCP/IP Headers for Low-Speed
+                 Serial Links", RFC 1144, February 1990.
+
+   [RFC1191]     Mogul, J. and S. Deering, "Path MTU Discovery", RFC
+                 1191, November 1990.
+
+   [RFC1332]     McGregor, C., "The PPP Internet Protocol Control
+                 Protocol (IPCP)", RFC 1332, May 1992.
+
+   [RFC1435]     Knowles, S., "IESG Advice from Experience with Path MTU
+                 Discovery", RFC 1435, March 1993.
+
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 49]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   [RFC1633]     Braden, R., Clark, D. and S. Shenker, "Integrated
+                 Services in the Internet Architecture: an Overview",
+                 RFC 1633, June 1994.
+
+   [RFC1661]     Simpson, W., "The Point-to-Point Protocol (PPP)", STD
+                 51, RFC 1661, July 1994.
+
+   [RFC1662]     Simpson, W., Ed., "PPP in HDLC-like Framing", STD 51,
+                 RFC 1662, July 1994.
+
+   [RFC1750]     Eastlake 3rd, D., Crocker, S. and J. Schiller,
+                 "Randomness Recommendations for Security", RFC 1750,
+                 December 1994.
+
+   [RFC1812]     Baker, F., Ed., "Requirements for IP Version 4
+                 Routers", RFC 1812, June 1995.
+
+   [RFC1939]     Myers, J. and M. Rose, "Post Office Protocol - Version
+                 3", STD 53, RFC 1939, May 1996.
+
+   [RFC1981]     McCann, J., Deering, S. and J. Mogul, "Path MTU
+                 Discovery for IP version 6", RFC 1981, August 1996.
+
+   [RFC1991]     Atkins, D., Stallings, W. and P. Zimmermann, "PGP
+                 Message Exchange Formats", RFC 1991, August 1996.
+
+   [RFC2018]     Mathis, M., Mahdavi, J., Floyd, S. and A. Romanow, "TCP
+                 Selective Acknowledgement Options", RFC 2018, October
+                 1996.
+
+   [RFC2131]     Droms, R., "Dynamic Host Configuration Protocol", RFC
+                 2131, March 1997.
+
+   [RFC2205]     Braden, R., Ed., Zhang, L., Berson, S., Herzog, S. and
+                 S. Jamin, "Resource ReSerVation Protocol (RSVP) --
+                 Version 1 Functional Specification", RFC 2205,
+                 September 1997.
+
+   [RFC2208]     Mankin, A., Baker, F., Braden, B., Bradner, S., O'Dell,
+                 M., Romanow, A., Weinrib, A. and L. Zhang, "Resource
+                 ReSerVation Protocol (RSVP) -- Version 1 Applicability
+                 Statement Some Guidelines on Deployment", RFC 2208,
+                 September 1997.
+
+   [RFC2210]     Wroclawski, J., "The Use of RSVP with IETF Integrated
+                 Services", RFC 2210, September 1997.
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 50]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   [RFC2211]     Wroclawski, J., "Specification of the Controlled-Load
+                 Network Element Service", RFC 2211, September 1997.
+
+   [RFC2212]     Shenker, S., Partridge, C. and R. Guerin,
+                 "Specification of Guaranteed Quality of Service", RFC
+                 2212, September 1997.
+
+   [RFC2246]     Dierks, T. and C. Allen, "The TLS Protocol Version
+                 1.0", RFC 2246, January 1999.
+
+   [RFC2309]     Braden, B., Clark, D., Crowcroft, J., Davie, B.,
+                 Deering, S., Estrin, D., Floyd, S., Jacobson, V.,
+                 Minshall, G., Partridge, C., Peterson, L.,
+                 Ramakrishnan, K., Shenker, S., Wroclawski, J. and L.
+                 Zhang, "Recommendations on Queue Management and
+                 Congestion Avoidance in the Internet", RFC 2309, April
+                 1998.
+
+   [RFC2322]     van den Hout, K., Koopal, A. and R. van Mook,
+                 "Management of IP numbers by peg-dhcp", RFC 2322, 1
+                 April 1998.
+
+   [RFC2328]     Moy, J., "OSPF Version 2", STD 54, RFC 2328, April
+                 1998.
+
+   [RFC2332]     Luciani, J., Katz, D., Piscitello, D., Cole, B. and N.
+                 Doraswamy, "NBMA Next Hop Resolution Protocol (NHRP)",
+                 RFC 2332, April 1998.
+
+   [RFC2364]     Gross, G., Kaycee, M., Li, A., Malis, A. and J.
+                 Stephens, "PPP Over AAL5", RFC 2364, July 1998.
+
+   [RFC2394]     Pereira, R., "IP Payload Compression Using DEFLATE",
+                 RFC 2394, December 1998.
+
+   [RFC2395]     Friend, R. and R. Monsour, "IP Payload Compression
+                 Using LZS", RFC 2395, December 1998.
+
+   [RFC2401]     Kent, S. and R. Atkinson, "Security Architecture for
+                 the Internet Protocol", RFC 2401, November 1998.
+
+   [RFC2406]     Kent, S. and R. Atkinson, "IP Encapsulating Security
+                 Payload (ESP)", RFC 2406, November 1998.
+
+   [RFC2440]     Callas, J., Donnerhacke, L., Finney, H. and R. Thayer,
+                 "OpenPGP Message Format", RFC 2440, November 1998.
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 51]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   [RFC2460]     Deering, S. and R. Hinden, "Internet Protocol, Version
+                 6 (IPv6) Specification", RFC 2460, December 1998.
+
+   [RFC2461]     Narten, T., Nordmark, E. and W. Simpson, "Neighbor
+                 Discovery for IP Version 6 (IPv6)", RFC 2461, December
+                 1998.
+
+   [RFC2474]     Nichols, K., Blake, S., Baker, F. and D. Black,
+                 "Definition of the Differentiated Services Field (DS
+                 Field) in the IPv4 and IPv6 Headers", RFC 2474,
+                 December 1998.
+
+   [RFC2475]     Blake, S., Black, D., Carlson, M., Davies, E., Wang, Z.
+                 and W. Weiss, "An Architecture for Differentiated
+                 Services", RFC 2475, December 1998.
+
+   [RFC2507]     Degermark, M., Nordgren, B. and S. Pink, "IP Header
+                 Compression", RFC 2507, February 1999.
+
+   [RFC2508]     Casner, S. and V. Jacobson, "Compressing IP/UDP/RTP
+                 Headers for Low-Speed Serial Links", RFC 2508, February
+                 1999.
+
+   [RFC2581]     Allman, M., Paxson, V. and W. Stevens, "TCP Congestion
+                 Control", RFC 2581, April 1999.
+
+   [RFC2582]     Floyd, S. and T. Henderson, "The NewReno Modification
+                 to TCP's Fast Recovery Algorithm", RFC 2582, April
+                 1999.
+
+   [RFC2597]     Heinanen, J., Baker, F., Weiss, W. and J. Wroclawski,
+                 "Assured Forwarding PHB Group", RFC 2597, June 1999.
+
+   [RFC2616]     Fielding, R., Gettys, J., Mogul, J., Frystyk, H.,
+                 Masinter, L., Leach, P. and T. Berners-Lee, "Hypertext
+                 Transfer Protocol -- HTTP/1.1", RFC 2616, June 1999.
+
+   [RFC2630]     Housley, R., "Cryptographic Message Syntax", RFC 2630,
+                 June 1999.
+
+   [RFC2631]     Rescorla, E., "Diffie-Hellman Key Agreement Method",
+                 RFC 2631, June 1999.
+
+   [RFC2632]     Ramsdell, B., Ed., "S/MIME Version 3 Certificate
+                 Handling", RFC 2632, June 1999.
+
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 52]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   [RFC2633]     Ramsdell, B., "S/MIME Version 3 Message Specification",
+                 RFC 2633, June 1999.
+
+   [RFC2634]     Hoffman, P., "Enhanced Security Services for S/MIME",
+                 RFC 2634, June 1999.
+
+   [RFC2684]     Grossman, D. and J. Heinanen, "Multiprotocol
+                 Encapsulation over ATM Adaptation Layer 5", RFC 2684,
+                 September 1999.
+
+   [RFC2686]     Bormann, C., "The Multi-Class Extension to Multi-Link
+                 PPP", RFC 2686, September 1999.
+
+   [RFC2687]     Bormann, C., "PPP in a Real-time Oriented HDLC-like
+                 Framing", RFC 2687, September 1999.
+
+   [RFC2689]     Bormann, C., "Providing Integrated Services over Low-
+                 bitrate Links", RFC 2689, September 1999.
+
+   [RFC2710]     Deering, S., Fenner, W. and B. Haberman, "Multicast
+                 Listener Discovery (MLD) for IPv6", RFC 2710, October
+                 1999.
+
+   [RFC2784]     Farinacci, D., Li, T., Hanks, S., Meyer, D. and P.
+                 Traina, "Generic Routing Encapsulation (GRE)", RFC
+                 2784, March 2000.
+
+   [RFC2865]     Rigney, C., Willens, S., Rubens, A. and W. Simpson,
+                 "Remote Authentication Dial In User Service (RADIUS)",
+                 RFC 2865, June 2000.
+
+   [RFC2914]     Floyd, S., "Congestion Control Principles", BCP 41, RFC
+                 2914, September 2000.
+
+   [RFC2923]     Lahey, K., "TCP Problems with Path MTU Discovery", RFC
+                 2923, September 2000.
+
+   [RFC2988]     Paxson, V. and M. Allman, "Computing TCP's
+                 Retransmission Timer", RFC 2988, November 2000.
+
+   [RFC2990]     Huston, G., "Next Steps for the IP QoS Architecture",
+                 RFC 2990, November 2000.
+
+   [RFC3048]     Whetten, B., Vicisano, L., Kermode, R., Handley, M.,
+                 Floyd, S. and M. Luby, "Reliable Multicast Transport
+                 Building Blocks for One-to-Many Bulk-Data Transfer",
+                 RFC 3048, January 2001.
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 53]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   [RFC3095]     Bormann, C., Ed., Burmeister, C., Degermark, M.,
+                 Fukushima, H., Hannu, H., Jonsson, L-E., Hakenberg, R.,
+                 Koren, T., Le, K., Liu, Z., Martensson, A., Miyazaki,
+                 A., Svanbro, K., Wiebke, T., Yoshimura, T. and H.
+                 Zheng, "RObust Header Compression (ROHC):  Framework
+                 and four profiles: RTP, UDP, ESP, and uncompressed",
+                 RFC 3095, July 2001.
+
+   [RFC3096]     Degermark, M., Ed., "Requirements for robust IP/UDP/RTP
+                 header compression", RFC 3096, July 2001.
+
+   [RFC3150]     Dawkins, S., Montenegro, G., Kojo, M. and V. Magret,
+                 "End-to-end Performance Implications of Slow Links",
+                 BCP 48, RFC 3150, July 2001.
+
+   [RFC3155]     Dawkins, S., Montenegro, G., Kojo, M., Magret, V. and
+                 N. Vaidya, "End-to-end Performance Implications of
+                 Links with Errors", BCP 50, RFC 3155, August 2001.
+
+   [RFC3168]     Ramakrishnan, K., Floyd, S. and D. Black, "The Addition
+                 of Explicit Congestion Notification (ECN) to IP", RFC
+                 3168, September 2001.
+
+   [RFC3173]     Shacham, A., Monsour, B., Pereira, R. and M. Thomas,
+                 "IP Payload Compression Protocol (IPComp)", RFC 3173,
+                 September 2001.
+
+   [RFC3246]     Davie, B., Charny, A., Bennet, J.C.R., Benson, K., Le
+                 Boudec, J.Y., Courtney, W., Davari, S., Firoiu, V. and
+                 D. Stiliadis, "An Expedited Forwarding PHB (Per-Hop
+                 Behavior)", RFC 3246, March 2002.
+
+   [RFC3248]     Armitage, G., Carpenter, B., Casati, A., Crowcroft, J.,
+                 Halpern, J., Kumar, B. and J. Schnizlein, "A Delay
+                 Bound alternative revision of RFC 2598", RFC 3248,
+                 March 2002.
+
+   [RFC3344]     Perkins, C., Ed., "IP Mobility Support for IPv4", RFC
+                 3344, August 2002.
+
+   [RFC3366]     Fairhurst, G. and L. Wood, "Advice to link designers on
+                 link Automatic Repeat reQuest (ARQ)", BCP 62, RFC 3366,
+                 August 2002.
+
+
+
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 54]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   [RFC3376]     Cain, B., Deering, S., Kouvelas, I., Fenner, B. and A.
+                 Thyagarajan, "Internet Group Management Protocol,
+                 Version 3", RFC 3376, October 2002.
+
+   [RFC3449]     Balakrishnan, H., Padmanabhan, V., Fairhurst, G. and M.
+                 Sooriyabandara, "TCP Performance Implications of
+                 Network Path Asymmetry", BCP 69, RFC 3449, December
+                 2002.
+
+   [RFC3450]     Luby, M., Gemmell, J., Vicisano, L., Rizzo, L. and J.
+                 Crowcroft, "Asynchronous Layered Coding (ALC) Protocol
+                 Instantiation", RFC 3450, December 2002.
+
+   [RFC3451]     Luby, M., Gemmell, J., Vicisano, L., Rizzo, L.,
+                 Handley, M. and J. Crowcroft, "Layered Coding Transport
+                 (LCT) Building Block", RFC 3451, December 2002.
+
+   [RFC3452]     Luby, M., Vicisano, L., Gemmell, J., Rizzo, L.,
+                 Handley, M. and J. Crowcroft, "Forward Error Correction
+                 (FEC) Building Block", RFC 3452, December 2002.
+
+   [RFC3453]     Luby, M., Vicisano, L., Gemmell, J., Rizzo, L.,
+                 Handley, M. and J. Crowcroft, "The Use of Forward Error
+                 Correction (FEC) in Reliable Multicast", RFC 3453,
+                 December 2002.
+
+   [RFC3488]     Wu, I. and T. Eckert, "Cisco Systems Router-port Group
+                 Management Protocol (RGMP)", RFC 3488, February 2003.
+
+   [RFC3501]     Crispin, M., "INTERNET MESSAGE ACCESS PROTOCOL -
+                 VERSION 4rev1", RFC 3501, March 2003.
+
+   [RFC3828]     Larzon, L-A., Degermark, M., Pink, S., Jonsson, L-E.,
+                 Ed. and G. Fairhurst, Ed., "The User Datagram Protocol
+                 (UDP)-Lite Protocol", RFC 3828, June 2004.
+
+   [Schneier95]  Schneier, B., Applied Cryptography: Protocols,
+                 Algorithms and Source Code in C (John Wiley and Sons,
+                 October 1995).
+
+   [Schneier00]  Schneier, B., Secrets and Lies: Digital Security in a
+                 Networked World (John Wiley and Sons, August 2000).
+
+   [SP2000]      Stone, J. and C. Partridge, "When the CRC and TCP
+                 Checksum Disagree", ACM SIGCOMM, September 2000.
+                 http://www.acm.org/sigcomm/sigcomm2000/conf/
+                 paper/sigcomm2000-9-1.pdf
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 55]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   [SRC81]       Saltzer, J., Reed, D. and D. Clark, "End-to-End
+                 Arguments in System Design".  Second International
+                 Conference on Distributed Computing Systems (April,
+                 1981) pages 509-512. Published with minor changes in
+                 ACM Transactions in Computer Systems 2, 4, November,
+                 1984, pages 277-288. Reprinted in Craig Partridge,
+                 editor Innovations in internetworking. Artech House,
+                 Norwood, MA, 1988, pages 195-206. ISBN 0-89006-337-0.
+
+   [SSL2]        Hickman, K., "The SSL Protocol", Netscape
+                 Communications Corp., Feb 9, 1995.
+
+   [SSL3]        Frier, A., Karlton, P. and P. Kocher, "The SSL 3.0
+                 Protocol", Netscape Communications Corp., Nov 18, 1996.
+
+   [TCPF98]      Lin, D. and H.T. Kung, "TCP Fast Recovery Strategies:
+                 Analysis and Improvements", IEEE Infocom, March 1998.
+                 http://www.eecs.harvard.edu/networking/papers/infocom-
+                 tcp-final-198.pdf
+
+   [WFBA2000]    Wagner, D., Foster, J., Brewer, E. and A. Aiken, "A
+                 First Step Toward Automated Detection of Buffer Overrun
+                 Vulnerabilities", Proceedings of NDSS2000.
+                 http://www.isoc.org/isoc/conferences/ndss/
+                 2000/proceedings/039.pdf
+
+   [Wilbur89]    Wilbur, Steve R., Jon Crowcroft, and Yuko Murayama.
+                 "MAC layer Security Measures in Local Area Networks",
+                 Local Area Network Security, Workshop LANSEC '89
+                 Proceedings, Springer-Verlag, April 1989, pp. 53-64.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 56]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+21. Contributors' Addresses
+
+   Aaron Falk
+   USC/Information Sciences Institute
+   4676 Admiralty Way
+   Marina Del Rey, CA 90292
+
+   Phone: 310-448-9327
+   EMail: falk@isi.edu
+
+
+   Saverio Mascolo
+   Dipartimento di Elettrotecnica ed Elettronica,
+   Politecnico di Bari Via Orabona 4, 70125 Bari, Italy
+
+   Phone: +39 080 596 3621
+   EMail: mascolo@poliba.it
+   URL: http://www-dee.poliba.it/dee-web/Personale/mascolo.html
+
+
+   Marie-Jose Montpetit
+   MJMontpetit.com
+
+   EMail: marie@mjmontpetit.com
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 57]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+22.  Authors' Addresses
+
+   Phil Karn, Editor
+   Qualcomm 5775 Morehouse Drive
+   San Diego CA 92121
+
+   Phone: 858 587 1121
+   EMail: karn@qualcomm.com
+
+
+   Carsten Bormann
+   Universitaet Bremen TZI
+   Postfach 330440
+   D-28334 Bremen, Germany
+
+   Phone: +49 421 218 7024
+   Fax:   +49 421 218 7000
+   EMail: cabo@tzi.org
+
+
+   Godred (Gorry) Fairhurst
+   Department of Engineering, University of Aberdeen,
+   Aberdeen, AB24 3UE, United Kingdom
+
+   EMail: gorry@erg.abdn.ac.uk
+   URL: http://www.erg.abdn.ac.uk/users/gorry
+
+
+   Dan Grossman
+   Motorola, Inc.
+   111 Locke Drive
+   Marlboro, MA 01752
+
+   EMail: Dan.Grossman@motorola.com
+
+
+   Reiner Ludwig
+   Ericsson Research
+   Ericsson Allee 1
+   52134 Herzogenrath, Germany
+
+   Phone: +49 2407 575 719
+   EMail: Reiner.Ludwig@ericsson.com
+
+
+
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 58]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+   Jamshid Mahdavi
+   Novell, Inc.
+
+   EMail: jmahdavi@earthlink.net
+
+
+   Gabriel Montenegro
+   Sun Microsystems Laboratories, Europe
+   180, Avenue de l'Europe
+   38334 Saint Ismier CEDEX
+   France
+
+   EMail: gab@sun.com
+
+
+   Joe Touch
+   USC/Information Sciences Institute
+   4676 Admiralty Way
+   Marina del Rey CA 90292
+
+   Phone: 310 448 9151
+   EMail: touch@isi.edu
+   URL: http://www.isi.edu/touch
+
+
+   Lloyd Wood
+   Cisco Systems
+   9 New Square Park, Bedfont Lakes
+   Feltham TW14 8HA
+   United Kingdom
+
+   Phone: +44 (0)20 8824 4236
+   EMail: lwood@cisco.com
+   URL: http://www.ee.surrey.ac.uk/Personal/L.Wood/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 59]
+
+RFC 3819        Advice for Internet Subnetwork Designers       July 2004
+
+
+23.  Full Copyright Statement
+
+   Copyright (C) The Internet Society (2004).  This document is subject
+   to the rights, licenses and restrictions contained in BCP 78, and
+   except as set forth therein, the authors retain all their rights.
+
+   This document and the information contained herein are provided on an
+   "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE
+   REPRESENTS OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE
+   INTERNET ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF
+   THE INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED
+   WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Intellectual Property
+
+   The IETF takes no position regarding the validity or scope of any
+   Intellectual Property Rights or other rights that might be claimed
+   to pertain to the implementation or use of the technology
+   described in this document or the extent to which any license
+   under such rights might or might not be available; nor does it
+   represent that it has made any independent effort to identify any
+   such rights.  Information on the procedures with respect to
+   rights in RFC documents can be found in BCP 78 and BCP 79.
+
+   Copies of IPR disclosures made to the IETF Secretariat and any
+   assurances of licenses to be made available, or the result of an
+   attempt made to obtain a general license or permission for the use
+   of such proprietary rights by implementers or users of this
+   specification can be obtained from the IETF on-line IPR repository
+   at http://www.ietf.org/ipr.
+
+   The IETF invites any interested party to bring to its attention
+   any copyrights, patents or patent applications, or other
+   proprietary rights that may cover technology that may be required
+   to implement this standard.  Please address the information to the
+   IETF at ietf-ipr@ietf.org.
+
+Acknowledgement
+
+   Funding for the RFC Editor function is currently provided by the
+   Internet Society.
+
+
+
+
+
+
+
+
+
+Karn, et al.             Best Current Practice                 [Page 60]
+
diff --git a/ext/picotcp/RFC/rfc3927.txt b/ext/picotcp/RFC/rfc3927.txt
new file mode 100644
index 0000000..466b9eb
--- /dev/null
+++ b/ext/picotcp/RFC/rfc3927.txt
@@ -0,0 +1,1851 @@
+
+
+
+
+
+
+Network Working Group                                        S. Cheshire
+Request for Comments: 3927                                Apple Computer
+Category: Standards Track                                       B. Aboba
+                                                   Microsoft Corporation
+                                                              E. Guttman
+                                                        Sun Microsystems
+                                                                May 2005
+
+
+           Dynamic Configuration of IPv4 Link-Local Addresses
+
+Status of This Memo
+
+   This document specifies an Internet standards track protocol for the
+   Internet community, and requests discussion and suggestions for
+   improvements.  Please refer to the current edition of the "Internet
+   Official Protocol Standards" (STD 1) for the standardization state
+   and status of this protocol.  Distribution of this memo is unlimited.
+
+Copyright Notice
+
+   Copyright (C) The Internet Society (2005).
+
+
+Abstract
+
+   To participate in wide-area IP networking, a host needs to be
+   configured with IP addresses for its interfaces, either manually by
+   the user or automatically from a source on the network such as a
+   Dynamic Host Configuration Protocol (DHCP) server.  Unfortunately,
+   such address configuration information may not always be available.
+   It is therefore beneficial for a host to be able to depend on a
+   useful subset of IP networking functions even when no address
+   configuration is available.  This document describes how a host may
+   automatically configure an interface with an IPv4 address within the
+   169.254/16 prefix that is valid for communication with other devices
+   connected to the same physical (or logical) link.
+
+   IPv4 Link-Local addresses are not suitable for communication with
+   devices not directly connected to the same physical (or logical)
+   link, and are only used where stable, routable addresses are not
+   available (such as on ad hoc or isolated networks).  This document
+   does not recommend that IPv4 Link-Local addresses and routable
+   addresses be configured simultaneously on the same interface.
+
+
+
+
+
+
+
+Cheshire, et al.            Standards Track                     [Page 1]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+Table of Contents
+
+   1.  Introduction. . . . . . . . . . . . . . . . . . . . . . . . .  3
+       1.1.  Requirements. . . . . . . . . . . . . . . . . . . . . .  3
+       1.2.  Terminology . . . . . . . . . . . . . . . . . . . . . .  3
+       1.3.  Applicability . . . . . . . . . . . . . . . . . . . . .  5
+       1.4.  Application Layer Protocol Considerations . . . . . . .  6
+       1.5.  Autoconfiguration Issues. . . . . . . . . . . . . . . .  7
+       1.6.  Alternate Use Prohibition . . . . . . . . . . . . . . .  7
+       1.7.  Multiple Interfaces . . . . . . . . . . . . . . . . . .  8
+       1.8.  Communication with Routable Addresses . . . . . . . . .  8
+       1.9.  When to configure an IPv4 Link-Local Address. . . . . .  8
+   2.  Address Selection, Defense and Delivery . . . . . . . . . . .  9
+       2.1.  Link-Local Address Selection. . . . . . . . . . . . . . 10
+       2.2.  Claiming a Link-Local Address . . . . . . . . . . . . . 11
+       2.3.  Shorter Timeouts. . . . . . . . . . . . . . . . . . . . 13
+       2.4.  Announcing an Address . . . . . . . . . . . . . . . . . 13
+       2.5.  Conflict Detection and Defense. . . . . . . . . . . . . 13
+       2.6.  Address Usage and Forwarding Rules. . . . . . . . . . . 14
+       2.7.  Link-Local Packets Are Not Forwarded. . . . . . . . . . 16
+       2.8.  Link-Local Packets are Local. . . . . . . . . . . . . . 16
+       2.9.  Higher-Layer Protocol Considerations. . . . . . . . . . 17
+       2.10. Privacy Concerns. . . . . . . . . . . . . . . . . . . . 17
+       2.11. Interaction between DHCPv4 and IPv4 Link-Local
+             State Machines. . . . . . . . . . . . . . . . . . . . . 17
+   3.  Considerations for Multiple Interfaces. . . . . . . . . . . . 18
+       3.1.  Scoped Addresses. . . . . . . . . . . . . . . . . . . . 18
+       3.2.  Address Ambiguity . . . . . . . . . . . . . . . . . . . 19
+       3.3.  Interaction with Hosts with Routable Addresses. . . . . 20
+       3.4.  Unintentional Autoimmune Response . . . . . . . . . . . 21
+   4.  Healing of Network Partitions . . . . . . . . . . . . . . . . 22
+   5.  Security Considerations . . . . . . . . . . . . . . . . . . . 23
+   6.  Application Programming Considerations. . . . . . . . . . . . 24
+       6.1.  Address Changes, Failure and Recovery . . . . . . . . . 24
+       6.2.  Limited Forwarding of Locators. . . . . . . . . . . . . 24
+       6.3.  Address Ambiguity . . . . . . . . . . . . . . . . . . . 25
+   7.  Router Considerations . . . . . . . . . . . . . . . . . . . . 25
+   8.  IANA Considerations . . . . . . . . . . . . . . . . . . . . . 25
+   9.  Constants . . . . . . . . . . . . . . . . . . . . . . . . . . 26
+   10. References. . . . . . . . . . . . . . . . . . . . . . . . . . 26
+       10.1. Normative References. . . . . . . . . . . . . . . . . . 26
+       10.2. Informative References. . . . . . . . . . . . . . . . . 26
+   Acknowledgments . . . . . . . . . . . . . . . . . . . . . . . . . 27
+   Appendix A - Prior Implementations. . . . . . . . . . . . . . . . 28
+
+
+
+
+
+
+
+Cheshire, et al.            Standards Track                     [Page 2]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+1.  Introduction
+
+   As the Internet Protocol continues to grow in popularity, it becomes
+   increasingly valuable to be able to use familiar IP tools such as FTP
+   not only for global communication, but for local communication as
+   well.  For example, two people with laptop computers supporting IEEE
+   802.11 Wireless LANs [802.11] may meet and wish to exchange files.
+   It is desirable for these people to be able to use IP application
+   software without the inconvenience of having to manually configure
+   static IP addresses or set up a DHCP server [RFC2131].
+
+   This document describes a method by which a host may automatically
+   configure an interface with an IPv4 address in the 169.254/16 prefix
+   that is valid for Link-Local communication on that interface.  This
+   is especially valuable in environments where no other configuration
+   mechanism is available.  The IPv4 prefix 169.254/16 is registered
+   with the IANA for this purpose.  Allocation of IPv6 Link-Local
+   addresses is described in "IPv6 Stateless Address Autoconfiguration"
+   [RFC2462].
+
+   Link-Local communication using IPv4 Link-Local addresses is only
+   suitable for communication with other devices connected to the same
+   physical (or logical) link.  Link-Local communication using IPv4
+   Link-Local addresses is not suitable for communication with devices
+   not directly connected to the same physical (or logical) link.
+
+   Microsoft Windows 98 (and later) and Mac OS 8.5 (and later) already
+   support this capability.  This document standardizes usage,
+   prescribing rules for how IPv4 Link-Local addresses are to be treated
+   by hosts and routers.  In particular, it describes how routers are to
+   behave when receiving packets with IPv4 Link-Local addresses in the
+   source or destination address.  With respect to hosts, it discusses
+   claiming and defending addresses, maintaining Link-Local and routable
+   IPv4 addresses on the same interface, and multi-homing issues.
+
+1.1.  Requirements
+
+   The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
+   "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
+   document are to be interpreted as described in "Key words for use in
+   RFCs" [RFC2119].
+
+1.2.  Terminology
+
+   This document describes Link-Local addressing, for IPv4 communication
+   between two hosts on a single link.  A set of hosts is considered to
+   be "on the same link", if:
+
+
+
+
+Cheshire, et al.            Standards Track                     [Page 3]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+   -  when any host A from that set sends a packet to any other host B
+      in that set, using unicast, multicast, or broadcast, the entire
+      link-layer packet payload arrives unmodified, and
+
+   -  a broadcast sent over that link by any host from that set of hosts
+      can be received by every other host in that set
+
+   The link-layer *header* may be modified, such as in Token Ring Source
+   Routing [802.5], but not the link-layer *payload*.  In particular, if
+   any device forwarding a packet modifies any part of the IP header or
+   IP payload then the packet is no longer considered to be on the same
+   link.  This means that the packet may pass through devices such as
+   repeaters, bridges, hubs or switches and still be considered to be on
+   the same link for the purpose of this document, but not through a
+   device such as an IP router that decrements the TTL or otherwise
+   modifies the IP header.
+
+   This document uses the term "routable address" to refer to all valid
+   unicast IPv4 addresses outside the 169.254/16 prefix that may be
+   forwarded via routers.  This includes all global IP addresses and
+   private addresses such as Net 10/8 [RFC1918], but not loopback
+   addresses such as 127.0.0.1.
+
+   Wherever this document uses the term "host" when describing use of
+   IPv4 Link-Local addresses, the text applies equally to routers when
+   they are the source of or intended destination of packets containing
+   IPv4 Link-Local source or destination addresses.
+
+   Wherever this document uses the term "sender IP address" or "target
+   IP address" in the context of an ARP packet, it is referring to the
+   fields of the ARP packet identified in the ARP specification [RFC826]
+   as "ar$spa" (Sender Protocol Address) and "ar$tpa" (Target Protocol
+   Address) respectively.  For the usage of ARP described in this
+   document, each of these fields always contains an IP address.
+
+   In this document, the term "ARP Probe" is used to refer to an ARP
+   Request packet, broadcast on the local link, with an all-zero 'sender
+   IP address'.  The 'sender hardware address' MUST contain the hardware
+   address of the interface sending the packet.  The 'target hardware
+   address' field is ignored and SHOULD be set to all zeroes.  The
+   'target IP address' field MUST be set to the address being probed.
+
+   In this document, the term "ARP Announcement" is used to refer to an
+   ARP Request packet, broadcast on the local link, identical to the ARP
+   Probe described above, except that both the sender and target IP
+   address fields contain the IP address being announced.
+
+
+
+
+
+Cheshire, et al.            Standards Track                     [Page 4]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+   Constants are introduced in all capital letters.  Their values are
+   given in Section 9.
+
+1.3.  Applicability
+
+   This specification applies to all IEEE 802 Local Area Networks (LANs)
+   [802], including Ethernet [802.3], Token-Ring [802.5] and IEEE 802.11
+   wireless LANs [802.11], as well as to other link-layer technologies
+   that operate at data rates of at least 1 Mbps, have a round-trip
+   latency of at most one second, and support ARP [RFC826].  Wherever
+   this document uses the term "IEEE 802", the text applies equally to
+   any of these network technologies.
+
+   Link-layer technologies that support ARP but operate at rates below 1
+   Mbps or latencies above one second may need to specify different
+   values for the following parameters:
+
+   (a) the number of, and interval between, ARP probes, see PROBE_NUM,
+       PROBE_MIN, PROBE_MAX defined in Section 2.2.1
+
+   (b) the number of, and interval between, ARP announcements, see
+       ANNOUNCE_NUM and ANNOUNCE_INTERVAL defined in Section 2.4
+
+   (c) the maximum rate at which address claiming may be attempted, see
+       RATE_LIMIT_INTERVAL and MAX_CONFLICTS defined in Section 2.2.1
+
+   (d) the time interval between conflicting ARPs below which a host
+       MUST reconfigure instead of attempting to defend its address, see
+       DEFEND_INTERVAL defined in Section 2.5
+
+   Link-layer technologies that do not support ARP may be able to use
+   other techniques for determining whether a particular IP address is
+   currently in use.  However, the application of claim-and-defend
+   mechanisms to such networks is outside the scope of this document.
+
+   This specification is intended for use with small ad hoc networks --
+   a single link containing only a few hosts.  Although 65024 IPv4
+   Link-Local addresses are available in principle, attempting to use
+   all those addresses on a single link would result in a high
+   probability of address conflicts, requiring a host to take an
+   inordinate amount of time to find an available address.
+
+   Network operators with more than 1300 hosts on a single link may want
+   to consider dividing that single link into two or more subnets.  A
+   host connecting to a link that already has 1300 hosts, selecting an
+   IPv4 Link-Local address at random, has a 98% chance of selecting an
+   unused IPv4 Link-Local address on the first try.  A host has a 99.96%
+
+
+
+
+Cheshire, et al.            Standards Track                     [Page 5]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+   chance of selecting an unused IPv4 Link-Local address within two
+   tries.  The probability that it will have to try more than ten times
+   is about 1 in 10^17.
+
+1.4.  Application Layer Protocol Considerations
+
+   IPv4 Link-Local addresses and their dynamic configuration have
+   profound implications upon applications which use them.  This is
+   discussed in Section 6.  Many applications fundamentally assume that
+   addresses of communicating peers are routable, relatively unchanging
+   and unique.  These assumptions no longer hold with IPv4 Link-Local
+   addresses, or a mixture of Link-Local and routable IPv4 addresses.
+
+   Therefore while many applications will work properly with IPv4 Link-
+   Local addresses, or a mixture of Link-Local and routable IPv4
+   addresses, others may do so only after modification, or will exhibit
+   reduced or partial functionality.
+
+   In some cases it may be infeasible for the application to be modified
+   to operate under such conditions.
+
+   IPv4 Link-Local addresses should therefore only be used where stable,
+   routable addresses are not available (such as on ad hoc or isolated
+   networks) or in controlled situations where these limitations and
+   their impact on applications are understood and accepted.  This
+   document does not recommend that IPv4 Link-Local addresses and
+   routable addresses be configured simultaneously on the same
+   interface.
+
+   Use of IPv4 Link-Local addresses in off-link communication is likely
+   to cause application failures.  This can occur within any application
+   that includes embedded addresses, if an IPv4 Link-Local address is
+   embedded when communicating with a host that is not on the link.
+   Examples of applications that embed addresses include IPsec, Kerberos
+   4/5, FTP, RSVP, SMTP, SIP, X-Windows/Xterm/Telnet, Real Audio, H.323,
+   and SNMP [RFC3027].
+
+   To preclude use of IPv4 Link-Local addresses in off-link
+   communication, the following cautionary measures are advised:
+
+   a. IPv4 Link-Local addresses MUST NOT be configured in the DNS.
+      Mapping from IPv4 addresses to host names is conventionally done
+      by issuing DNS queries for names of the form,
+      "x.x.x.x.in-addr.arpa."  When used for link-local addresses, which
+      have significance only on the local link, it is inappropriate to
+      send such DNS queries beyond the local link.  DNS clients MUST NOT
+      send DNS queries for any name that falls within the
+      "254.169.in-addr.arpa." domain.
+
+
+
+Cheshire, et al.            Standards Track                     [Page 6]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+      DNS recursive name servers receiving queries from non-compliant
+      clients for names within the "254.169.in-addr.arpa." domain MUST
+      by default return RCODE 3, authoritatively asserting that no such
+      name exists in the Domain Name System.
+
+   b. Names that are globally resolvable to routable addresses should be
+      used within applications whenever they are available.  Names that
+      are resolvable only on the local link (for example, through use of
+      protocols such as Link Local Multicast Name Resolution [LLMNR])
+      MUST NOT be used in off-link communication.  IPv4 addresses and
+      names that can only be resolved on the local link SHOULD NOT be
+      forwarded beyond the local link.  IPv4 Link-Local addresses SHOULD
+      only be sent when a Link-Local address is used as the source
+      and/or destination address.  This strong advice should hinder
+      limited scope addresses and names from leaving the context in
+      which they apply.
+
+   c. If names resolvable to globally routable addresses are not
+      available, but the globally routable addresses are, they should be
+      used instead of IPv4 Link-Local addresses.
+
+1.5.  Autoconfiguration Issues
+
+   Implementations of IPv4 Link-Local address autoconfiguration MUST
+   expect address conflicts, and MUST be prepared to handle them
+   gracefully by automatically selecting a new address whenever a
+   conflict is detected, as described in Section 2.  This requirement to
+   detect and handle address conflicts applies during the entire period
+   that a host is using a 169.254/16 IPv4 Link-Local address, not just
+   during initial interface configuration.  For example, address
+   conflicts can occur well after a host has completed booting if two
+   previously separate networks are joined, as described in Section 4.
+
+1.6.  Alternate Use Prohibition
+
+   Note that addresses in the 169.254/16 prefix SHOULD NOT be configured
+   manually or by a DHCP server.  Manual or DHCP configuration may cause
+   a host to use an address in the 169.254/16 prefix without following
+   the special rules regarding duplicate detection and automatic
+   configuration that pertain to addresses in this prefix.  While the
+   DHCP specification [RFC2131] indicates that a DHCP client SHOULD
+   probe a newly received address with ARP, this is not mandatory.
+   Similarly, while the DHCP specification recommends that a DHCP server
+   SHOULD probe an address using an ICMP Echo Request before allocating
+   it, this is also not mandatory, and even if the server does this,
+   IPv4 Link-Local addresses are not routable, so a DHCP server not
+   directly connected to a link cannot detect whether a host on that
+   link is already using the desired IPv4 Link-Local address.
+
+
+
+Cheshire, et al.            Standards Track                     [Page 7]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+   Administrators wishing to configure their own local addresses (using
+   manual configuration, a DHCP server, or any other mechanism not
+   described in this document) should use one of the existing private
+   address prefixes [RFC1918], not the 169.254/16 prefix.
+
+1.7.  Multiple Interfaces
+
+   Additional considerations apply to hosts that support more than one
+   active interface where one or more of these interfaces support IPv4
+   Link-Local address configuration.  These considerations are discussed
+   in Section 3.
+
+1.8.  Communication with Routable Addresses
+
+   There will be cases when devices with a configured Link-Local address
+   will need to communicate with a device with a routable address
+   configured on the same physical link, and vice versa.  The rules in
+   Section 2.6 allow this communication.
+
+   This allows, for example, a laptop computer with only a routable
+   address to communicate with web servers world-wide using its
+   globally-routable address while at the same time printing those web
+   pages on a local printer that has only an IPv4 Link-Local address.
+
+1.9.  When to configure an IPv4 Link-Local address
+
+   Having addresses of multiple different scopes assigned to an
+   interface, with no adequate way to determine in what circumstances
+   each address should be used, leads to complexity for applications and
+   confusion for users.  A host with an address on a link can
+   communicate with all other devices on that link, whether those
+   devices use Link-Local addresses, or routable addresses.  For these
+   reasons, a host SHOULD NOT have both an operable routable address and
+   an IPv4 Link-Local address configured on the same interface.  The
+   term "operable address" is used to mean an address which works
+   effectively for communication in the current network context (see
+   below).  When an operable routable address is available on an
+   interface, the host SHOULD NOT also assign an IPv4 Link-Local address
+   on that interface.  However, during the transition (in either
+   direction) between using routable and IPv4 Link-Local addresses both
+   MAY be in use at once subject to these rules:
+
+      1. The assignment of an IPv4 Link-Local address on an interface is
+         based solely on the state of the interface, and is independent
+         of any other protocols such as DHCP.  A host MUST NOT alter its
+         behavior and use of other protocols such as DHCP because the
+         host has assigned an IPv4 Link-Local address to an interface.
+
+
+
+
+Cheshire, et al.            Standards Track                     [Page 8]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+      2. If a host finds that an interface that was previously
+         configured with an IPv4 Link-Local address now has an operable
+         routable address available, the host MUST use the routable
+         address when initiating new communications, and MUST cease
+         advertising the availability of the IPv4 Link-Local address
+         through whatever mechanisms that address had been made known to
+         others.  The host SHOULD continue to use the IPv4 Link-Local
+         address for communications already underway, and MAY continue
+         to accept new communications addressed to the IPv4 Link-Local
+         address.  Ways in which an operable routable address might
+         become available on an interface include:
+
+               * Manual configuration
+               * Address assignment through DHCP
+               * Roaming of the host to a network on which a previously
+                 assigned address becomes operable
+
+      3. If a host finds that an interface no longer has an operable
+         routable address available, the host MAY identify a usable IPv4
+         Link-Local address (as described in section 2) and assign that
+         address to the interface.  Ways in which an operable routable
+         address might cease to be available on an interface include:
+
+               * Removal of the address from the interface through
+                 manual configuration
+               * Expiration of the lease on the address assigned through
+                 DHCP
+               * Roaming of the host to a new network on which the
+                 address is no longer operable.
+
+   The determination by the system of whether an address is "operable"
+   is not clear cut and many changes in the system context (e.g.,
+   router changes) may affect the operability of an address.  In
+   particular, roaming of a host from one network to another is likely --
+   but not certain -- to change the operability of a configured address,
+   but detecting such a move is not always trivial.
+
+   "Detection of Network Attachment (DNA) in IPv4" [DNAv4] provides
+   further discussion of address assignment and operability
+   determination.
+
+2.  Address Selection, Defense and Delivery
+
+   The following section explains the IPv4 Link-Local address selection
+   algorithm, how IPv4 Link-Local addresses are defended, and how IPv4
+   packets with IPv4 Link-Local addresses are delivered.
+
+
+
+
+
+Cheshire, et al.            Standards Track                     [Page 9]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+   Windows and Mac OS hosts that already implement Link-Local IPv4
+   address auto-configuration are compatible with the rules presented in
+   this section.  However, should any interoperability problem be
+   discovered, this document, not any prior implementation, defines the
+   standard.
+
+2.1.  Link-Local Address Selection
+
+   When a host wishes to configure an IPv4 Link-Local address, it
+   selects an address using a pseudo-random number generator with a
+   uniform distribution in the range from 169.254.1.0 to 169.254.254.255
+   inclusive.
+
+   The IPv4 prefix 169.254/16 is registered with the IANA for this
+   purpose.  The first 256 and last 256 addresses in the 169.254/16
+   prefix are reserved for future use and MUST NOT be selected by a host
+   using this dynamic configuration mechanism.
+
+   The pseudo-random number generation algorithm MUST be chosen so that
+   different hosts do not generate the same sequence of numbers.  If the
+   host has access to persistent information that is different for each
+   host, such as its IEEE 802 MAC address, then the pseudo-random number
+   generator SHOULD be seeded using a value derived from this
+   information.  This means that even without using any other persistent
+   storage, a host will usually select the same IPv4 Link-Local address
+   each time it is booted, which can be convenient for debugging and
+   other operational reasons.  Seeding the pseudo-random number
+   generator using the real-time clock or any other information which is
+   (or may be) identical in every host is NOT suitable for this purpose,
+   because a group of hosts that are all powered on at the same time
+   might then all generate the same sequence, resulting in a never-
+   ending series of conflicts as the hosts move in lock-step through
+   exactly the same pseudo-random sequence, conflicting on every address
+   they probe.
+
+   Hosts that are equipped with persistent storage MAY, for each
+   interface, record the IPv4 address they have selected.  On booting,
+   hosts with a previously recorded address SHOULD use that address as
+   their first candidate when probing.  This increases the stability of
+   addresses.  For example, if a group of hosts are powered off at
+   night, then when they are powered on the next morning they will all
+   resume using the same addresses, instead of picking different
+   addresses and potentially having to resolve conflicts that arise.
+
+
+
+
+
+
+
+
+Cheshire, et al.            Standards Track                    [Page 10]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+2.2.  Claiming a Link-Local Address
+
+   After it has selected an IPv4 Link-Local address, a host MUST test to
+   see if the IPv4 Link-Local address is already in use before beginning
+   to use it.  When a network interface transitions from an inactive to
+   an active state, the host does not have knowledge of what IPv4 Link-
+   Local addresses may currently be in use on that link, since the point
+   of attachment may have changed or the network interface may have been
+   inactive when a conflicting address was claimed.
+
+   Were the host to immediately begin using an IPv4 Link-Local address
+   which is already in use by another host, this would be disruptive to
+   that other host.  Since it is possible that the host has changed its
+   point of attachment, a routable address may be obtainable on the new
+   network, and therefore it cannot be assumed that an IPv4 Link-Local
+   address is to be preferred.
+
+   Before using the IPv4 Link-Local address (e.g., using it as the
+   source address in an IPv4 packet, or as the Sender IPv4 address in an
+   ARP packet) a host MUST perform the probing test described below to
+   achieve better confidence that using the IPv4 Link-Local address will
+   not cause disruption.
+
+   Examples of events that involve an interface becoming active include:
+
+      Reboot/startup
+      Wake from sleep (if network interface was inactive during sleep)
+      Bringing up previously inactive network interface
+      IEEE 802 hardware link-state change (appropriate for the
+           media type and security mechanisms which apply) indicates
+           that an interface has become active.
+      Association with a wireless base station or ad hoc network.
+
+   A host MUST NOT perform this check periodically as a matter of
+   course.  This would be a waste of network bandwidth, and is
+   unnecessary due to the ability of hosts to passively discover
+   conflicts, as described in Section 2.5.
+
+2.2.1.  Probe details
+
+   On a link-layer such as IEEE 802 that supports ARP, conflict
+   detection is done using ARP probes.  On link-layer technologies that
+   do not support ARP other techniques may be available for determining
+   whether a particular IPv4 address is currently in use.  However, the
+   application of claim-and-defend mechanisms to such networks is
+   outside the scope of this document.
+
+
+
+
+
+Cheshire, et al.            Standards Track                    [Page 11]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+   A host probes to see if an address is already in use by broadcasting
+   an ARP Request for the desired address.  The client MUST fill in the
+   'sender hardware address' field of the ARP Request with the hardware
+   address of the interface through which it is sending the packet.  The
+   'sender IP address' field MUST be set to all zeroes, to avoid
+   polluting ARP caches in other hosts on the same link in the case
+   where the address turns out to be already in use by another host.
+   The 'target hardware address' field is ignored and SHOULD be set to
+   all zeroes.  The 'target IP address' field MUST be set to the address
+   being probed.  An ARP Request constructed this way with an all-zero
+   'sender IP address' is referred to as an "ARP Probe".
+
+   When ready to begin probing, the host should then wait for a random
+   time interval selected uniformly in the range zero to PROBE_WAIT
+   seconds, and should then send PROBE_NUM probe packets, each of these
+   probe packets spaced randomly, PROBE_MIN to PROBE_MAX seconds apart.
+   If during this period, from the beginning of the probing process
+   until ANNOUNCE_WAIT seconds after the last probe packet is sent, the
+   host receives any ARP packet (Request *or* Reply) on the interface
+   where the probe is being performed where the packet's 'sender IP
+   address' is the address being probed for, then the host MUST treat
+   this address as being in use by some other host, and MUST select a
+   new pseudo-random address and repeat the process.  In addition, if
+   during this period the host receives any ARP Probe where the packet's
+   'target IP address' is the address being probed for, and the packet's
+   'sender hardware address' is not the hardware address of the
+   interface the host is attempting to configure, then the host MUST
+   similarly treat this as an address conflict and select a new address
+   as above.  This can occur if two (or more) hosts attempt to configure
+   the same IPv4 Link-Local address at the same time.
+
+   A host should maintain a counter of the number of address conflicts
+   it has experienced in the process of trying to acquire an address,
+   and if the number of conflicts exceeds MAX_CONFLICTS then the host
+   MUST limit the rate at which it probes for new addresses to no more
+   than one new address per RATE_LIMIT_INTERVAL.  This is to prevent
+   catastrophic ARP storms in pathological failure cases, such as a
+   rogue host that answers all ARP probes, causing legitimate hosts to
+   go into an infinite loop attempting to select a usable address.
+
+   If, by ANNOUNCE_WAIT seconds after the transmission of the last ARP
+   Probe, no conflicting ARP Reply or ARP Probe has been received, then
+   the host has successfully claimed the desired IPv4 Link-Local
+   address.
+
+
+
+
+
+
+
+Cheshire, et al.            Standards Track                    [Page 12]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+2.3.  Shorter Timeouts
+
+   Network technologies may emerge for which shorter delays are
+   appropriate than those required by this document.  A subsequent IETF
+   publication may be produced providing guidelines for different values
+   for PROBE_WAIT, PROBE_NUM, PROBE_MIN and PROBE_MAX on those
+   technologies.
+
+2.4.  Announcing an Address
+
+   Having probed to determine a unique address to use, the host MUST
+   then announce its claimed address by broadcasting ANNOUNCE_NUM ARP
+   announcements, spaced ANNOUNCE_INTERVAL seconds apart.  An ARP
+   announcement is identical to the ARP Probe described above, except
+   that now the sender and target IP addresses are both set to the
+   host's newly selected IPv4 address.  The purpose of these ARP
+   announcements is to make sure that other hosts on the link do not
+   have stale ARP cache entries left over from some other host that may
+   previously have been using the same address.
+
+2.5.  Conflict Detection and Defense
+
+   Address conflict detection is not limited to the address selection
+   phase, when a host is sending ARP probes.  Address conflict detection
+   is an ongoing process that is in effect for as long as a host is
+   using an IPv4 Link-Local address.  At any time, if a host receives an
+   ARP packet (request *or* reply) on an interface where the 'sender IP
+   address' is the IP address the host has configured for that
+   interface, but the 'sender hardware address' does not match the
+   hardware address of that interface, then this is a conflicting ARP
+   packet, indicating an address conflict.
+
+   A host MUST respond to a conflicting ARP packet as described in
+   either (a) or (b) below:
+
+   (a) Upon receiving a conflicting ARP packet, a host MAY elect to
+   immediately configure a new IPv4 Link-Local address as described
+   above, or
+
+   (b) If a host currently has active TCP connections or other reasons
+   to prefer to keep the same IPv4 address, and it has not seen any
+   other conflicting ARP packets within the last DEFEND_INTERVAL
+   seconds, then it MAY elect to attempt to defend its address by
+   recording the time that the conflicting ARP packet was received, and
+   then broadcasting one single ARP announcement, giving its own IP and
+   hardware addresses as the sender addresses of the ARP.  Having done
+   this, the host can then continue to use the address normally without
+   any further special action.  However, if this is not the first
+
+
+
+Cheshire, et al.            Standards Track                    [Page 13]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+   conflicting ARP packet the host has seen, and the time recorded for
+   the previous conflicting ARP packet is recent, within DEFEND_INTERVAL
+   seconds, then the host MUST immediately cease using this address and
+   configure a new IPv4 Link-Local address as described above.  This is
+   necessary to ensure that two hosts do not get stuck in an endless
+   loop with both hosts trying to defend the same address.
+
+   A host MUST respond to conflicting ARP packets as described in either
+   (a) or (b) above.  A host MUST NOT ignore conflicting ARP packets.
+
+   Forced address reconfiguration may be disruptive, causing TCP
+   connections to be broken.  However, it is expected that such
+   disruptions will be rare, and if inadvertent address duplication
+   happens, then disruption of communication is inevitable, no matter
+   how the addresses were assigned.  It is not possible for two
+   different hosts using the same IP address on the same network to
+   operate reliably.
+
+   Before abandoning an address due to a conflict, hosts SHOULD actively
+   attempt to reset any existing connections using that address.  This
+   mitigates some security threats posed by address reconfiguration, as
+   discussed in Section 5.
+
+   Immediately configuring a new address as soon as the conflict is
+   detected is the best way to restore useful communication as quickly
+   as possible.  The mechanism described above of broadcasting a single
+   ARP announcement to defend the address mitigates the problem
+   somewhat, by helping to improve the chance that one of the two
+   conflicting hosts may be able to retain its address.
+
+   All ARP packets (*replies* as well as requests) that contain a Link-
+   Local 'sender IP address' MUST be sent using link-layer broadcast
+   instead of link-layer unicast.  This aids timely detection of
+   duplicate addresses.  An example illustrating how this helps is given
+   in Section 4.
+
+2.6.  Address Usage and Forwarding Rules
+
+   A host implementing this specification has additional rules to
+   conform to, whether or not it has an interface configured with an
+   IPv4 Link-Local address.
+
+2.6.1.  Source Address Usage
+
+   Since each interface on a host may have an IPv4 Link-Local address in
+   addition to zero or more other addresses configured by other means
+   (e.g., manually or via a DHCP server), a host may have to make a
+
+
+
+
+Cheshire, et al.            Standards Track                    [Page 14]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+   choice about what source address to use when it sends a packet or
+   initiates a TCP connection.
+
+   Where both an IPv4 Link-Local and a routable address are available on
+   the same interface, the routable address should be preferred as the
+   source address for new communications, but packets sent from or to
+   the IPv4 Link-Local address are still delivered as expected.  The
+   IPv4 Link-Local address may continue to be used as a source address
+   in communications where switching to a preferred address would cause
+   communications failure because of the requirements of an upper-layer
+   protocol (e.g., an existing TCP connection).  For more details, see
+   Section 1.9.
+
+   A multi-homed host needs to select an outgoing interface whether or
+   not the destination is an IPv4 Link-Local address.  Details of that
+   process are beyond the scope of this specification.  After selecting
+   an interface, the multi-homed host should send packets involving IPv4
+   Link-Local addresses as specified in this document, as if the
+   selected interface were the host's only interface.  See Section 3 for
+   further discussion of multi-homed hosts.
+
+2.6.2.  Forwarding Rules
+
+   Whichever interface is used, if the destination address is in the
+   169.254/16 prefix (excluding the address 169.254.255.255, which is
+   the broadcast address for the Link-Local prefix), then the sender
+   MUST ARP for the destination address and then send its packet
+   directly to the destination on the same physical link.  This MUST be
+   done whether the interface is configured with a Link-Local or a
+   routable IPv4 address.
+
+   In many network stacks, achieving this functionality may be as simple
+   as adding a routing table entry indicating that 169.254/16 is
+   directly reachable on the local link.  This approach will not work
+   for routers or multi-homed hosts.  Refer to section 3 for more
+   discussion of multi-homed hosts.
+
+   The host MUST NOT send a packet with an IPv4 Link-Local destination
+   address to any router for forwarding.
+
+   If the destination address is a unicast address outside the
+   169.254/16 prefix, then the host SHOULD use an appropriate routable
+   IPv4 source address, if it can.  If for any reason the host chooses
+   to send the packet with an IPv4 Link-Local source address (e.g., no
+   routable address is available on the selected interface), then it
+   MUST ARP for the destination address and then send its packet, with
+
+
+
+
+
+Cheshire, et al.            Standards Track                    [Page 15]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+   an IPv4 Link-Local source address and a routable destination IPv4
+   address, directly to its destination on the same physical link.  The
+   host MUST NOT send the packet to any router for forwarding.
+
+   In the case of a device with a single interface and only a Link-
+   Local IPv4 address, this requirement can be paraphrased as "ARP for
+   everything".
+
+   In many network stacks, achieving this "ARP for everything" behavior
+   may be as simple as having no primary IP router configured, having
+   the primary IP router address configured to 0.0.0.0, or having the
+   primary IP router address set to be the same as the host's own Link-
+   Local IPv4 address.  For suggested behavior in multi-homed hosts, see
+   Section 3.
+
+2.7.  Link-Local Packets Are Not Forwarded
+
+   A sensible default for applications which are sending from an IPv4
+   Link-Local address is to explicitly set the IPv4 TTL to 1.  This is
+   not appropriate in all cases as some applications may require that
+   the IPv4 TTL be set to other values.
+
+   An IPv4 packet whose source and/or destination address is in the
+   169.254/16 prefix MUST NOT be sent to any router for forwarding, and
+   any network device receiving such a packet MUST NOT forward it,
+   regardless of the TTL in the IPv4 header.  Similarly, a router or
+   other host MUST NOT indiscriminately answer all ARP Requests for
+   addresses in the 169.254/16 prefix.  A router may of course answer
+   ARP Requests for one or more IPv4 Link-Local address(es) that it has
+   legitimately claimed for its own use according to the claim-and-
+   defend protocol described in this document.
+
+   This restriction also applies to multicast packets.  IPv4 packets
+   with a Link-Local source address MUST NOT be forwarded outside the
+   local link even if they have a multicast destination address.
+
+2.8.  Link-Local Packets are Local
+
+   The non-forwarding rule means that hosts may assume that all
+   169.254/16 destination addresses are "on-link" and directly
+   reachable.  The 169.254/16 address prefix MUST NOT be subnetted.
+   This specification utilizes ARP-based address conflict detection,
+   which functions by broadcasting on the local subnet.  Since such
+   broadcasts are not forwarded, were subnetting to be allowed then
+   address conflicts could remain undetected.
+
+
+
+
+
+
+Cheshire, et al.            Standards Track                    [Page 16]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+   This does not mean that Link-Local devices are forbidden from any
+   communication outside the local link.  IP hosts that implement both
+   Link-Local and conventional routable IPv4 addresses may still use
+   their routable addresses without restriction as they do today.
+
+2.9.  Higher-Layer Protocol Considerations
+
+   Similar considerations apply at layers above IP.
+
+   For example, Web pages (including automatically generated web
+   pages) SHOULD NOT contain links with embedded IPv4 Link-Local
+   addresses if those pages are viewable from hosts outside the local
+   link where the addresses are valid.
+
+   As IPv4 Link-Local addresses may change at any time and have limited
+   scope, IPv4 Link-Local addresses MUST NOT be stored in the DNS.
+
+2.10.  Privacy Concerns
+
+   Another reason to restrict leakage of IPv4 Link-Local addresses
+   outside the local link is privacy concerns.  If IPv4 Link-Local
+   addresses are derived from a hash of the MAC address, some argue that
+   they could be indirectly associated with an individual, and thereby
+   used to track that individual's activities.  Within the local link
+   the hardware addresses in the packets are all directly observable, so
+   as long as IPv4 Link-Local addresses don't leave the local link they
+   provide no more information to an intruder than could be gained by
+   direct observation of hardware addresses.
+
+2.11.  Interaction between DHCPv4 client and IPv4 Link-Local State
+       Machines
+
+   As documented in Appendix A, early implementations of IPv4 Link-Local
+   have modified the DHCP state machine.  Field experience shows that
+   these modifications reduce the reliability of the DHCP service.
+
+   A device that implements both IPv4 Link-Local and a DHCPv4 client
+   should not alter the behavior of the DHCPv4 client to accommodate
+   IPv4 Link-Local configuration.  In particular, configuration of an
+   IPv4 Link-Local address, whether or not a DHCP server is currently
+   responding, is not sufficient reason to unconfigure a valid DHCP
+   lease, to stop the DHCP client from attempting to acquire a new IP
+   address, to change DHCP timeouts or to change the behavior of the
+   DHCP state machine in any other way.
+
+   Further discussion of this issue is provided in "Detection of Network
+   Attachment (DNA) in IPv4" [DNAv4].
+
+
+
+
+Cheshire, et al.            Standards Track                    [Page 17]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+3.  Considerations for Multiple Interfaces
+
+   The considerations outlined here also apply whenever a host has
+   multiple IP addresses, whether or not it has multiple physical
+   interfaces.  Other examples of multiple interfaces include different
+   logical endpoints (tunnels, virtual private networks etc.) and
+   multiple logical networks on the same physical medium.  This is often
+   referred to as "multi-homing".
+
+   Hosts which have more than one active interface and elect to
+   implement dynamic configuration of IPv4 Link-Local addresses on one
+   or more of those interfaces will face various problems.  This section
+   lists these problems but does no more than indicate how one might
+   solve them.  At the time of this writing, there is no silver bullet
+   which solves these problems in all cases, in a general way.
+   Implementors must think through these issues before implementing the
+   protocol specified in this document on a system which may have more
+   than one active interface as part of a TCP/IP stack capable of
+   multi-homing.
+
+3.1.  Scoped Addresses
+
+   A host may be attached to more than one network at the same time.  It
+   would be nice if there was a single address space used in every
+   network, but this is not the case.  Addresses used in one network, be
+   it a network behind a NAT or a link on which IPv4 Link-Local
+   addresses are used, cannot be used in another network and have the
+   same effect.
+
+   It would also be nice if addresses were not exposed to applications,
+   but they are.  Most software using TCP/IP that awaits messages
+   receives them from any interface at a particular port number, for a
+   particular transport protocol.  Applications are generally only aware
+   (and care) that they have received a message.  The application knows
+   the address of the sender to which the application will reply.
+
+   The first scoped address problem is source address selection.  A
+   multi-homed host has more than one address.  Which address should be
+   used as the source address when sending to a particular destination?
+   This question is usually answered by referring to a routing table,
+   which expresses on which interface (with which address) to send, and
+   how to send (should one forward to a router, or send directly).  The
+   choice is made complicated by scoped addresses because the address
+   range in which the destination lies may be ambiguous.  The table may
+   not be able to yield a good answer.  This problem is bound up with
+   next-hop selection, which is discussed in Section 3.2.
+
+
+
+
+
+Cheshire, et al.            Standards Track                    [Page 18]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+   The second scoped address problem arises from scoped parameters
+   leaking outside their scope.  This is discussed in Section 7.
+
+   It is possible to overcome these problems.  One way is to expose
+   scope information to applications such that they are always aware of
+   what scope a peer is in.  This way, the correct interface could be
+   selected, and a safe procedure could be followed with respect to
+   forwarding addresses and other scoped parameters.  There are other
+   possible approaches.  None of these methods have been standardized
+   for IPv4 nor are they specified in this document.  A good API design
+   could mitigate the problems, either by exposing address scopes to
+   'scoped-address aware' applications or by cleverly encapsulating the
+   scoping information and logic so that applications do the right thing
+   without being aware of address scoping.
+
+   An implementer could undertake to solve these problems, but cannot
+   simply ignore them.  With sufficient experience, it is hoped that
+   specifications will emerge explaining how to overcome scoped address
+   multi-homing problems.
+
+3.2.  Address Ambiguity
+
+   This is a core problem with respect to IPv4 Link-Local destination
+   addresses being reachable on more than one interface.  What should a
+   host do when it needs to send to Link-Local destination L and L can
+   be resolved using ARP on more than one link?
+
+   Even if a Link-Local address can be resolved on only one link at a
+   given moment, there is no guarantee that it will remain unambiguous
+   in the future.  Additional hosts on other interfaces may claim the
+   address L as well.
+
+   One possibility is to support this only in the case where the
+   application specifically expresses which interface to send from.
+
+   There is no standard or obvious solution to this problem.  Existing
+   application software written for the IPv4 protocol suite is largely
+   incapable of dealing with address ambiguity.  This does not preclude
+   an implementer from finding a solution, writing applications which
+   are able to use it, and providing a host which can support dynamic
+   configuration of IPv4 Link-Local addresses on more than one
+   interface.  This solution will almost surely not be generally
+   applicable to existing software and transparent to higher layers,
+   however.
+
+   Given that the IP stack must have the outbound interface associated
+   with a packet that needs to be sent to a Link-Local destination
+   address, interface selection must occur.  The outbound interface
+
+
+
+Cheshire, et al.            Standards Track                    [Page 19]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+   cannot be derived from the packet's header parameters such as source
+   or destination address (e.g., by using the forwarding table lookup).
+   Therefore, outbound interface association must be done explicitly
+   through other means.  The specification does not stipulate those
+   means.
+
+3.3.  Interaction with Hosts with Routable Addresses
+
+   Attention is paid in this specification to transition from the use of
+   IPv4 Link-Local addresses to routable addresses (see Section 1.5).
+   The intention is to allow a host with a single interface to first
+   support Link-Local configuration then gracefully transition to the
+   use of a routable address.  Since the host transitioning to the use
+   of a routable address may temporarily have more than one address
+   active, the scoped address issues described in Section 3.1 will
+   apply.  When a host acquires a routable address, it does not need to
+   retain its Link-Local address for the purpose of communicating with
+   other devices on the link that are themselves using only Link-Local
+   addresses: any host conforming to this specification knows that
+   regardless of source address an IPv4 Link-Local destination must be
+   reached by forwarding directly to the destination, not via a router;
+   it is not necessary for that host to have a Link-Local source address
+   in order to send to a Link-Local destination address.
+
+   A host with an IPv4 Link-Local address may send to a destination
+   which does not have an IPv4 Link-Local address.  If the host is not
+   multi-homed, the procedure is simple and unambiguous: Using ARP and
+   forwarding directly to on-link destinations is the default route.  If
+   the host is multi-homed, however, the routing policy is more complex,
+   especially if one of the interfaces is configured with a routable
+   address and the default route is (sensibly) directed at a router
+   accessible through that interface.  The following example illustrates
+   this problem and provides a common solution to it.
+
+                         i1 +---------+ i2   i3 +-------+
+               ROUTER-------=  HOST1  =---------= HOST2 |
+                      link1 +---------+  link2  +-------+
+
+   In the figure above, HOST1 is connected to link1 and link2.
+   Interface i1 is configured with a routable address, while i2 has an
+   IPv4 Link-Local address.  HOST1 has its default route set to ROUTER's
+   address, through i1.  HOST1 will route to destinations in 169.254/16
+   to i2, sending directly to the destination.
+
+   HOST2 has a configured (non-Link-Local) IPv4 address assigned to i3.
+
+
+
+
+
+
+Cheshire, et al.            Standards Track                    [Page 20]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+   Using a name resolution or service discovery protocol HOST1 can
+   discover HOST2's address.  Since HOST2's address is not in
+   169.254/16, HOST1's routing policy will send datagrams to HOST2 via
+   i1, to the ROUTER.  Unless there is a route from ROUTER to HOST2, the
+   datagrams sent from HOST1 to HOST2 will not reach it.
+
+   One solution to this problem is for a host to attempt to reach any
+   host locally (using ARP) for which it receives an unreachable ICMP
+   error message (ICMP message codes 0, 1, 6 or 7 [RFC792]).  The host
+   tries all its attached links in a round robin fashion.  This has been
+   implemented successfully for some IPv6 hosts, to circumvent exactly
+   this problem.  In terms of this example, HOST1 upon failing to reach
+   HOST2 via the ROUTER, will attempt to forward to HOST2 via i2 and
+   succeed.
+
+   It may also be possible to overcome this problem using techniques
+   described in Section 3.2, or other means not discussed here.  This
+   specification does not provide a standard solution, nor does it
+   preclude implementers from supporting multi-homed configurations,
+   provided that they address the concerns in this section for the
+   applications which will be supported on the host.
+
+3.4.  Unintentional Autoimmune Response
+
+   Care must be taken if a multi-homed host can support more than one
+   interface on the same link, all of which support IPv4 Link-Local
+   autoconfiguration.  If these interfaces attempt to allocate the same
+   address, they will defend the host against itself -- causing the
+   claiming algorithm to fail.  The simplest solution to this problem is
+   to run the algorithm independently on each interface configured with
+   IPv4 Link-Local addresses.
+
+   In particular, ARP packets which appear to claim an address which is
+   assigned to a specific interface, indicate conflict only if they are
+   received on that interface and their hardware address is of some
+   other interface.
+
+   If a host has two interfaces on the same link, then claiming and
+   defending on those interfaces must ensure that they end up with
+   different addresses just as if they were on different hosts.  Note
+   that some of the ways a host may find itself with two interfaces on
+   the same link may be unexpected and non-obvious, such as when a host
+   has Ethernet and 802.11 wireless, but those two links are (possibly
+   even without the knowledge of the host's user) bridged together.
+
+
+
+
+
+
+
+Cheshire, et al.            Standards Track                    [Page 21]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+4.  Healing of Network Partitions
+
+   Hosts on disjoint network links may configure the same IPv4 Link-
+   Local address.  If these separate network links are later joined or
+   bridged together, then there may be two hosts which are now on the
+   same link, trying to use the same address.  When either host attempts
+   to communicate with any other host on the network, it will at some
+   point broadcast an ARP packet which will enable the hosts in question
+   to detect that there is an address conflict.
+
+   When these address conflicts are detected, the subsequent forced
+   reconfiguration may be disruptive, causing TCP connections to be
+   broken.  However, it is expected that such disruptions will be rare.
+   It should be relatively uncommon for networks to be joined while
+   hosts on those networks are active.  Also, 65024 addresses are
+   available for IPv4 Link-Local use, so even when two small networks
+   are joined, the chance of conflict for any given host is fairly
+   small.
+
+   When joining two large networks (defined as networks with a
+   substantial number of hosts per segment) there is a greater chance of
+   conflict.  In such networks, it is likely that the joining of
+   previously separated segments will result in one or more hosts
+   needing to change their IPv4 Link-Local address, with subsequent loss
+   of TCP connections.  In cases where separation and re-joining is
+   frequent, as in remotely bridged networks, this could prove
+   disruptive.  However, unless the number of hosts on the joined
+   segments is very large, the traffic resulting from the join and
+   subsequent address conflict resolution will be small.
+
+   Sending ARP replies that have IPv4 Link-Local sender addresses via
+   broadcast instead of unicast ensures that these conflicts can be
+   detected as soon as they become potential problems, but no sooner.
+   For example, if two disjoint network links are joined, where hosts A
+   and B have both configured the same Link-Local address, X, they can
+   remain in this state until A, B or some other host attempts to
+   initiate communication.  If some other host C now sends an ARP
+   request for address X, and hosts A and B were to both reply with
+   conventional unicast ARP replies, then host C might be confused, but
+   A and B still wouldn't know there is a problem because neither would
+   have seen the other's packet.  Sending these replies via broadcast
+   allows A and B to see each other's conflicting ARP packets and
+   respond accordingly.
+
+   Note that sending periodic gratuitous ARPs in an attempt to detect
+   these conflicts sooner is not necessary, wastes network bandwidth,
+   and may actually be detrimental.  For example, if the network links
+   were joined only briefly, and were separated again before any new
+
+
+
+Cheshire, et al.            Standards Track                    [Page 22]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+   communication involving A or B were initiated, then the temporary
+   conflict would have been benign and no forced reconfiguration would
+   have been required.  Triggering an unnecessary forced reconfiguration
+   in this case would not serve any useful purpose.  Hosts SHOULD NOT
+   send periodic gratuitous ARPs.
+
+5.  Security Considerations
+
+   The use of IPv4 Link-Local Addresses may open a network host to new
+   attacks.  In particular, a host that previously did not have an IP
+   address, and no IP stack running, was not susceptible to IP-based
+   attacks.  By configuring a working address, the host may now be
+   vulnerable to IP-based attacks.
+
+   The ARP protocol [RFC826] is insecure.  A malicious host may send
+   fraudulent ARP packets on the network, interfering with the correct
+   operation of other hosts.  For example, it is easy for a host to
+   answer all ARP requests with replies giving its own hardware address,
+   thereby claiming ownership of every address on the network.
+
+   NOTE: There are certain kinds of local links, such as wireless LANs,
+   that provide no physical security.  Because of the existence of these
+   links it would be very unwise for an implementer to assume that when
+   a device is communicating only on the local link it can dispense with
+   normal security precautions.  Failure to implement appropriate
+   security measures could expose users to considerable risks.
+
+   A host implementing IPv4 Link-Local configuration has an additional
+   vulnerability to selective reconfiguration and disruption.  It is
+   possible for an on-link attacker to issue ARP packets which would
+   cause a host to break all its connections by switching to a new
+   address.  The attacker could force the host implementing IPv4 Link-
+   Local configuration to select certain addresses, or prevent it from
+   ever completing address selection.  This is a distinct threat from
+   that posed by spoofed ARPs, described in the preceding paragraph.
+
+   Implementations and users should also note that a node that gives up
+   an address and reconfigures, as required by Section 2.5, allows the
+   possibility that another node can easily and successfully hijack
+   existing TCP connections.
+
+   Implementers are advised that the Internet Protocol architecture
+   expects every networked device or host to implement security which
+   is adequate to protect the resources to which the device or host has
+   access, including the network itself, against known or credible
+   threats.  Even though use of IPv4 Link-Local addresses may reduce the
+
+
+
+
+
+Cheshire, et al.            Standards Track                    [Page 23]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+   number of threats to which a device is exposed, implementers of
+   devices supporting the Internet Protocol must not assume that a
+   customer's local network is free from security risks.
+
+   While there may be particular kinds of devices, or particular
+   environments, for which the security provided by the network is
+   adequate to protect the resources that are accessible by the device,
+   it would be misleading to make a general statement to the effect that
+   the requirement to provide security is reduced for devices using IPv4
+   Link-Local addresses as a sole means of access.
+
+   In all cases, whether or not IPv4 Link-Local addresses are used, it
+   is necessary for implementers of devices supporting the Internet
+   Protocol to analyze the known and credible threats to which a
+   specific host or device might be subjected, and to the extent that it
+   is feasible, to provide security mechanisms which ameliorate or
+   reduce the risks associated with such threats.
+
+6.  Application Programming Considerations
+
+   Use of IPv4 Link-Local autoconfigured addresses presents additional
+   challenges to writers of applications and may result in existing
+   application software failing.
+
+6.1.  Address Changes, Failure and Recovery
+
+   IPv4 Link-Local addresses used by an application may change over
+   time.  Some application software encountering an address change will
+   fail.  For example, existing client TCP connections will be aborted,
+   servers whose addresses change will have to be rediscovered, blocked
+   reads and writes will exit with an error condition, and so on.
+
+   Vendors producing application software which will be used on IP
+   implementations supporting IPv4 Link-Local address configuration
+   SHOULD detect and cope with address change events.  Vendors producing
+   IPv4 implementations supporting IPv4 Link-Local address configuration
+   SHOULD expose address change events to applications.
+
+6.2.  Limited Forwarding of Locators
+
+   IPv4 Link-Local addresses MUST NOT be forwarded via an application
+   protocol (for example in a URL), to a destination that is not on the
+   same link.  This is discussed further in Sections 2.9 and 3.
+
+   Existing distributed application software that forwards address
+   information may fail.  For example, FTP [RFC959] (when not using
+   passive mode) transmits the IP address of the client.  Suppose a
+   client starts up and obtains its IPv4 configuration at a time when it
+
+
+
+Cheshire, et al.            Standards Track                    [Page 24]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+   has only a Link-Local address.  Later, the host gets a global IP
+   address, and the client contacts an FTP server outside the local
+   link.  If the FTP client transmits its old Link-Local address instead
+   of its new global IP address in the FTP "port" command, then the FTP
+   server will be unable to open a data connection back to the client,
+   and the FTP operation will fail.
+
+6.3.  Address Ambiguity
+
+   Application software run on a multi-homed host that supports IPv4
+   Link-Local address configuration on more than one interface may fail.
+
+   This is because application software assumes that an IPv4 address is
+   unambiguous, that it can refer to only one host.  IPv4 Link-Local
+   addresses are unique only on a single link.  A host attached to
+   multiple links can easily encounter a situation where the same
+   address is present on more than one interface, or first on one
+   interface, later on another; in any case associated with more than
+   one host.  Most existing software is not prepared for this ambiguity.
+   In the future, application programming interfaces could be developed
+   to prevent this problem.  This issue is discussed in Section 3.
+
+7.  Router Considerations
+
+   A router MUST NOT forward a packet with an IPv4 Link-Local source or
+   destination address, irrespective of the router's default route
+   configuration or routes obtained from dynamic routing protocols.
+
+   A router which receives a packet with an IPv4 Link-Local source or
+   destination address MUST NOT forward the packet.  This prevents
+   forwarding of packets back onto the network segment from which they
+   originated, or to any other segment.
+
+8.  IANA Considerations
+
+   The IANA has allocated the prefix 169.254/16 for the use described in
+   this document.  The first and last 256 addresses in this range
+   (169.254.0.x and 169.254.255.x) are allocated by Standards Action, as
+   defined in "Guidelines for Writing an IANA Considerations Section in
+   RFCs" (BCP 26) [RFC2434].  No other IANA services are required by
+   this document.
+
+
+
+
+
+
+
+
+
+
+Cheshire, et al.            Standards Track                    [Page 25]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+9.  Constants
+
+   The following timing constants are used in this protocol; they are
+   not intended to be user configurable.
+
+   PROBE_WAIT           1 second   (initial random delay)
+   PROBE_NUM            3          (number of probe packets)
+   PROBE_MIN            1 second   (minimum delay till repeated probe)
+   PROBE_MAX            2 seconds  (maximum delay till repeated probe)
+   ANNOUNCE_WAIT        2 seconds  (delay before announcing)
+   ANNOUNCE_NUM         2          (number of announcement packets)
+   ANNOUNCE_INTERVAL    2 seconds  (time between announcement packets)
+   MAX_CONFLICTS       10          (max conflicts before rate limiting)
+   RATE_LIMIT_INTERVAL 60 seconds  (delay between successive attempts)
+   DEFEND_INTERVAL     10 seconds  (minimum interval between defensive
+                                    ARPs).
+
+10.  References
+
+10.1.  Normative References
+
+   [RFC792]  Postel, J., "Internet Control Message Protocol", STD 5, RFC
+             792, September 1981.
+
+   [RFC826]  Plummer, D., "Ethernet Address Resolution Protocol: Or
+             converting network protocol addresses to 48.bit Ethernet
+             address for transmission on Ethernet hardware", STD 37, RFC
+             826, November 1982.
+
+   [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate
+             Requirement Levels", BCP 14, RFC 2119, March 1997.
+
+   [RFC2434] Narten, T. and H. Alvestrand, "Guidelines for Writing an
+             IANA Considerations Section in RFCs", BCP 26, RFC 2434,
+             October 1998.
+
+10.2.  Informative References
+
+   [802]     IEEE Standards for Local and Metropolitan Area Networks:
+             Overview and Architecture, ANSI/IEEE Std 802, 1990.
+
+   [802.3]   ISO/IEC 8802-3 Information technology - Telecommunications
+             and information exchange between systems - Local and
+             metropolitan area networks - Common specifications - Part
+             3:  Carrier Sense Multiple Access with Collision Detection
+             (CSMA/CD) Access Method and Physical Layer Specifications,
+             (also ANSI/IEEE Std 802.3- 1996), 1996.
+
+
+
+
+Cheshire, et al.            Standards Track                    [Page 26]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+   [802.5]   ISO/IEC 8802-5 Information technology - Telecommunications
+             and information exchange between systems - Local and
+             metropolitan area networks - Common specifications - Part
+             5: Token ring access method and physical layer
+             specifications, (also ANSI/IEEE Std 802.5-1998), 1998.
+
+   [802.11]  Information technology - Telecommunications and information
+             exchange between systems - Local and metropolitan area
+             networks - Specific Requirements Part 11:  Wireless LAN
+             Medium Access Control (MAC) and Physical Layer (PHY)
+             Specifications, IEEE Std. 802.11-1999, 1999.
+
+   [RFC959]  Postel, J. and J. Reynolds, "File Transfer Protocol", STD
+             9, RFC 959, October 1985.
+
+   [RFC1918] Rekhter, Y., Moskowitz, B., Karrenberg, D., de Groot, G.,
+             and E. Lear, "Address Allocation for Private Internets",
+             BCP 5, RFC 1918, February 1996.
+
+   [RFC2131] Droms, R., "Dynamic Host Configuration Protocol", RFC 2131,
+             March 1997.
+
+   [RFC2462] Thomson, S. and T. Narten, "IPv6 Stateless Address
+             Autoconfiguration", RFC 2462, December 1998.
+
+   [RFC3027] Holdrege, M. and P. Srisuresh, "Protocol Complications with
+             the IP Network Address Translator", RFC 3027, January 2001.
+
+   [DNAv4]   Aboba, B., "Detection of Network Attachment (DNA) in IPv4",
+             Work in Progress, July 2004.
+
+   [LLMNR]   Esibov, L., Aboba, B. and D. Thaler, "Linklocal Multicast
+             Name Resolution (LLMNR)", Work in Progress, June 2004.
+
+Acknowledgments
+
+   We would like to thank (in alphabetical order) Jim Busse, Pavani
+   Diwanji, Donald Eastlake 3rd, Robert Elz, Peter Ford, Spencer
+   Giacalone, Josh Graessley, Brad Hards, Myron Hattig, Hugh Holbrook,
+   Christian Huitema, Richard Johnson, Kim Yong-Woon, Mika Liljeberg,
+   Rod Lopez, Keith Moore, Satish Mundra, Thomas Narten, Erik Nordmark,
+   Philip Nye, Howard Ridenour, Daniel Senie, Dieter Siegmund, Valery
+   Smyslov, and Ryan Troll for their contributions.
+
+
+
+
+
+
+
+
+Cheshire, et al.            Standards Track                    [Page 27]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+Appendix A - Prior Implementations
+
+A.1.  Apple Mac OS 8.x and 9.x.
+
+   Mac OS chooses the IP address on a pseudo-random basis.  The selected
+   address is saved in persistent storage for continued use after
+   reboot, when possible.
+
+   Mac OS sends nine DHCPDISCOVER packets, with an interval of two
+   seconds between packets.  If no response is received from any of
+   these requests (18 seconds), it will autoconfigure.
+
+   Upon finding that a selected address is in use, Mac OS will select a
+   new random address and try again, at a rate limited to no more than
+   one attempt every two seconds.
+
+   Autoconfigured Mac OS systems check for the presence of a DHCP server
+   every five minutes.  If a DHCP server is found but Mac OS is not
+   successful in obtaining a new lease, it keeps the existing
+   autoconfigured IP address.  If Mac OS is successful at obtaining a
+   new lease, it drops all existing connections without warning.  This
+   may cause users to lose sessions in progress.  Once a new lease is
+   obtained, Mac OS will not allocate further connections using the
+   autoconfigured IP address.
+
+   Mac OS systems do not send packets addressed to a Link-Local address
+   to the default gateway if one is present; these addresses are always
+   resolved on the local segment.
+
+   Mac OS systems by default send all outgoing unicast packets with a
+   TTL of 255.  All multicast and broadcast packets are also sent with a
+   TTL of 255 if they have a source address in the 169.254/16 prefix.
+
+   Mac OS implements media sense where the hardware (and driver
+   software) supports this.  As soon as network connectivity is
+   detected, a DHCPDISCOVER will be sent on the interface.  This means
+   that systems will immediately transition out of autoconfigured mode
+   as soon as connectivity is restored.
+
+A.2.  Apple Mac OS X Version 10.2
+
+   Mac OS X chooses the IP address on a pseudo-random basis.  The
+   selected address is saved in memory so that it can be re-used during
+   subsequent autoconfiguration attempts during a single boot of the
+   system.
+
+
+
+
+
+
+Cheshire, et al.            Standards Track                    [Page 28]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+   Autoconfiguration of a Link-Local address depends on the results of
+   the DHCP process.  DHCP sends two packets, with timeouts of one and
+   two seconds.  If no response is received (three seconds), it begins
+   autoconfiguration.  DHCP continues sending packets in parallel for a
+   total time of 60 seconds.
+
+   At the start of autoconfiguration, it generates 10 unique random IP
+   addresses, and probes each one in turn for 2 seconds.  It stops
+   probing after finding an address that is not in use, or the list of
+   addresses is exhausted.
+
+   If DHCP is not successful, it waits five minutes before starting over
+   again.  Once DHCP is successful, the autoconfigured Link-Local
+   address is given up.  The Link-Local subnet, however, remains
+   configured.
+
+   Autoconfiguration is only attempted on a single interface at any
+   given moment in time.
+
+   Mac OS X ensures that the connected interface with the highest
+   priority is associated with the Link-Local subnet.  Packets addressed
+   to a Link-Local address are never sent to the default gateway, if one
+   is present.  Link-local addresses are always resolved on the local
+   segment.
+
+   Mac OS X implements media sense where the hardware and driver support
+   it.  When the network media indicates that it has been connected, the
+   autoconfiguration process begins again, and attempts to re-use the
+   previously assigned Link-Local address.  When the network media
+   indicates that it has been disconnected, the system waits four
+   seconds before de-configuring the Link-Local address and subnet.  If
+   the connection is restored before that time, the autoconfiguration
+   process begins again.  If the connection is not restored before that
+   time, the system chooses another interface to autoconfigure.
+
+   Mac OS X by default sends all outgoing unicast packets with a TTL of
+   255.  All multicast and broadcast packets are also sent with a TTL of
+   255 if they have a source address in the 169.254/16 prefix.
+
+A.3.  Microsoft Windows 98/98SE
+
+   Windows 98/98SE systems choose their IPv4 Link-Local address on a
+   pseudo-random basis.  The address selection algorithm is based on
+   computing a hash on the interface's MAC address, so that a large
+   collection of hosts should obey the uniform probability distribution
+   in choosing addresses within the 169.254/16 address space.  Deriving
+
+
+
+
+
+Cheshire, et al.            Standards Track                    [Page 29]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+   the initial IPv4 Link-Local address from the interface's MAC address
+   also ensures that systems rebooting will obtain the same
+   autoconfigured address, unless a conflict is detected.
+
+   When in INIT state, the Windows 98/98SE DHCP Client sends out a total
+   of 4 DHCPDISCOVERs, with an inter-packet interval of 6 seconds.  When
+   no response is received after all 4 packets (24 seconds), it will
+   autoconfigure an address.
+
+   The autoconfigure retry count for Windows 98/98SE systems is 10.
+   After trying 10 autoconfigured IPv4 addresses, and finding all are
+   taken, the host will boot without an IPv4 address.
+
+   Autoconfigured Windows 98/98SE systems check for the presence of a
+   DHCP server every five minutes.  If a DHCP server is found but
+   Windows 98 is not successful in obtaining a new lease, it keeps the
+   existing autoconfigured IPv4 Link-Local address.  If Windows 98/98SE
+   is successful at obtaining a new lease, it drops all existing
+   connections without warning.  This may cause users to lose sessions
+   in progress.  Once a new lease is obtained, Windows 98/98SE will not
+   allocate further connections using the autoconfigured IPv4 Link-Local
+   address.
+
+   Windows 98/98SE systems with an IPv4 Link-Local address do not send
+   packets addressed to an IPv4 Link-Local address to the default
+   gateway if one is present; these addresses are always resolved on the
+   local segment.
+
+   Windows 98/98SE systems by default send all outgoing unicast packets
+   with a TTL of 128.  TTL configuration is performed by setting the
+   Windows Registry Key
+   HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services\Tcpip\
+   Parameters\DefaultTTL of type REG_DWORD to the appropriate value.
+   However, this default TTL will apply to all packets.  While this
+   facility could be used to set the default TTL to 255, it cannot be
+   used to set the default TTL of IPv4 Link-Local packets to one (1),
+   while allowing other packets to be sent with a TTL larger than one.
+
+   Windows 98/98SE systems do not implement media sense.  This means
+   that network connectivity issues (such as a loose cable) may prevent
+   a system from contacting the DHCP server, thereby causing it to
+   auto-configure.  When the connectivity problem is fixed (such as when
+   the cable is re-connected) the situation will not immediately correct
+   itself.  Since the system will not sense the re-connection, it will
+   remain in autoconfigured mode until an attempt is made to reach the
+   DHCP server.
+
+
+
+
+
+Cheshire, et al.            Standards Track                    [Page 30]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+   The DHCP server included with Windows 98SE Internet Connection
+   Sharing (ICS) (a NAT implementation) allocates out of the 192.168/16
+   private address space by default.
+
+   However, it is possible to change the allocation prefix via a
+   registry key, and no checks are made to prevent allocation out of the
+   IPv4 Link-Local prefix.  When configured to do so, Windows 98SE ICS
+   will rewrite packets from the IPv4 Link-Local prefix and forward them
+   beyond the local link.  Windows 98SE ICS does not automatically route
+   for the IPv4 Link-Local prefix, so that hosts obtaining addresses via
+   DHCP cannot communicate with autoconfigured-only devices.
+
+   Other home gateways exist that allocate addresses out of the IPv4
+   Link-Local prefix by default.  Windows 98/98SE systems can use a
+   169.254/16 IPv4 Link-Local address as the source address when
+   communicating with non-Link-Local hosts.  Windows 98/98SE does not
+   support router solicitation/advertisement.  Windows 98/98SE systems
+   will not automatically discover a default gateway when in
+   autoconfigured mode.
+
+A.4.  Windows XP, 2000, and ME
+
+   The autoconfiguration behavior of Windows XP, Windows 2000, and
+   Windows ME systems is identical to Windows 98/98SE except in the
+   following respects:
+
+   Media Sense
+   Router Discovery
+   Silent RIP
+
+   Windows XP, 2000, and ME implement media sense.  As soon as network
+   connectivity is detected, a DHCPREQUEST or DHCPDISCOVER will be sent
+   on the interface.  This means that systems will immediately
+   transition out of autoconfigured mode as soon as connectivity is
+   restored.
+
+   Windows XP, 2000, and ME also support router discovery, although it
+   is turned off by default.  Windows XP and 2000 also support a RIP
+   listener.  This means that they may inadvertently discover a default
+   gateway while in autoconfigured mode.
+
+   ICS on Windows XP/2000/ME behaves identically to Windows 98SE with
+   respect to address allocation and NATing of Link-Local prefixes.
+
+
+
+
+
+
+
+
+Cheshire, et al.            Standards Track                    [Page 31]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+Authors' Addresses
+
+   Stuart Cheshire
+   Apple Computer, Inc.
+   1 Infinite Loop
+   Cupertino
+   California 95014, USA
+
+   Phone: +1 408 974 3207
+   EMail: rfc@stuartcheshire.org
+
+
+   Bernard Aboba
+   Microsoft Corporation
+   One Microsoft Way
+   Redmond, WA 98052
+
+   Phone: +1 425 818 4011
+   EMail: bernarda@microsoft.com
+
+
+   Erik Guttman
+   Sun Microsystems
+   Eichhoelzelstr. 7
+   74915 Waibstadt Germany
+
+   Phone: +49 7263 911 701
+   EMail: erik@spybeam.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Cheshire, et al.            Standards Track                    [Page 32]
+
+RFC 3927                    IPv4 Link-Local                     May 2005
+
+
+Full Copyright Statement
+
+   Copyright (C) The Internet Society (2005).
+
+   This document is subject to the rights, licenses and restrictions
+   contained in BCP 78, and except as set forth therein, the authors
+   retain all their rights.
+
+   This document and the information contained herein are provided on an
+   "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS
+   OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET
+   ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED,
+   INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE
+   INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED
+   WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Intellectual Property
+
+   The IETF takes no position regarding the validity or scope of any
+   Intellectual Property Rights or other rights that might be claimed to
+   pertain to the implementation or use of the technology described in
+   this document or the extent to which any license under such rights
+   might or might not be available; nor does it represent that it has
+   made any independent effort to identify any such rights.  Information
+   on the procedures with respect to rights in RFC documents can be
+   found in BCP 78 and BCP 79.
+
+   Copies of IPR disclosures made to the IETF Secretariat and any
+   assurances of licenses to be made available, or the result of an
+   attempt made to obtain a general license or permission for the use of
+   such proprietary rights by implementers or users of this
+   specification can be obtained from the IETF on-line IPR repository at
+   http://www.ietf.org/ipr.
+
+   The IETF invites any interested party to bring to its attention any
+   copyrights, patents or patent applications, or other proprietary
+   rights that may cover technology that may be required to implement
+   this standard.  Please address the information to the IETF at ietf-
+   ipr@ietf.org.
+
+Acknowledgement
+
+   Funding for the RFC Editor function is currently provided by the
+   Internet Society.
+
+
+
+
+
+
+
+Cheshire, et al.            Standards Track                    [Page 33]
+
diff --git a/ext/picotcp/RFC/rfc4614.txt b/ext/picotcp/RFC/rfc4614.txt
new file mode 100644
index 0000000..80d08af
--- /dev/null
+++ b/ext/picotcp/RFC/rfc4614.txt
@@ -0,0 +1,1851 @@
+
+
+
+
+
+
+Network Working Group                                            M. Duke
+Request for Comments: 4614                          Boeing Phantom Works
+Category: Informational                                        R. Braden
+                                      USC Information Sciences Institute
+                                                                 W. Eddy
+                                         Verizon Federal Network Systems
+                                                              E. Blanton
+                                      Purdue University Computer Science
+                                                          September 2006
+
+
+           A Roadmap for Transmission Control Protocol (TCP)
+                        Specification Documents
+
+Status of This Memo
+
+   This memo provides information for the Internet community.  It does
+   not specify an Internet standard of any kind.  Distribution of this
+   memo is unlimited.
+
+Copyright Notice
+
+   Copyright (C) The Internet Society (2006).
+
+Abstract
+
+   This document contains a "roadmap" to the Requests for Comments (RFC)
+   documents relating to the Internet's Transmission Control Protocol
+   (TCP).  This roadmap provides a brief summary of the documents
+   defining TCP and various TCP extensions that have accumulated in the
+   RFC series.  This serves as a guide and quick reference for both TCP
+   implementers and other parties who desire information contained in
+   the TCP-related RFCs.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Duke, et al.                 Informational                      [Page 1]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+Table of Contents
+
+   1. Introduction ....................................................2
+   2. Basic Functionality .............................................4
+   3. Recommended Enhancements ........................................6
+      3.1. Congestion Control and Loss Recovery Extensions ............7
+      3.2. SACK-Based Loss Recovery and Congestion Control ............8
+      3.3. Dealing with Forged Segments ...............................9
+   4. Experimental Extensions ........................................10
+   5. Historic Extensions ............................................13
+   6. Support Documents ..............................................14
+      6.1. Foundational Works ........................................15
+      6.2. Difficult Network Environments ............................16
+      6.3. Implementation Advice .....................................19
+      6.4. Management Information Bases ..............................20
+      6.5. Tools and Tutorials .......................................22
+      6.6. Case Studies ..............................................22
+   7. Undocumented TCP Features ......................................23
+   8. Security Considerations ........................................24
+   9. Acknowledgments ................................................24
+   10. Informative References ........................................25
+      10.1. Basic Functionality ......................................25
+      10.2. Recommended Enhancements .................................25
+      10.3. Experimental Extensions ..................................26
+      10.4. Historic Extensions ......................................27
+      10.5. Support Documents ........................................28
+      10.6. Informative References Outside the RFC Series ............31
+
+1.  Introduction
+
+   A correct and efficient implementation of the Transmission Control
+   Protocol (TCP) is a critical part of the software of most Internet
+   hosts.  As TCP has evolved over the years, many distinct documents
+   have become part of the accepted standard for TCP.  At the same time,
+   a large number of more experimental modifications to TCP have also
+   been published in the RFC series, along with informational notes,
+   case studies, and other advice.
+
+   As an introduction to newcomers and an attempt to organize the
+   plethora of information for old hands, this document contains a
+   "roadmap" to the TCP-related RFCs.  It provides a brief summary of
+   the RFC documents that define TCP.  This should provide guidance to
+   implementers on the relevance and significance of the standards-track
+   extensions, informational notes, and best current practices that
+   relate to TCP.
+
+
+
+
+
+
+Duke, et al.                 Informational                      [Page 2]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+   This document is not an update of RFC 1122 and is not a rigorous
+   standard for what needs to be implemented in TCP.  This document is
+   merely an informational roadmap that captures, organizes, and
+   summarizes most of the RFC documents that a TCP implementer,
+   experimenter, or student should be aware of.  Particular comments or
+   broad categorizations that this document makes about individual
+   mechanisms and behaviors are not to be taken as definitive, nor
+   should the content of this document alone influence implementation
+   decisions.
+
+   This roadmap includes a brief description of the contents of each
+   TCP-related RFC.  In some cases, we simply supply the abstract or a
+   key summary sentence from the text as a terse description.  In
+   addition, a letter code after an RFC number indicates its category in
+   the RFC series (see BCP 9 [RFC2026] for explanation of these
+   categories):
+
+      S - Standards Track (Proposed Standard, Draft Standard, or
+          Standard)
+
+      E - Experimental
+
+      B - Best Current Practice
+
+      I - Informational
+
+   Note that the category of an RFC does not necessarily reflect its
+   current relevance.  For instance, RFC 2581 is nearly universally
+   deployed although it is only a Proposed Standard.  Similarly, some
+   Informational RFCs contain significant technical proposals for
+   changing TCP.
+
+   This roadmap is divided into four main sections.  Section 2 lists the
+   RFCs that describe absolutely required TCP behaviors for proper
+   functioning and interoperability.  Further RFCs that describe
+   strongly encouraged, but non-essential, behaviors are listed in
+   Section 3.  Experimental extensions that are not yet standard
+   practices, but that potentially could be in the future, are described
+   in Section 4.
+
+   The reader will probably notice that these three sections are broadly
+   equivalent to MUST/SHOULD/MAY specifications (per RFC 2119), and
+   although the authors support this intuition, this document is merely
+   descriptive; it does not represent a binding standards-track
+   position.  Individual implementers still need to examine the
+   standards documents themselves to evaluate specific requirement
+   levels.
+
+
+
+
+Duke, et al.                 Informational                      [Page 3]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+   A small number of older experimental extensions that have not been
+   widely implemented, deployed, and used are noted in Section 5.  Many
+   other supporting documents that are relevant to the development,
+   implementation, and deployment of TCP are described in Section 6.
+   Within each section, RFCs are listed in the chronological order of
+   their publication dates.
+
+   A small number of fairly ubiquitous important implementation
+   practices that are not currently documented in the RFC series are
+   listed in Section 7.
+
+2.  Basic Functionality
+
+   A small number of documents compose the core specification of TCP.
+   These define the required basic functionalities of TCP's header
+   parsing, state machine, congestion control, and retransmission
+   timeout computation.  These base specifications must be correctly
+   followed for interoperability.
+
+   RFC 793 S: "Transmission Control Protocol", STD 7 (September 1981)
+
+      This is the fundamental TCP specification document [RFC0793].
+      Written by Jon Postel as part of the Internet protocol suite's
+      core, it describes the TCP packet format, the TCP state machine
+      and event processing, and TCP's semantics for data transmission,
+      reliability, flow control, multiplexing, and acknowledgment.
+
+      Section 3.6 of RFC 793, describing TCP's handling of the IP
+      precedence and security compartment, is mostly irrelevant today.
+      RFC 2873 changed the IP precedence handling, and the security
+      compartment portion of the API is no longer implemented or used.
+      In addition, RFC 793 did not describe any congestion control
+      mechanism.  Otherwise, however, the majority of this document
+      still accurately describes modern TCPs.  RFC 793 is the last of a
+      series of developmental TCP specifications, starting in the
+      Internet Experimental Notes (IENs) and continuing in the RFC
+      series.
+
+   RFC 1122 S: "Requirements for Internet Hosts - Communication Layers"
+   (October 1989)
+
+      This document [RFC1122] updates and clarifies RFC 793, fixing some
+      specification bugs and oversights.  It also explains some features
+      such as keep-alives and Karn's and Jacobson's RTO estimation
+      algorithms [KP87][Jac88][JK92].  ICMP interactions are mentioned,
+      and some tips are given for efficient implementation.  RFC 1122 is
+      an Applicability Statement, listing the various features that
+      MUST, SHOULD, MAY, SHOULD NOT, and MUST NOT be present in
+
+
+
+Duke, et al.                 Informational                      [Page 4]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+      standards-conforming TCP implementations.  Unlike a purely
+      informational "roadmap", this Applicability Statement is a
+      standards document and gives formal rules for implementation.
+
+   RFC 2460 S: "Internet Protocol, Version 6 (IPv6) Specification"
+   (December 1998)
+
+      This document [RFC2460] is of relevance to TCP because it defines
+      how the pseudo-header for TCP's checksum computation is derived
+      when 128-bit IPv6 addresses are used instead of 32-bit IPv4
+      addresses.  Additionally, RFC 2675 describes TCP changes required
+      to support IPv6 jumbograms.
+
+   RFC 2581 S: "TCP Congestion Control" (April 1999)
+
+      Although RFC 793 did not contain any congestion control
+      mechanisms, today congestion control is a required component of
+      TCP implementations.  This document [RFC2581] defines the current
+      versions of Van Jacobson's congestion avoidance and control
+      mechanisms for TCP, based on his 1988 SIGCOMM paper [Jac88].  RFC
+      2001 was a conceptual precursor that was obsoleted by RFC 2581.
+
+      A number of behaviors that together constitute what the community
+      refers to as "Reno TCP" are described in RFC 2581.  The name
+      "Reno" comes from the Net/2 release of the 4.3 BSD operating
+      system.  This is generally regarded as the least common
+      denominator among TCP flavors currently found running on Internet
+      hosts.  Reno TCP includes the congestion control features of slow
+      start, congestion avoidance, fast retransmit, and fast recovery.
+
+      RFC 1122 mandates the implementation of a congestion control
+      mechanism, and RFC 2581 details the currently accepted mechanism.
+      RFC 2581 differs slightly from the other documents listed in this
+      section, as it does not affect the ability of two TCP endpoints to
+      communicate; however, congestion control remains a critical
+      component of any widely deployed TCP implementation and is
+      required for the avoidance of congestion collapse and to ensure
+      fairness among competing flows.
+
+   RFC 2873 S: "TCP Processing of the IPv4 Precedence Field" (June 2000)
+
+      This document [RFC2873] removes from the TCP specification all
+      processing of the precedence bits of the TOS byte of the IP
+      header.  This resolves a conflict over the use of these bits
+      between RFC 793 and Differentiated Services [RFC2474].
+
+
+
+
+
+
+Duke, et al.                 Informational                      [Page 5]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+   RFC 2988 S: "Computing TCP's Retransmission Timer" (November 2000)
+
+      Abstract: "This document defines the standard algorithm that
+      Transmission Control Protocol (TCP) senders are required to use to
+      compute and manage their retransmission timer.  It expands on the
+      discussion in section 4.2.3.1 of RFC 1122 and upgrades the
+      requirement of supporting the algorithm from a SHOULD to a MUST."
+      [RFC2988]
+
+3.  Recommended Enhancements
+
+   This section describes recommended TCP modifications that improve
+   performance and security.  RFCs 1323 and 3168 represent fundamental
+   changes to the protocol.  RFC 1323, based on RFCs 1072 and 1185,
+   allows better utilization of high bandwidth-delay product paths by
+   providing some needed mechanisms for high-rate transfers.  RFC 3168
+   describes a change to the Internet's architecture, whereby routers
+   signal end-hosts of growing congestion levels and can do so before
+   packet losses are forced.  Section 3.1 lists improvements in the
+   congestion control and loss recovery mechanisms specified in RFC
+   2581.  Section 3.2 describes further refinements that make use of
+   selective acknowledgments.  Section 3.3 deals with the problem of
+   preventing forged segments.
+
+   RFC 1323 S:  "TCP Extensions for High Performance" (May 1992)
+
+      This document [RFC1323] defines TCP extensions for window scaling,
+      timestamps, and protection against wrapped sequence numbers, for
+      efficient and safe operation over paths with large bandwidth-delay
+      products.  These extensions are commonly found in currently used
+      systems; however, they may require manual tuning and
+      configuration.  One issue in this specification that is still
+      under discussion concerns a modification to the algorithm for
+      estimating the mean RTT when timestamps are used.
+
+   RFC 2675 S: "IPv6 Jumbograms" (August 1999)
+
+      IPv6 supports longer datagrams than were allowed in IPv4.  These
+      are known as Jumbograms, and use with TCP has necessitated changes
+      to the handling of TCP's MSS and Urgent fields (both 16 bits).
+      This document [RFC2675] explains those changes.  Although it
+      describes changes to basic header semantics, these changes should
+      only affect the use of very large segments, such as IPv6
+      jumbograms, which are currently rarely used in the general
+      Internet.  Supporting the behavior described in this document does
+      not affect interoperability with other TCP implementations when
+      IPv4 or non-jumbogram IPv6 is used.  This document states that
+      jumbograms are to only be used when it can be guaranteed that all
+
+
+
+Duke, et al.                 Informational                      [Page 6]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+      receiving nodes, including each router in the end-to-end path,
+      will support jumbograms.  If even a single node that does not
+      support jumbograms is attached to a local network, then no host on
+      that network may use jumbograms.  This explains why jumbogram use
+      has been rare, and why this document is considered a performance
+      optimization and not part of TCP over IPv6's basic functionality.
+
+   RFC 3168 S: "The Addition of Explicit Congestion Notification (ECN)
+   to IP" (September 2001)
+
+      This document [RFC3168] defines a means for end hosts to detect
+      congestion before congested routers are forced to discard packets.
+      Although congestion notification takes place at the IP level, ECN
+      requires support at the transport level (e.g., in TCP) to echo the
+      bits and adapt the sending rate.  This document updates RFC 793 to
+      define two previously unused flag bits in the TCP header for ECN
+      support.  RFC 3540 provides a supplementary (experimental) means
+      for more secure use of ECN, and RFC 2884 provides some sample
+      results from using ECN.
+
+3.1.  Congestion Control and Loss Recovery Extensions
+
+   Two of the most important aspects of TCP are its congestion control
+   and loss recovery features.  TCP traditionally treats lost packets as
+   indicating congestion-related loss, and cannot distinguish between
+   congestion-related loss and loss due to transmission errors.  Even
+   when ECN is in use, there is a rather intimate coupling between
+   congestion control and loss recovery mechanisms.  There are several
+   extensions to both features, and more often than not, a particular
+   extension applies to both.  In this sub-section, we group
+   enhancements to either congestion control, loss recovery, or both,
+   which can be performed unilaterally; that is, without negotiating
+   support between endpoints.  In the next sub-section, we group the
+   extensions that specify or rely on the SACK option, which must be
+   negotiated bilaterally.  TCP implementations should include the
+   enhancements from both sub-sections so that TCP senders can perform
+   well without regard to the feature sets of other hosts they connect
+   to.  For example, if SACK use is not successfully negotiated, a host
+   should use the NewReno behavior as a fall back.
+
+
+
+
+
+
+
+
+
+
+
+
+Duke, et al.                 Informational                      [Page 7]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+   RFC 3042 S: "Enhancing TCP's Loss Recovery Using Limited Transmit"
+   (January 2001)
+
+      Abstract: "This document proposes Limited Transmit, a new
+      Transmission Control Protocol (TCP) mechanism that can be used to
+      more effectively recover lost segments when a connection's
+      congestion window is small, or when a large number of segments are
+      lost in a single transmission window."  [RFC3042] Tests from 2004
+      showed that Limited Transmit was deployed in roughly one third of
+      the web servers tested [MAF04].
+
+   RFC 3390 S: "Increasing TCP's Initial Window" (October 2002)
+
+      This document [RFC3390] updates RFC 2581 to permit an initial TCP
+      window of three or four segments during the slow-start phase,
+      depending on the segment size.
+
+   RFC 3782 S: "The NewReno Modification to TCP's Fast Recovery
+   Algorithm" (April 2004)
+
+      This document [RFC3782] specifies a modification to the standard
+      Reno fast recovery algorithm, whereby a TCP sender can use partial
+      acknowledgments to make inferences determining the next segment to
+      send in situations where SACK would be helpful but isn't
+      available.  Although it is only a slight modification, the NewReno
+      behavior can make a significant difference in performance when
+      multiple segments are lost from a single window of data.
+
+3.2.  SACK-Based Loss Recovery and Congestion Control
+
+   The base TCP specification in RFC 793 provided only a simple
+   cumulative acknowledgment mechanism.  However, a selective
+   acknowledgment (SACK) mechanism provides performance improvement in
+   the presence of multiple packet losses from the same flight, more
+   than outweighing the modest increase in complexity.  A TCP should be
+   expected to implement SACK; however, SACK is a negotiated option and
+   is only used if support is advertised by both sides of a connection.
+
+   RFC 2018 S: "TCP Selective Acknowledgment Options" (October 1996)
+
+      This document [RFC2018] defines the basic selective acknowledgment
+      (SACK) mechanism for TCP.
+
+   RFC 2883 S: "An Extension to the Selective Acknowledgement (SACK)
+   Option for TCP" (July 2000)
+
+      This document [RFC2883] extends RFC 2018 to cover the case of
+      acknowledging duplicate segments.
+
+
+
+Duke, et al.                 Informational                      [Page 8]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+   RFC 3517 S: "A Conservative Selective Acknowledgment (SACK)-based
+   Loss Recovery Algorithm for TCP" (April 2003)
+
+      This document [RFC3517] describes a relatively sophisticated
+      algorithm that a TCP sender can use for loss recovery when SACK
+      reports more than one segment lost from a single flight of data.
+      Although support for the exchange of SACK information is widely
+      implemented, not all implementations use an algorithm as
+      sophisticated as that described in RFC 3517.
+
+3.3.  Dealing with Forged Segments
+
+   By default, TCP lacks any cryptographic structures to differentiate
+   legitimate segments and those spoofed from malicious hosts.  Spoofing
+   valid segments requires correctly guessing a number of fields.  The
+   documents in this sub-section describe ways to make that guessing
+   harder, or to prevent it from being able to affect a connection
+   negatively.
+
+   The TCPM working group is currently working towards fully
+   understanding and defining mechanisms for preventing spoofing attacks
+   (including both spoofed TCP segments and ICMP datagrams).  Some of
+   the solutions being considered rely on TCP modifications, whereas
+   others rely on security at lower layers (like IPsec) for protection.
+
+   RFC 1948 I: "Defending Against Sequence Number Attacks" (May 1996)
+
+      This document [RFC1948] describes the TCP vulnerability that
+      allows an attacker to send forged TCP packets, by guessing the
+      initial sequence number in the three-way handshake.  Simple
+      defenses against exploitation are then described.  Some variation
+      is implemented in most currently used operating systems.
+
+   RFC 2385 S: "Protection of BGP Sessions via the TCP MD5 Signature
+   Option" (August 1998)
+
+      From document: "This document describes current existing practice
+      for securing BGP against certain simple attacks.  It is understood
+      to have security weaknesses against concerted attacks.
+
+      This memo describes a TCP extension to enhance security for BGP.
+      It defines a new TCP option for carrying an MD5 digest in a TCP
+      segment.  This digest acts like a signature for that segment,
+      incorporating information known only to the connection end points.
+      Since BGP uses TCP as its transport, using this option in the way
+      described in this paper significantly reduces the danger from
+      certain security attacks on BGP."  [RFC2385]
+
+
+
+
+Duke, et al.                 Informational                      [Page 9]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+      TCP MD5 options are currently only used in very limited contexts,
+      primarily for defending BGP exchanges between routers.  Some
+      deployment notes for those using TCP MD5 are found in the later
+      RFC 3562, "Key Management Considerations for the TCP MD5 Signature
+      Option" [RFC3562].  RFC 4278 deprecates the use of TCP MD5 outside
+      BGP [RFC4278].
+
+4.  Experimental Extensions
+
+   The RFCs in this section are still experimental, but they may become
+   proposed standards in the future.  At least part of the reason that
+   they are still experimental is to gain more wide-scale experience
+   with them before a standards track decision is made.  By their
+   publication as experimental RFCs, it is hoped that the community of
+   TCP researchers will analyze and test the contents of these RFCs.
+   Although experimentation is encouraged, there is not yet formal
+   consensus that these are fully logical and safe behaviors.  Wide-
+   scale deployment of implementations that use these features should be
+   well thought-out in terms of consequences.
+
+   RFC 2140 I: "TCP Control Block Interdependence" (April 1997)
+
+      This document [RFC2140] suggests how TCP connections between the
+      same endpoints might share information, such as their congestion
+      control state.  To some degree, this is done in practice by a few
+      operating systems; for example, Linux currently has a destination
+      cache.  Although this RFC is technically informational, the
+      concepts it describes are in experimental use, so we include it in
+      this section.
+
+      A related proposal, the Congestion Manager, is specified in RFC
+      3124 [RFC3124].  The idea behind the Congestion Manager, moving
+      congestion control outside of individual TCP connections,
+      represents a modification to the core of TCP, which supports
+      sharing information among TCP connections as well.  Although a
+      Proposed Standard, some pieces of the Congestion Manager support
+      architecture have not been specified yet, and it has not achieved
+      use or implementation beyond experimental stacks, so it is not
+      listed among the standard TCP enhancements in this roadmap.
+
+   RFC 2861 E: "TCP Congestion Window Validation" (June 2000)
+
+      This document [RFC2861] suggests reducing the congestion window
+      over time when no packets are flowing.  This behavior is more
+      aggressive than that specified in RFC 2581, which says that a TCP
+      sender SHOULD set its congestion window to the initial window
+      after an idle period of an RTO or greater.
+
+
+
+
+Duke, et al.                 Informational                     [Page 10]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+   RFC 3465 E: "TCP Congestion Control with Appropriate Byte Counting
+   (ABC)" (February 2003)
+
+      This document [RFC3465] suggests that congestion control use the
+      number of bytes acknowledged instead of the number of
+      acknowledgments received.  This has been implemented in Linux.
+      The ABC mechanism behaves differently from the standard method
+      when there is not a one-to-one relationship between data segments
+      and acknowledgments.  ABC still operates within the accepted
+      guidelines, but is more robust to delayed ACKs and ACK-division
+      [SCWA99][RFC3449].
+
+   RFC 3522 E: "The Eifel Detection Algorithm for TCP" (April 2003)
+
+      The Eifel detection algorithm [RFC3522] allows a TCP sender to
+      detect a posteriori whether it has entered loss recovery
+      unnecessarily.
+
+   RFC 3540 E: "Robust Explicit Congestion Notification (ECN) signaling
+   with Nonces" (June 2003)
+
+      This document [RFC3540] suggests a modified ECN to address
+      security concerns and updates RFC 3168.
+
+   RFC 3649 E: "HighSpeed TCP for Large Congestion Windows" (December
+   2003)
+
+      This document [RFC3649] suggests a modification to TCP's steady-
+      state behavior to use very large windows efficiently.
+
+   RFC 3708 E: "Using TCP Duplicate Selective Acknowledgement (DSACKs)
+   and Stream Control Transmission Protocol (SCTP) Duplicate
+   Transmission Sequence Numbers (TSNs) to Detect Spurious
+   Retransmissions" (February 2004)
+
+      Abstract: "TCP and Stream Control Transmission Protocol (SCTP)
+      provide notification of duplicate segment receipt through
+      Duplicate Selective Acknowledgement (DSACKs) and Duplicate
+      Transmission Sequence Number (TSN) notification, respectively.
+      This document presents conservative methods of using this
+      information to identify unnecessary retransmissions for various
+      applications."  [RFC3708]
+
+
+
+
+
+
+
+
+
+Duke, et al.                 Informational                     [Page 11]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+   RFC 3742 E: "Limited Slow-Start for TCP with Large Congestion
+   Windows" (March 2004)
+
+      This document [RFC3742] describes a more conservative slow-start
+      behavior to prevent massive packet losses when a connection uses a
+      very large window.
+
+   RFC 4015 S: "The Eifel Response Algorithm for TCP" (February 2005)
+
+      This document [RFC4015] describes the response portion of the
+      Eifel algorithm, which can be used in conjunction with one of
+      several methods of detecting when loss recovery has been
+      spuriously entered, such as the Eifel detection algorithm in RFC
+      3522, the algorithm in RFC 3708, or F-RTO in RFC 4138.
+
+      Abstract: "Based on an appropriate detection algorithm, the Eifel
+      response algorithm provides a way for a TCP sender to respond to a
+      detected spurious timeout.  It adapts the retransmission timer to
+      avoid further spurious timeouts, and can avoid - depending on the
+      detection algorithm - the often unnecessary go-back-N retransmits
+      that would otherwise be sent.  In addition, the Eifel response
+      algorithm restores the congestion control state in such a way that
+      packet bursts are avoided."
+
+      RFC 4015 is itself a Proposed Standard.  The consensus of the TCPM
+      working group was to place it in this section of the roadmap
+      document due to three factors.
+
+      1.  RFC 4015 operates on the output of a detection algorithm, for
+          which there is currently no available mechanism on the
+          standards track.
+
+      2.  The working group was not aware of any wide deployment and use
+          of RFC 4015.
+
+      3.  The consensus of the working group, after a discussion of the
+          known Intellectual Property Rights claims on the techniques
+          described in RFC 4015, identified this section of the roadmap
+          as an appropriate location.
+
+   RFC 4138 E: "Forward RTO-Recovery (F-RTO): An Algorithm for Detecting
+   Spurious Retransmission Timeouts with TCP and the Stream Control
+   Transmission Protocol" (August 2005)
+
+      The F-RTO detection algorithm [RFC4138] provides another option
+      for inferring spurious retransmission timeouts.  Unlike some
+      similar detection methods, F-RTO does not rely on the use of any
+      TCP options.
+
+
+
+Duke, et al.                 Informational                     [Page 12]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+5.  Historic Extensions
+
+   The RFCs listed here define extensions that have thus far failed to
+   arouse substantial interest from implementers, or that were found to
+   be defective for general use.
+
+   RFC 1106 "TCP Big Window and NAK Options" (June 1989): found
+   defective
+
+      This RFC [RFC1106] defined an alternative to the Window Scale
+      option for using large windows and described the "negative
+      acknowledgement" or NAK option.  There is a comparison of NAK and
+      SACK methods, and early discussion of TCP over satellite issues.
+      RFC 1110 explains some problems with the approaches described in
+      RFC 1106.  The options described in this document have not been
+      adopted by the larger community, although NAKs are used in the
+      SCPS-TP adaptation of TCP for satellite and spacecraft use,
+      developed by the Consultative Committee for Space Data Systems
+      (CCSDS).
+
+   RFC 1110 "A Problem with the TCP Big Window Option" (August 1989):
+   deprecates RFC 1106
+
+      Abstract: "The TCP Big Window option discussed in RFC 1106 will
+      not work properly in an Internet environment which has both a high
+      bandwidth * delay product and the possibility of disordering and
+      duplicating packets.  In such networks, the window size must not
+      be increased without a similar increase in the sequence number
+      space.  Therefore, a different approach to big windows should be
+      taken in the Internet."  [RFC1110]
+
+   RFC 1146 E "TCP Alternate Checksum Options" (March 1990): lack of
+   interest
+
+      This document [RFC1146] defined more robust TCP checksums than the
+      16-bit ones-complement in use today.  A typographical error in RFC
+      1145 is fixed in RFC 1146; otherwise, the documents are the same.
+
+   RFC 1263 "TCP Extensions Considered Harmful" (October 1991): lack of
+   interest
+
+      This document [RFC1263] argues against "backwards compatible" TCP
+      extensions.  Specifically mentioned are several TCP enhancements
+      that have been successful, including timestamps, window scaling,
+      PAWS, and SACK.  RFC 1263 presents an alternative approach called
+      "protocol evolution", whereby several evolutionary versions of TCP
+      would exist on hosts.  These distinct TCP versions would represent
+      upgrades to each other and could be header-incompatible.
+
+
+
+Duke, et al.                 Informational                     [Page 13]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+      Interoperability would be provided by having a virtualization
+      layer select the right TCP version for a particular connection.
+      This idea did not catch on with the community, although the type
+      of extensions RFC 1263 specifically targeted as harmful did become
+      popular.
+
+   RFC 1379 I "Extending TCP for Transactions -- Concepts" (November
+   1992): found defective
+
+      See RFC 1644.
+
+   RFC 1644 E "T/TCP -- TCP Extensions for Transactions Functional
+   Specification" (July 1994): found defective
+
+      The inventors of TCP believed that cached connection state could
+      have been used to eliminate TCP's 3-way handshake, to support
+      two-packet request/response exchanges.  RFCs 1379 [RFC1379] and
+      1644 [RFC1644] show that this is far from simple.  Furthermore,
+      T/TCP floundered on the ease of denial-of-service attacks that can
+      result.  One idea pioneered by T/TCP lives on in RFC 2140, in the
+      sharing of state across connections.
+
+   RFC 1693 E "An Extension to TCP: Partial Order Service" (November
+   1994): lack of interest
+
+      This document [RFC1693] defines a TCP extension for applications
+      that do not care about the order in which application-layer
+      objects are received.  Examples are multimedia and database
+      applications.  In practice, these applications either accept the
+      possible performance loss because of TCP's strict ordering or use
+      more specialized transport protocols.
+
+6.  Support Documents
+
+   This section contains several classes of documents that do not
+   necessarily define current protocol behaviors, but that are
+   nevertheless of interest to TCP implementers.  Section 6.1 describes
+   several foundational RFCs that give modern readers a better
+   understanding of the principles underlying TCP's behaviors and
+   development over the years.  The documents listed in Section 6.2
+   provide advice on using TCP in various types of network situations
+   that pose challenges above those of typical wired links.  Some
+   implementation notes can be found in Section 6.3.  The TCP Management
+   Information Bases are described in Section 6.4.  RFCs that describe
+   tools for testing and debugging TCP implementations or that contain
+   high-level tutorials on the protocol are listed in Section 6.5, and
+   Section 6.6 lists a number of case studies that have explored TCP
+   performance.
+
+
+
+Duke, et al.                 Informational                     [Page 14]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+6.1.  Foundational Works
+
+   The documents listed in this section contain information that is
+   largely duplicated by the standards documents previously discussed.
+   However, some of them contain a greater depth of problem statement
+   explanation or other context.  Particularly, RFCs 813 - 817 (known as
+   the "Dave Clark Five") describe some early problems and solutions
+   (RFC 815 only describes the reassembly of IP fragments and is not
+   included in this TCP roadmap).
+
+   RFC 813: "Window and Acknowledgement Strategy in TCP" (July 1982)
+
+      This document [RFC0813] contains an early discussion of Silly
+      Window Syndrome and its avoidance and motivates and describes the
+      use of delayed acknowledgments.
+
+   RFC 814: "Name, Addresses, Ports, and Routes" (July 1982)
+
+      Suggestions and guidance for the design of tables and algorithms
+      to keep track of various identifiers within a TCP/IP
+      implementation are provided by this document [RFC0814].
+
+   RFC 816: "Fault Isolation and Recovery" (July 1982)
+
+      In this document [RFC0816], TCP's response to indications of
+      network error conditions such as timeouts or received ICMP
+      messages is discussed.
+
+   RFC 817: "Modularity and Efficiency in Protocol Implementation" (July
+   1982)
+
+      This document [RFC0817] contains implementation suggestions that
+      are general and not TCP specific.  However, they have been used to
+      develop TCP implementations and to describe some performance
+      implications of the interactions between various layers in the
+      Internet stack.
+
+   RFC 872: "TCP-ON-A-LAN" (September 1982)
+
+      Conclusion: "The sometimes-expressed fear that using TCP on a
+      local net is a bad idea is unfounded."  [RFC0872]
+
+   RFC 896: "Congestion Control in IP/TCP Internetworks" (January 1984)
+
+      This document [RFC0896] contains some early experiences with
+      congestion collapse and some initial thoughts on how to avoid it
+      using congestion control in TCP.
+
+
+
+
+Duke, et al.                 Informational                     [Page 15]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+   RFC 964: "Some Problems with the Specification of the Military
+   Standard Transmission Control Protocol" (November 1985)
+
+      This document [RFC0964] points out several specification bugs in
+      the US Military's MIL-STD-1778 document, which was intended as a
+      successor to RFC 793.  This serves to remind us of the difficulty
+      in specification writing (even when we work from existing
+      documents!).
+
+   RFC 1072: "TCP Extensions for Long-Delay Paths" (October 1988)
+
+      This document [RFC1072] contains early explanations of the
+      mechanisms that were later described by RFCs 1323 and 2018, which
+      obsolete it.
+
+   RFC 1185: "TCP Extension for High-Speed Paths" (October 1990)
+
+      This document [RFC1185] builds on RFC 1072 to describe more
+      advanced strategies for dealing with sequence number wrapping and
+      detecting duplicates from earlier connections.  This document was
+      obsoleted by RFC 1323.
+
+   RFC 2914 B: "Congestion Control Principles" (September 2000)
+
+      This document [RFC2914] motivates the use of end-to-end congestion
+      control for preventing congestion collapse and providing fairness
+      to TCP.
+
+6.2.  Difficult Network Environments
+
+   As the internetworking field has explored wireless, satellite,
+   cellular telephone, and other kinds of link-layer technologies, a
+   large body of work has built up on enhancing TCP performance for such
+   links.  The RFCs listed in this section describe some of these more
+   challenging network environments and how TCP interacts with them.
+
+   RFC 2488 B: "Enhancing TCP Over Satellite Channels using Standard
+   Mechanisms" (January 1999)
+
+      From abstract: "While TCP works over satellite channels there are
+      several IETF standardized mechanisms that enable TCP to more
+      effectively utilize the available capacity of the network path.
+      This document outlines some of these TCP mitigations.  At this
+      time, all mitigations discussed in this document are IETF
+      standards track mechanisms (or are compliant with IETF
+      standards)."  [RFC2488]
+
+
+
+
+
+Duke, et al.                 Informational                     [Page 16]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+   RFC 2757 I: "Long Thin Networks" (January 2000)
+
+      Several methods of improving TCP performance over long thin
+      networks, such as geosynchronous satellite links, are discussed in
+      this document [RFC2757].  A particular set of TCP options is
+      developed that should work well in such environments and be safe
+      to use in the global Internet.  The implications of such
+      environments have been further discussed in RFC 3150 and RFC 3155,
+      and these documents should be preferred where there is overlap
+      between them and RFC 2757.
+
+   RFC 2760 I: "Ongoing TCP Research Related to Satellites" (February
+   2000)
+
+      This document [RFC2760] discusses the advantages and disadvantages
+      of several different experimental means of improving TCP
+      performance over long-delay or error-prone paths.  These include
+      T/TCP, larger initial windows, byte counting, delayed
+      acknowledgments, slow start thresholds, NewReno and SACK-based
+      loss recovery, FACK [MM96], ECN, various corruption-detection
+      mechanisms, congestion avoidance changes for fairness, use of
+      multiple parallel flows, pacing, header compression, state
+      sharing, and ACK congestion control, filtering, and
+      reconstruction.  Although RFC 2488 looks at standard extensions,
+      this document focuses on more experimental means of performance
+      enhancement.
+
+   RFC 3135 I: "Performance Enhancing Proxies Intended to Mitigate
+   Link-Related Degradations" (June 2001)
+
+      From abstract: "This document is a survey of Performance Enhancing
+      Proxies (PEPs) often employed to improve degraded TCP performance
+      caused by characteristics of specific link environments, for
+      example, in satellite, wireless WAN, and wireless LAN
+      environments.  Different types of Performance Enhancing Proxies
+      are described as well as the mechanisms used to improve
+      performance."  [RFC3135]
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Duke, et al.                 Informational                     [Page 17]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+   RFC 3150 B: "End-to-end Performance Implications of Slow Links" (July
+   2001)
+
+      From abstract: "This document makes performance-related
+      recommendations for users of network paths that traverse "very low
+      bit-rate" links....This recommendation may be useful in any
+      network where hosts can saturate available bandwidth, but the
+      design space for this recommendation explicitly includes
+      connections that traverse 56 Kb/second modem links or 4.8 Kb/
+      second wireless access links - both of which are widely deployed."
+      [RFC3150]
+
+   RFC 3155 B: "End-to-end Performance Implications of Links with
+   Errors" (August 2001)
+
+      From abstract: "This document discusses the specific TCP
+      mechanisms that are problematic in environments with high
+      uncorrected error rates, and discusses what can be done to
+      mitigate the problems without introducing intermediate devices
+      into the connection."  [RFC3155]
+
+   RFC 3366 "Advice to link designers on link Automatic Repeat reQuest
+   (ARQ)" (August 2002)
+
+      From abstract: "This document provides advice to the designers of
+      digital communication equipment and link-layer protocols employing
+      link-layer Automatic Repeat reQuest (ARQ) techniques.  This
+      document presumes that the designers wish to support Internet
+      protocols, but may be unfamiliar with the architecture of the
+      Internet and with the implications of their design choices for the
+      performance and efficiency of Internet traffic carried over their
+      links."  [RFC3366]
+
+   RFC 3449 B: "TCP Performance Implications of Network Path Asymmetry"
+   (December 2002)
+
+      From abstract: "This document describes TCP performance problems
+      that arise because of asymmetric effects.  These problems arise in
+      several access networks, including bandwidth-asymmetric networks
+      and packet radio subnetworks, for different underlying reasons.
+      However, the end result on TCP performance is the same in both
+      cases: performance often degrades significantly because of
+      imperfection and variability in the ACK feedback from the receiver
+      to the sender.
+
+      The document details several mitigations to these effects, which
+      have either been proposed or evaluated in the literature, or are
+      currently deployed in networks."  [RFC3449]
+
+
+
+Duke, et al.                 Informational                     [Page 18]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+   RFC 3481 B: "TCP over Second (2.5G) and Third (3G) Generation
+   Wireless Networks" (February 2003)
+
+      From abstract: "This document describes a profile for optimizing
+      TCP to adapt so that it handles paths including second (2.5G) and
+      third (3G) generation wireless networks."  [RFC3481]
+
+   RFC 3819 B: "Advice for Internet Subnetwork Designers" (July 2004)
+
+      This document [RFC3819] describes how TCP performance can be
+      negatively affected by some particular lower-layer behaviors and
+      provides guidance in designing lower-layer networks and protocols
+      to be amicable to TCP.
+
+6.3.  Implementation Advice
+
+   RFC 879: "The TCP Maximum Segment Size and Related Topics" (November
+   1983)
+
+      Abstract: "This memo discusses the TCP Maximum Segment Size Option
+      and related topics.  The purpose is to clarify some aspects of
+      TCP and its interaction with IP.  This memo is a clarification to
+      the TCP specification, and contains information that may be
+      considered as 'advice to implementers'."  [RFC0879]
+
+   RFC 1071: "Computing the Internet Checksum" (September 1988)
+
+      This document [RFC1071] lists a number of implementation
+      techniques for efficiently computing the Internet checksum (used
+      by TCP).
+
+   RFC 1624 I: "Computation of the Internet Checksum via Incremental
+   Update" (May 1994)
+
+      Incrementally updating the Internet checksum is useful to routers
+      in updating IP checksums.  Some middleboxes that alter TCP headers
+      may also be able to update the TCP checksum incrementally.  This
+      document [RFC1624] expands upon the explanation of the incremental
+      update procedure in RFC 1071.
+
+   RFC 1936 I: "Implementing the Internet Checksum in Hardware" (April
+   1996)
+
+      This document [RFC1936] describes the motivation for implementing
+      the Internet checksum in hardware, rather than in software, and
+      provides an implementation example.
+
+
+
+
+
+Duke, et al.                 Informational                     [Page 19]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+   RFC 2525 I: "Known TCP Implementation Problems" (March 1999)
+
+      From abstract: "This memo catalogs a number of known TCP
+      implementation problems.  The goal in doing so is to improve
+      conditions in the existing Internet by enhancing the quality of
+      current TCP/IP implementations."  [RFC2525]
+
+   RFC 2923 I: "TCP Problems with Path MTU Discovery" (September 2000)
+
+      From abstract: "This memo catalogs several known Transmission
+      Control Protocol (TCP) implementation problems dealing with Path
+      Maximum Transmission Unit Discovery (PMTUD), including the long-
+      standing black hole problem, stretch acknowledgements (ACKs) due to
+      confusion between Maximum Segment Size (MSS) and segment size, and
+      MSS advertisement based on PMTU."  [RFC2923]
+
+   RFC 3360 B: "Inappropriate TCP Resets Considered Harmful" (August
+   2002)
+
+      This document [RFC3360] is a plea that firewall vendors not send
+      gratuitous TCP RST (Reset) packets when unassigned TCP header bits
+      are used.  This practice prevents desirable extension and
+      evolution of the protocol and thus is potentially harmful to the
+      future of the Internet.
+
+   RFC 3493 I: "Basic Socket Interface Extensions for IPv6" (February
+   2003)
+
+      This document [RFC3493] describes the de facto standard sockets
+      API for programming with TCP.  This API is implemented nearly
+      ubiquitously in modern operating systems and programming
+      languages.
+
+6.4.  Management Information Bases
+
+   The first MIB module defined for use with Simple Network Management
+   Protocol (SNMP) (in RFC 1066 and its update, RFC 1156) was a single
+   monolithic MIB module, called MIB-I.  This evolved over time to be
+   MIB-II (RFC 1213).  It then became apparent that having a single
+   monolithic MIB module was not scalable, given the number and breadth
+   of MIB data definitions that needed to be included.  Thus, additional
+   MIB modules were defined, and those parts of MIB-II that needed to
+   evolve were split off.  Eventually, the remaining parts of MIB-II
+   were also split off, the TCP-specific part being documented in RFC
+   2012.
+
+
+
+
+
+
+Duke, et al.                 Informational                     [Page 20]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+   RFC 2012 was obsoleted by RFC 4022, which is the primary TCP MIB
+   document today.  MIB-I, defined in RFC 1156, has been obsoleted by
+   the MIB-II specification in RFC 1213.  For current TCP implementers,
+   RFC 4022 should be supported.
+
+   RFC 1066: "Management Information Base for Network Management of
+   TCP/IP-based Internets" (August 1988)
+
+      This document [RFC1066] was the description of the TCP MIB.  It
+      was obsoleted by RFC 1156.
+
+   RFC 1156 S: "Management Information Base for Network Management of
+   TCP/IP-based Internets" (May 1990)
+
+      This document [RFC1156] describes the required MIB fields for TCP
+      implementations, with minor corrections and no technical changes
+      from RFC 1066, which it obsoletes.  This is the standards track
+      document for MIB-I.
+
+   RFC 1213 S: "Management Information Base for Network Management of
+   TCP/IP-based Internets: MIB-II" (March 1991)
+
+      This document [RFC1213] describes the second version of the MIB in
+      a monolithic form.  RFC 2012 updates this document by splitting
+      out the TCP-specific portions.
+
+   RFC 2012 S: "SNMPv2 Management Information Base for the Transmission
+   Control Protocol using SMIv2" (November 1996)
+
+      This document [RFC2012] defined the TCP MIB, in an update to RFC
+      1213.  It is now obsoleted by RFC 4022.
+
+   RFC 2452 S: "IP Version 6 Management Information Base for the
+   Transmission Control Protocol" (December 1998)
+
+      This document [RFC2452] augments RFC 2012 by adding an IPv6-
+      specific connection table.  The rest of 2012 holds for any IP
+      version.  RFC 2012 is now obsoleted by RFC 4022.
+
+      Although it is a standards track document, RFC 2452 is considered
+      a historic mistake by the MIB community, as it is based on the
+      idea of parallel IPv4 and IPv6 structures.  Although IPv6 requires
+      new structures, the community has decided to define a single
+      generic structure for both IPv4 and IPv6.  This will aid in
+      definition, implementation, and transition between IPv4 and IPv6.
+
+
+
+
+
+
+Duke, et al.                 Informational                     [Page 21]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+   RFC 4022 S: "Management Information Base for the Transmission Control
+   Protocol (TCP)" (March 2005)
+
+      This document [RFC4022] obsoletes RFC 2012 and RFC 2452 and
+      specifies the current standard for the TCP MIB that should be
+      deployed.
+
+6.5.  Tools and Tutorials
+
+   RFC 1180 I: "TCP/IP Tutorial" (January 1991)
+
+      This document [RFC1180] is an extremely brief overview of the
+      TCP/IP protocol suite as a whole.  It gives some explanation as to
+      how and where TCP fits in.
+
+   RFC 1470 I: "FYI on a Network Management Tool Catalog: Tools for
+   Monitoring and Debugging TCP/IP Internets and Interconnected Devices"
+   (June 1993)
+
+      A few of the tools that this document [RFC1470] describes are
+      still maintained and in use today; for example, ttcp and tcpdump.
+      However, many of the tools described do not relate specifically to
+      TCP and are no longer used or easily available.
+
+   RFC 2398 I: "Some Testing Tools for TCP Implementors" (August 1998)
+
+      This document [RFC2398] describes a number of TCP packet
+      generation and analysis tools.  Although some of these tools are
+      no longer readily available or widely used, for the most part they
+      are still relevant and usable.
+
+6.6.  Case Studies
+
+   RFC 1337 I: "TIME-WAIT Assassination Hazards in TCP" (May 1992)
+
+      This document [RFC1337] points out a problem with acting on
+      received reset segments while one is in the TIME-WAIT state.  The
+      main recommendation is that hosts in TIME-WAIT ignore resets.
+      This recommendation might not currently be widely implemented.
+
+   RFC 2415 I: "Simulation Studies of Increased Initial TCP Window Size"
+   (September 1998)
+
+      This document [RFC2415] presents results of some simulations using
+      TCP initial windows greater than 1 segment.  The analysis
+      indicates that user-perceived performance can be improved by
+      increasing the initial window to 3 segments.
+
+
+
+
+Duke, et al.                 Informational                     [Page 22]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+   RFC 2416 I: "When TCP Starts Up With Four Packets Into Only Three
+   Buffers" (September 1998)
+
+      This document [RFC2416] uses simulation results to clear up some
+      concerns about using an initial window of 4 segments when the
+      network path has less provisioning.
+
+   RFC 2884 I: "Performance Evaluation of Explicit Congestion
+   Notification (ECN) in IP Networks" (July 2000)
+
+      This document [RFC2884] describes experimental results that show
+      some improvements to the performance of both short- and long-lived
+      connections due to ECN.
+
+7.  Undocumented TCP Features
+
+   There are a few important implementation tactics for the TCP that
+   have not yet been described in any RFC.  Although this roadmap is
+   primarily concerned with mapping the TCP RFCs, this section is
+   included because an implementer needs to be aware of these important
+   issues.
+
+   SYN Cookies
+
+      A mechanism known as "SYN cookies" is widely used to thwart TCP
+      SYN flooding attacks, in which an attacker sends a flood of SYNs
+      to a victim but fails to complete the 3-way handshake.  The result
+      is exhaustion of resources at the server.  The SYN cookie
+      mechanism allows the server to return a cleverly chosen initial
+      sequence number that has all the required state for the secure
+      completion of the handshake.  Then the server can avoid saving
+      connection state during the 3-way handshake and thus survive a SYN
+      flooding attack.
+
+      A web search for "SYN cookies" will reveal a number of useful
+      descriptions of this mechanism, although there is currently no RFC
+      on the matter.
+
+   Header Prediction
+
+      Header prediction is a trick to speed up the processing of
+      segments.  Van Jacobson and Mike Karels developed the technique in
+      the late 1980s.  The basic idea is that some processing time can
+      be saved when most of a segment's fields can be predicted from
+      previous segments.  A good description of this was sent to the
+      TCP-IP mailing list by Van Jacobson on March 9, 1988:
+
+
+
+
+
+Duke, et al.                 Informational                     [Page 23]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+         Quite a bit of the speedup comes from an algorithm that we
+         ('we' refers to collaborator Mike Karels and myself) are
+         calling "header prediction".  The idea is that if you're in the
+         middle of a bulk data transfer and have just seen a packet, you
+         know what the next packet is going to look like:  It will look
+         just like the current packet with either the sequence number or
+         ack number updated (depending on whether you're the sender or
+         receiver).  Combining this with the "Use hints" epigram from
+         Butler Lampson's classic "Epigrams for System Designers", you
+         start to think of the tcp state (rcv.nxt, snd.una, etc.) as
+         "hints" about what the next packet should look like.
+
+         If you arrange those "hints" so they match the layout of a tcp
+         packet header, it takes a single 14-byte compare to see if your
+         prediction is correct (3 longword compares to pick up the send
+         & ack sequence numbers, header length, flags and window, plus a
+         short compare on the length).  If the prediction is correct,
+         there's a single test on the length to see if you're the sender
+         or receiver followed by the appropriate processing.  E.g., if
+         the length is non-zero (you're the receiver), checksum and
+         append the data to the socket buffer then wake any process
+         that's sleeping on the buffer.  Update rcv.nxt by the length of
+         this packet (this updates your "prediction" of the next
+         packet).  Check if you can handle another packet the same size
+         as the current one.  If not, set one of the unused flag bits in
+         your header prediction to guarantee that the prediction will
+         fail on the next packet and force you to go through full
+         protocol processing.  Otherwise, you're done with this packet.
+         So, the *total* tcp protocol processing, exclusive of
+         checksumming, is on the order of 6 compares and an add.
+
+8.  Security Considerations
+
+   This document introduces no new security considerations.  Each RFC
+   listed in this document attempts to address the security
+   considerations of the specification it contains.
+
+9.  Acknowledgments
+
+   This document grew out of a discussion on the end2end-interest
+   mailing list, the public list of the End-to-End Research Group of the
+   IRTF, and continued development under the IETF's TCP Maintenance and
+   Minor Extensions (TCPM) working group.  We thank Joe Touch, Reiner
+   Ludwig, Pekka Savola, Gorry Fairhurst, and Sally Floyd for their
+   contributions, in particular.  The chairs of the TCPM working group,
+   Mark Allman and Ted Faber, have been instrumental in the development
+   of this document.  Keith McCloghrie provided some useful notes and
+   clarification on the various MIB-related RFCs.
+
+
+
+Duke, et al.                 Informational                     [Page 24]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+10.  Informative References
+
+10.1.  Basic Functionality
+
+   [RFC0793]  Postel, J., "Transmission Control Protocol", STD 7, RFC
+              793, September 1981.
+
+   [RFC1122]  Braden, R., "Requirements for Internet Hosts -
+              Communication Layers", STD 3, RFC 1122, October 1989.
+
+   [RFC2026]  Bradner, S., "The Internet Standards Process -- Revision
+              3", BCP 9, RFC 2026, October 1996.
+
+   [RFC2460]  Deering, S. and R. Hinden, "Internet Protocol, Version 6
+              (IPv6) Specification", RFC 2460, December 1998.
+
+   [RFC2474]  Nichols, K., Blake, S., Baker, F., and D. Black,
+              "Definition of the Differentiated Services Field (DS
+              Field) in the IPv4 and IPv6 Headers", RFC 2474, December
+              1998.
+
+   [RFC2581]  Allman, M., Paxson, V., and W. Stevens, "TCP Congestion
+              Control", RFC 2581, April 1999.
+
+   [RFC2675]  Borman, D., Deering, S., and R. Hinden, "IPv6 Jumbograms",
+              RFC 2675, August 1999.
+
+   [RFC2873]  Xiao, X., Hannan, A., Paxson, V., and E. Crabbe, "TCP
+              Processing of the IPv4 Precedence Field", RFC 2873, June
+              2000.
+
+   [RFC2988]  Paxson, V. and M. Allman, "Computing TCP's Retransmission
+              Timer", RFC 2988, November 2000.
+
+10.2.  Recommended Enhancements
+
+   [RFC1323]  Jacobson, V., Braden, R., and D. Borman, "TCP Extensions
+              for High Performance", RFC 1323, May 1992.
+
+   [RFC1948]  Bellovin, S., "Defending Against Sequence Number Attacks",
+              RFC 1948, May 1996.
+
+   [RFC2018]  Mathis, M., Mahdavi, J., Floyd, S., and A. Romanow, "TCP
+              Selective Acknowledgment Options", RFC 2018, October 1996.
+
+   [RFC2385]  Heffernan, A., "Protection of BGP Sessions via the TCP MD5
+              Signature Option", RFC 2385, August 1998.
+
+
+
+
+Duke, et al.                 Informational                     [Page 25]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+   [RFC2883]  Floyd, S., Mahdavi, J., Mathis, M., and M. Podolsky, "An
+              Extension to the Selective Acknowledgement (SACK) Option
+              for TCP", RFC 2883, July 2000.
+
+   [RFC3042]  Allman, M., Balakrishnan, H., and S. Floyd, "Enhancing
+              TCP's Loss Recovery Using Limited Transmit", RFC 3042,
+              January 2001.
+
+   [RFC3168]  Ramakrishnan, K., Floyd, S., and D. Black, "The Addition
+              of Explicit Congestion Notification (ECN) to IP", RFC
+              3168, September 2001.
+
+   [RFC3390]  Allman, M., Floyd, S., and C. Partridge, "Increasing TCP's
+              Initial Window", RFC 3390, October 2002.
+
+   [RFC3517]  Blanton, E., Allman, M., Fall, K., and L. Wang, "A
+              Conservative Selective Acknowledgment (SACK)-based Loss
+              Recovery Algorithm for TCP", RFC 3517, April 2003.
+
+   [RFC3562]  Leech, M., "Key Management Considerations for the TCP MD5
+              Signature Option", RFC 3562, July 2003.
+
+   [RFC3782]  Floyd, S., Henderson, T., and A. Gurtov, "The NewReno
+              Modification to TCP's Fast Recovery Algorithm", RFC 3782,
+              April 2004.
+
+   [RFC4015]  Ludwig, R. and A. Gurtov, "The Eifel Response Algorithm
+              for TCP", RFC 4015, February 2005.
+
+   [RFC4278]  Bellovin, S. and A. Zinin, "Standards Maturity Variance
+              Regarding the TCP MD5 Signature Option (RFC 2385) and the
+              BGP-4 Specification", RFC 4278, January 2006.
+
+10.3.  Experimental Extensions
+
+   [RFC2140]  Touch, J., "TCP Control Block Interdependence", RFC 2140,
+              April 1997.
+
+   [RFC2861]  Handley, M., Padhye, J., and S. Floyd, "TCP Congestion
+              Window Validation", RFC 2861, June 2000.
+
+   [RFC3124]  Balakrishnan, H. and S. Seshan, "The Congestion Manager",
+              RFC 3124, June 2001.
+
+   [RFC3465]  Allman, M., "TCP Congestion Control with Appropriate Byte
+              Counting (ABC)", RFC 3465, February 2003.
+
+
+
+
+
+Duke, et al.                 Informational                     [Page 26]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+   [RFC3522]  Ludwig, R. and M. Meyer, "The Eifel Detection Algorithm
+              for TCP", RFC 3522, April 2003.
+
+   [RFC3540]  Spring, N., Wetherall, D., and D. Ely, "Robust Explicit
+              Congestion Notification (ECN) Signaling with Nonces", RFC
+              3540, June 2003.
+
+   [RFC3649]  Floyd, S., "HighSpeed TCP for Large Congestion Windows",
+              RFC 3649, December 2003.
+
+   [RFC3708]  Blanton, E. and M. Allman, "Using TCP Duplicate Selective
+              Acknowledgement (DSACKs) and Stream Control Transmission
+              Protocol (SCTP) Duplicate Transmission Sequence Numbers
+              (TSNs) to Detect Spurious Retransmissions", RFC 3708,
+              February 2004.
+
+   [RFC3742]  Floyd, S., "Limited Slow-Start for TCP with Large
+              Congestion Windows", RFC 3742, March 2004.
+
+   [RFC4138]  Sarolahti, P. and M. Kojo, "Forward RTO-Recovery (F-RTO):
+              An Algorithm for Detecting Spurious Retransmission
+              Timeouts with TCP and the Stream Control Transmission
+              Protocol (SCTP)", RFC 4138, August 2005.
+
+10.4.  Historic Extensions
+
+   [RFC1106]  Fox, R., "TCP big window and NAK options", RFC 1106, June
+              1989.
+
+   [RFC1110]  McKenzie, A., "Problem with the TCP big window option",
+              RFC 1110, August 1989.
+
+   [RFC1146]  Zweig, J. and C. Partridge, "TCP alternate checksum
+              options", RFC 1146, March 1990.
+
+   [RFC1263]  O'Malley, S. and L. Peterson, "TCP Extensions Considered
+              Harmful", RFC 1263, October 1991.
+
+   [RFC1379]  Braden, R., "Extending TCP for Transactions -- Concepts",
+              RFC 1379, November 1992.
+
+   [RFC1644]  Braden, R., "T/TCP -- TCP Extensions for Transactions
+              Functional Specification", RFC 1644, July 1994.
+
+   [RFC1693]  Connolly, T., Amer, P., and P. Conrad, "An Extension to
+              TCP : Partial Order Service", RFC 1693, November 1994.
+
+
+
+
+
+Duke, et al.                 Informational                     [Page 27]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+10.5.  Support Documents
+
+   [RFC0813]  Clark, D., "Window and Acknowledgement Strategy in TCP",
+              RFC 813, July 1982.
+
+   [RFC0814]  Clark, D., "Name, addresses, ports, and routes", RFC 814,
+              July 1982.
+
+   [RFC0816]  Clark, D., "Fault isolation and recovery", RFC 816, July
+              1982.
+
+   [RFC0817]  Clark, D., "Modularity and efficiency in protocol
+              implementation", RFC 817, July 1982.
+
+   [RFC0872]  Padlipsky, M., "TCP-on-a-LAN", RFC 872, September 1982.
+
+   [RFC0879]  Postel, J., "TCP maximum segment size and related topics",
+              RFC 879, November 1983.
+
+   [RFC0896]  Nagle, J., "Congestion control in IP/TCP internetworks",
+              RFC 896, January 1984.
+
+   [RFC0964]  Sidhu, D. and T. Blumer, "Some problems with the
+              specification of the Military Standard Transmission
+              Control Protocol", RFC 964, November 1985.
+
+   [RFC1066]  McCloghrie, K. and M. Rose, "Management Information Base
+              for Network Management of TCP/IP-based internets", RFC
+              1066, August 1988.
+
+   [RFC1071]  Braden, R., Borman, D., and C. Partridge, "Computing the
+              Internet checksum", RFC 1071, September 1988.
+
+   [RFC1072]  Jacobson, V. and R. Braden, "TCP extensions for long-delay
+              paths", RFC 1072, October 1988.
+
+   [RFC1156]  McCloghrie, K. and M. Rose, "Management Information Base
+              for network management of TCP/IP-based internets", RFC
+              1156, May 1990.
+
+   [RFC1180]  Socolofsky, T. and C. Kale, "TCP/IP tutorial", RFC 1180,
+              January 1991.
+
+   [RFC1185]  Jacobson, V., Braden, B., and L. Zhang, "TCP Extension for
+              High-Speed Paths", RFC 1185, October 1990.
+
+
+
+
+
+
+Duke, et al.                 Informational                     [Page 28]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+   [RFC1213]  McCloghrie, K. and M. Rose, "Management Information Base
+              for Network Management of TCP/IP-based internets: MIB-II",
+              STD 17, RFC 1213, March 1991.
+
+   [RFC1337]  Braden, R., "TIME-WAIT Assassination Hazards in TCP", RFC
+              1337, May 1992.
+
+   [RFC1470]  Enger, R. and J. Reynolds, "FYI on a Network Management
+              Tool Catalog: Tools for Monitoring and Debugging TCP/IP
+              Internets and Interconnected Devices", FYI 2, RFC 1470,
+              June 1993.
+
+   [RFC1624]  Rijsinghani, A., "Computation of the Internet Checksum via
+              Incremental Update", RFC 1624, May 1994.
+
+   [RFC1936]  Touch, J. and B. Parham, "Implementing the Internet
+              Checksum in Hardware", RFC 1936, April 1996.
+
+   [RFC2012]  McCloghrie, K., "SNMPv2 Management Information Base for
+              the Transmission Control Protocol using SMIv2", RFC 2012,
+              November 1996.
+
+   [RFC2398]  Parker, S. and C. Schmechel, "Some Testing Tools for TCP
+              Implementors", RFC 2398, August 1998.
+
+   [RFC2415]  Poduri, K. and K. Nichols, "Simulation Studies of
+              Increased Initial TCP Window Size", RFC 2415, September
+              1998.
+
+   [RFC2416]  Shepard, T. and C. Partridge, "When TCP Starts Up With
+              Four Packets Into Only Three Buffers", RFC 2416, September
+              1998.
+
+   [RFC2452]  Daniele, M., "IP Version 6 Management Information Base for
+              the Transmission Control Protocol", RFC 2452, December
+              1998.
+
+   [RFC2488]  Allman, M., Glover, D., and L. Sanchez, "Enhancing TCP
+              Over Satellite Channels using Standard Mechanisms", BCP
+              28, RFC 2488, January 1999.
+
+   [RFC2525]  Paxson, V., Allman, M., Dawson, S., Fenner, W., Griner,
+              J., Heavens, I., Lahey, K., Semke, J., and B. Volz, "Known
+              TCP Implementation Problems", RFC 2525, March 1999.
+
+   [RFC2757]  Montenegro, G., Dawkins, S., Kojo, M., Magret, V., and N.
+              Vaidya, "Long Thin Networks", RFC 2757, January 2000.
+
+
+
+
+Duke, et al.                 Informational                     [Page 29]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+   [RFC2760]  Allman, M., Dawkins, S., Glover, D., Griner, J., Tran, D.,
+              Henderson, T., Heidemann, J., Touch, J., Kruse, H.,
+              Ostermann, S., Scott, K., and J. Semke, "Ongoing TCP
+              Research Related to Satellites", RFC 2760, February 2000.
+
+   [RFC2884]  Hadi Salim, J. and U. Ahmed, "Performance Evaluation of
+              Explicit Congestion Notification (ECN) in IP Networks",
+              RFC 2884, July 2000.
+
+   [RFC2914]  Floyd, S., "Congestion Control Principles", BCP 41, RFC
+              2914, September 2000.
+
+   [RFC2923]  Lahey, K., "TCP Problems with Path MTU Discovery", RFC
+              2923, September 2000.
+
+   [RFC3135]  Border, J., Kojo, M., Griner, J., Montenegro, G., and Z.
+              Shelby, "Performance Enhancing Proxies Intended to
+              Mitigate Link-Related Degradations", RFC 3135, June 2001.
+
+   [RFC3150]  Dawkins, S., Montenegro, G., Kojo, M., and V. Magret,
+              "End-to-end Performance Implications of Slow Links", BCP
+              48, RFC 3150, July 2001.
+
+   [RFC3155]  Dawkins, S., Montenegro, G., Kojo, M., Magret, V., and N.
+              Vaidya, "End-to-end Performance Implications of Links with
+              Errors", BCP 50, RFC 3155, August 2001.
+
+   [RFC3360]  Floyd, S., "Inappropriate TCP Resets Considered Harmful",
+              BCP 60, RFC 3360, August 2002.
+
+   [RFC3366]  Fairhurst, G. and L. Wood, "Advice to link designers on
+              link Automatic Repeat reQuest (ARQ)", BCP 62, RFC 3366,
+              August 2002.
+
+   [RFC3449]  Balakrishnan, H., Padmanabhan, V., Fairhurst, G., and M.
+              Sooriyabandara, "TCP Performance Implications of Network
+              Path Asymmetry", BCP 69, RFC 3449, December 2002.
+
+   [RFC3481]  Inamura, H., Montenegro, G., Ludwig, R., Gurtov, A., and
+              F. Khafizov, "TCP over Second (2.5G) and Third (3G)
+              Generation Wireless Networks", BCP 71, RFC 3481, February
+              2003.
+
+   [RFC3493]  Gilligan, R., Thomson, S., Bound, J., McCann, J., and W.
+              Stevens, "Basic Socket Interface Extensions for IPv6", RFC
+              3493, February 2003.
+
+
+
+
+
+Duke, et al.                 Informational                     [Page 30]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+   [RFC3819]  Karn, P., Bormann, C., Fairhurst, G., Grossman, D.,
+              Ludwig, R., Mahdavi, J., Montenegro, G., Touch, J., and L.
+              Wood, "Advice for Internet Subnetwork Designers", BCP 89,
+              RFC 3819, July 2004.
+
+   [RFC4022]  Raghunarayan, R., "Management Information Base for the
+              Transmission Control Protocol (TCP)", RFC 4022, March
+              2005.
+
+10.6.  Informative References Outside the RFC Series
+
+   [JK92]     Jacobson, V. and M. Karels, "Congestion Avoidance and
+              Control", 1992.  This paper is a revised version of
+              [Jac88] that includes an additional appendix.  It has not
+              been traditionally published, but is currently available
+              at ftp://ftp.ee.lbl.gov/papers/congavoid.ps.Z.
+
+   [Jac88]    Jacobson, V., "Congestion Avoidance and Control", ACM
+              SIGCOMM 1988 Proceedings, in ACM Computer Communication
+              Review, 18 (4), pp. 314-329, August 1988.
+
+   [KP87]     Karn, P. and C. Partridge, "Round Trip Time Estimation",
+              ACM SIGCOMM 1987 Proceedings, in ACM Computer
+              Communication Review, 17 (5), pp. 2-7, August 1987.
+
+   [MAF04]    Medina, A., Allman, M., and S. Floyd, "Measuring the
+              Evolution of Transport Protocols in the Internet", ACM
+              Computer Communication Review, 35 (2), April 2005.
+
+   [MM96]     Mathis, M. and J. Mahdavi, "Forward Acknowledgement:
+              Refining TCP Congestion Control", ACM SIGCOMM 1996
+              Proceedings, in ACM Computer Communication Review 26 (4),
+              pp. 281-292, October 1996.
+
+   [SCWA99]   Savage, S., Cardwell, N., Wetherall, D., and T. Anderson,
+              "TCP Congestion Control with a Misbehaving Receiver", ACM
+              Computer Communication Review, 29 (5), pp. 71-78, October
+              1999.
+
+
+
+
+
+
+
+
+
+
+
+
+
+Duke, et al.                 Informational                     [Page 31]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+Authors' Addresses
+
+   Martin H. Duke
+   The Boeing Company
+   PO Box 3707, MC 7L-49
+   Seattle, WA  98124-2207
+
+   Phone: 425-373-2852
+   EMail: martin.duke@boeing.com
+
+
+   Robert Braden
+   USC Information Sciences Institute
+   Marina del Rey, CA  90292-6695
+
+   Phone: 310-448-9173
+   EMail: braden@isi.edu
+
+
+   Wesley M. Eddy
+   Verizon Federal Network Systems
+   21000 Brookpark Rd, MS 54-5
+   Cleveland, OH  44135
+
+   Phone: 216-433-6682
+   EMail: weddy@grc.nasa.gov
+
+
+   Ethan Blanton
+   Purdue University Computer Science
+   250 N. University St.
+   West Lafayette, IN  47907
+
+   EMail: eblanton@cs.purdue.edu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Duke, et al.                 Informational                     [Page 32]
+
+RFC 4614                      TCP Roadmap                 September 2006
+
+
+Full Copyright Statement
+
+   Copyright (C) The Internet Society (2006).
+
+   This document is subject to the rights, licenses and restrictions
+   contained in BCP 78, and except as set forth therein, the authors
+   retain all their rights.
+
+   This document and the information contained herein are provided on an
+   "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS
+   OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET
+   ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED,
+   INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE
+   INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED
+   WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Intellectual Property
+
+   The IETF takes no position regarding the validity or scope of any
+   Intellectual Property Rights or other rights that might be claimed to
+   pertain to the implementation or use of the technology described in
+   this document or the extent to which any license under such rights
+   might or might not be available; nor does it represent that it has
+   made any independent effort to identify any such rights.  Information
+   on the procedures with respect to rights in RFC documents can be
+   found in BCP 78 and BCP 79.
+
+   Copies of IPR disclosures made to the IETF Secretariat and any
+   assurances of licenses to be made available, or the result of an
+   attempt made to obtain a general license or permission for the use of
+   such proprietary rights by implementers or users of this
+   specification can be obtained from the IETF on-line IPR repository at
+   http://www.ietf.org/ipr.
+
+   The IETF invites any interested party to bring to its attention any
+   copyrights, patents or patent applications, or other proprietary
+   rights that may cover technology that may be required to implement
+   this standard.  Please address the information to the IETF at
+   ietf-ipr@ietf.org.
+
+Acknowledgement
+
+   Funding for the RFC Editor function is provided by the IETF
+   Administrative Support Activity (IASA).
+
+
+
+
+
+
+
+Duke, et al.                 Informational                     [Page 33]
+
diff --git a/ext/picotcp/RFC/rfc6762.txt b/ext/picotcp/RFC/rfc6762.txt
new file mode 100644
index 0000000..2c44359
--- /dev/null
+++ b/ext/picotcp/RFC/rfc6762.txt
@@ -0,0 +1,3923 @@
+
+
+
+
+
+
+Internet Engineering Task Force (IETF)                       S. Cheshire
+Request for Comments: 6762                                   M. Krochmal
+Category: Standards Track                                     Apple Inc.
+ISSN: 2070-1721                                            February 2013
+
+
+                             Multicast DNS
+
+Abstract
+
+   As networked devices become smaller, more portable, and more
+   ubiquitous, the ability to operate with less configured
+   infrastructure is increasingly important.  In particular, the ability
+   to look up DNS resource record data types (including, but not limited
+   to, host names) in the absence of a conventional managed DNS server
+   is useful.
+
+   Multicast DNS (mDNS) provides the ability to perform DNS-like
+   operations on the local link in the absence of any conventional
+   Unicast DNS server.  In addition, Multicast DNS designates a portion
+   of the DNS namespace to be free for local use, without the need to
+   pay any annual fee, and without the need to set up delegations or
+   otherwise configure a conventional DNS server to answer for those
+   names.
+
+   The primary benefits of Multicast DNS names are that (i) they require
+   little or no administration or configuration to set them up, (ii)
+   they work when no infrastructure is present, and (iii) they work
+   during infrastructure failures.
+
+Status of This Memo
+
+   This is an Internet Standards Track document.
+
+   This document is a product of the Internet Engineering Task Force
+   (IETF).  It represents the consensus of the IETF community.  It has
+   received public review and has been approved for publication by the
+   Internet Engineering Steering Group (IESG).  Further information on
+   Internet Standards is available in Section 2 of RFC 5741.
+
+   Information about the current status of this document, any errata,
+   and how to provide feedback on it may be obtained at
+   http://www.rfc-editor.org/info/rfc6762.
+
+
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                    [Page 1]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+Copyright Notice
+
+   Copyright (c) 2013 IETF Trust and the persons identified as the
+   document authors.  All rights reserved.
+
+   This document is subject to BCP 78 and the IETF Trust's Legal
+   Provisions Relating to IETF Documents
+   (http://trustee.ietf.org/license-info) in effect on the date of
+   publication of this document.  Please review these documents
+   carefully, as they describe your rights and restrictions with respect
+   to this document.  Code Components extracted from this document must
+   include Simplified BSD License text as described in Section 4.e of
+   the Trust Legal Provisions and are provided without warranty as
+   described in the Simplified BSD License.
+
+   This document may contain material from IETF Documents or IETF
+   Contributions published or made publicly available before November
+   10, 2008.  The person(s) controlling the copyright in some of this
+   material may not have granted the IETF Trust the right to allow
+   modifications of such material outside the IETF Standards Process.
+   Without obtaining an adequate license from the person(s) controlling
+   the copyright in such materials, this document may not be modified
+   outside the IETF Standards Process, and derivative works of it may
+   not be created outside the IETF Standards Process, except to format
+   it for publication as an RFC or to translate it into languages other
+   than English.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                    [Page 2]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+Table of Contents
+
+   1. Introduction ....................................................4
+   2. Conventions and Terminology Used in This Document ...............4
+   3. Multicast DNS Names .............................................5
+   4. Reverse Address Mapping .........................................7
+   5. Querying ........................................................8
+   6. Responding .....................................................13
+   7. Traffic Reduction ..............................................22
+   8. Probing and Announcing on Startup ..............................25
+   9. Conflict Resolution ............................................31
+   10. Resource Record TTL Values and Cache Coherency ................33
+   11. Source Address Check ..........................................38
+   12. Special Characteristics of Multicast DNS Domains ..............40
+   13. Enabling and Disabling Multicast DNS ..........................41
+   14. Considerations for Multiple Interfaces ........................42
+   15. Considerations for Multiple Responders on the Same Machine ....43
+   16. Multicast DNS Character Set ...................................45
+   17. Multicast DNS Message Size ....................................46
+   18. Multicast DNS Message Format ..................................47
+   19. Summary of Differences between Multicast DNS and Unicast DNS ..51
+   20. IPv6 Considerations ...........................................52
+   21. Security Considerations .......................................52
+   22. IANA Considerations ...........................................53
+   23. Acknowledgments ...............................................56
+   24. References ....................................................56
+   Appendix A. Design Rationale for Choice of UDP Port Number ........60
+   Appendix B. Design Rationale for Not Using Hashed Multicast
+               Addresses .............................................61
+   Appendix C. Design Rationale for Maximum Multicast DNS Name
+               Length ................................................62
+   Appendix D. Benefits of Multicast Responses .......................64
+   Appendix E. Design Rationale for Encoding Negative Responses ......65
+   Appendix F. Use of UTF-8 ..........................................66
+   Appendix G. Private DNS Namespaces ................................67
+   Appendix H. Deployment History ....................................67
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                    [Page 3]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+1.  Introduction
+
+   Multicast DNS and its companion technology DNS-Based Service
+   Discovery [RFC6763] were created to provide IP networking with the
+   ease-of-use and autoconfiguration for which AppleTalk was well-known
+   [RFC6760].  When reading this document, familiarity with the concepts
+   of Zero Configuration Networking [Zeroconf] and automatic link-local
+   addressing [RFC3927] [RFC4862] is helpful.
+
+   Multicast DNS borrows heavily from the existing DNS protocol
+   [RFC1034] [RFC1035] [RFC6195], using the existing DNS message
+   structure, name syntax, and resource record types.  This document
+   specifies no new operation codes or response codes.  This document
+   describes how clients send DNS-like queries via IP multicast, and how
+   a collection of hosts cooperate to collectively answer those queries
+   in a useful manner.
+
+2.  Conventions and Terminology Used in This Document
+
+   The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
+   "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
+   document are to be interpreted as described in "Key words for use in
+   RFCs to Indicate Requirement Levels" [RFC2119].
+
+   When this document uses the term "Multicast DNS", it should be taken
+   to mean: "Clients performing DNS-like queries for DNS-like resource
+   records by sending DNS-like UDP query and response messages over IP
+   Multicast to UDP port 5353".  The design rationale for selecting UDP
+   port 5353 is discussed in Appendix A.
+
+   This document uses the term "host name" in the strict sense to mean a
+   fully qualified domain name that has an IPv4 or IPv6 address record.
+   It does not use the term "host name" in the commonly used but
+   incorrect sense to mean just the first DNS label of a host's fully
+   qualified domain name.
+
+   A DNS (or mDNS) packet contains an IP Time to Live (TTL) in the IP
+   header, which is effectively a hop-count limit for the packet, to
+   guard against routing loops.  Each resource record also contains a
+   TTL, which is the number of seconds for which the resource record may
+   be cached.  This document uses the term "IP TTL" to refer to the IP
+   header TTL (hop limit), and the term "RR TTL" or just "TTL" to refer
+   to the resource record TTL (cache lifetime).
+
+   DNS-format messages contain a header, a Question Section, then
+   Answer, Authority, and Additional Record Sections.  The Answer,
+   Authority, and Additional Record Sections all hold resource records
+
+
+
+
+Cheshire & Krochmal          Standards Track                    [Page 4]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   in the same format.  Where this document describes issues that apply
+   equally to all three sections, it uses the term "Resource Record
+   Sections" to refer collectively to these three sections.
+
+   This document uses the terms "shared" and "unique" when referring to
+   resource record sets [RFC1034]:
+
+      A "shared" resource record set is one where several Multicast DNS
+      responders may have records with the same name, rrtype, and
+      rrclass, and several responders may respond to a particular query.
+
+      A "unique" resource record set is one where all the records with
+      that name, rrtype, and rrclass are conceptually under the control
+      or ownership of a single responder, and it is expected that at
+      most one responder should respond to a query for that name,
+      rrtype, and rrclass.  Before claiming ownership of a unique
+      resource record set, a responder MUST probe to verify that no
+      other responder already claims ownership of that set, as described
+      in Section 8.1, "Probing".  (For fault-tolerance and other
+      reasons, sometimes it is permissible to have more than one
+      responder answering for a particular "unique" resource record set,
+      but such cooperating responders MUST give answers containing
+      identical rdata for these records.  If they do not give answers
+      containing identical rdata, then the probing step will reject the
+      data as being inconsistent with what is already being advertised
+      on the network for those names.)
+
+   Strictly speaking, the terms "shared" and "unique" apply to resource
+   record sets, not to individual resource records.  However, it is
+   sometimes convenient to talk of "shared resource records" and "unique
+   resource records".  When used this way, the terms should be
+   understood to mean a record that is a member of a "shared" or
+   "unique" resource record set, respectively.
+
+3.  Multicast DNS Names
+
+   A host that belongs to an organization or individual who has control
+   over some portion of the DNS namespace can be assigned a globally
+   unique name within that portion of the DNS namespace, such as,
+   "cheshire.example.com.".  For those of us who have this luxury, this
+   works very well.  However, the majority of home computer users do not
+   have easy access to any portion of the global DNS namespace within
+   which they have the authority to create names.  This leaves the
+   majority of home computers effectively anonymous for practical
+   purposes.
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                    [Page 5]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   To remedy this problem, this document allows any computer user to
+   elect to give their computers link-local Multicast DNS host names of
+   the form: "single-dns-label.local.".  For example, a laptop computer
+   may answer to the name "MyComputer.local.".  Any computer user is
+   granted the authority to name their computer this way, provided that
+   the chosen host name is not already in use on that link.  Having
+   named their computer this way, the user has the authority to continue
+   utilizing that name until such time as a name conflict occurs on the
+   link that is not resolved in the user's favor.  If this happens, the
+   computer (or its human user) MUST cease using the name, and SHOULD
+   attempt to allocate a new unique name for use on that link.  These
+   conflicts are expected to be relatively rare for people who choose
+   reasonably imaginative names, but it is still important to have a
+   mechanism in place to handle them when they happen.
+
+   This document specifies that the DNS top-level domain ".local." is a
+   special domain with special semantics, namely that any fully
+   qualified name ending in ".local." is link-local, and names within
+   this domain are meaningful only on the link where they originate.
+   This is analogous to IPv4 addresses in the 169.254/16 prefix or IPv6
+   addresses in the FE80::/10 prefix, which are link-local and
+   meaningful only on the link where they originate.
+
+   Any DNS query for a name ending with ".local." MUST be sent to the
+   mDNS IPv4 link-local multicast address 224.0.0.251 (or its IPv6
+   equivalent FF02::FB).  The design rationale for using a fixed
+   multicast address instead of selecting from a range of multicast
+   addresses using a hash function is discussed in Appendix B.
+   Implementers MAY choose to look up such names concurrently via other
+   mechanisms (e.g., Unicast DNS) and coalesce the results in some
+   fashion.  Implementers choosing to do this should be aware of the
+   potential for user confusion when a given name can produce different
+   results depending on external network conditions (such as, but not
+   limited to, which name lookup mechanism responds faster).
+
+   It is unimportant whether a name ending with ".local." occurred
+   because the user explicitly typed in a fully qualified domain name
+   ending in ".local.", or because the user entered an unqualified
+   domain name and the host software appended the suffix ".local."
+   because that suffix appears in the user's search list.  The ".local."
+   suffix could appear in the search list because the user manually
+   configured it, or because it was received via DHCP [RFC2132] or via
+   any other mechanism for configuring the DNS search list.  In this
+   respect the ".local." suffix is treated no differently from any other
+   search domain that might appear in the DNS search list.
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                    [Page 6]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   DNS queries for names that do not end with ".local." MAY be sent to
+   the mDNS multicast address, if no other conventional DNS server is
+   available.  This can allow hosts on the same link to continue
+   communicating using each other's globally unique DNS names during
+   network outages that disrupt communication with the greater Internet.
+   When resolving global names via local multicast, it is even more
+   important to use DNS Security Extensions (DNSSEC) [RFC4033] or other
+   security mechanisms to ensure that the response is trustworthy.
+   Resolving global names via local multicast is a contentious issue,
+   and this document does not discuss it further, instead concentrating
+   on the issue of resolving local names using DNS messages sent to a
+   multicast address.
+
+   This document recommends a single flat namespace for dot-local host
+   names (i.e., the names of DNS "A" and "AAAA" records, which map
+   names to IPv4 and IPv6 addresses), but other DNS record types (such
+   as those used by DNS-Based Service Discovery [RFC6763]) may contain
+   as many labels as appropriate for the desired usage, up to a maximum
+   of 255 bytes, plus a terminating zero byte at the end.  Name length
+   issues are discussed further in Appendix C.
+
+   Enforcing uniqueness of host names is probably desirable in the
+   common case, but this document does not mandate that.  It is
+   permissible for a collection of coordinated hosts to agree to
+   maintain multiple DNS address records with the same name, possibly
+   for load-balancing or fault-tolerance reasons.  This document does
+   not take a position on whether that is sensible.  It is important
+   that both modes of operation be supported.  The Multicast DNS
+   protocol allows hosts to verify and maintain unique names for
+   resource records where that behavior is desired, and it also allows
+   hosts to maintain multiple resource records with a single shared name
+   where that behavior is desired.  This consideration applies to all
+   resource records, not just address records (host names).  In summary:
+   It is required that the protocol have the ability to detect and
+   handle name conflicts, but it is not required that this ability be
+   used for every record.
+
+4.  Reverse Address Mapping
+
+   Like ".local.", the IPv4 and IPv6 reverse mapping domains are also
+   defined to be link-local:
+
+      Any DNS query for a name ending with "254.169.in-addr.arpa." MUST
+      be sent to the mDNS IPv4 link-local multicast address 224.0.0.251
+      or the mDNS IPv6 multicast address FF02::FB.  Since names under
+      this domain correspond to IPv4 link-local addresses, it is logical
+      that the local link is the best place to find information
+      pertaining to those names.
+
+
+
+Cheshire & Krochmal          Standards Track                    [Page 7]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+      Likewise, any DNS query for a name within the reverse mapping
+      domains for IPv6 link-local addresses ("8.e.f.ip6.arpa.",
+      "9.e.f.ip6.arpa.", "a.e.f.ip6.arpa.", and "b.e.f.ip6.arpa.") MUST
+      be sent to the mDNS IPv6 link-local multicast address FF02::FB or
+      the mDNS IPv4 link-local multicast address 224.0.0.251.
+
+5.  Querying
+
+   There are two kinds of Multicast DNS queries: one-shot queries of the
+   kind made by legacy DNS resolvers, and continuous, ongoing Multicast
+   DNS queries made by fully compliant Multicast DNS queriers, which
+   support asynchronous operations including DNS-Based Service Discovery
+   [RFC6763].
+
+   Except in the rare case of a Multicast DNS responder that is
+   advertising only shared resource records and no unique records, a
+   Multicast DNS responder MUST also implement a Multicast DNS querier
+   so that it can first verify the uniqueness of those records before it
+   begins answering queries for them.
+
+5.1.  One-Shot Multicast DNS Queries
+
+   The most basic kind of Multicast DNS client may simply send standard
+   DNS queries blindly to 224.0.0.251:5353, without necessarily even
+   being aware of what a multicast address is.  This change can
+   typically be implemented with just a few lines of code in an existing
+   DNS resolver library.  If a name being queried falls within one of
+   the reserved Multicast DNS domains (see Sections 3 and 4), then,
+   rather than using the configured Unicast DNS server address, the
+   query is instead sent to 224.0.0.251:5353 (or its IPv6 equivalent
+   [FF02::FB]:5353).  Typically, the timeout would also be shortened to
+   two or three seconds.  It's possible to make a minimal Multicast DNS
+   resolver with only these simple changes.  These queries are typically
+   done using a high-numbered ephemeral UDP source port, but regardless
+   of whether they are sent from a dynamic port or from a fixed port,
+   these queries MUST NOT be sent using UDP source port 5353, since
+   using UDP source port 5353 signals the presence of a fully compliant
+   Multicast DNS querier, as described below.
+
+   A simple DNS resolver like this will typically just take the first
+   response it receives.  It will not listen for additional UDP
+   responses, but in many instances this may not be a serious problem.
+   If a user types "http://MyPrinter.local." into their web browser, and
+   their simple DNS resolver just takes the first response it receives,
+   and the user gets to see the status and configuration web page for
+   their printer, then the protocol has met the user's needs in this
+   case.
+
+
+
+
+Cheshire & Krochmal          Standards Track                    [Page 8]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   While a basic DNS resolver like this may be adequate for simple host
+   name lookup, it may not get ideal behavior in other cases.
+   Additional refinements to create a fully compliant Multicast DNS
+   querier are described below.
+
+5.2.  Continuous Multicast DNS Querying
+
+   In one-shot queries, the underlying assumption is that the
+   transaction begins when the application issues a query, and ends when
+   the first response is received.  There is another type of query
+   operation that is more asynchronous, in which having received one
+   response is not necessarily an indication that there will be no more
+   relevant responses, and the querying operation continues until no
+   further responses are required.  Determining when no further
+   responses are required depends on the type of operation being
+   performed.  If the operation is looking up the IPv4 and IPv6
+   addresses of another host, then no further responses are required
+   once a successful connection has been made to one of those IPv4 or
+   IPv6 addresses.  If the operation is browsing to present the user
+   with a list of DNS-SD services found on the network [RFC6763], then
+   no further responses are required once the user indicates this to the
+   user-interface software, e.g., by closing the network browsing window
+   that was displaying the list of discovered services.
+
+   Imagine some hypothetical software that allows users to discover
+   network printers.  The user wishes to discover all printers on the
+   local network, not only the printer that is quickest to respond.
+   When the user is actively looking for a network printer to use, they
+   open a network browsing window that displays the list of discovered
+   printers.  It would be convenient for the user if they could rely on
+   this list of network printers to stay up to date as network printers
+   come and go, rather than displaying out-of-date stale information,
+   and requiring the user explicitly to click a "refresh" button any
+   time they want to see accurate information (which, from the moment it
+   is displayed, is itself already beginning to become out-of-date and
+   stale).  If we are to display a continuously updated live list like
+   this, we need to be able to do it efficiently, without naive constant
+   polling, which would be an unreasonable burden on the network.  It is
+   not expected that all users will be browsing to discover new printers
+   all the time, but when a user is browsing to discover service
+   instances for an extended period, we want to be able to support that
+   operation efficiently.
+
+   Therefore, when retransmitting Multicast DNS queries to implement
+   this kind of continuous monitoring, the interval between the first
+   two queries MUST be at least one second, the intervals between
+   successive queries MUST increase by at least a factor of two, and the
+   querier MUST implement Known-Answer Suppression, as described below
+
+
+
+Cheshire & Krochmal          Standards Track                    [Page 9]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   in Section 7.1.  The Known-Answer Suppression mechanism tells
+   responders which answers are already known to the querier, thereby
+   allowing responders to avoid wasting network capacity with pointless
+   repeated transmission of those answers.  A querier retransmits its
+   question because it wishes to receive answers it may have missed the
+   first time, not because it wants additional duplicate copies of
+   answers it already received.  Failure to implement Known-Answer
+   Suppression can result in unacceptable levels of network traffic.
+   When the interval between queries reaches or exceeds 60 minutes, a
+   querier MAY cap the interval to a maximum of 60 minutes, and perform
+   subsequent queries at a steady-state rate of one query per hour.  To
+   avoid accidental synchronization when, for some reason, multiple
+   clients begin querying at exactly the same moment (e.g., because of
+   some common external trigger event), a Multicast DNS querier SHOULD
+   also delay the first query of the series by a randomly chosen amount
+   in the range 20-120 ms.
+
+   When a Multicast DNS querier receives an answer, the answer contains
+   a TTL value that indicates for how many seconds this answer is valid.
+   After this interval has passed, the answer will no longer be valid
+   and SHOULD be deleted from the cache.  Before the record expiry time
+   is reached, a Multicast DNS querier that has local clients with an
+   active interest in the state of that record (e.g., a network browsing
+   window displaying a list of discovered services to the user) SHOULD
+   reissue its query to determine whether the record is still valid.
+
+   To perform this cache maintenance, a Multicast DNS querier should
+   plan to retransmit its query after at least 50% of the record
+   lifetime has elapsed.  This document recommends the following
+   specific strategy.
+
+   The querier should plan to issue a query at 80% of the record
+   lifetime, and then if no answer is received, at 85%, 90%, and 95%.
+   If an answer is received, then the remaining TTL is reset to the
+   value given in the answer, and this process repeats for as long as
+   the Multicast DNS querier has an ongoing interest in the record.  If
+   no answer is received after four queries, the record is deleted when
+   it reaches 100% of its lifetime.  A Multicast DNS querier MUST NOT
+   perform this cache maintenance for records for which it has no local
+   clients with an active interest.  If the expiry of a particular
+   record from the cache would result in no net effect to any client
+   software running on the querier device, and no visible effect to the
+   human user, then there is no reason for the Multicast DNS querier to
+   waste network capacity checking whether the record remains valid.
+
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 10]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   To avoid the case where multiple Multicast DNS queriers on a network
+   all issue their queries simultaneously, a random variation of 2% of
+   the record TTL should be added, so that queries are scheduled to be
+   performed at 80-82%, 85-87%, 90-92%, and then 95-97% of the TTL.
+
+   An additional efficiency optimization SHOULD be performed when a
+   Multicast DNS response is received containing a unique answer (as
+   indicated by the cache-flush bit being set, described in Section
+   10.2, "Announcements to Flush Outdated Cache Entries").  In this
+   case, there is no need for the querier to continue issuing a stream
+   of queries with exponentially increasing intervals, since the receipt
+   of a unique answer is a good indication that no other answers will be
+   forthcoming.  In this case, the Multicast DNS querier SHOULD plan to
+   issue its next query for this record at 80-82% of the record's TTL,
+   as described above.
+
+   A compliant Multicast DNS querier, which implements the rules
+   specified in this document, MUST send its Multicast DNS queries from
+   UDP source port 5353 (the well-known port assigned to mDNS), and MUST
+   listen for Multicast DNS replies sent to UDP destination port 5353 at
+   the mDNS link-local multicast address (224.0.0.251 and/or its IPv6
+   equivalent FF02::FB).
+
+5.3.  Multiple Questions per Query
+
+   Multicast DNS allows a querier to place multiple questions in the
+   Question Section of a single Multicast DNS query message.
+
+   The semantics of a Multicast DNS query message containing multiple
+   questions is identical to a series of individual DNS query messages
+   containing one question each.  Combining multiple questions into a
+   single message is purely an efficiency optimization and has no other
+   semantic significance.
+
+5.4.  Questions Requesting Unicast Responses
+
+   Sending Multicast DNS responses via multicast has the benefit that
+   all the other hosts on the network get to see those responses,
+   enabling them to keep their caches up to date and detect conflicting
+   responses.
+
+   However, there are situations where all the other hosts on the
+   network don't need to see every response.  Some examples are a laptop
+   computer waking from sleep, the Ethernet cable being connected to a
+   running machine, or a previously inactive interface being activated
+   through a configuration change.  At the instant of wake-up or link
+   activation, the machine is a brand new participant on a new network.
+   Its Multicast DNS cache for that interface is empty, and it has no
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 11]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   knowledge of its peers on that link.  It may have a significant
+   number of questions that it wants answered right away, to discover
+   information about its new surroundings and present that information
+   to the user.  As a new participant on the network, it has no idea
+   whether the exact same questions may have been asked and answered
+   just seconds ago.  In this case, triggering a large sudden flood of
+   multicast responses may impose an unreasonable burden on the network.
+
+   To avoid large floods of potentially unnecessary responses in these
+   cases, Multicast DNS defines the top bit in the class field of a DNS
+   question as the unicast-response bit.  When this bit is set in a
+   question, it indicates that the querier is willing to accept unicast
+   replies in response to this specific query, as well as the usual
+   multicast responses.  These questions requesting unicast responses
+   are referred to as "QU" questions, to distinguish them from the more
+   usual questions requesting multicast responses ("QM" questions).  A
+   Multicast DNS querier sending its initial batch of questions
+   immediately on wake from sleep or interface activation SHOULD set the
+   unicast-response bit in those questions.
+
+   When a question is retransmitted (as described in Section 5.2), the
+   unicast-response bit SHOULD NOT be set in subsequent retransmissions
+   of that question.  Subsequent retransmissions SHOULD be usual "QM"
+   questions.  After the first question has received its responses, the
+   querier should have a large Known-Answer list (Section 7.1) so that
+   subsequent queries should elicit few, if any, further responses.
+   Reverting to multicast responses as soon as possible is important
+   because of the benefits that multicast responses provide (see
+   Appendix D).  In addition, the unicast-response bit SHOULD be set
+   only for questions that are active and ready to be sent the moment of
+   wake from sleep or interface activation.  New questions created by
+   local clients afterwards should be treated as normal "QM" questions
+   and SHOULD NOT have the unicast-response bit set on the first
+   question of the series.
+
+   When receiving a question with the unicast-response bit set, a
+   responder SHOULD usually respond with a unicast packet directed back
+   to the querier.  However, if the responder has not multicast that
+   record recently (within one quarter of its TTL), then the responder
+   SHOULD instead multicast the response so as to keep all the peer
+   caches up to date, and to permit passive conflict detection.  In the
+   case of answering a probe question (Section 8.1) with the unicast-
+   response bit set, the responder should always generate the requested
+   unicast response, but it may also send a multicast announcement if
+   the time since the last multicast announcement of that record is more
+   than a quarter of its TTL.
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 12]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   Unicast replies are subject to all the same packet generation rules
+   as multicast replies, including the cache-flush bit (Section 10.2)
+   and (except when defending a unique name against a probe from another
+   host) randomized delays to reduce network collisions (Section 6).
+
+5.5.  Direct Unicast Queries to Port 5353
+
+   In specialized applications there may be rare situations where it
+   makes sense for a Multicast DNS querier to send its query via unicast
+   to a specific machine.  When a Multicast DNS responder receives a
+   query via direct unicast, it SHOULD respond as it would for "QU"
+   questions, as described above in Section 5.4.  Since it is possible
+   for a unicast query to be received from a machine outside the local
+   link, responders SHOULD check that the source address in the query
+   packet matches the local subnet for that link (or, in the case of
+   IPv6, the source address has an on-link prefix) and silently ignore
+   the packet if not.
+
+   There may be specialized situations, outside the scope of this
+   document, where it is intended and desirable to create a responder
+   that does answer queries originating outside the local link.  Such a
+   responder would need to ensure that these non-local queries are
+   always answered via unicast back to the querier, since an answer sent
+   via link-local multicast would not reach a querier outside the local
+   link.
+
+6.  Responding
+
+   When a Multicast DNS responder constructs and sends a Multicast DNS
+   response message, the Resource Record Sections of that message must
+   contain only records for which that responder is explicitly
+   authoritative.  These answers may be generated because the record
+   answers a question received in a Multicast DNS query message, or at
+   certain other times that the responder determines that an unsolicited
+   announcement is warranted.  A Multicast DNS responder MUST NOT place
+   records from its cache, which have been learned from other responders
+   on the network, in the Resource Record Sections of outgoing response
+   messages.  Only an authoritative source for a given record is allowed
+   to issue responses containing that record.
+
+   The determination of whether a given record answers a given question
+   is made using the standard DNS rules: the record name must match the
+   question name, the record rrtype must match the question qtype unless
+   the qtype is "ANY" (255) or the rrtype is "CNAME" (5), and the record
+   rrclass must match the question qclass unless the qclass is "ANY"
+   (255).  As with Unicast DNS, generally only DNS class 1 ("Internet")
+   is used, but should client software use classes other than 1, the
+   matching rules described above MUST be used.
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 13]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   A Multicast DNS responder MUST only respond when it has a positive,
+   non-null response to send, or it authoritatively knows that a
+   particular record does not exist.  For unique records, where the host
+   has already established sole ownership of the name, it MUST return
+   negative answers to queries for records that it knows not to exist.
+   For example, a host with no IPv6 address, that has claimed sole
+   ownership of the name "host.local." for all rrtypes, MUST respond to
+   AAAA queries for "host.local." by sending a negative answer
+   indicating that no AAAA records exist for that name.  See Section
+   6.1, "Negative Responses".  For shared records, which are owned by no
+   single host, the nonexistence of a given record is ascertained by the
+   failure of any machine to respond to the Multicast DNS query, not by
+   any explicit negative response.  For shared records, NXDOMAIN and
+   other error responses MUST NOT be sent.
+
+   Multicast DNS responses MUST NOT contain any questions in the
+   Question Section.  Any questions in the Question Section of a
+   received Multicast DNS response MUST be silently ignored.  Multicast
+   DNS queriers receiving Multicast DNS responses do not care what
+   question elicited the response; they care only that the information
+   in the response is true and accurate.
+
+   A Multicast DNS responder on Ethernet [IEEE.802.3] and similar shared
+   multiple access networks SHOULD have the capability of delaying its
+   responses by up to 500 ms, as described below.
+
+   If a large number of Multicast DNS responders were all to respond
+   immediately to a particular query, a collision would be virtually
+   guaranteed.  By imposing a small random delay, the number of
+   collisions is dramatically reduced.  On a full-sized Ethernet using
+   the maximum cable lengths allowed and the maximum number of repeaters
+   allowed, an Ethernet frame is vulnerable to collisions during the
+   transmission of its first 256 bits.  On 10 Mb/s Ethernet, this
+   equates to a vulnerable time window of 25.6 microseconds.  On higher-
+   speed variants of Ethernet, the vulnerable time window is shorter.
+
+   In the case where a Multicast DNS responder has good reason to
+   believe that it will be the only responder on the link that will send
+   a response (i.e., because it is able to answer every question in the
+   query message, and for all of those answer records it has previously
+   verified that the name, rrtype, and rrclass are unique on the link),
+   it SHOULD NOT impose any random delay before responding, and SHOULD
+   normally generate its response within at most 10 ms.  In particular,
+   this applies to responding to probe queries with the unicast-response
+   bit set.  Since receiving a probe query gives a clear indication that
+   some other responder is planning to start using this name in the very
+   near future, answering such probe queries to defend a unique record
+   is a high priority and needs to be done without delay.  A probe query
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 14]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   can be distinguished from a normal query by the fact that a probe
+   query contains a proposed record in the Authority Section that
+   answers the question in the Question Section (for more details, see
+   Section 8.2, "Simultaneous Probe Tiebreaking").
+
+   Responding without delay is appropriate for records like the address
+   record for a particular host name, when the host name has been
+   previously verified unique.  Responding without delay is *not*
+   appropriate for things like looking up PTR records used for DNS-Based
+   Service Discovery [RFC6763], where a large number of responses may be
+   anticipated.
+
+   In any case where there may be multiple responses, such as queries
+   where the answer is a member of a shared resource record set, each
+   responder SHOULD delay its response by a random amount of time
+   selected with uniform random distribution in the range 20-120 ms.
+   The reason for requiring that the delay be at least 20 ms is to
+   accommodate the situation where two or more query packets are sent
+   back-to-back, because in that case we want a responder with answers
+   to more than one of those queries to have the opportunity to
+   aggregate all of its answers into a single response message.
+
+   In the case where the query has the TC (truncated) bit set,
+   indicating that subsequent Known-Answer packets will follow,
+   responders SHOULD delay their responses by a random amount of time
+   selected with uniform random distribution in the range 400-500 ms, to
+   allow enough time for all the Known-Answer packets to arrive, as
+   described in Section 7.2, "Multipacket Known-Answer Suppression".
+
+   The source UDP port in all Multicast DNS responses MUST be 5353 (the
+   well-known port assigned to mDNS).  Multicast DNS implementations
+   MUST silently ignore any Multicast DNS responses they receive where
+   the source UDP port is not 5353.
+
+   The destination UDP port in all Multicast DNS responses MUST be 5353,
+   and the destination address MUST be the mDNS IPv4 link-local
+   multicast address 224.0.0.251 or its IPv6 equivalent FF02::FB, except
+   when generating a reply to a query that explicitly requested a
+   unicast response:
+
+      * via the unicast-response bit,
+      * by virtue of being a legacy query (Section 6.7), or
+      * by virtue of being a direct unicast query.
+
+   Except for these three specific cases, responses MUST NOT be sent via
+   unicast, because then the "Passive Observation of Failures"
+   mechanisms described in Section 10.5 would not work correctly.  Other
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 15]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   benefits of sending responses via multicast are discussed in Appendix
+   D.  A Multicast DNS querier MUST only accept unicast responses if
+   they answer a recently sent query (e.g., sent within the last two
+   seconds) that explicitly requested unicast responses.  A Multicast
+   DNS querier MUST silently ignore all other unicast responses.
+
+   To protect the network against excessive packet flooding due to
+   software bugs or malicious attack, a Multicast DNS responder MUST NOT
+   (except in the one special case of answering probe queries) multicast
+   a record on a given interface until at least one second has elapsed
+   since the last time that record was multicast on that particular
+   interface.  A legitimate querier on the network should have seen the
+   previous transmission and cached it.  A querier that did not receive
+   and cache the previous transmission will retry its request and
+   receive a subsequent response.  In the special case of answering
+   probe queries, because of the limited time before the probing host
+   will make its decision about whether or not to use the name, a
+   Multicast DNS responder MUST respond quickly.  In this special case
+   only, when responding via multicast to a probe, a Multicast DNS
+   responder is only required to delay its transmission as necessary to
+   ensure an interval of at least 250 ms since the last time the record
+   was multicast on that interface.
+
+6.1.  Negative Responses
+
+   In the early design of Multicast DNS it was assumed that explicit
+   negative responses would never be needed.  A host can assert the
+   existence of the set of records that it claims to exist, and the
+   union of all such sets on a link is the set of Multicast DNS records
+   that exist on that link.  Asserting the nonexistence of every record
+   in the complement of that set -- i.e., all possible Multicast DNS
+   records that could exist on this link but do not at this moment --
+   was felt to be impractical and unnecessary.  The nonexistence of a
+   record would be ascertained by a querier querying for it and failing
+   to receive a response from any of the hosts currently attached to the
+   link.
+
+   However, operational experience showed that explicit negative
+   responses can sometimes be valuable.  One such example is when a
+   querier is querying for a AAAA record, and the host name in question
+   has no associated IPv6 addresses.  In this case, the responding host
+   knows it currently has exclusive ownership of that name, and it knows
+   that it currently does not have any IPv6 addresses, so an explicit
+   negative response is preferable to the querier having to retransmit
+   its query multiple times, and eventually give up with a timeout,
+   before it can conclude that a given AAAA record does not exist.
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 16]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   Any time a responder receives a query for a name for which it has
+   verified exclusive ownership, for a type for which that name has no
+   records, the responder MUST (except as allowed in (a) below) respond
+   asserting the nonexistence of that record using a DNS NSEC record
+   [RFC4034].  In the case of Multicast DNS the NSEC record is not being
+   used for its usual DNSSEC [RFC4033] security properties, but simply
+   as a way of expressing which records do or do not exist with a given
+   name.
+
+   On receipt of a question for a particular name, rrtype, and rrclass,
+   for which a responder does have one or more unique answers, the
+   responder MAY also include an NSEC record in the Additional Record
+   Section indicating the nonexistence of other rrtypes for that name
+   and rrclass.
+
+   Implementers working with devices with sufficient memory and CPU
+   resources MAY choose to implement code to handle the full generality
+   of the DNS NSEC record [RFC4034], including bitmaps up to 65,536 bits
+   long.  To facilitate use by devices with limited memory and CPU
+   resources, Multicast DNS queriers are only REQUIRED to be able to
+   parse a restricted form of the DNS NSEC record.  All compliant
+   Multicast DNS implementations MUST at least correctly generate and
+   parse the restricted DNS NSEC record format described below:
+
+      o The 'Next Domain Name' field contains the record's own name.
+        When used with name compression, this means that the 'Next
+        Domain Name' field always takes exactly two bytes in the
+        message.
+
+      o The Type Bit Map block number is 0.
+
+      o The Type Bit Map block length byte is a value in the range 1-32.
+
+      o The Type Bit Map data is 1-32 bytes, as indicated by the
+        length byte.
+
+   Because this restricted form of the DNS NSEC record is limited to
+   Type Bit Map block number zero, it cannot express the existence of
+   rrtypes above 255.  Consequently, if a Multicast DNS responder were
+   to have records with rrtypes above 255, it MUST NOT generate these
+   restricted-form NSEC records for those names, since to do so would
+   imply that the name has no records with rrtypes above 255, which
+   would be false.  In such cases a Multicast DNS responder MUST either
+   (a) emit no NSEC record for that name, or (b) emit a full NSEC record
+   containing the appropriate Type Bit Map block(s) with the correct
+   bits set for all the record types that exist.  In practice this is
+   not a significant limitation, since rrtypes above 255 are not
+   currently in widespread use.
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 17]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   If a Multicast DNS implementation receives an NSEC record where the
+   'Next Domain Name' field is not the record's own name, then the
+   implementation SHOULD ignore the 'Next Domain Name' field and process
+   the remainder of the NSEC record as usual.  In Multicast DNS the
+   'Next Domain Name' field is not currently used, but it could be used
+   in a future version of this protocol, which is why a Multicast DNS
+   implementation MUST NOT reject or ignore an NSEC record it receives
+   just because it finds an unexpected value in the 'Next Domain Name'
+   field.
+
+   If a Multicast DNS implementation receives an NSEC record containing
+   more than one Type Bit Map, or where the Type Bit Map block number is
+   not zero, or where the block length is not in the range 1-32, then
+   the Multicast DNS implementation MAY silently ignore the entire NSEC
+   record.  A Multicast DNS implementation MUST NOT ignore an entire
+   message just because that message contains one or more NSEC record(s)
+   that the Multicast DNS implementation cannot parse.  This provision
+   is to allow future enhancements to the protocol to be introduced in a
+   backwards-compatible way that does not break compatibility with older
+   Multicast DNS implementations.
+
+   To help differentiate these synthesized NSEC records (generated
+   programmatically on-the-fly) from conventional Unicast DNS NSEC
+   records (which actually exist in a signed DNS zone), the synthesized
+   Multicast DNS NSEC records MUST NOT have the NSEC bit set in the Type
+   Bit Map, whereas conventional Unicast DNS NSEC records do have the
+   NSEC bit set.
+
+   The TTL of the NSEC record indicates the intended lifetime of the
+   negative cache entry.  In general, the TTL given for an NSEC record
+   SHOULD be the same as the TTL that the record would have had, had it
+   existed.  For example, the TTL for address records in Multicast DNS
+   is typically 120 seconds (see Section 10), so the negative cache
+   lifetime for an address record that does not exist should also be 120
+   seconds.
+
+   A responder MUST only generate negative responses to queries for
+   which it has legitimate ownership of the name, rrtype, and rrclass in
+   question, and can legitimately assert that no record with that name,
+   rrtype, and rrclass exists.  A responder can assert that a specified
+   rrtype does not exist for one of its names if it knows a priori that
+   it has exclusive ownership of that name (e.g., names of reverse
+   address mapping PTR records, which are derived from IP addresses,
+   which should be unique on the local link) or if it previously claimed
+   unique ownership of that name using probe queries for rrtype "ANY".
+   (If it were to use probe queries for a specific rrtype, then it would
+   only own the name for that rrtype, and could not assert that other
+   rrtypes do not exist.)
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 18]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   The design rationale for this mechanism for encoding negative
+   responses is discussed further in Appendix E.
+
+6.2.  Responding to Address Queries
+
+   When a Multicast DNS responder sends a Multicast DNS response message
+   containing its own address records, it MUST include all addresses
+   that are valid on the interface on which it is sending the message,
+   and MUST NOT include addresses that are not valid on that interface
+   (such as addresses that may be configured on the host's other
+   interfaces).  For example, if an interface has both an IPv6 link-
+   local and an IPv6 routable address, both should be included in the
+   response message so that queriers receive both and can make their own
+   choice about which to use.  This allows a querier that only has an
+   IPv6 link-local address to connect to the link-local address, and a
+   different querier that has an IPv6 routable address to connect to the
+   IPv6 routable address instead.
+
+   When a Multicast DNS responder places an IPv4 or IPv6 address record
+   (rrtype "A" or "AAAA") into a response message, it SHOULD also place
+   any records of the other address type with the same name into the
+   additional section, if there is space in the message.  This is to
+   provide fate sharing, so that all a device's addresses are delivered
+   atomically in a single message, to reduce the risk that packet loss
+   could cause a querier to receive only the IPv4 addresses and not the
+   IPv6 addresses, or vice versa.
+
+   In the event that a device has only IPv4 addresses but no IPv6
+   addresses, or vice versa, then the appropriate NSEC record SHOULD be
+   placed into the additional section, so that queriers can know with
+   certainty that the device has no addresses of that kind.
+
+   Some Multicast DNS responders treat a physical interface with both
+   IPv4 and IPv6 addresses as a single interface with two addresses.
+   Other Multicast DNS responders may treat this case as logically two
+   interfaces (one with one or more IPv4 addresses, and the other with
+   one or more IPv6 addresses), but responders that operate this way
+   MUST NOT put the corresponding automatic NSEC records in replies they
+   send (i.e., a negative IPv4 assertion in their IPv6 responses, and a
+   negative IPv6 assertion in their IPv4 responses) because this would
+   cause incorrect operation in responders on the network that work the
+   former way.
+
+6.3.  Responding to Multiquestion Queries
+
+   Multicast DNS responders MUST correctly handle DNS query messages
+   containing more than one question, by answering any or all of the
+   questions to which they have answers.  Unlike single-question
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 19]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   queries, where responding without delay is allowed in appropriate
+   cases, for query messages containing more than one question, all
+   (non-defensive) answers SHOULD be randomly delayed in the range
+   20-120 ms, or 400-500 ms if the TC (truncated) bit is set.  This is
+   because when a query message contains more than one question, a
+   Multicast DNS responder cannot generally be certain that other
+   responders will not also be simultaneously generating answers to
+   other questions in that query message.  (Answers defending a name, in
+   response to a probe for that name, are not subject to this delay rule
+   and are still sent immediately.)
+
+6.4.  Response Aggregation
+
+   When possible, a responder SHOULD, for the sake of network
+   efficiency, aggregate as many responses as possible into a single
+   Multicast DNS response message.  For example, when a responder has
+   several responses it plans to send, each delayed by a different
+   interval, then earlier responses SHOULD be delayed by up to an
+   additional 500 ms if that will permit them to be aggregated with
+   other responses scheduled to go out a little later.
+
+6.5.  Wildcard Queries (qtype "ANY" and qclass "ANY")
+
+   When responding to queries using qtype "ANY" (255) and/or qclass
+   "ANY" (255), a Multicast DNS responder MUST respond with *ALL* of its
+   records that match the query.  This is subtly different from how
+   qtype "ANY" and qclass "ANY" work in Unicast DNS.
+
+   A common misconception is that a Unicast DNS query for qtype "ANY"
+   will elicit a response containing all matching records.  This is
+   incorrect.  If there are any records that match the query, the
+   response is required only to contain at least one of them, not
+   necessarily all of them.
+
+   This somewhat surprising behavior is commonly seen with caching
+   (i.e., "recursive") name servers.  If a caching server receives a
+   qtype "ANY" query for which it has at least one valid answer, it is
+   allowed to return only those matching answers it happens to have
+   already in its cache, and it is not required to reconsult the
+   authoritative name server to check if there are any more records that
+   also match the qtype "ANY" query.
+
+   For example, one might imagine that a query for qtype "ANY" for name
+   "host.example.com" would return both the IPv4 (A) and the IPv6 (AAAA)
+   address records for that host.  In reality, what happens is that it
+   depends on the history of what queries have been previously received
+   by intervening caching servers.  If a caching server has no records
+   for "host.example.com", then it will consult another server (usually
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 20]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   the authoritative name server for the name in question), and, in that
+   case, it will typically return all IPv4 and IPv6 address records.
+   However, if some other host has recently done a query for qtype "A"
+   for name "host.example.com", so that the caching server already has
+   IPv4 address records for "host.example.com" in its cache but no IPv6
+   address records, then it will return only the IPv4 address records it
+   already has cached, and no IPv6 address records.
+
+   Multicast DNS does not share this property that qtype "ANY" and
+   qclass "ANY" queries return some undefined subset of the matching
+   records.  When responding to queries using qtype "ANY" (255) and/or
+   qclass "ANY" (255), a Multicast DNS responder MUST respond with *ALL*
+   of its records that match the query.
+
+6.6.  Cooperating Multicast DNS Responders
+
+   If a Multicast DNS responder ("A") observes some other Multicast DNS
+   responder ("B") send a Multicast DNS response message containing a
+   resource record with the same name, rrtype, and rrclass as one of A's
+   resource records, but *different* rdata, then:
+
+      o If A's resource record is intended to be a shared resource
+        record, then this is no conflict, and no action is required.
+
+      o If A's resource record is intended to be a member of a unique
+        resource record set owned solely by that responder, then this is
+        a conflict and MUST be handled as described in Section 9,
+        "Conflict Resolution".
+
+   If a Multicast DNS responder ("A") observes some other Multicast DNS
+   responder ("B") send a Multicast DNS response message containing a
+   resource record with the same name, rrtype, and rrclass as one of A's
+   resource records, and *identical* rdata, then:
+
+      o If the TTL of B's resource record given in the message is at
+        least half the true TTL from A's point of view, then no action
+        is required.
+
+      o If the TTL of B's resource record given in the message is less
+        than half the true TTL from A's point of view, then A MUST mark
+        its record to be announced via multicast.  Queriers receiving
+        the record from B would use the TTL given by B and, hence, may
+        delete the record sooner than A expects.  By sending its own
+        multicast response correcting the TTL, A ensures that the record
+        will be retained for the desired time.
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 21]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   These rules allow multiple Multicast DNS responders to offer the same
+   data on the network (perhaps for fault-tolerance reasons) without
+   conflicting with each other.
+
+6.7.  Legacy Unicast Responses
+
+   If the source UDP port in a received Multicast DNS query is not port
+   5353, this indicates that the querier originating the query is a
+   simple resolver such as described in Section 5.1, "One-Shot Multicast
+   DNS Queries", which does not fully implement all of Multicast DNS.
+   In this case, the Multicast DNS responder MUST send a UDP response
+   directly back to the querier, via unicast, to the query packet's
+   source IP address and port.  This unicast response MUST be a
+   conventional unicast response as would be generated by a conventional
+   Unicast DNS server; for example, it MUST repeat the query ID and the
+   question given in the query message.  In addition, the cache-flush
+   bit described in Section 10.2, "Announcements to Flush Outdated Cache
+   Entries", MUST NOT be set in legacy unicast responses.
+
+   The resource record TTL given in a legacy unicast response SHOULD NOT
+   be greater than ten seconds, even if the true TTL of the Multicast
+   DNS resource record is higher.  This is because Multicast DNS
+   responders that fully participate in the protocol use the cache
+   coherency mechanisms described in Section 10, "Resource Record TTL
+   Values and Cache Coherency", to update and invalidate stale data.
+   Were unicast responses sent to legacy resolvers to use the same high
+   TTLs, these legacy resolvers, which do not implement these cache
+   coherency mechanisms, could retain stale cached resource record data
+   long after it is no longer valid.
+
+7.  Traffic Reduction
+
+   A variety of techniques are used to reduce the amount of traffic on
+   the network.
+
+7.1.  Known-Answer Suppression
+
+   When a Multicast DNS querier sends a query to which it already knows
+   some answers, it populates the Answer Section of the DNS query
+   message with those answers.
+
+   Generally, this applies only to Shared records, not Unique records,
+   since if a Multicast DNS querier already has at least one Unique
+   record in its cache then it should not be expecting further different
+   answers to this question, since the Unique record(s) it already has
+   comprise the complete answer, so it has no reason to be sending the
+   query at all.  In contrast, having some Shared records in its cache
+   does not necessarily imply that a Multicast DNS querier will not
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 22]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   receive further answers to this query, and it is in this case that it
+   is beneficial to use the Known-Answer list to suppress repeated
+   sending of redundant answers that the querier already knows.
+
+   A Multicast DNS responder MUST NOT answer a Multicast DNS query if
+   the answer it would give is already included in the Answer Section
+   with an RR TTL at least half the correct value.  If the RR TTL of the
+   answer as given in the Answer Section is less than half of the true
+   RR TTL as known by the Multicast DNS responder, the responder MUST
+   send an answer so as to update the querier's cache before the record
+   becomes in danger of expiration.
+
+   Because a Multicast DNS responder will respond if the remaining TTL
+   given in the Known-Answer list is less than half the true TTL, it is
+   superfluous for the querier to include such records in the Known-
+   Answer list.  Therefore, a Multicast DNS querier SHOULD NOT include
+   records in the Known-Answer list whose remaining TTL is less than
+   half of their original TTL.  Doing so would simply consume space in
+   the message without achieving the goal of suppressing responses and
+   would, therefore, be a pointless waste of network capacity.
+
+   A Multicast DNS querier MUST NOT cache resource records observed in
+   the Known-Answer Section of other Multicast DNS queries.  The Answer
+   Section of Multicast DNS queries is not authoritative.  By placing
+   information in the Answer Section of a Multicast DNS query, the
+   querier is stating that it *believes* the information to be true.  It
+   is not asserting that the information *is* true.  Some of those
+   records may have come from other hosts that are no longer on the
+   network.  Propagating that stale information to other Multicast DNS
+   queriers on the network would not be helpful.
+
+7.2.  Multipacket Known-Answer Suppression
+
+   Sometimes a Multicast DNS querier will already have too many answers
+   to fit in the Known-Answer Section of its query packets.  In this
+   case, it should issue a Multicast DNS query containing a question and
+   as many Known-Answer records as will fit.  It MUST then set the TC
+   (Truncated) bit in the header before sending the query.  It MUST
+   immediately follow the packet with another query packet containing no
+   questions and as many more Known-Answer records as will fit.  If
+   there are still too many records remaining to fit in the packet, it
+   again sets the TC bit and continues until all the Known-Answer
+   records have been sent.
+
+   A Multicast DNS responder seeing a Multicast DNS query with the TC
+   bit set defers its response for a time period randomly selected in
+   the interval 400-500 ms.  This gives the Multicast DNS querier time
+   to send additional Known-Answer packets before the responder
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 23]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   responds.  If the responder sees any of its answers listed in the
+   Known-Answer lists of subsequent packets from the querying host, it
+   MUST delete that answer from the list of answers it is planning to
+   give (provided that no other host on the network has also issued a
+   query for that record and is waiting to receive an answer).
+
+   If the responder receives additional Known-Answer packets with the TC
+   bit set, it SHOULD extend the delay as necessary to ensure a pause of
+   400-500 ms after the last such packet before it sends its answer.
+   This opens the potential risk that a continuous stream of Known-
+   Answer packets could, theoretically, prevent a responder from
+   answering indefinitely.  In practice, answers are never actually
+   delayed significantly, and should a situation arise where significant
+   delays did happen, that would be a scenario where the network is so
+   overloaded that it would be desirable to err on the side of caution.
+   The consequence of delaying an answer may be that it takes a user
+   longer than usual to discover all the services on the local network;
+   in contrast, the consequence of incorrectly answering before all the
+   Known-Answer packets have been received would be wasted capacity
+   sending unnecessary answers on an already overloaded network.  In
+   this (rare) situation, sacrificing speed to preserve reliable network
+   operation is the right trade-off.
+
+7.3.  Duplicate Question Suppression
+
+   If a host is planning to transmit (or retransmit) a query, and it
+   sees another host on the network send a query containing the same
+   "QM" question, and the Known-Answer Section of that query does not
+   contain any records that this host would not also put in its own
+   Known-Answer Section, then this host SHOULD treat its own query as
+   having been sent.  When multiple queriers on the network are querying
+   for the same resource records, there is no need for them to all be
+   repeatedly asking the same question.
+
+7.4.  Duplicate Answer Suppression
+
+   If a host is planning to send an answer, and it sees another host on
+   the network send a response message containing the same answer
+   record, and the TTL in that record is not less than the TTL this host
+   would have given, then this host SHOULD treat its own answer as
+   having been sent, and not also send an identical answer itself.  When
+   multiple responders on the network have the same data, there is no
+   need for all of them to respond.
+
+
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 24]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   The opportunity for duplicate answer suppression occurs when a host
+   has received a query, and is delaying its response for some pseudo-
+   random interval up to 500 ms, as described elsewhere in this
+   document, and then, before the host sends its response, it sees some
+   other host on the network send a response message containing the same
+   answer record.
+
+   This feature is particularly useful when Multicast DNS Proxy Servers
+   are in use, where there could be more than one proxy on the network
+   giving Multicast DNS answers on behalf of some other host (e.g.,
+   because that other host is currently asleep and is not itself
+   responding to queries).
+
+8.  Probing and Announcing on Startup
+
+   Typically a Multicast DNS responder should have, at the very least,
+   address records for all of its active interfaces.  Creating and
+   advertising an HINFO record on each interface as well can be useful
+   to network administrators.
+
+   Whenever a Multicast DNS responder starts up, wakes up from sleep,
+   receives an indication of a network interface "Link Change" event, or
+   has any other reason to believe that its network connectivity may
+   have changed in some relevant way, it MUST perform the two startup
+   steps below: Probing (Section 8.1) and Announcing (Section 8.3).
+
+8.1.  Probing
+
+   The first startup step is that, for all those resource records that a
+   Multicast DNS responder desires to be unique on the local link, it
+   MUST send a Multicast DNS query asking for those resource records, to
+   see if any of them are already in use.  The primary example of this
+   is a host's address records, which map its unique host name to its
+   unique IPv4 and/or IPv6 addresses.  All probe queries SHOULD be done
+   using the desired resource record name and class (usually class 1,
+   "Internet"), and query type "ANY" (255), to elicit answers for all
+   types of records with that name.  This allows a single question to be
+   used in place of several questions, which is more efficient on the
+   network.  It also allows a host to verify exclusive ownership of a
+   name for all rrtypes, which is desirable in most cases.  It would be
+   confusing, for example, if one host owned the "A" record for
+   "myhost.local.", but a different host owned the "AAAA" record for
+   that name.
+
+
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 25]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   The ability to place more than one question in a Multicast DNS query
+   is useful here, because it can allow a host to use a single message
+   to probe for all of its resource records instead of needing a
+   separate message for each.  For example, a host can simultaneously
+   probe for uniqueness of its "A" record and all its SRV records
+   [RFC6763] in the same query message.
+
+   When ready to send its Multicast DNS probe packet(s) the host should
+   first wait for a short random delay time, uniformly distributed in
+   the range 0-250 ms.  This random delay is to guard against the case
+   where several devices are powered on simultaneously, or several
+   devices are connected to an Ethernet hub, which is then powered on,
+   or some other external event happens that might cause a group of
+   hosts to all send synchronized probes.
+
+   250 ms after the first query, the host should send a second; then,
+   250 ms after that, a third.  If, by 250 ms after the third probe, no
+   conflicting Multicast DNS responses have been received, the host may
+   move to the next step, announcing.  (Note that probing is the one
+   exception from the normal rule that there should be at least one
+   second between repetitions of the same question, and the interval
+   between subsequent repetitions should at least double.)
+
+   When sending probe queries, a host MUST NOT consult its cache for
+   potential answers.  Only conflicting Multicast DNS responses received
+   "live" from the network are considered valid for the purposes of
+   determining whether probing has succeeded or failed.
+
+   In order to allow services to announce their presence without
+   unreasonable delay, the time window for probing is intentionally set
+   quite short.  As a result of this, from the time the first probe
+   packet is sent, another device on the network using that name has
+   just 750 ms to respond to defend its name.  On networks that are
+   slow, or busy, or both, it is possible for round-trip latency to
+   account for a few hundred milliseconds, and software delays in slow
+   devices can add additional delay.  Hence, it is important that when a
+   device receives a probe query for a name that it is currently using,
+   it SHOULD generate its response to defend that name immediately and
+   send it as quickly as possible.  The usual rules about random delays
+   before responding, to avoid sudden bursts of simultaneous answers
+   from different hosts, do not apply here since normally at most one
+   host should ever respond to a given probe question.  Even when a
+   single DNS query message contains multiple probe questions, it would
+   be unusual for that message to elicit a defensive response from more
+   than one other host.  Because of the mDNS multicast rate-limiting
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 26]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   rules, the probes SHOULD be sent as "QU" questions with the unicast-
+   response bit set, to allow a defending host to respond immediately
+   via unicast, instead of potentially having to wait before replying
+   via multicast.
+
+   During probing, from the time the first probe packet is sent until
+   250 ms after the third probe, if any conflicting Multicast DNS
+   response is received, then the probing host MUST defer to the
+   existing host, and SHOULD choose new names for some or all of its
+   resource records as appropriate.  Apparently conflicting Multicast
+   DNS responses received *before* the first probe packet is sent MUST
+   be silently ignored (see discussion of stale probe packets in Section
+   8.2, "Simultaneous Probe Tiebreaking", below).  In the case of a host
+   probing using query type "ANY" as recommended above, any answer
+   containing a record with that name, of any type, MUST be considered a
+   conflicting response and handled accordingly.
+
+   If fifteen conflicts occur within any ten-second period, then the
+   host MUST wait at least five seconds before each successive
+   additional probe attempt.  This is to help ensure that, in the event
+   of software bugs or other unanticipated problems, errant hosts do not
+   flood the network with a continuous stream of multicast traffic.  For
+   very simple devices, a valid way to comply with this requirement is
+   to always wait five seconds after any failed probe attempt before
+   trying again.
+
+   If a responder knows by other means that its unique resource record
+   set name, rrtype, and rrclass cannot already be in use by any other
+   responder on the network, then it SHOULD skip the probing step for
+   that resource record set.  For example, when creating the reverse
+   address mapping PTR records, the host can reasonably assume that no
+   other host will be trying to create those same PTR records, since
+   that would imply that the two hosts were trying to use the same IP
+   address, and if that were the case, the two hosts would be suffering
+   communication problems beyond the scope of what Multicast DNS is
+   designed to solve.  Similarly, if a responder is acting as a proxy,
+   taking over from another Multicast DNS responder that has already
+   verified the uniqueness of the record, then the proxy SHOULD NOT
+   repeat the probing step for those records.
+
+8.2.  Simultaneous Probe Tiebreaking
+
+   The astute reader will observe that there is a race condition
+   inherent in the previous description.  If two hosts are probing for
+   the same name simultaneously, neither will receive any response to
+   the probe, and the hosts could incorrectly conclude that they may
+   both proceed to use the name.  To break this symmetry, each host
+   populates the query message's Authority Section with the record or
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 27]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   records with the rdata that it would be proposing to use, should its
+   probing be successful.  The Authority Section is being used here in a
+   way analogous to the way it is used as the "Update Section" in a DNS
+   Update message [RFC2136] [RFC3007].
+
+   When a host is probing for a group of related records with the same
+   name (e.g., the SRV and TXT record describing a DNS-SD service), only
+   a single question need be placed in the Question Section, since query
+   type "ANY" (255) is used, which will elicit answers for all records
+   with that name.  However, for tiebreaking to work correctly in all
+   cases, the Authority Section must contain *all* the records and
+   proposed rdata being probed for uniqueness.
+
+   When a host that is probing for a record sees another host issue a
+   query for the same record, it consults the Authority Section of that
+   query.  If it finds any resource record(s) there which answers the
+   query, then it compares the data of that (those) resource record(s)
+   with its own tentative data.  We consider first the simple case of a
+   host probing for a single record, receiving a simultaneous probe from
+   another host also probing for a single record.  The two records are
+   compared and the lexicographically later data wins.  This means that
+   if the host finds that its own data is lexicographically later, it
+   simply ignores the other host's probe.  If the host finds that its
+   own data is lexicographically earlier, then it defers to the winning
+   host by waiting one second, and then begins probing for this record
+   again.  The logic for waiting one second and then trying again is to
+   guard against stale probe packets on the network (possibly even stale
+   probe packets sent moments ago by this host itself, before some
+   configuration change, which may be echoed back after a short delay by
+   some Ethernet switches and some 802.11 base stations).  If the
+   winning simultaneous probe was from a real other host on the network,
+   then after one second it will have completed its probing, and will
+   answer subsequent probes.  If the apparently winning simultaneous
+   probe was in fact just an old stale packet on the network (maybe from
+   the host itself), then when it retries its probing in one second, its
+   probes will go unanswered, and it will successfully claim the name.
+
+   The determination of "lexicographically later" is performed by first
+   comparing the record class (excluding the cache-flush bit described
+   in Section 10.2), then the record type, then raw comparison of the
+   binary content of the rdata without regard for meaning or structure.
+   If the record classes differ, then the numerically greater class is
+   considered "lexicographically later".  Otherwise, if the record types
+   differ, then the numerically greater type is considered
+   "lexicographically later".  If the rrtype and rrclass both match,
+   then the rdata is compared.
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 28]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   In the case of resource records containing rdata that is subject to
+   name compression [RFC1035], the names MUST be uncompressed before
+   comparison.  (The details of how a particular name is compressed are
+   an artifact of how and where the record is written into the DNS
+   message; it is not an intrinsic property of the resource record
+   itself.)
+
+   The bytes of the raw uncompressed rdata are compared in turn,
+   interpreting the bytes as eight-bit UNSIGNED values, until a byte is
+   found whose value is greater than that of its counterpart (in which
+   case, the rdata whose byte has the greater value is deemed
+   lexicographically later) or one of the resource records runs out of
+   rdata (in which case, the resource record that still has remaining
+   data is deemed lexicographically later).  The following is an
+   example of a conflict:
+
+     MyPrinter.local. A 169.254.99.200
+     MyPrinter.local. A 169.254.200.50
+
+   In this case, 169.254.200.50 is lexicographically later (the third
+   byte, with value 200, is greater than its counterpart with value 99),
+   so it is deemed the winner.
+
+   Note that it is vital that the bytes are interpreted as UNSIGNED
+   values in the range 0-255, or the wrong outcome may result.  In the
+   example above, if the byte with value 200 had been incorrectly
+   interpreted as a signed eight-bit value, then it would be interpreted
+   as value -56, and the wrong address record would be deemed the
+   winner.
+
+8.2.1.  Simultaneous Probe Tiebreaking for Multiple Records
+
+   When a host is probing for a set of records with the same name, or a
+   message is received containing multiple tiebreaker records answering
+   a given probe question in the Question Section, the host's records
+   and the tiebreaker records from the message are each sorted into
+   order, and then compared pairwise, using the same comparison
+   technique described above, until a difference is found.
+
+   The records are sorted using the same lexicographical order as
+   described above, that is, if the record classes differ, the record
+   with the lower class number comes first.  If the classes are the same
+   but the rrtypes differ, the record with the lower rrtype number comes
+   first.  If the class and rrtype match, then the rdata is compared
+   bytewise until a difference is found.  For example, in the common
+   case of advertising DNS-SD services with a TXT record and an SRV
+   record, the TXT record comes first (the rrtype value for TXT is 16)
+   and the SRV record comes second (the rrtype value for SRV is 33).
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 29]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   When comparing the records, if the first records match perfectly,
+   then the second records are compared, and so on.  If either list of
+   records runs out of records before any difference is found, then the
+   list with records remaining is deemed to have won the tiebreak.  If
+   both lists run out of records at the same time without any difference
+   being found, then this indicates that two devices are advertising
+   identical sets of records, as is sometimes done for fault tolerance,
+   and there is, in fact, no conflict.
+
+8.3.  Announcing
+
+   The second startup step is that the Multicast DNS responder MUST send
+   an unsolicited Multicast DNS response containing, in the Answer
+   Section, all of its newly registered resource records (both shared
+   records, and unique records that have completed the probing step).
+   If there are too many resource records to fit in a single packet,
+   multiple packets should be used.
+
+   In the case of shared records (e.g., the PTR records used by DNS-
+   Based Service Discovery [RFC6763]), the records are simply placed as
+   is into the Answer Section of the DNS response.
+
+   In the case of records that have been verified to be unique in the
+   previous step, they are placed into the Answer Section of the DNS
+   response with the most significant bit of the rrclass set to one.
+   The most significant bit of the rrclass for a record in the Answer
+   Section of a response message is the Multicast DNS cache-flush bit
+   and is discussed in more detail below in Section 10.2, "Announcements
+   to Flush Outdated Cache Entries".
+
+   The Multicast DNS responder MUST send at least two unsolicited
+   responses, one second apart.  To provide increased robustness against
+   packet loss, a responder MAY send up to eight unsolicited responses,
+   provided that the interval between unsolicited responses increases by
+   at least a factor of two with every response sent.
+
+   A Multicast DNS responder MUST NOT send announcements in the absence
+   of information that its network connectivity may have changed in some
+   relevant way.  In particular, a Multicast DNS responder MUST NOT send
+   regular periodic announcements as a matter of course.
+
+   Whenever a Multicast DNS responder receives any Multicast DNS
+   response (solicited or otherwise) containing a conflicting resource
+   record, the conflict MUST be resolved as described in Section 9,
+   "Conflict Resolution".
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 30]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+8.4.  Updating
+
+   At any time, if the rdata of any of a host's Multicast DNS records
+   changes, the host MUST repeat the Announcing step described above to
+   update neighboring caches.  For example, if any of a host's IP
+   addresses change, it MUST re-announce those address records.  The
+   host does not need to repeat the Probing step because it has already
+   established unique ownership of that name.
+
+   In the case of shared records, a host MUST send a "goodbye"
+   announcement with RR TTL zero (see Section 10.1, "Goodbye Packets")
+   for the old rdata, to cause it to be deleted from peer caches, before
+   announcing the new rdata.  In the case of unique records, a host
+   SHOULD omit the "goodbye" announcement, since the cache-flush bit on
+   the newly announced records will cause old rdata to be flushed from
+   peer caches anyway.
+
+   A host may update the contents of any of its records at any time,
+   though a host SHOULD NOT update records more frequently than ten
+   times per minute.  Frequent rapid updates impose a burden on the
+   network.  If a host has information to disseminate which changes more
+   frequently than ten times per minute, then it may be more appropriate
+   to design a protocol for that specific purpose.
+
+9.  Conflict Resolution
+
+   A conflict occurs when a Multicast DNS responder has a unique record
+   for which it is currently authoritative, and it receives a Multicast
+   DNS response message containing a record with the same name, rrtype
+   and rrclass, but inconsistent rdata.  What may be considered
+   inconsistent is context sensitive, except that resource records with
+   identical rdata are never considered inconsistent, even if they
+   originate from different hosts.  This is to permit use of proxies and
+   other fault-tolerance mechanisms that may cause more than one
+   responder to be capable of issuing identical answers on the network.
+
+   A common example of a resource record type that is intended to be
+   unique, not shared between hosts, is the address record that maps a
+   host's name to its IP address.  Should a host witness another host
+   announce an address record with the same name but a different IP
+   address, then that is considered inconsistent, and that address
+   record is considered to be in conflict.
+
+   Whenever a Multicast DNS responder receives any Multicast DNS
+   response (solicited or otherwise) containing a conflicting resource
+   record in any of the Resource Record Sections, the Multicast DNS
+   responder MUST immediately reset its conflicted unique record to
+   probing state, and go through the startup steps described above in
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 31]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   Section 8, "Probing and Announcing on Startup".  The protocol used in
+   the Probing phase will determine a winner and a loser, and the loser
+   MUST cease using the name, and reconfigure.
+
+   It is very important that any host receiving a resource record that
+   conflicts with one of its own MUST take action as described above.
+   In the case of two hosts using the same host name, where one has been
+   configured to require a unique host name and the other has not, the
+   one that has not been configured to require a unique host name will
+   not perceive any conflict, and will not take any action.  By
+   reverting to Probing state, the host that desires a unique host name
+   will go through the necessary steps to ensure that a unique host name
+   is obtained.
+
+   The recommended course of action after probing and failing is as
+   follows:
+
+      1. Programmatically change the resource record name in an attempt
+         to find a new name that is unique.  This could be done by
+         adding some further identifying information (e.g., the model
+         name of the hardware) if it is not already present in the name,
+         or appending the digit "2" to the name, or incrementing a
+         number at the end of the name if one is already present.
+
+      2. Probe again, and repeat as necessary until a unique name is
+         found.
+
+      3. Once an available unique name has been determined, by probing
+         without receiving any conflicting response, record this newly
+         chosen name in persistent storage so that the device will use
+         the same name the next time it is power-cycled.
+
+      4. Display a message to the user or operator informing them of the
+         name change.  For example:
+
+            The name "Bob's Music" is in use by another music server on
+            the network.  Your music collection has been renamed to
+            "Bob's Music (2)".  If you want to change this name, use
+            [describe appropriate menu item or preference dialog here].
+
+         The details of how the user or operator is informed of the new
+         name depend on context.  A desktop computer with a screen
+         might put up a dialog box.  A headless server in the closet may
+         write a message to a log file, or use whatever mechanism
+         (email, SNMP trap, etc.) it uses to inform the administrator of
+         error conditions.  On the other hand, a headless server in the
+         closet may not inform the user at all -- if the user cares,
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 32]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+         they will notice the name has changed, and connect to the
+         server in the usual way (e.g., via web browser) to configure a
+         new name.
+
+      5. After one minute of probing, if the Multicast DNS responder has
+         been unable to find any unused name, it should log an error
+         message to inform the user or operator of this fact.  This
+         situation should never occur in normal operation.  The only
+         situations that would cause this to happen would be either a
+         deliberate denial-of-service attack, or some kind of very
+         obscure hardware or software bug that acts like a deliberate
+         denial-of-service attack.
+
+   These considerations apply to address records (i.e., host names) and
+   to all resource records where uniqueness (or maintenance of some
+   other defined constraint) is desired.
+
+10.  Resource Record TTL Values and Cache Coherency
+
+   As a general rule, the recommended TTL value for Multicast DNS
+   resource records with a host name as the resource record's name
+   (e.g., A, AAAA, HINFO) or a host name contained within the resource
+   record's rdata (e.g., SRV, reverse mapping PTR record) SHOULD be 120
+   seconds.
+
+   The recommended TTL value for other Multicast DNS resource records is
+   75 minutes.
+
+   A querier with an active outstanding query will issue a query message
+   when one or more of the resource records in its cache are 80% of the
+   way to expiry.  If the TTL on those records is 75 minutes, this
+   ongoing cache maintenance process yields a steady-state query rate of
+   one query every 60 minutes.
+
+   Any distributed cache needs a cache coherency protocol.  If Multicast
+   DNS resource records follow the recommendation and have a TTL of 75
+   minutes, that means that stale data could persist in the system for a
+   little over an hour.  Making the default RR TTL significantly lower
+   would reduce the lifetime of stale data, but would produce too much
+   extra traffic on the network.  Various techniques are available to
+   minimize the impact of such stale data, outlined in the five
+   subsections below.
+
+10.1.  Goodbye Packets
+
+   In the case where a host knows that certain resource record data is
+   about to become invalid (for example, when the host is undergoing a
+   clean shutdown), the host SHOULD send an unsolicited Multicast DNS
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 33]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   response packet, giving the same resource record name, rrtype,
+   rrclass, and rdata, but an RR TTL of zero.  This has the effect of
+   updating the TTL stored in neighboring hosts' cache entries to zero,
+   causing that cache entry to be promptly deleted.
+
+   Queriers receiving a Multicast DNS response with a TTL of zero SHOULD
+   NOT immediately delete the record from the cache, but instead record
+   a TTL of 1 and then delete the record one second later.  In the case
+   of multiple Multicast DNS responders on the network described in
+   Section 6.6 above, if one of the responders shuts down and
+   incorrectly sends goodbye packets for its records, it gives the other
+   cooperating responders one second to send out their own response to
+   "rescue" the records before they expire and are deleted.
+
+10.2.  Announcements to Flush Outdated Cache Entries
+
+   Whenever a host has a resource record with new data, or with what
+   might potentially be new data (e.g., after rebooting, waking from
+   sleep, connecting to a new network link, or changing IP address), the
+   host needs to inform peers of that new data.  In cases where the host
+   has not been continuously connected and participating on the network
+   link, it MUST first probe to re-verify uniqueness of its unique
+   records, as described above in Section 8.1, "Probing".
+
+   Having completed the Probing step, if necessary, the host MUST then
+   send a series of unsolicited announcements to update cache entries in
+   its neighbor hosts.  In these unsolicited announcements, if the
+   record is one that has been verified unique, the host sets the most
+   significant bit of the rrclass field of the resource record.  This
+   bit, the cache-flush bit, tells neighboring hosts that this is not a
+   shared record type.  Instead of merging this new record additively
+   into the cache in addition to any previous records with the same
+   name, rrtype, and rrclass, all old records with that name, rrtype,
+   and rrclass that were received more than one second ago are declared
+   invalid, and marked to expire from the cache in one second.
+
+   The semantics of the cache-flush bit are as follows: normally when a
+   resource record appears in a Resource Record Section of the DNS
+   response it means, "This is an assertion that this information is
+   true".  When a resource record appears in a Resource Record Section
+   of the DNS response with the cache-flush bit set, it means, "This is
+   an assertion that this information is the truth and the whole truth,
+   and anything you may have heard more than a second ago regarding
+   records of this name/rrtype/rrclass is no longer true".
+
+   To accommodate the case where the set of records from one host
+   constituting a single unique RRSet is too large to fit in a single
+   packet, only cache records that are more than one second old are
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 34]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   flushed.  This allows the announcing host to generate a quick burst
+   of packets back-to-back on the wire containing all the members of the
+   RRSet.  When receiving records with the cache-flush bit set, all
+   records older than one second are marked to be deleted one second in
+   the future.  One second after the end of the little packet burst, any
+   records not represented within that packet burst will then be expired
+   from all peer caches.
+
+   Any time a host sends a response packet containing some members of a
+   unique RRSet, it MUST send the entire RRSet, preferably in a single
+   packet, or if the entire RRSet will not fit in a single packet, in a
+   quick burst of packets sent as close together as possible.  The host
+   MUST set the cache-flush bit on all members of the unique RRSet.
+
+   Another reason for waiting one second before deleting stale records
+   from the cache is to accommodate bridged networks.  For example, a
+   host's address record announcement on a wireless interface may be
+   bridged onto a wired Ethernet and may cause that same host's Ethernet
+   address records to be flushed from peer caches.  The one-second delay
+   gives the host the chance to see its own announcement arrive on the
+   wired Ethernet, and immediately re-announce its Ethernet interface's
+   address records so that both sets remain valid and live in peer
+   caches.
+
+   These rules, about when to set the cache-flush bit and about sending
+   the entire rrset, apply regardless of *why* the response message is
+   being generated.  They apply to startup announcements as described in
+   Section 8.3, "Announcing", and to responses generated as a result of
+   receiving query messages.
+
+   The cache-flush bit is only set in records in the Resource Record
+   Sections of Multicast DNS responses sent to UDP port 5353.
+
+   The cache-flush bit MUST NOT be set in any resource records in a
+   response message sent in legacy unicast responses to UDP ports other
+   than 5353.
+
+   The cache-flush bit MUST NOT be set in any resource records in the
+   Known-Answer list of any query message.
+
+   The cache-flush bit MUST NOT ever be set in any shared resource
+   record.  To do so would cause all the other shared versions of this
+   resource record with different rdata from different responders to be
+   immediately deleted from all the caches on the network.
+
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 35]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   The cache-flush bit does *not* apply to questions listed in the
+   Question Section of a Multicast DNS message.  The top bit of the
+   rrclass field in questions is used for an entirely different purpose
+   (see Section 5.4, "Questions Requesting Unicast Responses").
+
+   Note that the cache-flush bit is NOT part of the resource record
+   class.  The cache-flush bit is the most significant bit of the second
+   16-bit word of a resource record in a Resource Record Section of a
+   Multicast DNS message (the field conventionally referred to as the
+   rrclass field), and the actual resource record class is the least
+   significant fifteen bits of this field.  There is no Multicast DNS
+   resource record class 0x8001.  The value 0x8001 in the rrclass field
+   of a resource record in a Multicast DNS response message indicates a
+   resource record with class 1, with the cache-flush bit set.  When
+   receiving a resource record with the cache-flush bit set,
+   implementations should take care to mask off that bit before storing
+   the resource record in memory, or otherwise ensure that it is given
+   the correct semantic interpretation.
+
+   The reuse of the top bit of the rrclass field only applies to
+   conventional resource record types that are subject to caching, not
+   to pseudo-RRs like OPT [RFC2671], TSIG [RFC2845], TKEY [RFC2930],
+   SIG0 [RFC2931], etc., that pertain only to a particular transport
+   level message and not to any actual DNS data.  Since pseudo-RRs
+   should never go into the Multicast DNS cache, the concept of a cache-
+   flush bit for these types is not applicable.  In particular, the
+   rrclass field of an OPT record encodes the sender's UDP payload size,
+   and should be interpreted as a sixteen-bit length value in the range
+   0-65535, not a one-bit flag and a fifteen-bit length.
+
+10.3.  Cache Flush on Topology change
+
+   If the hardware on a given host is able to indicate physical changes
+   of connectivity, then when the hardware indicates such a change, the
+   host should take this information into account in its Multicast DNS
+   cache management strategy.  For example, a host may choose to
+   immediately flush all cache records received on a particular
+   interface when that cable is disconnected.  Alternatively, a host may
+   choose to adjust the remaining TTL on all those records to a few
+   seconds so that if the cable is not reconnected quickly, those
+   records will expire from the cache.
+
+   Likewise, when a host reboots, wakes from sleep, or undergoes some
+   other similar discontinuous state change, the cache management
+   strategy should take that information into account.
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 36]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+10.4.  Cache Flush on Failure Indication
+
+   Sometimes a cache record can be determined to be stale when a client
+   attempts to use the rdata it contains, and the client finds that
+   rdata to be incorrect.
+
+   For example, the rdata in an address record can be determined to be
+   incorrect if attempts to contact that host fail, either because (for
+   an IPv4 address on a local subnet) ARP requests for that address go
+   unanswered, because (for an IPv6 address with an on-link prefix) ND
+   requests for that address go unanswered, or because (for an address
+   on a remote network) a router returns an ICMP "Host Unreachable"
+   error.
+
+   The rdata in an SRV record can be determined to be incorrect if
+   attempts to communicate with the indicated service at the host and
+   port number indicated are not successful.
+
+   The rdata in a DNS-SD PTR record can be determined to be incorrect if
+   attempts to look up the SRV record it references are not successful.
+
+   The software implementing the Multicast DNS resource record cache
+   should provide a mechanism so that clients detecting stale rdata can
+   inform the cache.
+
+   When the cache receives this hint that it should reconfirm some
+   record, it MUST issue two or more queries for the resource record in
+   dispute.  If no response is received within ten seconds, then, even
+   though its TTL may indicate that it is not yet due to expire, that
+   record SHOULD be promptly flushed from the cache.
+
+   The end result of this is that if a printer suffers a sudden power
+   failure or other abrupt disconnection from the network, its name may
+   continue to appear in DNS-SD browser lists displayed on users'
+   screens.  Eventually, that entry will expire from the cache
+   naturally, but if a user tries to access the printer before that
+   happens, the failure to successfully contact the printer will trigger
+   the more hasty demise of its cache entries.  This is a sensible
+   trade-off between good user experience and good network efficiency.
+   If we were to insist that printers should disappear from the printer
+   list within 30 seconds of becoming unavailable, for all failure
+   modes, the only way to achieve this would be for the client to poll
+   the printer at least every 30 seconds, or for the printer to announce
+   its presence at least every 30 seconds, both of which would be an
+   unreasonable burden on most networks.
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 37]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+10.5.  Passive Observation Of Failures (POOF)
+
+   A host observes the multicast queries issued by the other hosts on
+   the network.  One of the major benefits of also sending responses
+   using multicast is that it allows all hosts to see the responses (or
+   lack thereof) to those queries.
+
+   If a host sees queries, for which a record in its cache would be
+   expected to be given as an answer in a multicast response, but no
+   such answer is seen, then the host may take this as an indication
+   that the record may no longer be valid.
+
+   After seeing two or more of these queries, and seeing no multicast
+   response containing the expected answer within ten seconds, then even
+   though its TTL may indicate that it is not yet due to expire, that
+   record SHOULD be flushed from the cache.  The host SHOULD NOT perform
+   its own queries to reconfirm that the record is truly gone.  If every
+   host on a large network were to do this, it would cause a lot of
+   unnecessary multicast traffic.  If host A sends multicast queries
+   that remain unanswered, then there is no reason to suppose that host
+   B or any other host is likely to be any more successful.
+
+   The previous section, "Cache Flush on Failure Indication", describes
+   a situation where a user trying to print discovers that the printer
+   is no longer available.  By implementing the passive observation
+   described here, when one user fails to contact the printer, all hosts
+   on the network observe that failure and update their caches
+   accordingly.
+
+11.  Source Address Check
+
+   All Multicast DNS responses (including responses sent via unicast)
+   SHOULD be sent with IP TTL set to 255.  This is recommended to
+   provide backwards-compatibility with older Multicast DNS queriers
+   (implementing a draft version of this document, posted in February
+   2004) that check the IP TTL on reception to determine whether the
+   packet originated on the local link.  These older queriers discard
+   all packets with TTLs other than 255.
+
+   A host sending Multicast DNS queries to a link-local destination
+   address (including the 224.0.0.251 and FF02::FB link-local multicast
+   addresses) MUST only accept responses to that query that originate
+   from the local link, and silently discard any other response packets.
+   Without this check, it could be possible for remote rogue hosts to
+   send spoof answer packets (perhaps unicast to the victim host), which
+   the receiving machine could misinterpret as having originated on the
+   local link.
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 38]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   The test for whether a response originated on the local link is done
+   in two ways:
+
+      * All responses received with a destination address in the IP
+        header that is the mDNS IPv4 link-local multicast address
+        224.0.0.251 or the mDNS IPv6 link-local multicast address
+        FF02::FB are necessarily deemed to have originated on the local
+        link, regardless of source IP address.  This is essential to
+        allow devices to work correctly and reliably in unusual
+        configurations, such as multiple logical IP subnets overlaid on
+        a single link, or in cases of severe misconfiguration, where
+        devices are physically connected to the same link, but are
+        currently misconfigured with completely unrelated IP addresses
+        and subnet masks.
+
+      * For responses received with a unicast destination address in the
+        IP header, the source IP address in the packet is checked to see
+        if it is an address on a local subnet.  An IPv4 source address
+        is determined to be on a local subnet if, for (one of) the
+        address(es) configured on the interface receiving the packet, (I
+        & M) == (P & M), where I and M are the interface address and
+        subnet mask respectively, P is the source IP address from the
+        packet, '&' represents the bitwise logical 'and' operation, and
+        '==' represents a bitwise equality test.  An IPv6 source address
+        is determined to be on the local link if, for any of the on-link
+        IPv6 prefixes on the interface receiving the packet (learned via
+        IPv6 router advertisements or otherwise configured on the host),
+        the first 'n' bits of the IPv6 source address match the first
+        'n' bits of the prefix address, where 'n' is the length of the
+        prefix being considered.
+
+   Since queriers will ignore responses apparently originating outside
+   the local subnet, a responder SHOULD avoid generating responses that
+   it can reasonably predict will be ignored.  This applies particularly
+   in the case of overlaid subnets.  If a responder receives a query
+   addressed to the mDNS IPv4 link-local multicast address 224.0.0.251,
+   from a source address not apparently on the same subnet as the
+   responder (or, in the case of IPv6, from a source IPv6 address for
+   which the responder does not have any address with the same prefix on
+   that interface), then even if the query indicates that a unicast
+   response is preferred (see Section 5.4, "Questions Requesting Unicast
+   Responses"), the responder SHOULD elect to respond by multicast
+   anyway, since it can reasonably predict that a unicast response with
+   an apparently non-local source address will probably be ignored.
+
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 39]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+12.  Special Characteristics of Multicast DNS Domains
+
+   Unlike conventional DNS names, names that end in ".local." have only
+   local significance.  The same is true of names within the IPv4 link-
+   local reverse mapping domain "254.169.in-addr.arpa." and the IPv6
+   link-local reverse mapping domains "8.e.f.ip6.arpa.",
+   "9.e.f.ip6.arpa.", "a.e.f.ip6.arpa.", and "b.e.f.ip6.arpa.".
+
+   These names function primarily as protocol identifiers, rather than
+   as user-visible identifiers.  Even though they may occasionally be
+   visible to end users, that is not their primary purpose.  As such,
+   these names should be treated as opaque identifiers.  In particular,
+   the string "local" should not be translated or localized into
+   different languages, much as the name "localhost" is not translated
+   or localized into different languages.
+
+   Conventional Unicast DNS seeks to provide a single unified namespace,
+   where a given DNS query yields the same answer no matter where on the
+   planet it is performed or to which recursive DNS server the query is
+   sent.  In contrast, each IP link has its own private ".local.",
+   "254.169.in-addr.arpa." and IPv6 link-local reverse mapping
+   namespaces, and the answer to any query for a name within those
+   domains depends on where that query is asked.  (This characteristic
+   is not unique to Multicast DNS.  Although the original concept of DNS
+   was a single global namespace, in recent years, split views,
+   firewalls, intranets, DNS geolocation, and the like have increasingly
+   meant that the answer to a given DNS query has become dependent on
+   the location of the querier.)
+
+   The IPv4 name server address for a Multicast DNS domain is
+   224.0.0.251.  The IPv6 name server address for a Multicast DNS domain
+   is FF02::FB.  These are multicast addresses; therefore, they identify
+   not a single host but a collection of hosts, working in cooperation
+   to maintain some reasonable facsimile of a competently managed DNS
+   zone.  Conceptually, a Multicast DNS domain is a single DNS zone;
+   however, its server is implemented as a distributed process running
+   on a cluster of loosely cooperating CPUs rather than as a single
+   process running on a single CPU.
+
+   Multicast DNS domains are not delegated from their parent domain via
+   use of NS (Name Server) records, and there is also no concept of
+   delegation of subdomains within a Multicast DNS domain.  Just because
+   a particular host on the network may answer queries for a particular
+   record type with the name "example.local." does not imply anything
+   about whether that host will answer for the name
+   "child.example.local.", or indeed for other record types with the
+   name "example.local.".
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 40]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   There are no NS records anywhere in Multicast DNS domains.  Instead,
+   the Multicast DNS domains are reserved by IANA, and there is
+   effectively an implicit delegation of all Multicast DNS domains to
+   the 224.0.0.251:5353 and [FF02::FB]:5353 multicast groups, by virtue
+   of client software implementing the protocol rules specified in this
+   document.
+
+   Multicast DNS zones have no SOA (Start of Authority) record.  A
+   conventional DNS zone's SOA record contains information such as the
+   email address of the zone administrator and the monotonically
+   increasing serial number of the last zone modification.  There is no
+   single human administrator for any given Multicast DNS zone, so there
+   is no email address.  Because the hosts managing any given Multicast
+   DNS zone are only loosely coordinated, there is no readily available
+   monotonically increasing serial number to determine whether or not
+   the zone contents have changed.  A host holding part of the shared
+   zone could crash or be disconnected from the network at any time
+   without informing the other hosts.  There is no reliable way to
+   provide a zone serial number that would, whenever such a crash or
+   disconnection occurred, immediately change to indicate that the
+   contents of the shared zone had changed.
+
+   Zone transfers are not possible for any Multicast DNS zone.
+
+13.  Enabling and Disabling Multicast DNS
+
+   The option to fail-over to Multicast DNS for names not ending in
+   ".local." SHOULD be a user-configured option, and SHOULD be disabled
+   by default because of the possible security issues related to
+   unintended local resolution of apparently global names.  Enabling
+   Multicast DNS for names not ending in ".local." may be appropriate on
+   a secure isolated network, or on some future network where machines
+   exclusively use DNSSEC for all DNS queries, and have Multicast DNS
+   responders capable of generating the appropriate cryptographic DNSSEC
+   signatures, thereby guarding against spoofing.
+
+   The option to look up unqualified (relative) names by appending
+   ".local." (or not) is controlled by whether ".local." appears (or
+   not) in the client's DNS search list.
+
+   No special control is needed for enabling and disabling Multicast DNS
+   for names explicitly ending with ".local." as entered by the user.
+   The user doesn't need a way to disable Multicast DNS for names ending
+   with ".local.", because if the user doesn't want to use Multicast
+   DNS, they can achieve this by simply not using those names.  If a
+   user *does* enter a name ending in ".local.", then we can safely
+   assume the user's intention was probably that it should work.  Having
+   user configuration options that can be (intentionally or
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 41]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   unintentionally) set so that local names don't work is just one more
+   way of frustrating the user's ability to perform the tasks they want,
+   perpetuating the view that, "IP networking is too complicated to
+   configure and too hard to use".
+
+14.  Considerations for Multiple Interfaces
+
+   A host SHOULD defend its dot-local host name on all active interfaces
+   on which it is answering Multicast DNS queries.
+
+   In the event of a name conflict on *any* interface, a host should
+   configure a new host name, if it wishes to maintain uniqueness of its
+   host name.
+
+   A host may choose to use the same name (or set of names) for all of
+   its address records on all interfaces, or it may choose to manage its
+   Multicast DNS interfaces independently, potentially answering to a
+   different name (or set of names) on different interfaces.
+
+   Except in the case of proxying and other similar specialized uses,
+   addresses in IPv4 or IPv6 address records in Multicast DNS responses
+   MUST be valid for use on the interface on which the response is being
+   sent.
+
+   Just as the same link-local IP address may validly be in use
+   simultaneously on different links by different hosts, the same link-
+   local host name may validly be in use simultaneously on different
+   links, and this is not an error.  A multihomed host with connections
+   to two different links may be able to communicate with two different
+   hosts that are validly using the same name.  While this kind of name
+   duplication should be rare, it means that a host that wants to fully
+   support this case needs network programming APIs that allow
+   applications to specify on what interface to perform a link-local
+   Multicast DNS query, and to discover on what interface a Multicast
+   DNS response was received.
+
+   There is one other special precaution that multihomed hosts need to
+   take.  It's common with today's laptop computers to have an Ethernet
+   connection and an 802.11 [IEEE.802.11] wireless connection active at
+   the same time.  What the software on the laptop computer can't easily
+   tell is whether the wireless connection is in fact bridged onto the
+   same network segment as its Ethernet connection.  If the two networks
+   are bridged together, then packets the host sends on one interface
+   will arrive on the other interface a few milliseconds later, and care
+   must be taken to ensure that this bridging does not cause problems:
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 42]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   When the host announces its host name (i.e., its address records) on
+   its wireless interface, those announcement records are sent with the
+   cache-flush bit set, so when they arrive on the Ethernet segment,
+   they will cause all the peers on the Ethernet to flush the host's
+   Ethernet address records from their caches.  The Multicast DNS
+   protocol has a safeguard to protect against this situation: when
+   records are received with the cache-flush bit set, other records are
+   not deleted from peer caches immediately, but are marked for deletion
+   in one second.  When the host sees its own wireless address records
+   arrive on its Ethernet interface, with the cache-flush bit set, this
+   one-second grace period gives the host time to respond and re-
+   announce its Ethernet address records, to reinstate those records in
+   peer caches before they are deleted.
+
+   As described, this solves one problem, but creates another, because
+   when those Ethernet announcement records arrive back on the wireless
+   interface, the host would again respond defensively to reinstate its
+   wireless records, and this process would continue forever,
+   continuously flooding the network with traffic.  The Multicast DNS
+   protocol has a second safeguard, to solve this problem: the cache-
+   flush bit does not apply to records received very recently, within
+   the last second.  This means that when the host sees its own Ethernet
+   address records arrive on its wireless interface, with the cache-
+   flush bit set, it knows there's no need to re-announce its wireless
+   address records again because it already sent them less than a second
+   ago, and this makes them immune from deletion from peer caches.  (See
+   Section 10.2.)
+
+15.  Considerations for Multiple Responders on the Same Machine
+
+   It is possible to have more than one Multicast DNS responder and/or
+   querier implementation coexist on the same machine, but there are
+   some known issues.
+
+15.1.  Receiving Unicast Responses
+
+   In most operating systems, incoming *multicast* packets can be
+   delivered to *all* open sockets bound to the right port number,
+   provided that the clients take the appropriate steps to allow this.
+   For this reason, all Multicast DNS implementations SHOULD use the
+   SO_REUSEPORT and/or SO_REUSEADDR options (or equivalent as
+   appropriate for the operating system in question) so they will all be
+   able to bind to UDP port 5353 and receive incoming multicast packets
+   addressed to that port.  However, unlike multicast packets, incoming
+   unicast UDP packets are typically delivered only to the first socket
+   to bind to that port.  This means that "QU" responses and other
+   packets sent via unicast will be received only by the first Multicast
+   DNS responder and/or querier on a system.  This limitation can be
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 43]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   partially mitigated if Multicast DNS implementations detect when they
+   are not the first to bind to port 5353, and in that case they do not
+   request "QU" responses.  One way to detect if there is another
+   Multicast DNS implementation already running is to attempt binding to
+   port 5353 without using SO_REUSEPORT and/or SO_REUSEADDR, and if that
+   fails it indicates that some other socket is already bound to this
+   port.
+
+15.2.  Multipacket Known-Answer lists
+
+   When a Multicast DNS querier issues a query with too many Known
+   Answers to fit into a single packet, it divides the Known-Answer list
+   into two or more packets.  Multicast DNS responders associate the
+   initial truncated query with its continuation packets by examining
+   the source IP address in each packet.  Since two independent
+   Multicast DNS queriers running on the same machine will be sending
+   packets with the same source IP address, from an outside perspective
+   they appear to be a single entity.  If both queriers happened to send
+   the same multipacket query at the same time, with different Known-
+   Answer lists, then they could each end up suppressing answers that
+   the other needs.
+
+15.3.  Efficiency
+
+   If different clients on a machine were each to have their own
+   independent Multicast DNS implementation, they would lose certain
+   efficiency benefits.  Apart from the unnecessary code duplication,
+   memory usage, and CPU load, the clients wouldn't get the benefit of a
+   shared system-wide cache, and they would not be able to aggregate
+   separate queries into single packets to reduce network traffic.
+
+15.4.  Recommendation
+
+   Because of these issues, this document encourages implementers to
+   design systems with a single Multicast DNS implementation that
+   provides Multicast DNS services shared by all clients on that
+   machine, much as most operating systems today have a single TCP
+   implementation, which is shared between all clients on that machine.
+   Due to engineering constraints, there may be situations where
+   embedding a "user-level" Multicast DNS implementation in the client
+   application software is the most expedient solution, and while this
+   will usually work in practice, implementers should be aware of the
+   issues outlined in this section.
+
+
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 44]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+16.  Multicast DNS Character Set
+
+   Historically, Unicast DNS has been used with a very restricted set of
+   characters.  Indeed, conventional DNS is usually limited to just
+   twenty-six letters, ten digits and the hyphen character, not even
+   allowing spaces or other punctuation.  Attempts to remedy this for
+   Unicast DNS have been badly constrained by the perceived need to
+   accommodate old buggy legacy DNS implementations.  In reality, the
+   DNS specification itself actually imposes no limits on what
+   characters may be used in names, and good DNS implementations handle
+   any arbitrary eight-bit data without trouble.  "Clarifications to the
+   DNS Specification" [RFC2181] directly discusses the subject of
+   allowable character set in Section 11 ("Name syntax"), and explicitly
+   states that DNS names may contain arbitrary eight-bit data.  However,
+   the old rules for ARPANET host names back in the 1980s required host
+   names to be just letters, digits, and hyphens [RFC1034], and since
+   the predominant use of DNS is to store host address records, many
+   have assumed that the DNS protocol itself suffers from the same
+   limitation.  It might be accurate to say that there could be
+   hypothetical bad implementations that do not handle eight-bit data
+   correctly, but it would not be accurate to say that the protocol
+   doesn't allow names containing eight-bit data.
+
+   Multicast DNS is a new protocol and doesn't (yet) have old buggy
+   legacy implementations to constrain the design choices.  Accordingly,
+   it adopts the simple obvious elegant solution: all names in Multicast
+   DNS MUST be encoded as precomposed UTF-8 [RFC3629] "Net-Unicode"
+   [RFC5198] text.
+
+   Some users of 16-bit Unicode have taken to stuffing a "zero-width
+   nonbreaking space" character (U+FEFF) at the start of each UTF-16
+   file, as a hint to identify whether the data is big-endian or little-
+   endian, and calling it a "Byte Order Mark" (BOM).  Since there is
+   only one possible byte order for UTF-8 data, a BOM is neither
+   necessary nor permitted.  Multicast DNS names MUST NOT contain a
+   "Byte Order Mark".  Any occurrence of the Unicode character U+FEFF at
+   the start or anywhere else in a Multicast DNS name MUST be
+   interpreted as being an actual intended part of the name,
+   representing (just as for any other legal Unicode value) an actual
+   literal instance of that character (in this case a zero-width non-
+   breaking space character).
+
+   For names that are restricted to US-ASCII [RFC0020] letters, digits,
+   and hyphens, the UTF-8 encoding is identical to the US-ASCII
+   encoding, so this is entirely compatible with existing host names.
+   For characters outside the US-ASCII range, UTF-8 encoding is used.
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 45]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   Multicast DNS implementations MUST NOT use any other encodings apart
+   from precomposed UTF-8 (US-ASCII being considered a compatible subset
+   of UTF-8).  The reasons for selecting UTF-8 instead of Punycode
+   [RFC3492] are discussed further in Appendix F.
+
+   The simple rules for case-insensitivity in Unicast DNS [RFC1034]
+   [RFC1035] also apply in Multicast DNS; that is to say, in name
+   comparisons, the lowercase letters "a" to "z" (0x61 to 0x7A) match
+   their uppercase equivalents "A" to "Z" (0x41 to 0x5A).  Hence, if a
+   querier issues a query for an address record with the name
+   "myprinter.local.", then a responder having an address record with
+   the name "MyPrinter.local." should issue a response.  No other
+   automatic equivalences should be assumed.  In particular, all UTF-8
+   multibyte characters (codes 0x80 and higher) are compared by simple
+   binary comparison of the raw byte values.  Accented characters are
+   *not* defined to be automatically equivalent to their unaccented
+   counterparts.  Where automatic equivalences are desired, this may be
+   achieved through the use of programmatically generated CNAME records.
+   For example, if a responder has an address record for an accented
+   name Y, and a querier issues a query for a name X, where X is the
+   same as Y with all the accents removed, then the responder may issue
+   a response containing two resource records: a CNAME record "X CNAME
+   Y", asserting that the requested name X (unaccented) is an alias for
+   the true (accented) name Y, followed by the address record for Y.
+
+17.  Multicast DNS Message Size
+
+   The 1987 DNS specification [RFC1035] restricts DNS messages carried
+   by UDP to no more than 512 bytes (not counting the IP or UDP
+   headers).  For UDP packets carried over the wide-area Internet in
+   1987, this was appropriate.  For link-local multicast packets on
+   today's networks, there is no reason to retain this restriction.
+   Given that the packets are by definition link-local, there are no
+   Path MTU issues to consider.
+
+   Multicast DNS messages carried by UDP may be up to the IP MTU of the
+   physical interface, less the space required for the IP header (20
+   bytes for IPv4; 40 bytes for IPv6) and the UDP header (8 bytes).
+
+   In the case of a single Multicast DNS resource record that is too
+   large to fit in a single MTU-sized multicast response packet, a
+   Multicast DNS responder SHOULD send the resource record alone, in a
+   single IP datagram, using multiple IP fragments.  Resource records
+   this large SHOULD be avoided, except in the very rare cases where
+   they really are the appropriate solution to the problem at hand.
+   Implementers should be aware that many simple devices do not
+   reassemble fragmented IP datagrams, so large resource records SHOULD
+   NOT be used except in specialized cases where the implementer knows
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 46]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   that all receivers implement reassembly, or where the large resource
+   record contains optional data which is not essential for correct
+   operation of the client.
+
+   A Multicast DNS packet larger than the interface MTU, which is sent
+   using fragments, MUST NOT contain more than one resource record.
+
+   Even when fragmentation is used, a Multicast DNS packet, including IP
+   and UDP headers, MUST NOT exceed 9000 bytes.
+
+   Note that 9000 bytes is also the maximum payload size of an Ethernet
+   "Jumbo" packet [Jumbo].  However, in practice Ethernet "Jumbo"
+   packets are not widely used, so it is advantageous to keep packets
+   under 1500 bytes whenever possible.  Even on hosts that normally
+   handle Ethernet "Jumbo" packets and IP fragment reassembly, it is
+   becoming more common for these hosts to implement power-saving modes
+   where the main CPU goes to sleep and hands off packet reception tasks
+   to a more limited processor in the network interface hardware, which
+   may not support Ethernet "Jumbo" packets or IP fragment reassembly.
+
+18.  Multicast DNS Message Format
+
+   This section describes specific rules pertaining to the allowable
+   values for the header fields of a Multicast DNS message, and other
+   message format considerations.
+
+18.1.  ID (Query Identifier)
+
+   Multicast DNS implementations SHOULD listen for unsolicited responses
+   issued by hosts booting up (or waking up from sleep or otherwise
+   joining the network).  Since these unsolicited responses may contain
+   a useful answer to a question for which the querier is currently
+   awaiting an answer, Multicast DNS implementations SHOULD examine all
+   received Multicast DNS response messages for useful answers, without
+   regard to the contents of the ID field or the Question Section.  In
+   Multicast DNS, knowing which particular query message (if any) is
+   responsible for eliciting a particular response message is less
+   interesting than knowing whether the response message contains useful
+   information.
+
+   Multicast DNS implementations MAY cache data from any or all
+   Multicast DNS response messages they receive, for possible future
+   use, provided of course that normal TTL aging is performed on these
+   cached resource records.
+
+   In multicast query messages, the Query Identifier SHOULD be set to
+   zero on transmission.
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 47]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   In multicast responses, including unsolicited multicast responses,
+   the Query Identifier MUST be set to zero on transmission, and MUST be
+   ignored on reception.
+
+   In legacy unicast response messages generated specifically in
+   response to a particular (unicast or multicast) query, the Query
+   Identifier MUST match the ID from the query message.
+
+18.2.  QR (Query/Response) Bit
+
+   In query messages the QR bit MUST be zero.
+   In response messages the QR bit MUST be one.
+
+18.3.  OPCODE
+
+   In both multicast query and multicast response messages, the OPCODE
+   MUST be zero on transmission (only standard queries are currently
+   supported over multicast).  Multicast DNS messages received with an
+   OPCODE other than zero MUST be silently ignored.
+
+18.4.  AA (Authoritative Answer) Bit
+
+   In query messages, the Authoritative Answer bit MUST be zero on
+   transmission, and MUST be ignored on reception.
+
+   In response messages for Multicast domains, the Authoritative Answer
+   bit MUST be set to one (not setting this bit would imply there's some
+   other place where "better" information may be found) and MUST be
+   ignored on reception.
+
+18.5.  TC (Truncated) Bit
+
+   In query messages, if the TC bit is set, it means that additional
+   Known-Answer records may be following shortly.  A responder SHOULD
+   record this fact, and wait for those additional Known-Answer records,
+   before deciding whether to respond.  If the TC bit is clear, it means
+   that the querying host has no additional Known Answers.
+
+   In multicast response messages, the TC bit MUST be zero on
+   transmission, and MUST be ignored on reception.
+
+   In legacy unicast response messages, the TC bit has the same meaning
+   as in conventional Unicast DNS: it means that the response was too
+   large to fit in a single packet, so the querier SHOULD reissue its
+   query using TCP in order to receive the larger response.
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 48]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+18.6.  RD (Recursion Desired) Bit
+
+   In both multicast query and multicast response messages, the
+   Recursion Desired bit SHOULD be zero on transmission, and MUST be
+   ignored on reception.
+
+18.7.  RA (Recursion Available) Bit
+
+   In both multicast query and multicast response messages, the
+   Recursion Available bit MUST be zero on transmission, and MUST be
+   ignored on reception.
+
+18.8.  Z (Zero) Bit
+
+   In both query and response messages, the Zero bit MUST be zero on
+   transmission, and MUST be ignored on reception.
+
+18.9.  AD (Authentic Data) Bit
+
+   In both multicast query and multicast response messages, the
+   Authentic Data bit [RFC2535] MUST be zero on transmission, and MUST
+   be ignored on reception.
+
+18.10.  CD (Checking Disabled) Bit
+
+   In both multicast query and multicast response messages, the Checking
+   Disabled bit [RFC2535] MUST be zero on transmission, and MUST be
+   ignored on reception.
+
+18.11.  RCODE (Response Code)
+
+   In both multicast query and multicast response messages, the Response
+   Code MUST be zero on transmission.  Multicast DNS messages received
+   with non-zero Response Codes MUST be silently ignored.
+
+18.12.  Repurposing of Top Bit of qclass in Question Section
+
+   In the Question Section of a Multicast DNS query, the top bit of the
+   qclass field is used to indicate that unicast responses are preferred
+   for this particular question.  (See Section 5.4.)
+
+18.13.  Repurposing of Top Bit of rrclass in Resource Record Sections
+
+   In the Resource Record Sections of a Multicast DNS response, the top
+   bit of the rrclass field is used to indicate that the record is a
+   member of a unique RRSet, and the entire RRSet has been sent together
+   (in the same packet, or in consecutive packets if there are too many
+   records to fit in a single packet).  (See Section 10.2.)
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 49]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+18.14.  Name Compression
+
+   When generating Multicast DNS messages, implementations SHOULD use
+   name compression wherever possible to compress the names of resource
+   records, by replacing some or all of the resource record name with a
+   compact two-byte reference to an appearance of that data somewhere
+   earlier in the message [RFC1035].
+
+   This applies not only to Multicast DNS responses, but also to
+   queries.  When a query contains more than one question, successive
+   questions in the same message often contain similar names, and
+   consequently name compression SHOULD be used, to save bytes.  In
+   addition, queries may also contain Known Answers in the Answer
+   Section, or probe tiebreaking data in the Authority Section, and
+   these names SHOULD similarly be compressed for network efficiency.
+
+   In addition to compressing the *names* of resource records, names
+   that appear within the *rdata* of the following rrtypes SHOULD also
+   be compressed in all Multicast DNS messages:
+
+     NS, CNAME, PTR, DNAME, SOA, MX, AFSDB, RT, KX, RP, PX, SRV, NSEC
+
+   Until future IETF Standards Action [RFC5226] specifying that names in
+   the rdata of other types should be compressed, names that appear
+   within the rdata of any type not listed above MUST NOT be compressed.
+
+   Implementations receiving Multicast DNS messages MUST correctly
+   decode compressed names appearing in the Question Section, and
+   compressed names of resource records appearing in other sections.
+
+   In addition, implementations MUST correctly decode compressed names
+   appearing within the *rdata* of the rrtypes listed above.  Where
+   possible, implementations SHOULD also correctly decode compressed
+   names appearing within the *rdata* of other rrtypes known to the
+   implementers at the time of implementation, because such forward-
+   thinking planning helps facilitate the deployment of future
+   implementations that may have reason to compress those rrtypes.  It
+   is possible that no future IETF Standards Action [RFC5226] will be
+   created that mandates or permits the compression of rdata in new
+   types, but having implementations designed such that they are capable
+   of decompressing all known types helps keep future options open.
+
+   One specific difference between Unicast DNS and Multicast DNS is that
+   Unicast DNS does not allow name compression for the target host in an
+   SRV record, because Unicast DNS implementations before the first SRV
+   specification in 1996 [RFC2052] may not decode these compressed
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 50]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   records properly.  Since all Multicast DNS implementations were
+   created after 1996, all Multicast DNS implementations are REQUIRED to
+   decode compressed SRV records correctly.
+
+   In legacy unicast responses generated to answer legacy queries, name
+   compression MUST NOT be performed on SRV records.
+
+19.  Summary of Differences between Multicast DNS and Unicast DNS
+
+   Multicast DNS shares, as much as possible, the familiar APIs, naming
+   syntax, resource record types, etc., of Unicast DNS.  There are, of
+   course, necessary differences by virtue of it using multicast, and by
+   virtue of it operating in a community of cooperating peers, rather
+   than a precisely defined hierarchy controlled by a strict chain of
+   formal delegations from the root.  These differences are summarized
+   below:
+
+   Multicast DNS...
+   * uses multicast
+   * uses UDP port 5353 instead of port 53
+   * operates in well-defined parts of the DNS namespace
+   * has no SOA (Start of Authority) records
+   * uses UTF-8, and only UTF-8, to encode resource record names
+   * allows names up to 255 bytes plus a terminating zero byte
+   * allows name compression in rdata for SRV and other record types
+   * allows larger UDP packets
+   * allows more than one question in a query message
+   * defines consistent results for qtype "ANY" and qclass "ANY" queries
+   * uses the Answer Section of a query to list Known Answers
+   * uses the TC bit in a query to indicate additional Known Answers
+   * uses the Authority Section of a query for probe tiebreaking
+   * ignores the Query ID field (except for generating legacy responses)
+   * doesn't require the question to be repeated in the response message
+   * uses unsolicited responses to announce new records
+   * uses NSEC records to signal nonexistence of records
+   * defines a unicast-response bit in the rrclass of query questions
+   * defines a cache-flush bit in the rrclass of response records
+   * uses DNS RR TTL 0 to indicate that a record has been deleted
+   * recommends AAAA records in the additional section when responding
+     to rrtype "A" queries, and vice versa
+   * monitors queries to perform Duplicate Question Suppression
+   * monitors responses to perform Duplicate Answer Suppression...
+   * ... and Ongoing Conflict Detection
+   * ... and Opportunistic Caching
+
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 51]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+20.  IPv6 Considerations
+
+   An IPv4-only host and an IPv6-only host behave as "ships that pass in
+   the night".  Even if they are on the same Ethernet, neither is aware
+   of the other's traffic.  For this reason, each physical link may have
+   *two* unrelated ".local." zones, one for IPv4 and one for IPv6.
+   Since for practical purposes, a group of IPv4-only hosts and a group
+   of IPv6-only hosts on the same Ethernet act as if they were on two
+   entirely separate Ethernet segments, it is unsurprising that their
+   use of the ".local." zone should occur exactly as it would if they
+   really were on two entirely separate Ethernet segments.
+
+   A dual-stack (v4/v6) host can participate in both ".local." zones,
+   and should register its name(s) and perform its lookups both using
+   IPv4 and IPv6.  This enables it to reach, and be reached by, both
+   IPv4-only and IPv6-only hosts.  In effect, this acts like a
+   multihomed host, with one connection to the logical "IPv4 Ethernet
+   segment", and a connection to the logical "IPv6 Ethernet segment".
+   When such a host generates NSEC records, if it is using the same host
+   name for its IPv4 addresses and its IPv6 addresses on that network
+   interface, its NSEC records should indicate that the host name has
+   both A and AAAA records.
+
+21.  Security Considerations
+
+   The algorithm for detecting and resolving name conflicts is, by its
+   very nature, an algorithm that assumes cooperating participants.  Its
+   purpose is to allow a group of hosts to arrive at a mutually disjoint
+   set of host names and other DNS resource record names, in the absence
+   of any central authority to coordinate this or mediate disputes.  In
+   the absence of any higher authority to resolve disputes, the only
+   alternative is that the participants must work together cooperatively
+   to arrive at a resolution.
+
+   In an environment where the participants are mutually antagonistic
+   and unwilling to cooperate, other mechanisms are appropriate, like
+   manually configured DNS.
+
+   In an environment where there is a group of cooperating participants,
+   but clients cannot be sure that there are no antagonistic hosts on
+   the same physical link, the cooperating participants need to use
+   IPsec signatures and/or DNSSEC [RFC4033] signatures so that they can
+   distinguish Multicast DNS messages from trusted participants (which
+   they process as usual) from Multicast DNS messages from untrusted
+   participants (which they silently discard).
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 52]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   If DNS queries for *global* DNS names are sent to the mDNS multicast
+   address (during network outages which disrupt communication with the
+   greater Internet) it is *especially* important to use DNSSEC, because
+   the user may have the impression that he or she is communicating with
+   some authentic host, when in fact he or she is really communicating
+   with some local host that is merely masquerading as that name.  This
+   is less critical for names ending with ".local.", because the user
+   should be aware that those names have only local significance and no
+   global authority is implied.
+
+   Most computer users neglect to type the trailing dot at the end of a
+   fully qualified domain name, making it a relative domain name (e.g.,
+   "www.example.com").  In the event of network outage, attempts to
+   positively resolve the name as entered will fail, resulting in
+   application of the search list, including ".local.", if present.  A
+   malicious host could masquerade as "www.example.com." by answering
+   the resulting Multicast DNS query for "www.example.com.local.".  To
+   avoid this, a host MUST NOT append the search suffix ".local.", if
+   present, to any relative (partially qualified) host name containing
+   two or more labels.  Appending ".local." to single-label relative
+   host names is acceptable, since the user should have no expectation
+   that a single-label host name will resolve as is.  However, users who
+   have both "example.com" and "local" in their search lists should be
+   aware that if they type "www" into their web browser, it may not be
+   immediately clear to them whether the page that appears is
+   "www.example.com" or "www.local".
+
+   Multicast DNS uses UDP port 5353.  On operating systems where only
+   privileged processes are allowed to use ports below 1024, no such
+   privilege is required to use port 5353.
+
+22.  IANA Considerations
+
+   IANA has allocated the UDP port 5353 for the Multicast DNS protocol
+   described in this document [SN].
+
+   IANA has allocated the IPv4 link-local multicast address 224.0.0.251
+   for the use described in this document [MC4].
+
+   IANA has allocated the IPv6 multicast address set FF0X::FB (where "X"
+   indicates any hexadecimal digit from '1' to 'F') for the use
+   described in this document [MC6].  Only address FF02::FB (link-local
+   scope) is currently in use by deployed software, but it is possible
+   that in the future implementers may experiment with Multicast DNS
+   using larger-scoped addresses, such as FF05::FB (site-local scope)
+   [RFC4291].
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 53]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   IANA has implemented the following DNS records:
+
+      MDNS.MCAST.NET.            IN  A    224.0.0.251
+      251.0.0.224.IN-ADDR.ARPA.  IN  PTR  MDNS.MCAST.NET.
+
+   Entries for the AAAA and corresponding PTR records have not been made
+   as there is not yet an RFC providing direction for the management of
+   the IP6.ARPA domain relating to the IPv6 multicast address space.
+
+   The reuse of the top bit of the rrclass field in the Question and
+   Resource Record Sections means that Multicast DNS can only carry DNS
+   records with classes in the range 0-32767.  Classes in the range
+   32768 to 65535 are incompatible with Multicast DNS.  IANA has noted
+   this fact, and if IANA receives a request to allocate a DNS class
+   value above 32767, IANA will make sure the requester is aware of this
+   implication before proceeding.  This does not mean that allocations
+   of DNS class values above 32767 should be denied, only that they
+   should not be allowed until the requester has indicated that they are
+   aware of how this allocation will interact with Multicast DNS.
+   However, to date, only three DNS classes have been assigned by IANA
+   (1, 3, and 4), and only one (1, "Internet") is actually in widespread
+   use, so this issue is likely to remain a purely theoretical one.
+
+   IANA has recorded the list of domains below as being Special-Use
+   Domain Names [RFC6761]:
+
+      .local.
+      .254.169.in-addr.arpa.
+      .8.e.f.ip6.arpa.
+      .9.e.f.ip6.arpa.
+      .a.e.f.ip6.arpa.
+      .b.e.f.ip6.arpa.
+
+22.1.  Domain Name Reservation Considerations
+
+   The six domains listed above, and any names falling within those
+   domains (e.g., "MyPrinter.local.", "34.12.254.169.in-addr.arpa.",
+   "Ink-Jet._pdl-datastream._tcp.local.") are special [RFC6761] in the
+   following ways:
+
+      1. Users may use these names as they would other DNS names,
+         entering them anywhere that they would otherwise enter a
+         conventional DNS name, or a dotted decimal IPv4 address, or a
+         literal IPv6 address.
+
+         Since there is no central authority responsible for assigning
+         dot-local names, and all devices on the local network are
+         equally entitled to claim any dot-local name, users SHOULD be
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 54]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+         aware of this and SHOULD exercise appropriate caution.  In an
+         untrusted or unfamiliar network environment, users SHOULD be
+         aware that using a name like "www.local" may not actually
+         connect them to the web site they expected, and could easily
+         connect them to a different web page, or even a fake or spoof
+         of their intended web site, designed to trick them into
+         revealing confidential information.  As always with networking,
+         end-to-end cryptographic security can be a useful tool.  For
+         example, when connecting with ssh, the ssh host key
+         verification process will inform the user if it detects that
+         the identity of the entity they are communicating with has
+         changed since the last time they connected to that name.
+
+      2. Application software may use these names as they would other
+         similar DNS names, and is not required to recognize the names
+         and treat them specially.  Due to the relative ease of spoofing
+         dot-local names, end-to-end cryptographic security remains
+         important when communicating across a local network, just as it
+         is when communicating across the global Internet.
+
+      3. Name resolution APIs and libraries SHOULD recognize these names
+         as special and SHOULD NOT send queries for these names to their
+         configured (unicast) caching DNS server(s).  This is to avoid
+         unnecessary load on the root name servers and other name
+         servers, caused by queries for which those name servers do not
+         have useful non-negative answers to give, and will not ever
+         have useful non-negative answers to give.
+
+      4. Caching DNS servers SHOULD recognize these names as special and
+         SHOULD NOT attempt to look up NS records for them, or otherwise
+         query authoritative DNS servers in an attempt to resolve these
+         names.  Instead, caching DNS servers SHOULD generate immediate
+         NXDOMAIN responses for all such queries they may receive (from
+         misbehaving name resolver libraries).  This is to avoid
+         unnecessary load on the root name servers and other name
+         servers.
+
+      5. Authoritative DNS servers SHOULD NOT by default be configurable
+         to answer queries for these names, and, like caching DNS
+         servers, SHOULD generate immediate NXDOMAIN responses for all
+         such queries they may receive.  DNS server software MAY provide
+         a configuration option to override this default, for testing
+         purposes or other specialized uses.
+
+      6. DNS server operators SHOULD NOT attempt to configure
+         authoritative DNS servers to act as authoritative for any of
+         these names.  Configuring an authoritative DNS server to act as
+         authoritative for any of these names may not, in many cases,
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 55]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+         yield the expected result.  Since name resolver libraries and
+         caching DNS servers SHOULD NOT send queries for those names
+         (see 3 and 4 above), such queries SHOULD be suppressed before
+         they even reach the authoritative DNS server in question, and
+         consequently it will not even get an opportunity to answer
+         them.
+
+      7. DNS Registrars MUST NOT allow any of these names to be
+         registered in the normal way to any person or entity.  These
+         names are reserved protocol identifiers with special meaning
+         and fall outside the set of names available for allocation by
+         registrars.  Attempting to allocate one of these names as if it
+         were a normal domain name will probably not work as desired,
+         for reasons 3, 4, and 6 above.
+
+23.  Acknowledgments
+
+   The concepts described in this document have been explored,
+   developed, and implemented with help from Ran Atkinson, Richard
+   Brown, Freek Dijkstra, Erik Guttman, Kyle McKay, Pasi Sarolahti,
+   Pekka Savola, Robby Simpson, Mark Townsley, Paul Vixie, Bill
+   Woodcock, and others.  Special thanks go to Bob Bradley, Josh
+   Graessley, Scott Herscher, Rory McGuire, Roger Pantos, and Kiren
+   Sekar for their significant contributions.  Special thanks also to
+   Kerry Lynn for converting the document to xml2rfc form in May 2010,
+   and to Area Director Ralph Droms for shepherding the document through
+   its final steps.
+
+24.  References
+
+24.1.  Normative References
+
+   [MC4]      IANA, "IPv4 Multicast Address Space Registry",
+              <http://www.iana.org/assignments/multicast-addresses/>.
+
+   [MC6]      IANA, "IPv6 Multicast Address Space Registry",
+              <http://www.iana.org/assignments/
+              ipv6-multicast-addresses/>.
+
+   [RFC0020]  Cerf, V., "ASCII format for network interchange", RFC 20,
+              October 1969.
+
+   [RFC1034]  Mockapetris, P., "Domain names - concepts and facilities",
+              STD 13, RFC 1034, November 1987.
+
+   [RFC1035]  Mockapetris, P., "Domain names - implementation and
+              specification", STD 13, RFC 1035, November 1987.
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 56]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   [RFC2119]  Bradner, S., "Key words for use in RFCs to Indicate
+              Requirement Levels", BCP 14, RFC 2119, March 1997.
+
+   [RFC3629]  Yergeau, F., "UTF-8, a transformation format of ISO
+              10646", STD 63, RFC 3629, November 2003.
+
+   [RFC4034]  Arends, R., Austein, R., Larson, M., Massey, D., and S.
+              Rose, "Resource Records for the DNS Security Extensions",
+              RFC 4034, March 2005.
+
+   [RFC5198]  Klensin, J. and M. Padlipsky, "Unicode Format for Network
+              Interchange", RFC 5198, March 2008.
+
+   [RFC6195]  Eastlake 3rd, D., "Domain Name System (DNS) IANA
+              Considerations", BCP 42, RFC 6195, March 2011.
+
+   [RFC6761]  Cheshire, S. and M. Krochmal, "Special-Use Domain Names",
+              RFC 6761, February 2013.
+
+   [SN]       IANA, "Service Name and Transport Protocol Port Number
+              Registry", <http://www.iana.org/assignments/
+              service-names-port-numbers/>.
+
+24.2.  Informative References
+
+   [B4W]      "Bonjour for Windows",
+              <http://en.wikipedia.org/wiki/Bonjour_(software)>.
+
+   [BJ]       Apple Bonjour Open Source Software,
+              <http://developer.apple.com/bonjour/>.
+
+   [IEEE.802.3]
+              "Information technology - Telecommunications and
+              information exchange between systems - Local and
+              metropolitan area networks - Specific requirements - Part
+              3: Carrier Sense Multiple Access with Collision Detection
+              (CSMA/CD) Access Method and Physical Layer
+              Specifications", IEEE Std 802.3-2008, December 2008,
+              <http://standards.ieee.org/getieee802/802.3.html>.
+
+   [IEEE.802.11]
+              "Information technology - Telecommunications and
+              information exchange between systems - Local and
+              metropolitan area networks - Specific requirements - Part
+              11: Wireless LAN Medium Access Control (MAC) and Physical
+              Layer (PHY) Specifications", IEEE Std 802.11-2007, June
+              2007, <http://standards.ieee.org/getieee802/802.11.html>.
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 57]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   [Jumbo]    "Ethernet Jumbo Frames", November 2009,
+              <http://www.ethernetalliance.org/library/whitepaper/
+              ethernet-jumbo-frames/>.
+
+   [NIAS]     Cheshire, S., "Discovering Named Instances of Abstract
+              Services using DNS", Work in Progress, July 2001.
+
+   [NSD]      "NsdManager | Android Developer", June 2012,
+              <http://developer.android.com/reference/
+              android/net/nsd/NsdManager.html>.
+
+   [RFC2052]  Gulbrandsen, A. and P. Vixie, "A DNS RR for specifying the
+              location of services (DNS SRV)", RFC 2052, October 1996.
+
+   [RFC2132]  Alexander, S. and R. Droms, "DHCP Options and BOOTP Vendor
+              Extensions", RFC 2132, March 1997.
+
+   [RFC2136]  Vixie, P., Ed., Thomson, S., Rekhter, Y., and J. Bound,
+              "Dynamic Updates in the Domain Name System (DNS UPDATE)",
+              RFC 2136, April 1997.
+
+   [RFC2181]  Elz, R. and R. Bush, "Clarifications to the DNS
+              Specification", RFC 2181, July 1997.
+
+   [RFC2535]  Eastlake 3rd, D., "Domain Name System Security
+              Extensions", RFC 2535, March 1999.
+
+   [RFC2671]  Vixie, P., "Extension Mechanisms for DNS (EDNS0)", RFC
+              2671, August 1999.
+
+   [RFC2845]  Vixie, P., Gudmundsson, O., Eastlake 3rd, D., and B.
+              Wellington, "Secret Key Transaction Authentication for DNS
+              (TSIG)", RFC 2845, May 2000.
+
+   [RFC2930]  Eastlake 3rd, D., "Secret Key Establishment for DNS (TKEY
+              RR)", RFC 2930, September 2000.
+
+   [RFC2931]  Eastlake 3rd, D., "DNS Request and Transaction Signatures
+              ( SIG(0)s )", RFC 2931, September 2000.
+
+   [RFC3007]  Wellington, B., "Secure Domain Name System (DNS) Dynamic
+              Update", RFC 3007, November 2000.
+
+   [RFC3492]  Costello, A., "Punycode: A Bootstring encoding of Unicode
+              for Internationalized Domain Names in Applications
+              (IDNA)", RFC 3492, March 2003.
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 58]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   [RFC3927]  Cheshire, S., Aboba, B., and E. Guttman, "Dynamic
+              Configuration of IPv4 Link-Local Addresses", RFC 3927, May
+              2005.
+
+   [RFC4033]  Arends, R., Austein, R., Larson, M., Massey, D., and S.
+              Rose, "DNS Security Introduction and Requirements", RFC
+              4033, March 2005.
+
+   [RFC4291]  Hinden, R. and S. Deering, "IP Version 6 Addressing
+              Architecture", RFC 4291, February 2006.
+
+   [RFC4795]  Aboba, B., Thaler, D., and L. Esibov, "Link-local
+              Multicast Name Resolution (LLMNR)", RFC 4795, January
+              2007.
+
+   [RFC4861]  Narten, T., Nordmark, E., Simpson, W., and H. Soliman,
+              "Neighbor Discovery for IP version 6 (IPv6)", RFC 4861,
+              September 2007.
+
+   [RFC4862]  Thomson, S., Narten, T., and T. Jinmei, "IPv6 Stateless
+              Address Autoconfiguration", RFC 4862, September 2007.
+
+   [RFC5226]  Narten, T. and H. Alvestrand, "Guidelines for Writing an
+              IANA Considerations Section in RFCs", BCP 26, RFC 5226,
+              May 2008.
+
+   [RFC5890]  Klensin, J., "Internationalized Domain Names for
+              Applications (IDNA): Definitions and Document Framework",
+              RFC 5890, August 2010.
+
+   [RFC6281]  Cheshire, S., Zhu, Z., Wakikawa, R., and L. Zhang,
+              "Understanding Apple's Back to My Mac (BTMM) Service", RFC
+              6281, June 2011.
+
+   [RFC6760]  Cheshire, S. and M. Krochmal, "Requirements for a Protocol
+              to Replace the AppleTalk Name Binding Protocol (NBP)", RFC
+              6760, February 2013.
+
+   [RFC6763]  Cheshire, S. and M. Krochmal, "DNS-Based Service
+              Discovery", RFC 6763, February 2013.
+
+   [Zeroconf] Cheshire, S. and D. Steinberg, "Zero Configuration
+              Networking: The Definitive Guide", O'Reilly Media, Inc.,
+              ISBN 0-596-10100-7, December 2005.
+
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 59]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+Appendix A.  Design Rationale for Choice of UDP Port Number
+
+   Arguments were made for and against using UDP port 53, the standard
+   Unicast DNS port.  Some of the arguments are given below.  The
+   arguments for using a different port were greater in number and more
+   compelling, so that option was ultimately selected.  The UDP port
+   "5353" was selected for its mnemonic similarity to "53".
+
+   Arguments for using UDP port 53:
+
+   * This is "just DNS", so it should be the same port.
+
+   * There is less work to be done updating old resolver libraries to do
+     simple Multicast DNS queries.  Only the destination address need be
+     changed.  In some cases, this can be achieved without any code
+     changes, just by adding the address 224.0.0.251 to a configuration
+     file.
+
+   Arguments for using a different port (UDP port 5353):
+
+   * This is not "just DNS".  This is a DNS-like protocol, but
+     different.
+
+   * Changing resolver library code to use a different port number is
+     not hard.  In some cases, this can be achieved without any code
+     changes, just by adding the address 224.0.0.251:5353 to a
+     configuration file.
+
+   * Using the same port number makes it hard to run a Multicast DNS
+     responder and a conventional Unicast DNS server on the same
+     machine.  If a conventional Unicast DNS server wishes to implement
+     Multicast DNS as well, it can still do that, by opening two
+     sockets.  Having two different port numbers allows this
+     flexibility.
+
+   * Some VPN software hijacks all outgoing traffic to port 53 and
+     redirects it to a special DNS server set up to serve those VPN
+     clients while they are connected to the corporate network.  It is
+     questionable whether this is the right thing to do, but it is
+     common, and redirecting link-local multicast DNS packets to a
+     remote server rarely produces any useful results.  It does mean,
+     for example, that a user of such VPN software becomes unable to
+     access their local network printer sitting on their desk right next
+     to their computer.  Using a different UDP port helps avoid this
+     particular problem.
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 60]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   * On many operating systems, unprivileged software may not send or
+     receive packets on low-numbered ports.  This means that any
+     software sending or receiving Multicast DNS packets on port 53
+     would have to run as "root", which is an undesirable security risk.
+     Using a higher-numbered UDP port avoids this restriction.
+
+Appendix B.  Design Rationale for Not Using Hashed Multicast Addresses
+
+   Some discovery protocols use a range of multicast addresses, and
+   determine the address to be used by a hash function of the name being
+   sought.  Queries are sent via multicast to the address as indicated
+   by the hash function, and responses are returned to the querier via
+   unicast.  Particularly in IPv6, where multicast addresses are
+   extremely plentiful, this approach is frequently advocated.  For
+   example, IPv6 Neighbor Discovery [RFC4861] sends Neighbor
+   Solicitation messages to the "solicited-node multicast address",
+   which is computed as a function of the solicited IPv6 address.
+
+   There are some disadvantages to using hashed multicast addresses like
+   this in a service discovery protocol:
+
+   * When a host has a large number of records with different names, the
+     host may have to join a large number of multicast groups.  Each
+     time a host joins or leaves a multicast group, this results in
+     Internet Group Management Protocol (IGMP) or Multicast Listener
+     Discovery (MLD) traffic on the network announcing this fact.
+     Joining a large number of multicast groups can place undue burden
+     on the Ethernet hardware, which typically supports a limited number
+     of multicast addresses efficiently.  When this number is exceeded,
+     the Ethernet hardware may have to resort to receiving all
+     multicasts and passing them up to the host networking code for
+     filtering in software, thereby defeating much of the point of using
+     a multicast address range in the first place.  Finally, many IPv6
+     stacks have a fixed limit IPV6_MAX_MEMBERSHIPS, and the code simply
+     fails with an error if a client attempts to exceed this limit.
+     Common values for IPV6_MAX_MEMBERSHIPS are 20 or 31.
+
+   * Multiple questions cannot be placed in one packet if they don't all
+     hash to the same multicast address.
+
+   * Duplicate Question Suppression doesn't work if queriers are not
+     seeing each other's queries.
+
+   * Duplicate Answer Suppression doesn't work if responders are not
+     seeing each other's responses.
+
+   * Opportunistic Caching doesn't work.
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 61]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   * Ongoing Conflict Detection doesn't work.
+
+Appendix C.  Design Rationale for Maximum Multicast DNS Name Length
+
+   Multicast DNS names may be up to 255 bytes long (in the on-the-wire
+   message format), not counting the terminating zero byte at the end.
+
+   "Domain Names - Implementation and Specification" [RFC1035] says:
+
+      Various objects and parameters in the DNS have size limits.  They
+      are listed below.  Some could be easily changed, others are more
+      fundamental.
+
+      labels          63 octets or less
+
+      names           255 octets or less
+
+      ...
+
+      the total length of a domain name (i.e., label octets and label
+      length octets) is restricted to 255 octets or less.
+
+   This text does not state whether this 255-byte limit includes the
+   terminating zero at the end of every name.
+
+   Several factors lead us to conclude that the 255-byte limit does
+   *not* include the terminating zero:
+
+   o It is common in software engineering to have size limits that are a
+     power of two, or a multiple of a power of two, for efficiency.  For
+     example, an integer on a modern processor is typically 2, 4, or 8
+     bytes, not 3 or 5 bytes.  The number 255 is not a power of two, nor
+     is it to most people a particularly noteworthy number.  It is
+     noteworthy to computer scientists for only one reason -- because it
+     is exactly one *less* than a power of two.  When a size limit is
+     exactly one less than a power of two, that suggests strongly that
+     the one extra byte is being reserved for some specific reason -- in
+     this case reserved, perhaps, to leave room for a terminating zero
+     at the end.
+
+   o In the case of DNS label lengths, the stated limit is 63 bytes.  As
+     with the total name length, this limit is exactly one less than a
+     power of two.  This label length limit also excludes the label
+     length byte at the start of every label.  Including that extra
+     byte, a 63-byte label takes 64 bytes of space in memory or in a DNS
+     message.
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 62]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   o It is common in software engineering for the semantic "length" of
+     an object to be one less than the number of bytes it takes to store
+     that object.  For example, in C, strlen("foo") is 3, but
+     sizeof("foo") (which includes the terminating zero byte at the end)
+     is 4.
+
+   o The text describing the total length of a domain name mentions
+     explicitly that label length and data octets are included, but does
+     not mention the terminating zero at the end.  The zero byte at the
+     end of a domain name is not a label length.  Indeed, the value zero
+     is chosen as the terminating marker precisely because it is not a
+     legal length byte value -- DNS prohibits empty labels.  For
+     example, a name like "bad..name." is not a valid domain name
+     because it contains a zero-length label in the middle, which cannot
+     be expressed in a DNS message, because software parsing the message
+     would misinterpret a zero label-length byte as being a zero "end of
+     name" marker instead.
+
+   Finally, "Clarifications to the DNS Specification" [RFC2181] offers
+   additional confirmation that, in the context of DNS specifications,
+   the stated "length" of a domain name does not include the terminating
+   zero byte at the end.  That document refers to the root name, which
+   is typically written as "." and is represented in a DNS message by a
+   single lone zero byte (i.e., zero bytes of data plus a terminating
+   zero), as the "zero length full name":
+
+      The zero length full name is defined as representing the root of
+      the DNS tree, and is typically written and displayed as ".".
+
+   This wording supports the interpretation that, in a DNS context, when
+   talking about lengths of names, the terminating zero byte at the end
+   is not counted.  If the root name (".") is considered to be zero
+   length, then to be consistent, the length (for example) of "org" has
+   to be 4 and the length of "ietf.org" has to be 9, as shown below:
+
+                                                  ------
+                                                 | 0x00 |   length = 0
+                                                  ------
+
+                             ------------------   ------
+                            | 0x03 | o | r | g | | 0x00 |   length = 4
+                             ------------------   ------
+
+      -----------------------------------------   ------
+     | 0x04 | i | e | t | f | 0x03 | o | r | g | | 0x00 |   length = 9
+      -----------------------------------------   ------
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 63]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   This means that the maximum length of a domain name, as represented
+   in a Multicast DNS message, up to but not including the final
+   terminating zero, must not exceed 255 bytes.
+
+   However, many Unicast DNS implementers have read these RFCs
+   differently, and argue that the 255-byte limit does include the
+   terminating zero, and that the "Clarifications to the DNS
+   Specification" [RFC2181] statement that "." is the "zero length full
+   name" was simply a mistake.
+
+   Hence, implementers should be aware that other Unicast DNS
+   implementations may limit the maximum domain name to 254 bytes plus a
+   terminating zero, depending on how that implementer interpreted the
+   DNS specifications.
+
+   Compliant Multicast DNS implementations MUST support names up to 255
+   bytes plus a terminating zero, i.e., 256 bytes total.
+
+Appendix D.  Benefits of Multicast Responses
+
+   Some people have argued that sending responses via multicast is
+   inefficient on the network.  In fact, using multicast responses can
+   result in a net lowering of overall multicast traffic for a variety
+   of reasons, and provides other benefits too:
+
+   * Opportunistic Caching.  One multicast response can update the
+     caches on all machines on the network.  If another machine later
+     wants to issue the same query, and it already has the answer in its
+     cache, it may not need to even transmit that multicast query on the
+     network at all.
+
+   * Duplicate Query Suppression.  When more than one machine has the
+     same ongoing long-lived query running, every machine does not have
+     to transmit its own independent query.  When one machine transmits
+     a query, all the other hosts see the answers, so they can suppress
+     their own queries.
+
+   * Passive Observation Of Failures (POOF).  When a host sees a
+     multicast query, but does not see the corresponding multicast
+     response, it can use this information to promptly delete stale data
+     from its cache.  To achieve the same level of user-interface
+     quality and responsiveness without multicast responses would
+     require lower cache lifetimes and more frequent network polling,
+     resulting in a higher packet rate.
+
+   * Passive Conflict Detection.  Just because a name has been
+     previously verified to be unique does not guarantee it will
+     continue to be so indefinitely.  By allowing all Multicast DNS
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 64]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+     responders to constantly monitor their peers' responses, conflicts
+     arising out of network topology changes can be promptly detected
+     and resolved.  If responses were not sent via multicast, some other
+     conflict detection mechanism would be needed, imposing its own
+     additional burden on the network.
+
+   * Use on devices with constrained memory resources: When using
+     delayed responses to reduce network collisions, responders need to
+     maintain a list recording to whom each answer should be sent.  The
+     option of multicast responses allows responders with limited
+     storage, which cannot store an arbitrarily long list of response
+     addresses, to choose to fail-over to a single multicast response in
+     place of multiple unicast responses, when appropriate.
+
+   * Overlayed Subnets.  In the case of overlayed subnets, multicast
+     responses allow a receiver to know with certainty that a response
+     originated on the local link, even when its source address may
+     apparently suggest otherwise.
+
+   * Robustness in the face of misconfiguration: Link-local multicast
+     transcends virtually every conceivable network misconfiguration.
+     Even if you have a collection of devices where every device's IP
+     address, subnet mask, default gateway, and DNS server address are
+     all wrong, packets sent by any of those devices addressed to a
+     link-local multicast destination address will still be delivered to
+     all peers on the local link.  This can be extremely helpful when
+     diagnosing and rectifying network problems, since it facilitates a
+     direct communication channel between client and server that works
+     without reliance on ARP, IP routing tables, etc.  Being able to
+     discover what IP address a device has (or thinks it has) is
+     frequently a very valuable first step in diagnosing why it is
+     unable to communicate on the local network.
+
+Appendix E.  Design Rationale for Encoding Negative Responses
+
+   Alternative methods of asserting nonexistence were considered, such
+   as using an NXDOMAIN response, or emitting a resource record with
+   zero-length rdata.
+
+   Using an NXDOMAIN response does not work well with Multicast DNS.  A
+   Unicast DNS NXDOMAIN response applies to the entire message, but for
+   efficiency Multicast DNS allows (and encourages) multiple responses
+   in a single message.  If the error code in the header were NXDOMAIN,
+   it would not be clear to which name(s) that error code applied.
+
+   Asserting nonexistence by emitting a resource record with zero-length
+   rdata would mean that there would be no way to differentiate between
+   a record that doesn't exist, and a record that does exist, with zero-
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 65]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   length rdata.  By analogy, most file systems today allow empty files,
+   so a file that exists with zero bytes of data is not considered
+   equivalent to a filename that does not exist.
+
+   A benefit of asserting nonexistence through NSEC records instead of
+   through NXDOMAIN responses is that NSEC records can be added to the
+   Additional Section of a DNS response to offer additional information
+   beyond what the querier explicitly requested.  For example, in
+   response to an SRV query, a responder should include A record(s)
+   giving its IPv4 addresses in the Additional Section, and an NSEC
+   record indicating which other types it does or does not have for this
+   name.  If the responder is running on a host that does not support
+   IPv6 (or does support IPv6 but currently has no IPv6 address on that
+   interface) then this NSEC record in the Additional Section will
+   indicate this absence of AAAA records.  In effect, the responder is
+   saying, "Here's my SRV record, and here are my IPv4 addresses, and
+   no, I don't have any IPv6 addresses, so don't waste your time
+   asking".  Without this information in the Additional Section, it
+   would take the querier an additional round-trip to perform an
+   additional query to ascertain that the target host has no AAAA
+   records.  (Arguably Unicast DNS could also benefit from this ability
+   to express nonexistence in the Additional Section, but that is
+   outside the scope of this document.)
+
+Appendix F.  Use of UTF-8
+
+   After many years of debate, as a result of the perceived need to
+   accommodate certain DNS implementations that apparently couldn't
+   handle any character that's not a letter, digit, or hyphen (and
+   apparently never would be updated to remedy this limitation), the
+   Unicast DNS community settled on an extremely baroque encoding called
+   "Punycode" [RFC3492].  Punycode is a remarkably ingenious encoding
+   solution, but it is complicated, hard to understand, and hard to
+   implement, using sophisticated techniques including insertion unsort
+   coding, generalized variable-length integers, and bias adaptation.
+   The resulting encoding is remarkably compact given the constraints,
+   but it's still not as good as simple straightforward UTF-8, and it's
+   hard even to predict whether a given input string will encode to a
+   Punycode string that fits within DNS's 63-byte limit, except by
+   simply trying the encoding and seeing whether it fits.  Indeed, the
+   encoded size depends not only on the input characters, but on the
+   order they appear, so the same set of characters may or may not
+   encode to a legal Punycode string that fits within DNS's 63-byte
+   limit, depending on the order the characters appear.  This is
+   extremely hard to present in a user interface that explains to users
+   why one name is allowed, but another name containing the exact same
+   characters is not.  Neither Punycode nor any other of the "ASCII-
+   Compatible Encodings" [RFC5890] proposed for Unicast DNS may be used
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 66]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   in Multicast DNS messages.  Any text being represented internally in
+   some other representation must be converted to canonical precomposed
+   UTF-8 before being placed in any Multicast DNS message.
+
+Appendix G.  Private DNS Namespaces
+
+   The special treatment of names ending in ".local." has been
+   implemented in Macintosh computers since the days of Mac OS 9, and
+   continues today in Mac OS X and iOS.  There are also implementations
+   for Microsoft Windows [B4W], Linux, and other platforms.
+
+   Some network operators setting up private internal networks
+   ("intranets") have used unregistered top-level domains, and some may
+   have used the ".local" top-level domain.  Using ".local" as a private
+   top-level domain conflicts with Multicast DNS and may cause problems
+   for users.  Clients can be configured to send both Multicast and
+   Unicast DNS queries in parallel for these names, and this does allow
+   names to be looked up both ways, but this results in additional
+   network traffic and additional delays in name resolution, as well as
+   potentially creating user confusion when it is not clear whether any
+   given result was received via link-local multicast from a peer on the
+   same link, or from the configured unicast name server.  Because of
+   this, we recommend against using ".local" as a private Unicast DNS
+   top-level domain.  We do not recommend use of unregistered top-level
+   domains at all, but should network operators decide to do this, the
+   following top-level domains have been used on private internal
+   networks without the problems caused by trying to reuse ".local." for
+   this purpose:
+
+      .intranet.
+      .internal.
+      .private.
+      .corp.
+      .home.
+      .lan.
+
+Appendix H.  Deployment History
+
+   In July 1997, in an email to the net-thinkers@thumper.vmeng.com
+   mailing list, Stuart Cheshire first proposed the idea of running the
+   AppleTalk Name Binding Protocol [RFC6760] over IP.  As a result of
+   this and related IETF discussions, the IETF Zeroconf working group
+   was chartered September 1999.  After various working group
+   discussions and other informal IETF discussions, several Internet-
+   Drafts were written that were loosely related to the general themes
+   of DNS and multicast, but did not address the service discovery
+   aspect of NBP.
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 67]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   In April 2000, Stuart Cheshire registered IPv4 multicast address
+   224.0.0.251 with IANA [MC4] and began writing code to test and
+   develop the idea of performing NBP-like service discovery using
+   Multicast DNS, which was documented in a group of three Internet-
+   Drafts:
+
+   o "Requirements for a Protocol to Replace the AppleTalk Name Binding
+     Protocol (NBP)" [RFC6760] is an overview explaining the AppleTalk
+     Name Binding Protocol, because many in the IETF community had
+     little first-hand experience using AppleTalk, and confusion in the
+     IETF community about what AppleTalk NBP did was causing confusion
+     about what would be required in an IP-based replacement.
+
+   o "Discovering Named Instances of Abstract Services using DNS" [NIAS]
+     proposed a way to perform NBP-like service discovery using DNS-
+     compatible names and record types.
+
+   o "Multicast DNS" (this document) specifies a way to transport those
+     DNS-compatible queries and responses using IP multicast, for zero-
+     configuration environments where no conventional Unicast DNS server
+     was available.
+
+   In 2001, an update to Mac OS 9 added resolver library support for
+   host name lookup using Multicast DNS.  If the user typed a name such
+   as "MyPrinter.local." into any piece of networking software that used
+   the standard Mac OS 9 name lookup APIs, then those name lookup APIs
+   would recognize the name as a dot-local name and query for it by
+   sending simple one-shot Multicast DNS queries to 224.0.0.251:5353.
+   This enabled the user to, for example, enter the name
+   "MyPrinter.local." into their web browser in order to view a
+   printer's status and configuration web page, or enter the name
+   "MyPrinter.local." into the printer setup utility to create a print
+   queue for printing documents on that printer.
+
+   Multicast DNS responder software, with full service discovery, first
+   began shipping to end users in volume with the launch of Mac OS X
+   10.2 "Jaguar" in August 2002, and network printer makers (who had
+   historically supported AppleTalk in their network printers and were
+   receptive to IP-based technologies that could offer them similar
+   ease-of-use) started adopting Multicast DNS shortly thereafter.
+
+   In September 2002, Apple released the source code for the
+   mDNSResponder daemon as Open Source under Apple's standard Apple
+   Public Source License (APSL).
+
+   Multicast DNS responder software became available for Microsoft
+   Windows users in June 2004 with the launch of Apple's "Rendezvous for
+   Windows" (now "Bonjour for Windows"), both in executable form (a
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 68]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+   downloadable installer for end users) and as Open Source (one of the
+   supported platforms within Apple's body of cross-platform code in the
+   publicly accessible mDNSResponder CVS source code repository) [BJ].
+
+   In August 2006, Apple re-licensed the cross-platform mDNSResponder
+   source code under the Apache License, Version 2.0.
+
+   In addition to desktop and laptop computers running Mac OS X and
+   Microsoft Windows, Multicast DNS is now implemented in a wide range
+   of hardware devices, such as Apple's "AirPort" wireless base
+   stations, iPhone and iPad, and in home gateways from other vendors,
+   network printers, network cameras, TiVo DVRs, etc.
+
+   The Open Source community has produced many independent
+   implementations of Multicast DNS, some in C like Apple's
+   mDNSResponder daemon, and others in a variety of different languages
+   including Java, Python, Perl, and C#/Mono.
+
+   In January 2007, the IETF published the Informational RFC "Link-Local
+   Multicast Name Resolution (LLMNR)" [RFC4795], which is substantially
+   similar to Multicast DNS, but incompatible in some small but
+   important ways.  In particular, the LLMNR design explicitly excluded
+   support for service discovery, which made it an unsuitable candidate
+   for a protocol to replace AppleTalk NBP [RFC6760].
+
+   While the original focus of Multicast DNS and DNS-Based Service
+   Discovery was for zero-configuration environments without a
+   conventional Unicast DNS server, DNS-Based Service Discovery also
+   works using Unicast DNS servers, using DNS Update [RFC2136] [RFC3007]
+   to create service discovery records and standard DNS queries to query
+   for them.  Apple's Back to My Mac service, launched with Mac OS X
+   10.5 "Leopard" in October 2007, uses DNS-Based Service Discovery over
+   Unicast DNS [RFC6281].
+
+   In June 2012, Google's Android operating system added native support
+   for DNS-SD and Multicast DNS with the android.net.nsd.NsdManager
+   class in Android 4.1 "Jelly Bean" (API Level 16) [NSD].
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 69]
+
+RFC 6762                      Multicast DNS                February 2013
+
+
+Authors' Addresses
+
+   Stuart Cheshire
+   Apple Inc.
+   1 Infinite Loop
+   Cupertino, CA  95014
+   USA
+
+   Phone: +1 408 974 3207
+   EMail: cheshire@apple.com
+
+
+   Marc Krochmal
+   Apple Inc.
+   1 Infinite Loop
+   Cupertino, CA  95014
+   USA
+
+   Phone: +1 408 974 4368
+   EMail: marc@apple.com
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Cheshire & Krochmal          Standards Track                   [Page 70]
+
diff --git a/include/ZT1Service.h b/include/ZT1Service.h
index 5cd0694..a55c088 100644
--- a/include/ZT1Service.h
+++ b/include/ZT1Service.h
@@ -103,7 +103,7 @@ void disableTaps();
  * @param addrlen
  * @return
  */
-void zts_get_ipv4_address(const char *nwid, char *addrstr, const int addrlen);
+void zts_get_ipv4_address(const char *nwid, char *addrstr, const size_t addrlen);
 
 /**
  * @brief Gets the VirtualTap's (interface) IPv6 address
@@ -114,7 +114,7 @@ void zts_get_ipv4_address(const char *nwid, char *addrstr, const int addrlen);
  * @param addrlen
  * @return
  */
-void zts_get_ipv6_address(const char *nwid, char *addrstr, const int addrlen);
+void zts_get_ipv6_address(const char *nwid, char *addrstr, const size_t addrlen);
 
 /**
  * @brief Returns whether the VirtualTap has an assigned IPv4 address
@@ -198,9 +198,9 @@ int zts_running();
  * that one call this at the beginning of your application code since it may take several seconds to fully
  * come online.
  * @param path Where this instance of ZeroTier will store its identity and configuration files
- * @return
+ * @return 0 on success (also when the service was already started); otherwise the nonzero pthread_create() error number
  */
-void zts_start(const char *path);
+int zts_start(const char *path);
 
 /**
  * @brief Alternative to zts_start(). Start an instance of libzt, wait for an address to be issues, and join
@@ -211,15 +211,15 @@ void zts_start(const char *path);
  * come online.
  * @param path
  * @param nwid A 16-digit hexidecimal virtual network ID
- * @return
+ * @return The result of zts_start(): 0 on success, a nonzero error number on failure
  */
-void zts_simple_start(const char *path, const char *nwid);
+int zts_simple_start(const char *path, const char *nwid);
 
 /**
  * @brief Stops libzt (ZeroTier core services, stack drivers, stack threads, etc)
  *
  * @usage This should be called at the end of your program or when you do not anticipate communicating over ZeroTier
- * @return
+ * @return Nothing (this function is declared void)
  */
 void zts_stop();
 
@@ -231,7 +231,7 @@ void zts_stop();
  * @param len
  * @return
  */
-void zts_get_homepath(char *homePath, int len);
+void zts_get_homepath(char *homePath, size_t len);
 
 /**
  * @brief Copies the hexidecimal representation of this nodeID into the provided buffer
diff --git a/include/libzt.h b/include/libzt.h
index 5fa678b..4cd5471 100644
--- a/include/libzt.h
+++ b/include/libzt.h
@@ -53,7 +53,6 @@ extern "C" {
 #endif
 
 // forward declarations from ZT1Service.h
-void zts_simple_start(const char *path, const char *nwid);
 int zts_get_device_id(char *devID);
 
 void init_network_stack();
@@ -71,7 +70,7 @@ void init_network_stack();
  * @param nwid A 16-digit hexidecimal network identifier (e.g. Earth: `8056c2e21c000001`)
  * @return 0 if successful; or 1 if failed
  */
-void zts_start(const char *path);
+int zts_start(const char *path);
 
 /**
  * @brief Starts libzt
@@ -86,7 +85,7 @@ void zts_start(const char *path);
  * @param nwid A 16-digit hexidecimal network identifier (e.g. Earth: `8056c2e21c000001`)
  * @return 0 if successful; or 1 if failed
  */
-void zts_simple_start(const char *path, const char *nwid);
+int zts_simple_start(const char *path, const char *nwid);
 
 /**
  * @brief Stops the ZeroTier core service and disconnects from all virtual networks
@@ -143,7 +142,7 @@ void zts_leave_soft(const char * filepath, const char * nwid);
  * @param len
  * @return
  */
-void zts_get_homepath(char *homePath, const int len);
+void zts_get_homepath(char *homePath, const size_t len);
 
 /**
  * @brief Get device ID (10-digit hex + NULL byte)
@@ -198,7 +197,7 @@ int zts_has_address(const char *nwid);
  * @param addrlen
  * @return
  */
-void zts_get_ipv4_address(const char *nwid, char *addrstr, const int addrlen);
+void zts_get_ipv4_address(const char *nwid, char *addrstr, const size_t addrlen);
 
 /**
  * @brief Get IPV6 Address for this device on a given network
@@ -209,7 +208,7 @@ void zts_get_ipv4_address(const char *nwid, char *addrstr, const int addrlen);
  * @param addrlen
  * @return
  */
-void zts_get_ipv6_address(const char *nwid, char *addrstr, const int addrlen);
+void zts_get_ipv6_address(const char *nwid, char *addrstr, const size_t addrlen);
 
 /**
  * @brief Returns a 6PLANE IPv6 address given a network ID and zerotier ID
diff --git a/src/VirtualTap.hpp b/src/VirtualTap.hpp
index 6ad6a7c..dd61e5c 100644
--- a/src/VirtualTap.hpp
+++ b/src/VirtualTap.hpp
@@ -206,7 +206,7 @@ namespace ZeroTier {
 		char vtap_abbr_name[16];
 
 		static int devno;
-		int ifindex = 0;
+		size_t ifindex = 0;
 
 		std::vector<InetAddress> ips() const;
 		std::vector<InetAddress> _ips;
diff --git a/src/ZT1Service.cpp b/src/ZT1Service.cpp
index 085ba4a..b8dd44c 100644
--- a/src/ZT1Service.cpp
+++ b/src/ZT1Service.cpp
@@ -258,7 +258,7 @@ void disableTaps()
 	ZeroTier::_vtaps_lock.unlock();
 }
 
-void zts_get_ipv4_address(const char *nwid, char *addrstr, const int addrlen)
+void zts_get_ipv4_address(const char *nwid, char *addrstr, const size_t addrlen)
 {
 	if (ZeroTier::zt1Service) {
 		uint64_t nwid_int = strtoull(nwid, NULL, 16);
@@ -280,7 +280,7 @@ void zts_get_ipv4_address(const char *nwid, char *addrstr, const int addrlen)
 		memcpy(addrstr, "\0", 1);
 }
 
-void zts_get_ipv6_address(const char *nwid, char *addrstr, const int addrlen)
+void zts_get_ipv6_address(const char *nwid, char *addrstr, size_t addrlen)
 {
 	if (ZeroTier::zt1Service) {
 		uint64_t nwid_int = strtoull(nwid, NULL, 16);
@@ -391,10 +391,10 @@ int zts_running() {
 	return ZeroTier::zt1Service == NULL ? false : ZeroTier::zt1Service->isRunning();
 }
 
-void zts_start(const char *path)
+int zts_start(const char *path)
 {
 	if (ZeroTier::zt1Service) {
-		return;
+		return 0; // already initialized, ok
 	}
 	if (path) {
 		ZeroTier::homeDir = path;
@@ -403,12 +403,12 @@ void zts_start(const char *path)
 		WSAStartup(MAKEWORD(2, 2), &wsaData); // initialize WinSock. Used in Phy for loopback pipe
 #endif
 	pthread_t service_thread;
-	pthread_create(&service_thread, NULL, zts_start_service, NULL);
+	return pthread_create(&service_thread, NULL, zts_start_service, NULL);
 }
 
-void zts_simple_start(const char *path, const char *nwid)
+int zts_simple_start(const char *path, const char *nwid)
 {
-	zts_start(path);
+	int err = zts_start(path);
 	while (zts_running() == false) {
 		DEBUG_EXTRA("waiting for service to start");
 		nanosleep((const struct timespec[]) {{0, (ZTO_WRAPPER_CHECK_INTERVAL * 1000000)}}, NULL);
@@ -426,6 +426,7 @@ void zts_simple_start(const char *path, const char *nwid)
 	while (zts_has_address(nwid) == false) {
 		nanosleep((const struct timespec[]) {{0, (ZTO_WRAPPER_CHECK_INTERVAL * 1000000)}}, NULL);
 	}
+	return err;
 }
 
 void zts_stop() {
@@ -438,10 +439,10 @@ void zts_stop() {
 #endif
 }
 
-void zts_get_homepath(char *homePath, int len) {
+void zts_get_homepath(char *homePath, size_t len) {
 	if (ZeroTier::homeDir.length()) {
 		memset(homePath, 0, len);
-		int buf_len = len < ZeroTier::homeDir.length() ? len : ZeroTier::homeDir.length();
+		size_t buf_len = len < ZeroTier::homeDir.length() ? len : ZeroTier::homeDir.length();
 		memcpy(homePath, ZeroTier::homeDir.c_str(), buf_len);
 	}
 }