tcp拥塞算法分析五（vegas）

本文分析linux-4.14.69代码的vegas拥塞算法

传统的基于丢包的拥塞算法（例如cubic,reno）都是（填满网络直到）有丢包了就认为出现拥塞．vegas会根据rtt来计算网络吞吐量和当前实际传输量，来判断是否出现拥塞．

关键变量介绍

/* Vegas variables */
struct vegas {
	u32	beg_snd_nxt;	/* right edge during last RTT */
	u32	beg_snd_una;	/* left edge  during last RTT */
	u32	beg_snd_cwnd;	/* saves the size of the cwnd */
	u8	doing_vegas_now;/* if true, do vegas for this RTT */
	u16	cntRTT;		/* # of RTTs measured within last RTT */
	u32	minRTT;		/* min of RTTs measured within last RTT (in usec) */
	u32	baseRTT;	/* the min of all Vegas RTT measurements seen (in usec) */
};

static int alpha = 2;
static int beta  = 4;
static int gamma = 1;

beg_snd_nxt：vegas把一个rtt来回看作一轮．beg_snd_nxt指向这一轮的右边界. 每次收到ack调用tcp_vegas_cong_avoid时，会判断ack序号，只有ack是新一轮的，即ack＞beg_snd_nxt才会走vegas流程，并更新beg_snd_nxt，保证一个rtt进行一次vegas窗口调整．如下图，当收到ack1（假设ack1>beg_snd_nxt）,会更新beg_snd_nxt指向seq5报文下一个字节，在后续收到ack2,到ack5的时候，都会认为是旧一轮的ack，只有在收到ack6的时候，才会走vegas算法流程．

cntRTT：最近一轮里面的rtt采样个数．每次收到ack就会调用tcp_vegas_cong_avoid和tcp_vegas_pkts_acked． tcp_vegas_cong_avoid检测到是新ack，就会把cntRTT清０，tcp_vegas_pkts_acked只管把cntRTT＋１．

minRTT：最近一轮最小的rtt．每次收到ack就会调用tcp_vegas_cong_avoid和tcp_vegas_pkts_acked． tcp_vegas_cong_avoid检测到是新ack，就会把minRTT设置为最大值，tcp_vegas_pkts_acked只管把采样最小的rtt赋值给minRTT．

baseRTT:是连接开始以来最小的rtt.和minRTT相比，少了tcp_vegas_cong_avoid中的设置最大值的过程．

target_cwnd: tp->snd_cwnd * baseRTT / minRTT. 计算出＂我们拥有的窗口＂，也就是网络的bdp

diff: tp->snd_cwnd * (minRTT-baseRTT) / baseRTT. 表示＂我们拥有的窗口＂与＂我们想要拥有的窗口＂的差值. 看公式意思貌似是通过snd_cwnd/baseRTT计算带宽bw，然后用bw＊minRTT和bw"baseRTT分别计算想拥有的窗口"和"拥有的窗口".

gamma: 用来限制慢启动的时候长太快．当慢启动的时候检测到diff > gamma，就停止慢启动，而是把当前窗口设置为target_cwnd＋１

beta: 用来限制拥塞避免长太快．当拥塞避免时候diff>beta，就把当前窗口减１

alpha：用来限制拥塞避免长太慢．当拥塞避免时候diff<alpha，就把当前窗口加１

关键函数

tcp_vegas_pkts_acked更新最近一轮的最小rtt－－minRTT，已经历史以来的最小rtt--baseRTT

/* Do RTT sampling needed for Vegas.
 * Basically we:
 *   o min-filter RTT samples from within an RTT to get the current
 *     propagation delay + queuing delay (we are min-filtering to try to
 *     avoid the effects of delayed ACKs)
 *   o min-filter RTT samples from a much longer window (forever for now)
 *     to find the propagation delay (baseRTT)
 */
void tcp_vegas_pkts_acked(struct sock *sk, const struct ack_sample *sample)
{
	struct vegas *vegas = inet_csk_ca(sk);
	u32 vrtt;

	if (sample->rtt_us < 0)
		return;

	/* Never allow zero rtt or baseRTT */
	vrtt = sample->rtt_us + 1;

	/* Filter to find propagation delay: */
	if (vrtt < vegas->baseRTT)
		vegas->baseRTT = vrtt;

	/* Find the min RTT during the last RTT to find
	 * the current prop. delay + queuing delay:
	 */
	vegas->minRTT = min(vegas->minRTT, vrtt);
	vegas->cntRTT++;
}

tcp_vegas_cong_avoid根据minRTT和baseRTT估算拥塞情况diff=cwnd*(1-minRTT/baseRTT)．根据diff和alpha，gamma，beta的大小关系调整速度．

static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct vegas *vegas = inet_csk_ca(sk);

	if (!vegas->doing_vegas_now) {
		tcp_reno_cong_avoid(sk, ack, acked);
		return;
	}
	/*
	beg_snd_nxt用于标记一轮开始的时候对应的tp->snd_nxt，收到了该轮
	某个ack后就会更新beg_snd_nxt为新的tp-<snd_nxt,小于等于新beg_snd_next
	的都是旧一轮的，会直接被忽略．		
	*/
	if (after(ack, vegas->beg_snd_nxt)) {
		/* Do the Vegas once-per-RTT cwnd adjustment. */

		/* Save the extent of the current window so we can use this
		 * at the end of the next RTT.
		 */
		vegas->beg_snd_nxt  = tp->snd_nxt;

		/* We do the Vegas calculations only if we got enough RTT
		 * samples that we can be reasonably sure that we got
		 * at least one RTT sample that wasn't from a delayed ACK.
		 * If we only had 2 samples total,
		 * then that means we're getting only 1 ACK per RTT, which
		 * means they're almost certainly delayed ACKs.
		 * If  we have 3 samples, we should be OK.
		 */
		/*确保每一轮样例数至少为3,避免延迟ack影响*/
		if (vegas->cntRTT <= 2) {
			/* We don't have enough RTT samples to do the Vegas
			 * calculation, so we'll behave like Reno.
			 */
			tcp_reno_cong_avoid(sk, ack, acked);
		} else {
			u32 rtt, diff;
			u64 target_cwnd;

			/* We have enough RTT samples, so, using the Vegas
			 * algorithm, we determine if we should increase or
			 * decrease cwnd, and by how much.
			 */

			/* Pluck out the RTT we are using for the Vegas
			 * calculations. This is the min RTT seen during the
			 * last RTT. Taking the min filters out the effects
			 * of delayed ACKs, at the cost of noticing congestion
			 * a bit later.
			 */
			rtt = vegas->minRTT;

			/* Calculate the cwnd we should have, if we weren't
			 * going too fast.
			 *
			 * This is:
			 *     (actual rate in segments) * baseRTT
			 */
			/*
				计算我们拥有的窗口值:cwnd*baseRTT/minRTT
				target_cwnd = snd_cwnd / rtt * baseRTT
			*/
			target_cwnd = (u64)tp->snd_cwnd * vegas->baseRTT;
			do_div(target_cwnd, rtt);

			/* Calculate the difference between the window we had,
			 * and the window we would like to have. This quantity
			 * is the "Diff" from the Arizona Vegas papers.
			 */
			/*计算＂想拥有的窗口＂与＂已有窗口＂的差值*/
			diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT;

			if (diff > gamma && tcp_in_slow_start(tp)) {
				/* Going too fast. Time to slow down
				 * and switch to congestion avoidance.
				 */

				/* Set cwnd to match the actual rate
				 * exactly:
				 *   cwnd = (actual rate) * baseRTT
				 * Then we add 1 because the integer
				 * truncation robs us of full link
				 * utilization.
				 */
				/*
					如果diff>gamma,且处于慢启动，就降速，减少发送窗口为target_cwnd+1．
					并更新ssthresh不大于发送窗口
				*/
				tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1);
				tp->snd_ssthresh = tcp_vegas_ssthresh(tp);

			} else if (tcp_in_slow_start(tp)) {
				/* Slow start.  */
				tcp_slow_start(tp, acked);
			} else {
				/* Congestion avoidance. */

				/* Figure out where we would like cwnd
				 * to be.
				 */
				if (diff > beta) {
					/* The old window was too fast, so
					 * we slow down.
					 */
					tp->snd_cwnd--;
					tp->snd_ssthresh
						= tcp_vegas_ssthresh(tp);
				} else if (diff < alpha) {
					/* We don't have enough extra packets
					 * in the network, so speed up.
					 */
					tp->snd_cwnd++;
				} else {
					/* Sending just as fast as we
					 * should be.
					 */
				}
			}
			/*发送窗口在2和snd_cwnd_clamp之间*/
			if (tp->snd_cwnd < 2)
				tp->snd_cwnd = 2;
			else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
				tp->snd_cwnd = tp->snd_cwnd_clamp;

			tp->snd_ssthresh = tcp_current_ssthresh(sk);
		}

		/* Wipe the slate clean for the next RTT. */
		/*新一轮，重置cntRTT,minRTT*/
		vegas->cntRTT = 0;
		vegas->minRTT = 0x7fffffff;
	}
	/* Use normal slow start */
	else if (tcp_in_slow_start(tp))
		tcp_slow_start(tp, acked);
}

tcp拥塞算法分析五（vegas）

猜你喜欢