Table of Contents
2.1 TCP状态处理接口 tcp_rcv_state_process
2.2 SYN_SENT状态输入报文处理 tcp_rcv_synsent_state_process
1 connect 阻塞模式概述
当客户端调用 connect 发送 SYN 之后,会将套接字状态切换成 SYN_SENT。此时如果套接字 sock 为非阻塞模式,connect 系统调用会直接返回;如果 sock 为阻塞模式,connect 在内核中最终会阻塞在 inet_wait_for_connect,sock 的状态维持在 SYN_SENT 或 SYN_RECV,直到被唤醒:收到 SYN+ACK 后 sock 切换成 TCP_ESTABLISHED,收到 RST 后切换成 TCP_CLOSE。
2. 客户端收到SYN+ACK报文
发送SYN请求报文后,TCB的状态由TCP_CLOSE迁移到TCP_SYN_SENT,所以在收到服务器端的响应报文后,将由tcp_rcv_state_process()处理。
2.1 TCP状态处理接口 tcp_rcv_state_process
/*
* This function implements the receiving procedure of RFC 793 for
* all states except ESTABLISHED and TIME_WAIT.
* It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
* address independent.
*
* NOTE: this is an abridged excerpt; only the TCP_SYN_SENT case of the
* state switch is shown, the remaining cases are elided ("...").
*/
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
struct tcphdr *th, unsigned len)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
int queued = 0;
tp->rx_opt.saw_tstamp = 0;
switch (sk->sk_state) {
...
case TCP_SYN_SENT:
// The input segment is handled by tcp_rcv_synsent_state_process()
queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
// Any non-negative result (0 or positive) is handed straight back to the
// caller; only a negative result falls through to the steps below.
// Per the original note, a positive value signals failure and makes the
// caller send a RST to the server.
if (queued >= 0)
return queued;
/* Do step6 onward by hand. */
// Handle urgent data
tcp_urg(sk, skb, th);
__kfree_skb(skb);
// Check whether there is data to send; for the client the three-way
// handshake is complete at this point, so data transmission can proceed
tcp_data_snd_check(sk);
return 0;
}
}
2.2 SYN_SENT状态输入报文处理 tcp_rcv_synsent_state_process
@返回1:发送复位报文;
@返回0:处理正常,停止对数据包的后续处理;
@返回-1:处理正常,继续处理紧急数据,并且尝试触发发送逻辑
/*
* Process a segment received while the socket is in SYN_SENT.
*
* Return values, as produced by the code below:
*   1  - segment rejected (reset_and_undo): parsed options are cleared and
*        the saved mss_clamp is restored; per the accompanying notes the
*        caller answers with a RST.
*   0  - segment was freed here (discard); no further processing.
*  -1  - SYN+ACK accepted and an ACK was sent immediately; the skb is NOT
*        freed here, the caller continues with it.
*/
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
struct tcphdr *th, unsigned len)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
// Remember mss_clamp so the undo paths can restore it after option parsing
int saved_clamp = tp->rx_opt.mss_clamp;
// Parse the TCP options
tcp_parse_options(skb, &tp->rx_opt, 0);
// The segment carries the ACK flag
if (th->ack) {
/* rfc793:
* "If the state is SYN-SENT then
* first check the ACK bit
* If the ACK bit is set
* If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
* a reset (unless the RST bit is set, if so drop
* the segment and return)"
*
* We do not send data with SYN, so that RFC-correct
* test reduces to:
*/
// The ACK does not acknowledge our SYN: take the reset path (returns 1)
if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
goto reset_and_undo;
// Timestamp option: a non-zero echoed timestamp must lie between
// retrans_stamp and the current time, otherwise take the reset path
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
!between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
tcp_time_stamp)) {
NET_INC_STATS_BH(LINUX_MIB_PAWSACTIVEREJECTED);
goto reset_and_undo;
}
/* Now ACK is acceptable.
*
* "If the RST bit is set
* If the ACK was acceptable then signal the user "error:
* connection reset", drop the segment, enter CLOSED state,
* delete TCB, and return."
*/
// The ACK passed the checks above; if RST is also set, reset the
// connection and drop the segment
if (th->rst) {
tcp_reset(sk);
goto discard;
}
/* rfc793:
* "fifth, if neither of the SYN or RST bits is set then
* drop the segment and return."
*
* See note below!
* --ANK(990513)
*/
// In SYN_SENT, a segment that is not a SYN+ACK is not processed further
if (!th->syn)
goto discard_and_undo;
// From here on: a SYN+ACK was received in SYN_SENT
// Initialize a number of TCB fields
/* rfc793:
* "If the SYN bit is on ...
* are acceptable then ...
* (our SYN has been ACKed), change the connection
* state to ESTABLISHED..."
*/
TCP_ECN_rcv_synack(tp, th);
tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
tcp_ack(sk, skb, FLAG_SLOWPATH);
/* Ok.. it's good. Set up sequence numbers and
* move to established.
*/
tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
/* RFC1323: The window in SYN & SYN/ACK segments is
* never scaled.
*/
tp->snd_wnd = ntohs(th->window);
tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq);
// Window scaling not negotiated: zero both scale factors and cap the
// window clamp at 65535
if (!tp->rx_opt.wscale_ok) {
tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
tp->window_clamp = min(tp->window_clamp, 65535U);
}
// Timestamps seen: enable them and account for the option in the
// header length and the advertised MSS
if (tp->rx_opt.saw_tstamp) {
tp->rx_opt.tstamp_ok = 1;
tp->tcp_header_len =
sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
tcp_store_ts_recent(tp);
} else {
tp->tcp_header_len = sizeof(struct tcphdr);
}
if (tcp_is_sack(tp) && sysctl_tcp_fack)
tcp_enable_fack(tp);
tcp_mtup_init(sk);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
/* Remember, tcp_poll() does not lock socket!
* Change state from SYN-SENT only after copied_seq
* is initialized. */
tp->copied_seq = tp->rcv_nxt;
smp_mb();
// On the client side, receiving the SYN+ACK is enough to move the TCB
// to TCP_ESTABLISHED
tcp_set_state(sk, TCP_ESTABLISHED);
security_inet_conn_established(sk, skb);
/* Make sure socket is routed, for correct metrics. */
icsk->icsk_af_ops->rebuild_header(sk);
tcp_init_metrics(sk);
// Initialize congestion control
tcp_init_congestion_control(sk);
/* Prevent spurious tcp_cwnd_restart() on first data
* packet.
*/
tp->lsndtime = tcp_time_stamp;
// Initialize the send/receive buffer space
tcp_init_buffer_space(sk);
// Start the keepalive timer if SOCK_KEEPOPEN is set
if (sock_flag(sk, SOCK_KEEPOPEN))
inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
// Set the header-prediction flags (fast path is only switched on when
// the send window scale is zero)
if (!tp->rx_opt.snd_wscale)
__tcp_fast_path_on(tp, tp->snd_wnd);
else
tp->pred_flags = 0;
// Wake up the connect() system call; the caller is quite likely
// blocked waiting for the connection
if (!sock_flag(sk, SOCK_DEAD)) {
sk->sk_state_change(sk);
sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
}
// Decide between a delayed ACK and an immediate ACK
if (sk->sk_write_pending ||
icsk->icsk_accept_queue.rskq_defer_accept ||
icsk->icsk_ack.pingpong) {
/* Save one ACK. Data will be ready after
* several ticks, if write_pending is set.
*
* It may be deleted, but with this feature tcpdumps
* look so _wonderfully_ clever, that I was not able
* to stand against the temptation 8) --ANK
*/
inet_csk_schedule_ack(sk);
icsk->icsk_ack.lrcvtime = tcp_time_stamp;
icsk->icsk_ack.ato = TCP_ATO_MIN;
tcp_incr_quickack(sk);
tcp_enter_quickack_mode(sk);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
TCP_DELACK_MAX, TCP_RTO_MAX);
// Shared exit: the delayed-ACK path falls through to here, and every
// "goto discard" in this function also lands here. Frees the skb, returns 0.
discard:
__kfree_skb(skb);
return 0;
} else {
// Acknowledge immediately
tcp_send_ack(sk);
}
return -1;
}
// RST without ACK: the segment is dropped through discard_and_undo, which
// returns 0 (no RST is sent back on this path)
if (th->rst) {
/* rfc793:
* "If the RST bit is set
*
* Otherwise (no ACK) drop the segment and return."
*/
goto discard_and_undo;
}
/* PAWS check. */
if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
tcp_paws_check(&tp->rx_opt, 0))
goto discard_and_undo;
// A SYN without ACK was received: the simultaneous-open case
if (th->syn) {
/* We see SYN without ACK. It is attempt of
* simultaneous connect with crossed SYNs.
* Particularly, it can be connect to self.
*/
// Currently in TCP_SYN_SENT; receiving the SYN moves the state to
// TCP_SYN_RECV
tcp_set_state(sk, TCP_SYN_RECV);
// Re-initialize some TCB fields
if (tp->rx_opt.saw_tstamp) {
tp->rx_opt.tstamp_ok = 1;
tcp_store_ts_recent(tp);
tp->tcp_header_len =
sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
} else {
tp->tcp_header_len = sizeof(struct tcphdr);
}
tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
/* RFC1323: The window in SYN & SYN/ACK segments is
* never scaled.
*/
tp->snd_wnd = ntohs(th->window);
tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
tp->max_window = tp->snd_wnd;
TCP_ECN_rcv_syn(tp, th);
tcp_mtup_init(sk);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
// Send a SYN+ACK to the peer; per the original note, once the peer's ACK
// arrives the handshake succeeds and the connection becomes established
tcp_send_synack(sk);
// Drop the received SYN and return 0, stopping further processing
goto discard;
}
/* "fifth, if neither of the SYN or RST bits is set then
* drop the segment and return."
*/
// Undo the effects of option parsing, then drop the segment (returns 0)
discard_and_undo:
tcp_clear_options(&tp->rx_opt);
tp->rx_opt.mss_clamp = saved_clamp;
goto discard;
reset_and_undo:
// Clear the options and return 1; per the original note the caller then
// sends a RST to the peer. The skb is not freed on this path.
tcp_clear_options(&tp->rx_opt);
tp->rx_opt.mss_clamp = saved_clamp;
return 1;
}
2.2.1 关于同时打开状态迁移
同时打开的状态迁移过程如下图所示:
本质上来讲,在这种情形下,客户端后续的处理过程与三次握手中服务器端的处理过程是一致的。
3. 向服务器端发送ACK报文 tcp_send_ack
无论是立即确认还是延时确认,最终都是通过tcp_send_ack()完成的。该函数会生成并发送ACK报文,同时更新通告给对端的窗口。
/*
 * Build a data-less ACK segment and hand it to tcp_transmit_skb().
 * (Original description: "sends an ack and also updates the window".)
 *
 * Nothing is sent for a socket in TCP_CLOSE. If no skb can be allocated,
 * the ACK is not dropped silently: it is scheduled and the delayed-ACK
 * timer is armed so that it gets retried later.
 */
void tcp_send_ack(struct sock *sk)
{
	struct sk_buff *ack_skb;

	/* Once the connection has been reset we must not transmit again. */
	if (sk->sk_state == TCP_CLOSE)
		return;

	/*
	 * This skb never goes onto the write queue, so tcp_transmit_skb()
	 * is what ties its ownership to this sock.
	 */
	ack_skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
	if (ack_skb) {
		/* Leave headroom for the headers, then fill in the control bits. */
		skb_reserve(ack_skb, MAX_TCP_HEADER);
		tcp_init_nondata_skb(ack_skb, tcp_acceptable_seq(sk),
				     TCPCB_FLAG_ACK);

		/* Transmitting also clears any pending delayed ACK for us. */
		TCP_SKB_CB(ack_skb)->when = tcp_time_stamp;
		tcp_transmit_skb(sk, ack_skb, 0, GFP_ATOMIC);
		return;
	}

	/* Allocation failed: fall back to the delayed-ACK timer. */
	inet_csk_schedule_ack(sk);
	inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
				  TCP_DELACK_MAX, TCP_RTO_MAX);
}