res_nsend()
@statp: resolver状态结构
@buf:组装好的DNS查询报文
@buflen:查询报文长度
@ans:用于保存响应报文的缓冲区
@anssiz:响应缓冲区ans的大小(字节数)
/*
 * res_nsend - send an already assembled DNS query and obtain the response.
 *
 * @statp:  resolver state (options, nameserver list, per-server sockets)
 * @buf:    assembled DNS query message
 * @buflen: length of the query in bytes
 * @ans:    caller-supplied buffer that receives the response message
 * @anssiz: capacity of @ans in bytes
 *
 * Returns the response length on success (either the cached answer length or
 * the length reported by send_vc()/send_dg()), or -1 on failure.  errno is
 * set explicitly on the EINVAL, ESRCH and "all servers exhausted" exits; the
 * `fail` exit does not assign errno itself.
 */
int res_nsend(res_state statp, const u_char *buf, int buflen, u_char *ans, int anssiz)
{
    int gotsomewhere, terrno, try, v_circuit, resplen, ns, n;
    ResolvCacheStatus cache_status = RESOLV_CACHE_UNSUPPORTED;

    /* The answer buffer must be able to hold at least the fixed DNS header. */
    if (anssiz < HFIXEDSZ) {
        errno = EINVAL;
        return (-1);
    }
    /* Query over TCP when RES_USEVC is set or the query exceeds PACKETSZ. */
    v_circuit = (statp->options & RES_USEVC) || buflen > PACKETSZ;
    gotsomewhere = 0;
    terrno = ETIMEDOUT;

    /* Look the query up in the per-network cache first. */
    int anslen = 0;
    cache_status = _resolv_cache_lookup(statp->netid, buf, buflen,
                                        ans, anssiz, &anslen);
    if (cache_status == RESOLV_CACHE_FOUND) {
        /* Cache hit: the answer is already in ans. */
        return anslen;
    } else if (cache_status != RESOLV_CACHE_UNSUPPORTED) {
        // had a cache miss for a known network, so populate the thread private
        // data so the normal resolve path can do its thing
        // (per the original annotation, this is where the nameserver
        // addresses used below come from)
        _resolv_populate_res_for_net(statp);
    }

    if (statp->nscount == 0) {
        // We have no nameservers configured, so there's no point trying.
        // Tell the cache the query failed, or any retries and anyone else asking the same
        // question will block for PENDING_REQUEST_TIMEOUT seconds instead of failing fast.
        _resolv_cache_query_failed(statp->netid, buf, buflen);
        errno = ESRCH;
        return (-1);
    }

    /*
     * If the ns_addr_list in the resolver context has changed, then
     * invalidate our cached copy and the associated timing data.
     *
     * NOTE(review): the original annotation claims this block never fires on
     * Android because _resolv_populate_res_for_net() zeroes every
     * nsaddr_list[ns].sin_family; that helper is not visible here - confirm.
     */
    if (EXT(statp).nscount != 0) {
        int needclose = 0;
        struct sockaddr_storage peer;
        socklen_t peerlen;

        if (EXT(statp).nscount != statp->nscount) {
            needclose++;
        } else {
            for (ns = 0; ns < statp->nscount; ns++) {
                if (statp->nsaddr_list[ns].sin_family &&
                    !sock_eq((struct sockaddr *)(void *)&statp->nsaddr_list[ns],
                             (struct sockaddr *)(void *)&EXT(statp).ext->nsaddrs[ns])) {
                    needclose++;
                    break;
                }
                if (EXT(statp).nssocks[ns] == -1)
                    continue;
                /* An open socket must still be connected to this server. */
                peerlen = sizeof(peer);
                if (getpeername(EXT(statp).nssocks[ns],
                                (struct sockaddr *)(void *)&peer, &peerlen) < 0) {
                    needclose++;
                    break;
                }
                if (!sock_eq((struct sockaddr *)(void *)&peer,
                             get_nsaddr(statp, (size_t)ns))) {
                    needclose++;
                    break;
                }
            }
        }
        if (needclose) {
            res_nclose(statp);
            EXT(statp).nscount = 0;
        }
    }

    /*
     * Maybe initialize our private copy of the ns_addr_list.
     */
    if (EXT(statp).nscount == 0) {
        for (ns = 0; ns < statp->nscount; ns++) {
            EXT(statp).nstimes[ns] = RES_MAXTIME;
            EXT(statp).nssocks[ns] = -1;
            if (!statp->nsaddr_list[ns].sin_family)
                continue;
            EXT(statp).ext->nsaddrs[ns].sin =
                statp->nsaddr_list[ns];
        }
        EXT(statp).nscount = statp->nscount;
    }

    /*
     * Some resolvers want to even out the load on their nameservers.
     * Note that RES_BLAST overrides RES_ROTATE.
     * (The original annotation states Android sets neither option.)
     */
    if ((statp->options & RES_ROTATE) != 0U &&
        (statp->options & RES_BLAST) == 0U) {
        union res_sockaddr_union inu;
        struct sockaddr_in ina;
        int lastns = statp->nscount - 1;
        int fd;
        u_int16_t nstime;

        /* Save slot 0, shift every slot down by one, put slot 0 last. */
        if (EXT(statp).ext != NULL)
            inu = EXT(statp).ext->nsaddrs[0];
        ina = statp->nsaddr_list[0];
        fd = EXT(statp).nssocks[0];
        nstime = EXT(statp).nstimes[0];
        for (ns = 0; ns < lastns; ns++) {
            if (EXT(statp).ext != NULL)
                EXT(statp).ext->nsaddrs[ns] =
                    EXT(statp).ext->nsaddrs[ns + 1];
            statp->nsaddr_list[ns] = statp->nsaddr_list[ns + 1];
            EXT(statp).nssocks[ns] = EXT(statp).nssocks[ns + 1];
            EXT(statp).nstimes[ns] = EXT(statp).nstimes[ns + 1];
        }
        if (EXT(statp).ext != NULL)
            EXT(statp).ext->nsaddrs[lastns] = inu;
        statp->nsaddr_list[lastns] = ina;
        EXT(statp).nssocks[lastns] = fd;
        EXT(statp).nstimes[lastns] = nstime;
    }

    /* Send the query: up to statp->retry rounds, stopping at the first answer. */
    for (try = 0; try < statp->retry; try++) {
        struct __res_stats stats[MAXNS];
        struct __res_params params;
        int revision_id = _resolv_cache_get_resolver_stats(statp->netid, &params, stats);
        bool usable_servers[MAXNS];
        android_net_res_stats_get_usable_servers(&params, stats, statp->nscount,
                                                 usable_servers);

        /* Each round walks every configured nameserver marked usable. */
        for (ns = 0; ns < statp->nscount; ns++) {
            if (!usable_servers[ns]) continue;
            struct sockaddr *nsap;
            int nsaplen;
            time_t now = 0;
            int rcode = RCODE_INTERNAL_ERROR;
            int delay = 0;

            nsap = get_nsaddr(statp, (size_t)ns);
            nsaplen = get_salen(nsap);
            /* Record the index of the server being tried in _flags. */
            statp->_flags &= ~RES_F_LASTMASK;
            statp->_flags |= (ns << RES_F_LASTSHIFT);
 same_ns:
            /* Optional query hook (per the original annotation, unused on Android). */
            if (statp->qhook) {
                int done = 0, loops = 0;

                do {
                    res_sendhookact act;

                    act = (*statp->qhook)(&nsap, &buf, &buflen,
                                          ans, anssiz, &resplen);
                    switch (act) {
                    case res_goahead:
                        done = 1;
                        break;
                    case res_nextns:
                        res_nclose(statp);
                        goto next_ns;
                    case res_done:
                        if (cache_status == RESOLV_CACHE_NOTFOUND) {
                            _resolv_cache_add(statp->netid, buf, buflen,
                                              ans, resplen);
                        }
                        return (resplen);
                    case res_modified:
                        /* give the hook another try */
                        if (++loops < 42) /*doug adams*/
                            break;
                        /*FALLTHROUGH*/
                    case res_error:
                        /*FALLTHROUGH*/
                    default:
                        goto fail;
                    }
                } while (!done);
            }

            if (v_circuit) {
                /* Use VC; at most one attempt per server. */
                try = statp->retry;

                n = send_vc(statp, buf, buflen, ans, anssiz, &terrno,
                            ns, &now, &rcode, &delay);

                /*
                 * Only record stats the first time we try a query. This ensures that
                 * queries that deterministically fail (e.g., a name that always returns
                 * SERVFAIL or times out) do not unduly affect the stats.
                 *
                 * NOTE(review): try was just set to statp->retry above, and the
                 * loop only runs while statp->retry > 0, so this condition never
                 * holds here and no sample is recorded on the VC path.  Left
                 * unchanged - confirm whether that is intended.
                 */
                if (try == 0) {
                    struct __res_sample sample;
                    _res_stats_set_sample(&sample, now, rcode, delay);
                    _resolv_cache_add_resolver_stats_sample(statp->netid, revision_id,
                                                            ns, &sample, params.max_samples);
                }

                if (n < 0)
                    goto fail;
                if (n == 0)
                    goto next_ns;
                resplen = n;
            } else {
                /* Query over UDP; n is the response length on success. */
                n = send_dg(statp, buf, buflen, ans, anssiz, &terrno,
                            ns, &v_circuit, &gotsomewhere, &now, &rcode, &delay);

                /* Only the first round contributes a resolver-stats sample. */
                if (try == 0) {
                    struct __res_sample sample;
                    _res_stats_set_sample(&sample, now, rcode, delay);
                    _resolv_cache_add_resolver_stats_sample(statp->netid, revision_id,
                                                            ns, &sample, params.max_samples);
                }

                /* Negative: stop retrying altogether. */
                if (n < 0)
                    goto fail;
                /* Zero: move on to the next nameserver. */
                if (n == 0)
                    goto next_ns;
                /* send_dg() asked for TCP: retry the same server over a VC. */
                if (v_circuit)
                    goto same_ns;
                resplen = n;
            }

            /*
             * A response message was received.  That does not by itself mean
             * the lookup succeeded - the message may carry no answer.  Cache
             * it if the earlier cache lookup was a miss.
             */
            if (cache_status == RESOLV_CACHE_NOTFOUND) {
                _resolv_cache_add(statp->netid, buf, buflen,
                                  ans, resplen);
            }
            /*
             * If we have temporarily opened a virtual circuit,
             * or if we haven't been asked to keep a socket open,
             * close the socket.
             */
            if ((v_circuit && (statp->options & RES_USEVC) == 0U) ||
                (statp->options & RES_STAYOPEN) == 0U) {
                res_nclose(statp);
            }
            /* Optional response hook (per the original annotation, unused on Android). */
            if (statp->rhook) {
                int done = 0, loops = 0;

                do {
                    res_sendhookact act;

                    act = (*statp->rhook)(nsap, buf, buflen,
                                          ans, anssiz, &resplen);
                    switch (act) {
                    case res_goahead:
                    case res_done:
                        done = 1;
                        break;
                    case res_nextns:
                        res_nclose(statp);
                        goto next_ns;
                    case res_modified:
                        /* give the hook another try */
                        if (++loops < 42) /*doug adams*/
                            break;
                        /*FALLTHROUGH*/
                    case res_error:
                        /*FALLTHROUGH*/
                    default:
                        goto fail;
                    }
                } while (!done);
            }
            /* Hand the response length back to the caller. */
            return (resplen);
 next_ns: ;
        } /*foreach ns*/
    } /*foreach retry*/

    /* Every server/round was exhausted: close sockets and pick an errno. */
    res_nclose(statp);
    if (!v_circuit) {
        if (!gotsomewhere)
            errno = ECONNREFUSED;  /* no nameservers found */
        else
            errno = ETIMEDOUT;     /* no answer obtained */
    } else
        errno = terrno;

    _resolv_cache_query_failed(statp->netid, buf, buflen);

    return (-1);
 fail:
    /* Unrecoverable error: tell the cache, release sockets, give up. */
    _resolv_cache_query_failed(statp->netid, buf, buflen);
    res_nclose(statp);
    return (-1);
}
send_dg()
/*
 * send_dg - send the query to nameserver slot `ns` over a datagram socket
 * and wait for a matching reply.
 *
 * @statp:        resolver state; the per-server socket lives in EXT(statp).nssocks[ns]
 * @buf/@buflen:  query message and its length
 * @ans/@anssiz:  receive buffer and its capacity
 * @terrno:       out: errno value saved on some failure paths
 * @ns:           index of the nameserver to use
 * @v_circuit:    out: set to 1 when the reply is truncated and TC is honoured
 * @gotsomewhere: out: set to 1 on timeout or once any datagram was received
 * @at:           out: time(NULL) taken at entry
 * @rcode:        out: RCODE_INTERNAL_ERROR initially, RCODE_TIMEOUT on timeout,
 *                otherwise the rcode of the accepted response
 * @delay:        out: value of _res_stats_calculate_rtt() for the accepted response
 *                (per the original annotation, milliseconds)
 *
 * Return values as produced by this function:
 *   > 0  length of the accepted response, or 1 together with *v_circuit = 1
 *        when the reply was truncated
 *   0    this server failed or did not answer
 *   -1   error other than the per-server ones (socket() or setsockopt() failure)
 */
static int send_dg(res_state statp,
const u_char *buf, int buflen, u_char *ans, int anssiz,
int *terrno, int ns, int *v_circuit, int *gotsomewhere,
time_t *at, int *rcode, int* delay)
{
*at = time(NULL);
*rcode = RCODE_INTERNAL_ERROR;
*delay = 0;
const HEADER *hp = (const HEADER *)(const void *)buf;
HEADER *anhp = (HEADER *)(void *)ans;
const struct sockaddr *nsap;
int nsaplen;
struct timespec now, timeout, finish, done;
fd_set dsmask;
struct sockaddr_storage from;
socklen_t fromlen;
int resplen, seconds, n, s;
// Look up the address of the nameserver to query.
nsap = get_nsaddr(statp, (size_t)ns);
nsaplen = get_salen(nsap);
// Open the datagram socket for this server if it is not open yet.
if (EXT(statp).nssocks[ns] == -1) {
EXT(statp).nssocks[ns] = socket(nsap->sa_family, SOCK_DGRAM | SOCK_CLOEXEC, 0);
// Descriptors above highestFD are rejected (per the original annotation,
// because select() is used later and is limited to FD_SETSIZE).
// NOTE(review): the error switch below is reached only if res_nclose()
// resets nssocks[ns] to a negative value - confirm.
if (EXT(statp).nssocks[ns] > highestFD) {
res_nclose(statp);
errno = ENOTSOCK;
}
// Socket creation failed (or was rejected above): classify the error.
if (EXT(statp).nssocks[ns] < 0) {
switch (errno) {
case EPROTONOSUPPORT:
#ifdef EPFNOSUPPORT
case EPFNOSUPPORT:
#endif
case EAFNOSUPPORT:
// These errors concern only this server's address family/protocol:
// return 0 so the caller can try the next nameserver.
Perror(statp, stderr, "socket(dg)", errno);
return (0);
default:
// Any other error is treated as unrecoverable: return -1 so the
// caller stops retrying.
*terrno = errno;
Perror(statp, stderr, "socket(dg)", errno);
return (-1);
}
}
// Apply the socket mark to the new socket when one was set for this query.
if (statp->_mark != MARK_UNSET) {
if (setsockopt(EXT(statp).nssocks[ns], SOL_SOCKET,
SO_MARK, &(statp->_mark), sizeof(statp->_mark)) < 0) {
res_nclose(statp);
return -1;
}
}
#ifndef CANNOT_CONNECT_DGRAM
/*
 * On a 4.3BSD+ machine (client and server,
 * actually), sending to a nameserver datagram
 * port with no nameserver will cause an
 * ICMP port unreachable message to be returned.
 * If our datagram socket is "connected" to the
 * server, we get an ECONNREFUSED error on the next
 * socket operation, and select returns if the
 * error message is received. We can thus detect
 * the absence of a nameserver without timing out.
 */
// Bind the socket to a randomly chosen local port.
if (random_bind(EXT(statp).nssocks[ns], nsap->sa_family) < 0) {
Aerror(statp, stderr, "bind(dg)", errno, nsap,
nsaplen);
res_nclose(statp);
return (0);
}
// Connect the datagram socket to the nameserver address.
if (__connect(EXT(statp).nssocks[ns], nsap, (socklen_t)nsaplen) < 0) {
Aerror(statp, stderr, "connect(dg)", errno, nsap,
nsaplen);
res_nclose(statp);
return (0);
}
#endif /* !CANNOT_CONNECT_DGRAM */
}
// From here on the socket is referred to as s.
s = EXT(statp).nssocks[ns];
// A connected socket uses send(); otherwise sendto() with the explicit address.
#ifndef CANNOT_CONNECT_DGRAM
if (send(s, (const char*)buf, (size_t)buflen, 0) != buflen) {
Perror(statp, stderr, "send", errno);
res_nclose(statp);
return (0);
}
#else /* !CANNOT_CONNECT_DGRAM */
if (sendto(s, (const char*)buf, buflen, 0, nsap, nsaplen) != buflen)
{
Aerror(statp, stderr, "sendto", errno, nsap, nsaplen);
res_nclose(statp);
return (0);
}
#endif /* !CANNOT_CONNECT_DGRAM */
/*
 * Wait for reply.
 */
// Compute the absolute deadline: now + get_timeout() seconds.
seconds = get_timeout(statp, ns);
now = evNowTime();
timeout = evConsTime((long)seconds, 0L);
finish = evAddTime(now, timeout);
retry:
// Wait until s is readable or the deadline `finish` passes. Per the original
// annotation, retrying_select() wraps pselect() and returns its result.
n = retrying_select(s, &dsmask, NULL, &finish);
// 0 means the deadline passed: report a timeout and return 0 so the caller
// moves on to the next nameserver.
if (n == 0) {
*rcode = RCODE_TIMEOUT;
Dprint(statp->options & RES_DEBUG, (stdout, ";; timeout\n"));
*gotsomewhere = 1;
return (0);
}
// Negative means the wait itself failed: close and return 0 (next nameserver).
if (n < 0) {
Perror(statp, stderr, "select", errno);
res_nclose(statp);
return (0);
}
// Data is available to read.
errno = 0;
fromlen = sizeof(from);
// Read the response into ans; the sender's address is stored in from.
resplen = recvfrom(s, (char*)ans, (size_t)anssiz,0,
(struct sockaddr *)(void *)&from, &fromlen);
// A read error (or empty datagram) gives up on this server.
if (resplen <= 0) {
Perror(statp, stderr, "recvfrom", errno);
res_nclose(statp);
return (0);
}
*gotsomewhere = 1;
// The datagram is shorter than the fixed DNS header: give up on this server.
if (resplen < HFIXEDSZ) {
*terrno = EMSGSIZE;
res_nclose(statp);
return (0);
}
// The response ID must equal the query ID. On mismatch keep waiting on the
// same socket; `finish` is not recomputed, so the original deadline still applies.
if (hp->id != anhp->id) {
/*
 * response from old query, ignore it.
 * XXX - potential security hazard could
 * be detected here.
 */
DprintQ((statp->options & RES_DEBUG) ||
(statp->pfcode & RES_PRF_REPLY),
(stdout, ";; old answer:\n"),
ans, (resplen > anssiz) ? anssiz : resplen);
goto retry;
}
// Unless RES_INSECURE1 is set, the sender must be one of our nameservers.
if (!(statp->options & RES_INSECURE1) &&
!res_ourserver_p(statp, (struct sockaddr *)(void *)&from)) {
/*
 * response from wrong server? ignore it.
 * XXX - potential security hazard could
 * be detected here.
 */
DprintQ((statp->options & RES_DEBUG) ||
(statp->pfcode & RES_PRF_REPLY),
(stdout, ";; not our server:\n"),
ans, (resplen > anssiz) ? anssiz : resplen);
goto retry;
}
#ifdef RES_USE_EDNS0
if (anhp->rcode == FORMERR && (statp->options & RES_USE_EDNS0) != 0U) {
/*
 * Do not retry if the server do not understand EDNS0.
 * The case has to be captured here, as FORMERR packet do not
 * carry query section, hence res_queriesmatch() returns 0.
 */
DprintQ(statp->options & RES_DEBUG,
(stdout, "server rejected query with EDNS0:\n"),
ans, (resplen > anssiz) ? anssiz : resplen);
/* record the error */
statp->_flags |= RES_F_EDNS0ERR;
res_nclose(statp);
return (0);
}
#endif
// Unless RES_INSECURE2 is set, check query and response against each other
// at the message-content level; on mismatch keep reading responses.
// NOTE(review): the answer end pointer is ans + anssiz (buffer capacity),
// not ans + resplen - confirm this is intended.
if (!(statp->options & RES_INSECURE2) &&
!res_queriesmatch(buf, buf + buflen,
ans, ans + anssiz)) {
/*
 * response contains wrong query? ignore it.
 * XXX - potential security hazard could
 * be detected here.
 */
DprintQ((statp->options & RES_DEBUG) ||
(statp->pfcode & RES_PRF_REPLY),
(stdout, ";; wrong query name:\n"),
ans, (resplen > anssiz) ? anssiz : resplen);
goto retry;;
}
// *delay is the time elapsed between `now` (taken after sending) and the
// accepted response; per the original annotation, in milliseconds.
done = evNowTime();
*delay = _res_stats_calculate_rtt(&done, &now);
// The server answered but reported a failure rcode.
if (anhp->rcode == SERVFAIL ||
anhp->rcode == NOTIMP ||
anhp->rcode == REFUSED) {
res_nclose(statp);
/* don't retry if called from dig */
if (!statp->pfcode) {
*rcode = anhp->rcode;
return (0);
}
}
// Truncated response (TC bit set) and RES_IGNTC not set: ask for TCP.
if (!(statp->options & RES_IGNTC) && anhp->tc) {
// With *v_circuit set to 1 and a positive return value, res_nsend()
// retries the same server over TCP.
*v_circuit = 1;
res_nclose(statp);
return (1);
}
/*
 * All is well, or the error is fatal. Signal that the
 * next nameserver ought not be tried.
 */
// Report the response's rcode to the caller.
if (resplen > 0) {
*rcode = anhp->rcode;
}
// Return the length of the response message.
return (resplen);
}
其它细节
每次查询时DNS地址的选择
res_nsend()在调用send_dg()时传入了一个ns参数,通过该参数,send_dg()从statp中取得本次查询使用的DNS服务器地址。
/*
 * get_nsaddr - pick the appropriate address for nameserver slot n.
 * See res_init() for initialization.
 *
 * @statp: resolver state holding both address tables
 * @n:     nameserver index
 *
 * Returns a pointer into statp: either the extended table
 * EXT(statp).ext->nsaddrs[n] or the classic statp->nsaddr_list[n].
 * The pointer is not a copy; it refers to storage owned by statp.
 */
static struct sockaddr *get_nsaddr(res_state statp, size_t n)
{
    /*
     * NOTE(review): the original annotation says Android always takes the
     * first branch because of _resolv_populate_res_for_net(); that helper is
     * not visible here - confirm.
     */
    if (!statp->nsaddr_list[n].sin_family && EXT(statp).ext) {
        /*
         * - EXT(statp).ext->nsaddrs[n] holds an address that is larger
         *   than struct sockaddr, and
         * - user code did not update statp->nsaddr_list[n].
         */
        return (struct sockaddr *)(void *)&EXT(statp).ext->nsaddrs[n];
    } else {
        /*
         * - user code updated statp->nsaddr_list[n], or
         * - statp->nsaddr_list[n] has the same content as
         *   EXT(statp).ext->nsaddrs[n].
         */
        return (struct sockaddr *)(void *)&statp->nsaddr_list[n];
    }
}
等待响应的超时时间
@ns:使用第几个DNS服务器地址,可取的值为0~3
/*
 * get_timeout - number of seconds to wait for a reply from nameserver `ns`.
 *
 * @statp: resolver state; retrans and nscount are read
 * @ns:    index of the nameserver being queried
 *
 * The base interval statp->retrans is left-shifted by the server index, so
 * later servers start from a larger value.  For every server except the
 * first, the value is then divided by the number of configured servers.
 * The result is never less than 1.
 */
static int get_timeout(const res_state statp, const int ns)
{
    int secs = statp->retrans << ns;

    /* Only servers after the first share the interval among nscount servers. */
    if (ns > 0)
        secs /= statp->nscount;

    /* Clamp non-positive results to one second. */
    return (secs > 0) ? secs : 1;
}
上面的函数表明,在retrans为5的情况下,如果配置的DNS服务器地址个数为4,那么timeout最终的取值依次为:5,2,5,10;如果配置的DNS服务器地址个数为2,那么timeout最终的取值依次为:5,5