核心数据结构

resolv_cache_info

每个网卡都有一个该结构，用于保存该网卡相关的DNS配置信息，以及在该网卡上进行的DNS查询结果缓存信息，系统中所有网卡的该结构信息组织成一个单链表。

/*
 * Per-network resolver record: the DNS configuration of one network (netid)
 * together with the cache of query results obtained on it. Records are
 * chained into a singly linked list through 'next'.
 */
struct resolv_cache_info {
    // Identifier of the network this record describes.
    unsigned                    netid;
    // Cache of DNS query results for this network.
    Cache*                      cache;
    // Next record in the singly linked list of per-network records.
    struct resolv_cache_info*   next;
    // Number of valid slots in the nameservers[] array below.
    int                         nscount;
    // Configured DNS server addresses; at most MAXNS can be stored.
    char*                       nameservers[MAXNS];
    // Converted form of the DNS server addresses, used when sending queries.
    struct addrinfo*            nsaddrinfo[MAXNS];
    // Incremented by one every time the set of DNS servers is changed.
    int                         revision_id; // # times the nameservers have been replaced
    struct __res_params         params;
    struct __res_stats          nsstats[MAXNS];
    // Domain-search data (see hostname(7)).
    // NOTE(review): reportedly rarely used on Android — confirm before relying on it.
    char                        defdname[MAXDNSRCHPATH];
    int                         dnsrch_offset[MAXDNSRCH+1];  // offsets into defdname
};

链表初始化

// Head of the list of caches.  Protected by _res_cache_list_lock.
static struct resolv_cache_info _res_cache_list;

/*
 * One-time initialiser for the cache list: sets up the mutex that guards the
 * list and clears the list head. It is the routine handed to pthread_once()
 * by _resolv_cache_lookup().
 */
static void _res_cache_init(void)
{
    pthread_mutex_init(&_res_cache_list_lock, NULL);
    memset(&_res_cache_list, 0, sizeof _res_cache_list);
}

初始化resolv_cache_info链表的表头结构以及其互斥锁。_res_cache_list结构本身只作为表头使用，并不保存任何网卡的cache信息，即链表中真正的第一个cache_info信息是从_res_cache_list.next开始的。

另外，resolv_cache_info结构的创建是在设置DNS地址的时候完成的，具体可以参考笔记“DNS参数设置”。

查询结果cache表头Cache

/*
 * Header of one per-network result cache: a hash table of entries plus an
 * MRU list threaded through the same entries, and the list of queries that
 * are currently in flight.
 */
typedef struct resolv_cache {
    // Capacity of the cache; also the number of slots allocated for 'entries'
    // and the modulus used to pick a hash bucket.
    int              max_entries;
    // Number of entries currently stored.
    int              num_entries;
    // Sentinel head of the circular MRU list (points to itself when empty).
    Entry            mru_list;
    int              last_id;
    // Hash table storage, allocated by _resolv_cache_create().
    Entry*           entries;
    // Head of the list of pending (in-flight) queries. When several threads
    // ask for the same query, only the first goes to the network; the others
    // block in _cache_check_pending_request_locked() until it completes.
    PendingReqInfo   pending_requests;
} Cache;

一次查询结果的cache项Entry

/* cache entry. for simplicity, 'hash' and 'hlink' are inlined in this
 * structure though they are conceptually part of the hash table.
 *
 * similarly, mru_next and mru_prev are part of the global MRU list
 */
typedef struct Entry {
    // Hash of the entry; compared first during lookup, before entry_equals().
    // Presumably derived from the query packet by entry_init_key() — confirm.
    unsigned int     hash;   /* hash value */
    // Next entry in the same hash bucket (collision chain).
    struct Entry*    hlink;  /* next in collision chain */
    // Neighbours in the doubly linked MRU list.
    struct Entry*    mru_prev;
    struct Entry*    mru_next;

    // Raw DNS query packet and the raw DNS answer packet stored for it.
    const uint8_t*   query;
    int              querylen;
    const uint8_t*   answer;
    int              answerlen;
    // Absolute time (same clock as _time_now()) at or after which the entry
    // is treated as stale; set to TTL + _time_now() when the entry is added.
    time_t           expires;   /* time_t when the entry isn't valid any more */
    int              id;        /* for debugging purpose */
} Entry;

cache的创建

cache是和resolv_cache_info结构一起创建的，所以其创建过程也是在设置DNS地址的时候执行的，创建cache的接口是_resolv_cache_create()，其代码如下：

/* Default capacity of a result cache. The expression is parenthesised so the
 * macro behaves as a single operand wherever it is expanded (for example on
 * the right-hand side of '/' or '%'). */
#define  CONFIG_MAX_ENTRIES    (64 * 2 * 5)

/*
 * Decide how many entries a newly created cache may hold.
 *
 * The cache is enabled only when the ANDROID_DNS_MODE environment variable is
 * exactly "local"; in that case CONFIG_MAX_ENTRIES is returned. If the
 * variable is unset or has any other value, the size is 0.
 * NOTE(review): "local" is presumably set only for the DNS proxy process
 * itself — confirm against the code that sets the variable.
 */
static int _res_cache_get_max_entries( void )
{
    const char* mode = getenv("ANDROID_DNS_MODE");
    int entries;

    if (mode != NULL && strcmp(mode, "local") == 0) {
        entries = CONFIG_MAX_ENTRIES;
    } else {
        /* Not running in "local" mode: caching is disabled. */
        entries = 0;
    }

    XLOG("cache size: %d", entries);
    return entries;
}

/*
 * Allocate and initialise an empty cache.
 *
 * Returns the new cache, or NULL if either the header or the hash table
 * could not be allocated (the header is released in the latter case).
 * NOTE(review): when _res_cache_get_max_entries() returns 0 the table
 * allocation asks for zero bytes, and whether calloc() then returns NULL is
 * implementation-defined — confirm that caches are never created in that
 * configuration.
 */
static struct resolv_cache* _resolv_cache_create( void )
{
    /* Zero-filled cache header. */
    struct resolv_cache* created = calloc(sizeof(*created), 1);

    if (created == NULL) {
        return NULL;
    }

    /* Zero-filled hash table with one slot per permitted entry. */
    created->max_entries = _res_cache_get_max_entries();
    created->entries = calloc(sizeof(*created->entries), created->max_entries);
    if (created->entries == NULL) {
        free(created);
        return NULL;
    }

    /* An empty MRU list is a sentinel whose links point back at itself. */
    created->mru_list.mru_next = &created->mru_list;
    created->mru_list.mru_prev = &created->mru_list;
    XLOG("%s: cache created\n", __FUNCTION__);
    return created;
}

抛开_res_cache_list链表不说（很简单了），cache的组织结构如下图所示：
cache组织结构

MRU双向链表

从上面的cache结构图中可以看出，缓存项除了用哈希表管理外，还额外链接成一个双向链表，从指针名字看，我们称之为MRU(Most Recently Used)链表，该链表是有序链表，实际维护时是按照最近访问的时间倒序排列，最近访问的缓存项会被放在表头，这样设计是为了在缓存项已满，但是又需要加入新的缓存项时，可以快速地移除最旧的缓存项（移除MRU链表末尾结点即可）。

MRU链表的作用就这一点，相关代码就是基本的双向链表操作，后面也不再赘述。

查询成功结果添加

在res_nsend()中，如果完成一次成功的查询，那么会将查询结果进行缓存，这通过调用_resolv_cache_add()完成。

@netid:在哪个网卡上发起的查询
@query：查询报文
@querylen：查询报文缓存区长度
@answer：响应报文
@answerlen：响应报文缓存区长度
/*
 * Store the answer to a query in the cache of the given network and wake any
 * threads that are waiting for that query.
 *
 * netid     - network whose cache should receive the entry
 * query     - DNS query packet; used to build the lookup key
 * querylen  - length of the query packet
 * answer    - DNS answer packet to store
 * answerlen - length of the answer packet
 *
 * Nothing is stored when the query cannot be turned into a key, when no cache
 * exists for netid, when an equal entry is already present, when the TTL
 * extracted from the answer is 0, or when the entry cannot be allocated.
 */
void _resolv_cache_add( unsigned              netid,
                   const void*           query,
                   int                   querylen,
                   const void*           answer,
                   int                   answerlen )
{
    Entry    key[1];
    Entry*   e;
    Entry**  lookup;
    u_long   ttl;
    Cache*   cache = NULL;

    // Build the lookup key (itself an Entry) from the query packet; cache
    // entries are indexed by the query.
    /* don't assume that the query has already been cached */
    if (!entry_init_key( key, query, querylen )) {
        XLOG( "%s: passed invalid query ?", __FUNCTION__);
        return;
    }

    pthread_mutex_lock(&_res_cache_list_lock);
    // Find the cache that belongs to this netid; without one there is
    // nothing to add to.
    cache = _find_named_cache_locked(netid);
    if (cache == NULL) {
        goto Exit;
    }
    // Look the key up first so that a duplicate entry is never inserted.
    lookup = _cache_lookup_p(cache, key);
    e      = *lookup;
    // Already cached: unexpected, since callers are expected to add only
    // after a cache miss.
    if (e != NULL) { /* should not happen */
        XLOG("%s: ALREADY IN CACHE (%p) ? IGNORING ADD",
             __FUNCTION__, e);
        goto Exit;
    }
    // The cache is full: make room before inserting.
    if (cache->num_entries >= cache->max_entries) {
        // Prefer dropping entries that have already expired.
        _cache_remove_expired(cache);
        // Still full: fall back to evicting the oldest entry.
        if (cache->num_entries >= cache->max_entries) {
            _cache_remove_oldest(cache);
        }
        // The removals above altered the hash table, and a slot returned by
        // _cache_lookup_p() is only valid until the table is altered, so the
        // slot must be recomputed before it is passed to _cache_add_p().
        lookup = _cache_lookup_p(cache, key);
        e      = *lookup;
        if (e != NULL) {
            XLOG("%s: ALREADY IN CACHE (%p) ? IGNORING ADD",
                __FUNCTION__, e);
            goto Exit;
        }
    }
    // Extract the time-to-live from the answer packet.
    ttl = answer_getTTL(answer, answerlen);
    if (ttl > 0) {
        // A positive TTL means the answer may be kept: allocate an entry,
        // stamp its expiry time and link it into the cache.
        e = entry_alloc(key, answer, answerlen);
        if (e != NULL) {
            e->expires = ttl + _time_now();
            _cache_add_p(cache, lookup, e);
        }
    }

Exit:
    if (cache != NULL) {
        // Wake every thread blocked on this query, whether or not an entry
        // was actually stored.
      	_cache_notify_waiting_tid_locked(cache, key);
    }
    pthread_mutex_unlock(&_res_cache_list_lock);
}

cache查询

在res_nsend()真正向DNS服务器发起DNS查询请求之前，会首先向自己的cache查询，如果cache可以命中，那么直接返回，否则才继续向DNS服务器查询。查询cache是通过_resolv_cache_lookup()完成的。

// Result codes returned by _resolv_cache_lookup().
typedef enum {
    // Error-like result: the query could not be keyed, no cache exists for
    // the netid, or the caller's answer buffer is too small.
    RESOLV_CACHE_UNSUPPORTED,  /* the cache can't handle that kind of queries */
                               /* or the answer buffer is too small */
    // The lookup itself worked but there was no usable entry (cache miss).
    RESOLV_CACHE_NOTFOUND,     /* the cache doesn't know about this query */
    // The lookup worked and the answer was copied to the caller (cache hit).
    RESOLV_CACHE_FOUND         /* the cache found the answer */
} ResolvCacheStatus;

/*
 * Look a query up in the cache of the given network.
 *
 * netid      - network whose cache is consulted (caches are per network)
 * query      - DNS query packet
 * querylen   - length of the query packet
 * answer     - caller's buffer that receives the cached answer packet
 * answersize - capacity of the answer buffer
 * answerlen  - out: length of the cached answer; written whenever a live
 *              entry is found, even if it does not fit in the buffer
 *
 * Returns RESOLV_CACHE_FOUND on a hit, RESOLV_CACHE_NOTFOUND on a miss or a
 * stale entry, and RESOLV_CACHE_UNSUPPORTED when the query cannot be keyed,
 * no cache exists for netid, or the answer buffer is too small.
 */
ResolvCacheStatus _resolv_cache_lookup( unsigned              netid,
                      const void*           query,
                      int                   querylen,
                      void*                 answer,
                      int                   answersize,
                      int                  *answerlen )
{
    Entry      key[1];
    Entry**    lookup;
    Entry*     e;
    time_t     now;
    Cache*     cache;

    ResolvCacheStatus  result = RESOLV_CACHE_NOTFOUND;

    XLOG("%s: lookup", __FUNCTION__);
    XLOG_QUERY(query, querylen);

    // Build the key, take the lock and locate the per-network cache, in the
    // same way as _resolv_cache_add().
    if (!entry_init_key(key, query, querylen)) {
        XLOG("%s: unsupported query", __FUNCTION__);
        return RESOLV_CACHE_UNSUPPORTED;
    }

    pthread_once(&_res_cache_once, _res_cache_init);
    pthread_mutex_lock(&_res_cache_list_lock);

    cache = _find_named_cache_locked(netid);
    if (cache == NULL) {
        result = RESOLV_CACHE_UNSUPPORTED;
        goto Exit;
    }

    /* see the description of _lookup_p to understand this.
     * the function always return a non-NULL pointer.
     */
    lookup = _cache_lookup_p(cache, key);
    e      = *lookup;

    // Cache miss: decide whether this thread must issue the query itself or
    // can wait for an identical query that is already in flight.
    if (e == NULL) {
        XLOG( "NOT IN CACHE");
        // calling thread will wait if an outstanding request is found
        // that matching this query
        // A return of 0 means no identical request was pending (this thread
        // is now registered as the one performing it): report NOTFOUND so
        // the caller sends the DNS query.
        // A return of 1 means this thread blocked on a pending request; the
        // cache pointer may have become NULL in the meantime.
        if (!_cache_check_pending_request_locked(&cache, key, netid) || cache == NULL) {
            goto Exit;
        } else {
            // Woken up after waiting: the other thread may have added the
            // answer, so look the key up again.
            lookup = _cache_lookup_p(cache, key);
            e = *lookup;
            if (e == NULL) {
                goto Exit;
            }
        }
    }

    // Here e is an entry found in the table, either directly or after waiting
    // for another thread's request. It may have expired, so check its
    // lifetime before using it.
    now = _time_now();

    /* remove stale entries here */
    // Expired entry: drop it and report a miss.
    if (now >= e->expires) {
        XLOG( " NOT IN CACHE (STALE ENTRY %p DISCARDED)", *lookup );
        XLOG_QUERY(e->query, e->querylen);
        _cache_remove_p(cache, lookup);
        goto Exit;
    }
    // Report the answer length, then fail if the caller's buffer is too small.
    *answerlen = e->answerlen;
    if (e->answerlen > answersize) {
        /* NOTE: we return UNSUPPORTED if the answer buffer is too short */
        result = RESOLV_CACHE_UNSUPPORTED;
        XLOG(" ANSWER TOO LONG");
        goto Exit;
    }
    // Everything is fine: copy the cached answer into the caller's buffer.
    memcpy( answer, e->answer, e->answerlen );

    // The entry was just used, so move it to the front of the MRU list; this
    // keeps it away from the end that _cache_remove_oldest() evicts from.
    /* bump up this entry to the top of the MRU list */
    if (e != cache->mru_list.mru_next) {
        entry_mru_remove( e );
        entry_mru_add( e, &cache->mru_list );
    }
    // Report the hit.
    XLOG( "FOUND IN CACHE entry=%p", e );
    result = RESOLV_CACHE_FOUND;

Exit:
    pthread_mutex_unlock(&_res_cache_list_lock);
    return result;
}

/*
 * Check whether a query with the same hash as 'key' is already in flight on
 * *cache. Must be called with _res_cache_list_lock held.
 *
 * Returns 0 when no such request is pending (or when *cache or key is NULL).
 * In that case a pending-request record for the key is appended to the list,
 * allocation permitting, so later callers can wait for it.
 *
 * Returns 1 when a matching request was pending. The calling thread first
 * blocks on that request's condition variable, for at most
 * PENDING_REQUEST_TIMEOUT seconds, and *cache is then re-resolved from netid
 * because the cache may have been deleted during the wait (it can be NULL).
 *
 * NOTE(review): the timed wait is performed once, without a predicate loop,
 * and its return value is ignored; a spurious or timed-out wakeup therefore
 * also returns 1, and the caller copes by repeating the cache lookup.
 */
static int _cache_check_pending_request_locked( struct resolv_cache** cache, Entry* key, unsigned netid )
{
    struct pending_req_info *tail, *req;
    struct timespec deadline = {0,0};

    if (*cache == NULL || key == NULL) {
        return 0;
    }

    /* Scan the pending list for the same hash; 'tail' trails one node behind
     * and ends up on the last node when nothing matches. */
    tail = &(*cache)->pending_requests;
    for (req = tail->next; req != NULL; req = req->next) {
        if (req->hash == key->hash) {
            break;
        }
        tail = req;
    }

    if (req == NULL) {
        /* Nobody is asking yet: register this query as pending. */
        req = calloc(1, sizeof(struct pending_req_info));
        if (req) {
            req->hash = key->hash;
            pthread_cond_init(&req->cond, NULL);
            tail->next = req;
        }
        return 0;
    }

    /* An identical query is in flight: sleep until its owner signals or the
     * absolute deadline passes. The mutex is released while waiting. */
    XLOG("Waiting for previous request");
    deadline.tv_sec = _time_now() + PENDING_REQUEST_TIMEOUT;
    pthread_cond_timedwait(&req->cond, &_res_cache_list_lock, &deadline);
    /* Must update *cache as it could have been deleted. */
    *cache = _find_named_cache_locked(netid);
    return 1;
}

查询失败时cache的处理

从上面的cache查询中，可以看出有些请求是会加入到pending_request中并阻塞等待的，所以如果在res_nsend()中发起了一次DNS查询，但是查询失败了，那么必须将查询失败的结果告诉cache来处理这一事件，这是通过调用_resolv_cache_query_failed()完成的。

/*
 * Tell the cache that the query identified by (query, querylen) on network
 * 'netid' has failed, so that threads blocked waiting for its result are
 * woken up. Does nothing if the query cannot be turned into a key or if no
 * cache exists for netid.
 */
void _resolv_cache_query_failed( unsigned netid, const void* query, int querylen)
{
    Entry    key[1];

    if (entry_init_key(key, query, querylen)) {
        Cache*   target;

        pthread_mutex_lock(&_res_cache_list_lock);

        target = _find_named_cache_locked(netid);
        if (target != NULL) {
            /* Release the waiters and drop the pending-request record. */
            _cache_notify_waiting_tid_locked(target, key);
        }

        pthread_mutex_unlock(&_res_cache_list_lock);
    }
}

/*
 * Wake every thread waiting on the pending request whose hash equals
 * key->hash, then unlink that request from the cache's pending list and
 * release it. Only the first matching request is handled. Does nothing when
 * cache or key is NULL or when no request matches.
 */
static void _cache_notify_waiting_tid_locked( struct resolv_cache* cache, Entry* key )
{
    struct pending_req_info *before, *req;

    if (cache == NULL || key == NULL) {
        return;
    }

    before = &cache->pending_requests;
    for (req = before->next; req != NULL; before = req, req = req->next) {
        if (req->hash != key->hash) {
            continue;
        }

        /* Wake all waiters of this request. */
        pthread_cond_broadcast(&req->cond);

        /* Remove the record from the list and destroy it. */
        before->next = req->next;
        pthread_cond_destroy(&req->cond);
        free(req);
        return;
    }
}

其它

_cache_lookup_p()

前面多次用到该函数，该函数的作用是从整个Cache表(cache参数指定)中寻找是否有指定的缓存项(key参数指定)。

/* This function tries to find a key within the hash table
 * In case of success, it will return a *pointer* to the hashed key.
 * In case of failure, it will return a *pointer* to NULL
 *
 * So, the caller must check '*result' to check for success/failure.
 *
 * The main idea is that the result can later be used directly in
 * calls to _cache_add_p or _cache_remove_p as the 'lookup'
 * parameter. This makes the code simpler and avoids re-searching
 * for the key position in the htable.
 *
 * The result of a lookup_p is only valid until you alter the hash
 * table.
 *
 * The returned pointer itself is never NULL. cache->max_entries must be
 * non-zero, because it is used as the modulus that selects the bucket.
 */
static Entry** _cache_lookup_p( Cache* cache, Entry* key )
{
    /* Bucket selection is a plain modulo of the hash by the table size. */
    int      index = key->hash % cache->max_entries;
    /* NOTE(review): 'entries' is declared as Entry*, yet each slot is accessed
     * through an Entry** cast, i.e. as a bucket head pointer — confirm this
     * aliasing is intended. */
    Entry**  pnode = (Entry**) &cache->entries[ index ];

    /* Walk the collision chain. The loop condition already guarantees that
     * the current node is non-NULL, so no further NULL check is needed. */
    while (*pnode != NULL) {
        Entry*  node = *pnode;

        /* A match needs both an equal hash and an equal query. */
        if (node->hash == key->hash && entry_equals(node, key))
            break;

        pnode = &node->hlink;
    }
    return pnode;
}

Android DNS之查询结果缓存