Android Performance 问题方法总结

系统性能问题分为 framework 和 kernel 两层：

framework层的性能问题，主要依赖于android 自带的一些工具和手段，比如，systrace,traceview,android profiler.

kernel 层的性能问题，主要依赖于linux 一些调试工具和手段。比如top, strace, ftrace.

先来说说之前遇到一个整体性能变卡的问题。

对于这种问题，首先需要确认是上层的问题还是kernel层的问题。

判别的具体方法如下：

1. 使用adb shell stop， adb shell start命令判断重启后是否还卡顿。

2. 使用一个简单的命令用strace 对比正常机器是否所有系统调用变慢

strace -c cat /sys/kernel/debug/tracing/tracing_on

strace -tt -T -v -f -p pid -o /sdcard/logname.log

3. 制作简单的循环脚本去执行chmod, cat, ls等命令，与正常机器做对比是否都变慢

如果确认上面几项都变慢，那么基本上可以判定kernel的syscall都变慢了。

那么在kernel syscall 大部分都变慢的情况下，现在试下来没有什么好的方法，只有通过自己写test case 去确认哪个系统调用变慢，需要结合抓取操作变慢的场景下ramdump分析源码里 code逻辑和变量值。如何写test case?

可以写一个daemon，循环去执行ls、cat、chmod等基本命令，跟正常的机器对比，确认出哪个命令差距较大，然后再对该命令做进一步排查。

以chmod调用慢为例，首先需要编写一个daemon，也就是一个可以循环执行chmod的bin档，然后编写一个ko，去打印chmod源码流程里每个函数的耗时，来确认哪个函数出了问题，再来看对应函数可能发生的耗时操作。

当时查到user_path_at->security_path_chmod->security_inode_setattr 这几个函数耗时较长，最后发现跟selinux有关。

所以以selinux为例，

selinux 有这样一个机制，当系统调用去检查selinux policy的时候，首先会检查avc cache里面的policy，如果存在就直接使用avc cache里面的值，如果不存在就会从db重新读取到avc cache。

所以这个是跟性能关系很大，是可疑点。

同时也可以用cat /sys/fs/selinux/avc/cache_stats来查看，执行结果如下：

lookups hits misses allocations reclaims frees
1774786 1759569 15217 15604 14509 10688
2134509 2119175 15334 15934 15187 9169
1785290 1770881 14409 14687 14112 17670
1442715 1428358 14357 14978 14528 16211
407859 401975 5884 6009 6016 6408
318615 313979 4636 4668 4576 6008
244824 241135 3689 3783 3760 6642
221858 217765 4093 4252 4288 6614

其中 reclaims 这一项每次增加很多说明就有问题，因为reclaim操作是比较耗时的， misses每次增加很多，也是有影响，但是耗时没reclaim那么严重。

通过结合ramdump分析reclaim次数为什么增加这么多，

static struct avc_node *avc_alloc_node(void)

{

struct avc_node *node;

node = kmem_cache_zalloc(avc_node_cachep, GFP_NOWAIT);

if (!node)

goto out;

INIT_HLIST_NODE(&node->list);

avc_cache_stats_incr(allocations);

if (atomic_inc_return(&avc_cache.active_nodes) > avc_cache_threshold)

avc_reclaim_node();

out:

return node;

}

源码中写道，avc_cache.active_nodes大于某个阈值的时候，就会去reclaim，那么就需要通过ramdump解析后，查看当时卡顿时候avc_cache.active_nodes变量值，以及avc_cache的状态来判断。

当时确认avc_cache.active_nodes超过了avc_cache_threshold这个阈值512.

原因是上面源码里有 atomic_inc_return(&avc_cache.active_nodes) 这样一句，它在 alloc_node 成功之后增加了 avc_cache.active_nodes 的值；但在随后插入链表失败的情况下，并没有做减1操作，源码如下：

/*
 * Insert an AVC entry for (ssid, tsid, tclass) into the cache hash table.
 *
 * Returns the inserted node, or NULL when avc_latest_notif_update()
 * returns non-zero, when avc_alloc_node() fails, or when
 * avc_xperms_populate() fails.
 *
 * NOTE: this is the ORIGINAL listing and intentionally still contains the
 * counter leak discussed in the text (see the BUG comment below).
 */
static struct avc_node *avc_insert(u32 ssid, u32 tsid, u16 tclass,
				   struct av_decision *avd,
				   struct avc_xperms_node *xp_node)
{
	struct avc_node *pos, *node = NULL;
	int hvalue;
	unsigned long flag;

	if (avc_latest_notif_update(avd->seqno, 1))
		goto out;

	node = avc_alloc_node();
	if (node) {
		struct hlist_head *head;
		spinlock_t *lock;
		int rc = 0;

		hvalue = avc_hash(ssid, tsid, tclass);
		avc_node_populate(node, ssid, tsid, tclass, avd);
		rc = avc_xperms_populate(node, xp_node);
		if (rc) {
			/*
			 * BUG: avc_alloc_node() already incremented
			 * avc_cache.active_nodes for this node, but the
			 * counter is not decremented before the node is
			 * freed, so it drifts upward on every failure.
			 */
			kmem_cache_free(avc_node_cachep, node);
			return NULL;
		}
		head = &avc_cache.slots[hvalue];
		lock = &avc_cache.slots_lock[hvalue];

		spin_lock_irqsave(lock, flag);
		/* Replace an existing entry with the same key, if any. */
		hlist_for_each_entry(pos, head, list) {
			if (pos->ae.ssid == ssid &&
			    pos->ae.tsid == tsid &&
			    pos->ae.tclass == tclass) {
				avc_node_replace(node, pos);
				goto found;
			}
		}
		/* No matching entry in this bucket: add the new node. */
		hlist_add_head_rcu(&node->list, head);
found:
		spin_unlock_irqrestore(lock, flag);
	}
out:
	return node;
}

//插入链表的函数

static int avc_xperms_populate(struct avc_node *node,

struct avc_xperms_node *src)

{

struct avc_xperms_node *dest;

struct avc_xperms_decision_node *dest_xpd;

struct avc_xperms_decision_node *src_xpd;

if (src->xp.len == 0)

return 0;

dest = avc_xperms_alloc();

if (!dest)

return -ENOMEM;

memcpy(dest->xp.drivers.p, src->xp.drivers.p, sizeof(dest->xp.drivers.p));

dest->xp.len = src->xp.len;

/* for each source xpd allocate a destination xpd and copy */

list_for_each_entry(src_xpd, &src->xpd_head, xpd_list) {

dest_xpd = avc_xperms_decision_alloc(src_xpd->xpd.used);

if (!dest_xpd)

goto error;

avc_copy_xperms_decision(&dest_xpd->xpd, &src_xpd->xpd);

list_add(&dest_xpd->xpd_list, &dest->xpd_head);

}

node->ae.xp_node = dest;

return 0;

error:

avc_xperms_free(dest);

return -ENOMEM;

}

所以在插入链表失败之后，应该主动去减1

修改方案如下：

/*
 * Insert an AVC entry for (ssid, tsid, tclass) into the cache hash table
 * (FIXED version).
 *
 * Returns the inserted node, or NULL when avc_latest_notif_update()
 * returns non-zero, when avc_alloc_node() fails, or when
 * avc_xperms_populate() fails.
 *
 * Compared with the original, the avc_xperms_populate() failure path now
 * decrements avc_cache.active_nodes before freeing the node, undoing the
 * increment performed by avc_alloc_node().
 */
static struct avc_node *avc_insert(u32 ssid, u32 tsid, u16 tclass,
				   struct av_decision *avd,
				   struct avc_xperms_node *xp_node)
{
	struct avc_node *pos, *node = NULL;
	int hvalue;
	unsigned long flag;

	if (avc_latest_notif_update(avd->seqno, 1))
		goto out;

	node = avc_alloc_node();
	if (node) {
		struct hlist_head *head;
		spinlock_t *lock;
		int rc = 0;

		hvalue = avc_hash(ssid, tsid, tclass);
		avc_node_populate(node, ssid, tsid, tclass, avd);
		rc = avc_xperms_populate(node, xp_node);
		if (rc) {
			/*
			 * The fix: the node never reaches the hash table, so
			 * give back the active_nodes count taken in
			 * avc_alloc_node() before freeing it.
			 */
			atomic_dec(&avc_cache.active_nodes);
			kmem_cache_free(avc_node_cachep, node);
			return NULL;
		}
		head = &avc_cache.slots[hvalue];
		lock = &avc_cache.slots_lock[hvalue];

		spin_lock_irqsave(lock, flag);
		/* Replace an existing entry with the same key, if any. */
		hlist_for_each_entry(pos, head, list) {
			if (pos->ae.ssid == ssid &&
			    pos->ae.tsid == tsid &&
			    pos->ae.tclass == tclass) {
				avc_node_replace(node, pos);
				goto found;
			}
		}
		/* No matching entry in this bucket: add the new node. */
		hlist_add_head_rcu(&node->list, head);
found:
		spin_unlock_irqrestore(lock, flag);
	}
out:
	return node;
}

Android Performance 问题方法总结

猜你喜欢