1.too many open files Log Pattern
当一个Android进程打开的fd数量达到上限(默认1024个)时,就会报too many open files错误,进程随之crash
system_server crash
#第一种Log pattern
CursorWindow: could not create cursorwindow from parcel due to error -24,process fd count = 0 //error -24 too many open files
Process: system_server
android.database.CursorWindowAllocationException: CursorWindow could not create cursorwindow from binder
at android.database.CursorWindow.<init>(CursorWindow.java:137)
at android.database.CursorWindow$1.createFromParcel(CursorWindow.java:685)
#第二种Log pattern
Looper(200): could not create wake pipe. errno=24
#第三种Log pattern
ActivityManager: Activity Manager Crash
ActivityManager: java.lang.RuntimeException: Too many open files
ActivityManager: at android.os.Parcel.nativeWriteFileDescriptor(Native Method)
ActivityManager: at android.os.Parcel.writeFileDescriptor(Parcel.java:615)
ActivityManager: at android.os.ParcelFileDescriptor.writeToParcel(ParcelFileDescriptor.java:975)
ActivityManager: at android.app.ActivityManager$TaskThumbnail.writeToParcel(ActivityManager.java:1954)
ActivityManager: at android.app.ActivityManagerNative.onTransact(ActivityManagerNative.java:700)
ActivityManager: at android.app.ActivityManagerNativeEx.onTransact(ActivityManagerNativeEx.java:149)
ActivityManager: at com.android.server.am.ActivityManagerService.onTransact(ActivityManagerService.java:2774)
2.errno意思
@/external/syslinux/dos/errno.h
#define EPERM 1 /* Operation not permitted */
#define ENOENT 2 /* No such file or directory */
#define ESRCH 3 /* No such process */
#define EINTR 4 /* Interrupted system call */
#define EIO 5 /* I/O error */
#define ENXIO 6 /* No such device or address */
#define E2BIG 7 /* Argument list too long */
#define ENOEXEC 8 /* Exec format error */
#define EBADF 9 /* Bad file number */
#define ECHILD 10 /* No child processes */
#define EAGAIN 11 /* Try again */
#define ENOMEM 12 /* Out of memory */
#define EACCES 13 /* Permission denied */
#define EFAULT 14 /* Bad address */
#define ENOTBLK 15 /* Block device required */
#define EBUSY 16 /* Device or resource busy */
#define EEXIST 17 /* File exists */
#define EXDEV 18 /* Cross-device link */
#define ENODEV 19 /* No such device */
#define ENOTDIR 20 /* Not a directory */
#define EISDIR 21 /* Is a directory */
#define EINVAL 22 /* Invalid argument */
#define ENFILE 23 /* File table overflow */
#define EMFILE 24 /* Too many open files */
#define ENOTTY 25 /* Not a typewriter */
#define ETXTBSY 26 /* Text file busy */
#define EFBIG 27 /* File too large */
#define ENOSPC 28 /* No space left on device */
#define ESPIPE 29 /* Illegal seek */
#define EROFS 30 /* Read-only file system */
#define EMLINK 31 /* Too many links */
#define EPIPE 32 /* Broken pipe */
#define EDOM 33 /* Math argument out of domain of func */
#define ERANGE 34 /* Math result not representable */
3.分析方法
(1). 确定是哪类文件打开太多,没有关闭.
* fd leaks, 通常伴随着此进程会出现Java Exception, Native Exception 等. 在mtk 的AEE DB 中, 有一支文件 PROCESS_FILE_STATE 描述, 此进程的打开的所有文件.
查看此文件, 确定哪个或者哪种文件打开数量最多,即追查此类文件打开如此多, 而没有被关闭的原因.
* 如果没有DB, 当发生文件句柄泄露到1024 时, 在L 版本后, 在Kernel Log 中search "FDLEAK", 在L 版本之前, 在Kernel Log 中search "FS_TAG", 即可枚举出所有的此进程所打开的文件.
* 如果问题容易复现,可以直接 adb shell ls -a -l /proc/pid/fd , 直接打印出当前此process 所有打开的文件.
(2). 确定此类文件是在哪里打开.
对于一些确定的文件, 比如/data/data/xxxx_app/yyyy 之类的文件, 通常开发者自己可以快速的确定打开文件的位置,基本上不需要debug.
对于一些另外一些常见的场景说明如下:
* 大批量的打开“anon_inode:[eventpoll]” 和 "pipe", 超过100个eventpoll, 通常情况下是开启了太多的HandlerThread/Looper/MessageQueue, 线程忘记关闭, 或者looper 没有释放. 可以抓取hprof 进行快速分析. 抓取hprof 可以参考
* 对于system server, 如果有大批量的socket 打开, 可能是因为Input Channel 没有关闭, 此类同样抓取hprof, 查看system server 中WindowState 的情况.
* 大批量的打开“/dev/ashmem”, 如果是Content Provider, 或者其他app, 很可能是打开数据库没有关闭, 或者数据库连接频繁打开忘记关闭.
tips:意思就是说需要到PROCESS_FILE_STATE 或者 adb shell ls -a -l /proc/pid/fd来查看是哪种类型的fd
泄露或者文件泄露。
4.Android O 新增OpenFdMonitor来dump fdleak信息
1.实现位置
xref: /frameworks/base/services/core/java/com/android/server/Watchdog.java
public class Watchdog extends Thread //system_server thread
2.定义和初始化
//定义
final OpenFdMonitor mOpenFdMonitor;
//初始化watchdog时,初始化mOpenFdMonitor
/**
* Creates the watchdog thread object (thread name "watchdog"), registers the Binder thread
* monitor and creates the open-FD monitor.
*/
private Watchdog() {
super("watchdog");
// Initialize monitor for Binder threads.
addMonitor(new BinderThreadMonitor());
// OpenFdMonitor.create() can return null (e.g. on non-debuggable builds), so
// mOpenFdMonitor must be null-checked before use.
mOpenFdMonitor = OpenFdMonitor.create();
}
3.OpenFdMonitor分析
/**
* Detects that the process is close to its open-file-descriptor soft limit and, when it is,
* dumps the list of open descriptors to a file in the configured dump directory.
*
* Detection is done by checking whether a single high-numbered entry under /proc/self/fd
* exists, rather than by counting every open descriptor.
*/
public static final class OpenFdMonitor {
/**
* Number of FDs below the soft limit that we trigger a runtime restart at. This was
* chosen arbitrarily, but will need to be at least 6 in order to have a sufficient number
* of FDs in reserve to complete a dump.
*/
private static final int FD_HIGH_WATER_MARK = 12;
// Directory that receives the lsof dump files.
private final File mDumpDir;
// The /proc/self/fd/<n> entry whose existence signals that the high water mark was reached.
private final File mFdHighWaterMark;
/**
* Builds the monitor for the current process.
*
* @return a new monitor, or {@code null} when the build is not debuggable, when the
* "dalvik.vm.stack-trace-dir" property is empty, or when getrlimit(RLIMIT_NOFILE) throws.
*/
public static OpenFdMonitor create() {
// Only run the FD monitor on debuggable builds (such as userdebug and eng builds).
if (!Build.IS_DEBUGGABLE) { // not supported on user builds
return null;
}
// Don't run the FD monitor on builds that have a global ANR trace file. We're using
// the ANR trace directory as a quick hack in order to get these traces in bugreports
// and we wouldn't want to overwrite something important.
final String dumpDirStr = SystemProperties.get("dalvik.vm.stack-trace-dir", "");
if (dumpDirStr.isEmpty()) {
return null;
}
final StructRlimit rlimit;
try {
rlimit = android.system.Os.getrlimit(OsConstants.RLIMIT_NOFILE);
} catch (ErrnoException errno) {
Slog.w(TAG, "Error thrown from getrlimit(RLIMIT_NOFILE)", errno);
return null;
}
// The assumption we're making here is that FD numbers are allocated (more or less)
// sequentially, which is currently (and historically) true since open is currently
// specified to always return the lowest-numbered non-open file descriptor for the
// current process.
//
// We do this to avoid having to enumerate the contents of /proc/self/fd in order to
// count the number of descriptors open in the process.
//
// Example: with a soft limit of 1024 the threshold entry is /proc/self/fd/1012.
final File fdThreshold = new File("/proc/self/fd/" + (rlimit.rlim_cur - FD_HIGH_WATER_MARK));
return new OpenFdMonitor(new File(dumpDirStr), fdThreshold);
}
/**
* @param dumpDir directory in which dump files are created
* @param fdThreshold /proc/self/fd entry that marks the high water mark
*/
OpenFdMonitor(File dumpDir, File fdThreshold) {
mDumpDir = dumpDir;
mFdHighWaterMark = fdThreshold;
}
/**
* Runs "/system/bin/lsof -p <own pid>" with stderr merged into stdout and the output
* redirected into a freshly created file in {@code mDumpDir}. The file is deleted when lsof
* exits with a non-zero code; IOException and InterruptedException are only logged.
*/
private void dumpOpenDescriptors() {
try {
// The dump file is created in mDumpDir with prefix "anr_fd_" and an empty suffix,
// e.g. /data/anr/anr_fd_<random> when the stack-trace dir is /data/anr.
File dumpFile = File.createTempFile("anr_fd_", "", mDumpDir);
java.lang.Process proc = new ProcessBuilder()
.command("/system/bin/lsof", "-p", String.valueOf(Process.myPid())) // lists this process's open files, comparable to: adb shell ls -a -l /proc/<pid>/fd
.redirectErrorStream(true)
.redirectOutput(dumpFile)
.start();
int returnCode = proc.waitFor();
if (returnCode != 0) {
Slog.w(TAG, "Unable to dump open descriptors, lsof return code: "
+ returnCode);
dumpFile.delete();
}
} catch (IOException | InterruptedException ex) {
// NOTE(review): on this path the temp file is left in place and the interrupt
// status is not restored; only a warning is logged.
Slog.w(TAG, "Unable to dump open descriptors: " + ex);
}
}
/**
* @return {@code true} if the high water mark was breached and a dump was written,
* {@code false} otherwise.
*/
public boolean monitor() {
// The threshold entry exists only once an FD with that number has been opened.
if (mFdHighWaterMark.exists()) {
dumpOpenDescriptors();
return true;
}
return false;
}
}
4.watchdog线程启动时,每30s检测一次fd
/**
* Watchdog thread loop (excerpt): on every iteration the open-FD monitor is consulted first,
* and only when it did not trigger are the regular checkers evaluated.
*
* NOTE(review): this excerpt appears abridged - the waitState branches are empty and
* {@code subject} / {@code blockedCheckers} are not declared in the visible code.
*/
@Override
public void run() {
boolean waitedHalf = false;
while (true) {
synchronized (this) {
long start = SystemClock.uptimeMillis();
boolean fdLimitTriggered = false;
if (mOpenFdMonitor != null) { // mOpenFdMonitor is null on user builds, so this feature is unavailable there
fdLimitTriggered = mOpenFdMonitor.monitor();
}
if (!fdLimitTriggered) { // when fdLimitTriggered is true the waitState evaluation is skipped and the watchdog fires directly
final int waitState = evaluateCheckerCompletionLocked();
if (waitState == COMPLETED) {
} else if (waitState == WAITING) {
} else if (waitState == WAITED_HALF) {
}
} else {
// No checker is blocked in this case; the FD limit itself is the reason reported.
blockedCheckers = Collections.emptyList();
subject = "Open FD high water mark reached"; // the device log will then show: *** WATCHDOG KILLING SYSTEM PROCESS: Open FD high water mark reached
}
}
Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + subject);
}
}
5.monitor
/**
* Checks whether the FD high water mark has been reached and, if so, dumps the open
* descriptors.
*
* @return {@code true} if the threshold entry exists and a dump was attempted,
* {@code false} otherwise.
*/
public boolean monitor() {
// exists() is true when the threshold entry (e.g. /proc/self/fd/1012, assuming a soft
// limit of 1024 minus FD_HIGH_WATER_MARK = 12) is present, i.e. the process has already
// opened an FD with that number.
if (mFdHighWaterMark.exists()) {
dumpOpenDescriptors(); // runs /system/bin/lsof -p String.valueOf(Process.myPid()) and saves its output
return true;
}
return false;
}
5.user版本添加dump hprof的方法
1.去掉IS_DEBUGGABLE限制,修改如下:
/**
* Patched excerpt of create(): the debuggable-build check is commented out so that the
* monitor is also created on user builds.
*
* NOTE(review): the remainder of the method body is not shown in this excerpt; presumably
* it is left unchanged - confirm against the full method.
*/
public static OpenFdMonitor create() {
// Only run the FD monitor on debuggable builds (such as userdebug and eng builds).
// (check disabled by this patch)
//if (!Build.IS_DEBUGGABLE) {
// return null;
//}
}
2.添加dump hprof
/**
* Dumps the Java heap of the current process to
* {@code /data/anr/<yyyy-MM-dd_HH:mm:ss>.hprof}.
*
* A failure to write the dump is logged as a warning and otherwise ignored, so that the
* caller can continue with its own handling.
*/
public void dumpphprof() {
// Put the '_' separator into the pattern itself. The previous code formatted with a space
// and then called replaceAll("", "_"); an empty regex matches at every position, so that
// inserted '_' between all characters instead of replacing the space.
// The locale is pinned so the digits in the file name are always ASCII.
final SimpleDateFormat timestampFormat =
new SimpleDateFormat("yyyy-MM-dd_HH:mm:ss", java.util.Locale.US);
final String hprofPath = "/data/anr/" + timestampFormat.format(new Date()) + ".hprof";
try {
Debug.dumpHprofData(hprofPath);
} catch (IOException e) {
// Keep the target path and the cause in the log so a failed dump can be diagnosed.
Slog.w(TAG, "system server heap dump failed: " + hprofPath, e);
}
}
3.添加到monitor方法
/**
* Patched monitor(): in addition to the open-descriptor dump, a heap dump is written when
* the FD high water mark has been reached.
*
* @return {@code true} if the threshold entry exists and the dumps were attempted,
* {@code false} otherwise.
*/
public boolean monitor() {
if (mFdHighWaterMark.exists()) {
dumpOpenDescriptors();
dumpphprof(); // added by this patch: also write an hprof heap dump
return true;
}
return false;
}