package kaoshi831;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Options.CreateOpts.BlockSize;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
// Computes the percentage of files in HDFS whose size is smaller than the
// cluster's default block size ("small files"), scanning the root directory
// non-recursively, and prints the percentage plus the block size used.
public class tongji {
    public static void main(String[] args) throws IOException, InterruptedException, URISyntaxException {
        Configuration conf = new Configuration();
        // Connect to the NameNode as user "hadoop".
        FileSystem fs = FileSystem.get(new URI("hdfs://hadoop01:9000"), conf, "hadoop");
        try {
            Path path = new Path("/");
            // Initialize from the filesystem default so blockSize is always
            // definitely assigned, even when the directory holds no files
            // (the original read an uninitialized local — a compile error).
            long blockSize = fs.getDefaultBlockSize(path);
            int smallfile = 0;   // files strictly smaller than their block size
            int filecount = 0;   // total files seen
            // false => do not recurse into subdirectories.
            RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(path, false);
            while (listFiles.hasNext()) {
                LocatedFileStatus next = listFiles.next();
                filecount++;
                // Size of this file in bytes.
                long len = next.getLen();
                // Per-file block size (may differ from the cluster default).
                blockSize = next.getBlockSize();
                // Smaller than one block => count it as a small file.
                if (len < blockSize) {
                    smallfile++;
                }
            }
            // Guard the empty-directory case instead of printing "NaN%".
            if (filecount == 0) {
                System.out.println("0%");
            } else {
                String result = (smallfile * 1f / filecount * 100) + "%";
                System.out.println(result);
            }
            System.out.println(blockSize);
        } finally {
            // Always release the FileSystem handle, even on exception.
            fs.close();
        }
    }
}
// 统计hdfs中小文件的占比 (statistics: proportion of small files in HDFS)
// 转载自 (reposted from) blog.csdn.net/YZY_001/article/details/82314205
// (Stray page chrome from the source blog — "猜你喜欢 / 今日推荐 / 周排行" — removed;
// the bare text after the closing brace made this file uncompilable.)