package kaoshi831;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Options.CreateOpts.BlockSize;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
// Computes the percentage of files in HDFS whose size is smaller than the
// cluster's default block size ("small files"), scanning the root directory
// non-recursively, and prints the percentage plus the block size used.
public class tongji {
    public static void main(String[] args) throws IOException, InterruptedException, URISyntaxException {
        Configuration conf = new Configuration();
        // Connect to the NameNode as user "hadoop".
        FileSystem fs = FileSystem.get(new URI("hdfs://hadoop01:9000"), conf, "hadoop");
        try {
            Path path = new Path("/");
            // Initialize from the filesystem default so blockSize is always
            // definitely assigned, even when the directory holds no files
            // (the original read an uninitialized local — a compile error).
            long blockSize = fs.getDefaultBlockSize(path);
            int smallfile = 0;   // files strictly smaller than their block size
            int filecount = 0;   // total files seen
            // false => do not recurse into subdirectories.
            RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(path, false);
            while (listFiles.hasNext()) {
                LocatedFileStatus next = listFiles.next();
                filecount++;
                // Size of this file in bytes.
                long len = next.getLen();
                // Per-file block size (may differ from the cluster default).
                blockSize = next.getBlockSize();
                // Smaller than one block => count it as a small file.
                if (len < blockSize) {
                    smallfile++;
                }
            }
            // Guard the empty-directory case instead of printing "NaN%".
            if (filecount == 0) {
                System.out.println("0%");
            } else {
                String result = (smallfile * 1f / filecount * 100) + "%";
                System.out.println(result);
            }
            System.out.println(blockSize);
        } finally {
            // Always release the FileSystem handle, even on exception.
            fs.close();
        }
    }
}
// 统计hdfs中小文件的占比 (statistics: proportion of small files in HDFS)
// 转载自 (reposted from) blog.csdn.net/YZY_001/article/details/82314205
// (Stray page chrome from the source blog — "猜你喜欢 / 今日推荐 / 周排行" — removed;
// the bare text after the closing brace made this file uncompilable.)