/**
 * Uses wildcards and a PathFilter to upload files of several local formats to
 * HDFS, filtering out every file that is not in txt text format.
 */
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
public class CopyManyFilesToHdfs {

    /** Destination (HDFS) file system handle. */
    protected static FileSystem fsDst;
    /** Local file system handle used to enumerate the source files. */
    protected static FileSystem fsLocal;

    /**
     * Entry point: uploads every local *.txt file under the hard-coded source
     * directory to the HDFS destination directory.
     *
     * @param args unused
     * @throws IOException        if the glob or any copy fails
     * @throws URISyntaxException if the HDFS URI is malformed
     */
    public static void main(String[] args) throws IOException, URISyntaxException {
        // Both a full hdfs:// URI and a relative path work as the destination.
        String dstPath = "hdfs://dajiangtai:9000/middle/filter/";
        FilterUpfiles(dstPath);
    }

    /**
     * Globs the local source directory, keeps only paths ending in ".txt",
     * and copies each match into {@code dstPath} on HDFS.
     *
     * @param dstPath destination directory on HDFS
     * @throws IOException        if the glob or any copy fails
     * @throws URISyntaxException if the HDFS URI is malformed
     */
    public static void FilterUpfiles(String dstPath) throws IOException, URISyntaxException {
        Configuration conf = new Configuration();
        fsDst = FileSystem.get(new URI("hdfs://dajiangtai:9000/"), conf);
        fsLocal = FileSystem.getLocal(conf);
        try {
            // globStatus (not listStatus) is required here: only globStatus
            // expands the trailing "*" wildcard; the filter then keeps names
            // ending in ".txt". The "\\." requires a real extension dot, so a
            // file merely named "...txt" (no dot) is no longer accepted.
            FileStatus[] matches = fsLocal.globStatus(
                    new Path("D://大数据文件/ppt/data/205/205_data/*"),
                    new RegexAcceptPathFilter("^.*\\.txt$"));
            // globStatus returns null when the glob's parent directory does
            // not exist; guard against an NPE in stat2Paths.
            if (matches == null) {
                return;
            }
            for (Path path : FileUtil.stat2Paths(matches)) {
                fsDst.copyFromLocalFile(path, new Path(dstPath));
            }
        } finally {
            // Release both file-system handles even if a copy fails. Safe in
            // this standalone tool, which is the sole user of the handles.
            fsLocal.close();
            fsDst.close();
        }
    }

    /**
     * PathFilter that accepts a path only when its full string representation
     * matches the supplied regular expression.
     */
    public static class RegexAcceptPathFilter implements PathFilter {
        private final String regex;

        public RegexAcceptPathFilter(String regex) {
            this.regex = regex;
        }

        @Override
        public boolean accept(Path path) {
            return path.toString().matches(regex);
        }
    }
}