HDFS 使用文件模式,实现多文件上传至HDFS

/**

 * 我们利用通配符和 PathFilter 对象,从本地多种格式的文件中筛选出 txt 文本文件并上传至 HDFS 文件系统,其余格式的文件会被过滤掉。

 */

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class CopyManyFilesToHdfs {

    protected static FileSystem fsDst;

    protected static FileSystem fsLocal;

    public static void main(String[] args) throws IOException, URISyntaxException {

                        //全路径和相对路径都可

        //String dstPath = "/middle/filter/";

        String dstPath = "hdfs://dajiangtai:9000/middle/filter/";

        FilterUpfiles(dstPath);

    }

    public static void FilterUpfiles(String dstPath) throws IOException, URISyntaxException{

        Configuration conf = new Configuration();

//      fsDst=FileSystem.get(new URI("hdfs://dajiangtai:9000"), conf);//可以

        fsDst=FileSystem.get(new URI("hdfs://dajiangtai:9000/"), conf);//可以

//      fsDst=FileSystem.get(new URI("hdfs://dajiangtai:9000/middle/filter/"), conf);//居然也能成功

        fsLocal=FileSystem.getLocal(conf);

        //此处不能用listStatus只能用globStatus来处理文件组

//      FileStatus[] listStatus = fsLocal.listStatus(new Path("D://大数据文件/ppt/data/205/205_data/*"), new RegexAcceptPathFilter("^.*txt$"));//匹配符的问题

        FileStatus[] listStatus = fsLocal.globStatus(new Path("D://大数据文件/ppt/data/205/205_data/*"), new RegexAcceptPathFilter("^.*txt$"));//匹配符的问题

        Path[] paths = FileUtil.stat2Paths(listStatus);

        for (Path path : paths) {

            fsDst.copyFromLocalFile(path, new Path(dstPath));

        }

    }

    public static class RegexAcceptPathFilter implements PathFilter{

        private final String regex;

        

        public RegexAcceptPathFilter(String regex) {

            super();

            this.regex = regex;

        }

        @Override

        public boolean accept(Path path) {

            boolean flag=path.toString().matches(regex);

            return flag;

        }

    }

}

猜你喜欢

转载自blog.csdn.net/lrf2454224026/article/details/82049001