package com.ghgj.cn.zy;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.io.IOUtils;
/**
 * Reads the data of the SECOND block of an HDFS file and copies it to a new
 * HDFS file.
 *
 * <p>Connects to hdfs://hadoop01:9000 as user "hadoop", looks up the block
 * locations of /tt/aa/ff.txt, seeks the input stream to the offset of block
 * index 1, and streams exactly that block's length to /test01.
 */
public class InputSecondBlock {
	public static void main(String[] args) throws IOException, InterruptedException, URISyntaxException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(new URI("hdfs://hadoop01:9000"), conf, "hadoop");
		try {
			// Source file whose second block we want to extract.
			Path p = new Path("/tt/aa/ff.txt");
			// listFiles exposes per-file block location information.
			RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(p, false);
			if (!listFiles.hasNext()) {
				throw new IOException("No file found at " + p);
			}
			LocatedFileStatus next = listFiles.next();
			BlockLocation[] bl = next.getBlockLocations();
			// Guard: a file smaller than one block size has no second block,
			// and bl[1] would throw ArrayIndexOutOfBoundsException.
			if (bl.length < 2) {
				throw new IOException(
						"File " + p + " has only " + bl.length + " block(s); no second block to read");
			}
			long offset = bl[1].getOffset(); // byte offset where block #2 starts
			long length = bl[1].getLength(); // number of bytes in block #2
			// try-with-resources guarantees both streams are closed even if
			// seek/create/copy throws (the original leaked on failure paths).
			try (FSDataInputStream in = fs.open(p);
					FSDataOutputStream out = fs.create(new Path("/test01"))) {
				in.seek(offset); // position the read at the start of the second block
				// close=false: the try-with-resources block owns stream lifetime.
				IOUtils.copyBytes(in, out, length, false);
			}
		} finally {
			fs.close();
		}
	}
}
// Reads the data of the second block of a file on HDFS.
// Source: blog.csdn.net/YZY_001/article/details/82079703