一、通过API操作HDFS
1、HDFS获取文件系统
//Obtain a handle to the HDFS file system
@Test
public void getFileSystem() throws IOException, InterruptedException, URISyntaxException {
    //0. Create the configuration object; it reads core-site.xml from the classpath.
    Configuration configuration = new Configuration();
    //1. Obtain the file system.
    //   try-with-resources fixes the original leak: fs was not closed if the print threw.
    // FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"),configuration,"atguigu");
    try (FileSystem fs = FileSystem.get(configuration)) {
        //2. Print the file system handle (println calls toString() itself).
        System.out.println(fs);
    }
}
将core-site.xml拷贝到项目的根目录下
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Address of the HDFS NameNode.
     NOTE(review): the Java examples' commented-out URIs use port 8020 while this file
     uses 9000 - confirm which RPC port the cluster's NameNode actually listens on. -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop102:9000</value>
</property>
<!-- Storage directory for files Hadoop generates at runtime. -->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/module/hadoop-2.7.2/data/tmp</value>
</property>
</configuration>
2、HDFS文件上传
//Upload a local file to HDFS via the FileSystem API
@Test
public void putFileToHDFS() throws IOException, InterruptedException, URISyntaxException {
    Configuration configuration = new Configuration();
    //1. Obtain the file system. Config precedence: code > classpath config file > cluster config.
    //   try-with-resources fixes the original leak: fs stayed open if the upload threw.
    // FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"),configuration,"atguigu");
    try (FileSystem fs = FileSystem.get(configuration)) {
        //2. Upload. CAUTION: the first argument (delSrc=true) DELETES the local source after upload.
        fs.copyFromLocalFile(true, new Path("d:/xiyou.txt"), new Path("/user"));
    }
    //3. fs is closed automatically by try-with-resources.
}
3、文件下载
//Download a file from HDFS to the local disk
@Test
public void getFileFromHDFS() throws Exception { // was "throws Exception, Exception, Exception" - duplicates removed
    //1. Obtain the file system as user "atguigu"; close it even on failure.
    Configuration configuration = new Configuration();
    try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
        //2. Download the file.
        fs.copyToLocalFile(new Path("/user/xiyou.txt"), new Path("d:/xiyou.txt"));
        // 4-arg overload: (delSrc, src, dst, useRawLocalFileSystem) - passing true last
        // skips writing the local .crc checksum file.
        // fs.copyToLocalFile(true,new Path("/user/xiyou.txt"),new Path("d:/xiyou.txt"),true);
    }
    //3. fs is closed automatically.
}
4、创建目录
//Create a directory on HDFS
@Test
public void mkdirAtHDFS() throws Exception {
    //1. Obtain the file system; try-with-resources fixes the leak on exception.
    Configuration configuration = new Configuration();
    try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
        //2. Create the directory (like mkdir -p: creates missing parents).
        fs.mkdirs(new Path("/user/atguigu"));
    }
    //3. fs is closed automatically.
}
5、删除文件夹
//Delete a directory on HDFS
@Test
public void deleteAtHDFS() throws Exception {
    //1. Obtain the file system; try-with-resources fixes the leak on exception.
    Configuration configuration = new Configuration();
    try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
        //2. Delete. The second argument (recursive=true) is required for non-empty directories.
        fs.delete(new Path("/user/atguigu"), true);
    }
    //3. fs is closed automatically.
}
6、更改文件名称
//Rename a file on HDFS
@Test
public void renameAtHDFS() throws Exception {
    //1. Obtain the file system; try-with-resources fixes the leak on exception.
    Configuration configuration = new Configuration();
    try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
        //2. Rename /user/xiyou.txt to /user/mjy.txt.
        fs.rename(new Path("/user/xiyou.txt"), new Path("/user/mjy.txt"));
    }
    //3. fs is closed automatically.
}
7、查看文件详情
//Recursively list every file under / and print its metadata and block locations
@Test
public void readFileAtHDFS() throws Exception {
    //1. Obtain the file system; try-with-resources fixes the leak on exception.
    Configuration configuration = new Configuration();
    try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
        //2. Iterate all files (directories are skipped); true = recursive.
        RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);
        while (listFiles.hasNext()) {
            LocatedFileStatus status = listFiles.next();
            //File name
            System.out.println(status.getPath().getName());
            //Block size
            System.out.println(status.getBlockSize());
            //File length in bytes
            System.out.println(status.getLen());
            //Permission bits
            System.out.println(status.getPermission());
            System.out.println("--------------------------");
            //Detailed information for each block of the file
            BlockLocation[] blockLocations = status.getBlockLocations();
            for (BlockLocation block : blockLocations) {
                System.out.println(block.getOffset()); // byte offset of this block within the file
                String[] hosts = block.getHosts();
                for (String host : hosts) {
                    System.out.println(host); // DataNodes holding a replica, e.g. hadoop102/103/104
                }
            }
        }
    }
    //3. fs is closed automatically.
}
8、获取文件夹信息
//List the direct children of /user and report whether each is a file or a directory
@Test
public void readfolderAtHDFS() throws Exception { // Exception already subsumes the two types the original also listed
    //1. Obtain the file system; try-with-resources fixes the leak on exception.
    Configuration configuration = new Configuration();
    try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
        //2. Non-recursive listing; classify each entry.
        FileStatus[] listStatus = fs.listStatus(new Path("/user"));
        for (FileStatus status : listStatus) {
            if (status.isFile()) {
                System.out.println("f---" + status.getPath().getName()); //file
            } else {
                System.out.println("d---" + status.getPath().getName()); //directory
            }
        }
    }
    //3. fs is closed automatically.
}
二、通过IO流操作HDFS
1、HDFS文件上传
//Upload a file to HDFS by piping a local input stream into an HDFS output stream
@Test
public void putFileToHDFS() throws IOException, InterruptedException, URISyntaxException {
    //1. Obtain the file system.
    Configuration configuration = new Configuration();
    //2-3. Open all three resources with try-with-resources; they are closed in reverse
    //order even on failure (the original never closed fs at all).
    try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu");
         FileInputStream fis = new FileInputStream(new File("e:/hadoop-2.7.2.tar.gz"));
         FSDataOutputStream fos = fs.create(new Path("/user/atguigu/hadoop-2.7.2.tar.gz"))) {
        //4. Pipe local input stream -> HDFS output stream.
        //BUG FIX: the original caught Exception with an empty body, so a failed upload
        //looked like a success. Failures now propagate to JUnit.
        IOUtils.copyBytes(fis, fos, configuration);
    }
    //5. Streams and fs are closed automatically.
}
2、下载文件
//Download a file from HDFS by piping an HDFS input stream into a local output stream
@Test
public void getFileFromHDFS() throws IOException, InterruptedException, URISyntaxException {
    //1. Obtain the file system.
    Configuration configuration = new Configuration();
    //2-3. Open all three resources with try-with-resources; they are closed in reverse
    //order even on failure (the original never closed fs at all).
    try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu");
         FSDataInputStream fis = fs.open(new Path("/user/mjy.txt"));
         FileOutputStream fos = new FileOutputStream(new File("m:/mjy.txt"))) {
        //4. Pipe HDFS input stream -> local output stream.
        //BUG FIX: the original caught Exception with an empty body, silently hiding
        //download failures. Failures now propagate to JUnit.
        IOUtils.copyBytes(fis, fos, configuration);
    }
    //5. Streams and fs are closed automatically.
}
3、下载大文件的第一块数据
//Download only the FIRST 128 MB block of a large HDFS file
@Test
public void getFileFromHDFSSeek1() throws IOException, InterruptedException, URISyntaxException {
    //1. Obtain the file system.
    Configuration configuration = new Configuration();
    //2-3. Open resources with try-with-resources (closed in reverse order even on failure).
    try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu");
         FSDataInputStream fis = fs.open(new Path("/user/atguigu/hadoop-2.7.2.tar.gz"));
         FileOutputStream fos = new FileOutputStream(new File("e:/hadoop-2.7.2.tar.gz.part1"))) {
        //4. Copy exactly the first 128 MB (one default HDFS block).
        //BUG FIX: the original ignored read()'s return value and always wrote the full
        //1024-byte buffer, so any short read or EOF corrupted the output, and the byte
        //count was wrong. Track the remaining byte budget instead.
        byte[] buf = new byte[1024];
        long remaining = 1024L * 1024 * 128;
        while (remaining > 0) {
            int len = fis.read(buf, 0, (int) Math.min(buf.length, remaining));
            if (len == -1) {
                break; // source file shorter than 128 MB
            }
            fos.write(buf, 0, len);
            remaining -= len;
        }
    }
    //5. Streams and fs are closed automatically.
}
4、下载大文件的第二块数据
//Download everything AFTER the first 128 MB block of a large HDFS file
@Test
public void getFileFromHDFSSeek2() throws IOException, InterruptedException, URISyntaxException {
    //1. Obtain the file system.
    Configuration configuration = new Configuration();
    //2-3. Open resources with try-with-resources (the original wrapped its close calls in a
    //swallow-all try/catch and never closed fs).
    try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu");
         FSDataInputStream fis = fs.open(new Path("/user/atguigu/hadoop-2.7.2.tar.gz"));
         FileOutputStream fos = new FileOutputStream(new File("e:/hadoop-2.7.2.tar.gz.part2"))) {
        //4. Position the stream at the start of the second block (offset 128 MB)...
        fis.seek(1024 * 1024 * 128);
        //...then copy the remainder of the file.
        IOUtils.copyBytes(fis, fos, configuration);
    }
    //5. Streams and fs are closed automatically.
}
5、在windows上将两块数据进行合并
type hadoop-2.7.2.tar.gz.part2 >> hadoop-2.7.2.tar.gz.part1
(追加完成后,part1 即为完整的 hadoop-2.7.2.tar.gz,重命名后即可正常解压使用)