学习总结篇——HDFS文件系统_2

一、通过API操作HDFS

1、HDFS获取文件系统

// Obtain a FileSystem handle and print its description.
	@Test
	public void getFileSystem() throws IOException, InterruptedException, URISyntaxException {
		// 0. Build the configuration; it picks up core-site.xml from the classpath root.
		Configuration configuration = new Configuration();
		// 1. Obtain the file system.
		// Alternative: connect explicitly with a URI and user name:
		// FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu");
		// try-with-resources guarantees the connection is closed even if printing throws.
		try (FileSystem fs = FileSystem.get(configuration)) {
			// 2. Print the file system description.
			System.out.println(fs.toString());
		}
	}

将core-site.xml拷贝到项目的根目录(classpath)下。注意:下面配置中 fs.defaultFS 使用的端口是 9000,而前后代码示例连接的是 8020,实际使用时两者必须与集群 NameNode 的实际端口保持一致。

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
<!-- NameNode address for HDFS.
     NOTE(review): this value uses port 9000 while the code samples connect to 8020 —
     confirm which port your cluster's NameNode actually exposes and keep them consistent. -->
	<property>
		<name>fs.defaultFS</name>
        <value>hdfs://hadoop102:9000</value>
	</property>

	<!-- Directory where Hadoop stores files it generates at runtime -->
	<property>
		<name>hadoop.tmp.dir</name>
		<value>/opt/module/hadoop-2.7.2/data/tmp</value>
	</property>
</configuration>

2、HDFS文件上传

// Upload a local file to HDFS.
	@Test
	public void putFileToHDFS() throws IOException, InterruptedException, URISyntaxException {
		Configuration configuration = new Configuration();
		// 1. Obtain the file system. Setting priority: code > classpath config files > cluster config.
		// FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu");
		// try-with-resources closes the connection even if the upload throws.
		try (FileSystem fs = FileSystem.get(configuration)) {
			// 2. Upload; delSrc=true deletes d:/xiyou.txt after a successful copy.
			fs.copyFromLocalFile(true, new Path("d:/xiyou.txt"), new Path("/user"));
		}
		// 3. Resource released automatically.
	}

3、文件下载

// Download a file from HDFS to the local disk.
	@Test
	public void getFileFromHDFS() throws Exception { // was "throws Exception, Exception, Exception" — duplicates removed
		//1、获取文件系统 — obtain the file system; try-with-resources guarantees close.
		Configuration configuration = new Configuration();
		try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
			//2、执行下载文件命令 — copy the HDFS file to the local path.
			fs.copyToLocalFile(new Path("/user/xiyou.txt"), new Path("d:/xiyou.txt"));
			// Overload: copyToLocalFile(delSrc, src, dst, useRawLocalFileSystem)
			// fs.copyToLocalFile(true, new Path("/user/xiyou.txt"), new Path("d:/xiyou.txt"), true);
		}
		//3、资源由 try-with-resources 自动关闭
	}

4、创建目录

// Create a directory on HDFS (parents are created as needed, like mkdir -p).
	@Test
	public void mkdirAtHDFS() throws Exception{
		//1、获取文件系统 — try-with-resources guarantees the connection is closed.
		Configuration configuration = new Configuration();
		try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
			//2、执行创建文件夹操作
			fs.mkdirs(new Path("/user/atguigu"));
		}
		//3、资源自动关闭
	}

5、删除文件夹

// Delete a directory on HDFS; the second argument 'true' makes the delete recursive.
	@Test
	public void deleteAtHDFS() throws Exception{
		//1、获取文件系统 — try-with-resources guarantees the connection is closed.
		Configuration configuration = new Configuration();
		try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
			//2、执行删除操作 (recursive=true)
			fs.delete(new Path("/user/atguigu"), true);
		}
		//3、资源自动关闭
	}

6、更改文件名称

// Rename (move) a file on HDFS.
	@Test
	public void renameAtHDFS() throws Exception{
		//1、获取文件系统 — try-with-resources guarantees the connection is closed.
		Configuration configuration = new Configuration();
		try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
			//2、执行更改名称操作
			fs.rename(new Path("/user/xiyou.txt"), new Path("/user/mjy.txt"));
		}
		//3、资源自动关闭
	}

7、查看文件详情

@Test
	public void readFileAtHDFS() throws Exception{
		// 1. Obtain the file system; try-with-resources guarantees close even if iteration throws.
		Configuration configuration = new Configuration();
		try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
			// 2. Recursively list every file under the root directory.
			RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);
			while (listFiles.hasNext()) {
				LocatedFileStatus status = listFiles.next();
				System.out.println(status.getPath().getName()); // file name
				System.out.println(status.getBlockSize());      // HDFS block size for this file
				System.out.println(status.getLen());            // file length in bytes
				System.out.println(status.getPermission());     // permission string
				System.out.println("--------------------------");

				// Per-block placement details for this file.
				BlockLocation[] blockLocations = status.getBlockLocations();
				for (BlockLocation block : blockLocations) {
					System.out.println(block.getOffset()); // byte offset of this block within the file
					for (String host : block.getHosts()) {
						System.out.println(host); // DataNode hosts holding a replica, e.g. hadoop102/103/104
					}
				}
			}
		}
		// 3. Resource closed automatically.
	}

8、获取文件夹信息

@Test
	public void readfolderAtHDFS() throws Exception, InterruptedException, URISyntaxException {
		// 1. Obtain the file system; try-with-resources guarantees close.
		Configuration configuration = new Configuration();
		try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu")) {
			// 2. List the direct children of /user and report file vs. directory.
			FileStatus[] listStatus = fs.listStatus(new Path("/user"));
			for (FileStatus status : listStatus) {
				if (status.isFile()) {
					System.out.println("f---" + status.getPath().getName());    // regular file
				} else {
					System.out.println("d---" + status.getPath().getName());    // directory
				}
			}
		}
		// 3. Resource closed automatically.
	}

二、通过IO流操作HDFS

1、HDFS文件上传

@Test
	public void putFileToHDFS() throws IOException, InterruptedException, URISyntaxException {
		// 1. Obtain the file system.
		Configuration configuration = new Configuration();
		// 2-4. try-with-resources closes fs, fos and fis in reverse declaration order even on
		// failure, and no longer swallows copy errors in an empty catch — the test now fails
		// loudly if the upload breaks.
		try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu");
				FSDataOutputStream fos = fs.create(new Path("/user/atguigu/hadoop-2.7.2.tar.gz"));
				FileInputStream fis = new FileInputStream(new File("e:/hadoop-2.7.2.tar.gz"))) {
			// Stream copy; copyBytes(in, out, conf) also closes the streams, and closing
			// twice via try-with-resources is harmless.
			IOUtils.copyBytes(fis, fos, configuration);
		}
	}

2、下载文件

@Test
	public void getFileFromHDFS() throws IOException, InterruptedException, URISyntaxException {
		// 1. Obtain the file system.
		Configuration configuration = new Configuration();
		// 2-4. try-with-resources closes fs, fis and fos even when the copy fails, and the
		// former empty catch (which hid download errors) is removed — exceptions propagate.
		try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu");
				FSDataInputStream fis = fs.open(new Path("/user/mjy.txt"));
				FileOutputStream fos = new FileOutputStream(new File("m:/mjy.txt"))) {
			// Stream copy from HDFS to the local file.
			IOUtils.copyBytes(fis, fos, configuration);
		}
	}

3、下载大文件的第一块数据

@Test
	public void getFileFromHDFSSeek1() throws IOException, InterruptedException, URISyntaxException {
		// 1. Obtain the file system.
		Configuration configuration = new Configuration();
		// 2-3. try-with-resources closes fs, fis and fos even if the copy throws.
		try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu");
				FSDataInputStream fis = fs.open(new Path("/user/atguigu/hadoop-2.7.2.tar.gz"));
				FileOutputStream fos = new FileOutputStream(new File("e:/hadoop-2.7.2.tar.gz.part1"))) {
			// 4. Copy exactly the first 128 MB (one default HDFS block).
			// BUG FIX: the original wrote the full 1024-byte buffer on every iteration,
			// ignoring how many bytes read() actually returned (and ignoring EOF), which
			// could corrupt the output with stale buffer contents.
			byte[] buf = new byte[1024];
			long remaining = 1024L * 1024 * 128;
			while (remaining > 0) {
				int len = fis.read(buf, 0, (int) Math.min(buf.length, remaining));
				if (len == -1) {
					break; // file is shorter than one block
				}
				fos.write(buf, 0, len); // write only the bytes actually read
				remaining -= len;
			}
		}
	}

4、下载大文件的第二块数据

@Test
	public void getFileFromHDFSSeek2() throws IOException, InterruptedException, URISyntaxException {
		// 1. Obtain the file system.
		Configuration configuration = new Configuration();
		// 2-3. try-with-resources closes fs, fis and fos even if seek/copy throws;
		// the former catch block that silently swallowed close failures is gone.
		try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop102:8020"), configuration, "atguigu");
				FSDataInputStream fis = fs.open(new Path("/user/atguigu/hadoop-2.7.2.tar.gz"));
				FileOutputStream fos = new FileOutputStream(new File("e:/hadoop-2.7.2.tar.gz.part2"))) {
			// 4. Skip the first block: position the stream at the 128 MB boundary,
			// then copy everything that remains.
			fis.seek(1024 * 1024 * 128);
			IOUtils.copyBytes(fis, fos, configuration);
		}
	}

5、在windows上将两块数据进行合并

type hadoop-2.7.2.tar.gz.part2 >> hadoop-2.7.2.tar.gz.part1

猜你喜欢

转载自blog.csdn.net/qq_40310148/article/details/86546061