# 源码
package com.sxt.location;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Minimal web crawler that imitates a browser request.
 *
 * <p>Issues an HTTP GET to a fixed URL with a desktop Chrome {@code User-Agent}
 * header and prints the response body to standard output, one line at a time.
 *
 * @author 不忘初心
 */
public class SpiderTest01 {

    /**
     * Downloads the page and echoes it to {@code System.out}.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the URL is malformed, the connection cannot be
     *                     opened, or reading the response fails
     */
    public static void main(String[] args) throws IOException {
        // Target URL.
        URL url = new URL("https://www.dianping.com");

        // Download the resource. openConnection() is used rather than
        // url.openStream() so that request headers can be set before the
        // request is sent.
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try {
            conn.setRequestMethod("GET");
            // Present ourselves as a regular desktop browser.
            conn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36");

            // try-with-resources closes the reader (and the underlying stream)
            // even when readLine() or println() throws.
            try (BufferedReader br = new BufferedReader(
                    new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
                String msg;
                while (null != (msg = br.readLine())) {
                    System.out.println(msg);
                }
            }
        } finally {
            // Release the connection regardless of how the request ended.
            conn.disconnect();
        }

        // TODO: analyse the downloaded content.
        // TODO: process the results.
    }
}
## 截取一部分爬取到的HTML文件
# 参考文献
【1】《Java核心技术 卷I》
【2】裴新,《Java 300集》