简介 : HttpClient
是Apache Jakarta Common下的子项目,用于提供高效的,功能丰富的支持HTTP协议的客户编程工具包,其主要功能如下:
- 实现了所有HTTP的方法 : GET,POST,PUT,HEAD ..
- 支持自动重定向
- 支持HTTPS协议
- 支持代理服务器
关于Http请求的方法说明,参考大佬整理的博客:
https://www.cnblogs.com/williamjie/p/9099940.html
一、环境准备
1 JDK1.8
2 IntelliJ IDEA
3 IDEA自带的Maven
创建Maven工程itcast-crawler-first并给pom.xml加入依赖
<dependencies> <!-- HttpClient --> <dependency> <groupId>org.apache.httpcomponents</groupId> <artifactId>httpclient</artifactId> <version>4.5.3</version> </dependency> <!-- 日志 --> <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-log4j12</artifactId> <version>1.7.25</version> </dependency> </dependencies>
关于日志的配置文件
log4j.rootLogger=DEBUG,A1
log4j.logger.cn.itcast = DEBUG
log4j.appender.A1=org.apache.log4j.ConsoleAppender
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-d{yyyy-MM-dd HH:mm:ss,SSS} [%t] [%c]-[%p] %m%n
log4j可以将日志以文件的形式输出,也可以输出打印在控制台上,同时可以设置输出的日志内容显示格式、日志文件的生成方式(追加、覆盖、设置日志文件大小等等)。我这里就是直接将日志打印到控制台上。org.apache.log4j.ConsoleAppender
二、编写代码
编写最简单的爬虫,抓取传智播客首页:http://www.itcast.cn/
public class CrawlerFirst { public static void main(String[] args) throws Exception { //1. 打开浏览器,创建HttpClient对象 CloseableHttpClient httpClient = HttpClients.createDefault(); //2. 输入网址,发起get请求创建HttpGet对象 HttpGet httpGet = new HttpGet("http://www.itcast.cn"); //3.按回车,发起请求,返回响应,使用HttpClient对象发起请求 CloseableHttpResponse response = httpClient.execute(httpGet); //4. 解析响应,获取数据 //判断状态码是否是200 if (response.getStatusLine().getStatusCode() == 200) { HttpEntity httpEntity = response.getEntity(); String content = EntityUtils.toString(httpEntity, "utf8"); System.out.println(content); } } }
三、Get请求
package cn.itcast.crawler.test; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; import java.io.IOException; public class HttpGetTest { public static void main(String[] args) { //创建HttpClient对象 CloseableHttpClient httpClient = HttpClients.createDefault(); //创建HttpGet对象,设置url访问地址 HttpGet httpGet = new HttpGet("http://www.itcast.cn"); CloseableHttpResponse response = null; try { //使用HttpClient发起请求,获取response response = httpClient.execute(httpGet); //解析响应 if (response.getStatusLine().getStatusCode() == 200) { String content = EntityUtils.toString(response.getEntity(), "utf8"); System.out.println(content.length()); } } catch (IOException e) { e.printStackTrace(); }finally { //关闭response try { response.close(); } catch (IOException e) { e.printStackTrace(); } try { httpClient.close(); } catch (IOException e) { e.printStackTrace(); } } } }
四、带参数的GET请求
public class HttpGetParamTest { public static void main(String[] args) throws Exception { //创建HttpClient对象 CloseableHttpClient httpClient = HttpClients.createDefault(); //设置请求地址是:http://yun.itheima.com/search?keys=Java //创建URIBuilder URIBuilder uriBuilder = new URIBuilder("http://yun.itheima.com/search"); //设置参数 uriBuilder.setParameter("keys","Java"); //创建HttpGet对象,设置url访问地址 HttpGet httpGet = new HttpGet(uriBuilder.build()); System.out.println("发起请求的信息:"+httpGet); CloseableHttpResponse response = null; try { //使用HttpClient发起请求,获取response response = httpClient.execute(httpGet); //解析响应 if (response.getStatusLine().getStatusCode() == 200) { String content = EntityUtils.toString(response.getEntity(), "utf8"); System.out.println(content.length()); } } catch (IOException e) { e.printStackTrace(); }finally { //关闭response try { response.close(); } catch (IOException e) { e.printStackTrace(); } try { httpClient.close(); } catch (IOException e) { e.printStackTrace(); } } } }
五、POST请求
public class HttpPostTest { public static void main(String[] args) { //创建HttpClient对象 CloseableHttpClient httpClient = HttpClients.createDefault(); //创建HttpPost对象,设置url访问地址 HttpPost httpPost = new HttpPost("http://www.itcast.cn"); CloseableHttpResponse response = null; try { //使用HttpClient发起请求,获取response response = httpClient.execute(httpPost); //解析响应 if (response.getStatusLine().getStatusCode() == 200) { String content = EntityUtils.toString(response.getEntity(), "utf8"); System.out.println(content.length()); } } catch (IOException e) { e.printStackTrace(); }finally { //关闭response try { response.close(); } catch (IOException e) { e.printStackTrace(); } try { httpClient.close(); } catch (IOException e) { e.printStackTrace(); } } } }
六、带参数的POST请求
public class HttpPostParamTest { public static void main(String[] args) throws Exception { //创建HttpClient对象 CloseableHttpClient httpClient = HttpClients.createDefault(); //创建HttpPost对象,设置url访问地址 HttpPost httpPost = new HttpPost("http://yun.itheima.com/search"); //声明List集合,封装表单中的参数 List<NameValuePair> params = new ArrayList<NameValuePair>(); //设置请求地址是:http://yun.itheima.com/search?keys=Java params.add(new BasicNameValuePair("keys","Java")); //创建表单的Entity对象,第一个参数就是封装好的表单数据,第二个参数就是编码 UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(params,"utf8"); //设置表单的Entity对象到Post请求中 httpPost.setEntity(formEntity); CloseableHttpResponse response = null; try { //使用HttpClient发起请求,获取response response = httpClient.execute(httpPost); //解析响应 if (response.getStatusLine().getStatusCode() == 200) { String content = EntityUtils.toString(response.getEntity(), "utf8"); System.out.println(content.length()); } } catch (IOException e) { e.printStackTrace(); }finally { //关闭response try { response.close(); } catch (IOException e) { e.printStackTrace(); } try { httpClient.close(); } catch (IOException e) { e.printStackTrace(); } } } }
七、HTTP连接池
如果每次请求都要创建HttpClient,会有频繁创建和销毁的问题,可以使用连接池来解决这个问题。
测试以下代码,并断点查看每次获取的HttpClient都是不一样的。
public class HttpClientPoolTest { public static void main(String[] args) { //创建连接池管理器 PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager(); //设置最大连接数 cm.setMaxTotal(100); //设置每个主机的最大连接数 cm.setDefaultMaxPerRoute(10); //使用连接池管理器发起请求 doGet(cm); doGet(cm); } private static void doGet(PoolingHttpClientConnectionManager cm) { //不是每次创建新的HttpClient,而是从连接池中获取HttpClient对象 CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build(); HttpGet httpGet = new HttpGet("http://www.itcast.cn"); CloseableHttpResponse response = null; try { response = httpClient.execute(httpGet); if (response.getStatusLine().getStatusCode() == 200) { String content = EntityUtils.toString(response.getEntity(), "utf8"); System.out.println(content.length()); } } catch (IOException e) { e.printStackTrace(); }finally { if (response != null) { try { response.close(); } catch (IOException e) { e.printStackTrace(); } //不能关闭HttpClient,由连接池管理HttpClient //httpClient.close(); } } } }