连接池
1、为什么要用连接池
如果每一次请求都要创建HttpClient,会有频繁创建和销毁的问题,可以使用连接池来解决这个问题。
如何创建项目请看:https://blog.csdn.net/weixin_44588495/article/details/90580722
2、创建连接池对象
利用PoolingHttpClientConnectionManager类创建。需要同时设置连接池的最大连接数和每一个主机的最大连接数。原因是:如果同时爬取百度新闻和淘宝的数据,而不限制每个主机的连接数,那么当爬取百度占满了全部100个连接时,淘宝可用的连接数就为0了。
// Create the connection pool manager
PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
// Set the maximum total number of connections in the pool
cm.setMaxTotal(100);
// Set the maximum number of connections per host (route)
cm.setDefaultMaxPerRoute(10);
3、创建doGet方法
/**
 * Sends one GET request through a client backed by the given connection pool
 * and prints the response body when the status code is 200.
 *
 * @param cm the shared connection pool manager; it is left open so that
 *           later calls can keep using the same pool
 */
public static void doGet(PoolingHttpClientConnectionManager cm){
    // Build an HttpClient that takes its connections from the pool
    CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();
    // Create the HttpGet request for the target URL
    HttpGet httpGet = new HttpGet("http://www.itcast.cn");
    CloseableHttpResponse response = null;
    try {
        response = httpClient.execute(httpGet);
        if(response.getStatusLine().getStatusCode() == 200){
            HttpEntity httpEntity = response.getEntity();
            String content = EntityUtils.toString(httpEntity,"utf-8");
            System.out.println(content);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // response is still null when execute() threw, so guard before closing
        if (response != null) {
            try {
                response.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        // Do not close httpClient here: closing it would also shut down the
        // shared connection manager and break every later request on the pool.
    }
}
4、完整的代码如下:
package com.crawler;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.util.EntityUtils;
import java.io.IOException;
/**
 * Demonstrates issuing several GET requests through one shared
 * {@link PoolingHttpClientConnectionManager}.
 */
public class HttpClientPoolTest {

    /**
     * Creates the connection pool, runs two GET requests against it and
     * shuts the pool down afterwards.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Create the connection pool manager
        PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
        try {
            // Set the maximum total number of connections in the pool
            cm.setMaxTotal(100);
            // Set the maximum number of connections per host (route)
            cm.setDefaultMaxPerRoute(10);
            // Issue the GET requests; both calls share the same pool
            doGet(cm);
            doGet(cm);
        } finally {
            // Release the pooled connections once all requests are done
            cm.shutdown();
        }
    }

    /**
     * Sends one GET request through a client backed by the given connection
     * pool and prints the length of the response body when the status code is 200.
     *
     * @param cm the shared connection pool manager; it is not closed here so
     *           that the caller can reuse it for further requests
     */
    public static void doGet(PoolingHttpClientConnectionManager cm) {
        // Build an HttpClient that takes its connections from the pool
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();
        // Create the HttpGet request for the target URL
        HttpGet httpGet = new HttpGet("http://www.itcast.cn");
        CloseableHttpResponse response = null;
        try {
            response = httpClient.execute(httpGet);
            if (response.getStatusLine().getStatusCode() == 200) {
                HttpEntity httpEntity = response.getEntity();
                String content = EntityUtils.toString(httpEntity, "utf-8");
                System.out.println(content.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // response is still null when execute() threw, so guard before closing
            if (response != null) {
                try {
                    response.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            // httpClient is deliberately not closed: closing it would shut
            // down the shared connection manager.
        }
    }
}
5、设置请求的时间
最后将该代码块放置在doGet方法内(创建HttpGet对象之后、执行请求之前)
// Configure the request settings (HttpClient timeout values are in milliseconds)
RequestConfig config = RequestConfig.custom().setConnectTimeout(1000) // max time to establish the connection
.setConnectionRequestTimeout(500) // max time to wait for a connection from the connection manager
.setSocketTimeout(10 * 1000) // max time to wait for data on the socket (read timeout)
.build();
// Apply the configuration to the request
httpGet.setConfig(config);