1.编写一个Tool用来解析url获取文本
(注:从博客列表点击进入某篇博客时,记得要先设置请求头,也就是 doGet2() 中 setHeader() 那部分内容,需要根据自己的信息进行填写(这里我已经将个人信息删除),因为其中加载的是动态数据)
package 刷博客阅读量;
import org.apache.http.Consts;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.params.CookiePolicy;
import org.apache.http.client.params.HttpClientParams;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
/**
 * Step 1: small HTTP/HTML helper that fetches a page as text and extracts
 * the article links from it.
 *
 * <p>All methods are static. Network failures are reported by printing the
 * stack trace and returning an empty string, never by returning {@code null}.
 */
public class Tool {

    /** User-Agent sent by {@link #doGet(String)} so the request looks like a desktop browser. */
    private static final String BROWSER_USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36";

    /**
     * Fetches the blog list page.
     *
     * @param url address of the page to download
     * @return the response body decoded as UTF-8, or {@code ""} if the request
     *         failed or the response carried no body
     * @throws IllegalArgumentException if {@code url} is not a syntactically valid URI
     */
    public static String doGet(String url) {
        HttpGet httpGet = new HttpGet(url);
        // Pretend to be a regular browser.
        httpGet.setHeader("User-Agent", BROWSER_USER_AGENT);
        return fetchBody(httpGet);
    }

    /**
     * Fetches a single blog article using headers that simulate a logged-in browser.
     *
     * <p>The header values below are blank placeholders: fill them in with the
     * values (including the post-login Cookie) taken from your own browser session.
     * If this variant is not needed it can be removed and callers switched to
     * {@link #doGet(String)}.
     *
     * @param url address of the article to download
     * @return the response body decoded as UTF-8, or {@code ""} if the request
     *         failed or the response carried no body
     * @throws IllegalArgumentException if {@code url} is not a syntactically valid URI
     */
    public static String doGet2(String url) {
        HttpGet httpGet = new HttpGet(url);
        // Header set used to simulate a logged-in session (placeholders, see Javadoc).
        httpGet.setHeader("Accept", "");
        httpGet.setHeader("Accept-Encoding", "");
        httpGet.setHeader("Accept-Language", "");
        httpGet.setHeader("Cookie", "");
        httpGet.setHeader("User-Agent", "");
        return fetchBody(httpGet);
    }

    /**
     * Extracts the anchor elements that are direct children of an {@code h4} element.
     *
     * @param html HTML text to parse
     * @return the elements matching the CSS selector {@code h4>a}
     */
    public static Elements parse(String html) {
        Document doc = Jsoup.parse(html);
        return doc.select("h4>a");
    }

    /**
     * Executes the request and returns its body as UTF-8 text.
     *
     * <p>Both the client and the response are closed on every path, so neither
     * the connection nor the client's connection manager is leaked when the
     * request or the body decoding fails.
     */
    private static String fetchBody(HttpGet request) {
        try (CloseableHttpClient httpClient = HttpClientBuilder.create().build();
             CloseableHttpResponse httpResponse = httpClient.execute(request)) {
            HttpEntity httpEntity = httpResponse.getEntity();
            if (httpEntity == null) {
                // Responses without a body would make EntityUtils.toString throw.
                return "";
            }
            String body = EntityUtils.toString(httpEntity, Consts.UTF_8);
            return body != null ? body : "";
        } catch (IOException e) {
            e.printStackTrace();
            return "";
        }
    }
}
2.开启Springboot的定时调度
package 刷博客阅读量;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.scheduling.annotation.EnableScheduling;
/**
 * Step 2: Spring Boot entry point for the scheduled blog visitor.
 *
 * <p>The periodic work itself is declared with Spring's {@code @Scheduled};
 * placing {@code @EnableScheduling} on this startup class is what switches
 * scheduled-task support on.
 */
@EnableScheduling // enable scheduled tasks
@SpringBootApplication
public class DemoApplication {

    /**
     * Boots the Spring application.
     *
     * @param args command-line arguments, passed through to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication application = new SpringApplication(DemoApplication.class);
        application.run(args);
    }
}
3.编写定时调度的内容
(记得把url改成要查找的地址!)
package 刷博客阅读量;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import static 刷博客阅读量.Tool.doGet;
import static 刷博客阅读量.Tool.doGet2;
import static 刷博客阅读量.Tool.parse;
/**
 * Step 3: scheduled job that downloads the blog list page and then requests
 * every article link found on it.
 */
@Component
public class SchedulingTest {

    /** Number of scheduler runs completed so far; only used for the progress message. */
    private int visitCount = 0;

    /**
     * Runs once per minute: fetches the list page, extracts the article
     * links and requests each of them.
     *
     * <p>A link that is empty or not a valid URI is skipped so that one bad
     * {@code href} cannot abort the remaining visits of this run.
     */
    @Scheduled(fixedRate = 60 * 1000) // interval in milliseconds: 60 * 1000 = one minute
    void doSomethingWith() {
        // Placeholder: replace with the address of the blog list page to scan.
        String url = "个人查找的url地址";
        String body = Tool.doGet(url);
        Elements elements = Tool.parse(body);
        for (Element ele : elements) {
            String urlEle = ele.attr("href");
            System.out.println("urlEle:" + urlEle);
            if (urlEle.isEmpty()) {
                // Anchor without an href: there is nothing to request.
                continue;
            }
            try {
                Tool.doGet2(urlEle);
            } catch (IllegalArgumentException e) {
                // Thrown when the href is not a syntactically valid URI; keep going with the next link.
                e.printStackTrace();
            }
        }
        visitCount++;
        System.out.println("第" + visitCount + "次访问");
    }
}
4.编写pom文件
(emm…这个才是第一步)
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the scheduled blog-visitor example. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Coordinates of this project. -->
<groupId>com.itcast</groupId>
<artifactId>algorithm</artifactId>
<version>1.0-SNAPSHOT</version>
<!-- Spring Boot parent POM; dependencies declared below without a version (spring-boot-starter-web) rely on it for their version. -->
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.1.6.RELEASE</version>
<!-- Disabled: <relativePath/> (look up the parent from the repository) -->
</parent>
<dependencies>
<!-- jsoup: HTML parsing (Jsoup.parse and CSS selectors) used by the Tool class. -->
<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.11.3</version>
</dependency>
<!-- Spring Boot starter; provides the Spring Boot and scheduling annotations the application classes import. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- Apache HttpCore: HttpEntity, HttpResponse, Consts and EntityUtils used by the Tool class. -->
<!-- NOTE(review): the explicit version here takes precedence over any version managed by the parent; confirm this pin is intended. -->
<!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpcore -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
<version>4.4.10</version>
</dependency>
<!-- Apache HttpClient: HttpClientBuilder and HttpGet used by the Tool class. -->
<!-- NOTE(review): explicitly pinned version, same consideration as httpcore above. -->
<!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.3</version>
</dependency>
</dependencies>
</project>