本项目的重点和难点都在爬虫。由于此前没有接触过爬虫,相关代码参考了网上的实现。
1. 首先需要导入 fastjson、jsoup 两个 jar 包;由于代码要写入 MySQL,还需要导入 MySQL 的 JDBC 驱动(mysql-connector-java)jar 包。
2.编写爬虫方法.
此段代码可以爬取数据并导入数据库,之后的操作同上一篇日志.
package main;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.net.ssl.HttpsURLConnection;
import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.servlet.http.HttpSession;

import org.jsoup.Jsoup;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;

import main.CollectDataClass;

/**
 * Downloads the DXY epidemic page, extracts the per-province statistics that
 * the page embeds as a JSON array, and stores one row per province and one row
 * per city in the {@code info1} table of a local MySQL database.
 */
public class CollectDataClass {

    /** Page whose inline script carries the {@code window.getAreaStat} JSON. */
    private static final String AREA_STAT_URL = "https://ncov.dxy.cn/ncovh5/view/pneumonia";

    /**
     * Captures the JSON array assigned to {@code window.getAreaStat}; the match
     * stops at the closing brace that directly precedes {@code catch}.
     */
    private static final Pattern AREA_STAT_PATTERN =
            Pattern.compile("window.getAreaStat = (.*?)\\}(?=catch)");

    private static final String JDBC_DRIVER = "com.mysql.cj.jdbc.Driver";
    private static final String JDBC_URL =
            "jdbc:mysql://localhost:3306/cs?&useSSL=false&serverTimezone=UTC&useUnicode=yes&characterEncoding=utf8";
    // NOTE(review): credentials are hard-coded; consider moving them to configuration.
    private static final String DB_USER = "root";
    private static final String DB_PASSWORD = "root";

    private static final String INSERT_SQL =
            "INSERT INTO info1(Id,Date,Province,City,Confirmed_num,Yisi_num,Cured_num,Dead_num,Code) VALUES (?,?,?,?,?,?,?,?,?)";

    /** Value written to the {@code Code} column of every row. */
    private static final String DEFAULT_CODE = "0";

    /** Ids are incremented before each insert, so the first stored id is 1002. */
    private static final int INITIAL_ID = 1001;

    private static final int CONNECT_TIMEOUT_MS = 10000;
    private static final int READ_TIMEOUT_MS = 30000;

    /**
     * Runs one crawl-and-store pass as a side effect of construction.
     *
     * @throws IOException kept in the signature for existing callers
     */
    public CollectDataClass() throws IOException {
        try {
            getAreaStat();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }

    /**
     * Performs an HTTPS GET and returns the response body decoded as UTF-8.
     * Line terminators are dropped, so the result is one long line.
     *
     * @param requesturl absolute https URL to fetch
     * @return the response body without line breaks
     * @throws IOException if the URL is malformed or the request fails
     */
    private static String httpRequset(String requesturl) throws IOException {
        // A malformed URL now surfaces as an IOException instead of being
        // swallowed and followed by a NullPointerException on the null buffer.
        URL url = new URL(requesturl);
        HttpsURLConnection connection = (HttpsURLConnection) url.openConnection();
        try {
            connection.setDoInput(true);
            connection.setRequestMethod("GET");
            // Without timeouts an unresponsive server would block forever.
            connection.setConnectTimeout(CONNECT_TIMEOUT_MS);
            connection.setReadTimeout(READ_TIMEOUT_MS);

            StringBuilder body = new StringBuilder();
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    body.append(line);
                }
            }
            return body.toString();
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Fetches the confirmed, suspected, cured and dead counts for every
     * province and city, prints the raw JSON, and writes the rows to the
     * database.
     *
     * @return the extracted JSON array text, or an empty string when the page
     *         could not be downloaded or the data was not found in it
     * @throws ClassNotFoundException kept in the signature for existing
     *         callers; a missing JDBC driver is reported on stdout instead
     */
    public static String getAreaStat() throws ClassNotFoundException {
        String htmlResult = "";
        try {
            htmlResult = httpRequset(AREA_STAT_URL);
        } catch (IOException e) {
            e.printStackTrace();
        }

        // The page embeds the data as JSON inside a script, so pull it out with a regex.
        Matcher matcher = AREA_STAT_PATTERN.matcher(htmlResult);
        if (!matcher.find()) {
            return "";
        }
        String result = matcher.group(1);
        System.out.println(result);

        JSONArray provinces = JSONArray.parseArray(result);
        if (provinces != null) {
            saveAreaStat(provinces);
        }
        return result;
    }

    /**
     * Opens the database connection and stores every province and city.
     * Driver or connection problems are reported and abort the save.
     */
    private static void saveAreaStat(JSONArray provinces) {
        try {
            Class.forName(JDBC_DRIVER);
        } catch (ClassNotFoundException e) {
            System.out.println("加载驱动失败");
            e.printStackTrace();
            return;
        }

        try (Connection con = DriverManager.getConnection(JDBC_URL, DB_USER, DB_PASSWORD)) {
            insertAll(con, provinces);
        } catch (SQLException e) {
            System.out.println("连接数据库失败");
            e.printStackTrace();
        }
    }

    /**
     * Inserts one row per province followed by one row per city of that
     * province, reusing a single prepared statement for all rows.
     */
    private static void insertAll(Connection con, JSONArray provinces) {
        String date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
        int id = INITIAL_ID;

        try (PreparedStatement ps = con.prepareStatement(INSERT_SQL)) {
            // Iterate over what the feed actually contains rather than a fixed count.
            for (int i = 0; i < provinces.size(); i++) {
                JSONObject province = provinces.getJSONObject(i);
                if (province == null) {
                    continue;
                }
                String provinceName = province.getString("provinceName");

                // The province summary row is stored with an empty city name.
                id++;
                insertRow(ps, id, date, provinceName, "", province);

                JSONArray cities = province.getJSONArray("cities");
                if (cities == null) {
                    continue;
                }
                for (int j = 0; j < cities.size(); j++) {
                    JSONObject city = cities.getJSONObject(j);
                    if (city == null) {
                        continue;
                    }
                    id++;
                    insertRow(ps, id, date, provinceName, city.getString("cityName"), city);
                }
            }
        } catch (SQLException e) {
            System.out.println("添加失败");
            e.printStackTrace();
        }
    }

    /**
     * Binds one row and executes the insert. A failure is reported and does
     * not stop the remaining rows from being written.
     */
    private static void insertRow(PreparedStatement ps, int id, String date,
            String province, String city, JSONObject stats) {
        try {
            ps.setInt(1, id);
            ps.setString(2, date);
            ps.setString(3, province);
            ps.setString(4, city);
            ps.setString(5, stats.getString("confirmedCount"));
            ps.setString(6, stats.getString("suspectedCount"));
            ps.setString(7, stats.getString("curedCount"));
            ps.setString(8, stats.getString("deadCount"));
            ps.setString(9, DEFAULT_CODE);
            ps.executeUpdate();
        } catch (SQLException e) {
            System.out.println("添加失败");
            e.printStackTrace();
        }
    }
}