/*
* Created by JFormDesigner on Fri Aug 21 17:15:14 CST 2020
*/
package josupa;
import java.awt.*;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import javax.swing.*;
/**
 * Swing window that takes a list-page URL prefix and a cookie string, asks
 * {@code JsoupTest} to crawl the pages, and shows the scraped shops (link, name,
 * score) in a table.
 *
 * <p>The crawl runs on a background {@link SwingWorker} so the window stays
 * responsive; the table is refreshed on the event dispatch thread when it finishes.
 *
 * @author 1
 */
public class JsoupJframeTest extends JFrame {
    /** Minimum frame width in pixels; also used to centre the frame on screen. */
    private static final int FRAME_WIDTH = 640;
    /** Minimum frame height in pixels; also used to centre the frame on screen. */
    private static final int FRAME_HEIGHT = 535;
    /** Table column headers: address (link), shop name, score. */
    private static final String[] COLUMN_NAMES = {"地址", "门店", "评分"};

    /** Growable model behind {@code table1}; one row per scraped shop. */
    private javax.swing.table.DefaultTableModel tableModel;

    /**
     * Launches the window. Construction is posted to the event dispatch thread
     * because Swing components must be created and shown there.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        SwingUtilities.invokeLater(new Runnable() {
            @Override
            public void run() {
                new JsoupJframeTest().setVisible(true);
            }
        });
    }

    /** Builds the components, then applies title, fixed size and a screen-centred position. */
    public JsoupJframeTest() {
        initComponents();
        setTitle("大众点评"); // window title
        setResizable(false); // fixed-size window
        setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); // closing the window ends the process
        // Centre the frame on the screen, based on the nominal frame size.
        Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize();
        setLocation((screenSize.width - FRAME_WIDTH) / 2, (screenSize.height - FRAME_HEIGHT) / 2);
        setMinimumSize(new Dimension(FRAME_WIDTH, FRAME_HEIGHT)); // frame size
    }

    /** Creates, wires and lays out (absolute positioning) every component of the window. */
    private void initComponents() {
        // JFormDesigner - Component initialization - DO NOT MODIFY //GEN-BEGIN:initComponents
        textField1 = new JTextField();
        button1 = new JButton();
        scrollPane1 = new JScrollPane();
        label1 = new JLabel();
        label2 = new JLabel();
        textField2 = new JTextField();
        textField1.setText("http://www.dianping.com/shanghai/ch10/g101p");
        // NOTE(review): this default is a hard-coded session cookie (it carries login
        // tokens and what looks like a phone number). It should not live in source
        // control; kept unchanged here so the pre-filled form behaves as before.
        textField2.setText("navCtgScroll=100; _lxsdk_cuid=17212861813c8-0732406bf2d0a6-3e385b04-100200-1721286181366; _lxsdk=17212861813c8-0732406bf2d0a6-3e385b04-100200-1721286181366; _hc.v=1045e80c-8a95-04c7-8a3c-da114313dede.1589448678; fspop=test; cy=1; cye=shanghai; s_ViewType=10; thirdtoken=5a77eb50-3768-4f5a-b331-90bf1fb7e12d; _thirdu.c=7bfa71bdc942c0382a17cd44b3c5fc38; Hm_lvt_602b80cf8079ae6591966cc70a3940e7=1597992820,1597993916; dplet=14e36aa35571643cec9cde0bde90b12d; dper=95aa894edc5da0bbd2a32fcd5680a3065ee3a929c2374e86c5a12a28aa60606efbf406133a910d1804608a392c9d47a0e922a664a5603ce174b2035990ab20dee5f279c08ffef0fd590811d1dcc0feb5b10d0c934c29521d819e3d47fbc6be61; ll=7fd06e815b796be3df069dec7836c3df; ua=dpuser_5993284789; ctu=d8f4e03e3e7b70b44786831a696b2b5df41365da651e001c6f6bb6696ff5c7fd; uamo=15601946082; Hm_lpvt_602b80cf8079ae6591966cc70a3940e7=1597994247; _lxsdk_s=1740fcb5c22-cb0-849-815%7C%7C629");

        //======== this ========
        Container contentPane = getContentPane();
        contentPane.setLayout(null);
        contentPane.add(textField1);
        textField1.setBounds(75, 45, 430, 30);

        //---- button1 ----
        button1.setText("\u6293\u53d6");
        contentPane.add(button1);
        button1.setBounds(new Rectangle(new Point(535, 50), button1.getPreferredSize()));
        button1.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                String url = textField1.getText();
                String cookie = textField2.getText();
                if (url == null || url.trim().isEmpty()) {
                    JOptionPane.showMessageDialog(JsoupJframeTest.this, "抓取地址不存在");
                    return;
                }
                if (cookie == null || cookie.trim().isEmpty()) {
                    JOptionPane.showMessageDialog(JsoupJframeTest.this, "cookie不存在");
                    return;
                }
                startFetch(url, cookie);
            }
        });

        // A growable model replaces the former fixed 500-row array, which overflowed
        // once more than 500 shops had been collected.
        tableModel = new javax.swing.table.DefaultTableModel(COLUMN_NAMES, 0);
        table1 = new JTable(tableModel);

        //======== scrollPane1 ========
        {
            scrollPane1.setViewportView(table1);
        }
        contentPane.add(scrollPane1);
        scrollPane1.setBounds(25, 90, 580, 395);

        //---- label1 ----
        label1.setText("\u5730\u5740\uff1a");
        contentPane.add(label1);
        label1.setBounds(new Rectangle(new Point(30, 50), label1.getPreferredSize()));

        //---- label2 ----
        label2.setText("Cookie\uff1a");
        contentPane.add(label2);
        label2.setBounds(new Rectangle(new Point(15, 15), label2.getPreferredSize()));
        contentPane.add(textField2);
        textField2.setBounds(75, 15, 530, textField2.getPreferredSize().height);

        {
            // compute preferred size: the null layout cannot, so take the bounding
            // box of all children plus the right/bottom insets
            Dimension preferredSize = new Dimension();
            for (int i = 0; i < contentPane.getComponentCount(); i++) {
                Rectangle bounds = contentPane.getComponent(i).getBounds();
                preferredSize.width = Math.max(bounds.x + bounds.width, preferredSize.width);
                preferredSize.height = Math.max(bounds.y + bounds.height, preferredSize.height);
            }
            Insets insets = contentPane.getInsets();
            preferredSize.width += insets.right;
            preferredSize.height += insets.bottom;
            contentPane.setMinimumSize(preferredSize);
            contentPane.setPreferredSize(preferredSize);
        }
        pack();
        setLocationRelativeTo(getOwner());
        // JFormDesigner - End of component initialization //GEN-END:initComponents
    }

    /**
     * Runs the crawl on a background thread and refreshes the table when it ends.
     * The button is disabled for the duration so only one crawl runs at a time.
     *
     * @param url    list-page URL prefix passed to {@code JsoupTest.sendHtml}
     * @param cookie value passed to {@code JsoupTest.sendHtml} as the cookie
     */
    private void startFetch(final String url, final String cookie) {
        button1.setEnabled(false);
        new SwingWorker<Void, Void>() {
            @Override
            protected Void doInBackground() throws IOException {
                JsoupTest.sendHtml(url, cookie);
                return null;
            }

            @Override
            protected void done() {
                button1.setEnabled(true);
                String failure = null;
                try {
                    get(); // rethrows whatever doInBackground threw
                } catch (InterruptedException ex) {
                    Thread.currentThread().interrupt();
                    failure = ex.toString();
                } catch (java.util.concurrent.ExecutionException ex) {
                    Throwable cause = ex.getCause() != null ? ex.getCause() : ex;
                    cause.printStackTrace();
                    failure = cause.toString();
                }
                // Show whatever was collected, even if the crawl failed part-way.
                showShops();
                if (failure != null) {
                    JOptionPane.showMessageDialog(JsoupJframeTest.this, "抓取失败:" + failure);
                } else if (tableModel.getRowCount() == 0) {
                    JOptionPane.showMessageDialog(JsoupJframeTest.this, "未抓取到数据");
                }
            }
        }.execute();
    }

    /** Rebuilds the table rows from the shops currently held in {@code JsoupTest.list}. */
    private void showShops() {
        tableModel.setRowCount(0);
        List<Map> shops = JsoupTest.list;
        if (shops == null) {
            return;
        }
        for (Map<?, ?> shop : shops) {
            tableModel.addRow(new Object[] {shop.get("url"), shop.get("mer"), shop.get("score")});
        }
    }

    // JFormDesigner - Variables declaration - DO NOT MODIFY //GEN-BEGIN:variables
    private JTextField textField1;
    private JButton button1;
    private JScrollPane scrollPane1;
    private JTable table1;
    private JLabel label1;
    private JLabel label2;
    private JTextField textField2;
    // JFormDesigner - End of variables declaration //GEN-END:variables
}
package josupa;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Crawls paginated shop list pages with jsoup and collects, for every shop entry,
 * its link ({@code "url"}), name ({@code "mer"}) and star score ({@code "score"}).
 *
 * <p>Results are appended to the shared static {@link #list}; repeated crawls
 * accumulate. The class does no synchronisation of its own.
 */
public class JsoupTest {
    /** Highest page number the crawler will request. */
    private static final int MAX_PAGE = 50;
    /** Page number the crawl starts from. */
    private static final int FIRST_PAGE = 1;
    /** Pause between two consecutive page requests, in milliseconds. */
    private static final long PAGE_DELAY_MILLIS = 1000L;

    /** Shops collected so far; each map may hold the keys "url", "mer" and "score". */
    public static List<Map> list = new ArrayList<>();

    /**
     * Crawls the list pages reachable from {@code url} and appends the shops to {@link #list}.
     *
     * @param url    URL prefix; the page number is appended to it directly
     * @param cookie value sent as the {@code Cookie} request header
     * @throws IOException if a page cannot be fetched
     */
    public static void sendHtml(String url, String cookie) throws IOException {
        run(url, FIRST_PAGE, MAX_PAGE, cookie);
    }

    /**
     * Command-line entry point: crawls the default list with an empty cookie and
     * prints every collected shop.
     *
     * @param args ignored
     * @throws IOException if a page cannot be fetched
     */
    public static void main(String[] args) throws IOException {
        String url = "http://www.dianping.com/shanghai/ch10/g101p";
        String cookie = "";
        run(url, FIRST_PAGE, MAX_PAGE, cookie);
        for (Map m : list) {
            System.out.println(m.toString());
        }
    }

    /**
     * Fetches pages one after another, starting at {@code start}, and collects
     * the shops of each page.
     *
     * <p>The crawl moves on to the next page only while the last pagination link
     * of the current page equals {@code countPage} and the current page is still
     * below {@code countPage}. It stops early when the page carries no pagination
     * links, when the last link is not a number, or when the thread is interrupted.
     *
     * @param url       URL prefix; the page number is appended to it directly
     * @param start     first page number to request
     * @param countPage highest page number to request
     * @param cookie    value sent as the {@code Cookie} request header
     * @throws IOException if a page cannot be fetched
     */
    private static void run(String url, int start, int countPage, String cookie) throws IOException {
        int current = start;
        while (true) {
            Document document = Jsoup.connect(url + current)
                    .header("Cookie", cookie)
                    .get();
            collectShops(document);

            // Read the pagination to decide whether to go on.
            String lastLinkText = lastPageLinkText(document);
            if (lastLinkText == null) {
                System.err.println("No PageLink element on page " + current + "; stopping.");
                return;
            }
            System.out.println("第" + current + "页," + lastLinkText);

            int lastPage;
            try {
                lastPage = Integer.parseInt(lastLinkText.trim());
            } catch (NumberFormatException e) {
                System.err.println("Last PageLink is not a number: " + lastLinkText + "; stopping.");
                return;
            }
            // NOTE(review): the equality test is kept from the original code; it means
            // listings whose last pagination link differs from countPage are only
            // crawled for one page. Confirm whether "current < lastPage" was intended.
            if (lastPage != countPage || current >= countPage) {
                return;
            }

            try {
                Thread.sleep(PAGE_DELAY_MILLIS); // throttle requests
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt(); // keep the interrupt visible to callers
                return;
            }
            current++;
        }
    }

    /** Appends one map per "txt" element found under the "shop-all-list" element to {@link #list}. */
    private static void collectShops(Document document) {
        Element shopList = document.getElementById("shop-all-list");
        if (shopList == null) {
            System.err.println("Element shop-all-list not found; no shops collected from this page.");
            return;
        }
        for (Element entry : shopList.getElementsByClass("txt")) {
            Map<String, String> shop = new HashMap<>();
            for (Element link : entry.getElementsByAttributeValue("data-hippo-type", "shop")) {
                System.out.println(" 门店:" + link.attr("title") + ", 【地址:" + link.attr("href") + "】");
                shop.put("url", link.attr("href"));
                shop.put("mer", link.attr("title"));
            }
            for (Element comment : entry.getElementsByClass("comment")) {
                String score = comment.getElementsByClass("star_score").text();
                System.out.println(score);
                shop.put("score", score);
            }
            list.add(shop);
        }
    }

    /** Returns the text of the last element with class "PageLink", or {@code null} if there is none. */
    private static String lastPageLinkText(Document document) {
        Elements pageLinks = document.getElementsByClass("PageLink");
        if (pageLinks.isEmpty()) {
            return null;
        }
        return pageLinks.get(pageLinks.size() - 1).text();
    }
}
// 跑几次就封了…… (Note: after only a few runs the site blocks the crawler.)