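// Java crawler for the Beijing Normal University, Zhuhai electricity-billing website.
//
// Copyright notice: when reposting, credit 康弘雲 - https://blog.csdn.net/weixin_43560803/article/details/85652459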
package program;
import com.google.gson.JsonArray;
import com.google.gson.JsonIOException;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.google.gson.JsonSyntaxException;
import java.io.IOException;  
import java.util.ArrayList;  
import java.util.HashMap;   
import java.util.List;
import java.util.Set;  
  
import org.apache.http.Header;  
import org.apache.http.HeaderElement;  
import org.apache.http.HttpEntity;  
import org.apache.http.HttpResponse;  
import org.apache.http.NameValuePair;  
import org.apache.http.ParseException;  
import org.apache.http.client.ClientProtocolException;  
import org.apache.http.client.entity.UrlEncodedFormEntity;  
import org.apache.http.client.methods.HttpGet;  
import org.apache.http.client.methods.HttpPost;  
import org.apache.http.client.params.ClientPNames;  
import org.apache.http.impl.client.DefaultHttpClient;  
import org.apache.http.message.BasicNameValuePair;  
import org.apache.http.util.EntityUtils;  
import org.jsoup.Jsoup;  
import org.jsoup.nodes.Document;  
import org.jsoup.nodes.Element;  
import org.jsoup.select.Elements;  
 
// Uses jsoup, commons-lang3 3.8.1, httpcomponents-client 4.5.6 and Gson (com.google.gson); add these libraries to the project before building.

/**
 * Command-line crawler for the electricity-billing site at {@code http://kl.bnuz.edu.cn/}.
 *
 * <p>Flow: download the login page and copy every named form input (including hidden
 * ones), post the credentials with automatic redirects disabled, remember the session
 * cookies and the redirect target, fetch the landing page and print three labels from
 * it, then fetch the purchase-history JSON endpoint and print its contents.
 *
 * <p>All state lives in static fields with no synchronization.
 *
 * <p>NOTE(review): the account number and password are hard-coded in {@link #login()};
 * only run this against an account you are authorized to use.
 */
public class test {
    private static String LoginUrl = "http://kl.bnuz.edu.cn/";
    private static String Host = "http://kl.bnuz.edu.cn/";
    private static String mainUrl = "";
    /** Value sent in the {@code Cookie} request header; empty until a login succeeds. */
    private static String cookie = "";
    private static String location = "";
    // Text of the three labels scraped from the landing page.
    // NOTE(review): "address" is read from lblUsername and "room" from lblAddress -
    // confirm against the live page that the names are not swapped.
    private static String address;
    private static String room;
    private static String electricity;
    /** Numeric suffix of the account name; incremented once before each login. */
    public static int user = 3356;

    /**
     * Logs in once, prints the landing-page labels, then fetches and prints the
     * purchase history for a fixed date range.
     *
     * @param args unused
     * @throws ParseException if an HTTP response body cannot be decoded
     * @throws IOException if the purchase-history request fails
     */
    public static void main(String[] args) throws ParseException, IOException {
        for (int x = 0; x < 1; x++) {
            user += 1;

            getMyBorrowedBooks();

            String borrowedBooksUrl = "http://kl.bnuz.edu.cn/Ajax/ConfigAjax.ashx?Action"
                    + "=getOrderQuery&ksdate=2016-12-17&jsdate=2018-12-17&_search=false&"
                    + "rows=5&page=1&sidx=BUY_DATE&sord=desc";
            String search = getHtml(borrowedBooksUrl);
            getjson(search);

            // Pause between rounds to keep the request rate low.
            try {
                Thread.sleep(2000);
            } catch (InterruptedException e) {
                // Restore the interrupt flag and stop instead of swallowing the request.
                Thread.currentThread().interrupt();
                break;
            }
        }
    }

    /**
     * Prints the contents of a purchase-history response.
     *
     * <p>Expected shape: an object with {@code page}, {@code total}, {@code records} and a
     * {@code rows} array whose entries carry an {@code id} and a {@code cell} array. Per
     * the original author's note the cells are, in order: order id, meter number,
     * multiplier, unit price, date, purchase count, purchase amount, purchased
     * electricity, refund amount, refund electricity, top-up method, and whether the
     * meter was charged (not verified here).
     *
     * <p>Input that is not a JSON object (for example an HTML error page) is reported on
     * {@code System.err} and otherwise ignored; missing fields are skipped.
     *
     * @param json response body to parse
     */
    public static void getjson(String json) {
        if (json == null) {
            System.err.println("No response body to parse");
            return;
        }
        JsonObject object;
        try {
            object = new JsonParser().parse(json).getAsJsonObject();
        } catch (JsonSyntaxException e) {
            System.err.println("Response is not valid JSON: " + e.getMessage());
            return;
        } catch (IllegalStateException e) {
            // getAsJsonObject() rejects anything that is not a JSON object.
            System.err.println("Response is not a JSON object: " + e.getMessage());
            return;
        }

        System.out.println("page=" + stringField(object, "page"));
        System.out.println("total=" + stringField(object, "total"));
        System.out.println("records=" + stringField(object, "records"));

        if (!object.has("rows") || !object.get("rows").isJsonArray()) {
            return;
        }
        JsonArray rows = object.get("rows").getAsJsonArray();
        for (int r = 0; r < rows.size(); r++) {
            if (!rows.get(r).isJsonObject()) {
                continue;
            }
            JsonObject row = rows.get(r).getAsJsonObject();
            if (row.has("id") && row.get("id").isJsonPrimitive()) {
                System.out.println("id=" + row.get("id").getAsInt());
            }
            if (!row.has("cell") || !row.get("cell").isJsonArray()) {
                continue;
            }
            JsonArray cells = row.get("cell").getAsJsonArray();
            System.out.println(cells);
            for (int c = 0; c < cells.size(); c++) {
                System.out.println(cells.get(c));
            }
        }
    }

    /** Returns the primitive member {@code key} as a string, or "" when absent or not primitive. */
    private static String stringField(JsonObject object, String key) {
        if (!object.has(key) || !object.get(key).isJsonPrimitive()) {
            return "";
        }
        return object.get(key).getAsString();
    }

    /**
     * Logs in, scrapes three labels from the landing page into the static fields and
     * prints them, one per line. A label that is missing from the page is stored and
     * printed as an empty string. An {@link IOException} from the login is printed and
     * not rethrown.
     */
    public static void getMyBorrowedBooks() {
        try {
            Document document = Jsoup.parse(login());
            address = firstText(document, "span[id=ctl00_cphContent_lblUsername]");
            room = firstText(document, "span[id=ctl00_cphContent_lblAddress]");
            electricity = firstText(document, "span[id=ctl00_cphContent_LabelSY]");
            System.out.println(address);
            System.out.println(room);
            System.out.println(electricity);
        } catch (IOException e) {
            System.err.println("Could not load the account page");
            e.printStackTrace();
        }
    }

    /** Returns the text of the first element matching {@code cssQuery}, or "" when none matches. */
    private static String firstText(Document document, String cssQuery) {
        Element first = document.select(cssQuery).first();
        return first == null ? "" : first.text();
    }

    /**
     * Logs in to the billing site and returns the HTML of the page the login redirects to.
     *
     * <p>Redirects are disabled on the POST so that the {@code Set-Cookie} and
     * {@code Location} headers of the first response can be read. On success the
     * static {@code cookie}, {@code location} and {@code mainUrl} fields are updated.
     *
     * @return HTML of the post-login landing page
     * @throws ClientProtocolException on an HTTP protocol error
     * @throws IOException on a transport error, or when the response carries no redirect
     *         or no cookie (which is treated as a failed login)
     */
    public static String login() throws ClientProtocolException, IOException {
        // Drop any earlier session so a failed login cannot silently reuse it.
        cookie = "";

        List<NameValuePair> parmasList = initLoginParmas("00000000" + user, "123456");
        HttpPost post = new HttpPost(LoginUrl);
        post.getParams().setParameter(ClientPNames.HANDLE_REDIRECTS, false);
        post.setHeader("Content-Type",
                "application/x-www-form-urlencoded;charset=gbk");
        post.setEntity(new UrlEncodedFormEntity(parmasList, "GBK"));

        DefaultHttpClient client = new DefaultHttpClient();
        try {
            HttpResponse response = client.execute(post);
            Header redirect = response.getFirstHeader("Location");
            if (redirect == null || redirect.getValue() == null) {
                throw new IOException("Login was not accepted: no redirect in response ("
                        + response.getStatusLine() + ")");
            }
            String sessionCookies = collectCookies(response.getHeaders("Set-Cookie"));
            if (sessionCookies.isEmpty()) {
                throw new IOException("Login response did not set any cookie ("
                        + response.getStatusLine() + ")");
            }
            cookie = sessionCookies;
            location = redirect.getValue();
        } finally {
            // Release the connection held by this one-shot client.
            client.getConnectionManager().shutdown();
        }

        mainUrl = resolveRedirect(location);
        return getHtml(mainUrl);
    }

    /**
     * Builds a {@code Cookie} request-header value from {@code Set-Cookie} headers:
     * the {@code name=value} part of each header (everything before the first ';'),
     * joined with "; ".
     */
    private static String collectCookies(Header[] setCookieHeaders) {
        StringBuilder pairs = new StringBuilder();
        for (Header header : setCookieHeaders) {
            String value = header.getValue();
            if (value == null) {
                continue;
            }
            int attributesStart = value.indexOf(';');
            String pair = (attributesStart < 0 ? value : value.substring(0, attributesStart)).trim();
            if (pair.isEmpty()) {
                continue;
            }
            if (pairs.length() > 0) {
                pairs.append("; ");
            }
            pairs.append(pair);
        }
        return pairs.toString();
    }

    /**
     * Turns a {@code Location} header value into a full URL: absolute http(s) URLs are
     * returned unchanged, anything else is appended to {@code Host} without producing a
     * double slash.
     */
    private static String resolveRedirect(String target) {
        if (target.startsWith("http://") || target.startsWith("https://")) {
            return target;
        }
        if (target.startsWith("/")) {
            // Host already ends with '/'.
            return Host + target.substring(1);
        }
        return Host + target;
    }

    /**
     * Fetches {@code url} with a GET request, sending the stored session cookie when
     * there is one.
     *
     * @param url address to fetch
     * @return response body, or "" when the response has no body
     * @throws ParseException if the response body cannot be decoded
     * @throws IOException on a transport error
     */
    private static String getHtml(String url) throws ParseException,
            IOException {
        HttpGet get = new HttpGet(url);
        if (cookie != null && !cookie.isEmpty()) {
            get.addHeader("Cookie", cookie);
        }
        DefaultHttpClient client = new DefaultHttpClient();
        try {
            HttpEntity entity = client.execute(get).getEntity();
            return entity == null ? "" : EntityUtils.toString(entity);
        } finally {
            client.getConnectionManager().shutdown();
        }
    }

    /**
     * Builds the login POST parameters: every named input of the login form with its
     * page-supplied value, except that fields whose name contains "Username" get
     * {@code userName} and fields whose name contains "txtPas" get {@code passWord}.
     *
     * @param userName account name to submit
     * @param passWord password to submit
     * @return form parameters ready to be URL-encoded
     * @throws ParseException if the login page body cannot be decoded
     * @throws IOException if the login page cannot be fetched or has no POST form
     */
    public static List<NameValuePair> initLoginParmas(String userName,
            String passWord) throws ParseException, IOException {
        HashMap<String, String> parmasMap = getLoginFormData(LoginUrl);
        List<NameValuePair> parmasList = new ArrayList<NameValuePair>(parmasMap.size());

        for (String name : parmasMap.keySet()) {
            String value = parmasMap.get(name);
            if (name.contains("Username")) {
                value = userName;
            } else if (name.contains("txtPas")) {
                value = passWord;
            }
            parmasList.add(new BasicNameValuePair(name, value));
        }
        return parmasList;
    }

    /**
     * Downloads {@code url} and collects the inputs of its first POST form.
     *
     * <p>The site's login form carries hidden inputs that must be echoed back on
     * submit, so all inputs that have a {@code name} attribute are returned.
     *
     * @param url page containing the login form
     * @return map from input name to its {@code value} attribute
     * @throws ParseException if the page body cannot be decoded
     * @throws IOException if the page cannot be fetched or contains no POST form
     */
    public static HashMap<String, String> getLoginFormData(String url)
            throws ParseException, IOException {
        Document document = Jsoup.parse(getHtml(url));
        Elements forms = document.getElementsByTag("form");
        Element form = forms.select("[method=post]").first();
        if (form == null) {
            throw new IOException("No POST form found at " + url);
        }
        HashMap<String, String> parmas = new HashMap<String, String>();
        for (Element input : form.select("input[name]")) {
            parmas.put(input.attr("name"), input.attr("value"));
        }
        return parmas;
    }
}
  
    

// You may also like
//
// Reposted from blog.csdn.net/weixin_43560803/article/details/85652459