简述:(PS: 大神勿喷,接触爬虫较少)
最近与朋友聊天,谈到京东上的业务,遇到一些烦恼,故有此需求。此需求针对性较强,为此也翻来覆去查阅了不少资料。
分析细节
1. 京东账号分为普通账户、企业账户和专卖店账号。此处说明一下:专卖店账号将商品加入购物车后,享有专卖店价格,且该价格只有在购物车内才能看见;本文使用的是专卖店账户(京东购物车上限为120个)。
2. 关于京东登录,采用的是滑块验证,那么自动采集价格分为两个方向:一个是自动化 Selenium 方向,一个是 HttpClient 底层协议方向,两者均有优劣(此处优劣后续再说)。
京东PC登陆地址:https://passport.jd.com/new/login.aspx
废话不多说 上代码
/**
 * Entry point: builds the JD login bean, logs in when no session is active,
 * and then adds the {@code ipLoc-djd} cookie to the client's cookie store.
 *
 * @param args command-line arguments (unused)
 * @throws InterruptedException if the login routine is interrupted while sleeping
 */
public static void main(String[] args) throws InterruptedException {
    // Login bean carrying the credentials and the eid/fp values sent with the login form.
    UuserJDinfo account = new UuserJDinfo();
    account.setEid("JOTQ5MHYUQKCZV2BF45VVUXM2TEPQ4JG24UHT3OJNBFFJ3YYTHQKKPGZGZ6L36MDBXBFX3XQXR2XKKS43DBCWMXKBU");
    account.setFp("66d75ba968161fd0e9f2562ac32a453b");
    account.setLoginname("*******");
    account.setLoginpwd("********");
    account.setNloginpwd("********");

    // HttpClientUtil is the project's HTTP wrapper class.
    HttpClientUtil httpClient = new HttpClientUtil();

    // Nothing further to do when the client already reports a logged-in session.
    if (islogin(httpClient)) {
        return;
    }

    HttpClientUtil loggedInClient = inlogin(httpClient, account);

    // After login, pick the monitored region by adding a cookie to the store.
    // NOTE(review): "ipLoc-djd" presumably encodes the delivery region ids - confirm.
    BasicClientCookie regionCookie = new BasicClientCookie("ipLoc-djd", "7-538-35189-52575");
    regionCookie.setPath("/");
    regionCookie.setDomain("jd.com");
    regionCookie.setVersion(0);

    CookieStore store = loggedInClient.getCookieStore();
    store.addCookie(regionCookie);
}
/**
 * Locates the session-id fragment in the jseqf.html response.
 * Compiled once here instead of on every login attempt.
 */
private static final Pattern SEQ_SESSION_PATTERN = Pattern.compile("sessionId=.+_jdtdseq_config_data");

/**
 * Logs in to JD through the PC login form, retrying until the login service
 * response contains {@code "success"}.
 *
 * <p>Each attempt: loads the login page, copies the hidden inputs of
 * {@code form#formlogin}, obtains a captcha answer via {@link #incode},
 * adds the credentials, fetches a {@code seqSid}, waits two seconds and posts
 * the form to the login service.
 *
 * <p>The retry loop is intentionally unbounded (the original author calls it an
 * "endless login loop"); with wrong credentials this method never returns.
 *
 * @param httpClient  client used for every request; returned unchanged
 * @param uuserJDinfo login bean supplying loginname, loginpwd, nloginpwd, eid and fp
 * @return the same {@code httpClient} instance once login has succeeded
 * @throws InterruptedException if the thread is interrupted during the pre-post delay
 */
public static HttpClientUtil inlogin(HttpClientUtil httpClient,UuserJDinfo uuserJDinfo) throws InterruptedException{
    boolean loggedIn = false;
    while (!loggedIn) {
        Document loginPage = Jsoup.parse(httpClient.doGet("https://passport.jd.com/new/login.aspx"));
        Elements hiddenInputs = loginPage.select("form[id=formlogin] input[type=hidden]");

        // Seed the form with the page's hidden initialisation parameters.
        Map<String, String> form = new HashMap<String, String>();
        for (Element input : hiddenInputs) {
            String name = input.attr("name");
            if (StringUtils.isNotBlank(name)) {
                form.put(name, input.attr("value"));
            }
        }

        // Captcha-solving platform call (slider captcha); it returns an answer token, not coordinates.
        String code = incode(httpClient);

        form.put("loginname", uuserJDinfo.getLoginname());
        // NOTE: the original author flags that this value should be encrypted.
        form.put("nloginpwd", uuserJDinfo.getNloginpwd());
        form.put("loginpwd", uuserJDinfo.getLoginpwd());
        form.put("eid", uuserJDinfo.getEid());
        form.put("fp", uuserJDinfo.getFp());
        form.put("authcode", code);

        String seqResponse = httpClient.doGet("https://seq.jd.com/jseqf.html?bizId=passport_jd_com_login_pc&platform=js&version=1");
        Matcher seqMatcher = SEQ_SESSION_PATTERN.matcher(seqResponse);
        if (seqMatcher.find()) {
            // "sessionId=" fills indices 0-9; index 10 is skipped and the next 18 characters are kept.
            // NOTE(review): index 10 is presumably an opening quote - confirm against a live response.
            form.put("seqSid", seqMatcher.group().substring(11, 29));
        }

        Thread.sleep(2000);

        String url = "https://passport.jd.com/uc/loginService?&uuid="+form.get("uuid")+"&r=" + Math.random()+"&version=2015";
        String ans = Tools.decodeUnicode(httpClient.doPost(url, form));
        System.out.println(ans);

        // Any response mentioning "success" ends the retry loop.
        loggedIn = ans.contains("success");
    }
    return httpClient;
}
/**
 * Obtains a captcha answer from the jjsou captcha-solving service, retrying
 * until a response without the text {@code "error"} is received.
 *
 * <p>Each attempt asks {@code http://api.jjsou.cn/getreqsvr} for a server
 * address, posts {@code Const.CODE_INFO_MAP} to that server's {@code /reqcode}
 * endpoint and prints the unicode-decoded reply.
 *
 * @param httpClient client used for both requests
 * @return the trimmed {@code "validate"} value of the first error-free reply
 */
public static String incode(HttpClientUtil httpClient){
    while (true) {
        String server = httpClient.doGet("http://api.jjsou.cn/getreqsvr");
        String reply = httpClient.doPost("http://"+server+"/reqcode", Const.CODE_INFO_MAP);
        System.out.println(Tools.decodeUnicode(reply));

        // A reply mentioning "error" is treated as a failed attempt; ask again.
        if (reply.contains("error")) {
            continue;
        }

        String validate = Tools.toMap(reply).get("validate").toString().trim();
        System.out.println(validate);
        return validate;
    }
}
/**
 * Reports whether the client appears to hold a logged-in JD session.
 *
 * <p>Fetches {@code https://home.jd.com/getUserVerifyRight.action} and treats a
 * response body shorter than 200 characters as "logged in".
 * NOTE(review): presumably an unauthenticated request yields a long login page
 * instead - confirm.
 *
 * @param httpClient client used to issue the request
 * @return {@code true} when the response body is shorter than 200 characters
 */
public static boolean islogin(HttpClientUtil httpClient){
    String verifyBody = httpClient.doGet("https://home.jd.com/getUserVerifyRight.action");
    return verifyBody.length() < 200;
}
此处使用的是 HttpClient 4.5,自动管理 Cookie。
不过此处太依赖于打码平台。有人问我,为什么不用多家打码平台返回滑块的坐标,比如 QQ超人、若快等。但是考虑到这些打码平台返回的均是坐标,还需要考虑滑块轨迹算法,以及京东对应的策略,所以没有完善这里的滑块轨迹。
后面需要考虑的是扫码登录和人工登录(保证稳定和应急),下篇文章里会有使用 HttpClient 扫码登录京东作为备用方案。