爬虫实战：爬取拉勾网
代码如下
from urllib import request,parse
# Ajax endpoint that returns the job listings as JSON. The human-facing search
# page (https://www.lagou.com/jobs/list_python?...) is not fetched directly; it
# is only sent as the Referer header below.
# The `city` query value is the percent-encoded UTF-8 form of "Shenzhen"
# written in Chinese characters.
url = 'https://www.lagou.com/jobs/positionAjax.json?city=%E6%B7%B1%E5%9C%B3&needAddtionalResult=false'

# Form fields sent in the POST body.
# NOTE(review): presumably `pn` is the page number and `kd` the search
# keyword -- confirm against the site's own requests.
data = {
    'first': 'true',
    'pn': 1,
    'kd': 'python',
}

# Browser-like request headers.
# NOTE(review): presumably the endpoint rejects requests that lack a browser
# User-Agent / matching Referer -- confirm.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3869.400',
    'Referer': 'https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput=',
    'Accept': 'application/json, text/javascript, */*; q=0.01',
}

# urlopen() needs the body as bytes, so the form dict is URL-encoded and then
# encoded to UTF-8.
req = request.Request(
    url,
    headers=headers,
    data=parse.urlencode(data).encode('utf-8'),
    method='POST',
)

# Open the response as a context manager so the underlying connection is
# always closed, even if reading or decoding fails. The timeout keeps the
# script from hanging forever when the server does not answer.
with request.urlopen(req, timeout=10) as resp:
    print(resp.read().decode('utf-8'))