import requests
from lxml import etree
from day5 import helper
import time
# Desktop-Chrome User-Agent so 5i5j.com serves the normal HTML page
# instead of blocking the default python-requests client.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
def wawj(nums):
    """Scrape rental listings ("我爱我家"/5i5j Beijing) and store them in MySQL.

    For each of the first ``nums`` result pages, collect every listing's
    detail-page URL, fetch the detail page, extract title / price / layout /
    area / payment method / estate name, and insert one row per listing via
    ``helper.MysqlHelper``.

    :param nums: number of result pages to crawl (pages 1..nums inclusive).
    :returns: None — side effect is rows inserted into ``wawj_xinxi``.
    """
    # DB helper and the parameterized INSERT are loop-invariant:
    # create them once instead of once per result page.
    super_helper = helper.MysqlHelper()
    sql = 'insert into wawj_xinxi(title,price,huxing,mianji,zhifufangshi,xiaoqu)values (%s,%s,%s,%s,%s,%s)'
    for z in range(1, nums + 1):
        url = 'https://bj.5i5j.com/zufang/n{}/'.format(z)
        response = requests.get(url, headers=headers)
        html_ele = etree.HTML(response.text)
        # Relative detail-page hrefs for every listing on this result page.
        ul_list = html_ele.xpath('//div[@ class="listImg"]/a/@href')
        time.sleep(3)  # throttle so the site does not rate-limit/ban us
        for ul in ul_list:
            xiangqing_url = 'https://bj.5i5j.com' + ul
            response = requests.get(xiangqing_url, headers=headers)
            html_ele = etree.HTML(response.text)
            time.sleep(3)
            try:
                title = html_ele.xpath('//div[@class="main container"]/div/div[1]/h1')[0].text
                print(title)
                # Price is split across two <p> nodes (amount + unit).
                price = html_ele.xpath('//div[@class="content fr"]/div/div[1]/div/p')
                price = price[0].text + price[1].text
                print(price)
                huxing = html_ele.xpath('//div[@class="content fr"]/div/div[2]/div/p')[0].text
                # Area is likewise split across two <p> nodes (value + unit).
                mianji = html_ele.xpath('//div[@class="content fr"]/div/div[3]/div/p')
                mianji = mianji[0].text + mianji[1].text
                print(mianji)
                zhifufangshi = html_ele.xpath('//div[@class="content fr"]/div/div[4]/div/p')[0].text
                print(zhifufangshi)
                xiaoqu = html_ele.xpath('//div[@class="content fr"]/div[2]/ul/li/a')[0].text
                print(xiaoqu)
            except (IndexError, TypeError):
                # Page layout changed / listing removed / captcha page:
                # skip this listing instead of aborting the whole crawl.
                print('skip (unexpected page layout): ' + xiangqing_url)
                continue
            data = (title, price, huxing, mianji, zhifufangshi, xiaoqu)
            super_helper.execute_modify_sql(sql, data)
            time.sleep(3)
if __name__ == '__main__':
    # Ask the user how many result pages to crawl, then run the scraper.
    page_count = int(input('输入你要搜的房源页数'))
    wawj(page_count)
# python爬虫实现我爱我家住房信息查询
# (Python crawler for 5i5j / "我爱我家" rental-listing lookup)
# 转载自 blog.csdn.net/majiexiong/article/details/81865210
# NOTE: the lines above were CSDN blog-page residue ("猜你喜欢", "今日推荐",
# "周排行") pasted in with the code; kept here as comments for attribution.