Python 爬虫实现我爱我家住房信息查询

import requests
from lxml import etree
from day5 import helper
import time

# Browser-like request headers passed to every requests.get() call below.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/68.0.3440.106 Safari/537.36'
    ),
}

_SITE_ROOT = 'https://bj.5i5j.com'
# Seconds to wait for a response; without a timeout requests blocks forever.
_REQUEST_TIMEOUT = 10
# Seconds to pause after each request / insert so the site is not hammered.
_CRAWL_DELAY = 3

# One entry per inserted column, in column order:
# (xpath on the detail page, number of leading nodes whose text is joined).
_DETAIL_FIELDS = (
    ('//div[@class="main container"]/div/div[1]/h1', 1),   # title
    ('//div[@class="content fr"]/div/div[1]/div/p', 2),    # price
    ('//div[@class="content fr"]/div/div[2]/div/p', 1),    # huxing
    ('//div[@class="content fr"]/div/div[3]/div/p', 2),    # mianji
    ('//div[@class="content fr"]/div/div[4]/div/p', 1),    # zhifufangshi
    ('//div[@class="content fr"]/div[2]/ul/li/a', 1),      # xiaoqu
)


def _fetch_tree(url):
    """Download *url* and return its parsed lxml tree, or None on failure.

    Network errors, timeouts and non-2xx responses are reported with print()
    and turned into None so one bad page does not abort the whole crawl.
    """
    try:
        response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print('request failed, skipping {}: {}'.format(url, exc))
        return None
    return etree.HTML(response.text)


def _extract_record(tree):
    """Pull the six column values out of a detail-page tree.

    Returns a tuple ordered like ``_DETAIL_FIELDS``, or None when any xpath
    matches fewer nodes than required (page layout differs from what the
    xpaths expect). A node without text contributes an empty string.
    """
    record = []
    for path, count in _DETAIL_FIELDS:
        nodes = tree.xpath(path)
        if len(nodes) < count:
            return None
        record.append(''.join(node.text or '' for node in nodes[:count]))
    return tuple(record)


def wawj(nums):
    """Crawl the first *nums* rental listing pages and store each listing.

    For every listing page ``/zufang/n<page>/`` the detail links are
    collected, each detail page is fetched and parsed, the extracted values
    are printed, and one row is inserted into ``wawj_xinxi`` through
    ``helper.MysqlHelper.execute_modify_sql``.

    Pages or listings that cannot be fetched or parsed are skipped with a
    printed message instead of raising.

    Args:
        nums: number of listing pages to crawl, starting at page 1. Values
            below 1 do nothing.
    """
    if nums < 1:
        # Nothing to crawl: avoid constructing the DB helper at all.
        return

    # Both are loop-invariant, so build them once rather than once per page.
    # NOTE(review): assumes a MysqlHelper instance can be reused across many
    # inserts -- confirm against day5.helper.
    super_helper = helper.MysqlHelper()
    sql = 'insert into wawj_xinxi(title,price,huxing,mianji,zhifufangshi,xiaoqu)values (%s,%s,%s,%s,%s,%s)'

    for page in range(1, nums + 1):
        listing_tree = _fetch_tree('{}/zufang/n{}/'.format(_SITE_ROOT, page))
        time.sleep(_CRAWL_DELAY)
        if listing_tree is None:
            continue

        for href in listing_tree.xpath('//div[@ class="listImg"]/a/@href'):
            detail_url = _SITE_ROOT + href
            detail_tree = _fetch_tree(detail_url)
            time.sleep(_CRAWL_DELAY)
            if detail_tree is None:
                continue

            data = _extract_record(detail_tree)
            if data is None:
                print('unexpected page layout, skipping {}'.format(detail_url))
                continue

            # huxing is stored but deliberately not echoed, as before.
            title, price, _huxing, mianji, zhifufangshi, xiaoqu = data
            for shown in (title, price, mianji, zhifufangshi, xiaoqu):
                print(shown)

            super_helper.execute_modify_sql(sql, data)
            time.sleep(_CRAWL_DELAY)
if __name__ == '__main__':
    # Ask how many listing pages to crawl. Non-numeric input exits with a
    # readable message instead of an int() traceback.
    raw_pages = input('输入你要搜的房源页数')
    try:
        nums = int(raw_pages)
    except ValueError:
        raise SystemExit('页数必须是整数: {!r}'.format(raw_pages))
    wawj(nums)

猜你喜欢

转载自blog.csdn.net/majiexiong/article/details/81865210