import requests
from lxml import etree
import time
import pymysql
class MyMysql(object):
    """Thin wrapper around one PyMySQL connection and one cursor.

    Every statement run through :meth:`excute_sql` is committed immediately.
    The connection is released by :meth:`close`, which is also invoked from
    ``__del__`` as a best-effort fallback.
    """

    def __init__(self, host='127.0.0.1', user='root', password='******',
                 database='wang', charset='utf8mb4'):
        """Open the connection and create a cursor.

        Args:
            host: MySQL server address.
            user: Account name used to log in.
            password: Password for ``user``.
            database: Schema selected after connecting.
            charset: Connection character set; ``utf8mb4`` so non-ASCII
                text (the scraped rows are Chinese) is sent intact.
        """
        # Pre-set both attributes so close()/__del__ are safe even when
        # connect() raises before they are assigned.
        self.db = None
        self.cursor = None
        # Keyword arguments: PyMySQL 1.0+ no longer accepts positional ones.
        self.db = pymysql.connect(
            host=host,
            user=user,
            password=password,
            database=database,
            charset=charset,
        )
        self.cursor = self.db.cursor()

    def excute_sql(self, sql, data=None):
        """Execute one parameterized statement and commit it.

        Args:
            sql: Statement text using ``%s`` placeholders.
            data: Sequence (or mapping) of values bound to the placeholders,
                or ``None`` when the statement has none.

        Raises:
            Exception: Whatever the cursor or the commit raises; the
                transaction is rolled back before the error is re-raised.
        """
        try:
            self.cursor.execute(sql, data)
            self.db.commit()
        except Exception:
            # Do not leave a half-applied transaction open on the connection.
            self.db.rollback()
            raise

    # Correctly spelled alias; the original (misspelled) name is kept for
    # existing callers.
    execute_sql = excute_sql

    def close(self):
        """Close the cursor and the connection; safe to call repeatedly."""
        # getattr: the instance may never have finished __init__.
        cursor = getattr(self, 'cursor', None)
        db = getattr(self, 'db', None)
        # Drop the references first so a second call is a no-op.
        self.cursor = None
        self.db = None
        try:
            if cursor is not None:
                cursor.close()
        finally:
            if db is not None:
                db.close()

    def __del__(self):
        # Finalizers must never raise: the connection may already be gone
        # or the interpreter may be shutting down.
        try:
            self.close()
        except Exception:
            pass
# Scrape result pages 1-3 of jn.lianjia.com/zufang and insert one row per
# listing into the ``lianjia_jinan`` table.

# Parameterized INSERT: values are passed separately to the driver rather
# than being formatted into the statement text.
sql = 'insert into lianjia_jinan(title,region,zone,meters,price,date,url) values(%s,%s,%s,%s,%s,%s,%s)'
msq = MyMysql()

# Request settings are identical for every page, so build them once.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}
# Seconds; requests has no default timeout and would otherwise wait forever.
REQUEST_TIMEOUT = 10

# XPath of each column relative to a listing <li>, in the same order as the
# columns of ``sql``: title, region, zone, meters, price, date, url.
FIELD_PATHS = (
    './div[2]/h2/a',
    './div[2]/div[1]/div[1]/a/span',
    './div[2]/div[1]/div[1]/span[1]/span',
    './div[2]/div[1]/div[1]/span[2]',
    './div[2]/div[2]/div[1]/span',
    './div[2]/div[2]/div[2]',
    './div[2]/h2/a/@href',
)


def _first_text(node, path):
    """Return the stripped text of the first XPath match under *node*.

    Returns ``None`` when nothing matches or the match carries no text,
    instead of raising ``IndexError`` on ``[0]``.
    """
    matches = node.xpath(path)
    if not matches:
        return None
    first = matches[0]
    # Attribute queries (``@href``) yield strings; element queries yield
    # nodes whose text lives in ``.text`` (which may itself be None).
    text = first if isinstance(first, str) else first.text
    return text.strip() if text else None


for i in range(1, 4):
    page_url = 'https://jn.lianjia.com/zufang/pg%srco10/' % i
    response = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    html_ele = etree.HTML(response.text)

    # Every <li> inside the listings <ul> is one rental offer.
    for res in html_ele.xpath('//ul[@id="house-lst"]/li'):
        data = tuple(_first_text(res, path) for path in FIELD_PATHS)
        title, link = data[0], data[-1]
        if title is None or link is None:
            # Not a regular listing entry (no title anchor); skip it.
            continue
        msq.excute_sql(sql, data)

    print('第{}页保存完毕'.format(i))
    # Pause between page fetches so the site is not hit in a tight loop.
    time.sleep(1)
利用 XPath 爬取链家(lianjia)济南租房信息,并保存到 MySQL 数据库
猜你喜欢
转载自blog.csdn.net/weixin_38920937/article/details/81783740
今日推荐
周排行