"""Scrape listing pages from bj.5i5j.com and store each listing in MySQL.

Pages 1-3 under ``/zufang/changpingqu/`` are fetched through an HTTP proxy.
For every ``<li>`` in the result list the title, region, zone, price and
detail-page link are printed and inserted into the ``woaiwojia`` table via
``mysql_test.mysql_conn().execute_modify_mysql``.
"""
import os
import re

import requests
from lxml import etree

import mysql_test

# Build the proxy configuration.
# NOTE(review): the host part of the default URL looks like an e-mail
# obfuscation placeholder rather than a real "password@host" pair, and it
# embeds credentials in source. Set WOAIWOJIA_PROXY to the real proxy URL.
_proxy_url = os.environ.get(
    'WOAIWOJIA_PROXY', 'http://alice:[email protected]:6666'
)
proxy = {
    'http': _proxy_url,
    'https': _proxy_url,
}

headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    # "br" is deliberately not advertised: Requests only decodes brotli when
    # an optional brotli package is installed, otherwise response.text would
    # be undecoded compressed bytes.
    "Accept-Encoding": "gzip, deflate",
    "Cache-Control": "max-age=0",
    "Connection": "keep-alive",
    "Cookie": "PHPSESSID=iqkug3po2cgvse5t4qtbuu087g; _ga=GA1.2.797059295.1534500574; _gid=GA1.2.605227355.1534500574; yfx_c_g_u_id_10000001=_ck18081718093415508437717534183; yfx_f_l_v_t_10000001=f_t_1534500574510__r_t_1534500574510__v_t_1534500574510__r_c_0; Hm_lvt_94ed3d23572054a86ed341d64b267ec6=1534500575; _Jo0OQK=B8A2A5ACBC1D521BEB8257558D1D8F5A21CE06AF5840D808C001A6E423AF077346A25E5D8D05E0B364BE60A699EFE58143534A5B1607D1319D36F454D9CBB01EC06C57212F12283777C840763663251ADEB840763663251ADEB34C0FD89F3435CFE6ECAC92C8E815B0AGJ1Z1dg==; Hm_lpvt_94ed3d23572054a86ed341d64b267ec6=1534501008; domain=bj",
    "Host": "bj.5i5j.com",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
}

base_url = 'https://bj.5i5j.com/zufang/changpingqu/n%d/'
# Prefix prepended to each listing's relative href to form the detail URL.
xiangqing_url = 'https://bj.5i5j.com'

# Seconds to wait for the proxy/server before giving up on a request;
# without a timeout requests.get() can block forever.
REQUEST_TIMEOUT = 10

# price is interpolated into the SQL statement unquoted, so it must be a
# plain non-negative decimal number and nothing else.
_PRICE_RE = re.compile(r'\d+(?:\.\d+)?')


def _first(nodes):
    """Return the first XPath result, or None when the query matched nothing."""
    return nodes[0] if nodes else None


def _sql_quote(value):
    """Return *value* as a single-quoted SQL string literal.

    Backslashes and single quotes are backslash-escaped so scraped text
    cannot terminate the literal early. This assumes the server uses MySQL's
    default backslash-escape handling (NO_BACKSLASH_ESCAPES not set).
    NOTE(review): prefer driver-level parameter binding if
    mysql_test.mysql_conn offers it.
    """
    text = str(value).replace('\\', '\\\\').replace("'", "\\'")
    return "'" + text + "'"


for i in range(1, 4):
    url = base_url % i
    response = requests.get(
        url, headers=headers, proxies=proxy, timeout=REQUEST_TIMEOUT
    )
    if not response.ok:
        # An error page has no listings to parse; report it and move on.
        print('skipping %s: HTTP %s' % (url, response.status_code))
        continue

    html_ele = etree.HTML(response.text)
    print('正在保存第' + str(i) + '页.............')
    li_list = html_ele.xpath('//div[@class="list-con-box"]/ul/li')
    print(li_list)

    for li_ele in li_list:
        link_node = _first(li_ele.xpath('./div[2]/h3/a'))
        title = link_node.text if link_node is not None else None
        region = _first(li_ele.xpath('./div[2]/div[1]/p/text()'))
        zone = _first(li_ele.xpath('./div[2]/div[1]/p[2]/a/text()'))
        price = _first(li_ele.xpath('./div[2]/div[1]/div[1]/p/strong/text()'))
        href = _first(li_ele.xpath('./div[2]/h3/a/@href'))

        # An <li> that does not have the expected structure would otherwise
        # raise IndexError and abort the whole run.
        if any(field is None for field in (title, region, zone, price, href)):
            print('skipping incomplete listing on page %d' % i)
            continue

        lianjie = xiangqing_url + href
        print(title)
        print(region)
        print(zone)
        print(price)
        print(lianjie)

        price = price.strip()
        if not _PRICE_RE.fullmatch(price):
            print('skipping listing with non-numeric price: %r' % price)
            continue

        # Instantiate the helper class that writes to MySQL.
        # NOTE(review): a new mysql_conn is created for every row, as in the
        # original; whether one instance can be reused across rows depends on
        # mysql_test, which is not visible here - confirm and hoist if safe.
        mc = mysql_test.mysql_conn()
        sql = (
            "insert into woaiwojia(title, region, zone, price,lianjie) "
            "values ({title},{region},{zone},{price},{lianjie})"
        ).format(
            title=_sql_quote(title),
            region=_sql_quote(region),
            zone=_sql_quote(zone),
            price=price,
            lianjie=_sql_quote(lianjie),
        )
        mc.execute_modify_mysql(sql)
我爱我家(抓取)
猜你喜欢
转载自blog.csdn.net/q810935819/article/details/81783611
今日推荐
周排行