# -*- coding: utf-8 -*-
"""Scraper for the 2345 movie ranking page.

For every movie in the ranking list this script prints the title, release
date, cast and synopsis, and saves the poster image into a local directory.
"""
import os
import re
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup

# Seconds to wait for any single HTTP request before giving up.
REQUEST_TIMEOUT = 30

# Characters that must not appear in a file name (Windows reserved characters
# plus control whitespace); movie titles are used as file names below.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')


def get_html(url):
    """Fetch a page and return its text, or None when the request fails.

    The response is always decoded as GBK, overriding whatever encoding
    ``requests`` detected.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or ``None`` if the request raised a
        ``requests`` error or returned an HTTP error status (in which case
        ``"wrong"`` is printed).
    """
    try:
        r = requests.get(url, timeout=REQUEST_TIMEOUT)
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are swallowed; programming errors and
        # KeyboardInterrupt propagate instead of being hidden.
        print("wrong")
        return None
    r.encoding = 'gbk'
    return r.text


def get_pic_from_url(url, filename='filename'):
    """Download the binary content at *url* and write it to *filename*.

    Args:
        url: Address of the image to download.
        filename: Destination path. Defaults to ``'filename'``, the fixed
            name this helper has always written to.

    Raises:
        requests.RequestException: If the request fails or returns an HTTP
            error status (so an error page is never saved as an image).
        OSError: If the destination file cannot be written.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    with open(filename, 'wb') as f:
        f.write(response.content)


def _parse_movie(li):
    """Extract (name, release_time, cast, intro, img_src) from one <li>.

    Returns None when the entry has no title link, because the title is
    needed both for the printed report and for the poster file name.
    """
    # Title, e.g.
    #   <span class="sTit"><a href="//dianying.2345.com/detail/195766.html"
    #       target="_blank">妈妈咪鸭</a></span>
    title_span = li.find('span', attrs={'class': 'sTit'})
    if title_span is None or title_span.a is None:
        return None
    name = title_span.a.text

    # Poster, e.g.
    #   <img src="//imgwx4.2345.com/dypcimg/img/f/66/sup198834_223x310.jpg"
    #       alt="壹号别墅" title="壹号别墅" width="130" height="173">
    img = li.find('img')
    img_src = img.get('src') if img is not None else None

    # Release date, e.g. <span class="sIntro">上映时间:2015-05-12</span>.
    # Entries without this span are reported as not yet released.
    time_span = li.find('span', attrs={'class': 'sIntro'})
    release_time = time_span.text if time_span is not None else '还没上映'

    # Cast, e.g.
    #   <p class="pActor">主演: <a title="张译">张译</a>
    #       <a title="黄景瑜">黄景瑜</a> <a title="海清">海清</a></p>
    # get_text() collects every text fragment, so it cannot hit a None
    # ``.string`` the way concatenating the children one by one could.
    actors = li.find('p', class_='pActor')
    cast = actors.get_text(' ', strip=True) if actors is not None else ''

    # Synopsis, e.g.
    #   <p class="pTxt pIntroShow">简介:... <a class="aMore pIntroShowMore">
    #       展开全部</a></p>
    intro_p = li.find('p', attrs={'class': 'pTxt pIntroShow'})
    intro = intro_p.text if intro_p is not None else ''

    return name, release_time, cast, intro, img_src


def _poster_path(image_dir, name, img_url):
    """Build the local file path for a poster from the movie title."""
    safe_name = _UNSAFE_FILENAME_CHARS.sub('_', name).strip() or 'unnamed'
    # Keep the image's real extension (the site serves .jpg posters); fall
    # back to the historical '.png' when the URL has none.
    extension = os.path.splitext(urlparse(img_url).path)[1] or '.png'
    return os.path.join(image_dir, safe_name + extension)


def main(url, image_dir='C:testdata/image/'):
    """Print details of every ranked movie and download its poster.

    Args:
        url: Address of the ranking page.
        image_dir: Directory the posters are saved into; created if missing.
            NOTE(review): the default 'C:testdata/image/' has no slash after
            the drive letter, which makes it drive-relative on Windows. It is
            kept for backward compatibility; confirm whether
            'C:/testdata/image/' was intended.
    """
    html = get_html(url)
    if html is None:
        # get_html() already reported the failure.
        return

    soup = BeautifulSoup(html, 'lxml')
    # The <ul> element that wraps the whole ranking list.
    movies_list = soup.find('ul', attrs={'class': 'picList clearfix'})
    if movies_list is None:
        print('movie list not found')
        return

    os.makedirs(image_dir, exist_ok=True)

    for li in movies_list.find_all('li'):
        movie = _parse_movie(li)
        if movie is None:
            continue
        name, release_time, cast, intro, img_src = movie
        print('{}\t{}\n{}\n{}\n'.format(name, release_time, cast, intro))

        if not img_src:
            continue
        # The page uses protocol-relative URLs ("//host/path"); urljoin
        # resolves those against the page URL and leaves absolute URLs alone.
        img_url = urljoin(url, img_src)
        try:
            get_pic_from_url(img_url, _poster_path(image_dir, name, img_url))
        except (requests.RequestException, OSError) as err:
            # One broken poster should not abort the rest of the list.
            print('failed to download {}: {}'.format(img_url, err))


url = 'http://dianying.2345.com/top/'

# Run the scraper when executed as a script.
if __name__ == '__main__':
    main(url)
Python爬虫六:爬取电影图片及简介
猜你喜欢
转载自blog.csdn.net/qq_38788128/article/details/80481708
今日推荐
周排行