关于在腾讯视频用爬虫下载毒品危害的视频代码
# Download a video from a hard-coded CDN address and save it locally as
# "毒品的危害.mp4".
import requests

# NOTE(review): the guid/vkey query parameters look like session-bound
# tokens -- presumably they expire, so this URL may need refreshing; verify.
url = 'http://122.246.10.160/vhot2.qqvideo.tc.qq.com/AERa3MJ-4PsBlS8oP_ZsuxlwJzm6Uf_hTxhVT81JTZiQ/b0175dfvbs1.mp4?sdtfrom=v1010&guid=1e962c528e492cd4f3ba7d926c487ae0&vkey=F41F7658B38D1E31D183CC4B255496D7BD92F9E63CDB8BC3F23882BCCB2DB34DB184F5675B25A2AD613E3F0BA1D00A49BF5BA21B930D8DADECD73D91C62A27F29A521D4705C293D0BA7C5AA1B1AF4D2E377E3CA5BE594132F8FA7C630F87DA39BC2D0EB8DE9B7E80CABF51D82153054C42E383A6784B0FDF'

# Stream the response so the whole video is never held in memory at once,
# and bound the wait so a dead server cannot hang the script forever.
with requests.get(url, stream=True, timeout=30) as res:
    # Fail loudly on 4xx/5xx instead of saving an error page as an .mp4.
    res.raise_for_status()
    with open("毒品的危害.mp4", 'wb') as f:
        for chunk in res.iter_content(chunk_size=64 * 1024):
            f.write(chunk)
关于爬取2018杭州某高中的录取线代码
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# __author: __nash
#
# Fetch an article page on hz.zhongkao.com, follow the first link found
# under ".content p a", and save the fifth <img> of the linked page as
# "score.png".
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

url = "http://hz.zhongkao.com/e/20170630/5955d2689a6b6.shtml"
res = requests.get(url, timeout=30)
res.raise_for_status()  # stop early rather than parse an error page
res.encoding = 'gbk'  # decode the page as GBK instead of the guessed charset
soup = BeautifulSoup(res.text, 'lxml')

# First anchor inside a paragraph of the ".content" container.
text1 = soup.select('.content p a')[0]
print(text1)
print(type(text1))
print(text1['href'])
print(type(text1['href']))

# urljoin leaves absolute links unchanged and resolves relative ones
# against the page they were found on.
url2 = urljoin(url, text1['href'])
res2 = requests.get(url2, timeout=30)
res2.raise_for_status()
res2.encoding = 'gbk'
soup2 = BeautifulSoup(res2.text, 'lxml')

# Fifth <img> on the linked page -- presumably the score table image;
# the index is hard-coded, verify against the live page layout.
img = soup2.select('img')[4]
url3 = urljoin(url2, img['src'])
img2 = requests.get(url3, timeout=30)
img2.raise_for_status()

# The context manager closes the file even if the write fails.
with open('score.png', 'wb') as f:
    f.write(img2.content)
关于爬取2018年杭州各高中录取分数线页面地址的代码
# Scrape the score-line index page on hz.zhongkao.com: print the page
# heading and the first 31 links under ".bk-colkey", then save them to
# "school.txt" as one "<href>\t<link text>" line per link.
import requests
from bs4 import BeautifulSoup

url = 'http://hz.zhongkao.com/zkzx/hzzkfsx/'
res = requests.get(url, timeout=30)
res.raise_for_status()  # stop early rather than parse an error page
res.encoding = 'gbk'  # decode the page as GBK instead of the guessed charset
soup = BeautifulSoup(res.text, 'html.parser')

# Text of the first element carrying the "ft16" class.
text1 = soup.select('.ft16')[0].text
print(text1)

# Only the first 31 anchors are kept -- presumably the remaining ones are
# not school entries; the cut-off is hard-coded, verify against the page.
links = soup.select('.bk-colkey a')[0:31]
rows = [(a['href'], a.text) for a in links]
for href, name in rows:
    print(href, name)

# Explicit UTF-8 so the Chinese text is written the same way on every
# platform instead of depending on the locale's default encoding.
with open('school.txt', 'w', encoding='utf-8') as f:
    f.write(text1 + '\n')
    f.writelines(href + '\t' + name + '\n' for href, name in rows)
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx