Password-free: use Python to fetch PDF network-disk download links automatically by keyword!

A PDF is essentially an e-book, but since the format supports illustrations and fixed layout, it is especially pleasant to read!

Some readers will ask: is this hacking? It looks so powerful!

No, it is not powerful at all. Just like the earlier post on downloading Baidu Wenku documents without spending Baidu points, this is nothing more than an API call: the script searches a blog (www.ifblue.net) for matching posts, extracts each post's Baidu pan share link, and then asks a third-party API (ypsuperkey.meek.com.cn) for that share's access code.
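
To make that concrete, here is a minimal sketch of the key step. The endpoint and the access_code response field are taken from the full script below; the share ID is a hypothetical placeholder:

import json
import requests

# Hypothetical share ID, i.e. the tail of https://pan.baidu.com/s/1<share_id>
share_id = "abcdefg"
api = "http://ypsuperkey.meek.com.cn/api/v1/items/BDY-{0}?client_version=2018.11"
resp = requests.get(api.format(share_id))
if resp.status_code == 200:
    # The JSON payload carries the share's extraction code.
    print(json.loads(resp.text).get("access_code"))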

The result: for each matching post, the script prints the Baidu pan link together with its access code (see the sample run after the code).

Source code:

# Python 3
# Requires requests, BeautifulSoup4 and lxml:
#   pip install requests beautifulsoup4 lxml
# Scrapes PDF posts from the 若蓝格 blog (www.ifblue.net) and resolves
# each post's Baidu pan share link plus its access code.
import json

import requests
from bs4 import BeautifulSoup

# Third-party API that maps a Baidu pan share ID to its access code.
BaiDuAPI_URL = "http://ypsuperkey.meek.com.cn/api/v1/items/BDY-{0}?client_version=2018.11"


def req(share_id):
    """Query the key API for one share ID; return the raw JSON text."""
    headers = {
        'Host': 'ypsuperkey.meek.com.cn',
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:62.0) Gecko/20100101 Firefox/62.0',
        'Accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Connection': 'close',
    }
    resp = requests.get(BaiDuAPI_URL.format(share_id), headers=headers)
    if resp.status_code == 200:
        return resp.text
    return ''


# =============================================================================
RUGE_HOME = "http://www.ifblue.net/"
RUGE_SEARCH_URL = "http://www.ifblue.net/search/{0}/page/{1}"

headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'DNT': '1',
    'Host': 'www.ifblue.net',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
}


def req_html(url):
    """Fetch a page from the blog; return its HTML, or '' on failure."""
    resp = requests.get(url, headers=headers)
    if resp.status_code == 200:
        return resp.text
    return ""


def main():
    key = input('Search keyword: ')
    pages = int(input('Number of pages to query: '))
    items = []
    print('Searching for', key)
    count = 1
    for p_ in range(1, pages + 1):
        print('Current page:', p_)
        html = req_html(RUGE_SEARCH_URL.format(key, p_))
        html_obj = BeautifulSoup(html, "lxml")
        # Each search result is an <article class="excerpt"> whose header
        # carries the post title and URL.
        articles = html_obj.find_all(name='article', attrs={'class': "excerpt"})
        if len(articles) < 1:
            break  # no more results; stop paging instead of aborting
        for art_ in articles:
            item = {
                'title': art_.header.h2.a['title'],
                'url': art_.header.h2.a['href'],
            }
            items.append(item)
            print('{0:3} {1:40} {2}'.format(count, item['title'], item['url']))
            count += 1

    # Resolve the Baidu pan link and access code for every matched post.
    PdfItems = []
    c_ = 1
    for d_ in items:
        print('Resolving', d_['title'], end=' ')
        # The post's download page lives at download.html?pid=<post id>.
        downurl = d_['url'].replace('.html', '').replace(
            'http://www.ifblue.net/', 'http://www.ifblue.net/download.html?pid=')
        html = req_html(downurl)
        html_obj = BeautifulSoup(html, "lxml")
        try:
            # The download page holds the pan.baidu.com link in a <center> tag.
            baiduurl = html_obj.find('center').a['href']
            # Strip the link down to the bare share ID and query the key API.
            share_id = baiduurl.replace('https://', '').replace('http://', '').replace('pan.baidu.com/s/1', '')
            j_obj = json.loads(req(share_id))
            PdfItems.append({
                'id': c_,
                'title': d_['title'],
                'url': baiduurl,
                'code': j_obj['access_code'],
            })
            print('{0} {1}'.format(baiduurl, j_obj['access_code']))
            c_ += 1
        except Exception:
            print('failed')

    # Save the resolved links and access codes to a file.
    with open('down.txt', 'w', encoding='utf-8') as pf:
        for t_ in PdfItems:
            pf.write('{0:3} title: {1} link: {2} code: {3}\n'.format(
                t_['id'], t_['title'], t_['url'], t_['code']))


if __name__ == '__main__':
    main()
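
A sample session might look like the following; the keyword, titles, links and code are placeholders, since the actual output depends on what the blog returns at the time:

Search keyword: python
Number of pages to query: 1
Searching for python
Current page: 1
  1 <post title>                             http://www.ifblue.net/<post-id>.html
Resolving <post title> https://pan.baidu.com/s/1<share-id> <access-code>

Every successfully resolved item is also written to down.txt as one line containing its ID, title, Baidu pan link and access code.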

Reprinted from blog.csdn.net/qq_41841569/article/details/82867812