新增功能:可选择按文件大小、创建时间或下载次数排序后保存结果文件;支持从多个网址搜索资源
# -*- coding: utf-8 -*- """ Created on Tue Jan 30 17:01:26 2018 @author: gzs10227 搜索电影资源 """ import re,os import requests import time,datetime import urllib import sys stderr = sys.stderr stdout = sys.stdout reload(sys) sys.setdefaultencoding('utf8') sys.stderr = stderr sys.stdout = stdout urllib.getproxies_registry = lambda: {} null = '' from lxml import etree import locale HEADERS = { 'X-Requested-With': 'XMLHttpRequest', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 ' '(KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' } print u'请输入您想搜索的电影:' keyword = raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True)) print u'\n请输入您想保存文件的路径:' save_path = raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True)) print u'\n请问您想按照哪种排序方式保存文件:' print u' 1、文件大小 2、创建时间 3、下载次数 4、无要求.以txt格式保存搜索结果' GS_num = int(raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True))) while GS_num > 4: print u'输入要求有误,请重新输入:1、文件大小 2、创建时间 3、下载次数 4、无要求' GS_num = int(raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True))) if GS_num == 4: save = os.path.join(save_path+'\\',keyword+'.txt') fle = open(save,'w') def open_url(url): html = requests.get(url,headers = HEADERS).content web_data = etree.HTML(html) return web_data def get_url(keyword): main_url = 'http://www.btyunsou.me/search/%s_ctime_1.html'%keyword web_data = open_url(main_url) links = web_data.xpath('//li[@class="media"]//h4//a/@href') # 获取链接 links = ['http://www.btyunsou.me'+i for i in links] return links def get_info(url): web_data = open_url(url) try: title = web_data.xpath(r'//div[@class="row-flbtd tor-title"]/h2/text()')[0] except: title = '' if keyword in title: print u'电影名:',title mange_link = 'magnet:?xt=urn:btih:' + url[23:-5] print u'磁力链接: ',mange_link if GS_num == 4: fle.write(u'电影名:'+ title + '\n') fle.write(u'磁力链接: ' + mange_link +'\n') datalist = web_data.xpath(r'//table[@class="table detail 
table-hover"]/tbody//tr//td/text()')[:10] for i in range(0,len(datalist),2): print datalist[i],datalist[i+1] fle.write(datalist[i] + datalist[i+1] + '\n') else: titles.append(title) cls.append(mange_link) datalist = web_data.xpath(r'//table[@class="table detail table-hover"]/tbody//tr//td/text()')[4:10] for i in range(0,len(datalist),2): print datalist[i],datalist[i+1] if i == 0: size.append(datalist[1]) # 文件大小 if i == 2: ctime.append(datalist[3]) # 时间 if i == 4: loadnum.append(datalist[5]) # 下载次数 else: print 'Sorry! None Search,Please change one: ' def get_info2(keyword): url = 'https://www.ciliba.org/s/%s.html'%keyword web_data = open_url(url) hrefs = web_data.xpath(r'//div[@class="item-title"]/h3/a/@href') for href in hrefs: try: web_data = open_url(href) except: continue try: title = web_data.xpath(r'//*[@id="wall"]/h1/text()')[0] except: title = '' if keyword in title: print u'电影名: ',title xl_link = web_data.xpath(r'//*[@id="wall"]/div[1]/p[6]/a[2]/@href')[0] print u'迅雷链接: ',xl_link data1 = web_data.xpath('//*[@id="wall"]/div[1]/p[2]/text()')[0] data2 = web_data.xpath('//*[@id="wall"]/div[1]/p[3]/text()')[0] print data1 print data2 if GS_num == 4: fle.write(u'电影名:'+ title + '\n') fle.write(u'迅雷链接: ' + xl_link +'\n') fle.write(data1) fle.write('\n') fle.write(data2) fle.write('\n') else: titles.append(title) cls.append(xl_link) size.append(data1.split(':')[1]) ctime.append(data2.split(':')[1]) loadnum.append(1) else: print 'Sorry! 
None Search,Please change one: ' def clear(i): if 'Gb' in i or 'GB' in i: inum = round(float(i.replace('GB','').replace('Gb','').replace(' ','')),2) return int(inum * 1024) else: inum = round(float(i.replace('Mb','').replace('MB','').replace(' ','')),2) return int(inum) if __name__ == '__main__': i = 1 while True: if i > 1: print u'请输入你想搜索的电影:' keyword = raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True)) print u'\n请问您想按照哪种排序方式保存文件:1、文件大小 2、创建时间 3、下载次数 4、无要求,以txt保存' GS_num = int(raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True))) while GS_num > 4: print u'输入要求有误,请重新输入[1-4]:1、文件大小 2、创建时间 3、下载次数 4、无要求,以txt保存' GS_num = int(raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True))) if GS_num == 4: save = os.path.join(save_path,keyword+'.txt') fle = open(save,'w') links = get_url(keyword) #df = pd.DataFrame() titles = [];cls = [];size = [];ctime = [];loadnum = [] for url in links: try: get_info(url) except: continue if GS_num == 4: fle.write('--------------------------------------------------') fle.write('\n') try: get_info2(keyword) except: pass if GS_num != 4: # df[u'标题'] = titles # df[u'创建时间'] = ctime # df[u'文件大小MB'] = size # df[u'下载次数'] = loadnum # df[u'下载链接'] = cls # df[u'下载次数'] = df[u'下载次数'].astype(int) # df[u'文件大小MB'] = map(clear,df[u'文件大小MB']) size = map(clear,size) df_list = [] for ii in range(len(titles)): df_list.append([titles[ii],cls[ii],size[ii],ctime[ii],loadnum[ii]]) save = os.path.join(save_path,keyword+'.txt') fle2 = open(save,'w') if GS_num == 1: #df2 = df.sort_values(by = u'文件大小MB', ascending = False) df2 = sorted(df_list, key=lambda x: x[2],reverse = True) if GS_num == 2: #df2 = df.sort_values(by = u'创建时间', ascending = False) df2 = sorted(df_list, key=lambda x: x[3],reverse = True) else: #df2 = df.sort_values(by = u'下载次数', ascending = False) df2 = sorted(df_list, key=lambda x: x[4],reverse = True) #df2.to_excel(save,index = False,encoding = 'gbk') for sl in df2: for s in 
range(len(sl)): if s == 0: ss = u'电影名:' + sl[s] if s == 1: ss = u'磁力链接:' + sl[s] if s == 2: ss = u'文件大小MB:' + str(sl[s]) if s == 3: ss = u'创建时间: ' + str(sl[s]) if s == 4: ss = u'热度: ' + str(sl[s]) fle2.write(str(ss)) fle2.write('\n') fle2.write('--------------------------------------------------') fle2.write('\n') fle2.close() else: fle.close() i = i + 1 print u'\n如果您想再次搜索,请输入电影名!否则请手动关闭窗口.\n'