from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
def getHTMLText(html, timeout=0.2):
    """Download a page and return its decoded text.

    Args:
        html: URL of the page to fetch (despite the name, this is the
            address that is passed to ``requests.get``, not markup).
        timeout: Seconds to wait for the server before giving up.
            Defaults to 0.2, the value that used to be hard-coded.

    Returns:
        ``(1, text)`` on success, where ``text`` is the response body
        decoded with the encoding detected from its content.
        The bare integer ``0`` when the request fails. Callers that index
        the result must check for this first.

    Note:
        The HTTP status code is not inspected, so an error page still
        counts as a success and its text is returned.
    """
    try:
        r = requests.get(html, timeout=timeout)
        # Trust the encoding sniffed from the body over the HTTP header.
        r.encoding = r.apparent_encoding
        return 1, r.text
    except requests.RequestException:
        # Only request-level failures (timeout, connection error, bad URL)
        # are reported as 0; Ctrl-C and programming errors propagate.
        return 0
# Query-string tail for each search category, indexed by ``skind``.
# The first characters of each entry complete the ``t=`` parameter.
_CSDN_KIND_SUFFIXES = (
    "&domain=&o=&s=&u=&l=&f=",             # 0: empty ``t=`` (no category)
    "blog&domain=&o=&s=&u=&l=&f=&rbg=0",   # 1: blog
    "doc&domain=&o=&s=all&u=&l=&f=",       # 2: doc
    "discuss&domain=&o=simi&s=&u=&l=&f=",  # 3: discuss
    "course&o=&s=&l=",                     # 4: course
)


def search_CSDN_html(key, skind, page):
    """Build the CSDN search URL for one page of results.

    Args:
        key: Search keyword. It is percent-encoded before being placed in
            the query string, so spaces and reserved characters such as
            ``&``, ``=`` or ``#`` cannot break the URL.
        skind: Index of the search category (0-4) in the suffix table.
        page: Result page number, inserted as the ``p`` parameter.

    Returns:
        The complete search URL as a string.

    Raises:
        IndexError: If ``skind`` is outside the suffix table.
    """
    suffix = _CSDN_KIND_SUFFIXES[skind]
    return (
        "https://so.csdn.net/so/search/s.do?p=" + str(page)
        + "&q=" + quote(key, safe="")
        + "&t=" + suffix
    )
def NUmlist(numlist, soup, skind):
    """Append the counter found in every ``.author-time`` element.

    The number is cut out of each element's text with fixed offsets that
    depend on the search category:

    * ``skind`` 1 or 3: from 3 characters past the start of '浏览' up to
      one character before '次'.
    * ``skind`` 2: from 2 characters past the start of '下载' up to the
      last two characters.
    * ``skind`` 4: from 2 characters past the start of a backslash-n
      marker up to the last three characters.
    * any other ``skind``: nothing is read or appended.

    Args:
        numlist: List that receives the parsed integers; modified in place.
        soup: Parsed page exposing ``select(css)``.
        skind: Search category index.

    Returns:
        The same ``numlist`` object that was passed in.

    Raises:
        ValueError: If a cut-out slice is not a valid integer.
    """
    if skind in (1, 3):
        def span(text):
            return text.find('浏览') + 3, text.find('次') - 1
    elif skind == 2:
        def span(text):
            return text.find('下载') + 2, -2
    elif skind == 4:
        # NOTE(review): '\\n' is a literal backslash followed by 'n', not a
        # newline character -- confirm this is what the page contains.
        def span(text):
            return text.find('\\n') + 2, -3
    else:
        return numlist

    for node in soup.select('.author-time'):
        text = node.text
        begin, end = span(text)
        numlist.append(int(text[begin:end]))
    return numlist
def maxpage(key, skind):
    """Estimate how many result pages a search has.

    Downloads the first results page, reads the total hit count from the
    first ``.text`` element (the digits between '共' and '条'), and divides
    it by 21, rounding down.

    Args:
        key: Search keyword.
        skind: Search category index, passed to ``search_CSDN_html``.

    Returns:
        The hit count integer-divided by 21.

    Raises:
        RuntimeError: If the first results page could not be downloaded.
        IndexError: If the page has no ``.text`` element.
        ValueError: If no integer is found between '共' and '条'.
    """
    per_page = 21  # divisor used for the page count; presumably hits per page

    url = search_CSDN_html(key, skind, 1)
    fetched = getHTMLText(url)
    # getHTMLText signals failure with a falsy value instead of raising;
    # fail with a clear message rather than an opaque TypeError on indexing.
    if not fetched or not fetched[0]:
        raise RuntimeError("could not download search results page: " + url)

    soup = BeautifulSoup(fetched[1], "html.parser")
    summary = soup.select('.text')[0].text
    total = int(summary[summary.find('共') + 1:summary.find('条')])
    return total // per_page
key = "Python GUI"  # search keyword
skind = 1  # result category: 1 = blog, 2 = downloads, 3 = forum, 4 = courses
print("最大页数为:",maxpage(key, skind))
# Parse the answer with int() rather than eval(): eval() would execute any
# expression typed at the prompt. Only whole page numbers are accepted.
maxP = int(input('最后的页面:'))  # last results page to fetch
numlist = []  # view/download counts; numlist[n] belongs to weblist[n]
weblist = []  # result links
# Upper bound on tries per page, so a page that never loads or never parses
# cannot stall the run forever.
MAX_ATTEMPTS = 10

for i in range(1, maxP + 1):
    html = search_CSDN_html(key, skind, i)
    for _ in range(MAX_ATTEMPTS):
        fetched = getHTMLText(html)
        if not fetched or not fetched[0]:
            continue  # request failed; try the same page again
        try:
            soup = BeautifulSoup(fetched[1], "html.parser")
            page_links = [a['href'] for a in soup.select('.search-link a')]
            page_counts = NUmlist([], soup, skind)
        except Exception as err:
            # Unexpected markup: report it and refetch instead of silently
            # looping. Ctrl-C is not caught here and still stops the script.
            print("第", i, "页解析失败:", err)
            continue
        # Commit only once the whole page parsed, so a retry never leaves
        # duplicate links or half a page of counts behind.
        weblist.extend(page_links)
        numlist.extend(page_counts)
        break
    else:
        print("第", i, "页获取失败,已跳过")
    print(round(i / maxP * 100, 2),'%')

# Rank links by their count, highest first. Only positions that have both a
# count and a link are reordered; any unpaired tail stays where it was.
ranked = sorted(zip(numlist, weblist), key=lambda pair: pair[0], reverse=True)
numlist[:len(ranked)] = [count for count, _ in ranked]
weblist[:len(ranked)] = [link for _, link in ranked]
print(weblist[:10])
# The ten results with the most views (or the most downloads).