# 这是我初学时写的第一个简单爬虫,使用 Python 编写,适合从零开始入门。
import traceback

import bs4
import requests
from bs4 import BeautifulSoup

# Each table row contributes this many consecutive entries to the flat list.
_FIELDS_PER_ROW = 3


def getHtmlText(url, timeout=30):
    """Download *url* and return the response body as text.

    The response encoding is replaced by the one detected from the content
    (``apparent_encoding``) before the text is decoded.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        The decoded page text, or ``""`` when the request fails or the
        server answers with an error status.
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: callers treat an empty string as "no page".
        return ""


def fillUnivList(ulist, html):
    """Append the first three cell values of every table row to *ulist*.

    The list is filled in place and stays flat: each parsed row adds three
    consecutive entries (the ``.string`` of its first three ``<td>`` cells).

    Args:
        ulist: List that receives the extracted values.
        html: Markup to parse; it may be empty.

    Returns:
        None. Nothing is appended when the markup has no ``<tbody>``.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find("tbody")
    if tbody is None:
        # Empty or unexpected markup (e.g. a failed download).
        return
    for tr in tbody.children:
        # Children also include whitespace text nodes; only tags are rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr("td")
        if len(tds) < _FIELDS_PER_ROW:
            # Skip header/spacer rows that lack the expected cells.
            continue
        # NOTE(review): the printed header labels the third value as the
        # total score -- confirm the third <td> really holds it.
        ulist.extend(td.string for td in tds[:_FIELDS_PER_ROW])


def printUnivList(ulist, num):
    """Print a header line followed by up to *num* rows taken from *ulist*.

    Args:
        ulist: Flat list holding three consecutive entries per row.
        num: Maximum number of rows to print; fewer are printed when the
            list holds fewer complete rows.
    """
    print("{:^}\t{:^}\t{:^}".format("排名", "学校名称", "总分"))
    # Clamp so a short list cannot raise IndexError.
    rows = min(num, len(ulist) // _FIELDS_PER_ROW)
    for i in range(rows):
        start = i * _FIELDS_PER_ROW
        rank, name, score = ulist[start:start + _FIELDS_PER_ROW]
        print(rank, name, score)


def main():
    """Fetch the ranking page, parse it and print the first 20 rows."""
    uinfo = []
    url = 'http://www.zuihaodaxue.com/zuihaodaxuepaiming2016.html'
    html = getHtmlText(url)
    if not html:
        # The download failed; report it instead of printing an empty table.
        print("出现错误")
        return
    fillUnivList(uinfo, html)
    printUnivList(uinfo, 20)  # 20 univ


if __name__ == '__main__':
    try:
        main()
    except Exception:
        # Top-level boundary: report the failure together with its traceback.
        print("出现错误")
        traceback.print_exc()