import csv
from urllib.request import urlopen
from bs4 import BeautifulSoup
# Scrape the first "wikitable" from Wikipedia's "Comparison of text editors"
# page and dump it, row by row, into a CSV file.

# Download the page; the HTTP response is closed once it has been parsed.
with urlopen("http://en.wikipedia.org/wiki/Comparison_of_text_editors") as html:
    # Name the parser explicitly so the parse tree does not depend on which
    # optional parsers (lxml, html5lib) happen to be installed.
    bsObj = BeautifulSoup(html, "html.parser")

# The main comparison table is the first wikitable on the page.
table = bsObj.find_all("table", {"class": "wikitable"})[0]
rows = table.find_all("tr")

# newline="" leaves line-ending handling to the csv module; the with-block
# closes the file even if writing a row fails.
with open("C:/Users/Administrator/Desktop/test2.csv", 'wt', newline="", encoding='utf-8') as csvFile:
    writer = csv.writer(csvFile)
    for row in rows:
        # One CSV record per <tr>, taking header (th) and data (td) cells alike.
        csvRow = [cell.get_text() for cell in row.find_all(['td', 'th'])]
        writer.writerow(csvRow)
import requests
def getHTMLText(url):
    """Fetch *url* with an HTTP GET and summarise the response.

    Args:
        url: Address to request.

    Returns:
        On success, a 5-tuple ``(status_code, text, content, len(text),
        len(content))`` where ``text`` is the body decoded as UTF-8 and
        ``content`` is the raw response bytes.  If the request fails
        (connection error, timeout, or an error status reported by
        ``raise_for_status``), the empty string ``""`` is returned instead.
    """
    # NOTE(review): the source had lost its indentation. Read literally, the
    # original ``for i in range(0, 20)`` loop returned during its first pass,
    # so a single request is equivalent. Confirm 20 requests were not intended.
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        # Force UTF-8 instead of relying on the encoding requests inferred.
        r.encoding = 'utf-8'
        return r.status_code, r.text, r.content, len(r.text), len(r.content)
    except requests.RequestException:
        # Only request-level failures map to ""; programming errors and
        # KeyboardInterrupt are no longer silently swallowed.
        return ""
# Demo run: request a sample page and print whatever getHTMLText returns.
url = 'http://www.baidu.com.cn/'
fetched = getHTMLText(url)
print(fetched)