注：下面代码中的数据库操作已被注释掉。
"""Crawl numbered product pages and print the first <h3> heading of each.

Pages live at http://gdjy.hfut.edu.cn/products/<id>.html.  For every id in
[FIRST_ID, END_ID) the script prints the id on one line, followed by either
the text of the page's first <h3> element or the reason the page was skipped.

Storing the results in MySQL is intentionally disabled; the statements are
kept as comments so the step can be switched back on.
"""
from urllib.request import urlopen
from urllib.error import HTTPError

from bs4 import BeautifulSoup
import pymysql  # only used by the disabled database code below

BASE_URL = "http://gdjy.hfut.edu.cn/products/"
FIRST_ID = 1480
END_ID = 2000  # exclusive: the last page requested is 1999
REQUEST_TIMEOUT = 10  # seconds; keeps one unresponsive page from stalling the crawl

# --- disabled database setup -------------------------------------------------
# conn = pymysql.connect(host='127.0.0.1', port=3306, user='root',
#                        passwd='root', db='mysql', charset='utf8')
# cur = conn.cursor()
# cur.execute("USE hfutxjh")

for page_id in range(FIRST_ID, END_ID):
    url = BASE_URL + str(page_id) + ".html"

    try:
        # The context manager closes the HTTP response even if read() fails.
        with urlopen(url, timeout=REQUEST_TIMEOUT) as response:
            page = response.read()
    except OSError as err:
        # HTTPError (e.g. 404 for an unused id), URLError (DNS failure,
        # refused connection) and socket timeouts all derive from OSError.
        # Report the failure and carry on with the next id.
        print(page_id)
        print(err)
        continue

    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(page, "html.parser")

    heading = soup.h3
    if heading is None:
        # A page without any <h3> used to raise AttributeError and abort
        # the whole run; skip it instead.
        print(page_id)
        print("h3 is not found")
        continue

    heading_text = heading.get_text()
    print(page_id)
    print(heading_text)

    # Disabled insert.  Parameterized so that quotes in the scraped heading
    # cannot break or inject into the SQL statement.
    # cur.execute("INSERT INTO h3(id, h3text) VALUES (%s, %s)",
    #             (str(page_id), heading_text))
    # print(cur.fetchone())

# --- disabled database teardown ----------------------------------------------
# NOTE(review): the original never committed; a conn.commit() is probably
# needed here before closing if the inserts are re-enabled -- confirm.
# cur.close()
# conn.close()