"""Scrape IP records from paginated HTML listing pages and store them in MySQL.

For every listing page the script extracts IP addresses, port-like numbers and
Chinese place names with regular expressions, then inserts one row per IP into
the ``test`` table (columns ``ip``, ``port``, ``place``) of the ``testdb``
database.
"""
import re

import pymysql
import requests

# Listing pages live at BASE_URL + 'index_<n>.html' (look closely at the
# site's routing).
BASE_URL = 'http://*******8****'

# The original loop was range(1, 10), i.e. pages index_1 .. index_9.
# NOTE(review): the original comment mentions 10 pages; confirm whether
# index_10.html should be fetched as well.
FIRST_PAGE = 1
LAST_PAGE = 9

# Seconds to wait for the server before giving up on a page.
REQUEST_TIMEOUT = 10

# Dotted-quad IPv4 address; group 1 is the whole address.
IP_RE = re.compile(
    r'(((2(5[0-5]|[0-4]\d))|[0-1]?\d{1,2})'
    r'(\.((2(5[0-5]|[0-4]\d))|[0-1]?\d{1,2})){3})'
)
# A run of 3-6 digits surrounded by whitespace, taken to be the port.
PORT_RE = re.compile(r'\s+(\d{3,6})\s')
# Chinese place name followed by an administrative suffix.  The original
# pattern ended in '区])', whose stray ']' made the last alternative match
# only the literal text '区]'.
PLACE_RE = re.compile(r'\s+([\u4e00-\u9fa5]{1,9})\s?(?:省|新疆|内蒙古|市|县|区)')
# A timestamp pattern (YYYY/MM/DD HH:MM:SS) was sketched in the original but
# never used; it is intentionally left out here.

# Placeholders are filled in by the driver, so scraped text can never be
# interpreted as SQL.
INSERT_SQL = "INSERT INTO test (ip,port,place) VALUES (%s,%s,%s)"


def fetch_page(page):
    """Download listing page number *page* and return its HTML text.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-success HTTP status.
    """
    url = BASE_URL + 'index_' + str(page) + '.html'
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


def parse_rows(html):
    """Return a list of ``(ip, port, place)`` tuples extracted from *html*.

    The three patterns are matched independently and their results are paired
    by position.  The original code stored the first port and first place
    with every IP, which cannot be the intent.
    NOTE(review): positional pairing assumes each record on the page yields
    exactly one match per pattern, in the same order - verify against the
    real markup.
    """
    ips = [match.group(1) for match in IP_RE.finditer(html)]
    ports = PORT_RE.findall(html)
    places = PLACE_RE.findall(html)
    if not len(ips) == len(ports) == len(places):
        # zip() silently truncates to the shortest list; make that visible.
        print('warning: match counts differ (ip=%d, port=%d, place=%d)'
              % (len(ips), len(ports), len(places)))
    return list(zip(ips, ports, places))


def main():
    """Fetch every listing page and insert the extracted rows into MySQL."""
    # Keyword arguments: positional connect() arguments are rejected by
    # PyMySQL 1.0 and later.
    db = pymysql.connect(
        host="localhost",
        user="root",
        password="root",
        database="testdb",
    )
    try:
        # Use cursor() to obtain the cursor used for all statements.
        with db.cursor() as cursor:
            for page in range(FIRST_PAGE, LAST_PAGE + 1):
                try:
                    html = fetch_page(page)
                except requests.RequestException as exc:
                    # One unreachable page should not abort the whole run.
                    print('skipping page %d: %s' % (page, exc))
                    continue
                for row in parse_rows(html):
                    print(row[0])
                    # mogrify() shows the exact statement the driver sends.
                    print(cursor.mogrify(INSERT_SQL, row))
                    cursor.execute(INSERT_SQL, row)
                # PyMySQL does not autocommit by default; without this the
                # inserted rows are discarded when the connection closes.
                db.commit()
    finally:
        db.close()


if __name__ == "__main__":
    main()
python爬虫爬取ip记录网站信息并存入数据库
猜你喜欢
转载自www.cnblogs.com/Y139147/p/11306733.html
今日推荐
周排行