目录
1.Intro
文件名:authentication.py
模块名:验证模块
引用库:
urllib2 | requests | pymongo | socket |
gc | retry | spiderData(自定义库) |
自定义引用文件:spiderData,包含了一个网页返回状态码的字典,键为网页状态码,值为网页状态码对应的信息。由于没有合适的状态码返回值信息,所以自己写了个字典用于存储状态码和状态信息的键值对,其中包含了常见的网页错误状态码和错误信息:
# Lookup table: HTTP status code (as a string) -> reason phrase.
# Covers the redirect (3xx), client-error (4xx) and server-error (5xx)
# ranges plus a few non-standard codes seen in the wild.
httpStatusCode = {
    # 3xx redirection
    "300": "Multiple Choices",
    "301": "Moved Permanently",
    # Registered phrase is "Found" (HTTP/1.0 called it "Moved Temporarily").
    "302": "Found",
    "303": "See Other",
    "304": "Not Modified",
    "305": "Use Proxy",
    "306": "Switch Proxy",
    "307": "Temporary Redirect",
    "308": "Permanent Redirect",
    # 4xx client errors
    "400": "Bad Request",
    "401": "Unauthorized",
    "402": "Payment Required",
    "403": "Forbidden",
    "404": "Not Found",
    "405": "Method Not Allowed",
    "406": "Not Acceptable",
    "407": "Proxy Authentication Required",
    "408": "Request Timeout",
    "409": "Conflict",
    "410": "Gone",
    "411": "Length Required",
    "412": "Precondition Failed",
    "413": "Request Entity Too Large",
    "414": "Request-URI Too Long",
    "415": "Unsupported Media Type",
    "416": "Requested Range Not Satisfiable",
    "417": "Expectation Failed",
    "421": "Too many connections",
    "422": "Unprocessable Entity",
    "423": "Locked",
    "424": "Failed Dependency",
    "425": "Unordered Collection",
    "426": "Upgrade Required",
    "428": "Precondition Required",
    # Rate limiting: the status a crawler is most likely to run into.
    "429": "Too Many Requests",
    "431": "Request Header Fields Too Large",
    "449": "Retry With",
    "451": "Unavailable For Legal Reasons",
    # 5xx server errors
    "500": "Internal Server Error",
    "501": "Not Implemented",
    "502": "Bad Gateway",
    "503": "Service Unavailable",
    "504": "Gateway Timeout",
    "505": "HTTP Version Not Supported",
    "506": "Variant Also Negotiates",
    "507": "Insufficient Storage",
    "509": "Bandwidth Limit Exceeded",
    "510": "Not Extended",
    "511": "Network Authentication Required",
    # Non-standard
    "600": "Unparseable Response Headers",
}
功能:用于验证MongoDB数据库连接状态、网页连通性(HTTP状态码)、代理IP可用性。
2.Source
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
# Author : YSW
# Time : 2018/6/6 14:01
# File : authentication.py
# Version : 1.1
# Describe: 验证模块
# Update :
1.新增了retry库,可多次尝试网站连通性,直到连接超时。
'''
import urllib2
import requests
import socket
import spiderData
import pymongo
import gc
from retry import retry
class Authentication(object):
    '''
    Pre-flight checks: MongoDB connectivity, the HTTP status of a URL,
    and the usability of a proxy.

    Every check reports its progress on stdout and answers with a bool.
    '''

    def __init__(self, headers):
        '''
        :param headers: HTTP request headers sent with every HTTP/proxy check
        '''
        print("[*] 初始化验证模块")
        self.headers = headers

    def dataBaseVerify(self, dbParams):
        '''
        Verify that the MongoDB server can actually be reached.

        :param dbParams: connection parameters; dbParams["userName"] is
            handed to MongoClient as its first (host) argument and
            dbParams["port"] as the port
        :return: True when the server answers a ping, otherwise False
        '''
        print("[+] 正在验证 MongoDB 数据库连接状态")
        client = None
        try:
            userName = dbParams["userName"]
            port = dbParams["port"]
            client = pymongo.MongoClient(userName, port)
            # Since PyMongo 3.0 the constructor returns without connecting,
            # so a cheap command is needed to force a real round trip.
            client.admin.command("ping")
            print("[+] 数据库验证通过")
            return True
        except Exception as e:
            print("[+] 数据库验证失败")
            # str(e) instead of the deprecated e.message, which is empty
            # for many exception types.
            print("ERROR: " + str(e))
            return False
        finally:
            # Release the client's sockets whether or not the check passed.
            if client is not None:
                client.close()

    @retry(tries=5, delay=2)
    def httpCodeVerify(self, url):
        '''
        Verify that url can be fetched without an HTTP error status.

        Only urllib2.HTTPError is handled here. Any other exception
        escapes to the retry decorator (tries=5, delay=2).

        :param url: address to request, using self.headers
        :return: True when the request succeeds, False on an HTTP error status
        '''
        print("[+] 正在验证 HTTP 状态码:{0}".format(url))
        try:
            request = urllib2.Request(url, headers=self.headers)
            response = urllib2.urlopen(request)
        except urllib2.HTTPError as e:
            print("[+] HTTP 验证失败:{0}".format(url))
            code = str(e.code)
            # Codes missing from the table must not raise KeyError here:
            # that would escape this handler and trigger pointless retries.
            # Fall back to the reason phrase carried by the error itself.
            reason = spiderData.httpStatusCode.get(code, e.msg)
            print("ERROR: " + code + " " + str(reason))
            return False
        else:
            # Only the status matters; close the connection right away.
            response.close()
            print("[+] HTTP 验证通过:{0}".format(url))
            return True

    def proxyVerify(self, url, protocol, ip, port):
        '''
        Check whether a proxy is usable by requesting url through it.

        :param url: address requested through the proxy
        :param protocol: proxy protocol; used both as the scheme of the
            proxy URL and as the key of the proxies mapping
        :param ip: proxy IP
        :param port: proxy port
        :return: True when the response status is 2xx, otherwise False
        '''
        check_url = url
        proxy_url = "{0}://{1}:{2}".format(protocol, ip, port)
        print("[+] 正在验证代理 IP 可用性")
        socket_timeout = 30
        # Process-wide default, kept because other socket users (e.g.
        # urllib2 calls without an explicit timeout) pick it up.
        socket.setdefaulttimeout(socket_timeout)
        try:
            # NOTE(review): requests picks the proxy by the scheme of the
            # requested URL, so when protocol differs from url's scheme the
            # request presumably bypasses the proxy - confirm with callers.
            proxy_dict = {
                protocol: proxy_url
            }
            # requests does not honour the socket default timeout; without
            # an explicit timeout a dead proxy can block this call forever.
            response = requests.get(
                check_url,
                proxies=proxy_dict,
                headers=self.headers,
                timeout=socket_timeout,
            )
            code = response.status_code
            print(str(code))
            if 200 <= code < 300:
                print("[+] 可用的代理IP和端口: {0}:{1}:{2}".format(protocol, ip, port))
                print("[+] 验证通过")
                return True
            else:
                print("[-] 不可用的代理IP和端口: {0}:{1}:{2}".format(protocol, ip, port))
                return False
        except Exception as e:
            print("[-] 不可用的代理IP和端口: {0}:{1}:{2}".format(protocol, ip, port))
            print("ERROR: " + str(e))
            return False
        finally:
            gc.collect()