声明:文章仅源自个人兴趣爱好,不涉及他用,侵权联系删。
版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://blog.csdn.net/qq_36853469/article/details/105098385
1.某公共资源交易平台网站
网站链接:
https://ggzyfw.fujian.gov.cn/default.aspx
2.问题:cookie时效性不用说,主要是cookie经过JS处理
之前是直接用自动化测试工具(selenium)来拿cookie的,最近在优化脚本,于是就把这个网站的cookie反爬解决了,并不是很难。
# The Fujian public resource trading platform requires a cookie, so a fresh
# one is grabbed with a headless browser every time the script starts.
import ast

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
# Images are not needed to obtain the cookie, so loading them is disabled twice over.
chrome_options.add_argument('blink-settings=imagesEnabled=false')
prefs = {'profile.managed_default_content_settings.images': 2}
chrome_options.add_experimental_option('prefs', prefs)

# driver = webdriver.Chrome("C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe",options=chrome_options)  # local machine
driver = webdriver.Chrome(options=chrome_options)  # server
try:
    driver.get("https://ggzyfw.fj.gov.cn/")
    cookie = driver.get_cookies()
finally:
    # quit() ends the whole driver session (close() only closes the current
    # window), and the finally clause runs it even when get() raises.
    driver.quit()

# Flatten selenium's list of cookie records into a plain name -> value mapping.
cookie_dict = {item["name"]: item["value"] for item in cookie}

try:
    with open(r'./spiders/cookie.txt', "w", encoding="utf-8", errors="ignore") as f:
        f.write(str(cookie_dict))
except OSError as e:
    # Report the actual reason instead of discarding the exception.
    print("cookie保存失败!", e)
else:
    print("cookie保存成功!")

# Read the cookie back (this is what the spider does before crawling).
with open(r'./spiders/cookie.txt', encoding="utf-8") as f:
    cookie = f.read()
# The file holds the repr of a dict; literal_eval parses it without executing
# arbitrary code the way eval() would.
cookie_dict = ast.literal_eval(cookie)
3.分析(抓包):
从发起请求到请求结束,大致是到#28,就获取到我们需要的数据了
Set-Cookie:
我们需要的cookie(这个也是不全的):
根据以往经验来说,我们用requests抓到的cookie,肯定是不完整的,事实也证明了,如下,是我们拿到的部分cookie
多次拿cookie对比,我们发现,cookie中真正需要获取的就两个参数:ASP.NET_SessionId和_qddagsx_02095bad0b。第一个我们发起请求就可以正常拿到,第二个则是经过JS处理生成的,用requests直接拿不到。
尝试直接搜索,结果很幸运,一下就出来了。
/**
 * Build a cookie string of the form "name=value[;expires=...];path=/".
 *
 * Unlike a browser-side setter, this variant returns the string instead of
 * writing document.cookie, so the caller can inspect the generated value.
 *
 * @param {string} c_name      Cookie name.
 * @param {string} value       Cookie value; it is passed through escape().
 * @param {?number} expiredays Days until expiry; null/undefined omits the
 *                             ";expires=" part.
 * @returns {string} The assembled cookie string.
 */
function setCookie(c_name, value, expiredays) {
    var exdate = new Date();
    exdate.setDate(exdate.getDate() + expiredays);
    // Declared with `var`: the bare assignment leaked an implicit global and
    // throws a ReferenceError under strict mode.
    var cookie = c_name + "=" + escape(value) +
        ((expiredays == null) ? "" : ";expires=" + exdate.toGMTString()) +
        ";path=/";
    return cookie;
}
查看并解析这段JS代码,然后稍微修改一下(让setCookie把结果return出来),检测setCookie生成的是不是我们需要的cookie。
用execjs调用JS时,各种没定义是很正常的,直接用力扣JS就是了,扣完应该大概600行左右吧,也不多。
需要JS的自取:
// RSA cookie generator extracted from the site's page script, together with
// the BigInt / Barrett-reduction helpers it depends on.
//
// The script must keep its line breaks: when it is collapsed onto a few long
// lines every `//` comment swallows the code that follows it.

// ---------------------------------------------------------------------------
// BigInt constants (radix 2^16, i.e. 16 bits per digit)
// ---------------------------------------------------------------------------
var biRadixBits = 16;
var bitsPerDigit = biRadixBits;
var biRadix = 1 << 16; // = 2^16 = 65536
var biRadixSquared = biRadix * biRadix;
var biHalfRadix = biRadix >>> 1;
var maxDigitVal = biRadix - 1;
var highBitMasks = new Array(0x0000, 0x8000, 0xC000, 0xE000, 0xF000, 0xF800,
                             0xFC00, 0xFE00, 0xFF00, 0xFF80, 0xFFC0, 0xFFE0,
                             0xFFF0, 0xFFF8, 0xFFFC, 0xFFFE, 0xFFFF);
var lowBitMasks = new Array(0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F,
                            0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF,
                            0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF);
var hexToChar = new Array('0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
                          'a', 'b', 'c', 'd', 'e', 'f');

// State written by setMaxDigits(); declared explicitly so the script does not
// rely on implicit globals.
var maxDigits, ZERO_ARRAY, bigZero, bigOne;

/**
 * Entry point: RSA-encrypt the fixed token and return the cookie string
 * "_qddagsx_02095bad0b=<hex>;expires=...;path=/".
 */
function RsaFunc() {
    setMaxDigits(129);
    var key = new RSAKeyPair("010001", "", "D718814C9DA3C7F8BB1D414C6B503737886F47FD4BA3E6EF164D9BFA0783AD8255C8401AEE4083794C89D3D4F79E6541DA824E4CC357194C6B02DA19DF84F4FC046137475D089DD07304E86D9508E68633C9454019DDC4B8ED6D24381BEF9071593219067DB4B121FE95471396B07D25850EA7FA4F5E27EB24EE29E158F99831");
    var val = "vRKsAcUgdiaodnpVWvrqs9AXFdFKIB-OQ_TPOy1ZMsw";
    var RsaEncrypted = encryptedString(key, val);
    var isReflash = false;
    var ck = "_qddagsx_02095bad0b";
    // The page script sets the cookie and reloads; here the value is returned.
    var cookie = setCookie(ck, RsaEncrypted, 2);
    return cookie;
    // Original browser-side logic, kept for reference:
    // if (getCookie(ck) == null) {
    //     isReflash = true;
    // }
    // setCookie(ck, RsaEncrypted, 2);
    // if (isReflash) {
    //     window.location = window.location;
    // }
}

/**
 * RSA key holder. All three arguments are hexadecimal strings.
 */
function RSAKeyPair(encryptionExponent, decryptionExponent, modulus) {
    this.e = biFromHex(encryptionExponent);
    this.d = biFromHex(decryptionExponent);
    this.m = biFromHex(modulus);
    // We can do two bytes per digit, so
    // chunkSize = 2 * (number of digits in modulus - 1).
    // Since biHighIndex returns the high index, not the number of digits, 1 has
    // already been subtracted.
    //this.chunkSize = 2 * biHighIndex(this.m);
    ////////////////////////////////// TYF
    this.digitSize = 2 * biHighIndex(this.m) + 2;
    this.chunkSize = this.digitSize - 11; // maximum, anything lower is fine
    ////////////////////////////////// TYF
    this.radix = 16;
    this.barrett = new BarrettMu(this.m);
}

/**
 * Encrypt string `s` with `key`, one padded block per key.chunkSize
 * characters; blocks are returned as space-separated hex strings.
 *
 * Altered by Rob Saunders. New routine pads the string after it has been
 * converted to an array. This fixes an incompatibility with Flash MX's
 * ActionScript.
 * Altered by Tang Yu Feng for interoperability with Microsoft's
 * RSACryptoServiceProvider implementation.
 */
function encryptedString(key, s) {
    ////////////////////////////////// TYF
    if (key.chunkSize > key.digitSize - 11) {
        return "Error";
    }
    ////////////////////////////////// TYF
    var a = new Array();
    var sl = s.length;
    var i = 0;
    while (i < sl) {
        a[i] = s.charCodeAt(i);
        i++;
    }

    var al = a.length;
    var result = "";
    var j, k, block;
    for (i = 0; i < al; i += key.chunkSize) {
        block = new BigInt();
        j = 0;
        ////////////////////////////////// TYF
        // Add PKCS#1 v1.5 padding
        // 0x00 || 0x02 || PseudoRandomNonZeroBytes || 0x00 || Message
        // Variable a before padding must be of at most digitSize-11
        // That is for 3 marker bytes plus at least 8 random non-zero bytes
        var x;
        var msgLength = (i + key.chunkSize) > al ? al % key.chunkSize : key.chunkSize;
        // Variable b with 0x00 || 0x02 at the highest index.
        var b = new Array();
        for (x = 0; x < msgLength; x++) {
            b[x] = a[i + msgLength - 1 - x];
        }
        b[msgLength] = 0; // marker
        var paddedSize = Math.max(8, key.digitSize - 3 - msgLength);
        for (x = 0; x < paddedSize; x++) {
            b[msgLength + 1 + x] = Math.floor(Math.random() * 254) + 1; // [1, 254]
        }
        // It can be asserted that msgLength+paddedSize == key.digitSize-3
        b[key.digitSize - 2] = 2; // marker
        b[key.digitSize - 1] = 0; // marker
        // Pack two bytes into each 16-bit digit.
        for (k = 0; k < key.digitSize; ++j) {
            block.digits[j] = b[k++];
            block.digits[j] += b[k++] << 8;
        }
        ////////////////////////////////// TYF
        var crypt = key.barrett.powMod(block, key.e);
        // key.radix is always 16 here; biToString is not part of this extract.
        var text = key.radix == 16 ? biToHex(crypt) : biToString(crypt, key.radix);
        result += text + " ";
    }
    return result.substring(0, result.length - 1); // Remove last space.
}

/**
 * Build (and return, rather than store) the cookie string
 * "name=value[;expires=...];path=/".
 */
function setCookie(c_name, value, expiredays) {
    var exdate = new Date();
    exdate.setDate(exdate.getDate() + expiredays);
    var cookie = c_name + "=" + escape(value) +
        ((expiredays == null) ? "" : ";expires=" + exdate.toGMTString()) +
        ";path=/";
    return cookie;
}

// ---------------------------------------------------------------------------
// BigInt helpers
// ---------------------------------------------------------------------------

/** Set the digit capacity of newly created BigInts and rebuild the 0/1 constants. */
function setMaxDigits(value) {
    maxDigits = value;
    ZERO_ARRAY = new Array(maxDigits);
    for (var iza = 0; iza < ZERO_ARRAY.length; iza++) ZERO_ARRAY[iza] = 0;
    bigZero = new BigInt();
    bigOne = new BigInt();
    bigOne.digits[0] = 1;
}

setMaxDigits(20);

/** Sign-and-digits big integer; `flag === true` skips allocating the digit array. */
function BigInt(flag) {
    if (typeof flag == "boolean" && flag == true) {
        this.digits = null;
    } else {
        this.digits = ZERO_ARRAY.slice(0);
    }
    this.isNeg = false;
}

/** Parse a hexadecimal string into a BigInt, four hex characters per digit. */
function biFromHex(s) {
    var result = new BigInt();
    var sl = s.length;
    for (var i = sl, j = 0; i > 0; i -= 4, ++j) {
        result.digits[j] = hexToDigit(s.substr(Math.max(i - 4, 0), Math.min(i, 4)));
    }
    return result;
}

/** Convert up to four hex characters into one 16-bit digit. */
function hexToDigit(s) {
    var result = 0;
    var sl = Math.min(s.length, 4);
    for (var i = 0; i < sl; ++i) {
        result <<= 4;
        result |= charToHex(s.charCodeAt(i));
    }
    return result;
}

/** Map a character code to its hex value (unrecognised characters map to 0). */
function charToHex(c) {
    var ZERO = 48;
    var NINE = ZERO + 9;
    var littleA = 97;
    var littleZ = littleA + 25;
    var bigA = 65;
    var bigZ = 65 + 25;
    var result;

    if (c >= ZERO && c <= NINE) {
        result = c - ZERO;
    } else if (c >= bigA && c <= bigZ) {
        result = 10 + c - bigA;
    } else if (c >= littleA && c <= littleZ) {
        result = 10 + c - littleA;
    } else {
        result = 0;
    }
    return result;
}

/** Index of the most significant non-zero digit (0 for a zero value). */
function biHighIndex(x) {
    var result = x.digits.length - 1;
    while (result > 0 && x.digits[result] == 0) --result;
    return result;
}

/** Precomputed state for reduction modulo `m`. */
function BarrettMu(m) {
    this.modulus = biCopy(m);
    this.k = biHighIndex(this.modulus) + 1;
    var b2k = new BigInt();
    b2k.digits[2 * this.k] = 1; // b2k = b^(2k)
    this.mu = biDivide(b2k, this.modulus);
    this.bkplus1 = new BigInt();
    this.bkplus1.digits[this.k + 1] = 1; // bkplus1 = b^(k+1)
    this.modulo = BarrettMu_modulo;
    this.multiplyMod = BarrettMu_multiplyMod;
    this.powMod = BarrettMu_powMod;
}

/** Copy of `bi` with its own digit array. */
function biCopy(bi) {
    var result = new BigInt(true);
    result.digits = bi.digits.slice(0);
    result.isNeg = bi.isNeg;
    return result;
}

/**
 * Sum of x and y.
 *
 * biDivideModulo, biSubtract and BarrettMu_modulo all call biAdd, but the
 * extracted script did not define it, so those branches raised a
 * ReferenceError whenever they were reached. This is the counterpart of
 * biSubtract below.
 */
function biAdd(x, y) {
    var result;

    if (x.isNeg != y.isNeg) {
        // Different signs: x + y == x - (-y).
        y.isNeg = !y.isNeg;
        result = biSubtract(x, y);
        y.isNeg = !y.isNeg;
    } else {
        result = new BigInt();
        var c = 0;
        var n;
        for (var i = 0; i < x.digits.length; ++i) {
            n = x.digits[i] + y.digits[i] + c;
            result.digits[i] = n % biRadix;
            c = Number(n >= biRadix);
        }
        result.isNeg = x.isNeg;
    }
    return result;
}

/** Quotient of x / y. */
function biDivide(x, y) {
    return biDivideModulo(x, y)[0];
}

/** Return [quotient, remainder] of x / y. */
function biDivideModulo(x, y) {
    var nb = biNumBits(x);
    var tb = biNumBits(y);
    var origYIsNeg = y.isNeg;
    var q, r;
    if (nb < tb) {
        // |x| < |y|
        if (x.isNeg) {
            q = biCopy(bigOne);
            q.isNeg = !y.isNeg;
            x.isNeg = false;
            y.isNeg = false;
            r = biSubtract(y, x);
            // Restore signs, 'cause they're references.
            x.isNeg = true;
            y.isNeg = origYIsNeg;
        } else {
            q = new BigInt();
            r = biCopy(x);
        }
        return new Array(q, r);
    }

    q = new BigInt();
    r = x;

    // Normalize Y.
    var t = Math.ceil(tb / bitsPerDigit) - 1;
    var lambda = 0;
    while (y.digits[t] < biHalfRadix) {
        y = biShiftLeft(y, 1);
        ++lambda;
        ++tb;
        t = Math.ceil(tb / bitsPerDigit) - 1;
    }
    // Shift r over to keep the quotient constant. We'll shift the
    // remainder back at the end.
    r = biShiftLeft(r, lambda);
    nb += lambda; // Update the bit count for x.
    var n = Math.ceil(nb / bitsPerDigit) - 1;
    var b = biMultiplyByRadixPower(y, n - t);
    while (biCompare(r, b) != -1) {
        ++q.digits[n - t];
        r = biSubtract(r, b);
    }
    for (var i = n; i > t; --i) {
        var ri = (i >= r.digits.length) ? 0 : r.digits[i];
        var ri1 = (i - 1 >= r.digits.length) ? 0 : r.digits[i - 1];
        var ri2 = (i - 2 >= r.digits.length) ? 0 : r.digits[i - 2];
        var yt = (t >= y.digits.length) ? 0 : y.digits[t];
        var yt1 = (t - 1 >= y.digits.length) ? 0 : y.digits[t - 1];
        if (ri == yt) {
            q.digits[i - t - 1] = maxDigitVal;
        } else {
            q.digits[i - t - 1] = Math.floor((ri * biRadix + ri1) / yt);
        }

        var c1 = q.digits[i - t - 1] * ((yt * biRadix) + yt1);
        var c2 = (ri * biRadixSquared) + ((ri1 * biRadix) + ri2);
        while (c1 > c2) {
            --q.digits[i - t - 1];
            c1 = q.digits[i - t - 1] * ((yt * biRadix) | yt1);
            c2 = (ri * biRadix * biRadix) + ((ri1 * biRadix) + ri2);
        }

        b = biMultiplyByRadixPower(y, i - t - 1);
        r = biSubtract(r, biMultiplyDigit(b, q.digits[i - t - 1]));
        if (r.isNeg) {
            // The quotient digit was one too large: add the divisor back.
            r = biAdd(r, b);
            --q.digits[i - t - 1];
        }
    }
    r = biShiftRight(r, lambda);
    // Fiddle with the signs and stuff to make sure that 0 <= r < y.
    q.isNeg = x.isNeg != origYIsNeg;
    if (x.isNeg) {
        if (origYIsNeg) {
            q = biAdd(q, bigOne);
        } else {
            q = biSubtract(q, bigOne);
        }
        y = biShiftRight(y, lambda);
        r = biSubtract(y, r);
    }
    // Check for the unbelievably stupid degenerate case of r == -0.
    if (r.digits[0] == 0 && biHighIndex(r) == 0) r.isNeg = false;

    return new Array(q, r);
}

/** Bit position just above the highest set bit of the top non-zero digit. */
function biNumBits(x) {
    var n = biHighIndex(x);
    var d = x.digits[n];
    var m = (n + 1) * bitsPerDigit;
    var result;
    for (result = m; result > m - bitsPerDigit; --result) {
        if ((d & 0x8000) != 0) break;
        d <<= 1;
    }
    return result;
}

/** x shifted left by n bits. */
function biShiftLeft(x, n) {
    var digitCount = Math.floor(n / bitsPerDigit);
    var result = new BigInt();
    arrayCopy(x.digits, 0, result.digits, digitCount,
              result.digits.length - digitCount);
    var bits = n % bitsPerDigit;
    var rightBits = bitsPerDigit - bits;
    for (var i = result.digits.length - 1, i1 = i - 1; i > 0; --i, --i1) {
        result.digits[i] = ((result.digits[i] << bits) & maxDigitVal) |
                           ((result.digits[i1] & highBitMasks[bits]) >>> (rightBits));
    }
    // i is 0 once the loop has finished.
    result.digits[0] = ((result.digits[i] << bits) & maxDigitVal);
    result.isNeg = x.isNeg;
    return result;
}

/** Copy up to n elements from src[srcStart...] into dest[destStart...]. */
function arrayCopy(src, srcStart, dest, destStart, n) {
    var m = Math.min(srcStart + n, src.length);
    for (var i = srcStart, j = destStart; i < m; ++i, ++j) {
        dest[j] = src[i];
    }
}

/** x with its digits moved up by n positions (x * radix^n). */
function biMultiplyByRadixPower(x, n) {
    var result = new BigInt();
    arrayCopy(x.digits, 0, result.digits, n, result.digits.length - n);
    return result;
}

/** Three-way comparison: 1 if x > y, -1 if x < y, 0 if equal. */
function biCompare(x, y) {
    if (x.isNeg != y.isNeg) {
        return 1 - 2 * Number(x.isNeg);
    }
    for (var i = x.digits.length - 1; i >= 0; --i) {
        if (x.digits[i] != y.digits[i]) {
            if (x.isNeg) {
                return 1 - 2 * Number(x.digits[i] > y.digits[i]);
            } else {
                return 1 - 2 * Number(x.digits[i] < y.digits[i]);
            }
        }
    }
    return 0;
}

/** Difference x - y. */
function biSubtract(x, y) {
    var result;
    if (x.isNeg != y.isNeg) {
        // Different signs: x - y == x + (-y).
        y.isNeg = !y.isNeg;
        result = biAdd(x, y);
        y.isNeg = !y.isNeg;
    } else {
        result = new BigInt();
        var n, c, i;
        c = 0;
        for (i = 0; i < x.digits.length; ++i) {
            n = x.digits[i] - y.digits[i] + c;
            result.digits[i] = n % biRadix;
            // Stupid non-conforming modulus operation.
            if (result.digits[i] < 0) result.digits[i] += biRadix;
            c = 0 - Number(n < 0);
        }
        // Fix up the negative sign, if any.
        if (c == -1) {
            c = 0;
            for (i = 0; i < x.digits.length; ++i) {
                n = 0 - result.digits[i] + c;
                result.digits[i] = n % biRadix;
                // Stupid non-conforming modulus operation.
                if (result.digits[i] < 0) result.digits[i] += biRadix;
                c = 0 - Number(n < 0);
            }
            // Result is opposite sign of arguments.
            result.isNeg = !x.isNeg;
        } else {
            // Result is same sign.
            result.isNeg = x.isNeg;
        }
    }
    return result;
}

/** Product of BigInt x and the single digit y. */
function biMultiplyDigit(x, y) {
    var n, c, uv;

    var result = new BigInt();
    n = biHighIndex(x);
    c = 0;
    for (var j = 0; j <= n; ++j) {
        uv = result.digits[j] + x.digits[j] * y + c;
        result.digits[j] = uv & maxDigitVal;
        c = uv >>> biRadixBits;
        //c = Math.floor(uv / biRadix);
    }
    result.digits[1 + n] = c;
    return result;
}

/** x shifted right by n bits. */
function biShiftRight(x, n) {
    var digitCount = Math.floor(n / bitsPerDigit);
    var result = new BigInt();
    arrayCopy(x.digits, digitCount, result.digits, 0,
              x.digits.length - digitCount);
    var bits = n % bitsPerDigit;
    var leftBits = bitsPerDigit - bits;
    for (var i = 0, i1 = i + 1; i < result.digits.length - 1; ++i, ++i1) {
        result.digits[i] = (result.digits[i] >>> bits) |
                           ((result.digits[i1] & lowBitMasks[bits]) << leftBits);
    }
    result.digits[result.digits.length - 1] >>>= bits;
    result.isNeg = x.isNeg;
    return result;
}

/** x reduced modulo this.modulus. */
function BarrettMu_modulo(x) {
    var q1 = biDivideByRadixPower(x, this.k - 1);
    var q2 = biMultiply(q1, this.mu);
    var q3 = biDivideByRadixPower(q2, this.k + 1);
    var r1 = biModuloByRadixPower(x, this.k + 1);
    var r2term = biMultiply(q3, this.modulus);
    var r2 = biModuloByRadixPower(r2term, this.k + 1);
    var r = biSubtract(r1, r2);
    if (r.isNeg) {
        r = biAdd(r, this.bkplus1);
    }
    var rgtem = biCompare(r, this.modulus) >= 0;
    while (rgtem) {
        r = biSubtract(r, this.modulus);
        rgtem = biCompare(r, this.modulus) >= 0;
    }
    return r;
}

/** (x * y) mod this.modulus. */
function BarrettMu_multiplyMod(x, y) {
    /*
    x = this.modulo(x);
    y = this.modulo(y);
    */
    var xy = biMultiply(x, y);
    return this.modulo(xy);
}

/** (x ^ y) mod this.modulus, by square-and-multiply over the bits of y. */
function BarrettMu_powMod(x, y) {
    var result = new BigInt();
    result.digits[0] = 1;
    var a = x;
    var k = y;
    while (true) {
        if ((k.digits[0] & 1) != 0) result = this.multiplyMod(result, a);
        k = biShiftRight(k, 1);
        if (k.digits[0] == 0 && biHighIndex(k) == 0) break;
        a = this.multiplyMod(a, a);
    }
    return result;
}

/** Product x * y. */
function biMultiply(x, y) {
    var result = new BigInt();
    var c;
    var n = biHighIndex(x);
    var t = biHighIndex(y);
    var uv, k, j;

    for (var i = 0; i <= t; ++i) {
        c = 0;
        k = i;
        for (j = 0; j <= n; ++j, ++k) {
            uv = result.digits[k] + x.digits[j] * y.digits[i] + c;
            result.digits[k] = uv & maxDigitVal;
            c = uv >>> biRadixBits;
            //c = Math.floor(uv / biRadix);
        }
        result.digits[i + n + 1] = c;
    }
    // Someone give me a logical xor, please.
    result.isNeg = x.isNeg != y.isNeg;
    return result;
}

/** x with its lowest n digits dropped (floor(x / radix^n)). */
function biDivideByRadixPower(x, n) {
    var result = new BigInt();
    arrayCopy(x.digits, n, result.digits, 0, result.digits.length - n);
    return result;
}

/** The lowest n digits of x (x mod radix^n). */
function biModuloByRadixPower(x, n) {
    var result = new BigInt();
    arrayCopy(x.digits, 0, result.digits, 0, n);
    return result;
}

/** Lower-case hex string of x, four characters per digit from the top non-zero digit down. */
function biToHex(x) {
    var result = "";
    for (var i = biHighIndex(x); i > -1; --i) {
        result += digitToHex(x.digits[i]);
    }
    return result;
}

/** Four-character lower-case hex string of one 16-bit digit. */
function digitToHex(n) {
    var mask = 0xf;
    var result = "";
    for (var i = 0; i < 4; ++i) {
        result += hexToChar[n & mask];
        n >>>= 4;
    }
    return reverseStr(result);
}

/** Return s reversed. */
function reverseStr(s) {
    var result = "";
    for (var i = s.length - 1; i > -1; --i) {
        result += s.charAt(i);
    }
    return result;
}
调用js:
import execjs
# Compile the extracted JS file and call its RsaFunc entry point.
# The encoding is given explicitly so the read does not depend on the
# platform's default code page.
with open(r"./fujian.js", "r", encoding="utf-8") as f:
    ctx = execjs.compile(f.read())
parm = ctx.call("RsaFunc")
print(parm)
结果:
_qddagsx_02095bad0b=9063b4a0c77fba79ab30d9763292bc5f71dbc6b18fdec0c97232578875814cda21289691adf879b682c608b6cce88f3dc02a1c44059e319734f035fe3b61d2ebce0d78f0608be571b40aaa27caf8c4e9cab9c5d62a437e5e8261b45d13ab14d52c2605f68f48e442ad508e29ab20ada8b4635ce6a2e3cd00ff644906372ab5fd;expires=Fri, 27 Mar 2020 09:16:05 GMT;path=/
_qddagsx_02095bad0b参数,拿到结束。其他参数有的可以自己提取,有的甚至不用,所以到这里就结束了。
4.代码:
# -*- coding: UTF-8 -*-
'''
@Author :Jason
'''
import requests
import random
import execjs
import time
import re
# One session shared by every request in this script.
session = requests.Session()

# Put your own proxies here, one requests-style mapping per entry, e.g.
#     {"http": "http://host:port", "https": "http://host:port"},
# This is the only part of the script that has to be edited; the rest can be
# used as-is.
PROXY_POOL = [
]
# One proxy is picked at random per run. With an empty pool the script falls
# back to a direct connection (random.choice raises on an empty list).
proxies = random.choice(PROXY_POOL) if PROXY_POOL else None

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
}
class getInfoFromGgzyfj(object):
    """Fetch the cookies the site requires, then query its listing endpoint.

    Relies on the module-level ``session``, ``proxies`` and ``headers``.
    """

    # Seconds to wait for the server; requests waits indefinitely without it.
    REQUEST_TIMEOUT = 30

    # BigInt constants the extracted RsaFunc depends on; they are prepended to
    # the JS source before it is compiled.
    _JS_CONSTANTS = '''
var biRadixBits = 16;
var bitsPerDigit = biRadixBits;
var biRadix = 1 << 16; // = 2^16 = 65536
var biRadixSquared = biRadix * biRadix;
var biHalfRadix = biRadix >>> 1;
var maxDigitVal = biRadix - 1;
var highBitMasks = new Array(0x0000, 0x8000, 0xC000, 0xE000, 0xF000, 0xF800,
0xFC00, 0xFE00, 0xFF00, 0xFF80, 0xFFC0, 0xFFE0,
0xFFF0, 0xFFF8, 0xFFFC, 0xFFFE, 0xFFFF);
var lowBitMasks = new Array(0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F,
0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF,
0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF);
var hexToChar = new Array('0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'a', 'b', 'c', 'd', 'e', 'f');
'''

    def getCookie(self):
        """Collect the cookies needed by ``getInfo``.

        The response cookies of the listing page are combined with the cookie
        produced by running the page's own ``RsaFunc`` JavaScript.

        :return: cookie name -> value mapping
        :raises RuntimeError: if the RsaFunc snippet cannot be found in the page
        """
        res = session.get(
            url="https://ggzyfw.fujian.gov.cn/Website/JYXXNew.aspx",
            proxies=proxies,
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        res.encoding = "utf-8"
        cookies_dict = requests.utils.dict_from_cookiejar(res.cookies)

        # Head of RsaFunc as embedded in the page, up to the `var ck = "...";` line.
        needJS = re.findall(r'WPA end.*?script.*?(function.*?var[\s]*ck.*?";).*?if', res.text, re.S)
        if not needJS:
            # Fail with a readable message instead of a bare IndexError.
            raise RuntimeError(
                "RsaFunc snippet not found in the page; the page layout may have changed"
            )

        # ./fj.js supplies the remainder of the script that follows the
        # extracted function head.
        with open(r"./fj.js", "r", encoding="utf-8") as f:
            js_tail = f.read()
        ctx = execjs.compile(self._JS_CONSTANTS + needJS[0] + js_tail)
        partCookie = ctx.call("RsaFunc")

        # RsaFunc returns "name=value;expires=...;path=/"; keep the first pair.
        # partition() keeps the whole value even if it contains '='.
        name, _, value = partCookie.split(';')[0].partition('=')
        cookies_dict[name] = value
        return cookies_dict

    def getInfo(self, cookies):
        """Request today's first listing page with the given cookies and print it.

        :param cookies: cookie mapping, as returned by ``getCookie``
        :return: None
        """
        # Computed once so both bounds are guaranteed to be on the same day.
        today = time.strftime('%Y-%m-%d')
        data = {
            "OPtype": "GetListNew",
            "pageNo": str(1),
            "pageSize": "10",
            "proArea": "-1",
            "category": "GCJS",
            "announcementType": "-1",
            "ProType": "-1",
            "xmlx": "-1",
            "projectName": "",
            "TopTime": today + " 00:00:00",
            "EndTime": today + " 23:59:59",
            # NOTE(review): presumably a cache-busting random number; kept as
            # captured because the fixed value works. Confirm before changing.
            "rrr": "0.3712223914365884"
        }
        res = session.post(
            url="https://ggzyfw.fj.gov.cn/Website/AjaxHandler/BuilderHandler.ashx",
            proxies=proxies,
            headers=headers,
            data=data,
            cookies=cookies,
            timeout=self.REQUEST_TIMEOUT,
        )
        res.encoding = "utf-8"
        print(res.text)
if __name__ == "__main__":
    # Script entry point: obtain the cookies first, then run the query with them.
    crawler = getInfoFromGgzyfj()
    crawler.getInfo(cookies=crawler.getCookie())