【2021-01-14】JS逆向之招标投标公告服务平台


前言

目标网站:aHR0cHM6Ly9idWxsZXRpbi5jZWJwdWJzZXJ2aWNlLmNvbS8=
加密类型:验证码缺口位置



一、页面分析

进入列表页时需要先完成滑动验证码。验证码接口的响应里包含缺块图、背景图(带缺口)和完整图,这里只用背景图和完整图,通过逐像素比较两张图来识别缺口位置。
在这里插入图片描述

在这里插入图片描述


二、缺口位置识别

这里传入两个图片的地址即可

# 获取缺口位置
def get_point(bg_url, full_url, index):
    """Find the slider-captcha gap by diffing the gapped image against the full one.

    Both images are downloaded, saved to ``1.png`` / ``2.png`` for manual
    inspection, and compared pixel by pixel. The scan goes column by column
    (left to right, top to bottom inside each column); the first pixel whose
    summed per-channel absolute difference is at least 200 is taken as the
    gap position. A 45x40 crop of the full image at that position is written
    to ``position.png`` so the result can be checked by eye.

    Args:
        bg_url: URL of the background image that contains the gap.
        full_url: URL of the complete image without the gap.
        index: Unused; kept so existing callers keep working.

    Returns:
        A one-element list ``[{"x": ..., "y": ...}]`` where ``x`` is the
        detected column minus 12 (the original author's offset correction)
        and ``y`` is a random integer in ``[-3, 3]``. Returns ``None`` when
        no pixel pair differs by the threshold.

    Raises:
        requests.RequestException: if a download fails, times out, or the
            server answers with an error status.
    """

    def download(url):
        # A bounded wait keeps a stalled server from hanging the script, and
        # an error page is reported as such instead of as an unreadable image.
        resp = requests.get(url=url, timeout=10)
        resp.raise_for_status()
        return resp.content

    img_data1 = download(bg_url)
    img_data2 = download(full_url)

    # Raw copies kept on disk for debugging.
    with open('1.png', 'wb') as f:
        f.write(img_data1)
    with open('2.png', 'wb') as f:
        f.write(img_data2)

    cut_image = Image.open(BytesIO(img_data1))
    full_image = Image.open(BytesIO(img_data2))

    # Normalise both images to RGBA so the comparison also works for
    # palette/grayscale images and for pairs with different modes. For two
    # RGB or two RGBA images the per-pixel difference is unchanged.
    # int16 prevents the uint8 subtraction from wrapping around.
    cut = np.asarray(cut_image.convert('RGBA')).astype(np.int16)
    full = np.asarray(full_image.convert('RGBA')).astype(np.int16)

    # Compare only the region both images share, in case their sizes differ.
    height = min(cut.shape[0], full.shape[0])
    width = min(cut.shape[1], full.shape[1])
    diff = np.abs(cut[:height, :width] - full[:height, :width]).sum(axis=2)

    # The arrays are (row, column) = (y, x). Transposing makes argwhere yield
    # (x, y) pairs ordered by x first, then y -- the column-by-column scan.
    hits = np.argwhere((diff >= 200).T)
    if hits.size == 0:
        return None

    x, y = (int(v) for v in hits[0])

    # Save the detected region to check that it really is the gap.
    full_image.crop((x, y, x + 45, y + 40)).save("position.png")

    # The reported x is shifted by 12 px (an offset exists, per the original
    # note); y is a small random jitter.
    return [{"x": x - 12, "y": random.randint(-3, 3)}]

三、数据获取

验证码校验成功后会返回一个 data 参数,带着它去请求列表页即可
在这里插入图片描述
在这里插入图片描述


四、源码

import random
from io import BytesIO
import numpy as np
import requests
from PIL import Image
from lxml import etree

# 获取缺口位置
def get_point(bg_url, full_url, index):
    """Find the slider-captcha gap by diffing the gapped image against the full one.

    Both images are downloaded, saved to ``1.png`` / ``2.png`` for manual
    inspection, and compared pixel by pixel. The scan goes column by column
    (left to right, top to bottom inside each column); the first pixel whose
    summed per-channel absolute difference is at least 200 is taken as the
    gap position. A 45x40 crop of the full image at that position is written
    to ``position.png`` so the result can be checked by eye.

    Args:
        bg_url: URL of the background image that contains the gap.
        full_url: URL of the complete image without the gap.
        index: Unused; kept so existing callers keep working.

    Returns:
        A one-element list ``[{"x": ..., "y": ...}]`` where ``x`` is the
        detected column minus 12 (the original author's offset correction)
        and ``y`` is a random integer in ``[-3, 3]``. Returns ``None`` when
        no pixel pair differs by the threshold.

    Raises:
        requests.RequestException: if a download fails, times out, or the
            server answers with an error status.
    """

    def download(url):
        # A bounded wait keeps a stalled server from hanging the script, and
        # an error page is reported as such instead of as an unreadable image.
        resp = requests.get(url=url, timeout=10)
        resp.raise_for_status()
        return resp.content

    img_data1 = download(bg_url)
    img_data2 = download(full_url)

    # Raw copies kept on disk for debugging.
    with open('1.png', 'wb') as f:
        f.write(img_data1)
    with open('2.png', 'wb') as f:
        f.write(img_data2)

    cut_image = Image.open(BytesIO(img_data1))
    full_image = Image.open(BytesIO(img_data2))

    # Normalise both images to RGBA so the comparison also works for
    # palette/grayscale images and for pairs with different modes. For two
    # RGB or two RGBA images the per-pixel difference is unchanged.
    # int16 prevents the uint8 subtraction from wrapping around.
    cut = np.asarray(cut_image.convert('RGBA')).astype(np.int16)
    full = np.asarray(full_image.convert('RGBA')).astype(np.int16)

    # Compare only the region both images share, in case their sizes differ.
    height = min(cut.shape[0], full.shape[0])
    width = min(cut.shape[1], full.shape[1])
    diff = np.abs(cut[:height, :width] - full[:height, :width]).sum(axis=2)

    # The arrays are (row, column) = (y, x). Transposing makes argwhere yield
    # (x, y) pairs ordered by x first, then y -- the column-by-column scan.
    hits = np.argwhere((diff >= 200).T)
    if hits.size == 0:
        return None

    x, y = (int(v) for v in hits[0])

    # Save the detected region to check that it really is the gap.
    full_image.crop((x, y, x + 45, y + 40)).save("position.png")

    # The reported x is shifted by 12 px (an offset exists, per the original
    # note); y is a small random jitter.
    return [{"x": x - 12, "y": random.randint(-3, 3)}]



if __name__ == '__main__':
    # Script flow: fetch a captcha, locate its gap, submit the gap position,
    # then use the token from the check response to request the bulletin list.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Accept-Language': 'zh-CN,zh;q=0.9',
    }
    # Upper bound, in seconds, for every HTTP call so the script cannot hang.
    timeout = 10

    session = requests.session()

    # Step 1: request a captcha; the JSON names the two images and a token.
    captcha_response = session.post(
        url='https://bulletin.cebpubservice.com/captcha/captcha/captchaImage',
        timeout=timeout,
    )
    js_data = captcha_response.json()
    # 'bigImgName' is passed to get_point as the gapped background image and
    # 'sourceImgName' as the complete image.
    url1 = 'https://bulletin.cebpubservice.com/captcha/captcha/image/' + js_data['bigImgName']
    url2 = 'https://bulletin.cebpubservice.com/captcha/captcha/image/' + js_data['sourceImgName']
    dataToken = js_data['dataToken']

    # Step 2: locate the gap. get_point returns None when the two images
    # never differ enough, so stop with a clear message instead of failing
    # on the subscript below.
    points = get_point(url1, url2, 0)
    if not points:
        raise SystemExit('captcha gap not found; inspect 1.png and 2.png')
    point = str(points[0]['x'])
    print(dataToken, point)

    # Step 3: submit the detected position for verification.
    data = {
        'dataToken': dataToken,
        'point': point,
    }
    response = session.post(
        'https://bulletin.cebpubservice.com/captcha/captcha/checkCaptcha',
        headers=headers,
        data=data,
        timeout=timeout,
    )
    print(response.text)

    # The list request needs the 'data' value of the check response as its
    # token; without it the captcha was not accepted, so stop here.
    token = response.json().get('data')
    if not token:
        raise SystemExit('captcha check failed: ' + response.text)

    # Step 4: query the bulletin list with the token.
    params = (
        ('searchDate', '2021-01-14'),
        ('dates', '0'),
        ('categoryId', '88'),
        ('industryName', ''),
        ('area', ''),
        ('status', ''),
        ('publishMedia', ''),
        ('sourceInfo', ''),
        ('showStatus', ''),
        ('word', ''),
        ('token', token),
    )

    response = session.get(
        'https://bulletin.cebpubservice.com/xxfbcmses/search/bulletin.html',
        headers=headers,
        params=params,
        timeout=timeout,
    )
    selector = etree.HTML(response.text)
    titles = selector.xpath('/html/body/table//tr/td[1]/a')
    for title in titles:
        # itertext() also collects text inside child tags and never yields
        # None, so an <a> without direct text no longer breaks .strip().
        print(''.join(title.itertext()).strip())

猜你喜欢

转载自blog.csdn.net/qq_26079939/article/details/112612108