Python学习笔记--Python 爬虫入门 -17-5 js 加密 (和有道词典的瓜葛)

 - js加密
    - 有的反爬虫策略采用js对需要传输的数据进行加密处理(通常是取md5值)
    - 经过加密,传输的就是密文,但是
    - 加密函数或者过程一定是在浏览器完成,也就是一定会把代码(js代码)暴露给使用者
    - 通过阅读加密算法,就可以模拟出加密过程,从而达到破解的目的
    - 过程参看案例 v18
    - 视频请参考

图灵学院

https://study.163.com/course/courseLearn.htm?courseId=1004987028#/learn/video?lessonId=1052101889&courseId=1004987028
"""
破解有道词典
"""
from urllib import  request,parse

def youdao(key):
    """Send one translation request to Youdao with a fixed salt/sign pair.

    The form fields ``salt`` and ``sign`` are hard-coded values, so the
    request is only expected to succeed for the word they were produced
    for (presumably "girl" -- the note following this script reports
    ``{"errorCode":50}`` for other words).

    Args:
        key: The text to translate; sent as the ``i`` form field.

    Returns:
        None. The decoded response body is printed to stdout.
    """
    url = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"

    form = {
        "i": key,
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": "1536459902050",
        "sign": "554b3a4f54ab1a44ca2246d7a365ed9d",
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTIME",
        "typoResult": "false",
    }

    # urlencode() returns str; the request body must be bytes.
    data = parse.urlencode(form).encode()

    headers = {
        "Accept": "application/json,text/javascript,*/*; q=0.01",
        # "Accept-Encoding" is deliberately not sent.
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        # Must match the real body size, which depends on ``key``.
        "Content-Length": str(len(data)),
        "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
        # NOTE(review): the leading "[email protected]" looks like an
        # e-mail-obfuscation artifact from the page this was copied from,
        # not a real cookie value -- confirm and replace with a fresh cookie.
        "Cookie": "[email protected];OUTFOX_SEARCH_USER_ID_NCOO=129611368.41252394;JSESSIONID=aaaI4aiuvQBag6n9d53ww;fanyi-ad-id=49843;fanyi-ad-closed=1;___rl__test__cookies=1536459902048",
        "Host": "fanyi.youdao.com",
        "Origin": "http://fanyi.youdao.com",
        "Referer": "http://fanyi.youdao.com/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest",
    }

    req = request.Request(url=url, data=data, headers=headers)
    # The context manager closes the HTTP response even if read/decode fails.
    with request.urlopen(req) as res:
        html = res.read().decode()

    print(html)

# Script entry point: run a single demo query when executed directly.
if __name__ == '__main__':
    youdao("girl")

换个单词(即请求中的 "i" 不再是 "girl"),服务器返回 {"errorCode":50}:因为上面的 salt 和 sign 是固定值,而 sign 需要根据要翻译的单词和 salt 重新计算,所以有了如下升级版 v19

      js 加密算法藏在了 fanyi.min.js (打开 fanyi.youdao.com,按 F12 后刷新一下网页即可看到)

注意: 6x(ZHw]mwzX#u0V7@yfwK  字符串会不定期更新

"""
破解有道词典
"""
from urllib import  request,parse

"""
var r = function(e) {
    var t = "" + ((new Date).getTime() + parseInt(10 * Math.random(), 10));
    return {
        salt: t,
        sign: n.md5("fanyideskweb" + e + t + "6x(ZHw]mwzX#u0V7@yfwK")
    }
};

"""

def getSalt():
    """Return a salt: the current time in milliseconds plus a random 0-9.

    Python port of the JavaScript expression quoted above this function::

        (new Date).getTime() + parseInt(10 * Math.random(), 10)

    The generated value is also printed for debugging.

    Returns:
        int: millisecond timestamp plus a random integer in ``[0, 9]``.
    """
    import random
    import time

    # parseInt(10 * Math.random(), 10) yields 0..9 because Math.random() < 1;
    # random.randint is inclusive on both ends, so the upper bound is 9.
    salt = int(time.time() * 1000) + random.randint(0, 9)
    print("getSalt...{}".format(salt))
    return salt

def getMD5(v):
    """Return the hexadecimal MD5 digest of the string ``v`` (UTF-8 encoded)."""
    import hashlib

    return hashlib.md5(v.encode("utf-8")).hexdigest()

def getSign(key,salt):
    """Build the ``sign`` form field for a translation request.

    Concatenates the client name, the text to translate, the salt and a
    fixed secret string, prints the concatenation for debugging, and
    returns its MD5 hex digest (via ``getMD5``).

    Args:
        key: The text being translated (str).
        salt: The salt value; converted with ``str()`` before joining.

    Returns:
        The MD5 hex digest of the concatenated string.
    """
    plain = "".join(['fanyideskweb', key, str(salt), '6x(ZHw]mwzX#u0V7@yfwK'])
    print("getSign ...{}".format(plain))
    return getMD5(plain)

def youdao(key):
    """Translate ``key`` via Youdao, computing salt and sign per request.

    A fresh salt is obtained from ``getSalt()`` and the matching signature
    from ``getSign(key, salt)``; both are sent as form fields together with
    the text to translate.

    Args:
        key: The text to translate; sent as the ``i`` form field.

    Returns:
        None. The decoded response body is printed to stdout.
    """
    url = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"

    # The same salt must be used for the "salt" field and inside the sign.
    salt = getSalt()
    form = {
        "i": key,
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": str(salt),
        "sign": getSign(key, salt),
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTIME",
        "typoResult": "false",
    }

    # urlencode() returns str; the request body must be bytes.
    data = parse.urlencode(form).encode()

    headers = {
        "Accept": "application/json,text/javascript,*/*; q=0.01",
        # "Accept-Encoding" is deliberately not sent.
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        # Header values are strings; the length is that of the encoded body.
        "Content-Length": str(len(data)),
        "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
        # NOTE(review): the leading "[email protected]" looks like an
        # e-mail-obfuscation artifact from the page this was copied from,
        # not a real cookie value -- confirm and replace with a fresh cookie.
        "Cookie": "[email protected];OUTFOX_SEARCH_USER_ID_NCOO=129611368.41252394;JSESSIONID=aaaI4aiuvQBag6n9d53ww;fanyi-ad-id=49843;fanyi-ad-closed=1;___rl__test__cookies=1536459902048",
        "Host": "fanyi.youdao.com",
        "Origin": "http://fanyi.youdao.com",
        "Referer": "http://fanyi.youdao.com/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest",
    }

    req = request.Request(url=url, data=data, headers=headers)
    # The context manager closes the HTTP response even if read/decode fails.
    with request.urlopen(req) as res:
        html = res.read().decode()

    print(html)

# Script entry point: run a single demo query when executed directly.
if __name__ == '__main__':
    youdao("bard")

猜你喜欢

转载自blog.csdn.net/u013985879/article/details/82556355