"""Send text to the Youdao web translation endpoint and print the raw reply.

Run as a script: it prompts for the text to translate, POSTs it as a
URL-encoded form, and prints the response body (requested as JSON via the
``doctype`` form field) without further parsing.
"""

import urllib.parse
import urllib.request

# NOTE: this is the plain ``translate`` path, not the ``translate_o`` path
# shown by the browser's network inspector; the author reports that the
# ``_o`` variant produces an error for this hand-built request.
TRANSLATE_URL = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule"

# Browser-like User-Agent sent with every request.
USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/69.0.3497.100 Safari/537.36"
)

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script indefinitely.
DEFAULT_TIMEOUT = 10


def build_form_data(text):
    """Return the form fields for translating *text*, in request order.

    ``salt`` and ``sign`` are fixed values captured from a browser session.
    NOTE(review): the author's unverified note says only ``i`` and
    ``doctype`` are actually required by this endpoint -- confirm before
    trimming the other fields.
    """
    return {
        "i": text,
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": "1537698317221",
        "sign": "b569c8bf54bf1b0d71725726e7c1a5",
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTIME",
        "typoResult": "false",
    }


def encode_form(form):
    """Return *form* as URL-encoded bytes suitable for a POST body.

    ``urlencode`` percent-encodes non-ASCII characters (as UTF-8 by
    default), so its result is pure ASCII and the byte encoding chosen here
    cannot alter the payload.
    """
    return urllib.parse.urlencode(form).encode("ascii")


def build_request(text):
    """Return the ``urllib.request.Request`` that asks for *text* translated.

    Supplying ``data`` makes urllib issue the request as a POST.
    """
    return urllib.request.Request(
        TRANSLATE_URL,
        data=encode_form(build_form_data(text)),
        headers={"User-Agent": USER_AGENT},
    )


def fetch_translation(text, timeout=DEFAULT_TIMEOUT):
    """Return the response body for *text*, decoded as UTF-8.

    Raises ``urllib.error.URLError`` (or a subclass) when the request fails
    or times out.
    """
    # The context manager closes the connection even if read/decode raises.
    with urllib.request.urlopen(build_request(text), timeout=timeout) as response:
        return response.read().decode("utf-8")


def main():
    """Prompt for the text to translate and print the server's raw reply."""
    key = input("请输入需要翻译的文字:")
    print(fetch_translation(key))


if __name__ == "__main__":
    main()
把在浏览器“审查元素”中抓到的请求 URL 里 translate 后面的 _o 去掉(即把 translate_o 改成 translate),报错就消失了,可以正常爬取。
表单数据 data 中,只有 doctype 和 i 这两个键不能去掉;其余的键即使删除,翻译也能正常运行。[此行待验证]