"""Fetch a Baidu search result page and save it as a local HTML file.

Demonstrates percent-encoding non-ASCII (e.g. Chinese) search terms and
joining several query parameters with ``urllib.parse.urlencode``.
"""
from urllib import request
# parse.urlencode percent-encodes non-ASCII characters (as UTF-8 bytes).
from urllib import parse

# The base URL ends at "?" on purpose: urlencode() emits the "wd=" key
# itself, so a base ending in "?wd=" would produce "?wd=wd=<term>".
BASE_URL = 'http://www.baidu.com/s?'

# Browser-like User-Agent sent with the request.
HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0'
}


def build_query(content: str) -> str:
    """Return the URL-encoded query string for the search term *content*.

    Plain string concatenation only works for a single parameter; building
    the query from a dict scales to any number of parameters.
    """
    return parse.urlencode({
        'wd': content,
        'rsv_sp': 1,
    })


def build_search_url(content: str) -> str:
    """Return the full search URL for the search term *content*."""
    return BASE_URL + build_query(content)


def fetch_html(url: str) -> str:
    """Download *url* with the module's headers and return the body as text.

    The body is decoded as UTF-8. Errors raised by ``urlopen`` (for example
    ``urllib.error.URLError``) and ``UnicodeDecodeError`` propagate to the
    caller.
    """
    req = request.Request(url, headers=HEADERS)
    # Context manager guarantees the HTTP response is closed.
    with request.urlopen(req) as response:
        return response.read().decode('utf-8')


def main() -> None:
    """Prompt for a search term, fetch the result page and save it.

    The encoded query string is printed, and the page is written to
    ``<term>.html`` (relative path) encoded as UTF-8.
    """
    content = input('请输入你要搜索的内容:')
    qs = build_query(content)
    print(qs)
    html = fetch_html(BASE_URL + qs)
    with open(content + '.html', 'w', encoding='utf-8') as f:
        f.write(html)


if __name__ == '__main__':
    main()
# Sample run (console output):
# 请输入你要搜索的内容:CSDN学院
# wd=CSDN%E5%AD%A6%E9%99%A2&rsv_sp=1
# Process finished with exit code 0