【爬虫】01 urllib 爬取网页

import urllib.parse
import urllib.request

# Fetch the Baidu home page. The `with` block closes the HTTP response (and
# its underlying connection) when we are done, even if a call below raises.
with urllib.request.urlopen('https://www.baidu.com/') as response:
    # Read the whole body as a list of raw byte lines.
    data = response.readlines()
    print(data)

    # Response metadata: the headers sent back by the server.
    print(response.info())

    # HTTP status code; 200 means the request was handled successfully.
    print(response.getcode())

    # URL of the resource actually retrieved (may differ from the requested
    # URL if a redirect was followed).
    print(response.geturl())

# Decode a percent-encoded URL back into human-readable text.
# NOTE(review): the last fragment ends with a space inside the literal; it is
# kept unchanged here so the printed output stays the same -- confirm whether
# it is intentional.
url = r'https://www.baidu.com/s?wd=200%E4%BB%A3%E8%A1%A8%E4%BB%80%E4%B9%88&rsv_spt=1&rsv_iqid=0x95ea389a0003eb1d&issp' \
      '=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&tn=baiduhome_pg&rsv_enter=1&rsv_dl=tb&rsv_sug3=21&rsv_sug1=28&rsv_sug7=101' \
      '&rsv_sug2=0&inputT=9758&rsv_sug4=10156 '
# unquote() is documented in urllib.parse; urllib.request only happens to
# re-export it through its own internal imports, so call it from its home module.
newUrl = urllib.parse.unquote(url)
print(newUrl)
发布了96 篇原创文章 · 获赞 103 · 访问量 1万+

猜你喜欢

转载自blog.csdn.net/weixin_38114487/article/details/104707494