# Scrape image information from a page (爬取的照片信息)
from urllib import request
import re
def getResponse(url):
    """Open *url* and return the response object produced by ``urlopen``.

    Args:
        url: URL string to request.

    Returns:
        The response object returned by ``urllib.request.urlopen``. It is
        returned unread and open; the caller reads and closes it.

    Raises:
        Nothing is caught here, so any error raised by ``Request`` or
        ``urlopen`` (for example ``urllib.error.URLError``) propagates.
    """
    url_request = request.Request(url)
    return request.urlopen(url_request)
def getjpg(data):
    """Return every ``src`` attribute in *data* that points at an http .jpg URL.

    Args:
        data: HTML text to scan.

    Returns:
        A list of the matched attribute strings in order of appearance.
        Each item is the whole attribute, including the leading ``src="``
        and the closing quote. The list is empty when nothing matches.
    """
    # [^"]+? keeps each match inside a single quoted attribute value, so a
    # non-jpg src cannot swallow markup up to a later ".jpg"; \. requires a
    # literal dot before the extension instead of any character.
    return re.findall(r'src="http[^"]+?\.jpg"', data)
# Fetch the page, extract the .jpg src attributes and print them as a list
# numbered from 1.
http_response = getResponse("http://dzh.mop.com/")
with http_response:  # release the connection once the body has been read
    data = http_response.read().decode('utf-8')
L = getjpg(data)
for n, jpginfo in enumerate(L, start=1):
    print(n, '------', jpginfo)
# Scrape web page information (爬取网页信息)
import urllib.request
# Download the page at `url`, print its decoded HTML, then print some
# metadata about the response.
url = "http://www.douban.com/"
# Named `req` rather than `request` so that the `request` module alias
# imported at the top of the file is not rebound to a Request object.
req = urllib.request.Request(url)
with urllib.request.urlopen(req) as response:  # closes the connection on exit
    data = response.read()
    data = data.decode('utf-8')
    print(data)
    print(type(response))
    print(response.geturl())   # URL of the resource actually retrieved
    print(response.info())     # response headers
    print(response.getcode())  # HTTP status code