爬虫入门

import urllib
import urllib2

# Form fields to send as the body of the POST request.
dic = {'name': 'Dick', 'age': 20}

# URL-encode the fields into the payload string.
data = urllib.urlencode(dic)
# Build the request object; supplying `data` makes urllib2 issue a POST.
request = urllib2.Request('http://baidu.com', data)
# Open the page; urlopen returns the response object.
# NOTE: the variable keeps its original spelling ("responce").
responce = urllib2.urlopen(request)
try:
    the_page = responce.read()
finally:
    # Always release the underlying connection, even if read() raises.
    responce.close()
# Parenthesised single-argument form prints the same text on Python 2
# and is also valid Python 3 syntax.
print(the_page)


# Some sites do not like being visited by programs (non-human access).
# In that case the request can present itself as Internet Explorer.

# `url` was never defined in the original snippet (NameError at the Request
# call). The address requested earlier in this file is assumed here --
# TODO confirm the intended target.
url = 'http://baidu.com'

user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'

# Extra HTTP headers sent with the request.
headers = {'User-Agent': user_agent}
# `data` is the URL-encoded POST payload built earlier in this file.
req = urllib2.Request(url, data, headers)

猜你喜欢

转载自8850702.iteye.com/blog/2279305