#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Download the Sina Sports NBA page and print the text of its news links.

The page is fetched twice, once with ``requests`` and once with
``urllib``, and each result is printed together with its HTTP status code.
The HTML is then parsed with BeautifulSoup and the text of every ``<a>``
element matched by two CSS selectors is printed.
"""
import sys
from typing import Optional
from urllib import error, parse, request

import requests
from bs4 import BeautifulSoup
from requests.exceptions import HTTPError, ReadTimeout, RequestException, Timeout

URL = 'http://sports.sina.com.cn/nba/'

# NOTE(review): 0.1 s is the value used by the original script; it is short
# enough that the request will often time out -- confirm this is intended.
REQUESTS_TIMEOUT = 0.1

# Upper bound (seconds) for the urllib download so it cannot block forever.
URLLIB_TIMEOUT = 10

SEPARATOR = '----------------------------------这是分隔线-------------------------------------'


def fetch_with_requests(url: str, timeout: float = REQUESTS_TIMEOUT) -> Optional[str]:
    """Fetch *url* with ``requests``, print the body and status code.

    Args:
        url: Address of the page to download.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        The response text decoded as UTF-8, or ``None`` if the request
        failed (the failure is reported on stdout).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except Timeout:
        # Covers both ConnectTimeout and ReadTimeout.
        print('TIME OUT')
        return None
    except RequestException as exc:
        # Any other transport-level failure (DNS, refused connection, ...).
        print(exc)
        return None

    # Force UTF-8 so that ``response.text`` is not garbled.
    response.encoding = 'utf-8'
    html = response.text
    print('requests请求库请求的结果:\n', html)
    print('状态码', response.status_code)
    return html


def fetch_with_urllib(url: str, timeout: float = URLLIB_TIMEOUT) -> Optional[str]:
    """Fetch *url* with ``urllib``, print the body and status code.

    Args:
        url: Address of the page to download.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The response body decoded as UTF-8, or ``None`` if the request
        failed (the failure is reported on stdout).
    """
    try:
        # The context manager closes the connection even if reading fails.
        with request.urlopen(url, timeout=timeout) as result:
            status = result.status
            html = result.read().decode('utf-8')
    except error.URLError as exc:
        print(exc.reason)
        return None
    except OSError as exc:
        # Socket-level errors (e.g. a timeout while reading the body) are
        # not always wrapped in URLError.
        print(exc)
        return None

    print('urllib请求库的结果:\n', html)
    # Only reached on success, so ``status`` is always bound here.
    print('状态码:', status)
    return html


def print_link_texts(html: str) -> None:
    """Parse *html* and print the text of the links matched by two selectors.

    Links inside ``.list .item p`` are printed first, then a separator
    line, then links inside ``ul .item``.
    """
    soup = BeautifulSoup(html, 'lxml')
    for paragraph in soup.select('.list .item p'):
        for anchor in paragraph.select('a'):
            print(anchor.get_text())
    print(SEPARATOR)
    for item in soup.select('ul .item'):
        for anchor in item.select('a'):
            print(anchor.get_text())


def main() -> int:
    """Run both downloads, then parse whichever HTML is available.

    Returns:
        ``0`` on success, ``1`` if neither download produced any HTML.
    """
    requests_html = fetch_with_requests(URL)
    urllib_html = fetch_with_urllib(URL)

    # Prefer the ``requests`` result; fall back to the ``urllib`` one so a
    # failed first download no longer leaves the parser without input.
    html = requests_html if requests_html is not None else urllib_html
    if html is None:
        print('Both downloads failed; nothing to parse.', file=sys.stderr)
        return 1

    print_link_texts(html)
    return 0


if __name__ == '__main__':
    sys.exit(main())
python3爬取新浪NBA新闻信息(待完善)
猜你喜欢
转载自blog.csdn.net/cdlwhm1217096231/article/details/80459510
今日推荐
周排行