from bs4 import BeautifulSoup
import requests, pymongo
# Client for the MongoDB server at localhost:27017.
client = pymongo.MongoClient(host='localhost', port=27017)
# Handle to the database named 'xiaozhu'.
xiaozhu = client.get_database('xiaozhu')
# Collection inside that database that holds the scraped listing documents.
bnb_info = xiaozhu.get_collection('bnb_info')
# Fetch several pages of search results and store every listing in MongoDB.
def getMorePage(pages):
    """Scrape result pages 1..pages and insert each listing into ``bnb_info``.

    For every page, the listing titles and prices are selected from the
    HTML, paired by position, and written to the ``bnb_info`` collection as
    one document per listing: ``{'title': <str>, 'price': <int>}``.

    Args:
        pages: Number of result pages to fetch, starting at page 1.

    Raises:
        requests.exceptions.RequestException: If a page cannot be fetched
            (including when the request times out).
    """
    url = 'http://bj.xiaozhu.com/search-duanzufang-p{}-0/'
    for page in range(1, pages + 1):
        # A timeout keeps one unresponsive request from hanging the scrape.
        wb_data = requests.get(url.format(page), timeout=10)
        soup = BeautifulSoup(wb_data.text, 'html.parser')
        # Listing titles and prices, in page order.
        titles = soup.select("span.result_title")
        prices = soup.select("span.result_price > i")
        # zip() pairs the n-th title with the n-th price and stops at the
        # shorter of the two lists.
        for title, price in zip(titles, prices):
            price_text = price.get_text().strip()
            try:
                # Store the price as a number so numeric filters such as
                # {'price': {'$gte': 500}} can match it.
                price_value = int(price_text)
            except ValueError:
                print("skipping listing with non-numeric price: {!r}".format(price_text))
                continue
            data = {
                'title': title.get_text(),
                'price': price_value,
            }
            # One document per listing (like filling in one spreadsheet row).
            bnb_info.insert_one(data)
    print("完成")
# Scrape the first three result pages into the collection.
getMorePage(3)

# Filter in Python: walk every stored document and print the ones whose
# price is greater than or equal to 500.
for listing in bnb_info.find():
    # int() also accepts a price that was stored as a numeric string.
    if int(listing['price']) >= 500:
        print(listing)

# The same filter can also be written as a query, so the database does the
# selection. A collection object is not callable; the filter document must
# be passed to find().
# NOTE: this form only matches documents whose price is stored as a number.
for listing in bnb_info.find({'price': {'$gte': 500}}):
    print(listing)
# Supplement - MongoDB comparison operators:
# $lt   less than                 <
# $lte  less than or equal        <=
# $gt   greater than              >
# $gte  greater than or equal     >=
# $ne   not equal                 !=