.txt
encoding=: 写入中文时一定要设置(如 encoding="utf-8")
r: 只读
w: 写入(会覆盖原有内容)
a: 追加
b: 二进制
举例(知乎)
import requests
from pyquery import PyQuery as pq
# Example: fetch the Zhihu "explore" page and append every feed entry
# (question, author, answer) to explore.txt.
url = "https://zhihu.com/explore"
# A browser-like User-Agent header is sent with the request.
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'}
# An explicit timeout keeps the script from blocking forever on a dead connection.
html = requests.get(url, headers=headers, timeout=10).text
doc = pq(html)
items = doc(".explore-feed.feed-item").items()
# Open the output file once instead of re-opening it for every item.
with open("explore.txt", "a", encoding="utf-8") as file:
    for item in items:
        question = item.find('h2').text()
        author = item.find('.author-link').text()
        # Re-parsing the inner HTML and calling .text() strips the tags inside
        # the answer. Guard against a missing '.content' node so that a
        # None/empty fragment is never handed to pq().
        content_html = item.find('.content').html()
        answer = pq(content_html).text() if content_html else ""
        print(question)
        print(author)
        print(answer)
        file.write('\n'.join([question, author, answer]))
        # Separator line between entries.
        file.write('\n' + "=" * 50 + '\n')
*注:pq(xxx.html()).text() 能去掉段落中的 HTML 标签
注意区分 text 属性(如 requests 响应的 .text)和 text() 方法(如 pyquery 对象的 .text())
.csv
列表和字典都可以写入 .csv 文件
字典更常用:
- 把列名写在列表里
- 定义 DictWriter
- 写入字典
import csv
# Write a list of rows to a .csv file.
# newline="" hands line-ending control to the csv module (otherwise blank rows
# appear between records on Windows); the explicit encoding makes the output
# independent of the platform default.
with open("data.csv", "w", newline="", encoding="utf-8") as csvfile:
    # Create the csv writer first, then emit the header row and the data rows.
    writer = csv.writer(csvfile)
    writer.writerow(["id", "name", "age"])
    writer.writerows([["1001", "claire", "18"], ["1002", "ann", "18"]])

# Write dictionaries to a .csv file.
with open("dataDict.csv", "w", newline="", encoding="utf-8") as csvfile:
    # The DictWriter needs the column names up front; they define column order.
    fieldnames = ["id", "name", "age"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerow({"id":"1001", "name":"老王", "age": "50"})
    writer.writerow({"id":"1002", "name":"张三", "age": "50"})
pandas 库(读取 .csv)
import pandas as pd
# Load data.csv into a DataFrame and print it.
df = pd.read_csv(filepath_or_buffer="data.csv")
print(df)
.json
元素为字典的列表
.dumps(): 将 Python 对象转化为 JSON 字符串(str)
.loads(): 将 JSON 字符串(str)转化为 Python 对象
import json
# Parse a JSON string into Python objects (here: a list of dicts).
# The text is kept in `json_text` rather than `str` so the builtin is not shadowed.
json_text = '''
[{
"name": "Bob",
"gender": "male",
"birthday": "1992-10-18"
}, {
"name": "张三",
"gender": "男",
"birthday": "1992-10-18"
}]
'''
data = json.loads(json_text)
# Write the data to a JSON file. ensure_ascii=False keeps non-ASCII characters
# as-is instead of \uXXXX escapes; indent=2 pretty-prints the output.
with open("data.json", "w", encoding="utf-8") as file:
    file.write(json.dumps(data, indent=2, ensure_ascii=False))
# Read the JSON file back. The mode must be "r": opening with "w" would
# truncate the file that was just written and make read() fail.
with open("data.json", "r", encoding="utf-8") as file:
    json_text = file.read()
    data = json.loads(json_text)