python中的文件存储

.txt

encoding=：写入中文时一定要设置，例如 encoding="utf-8"

r ：只读

w ：写入（文件已存在时会先清空原有内容）

a：追加

b：二进制

举例（知乎）

import requests
from pyquery import PyQuery as pq

# Fetch the Zhihu explore page and append every feed item's question, author
# and answer text to explore.txt.
url = "https://zhihu.com/explore"
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'}
# Without a timeout, requests waits forever if the server never responds.
html = requests.get(url, headers=headers, timeout=10).text

doc = pq(html)
items = doc(".explore-feed.feed-item").items()

# Separator line written after each record.
separator = '\n' + "=" * 50 + '\n'

records = []
for item in items:
    question = item.find('h2').text()
    author = item.find('.author-link').text()
    # Re-parsing the inner HTML and calling text() strips the markup tags.
    answer = pq(item.find('.content').html()).text()
    print(question)
    print(author)
    print(answer)
    records.append('\n'.join([question, author, answer]))

# Open the output file once instead of once per item, and only when there is
# something to append, so no empty file is created when nothing matched.
if records:
    with open("explore.txt", "a", encoding="utf-8") as file:
        file.writelines(record + separator for record in records)

*注：pq(xxx.html()).text() 能去掉段落中的 HTML 标签，只保留纯文本
注意区分 requests 响应对象的 text 属性（如 requests.get(...).text）和 pyquery 对象的 text() 方法

.csv

列表和字典都可以写入.csv文件

字典更常用：

把列名写在列表里
定义DictWriter
写入字典

import csv

# Write rows given as lists to a .csv file.
# newline="" lets the csv module control line endings itself; without it,
# platforms that translate "\n" on write (e.g. Windows) get an extra blank
# line after every row. The encoding is set explicitly so the output does not
# depend on the platform default.
with open("data.csv", "w", newline="", encoding="utf-8") as csvfile:

    # Create the csv writer first.
    writer = csv.writer(csvfile)
    writer.writerow(["id", "name", "age"])
    writer.writerows([["1001", "claire", "18"], ["1002", "ann", "18"]])


# Write rows given as dictionaries to a .csv file.
# newline="" lets the csv module control line endings itself; without it,
# platforms that translate "\n" on write (e.g. Windows) get an extra blank
# line after every row.
with open("dataDict.csv", "w", newline="", encoding="utf-8") as csvfile:

    # Create the DictWriter; fieldnames fixes the column order and the header.
    fieldnames = ["id", "name", "age"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerow({"id": "1001", "name": "老王", "age": "50"})
    writer.writerow({"id": "1002", "name": "张三", "age": "50"})

`pandas` 库

import pandas as pd
# Load data.csv into a DataFrame and print it.
df = pd.read_csv(filepath_or_buffer="data.csv")
print(df)

.json

元素为字典的列表
json.dumps()：把 Python 对象（如列表、字典）序列化为 JSON 字符串（str）
json.loads()：把 JSON 字符串（str）解析为 Python 对象

import json

# Parse a JSON string into Python objects (here: a list of dicts).
# The variable is named json_text rather than str so the builtin str type
# is not shadowed.
json_text = '''
[{
    "name": "Bob",
    "gender": "male",
    "birthday": "1992-10-18"
    }, {
    "name": "张三",
    "gender": "男",
    "birthday": "1992-10-18"
}]
'''
data = json.loads(json_text)

# Write the data to a JSON file. ensure_ascii=False keeps the Chinese text
# as-is instead of escaping it to \uXXXX sequences.
with open("data.json", "w", encoding="utf-8") as file:
    file.write(json.dumps(data, indent=2, ensure_ascii=False))

# Read the JSON file back. The mode must be "r": opening with "w" truncates
# the file and makes read() raise io.UnsupportedOperation.
with open("data.json", "r", encoding="utf-8") as file:
    json_text = file.read()
    data = json.loads(json_text)

python中的文件存储

.txt

.csv

pandas 库

.json

猜你喜欢

`pandas` 库