python爬虫的各种存储方式之csv
2.csv的存储和读取
判断目录,有则打开,没有新建
import csv
import os
# Make sure the working directory exists, then switch into it.
# A raw string keeps the Windows backslashes literal, so sequences such as
# "\P" no longer trigger invalid-escape warnings; the resulting path value
# is identical to the one the original literal produced.
WORK_DIR = r'D:\Python\代码\数据爬取'
# makedirs() also creates any missing parent folders, and exist_ok=True makes
# it a no-op when the directory is already there, so no separate
# exists()/mkdir() check (and no race between the two) is needed.
os.makedirs(WORK_DIR, exist_ok=True)
os.chdir(WORK_DIR)
文件写入:writerow()/writerows() 需要传入列表;newline='' 用于消除多余的空行
# Create (or overwrite) mycsv.csv with a header row followed by three
# site rows. newline='' lets the csv module control line endings itself.
header = ['编号', '网址', '昵称']
sites = [
    [1, 'https://www.baidu.com/', '百度'],
    [2, 'https://www.jd.com/', '京东'],
    [3, 'https://www.qq.com/', '腾讯'],
]
with open('mycsv.csv', mode='w', encoding="utf-8", newline='') as csvfile:
    site_writer = csv.writer(csvfile)
    site_writer.writerow(header)
    for site in sites:
        site_writer.writerow(site)
# Append the 9x9 multiplication table: row i holds "1xi=i" up to "ixi=i*i",
# so the rows grow from one field to nine fields.
with open('mycsv.csv', 'a', encoding="utf-8", newline='') as csvfile:
    write2 = csv.writer(csvfile)
    for i in range(1, 10):
        # Build each row with a comprehension instead of appending to a
        # variable named `list`, which shadowed the builtin for the rest of
        # the script.
        row = ['{}x{}={}'.format(j, i, j * i) for j in range(1, i + 1)]
        write2.writerow(row)
读取csv
# Read the whole file back with the default (comma) dialect and print it.
# newline='' is what the csv module documents for files passed to a reader,
# so that newlines embedded in quoted fields are handled by the parser.
with open('mycsv.csv', 'r', encoding="utf-8", newline='') as csvfile:
    read1 = csv.reader(csvfile)
    print(type(read1))  # <class '_csv.reader'>
    for row in read1:  # each row comes back as a list
        # The original line here was corrupted by stray characters in front
        # of the print call; this restores the plain print of each row.
        print(row)
自定义读取
先额外加入一些数据
# Append three extra rows. Each row is a single field whose text contains
# '|' characters, to be split later by a custom dialect.
pipe_rows = [
    ['上海市|普陀区|华大科创楼'],
    ['山东市|德州市|乐陵市'],
    ['四川市|成都|锦江区'],
]
with open('mycsv.csv', mode='a', encoding='utf-8', newline='') as csvfile:
    appender = csv.writer(csvfile)
    for pipe_row in pipe_rows:
        appender.writerow(pipe_row)
自定义读取规则
csv.register_dialect('任意规则名称', delimiter='自定义分隔符', quoting=csv.QUOTE_ALL)
# Register a named dialect that splits fields on '|' instead of ','.
csv.register_dialect('mydialect', delimiter='|', quoting=csv.QUOTE_ALL)
# csv.unregister_dialect('mydialect')  # removes the dialect again

# Number of rows this script wrote with the default comma dialect:
# 1 header + 3 site rows + 9 multiplication-table rows.
COMMA_ROW_COUNT = 13

# newline='' is what the csv module documents for files passed to a reader.
with open('mycsv.csv', 'r', encoding='utf-8', newline='') as csvfile:
    # Phase 1: read only the comma-delimited rows, then stop early so the
    # file position is left at the first '|'-delimited line.
    read2 = csv.reader(csvfile)
    for i, v in enumerate(read2):
        print(v)
        if i == COMMA_ROW_COUNT - 1:
            break
    print('----------')
    # Phase 2: keep reading the same file object, now with the custom
    # dialect, so the remaining lines are split on '|'.
    readagin = csv.reader(csvfile, 'mydialect')
    for each in readagin:
        print(each)