python对文件的操作及相关的os

文件的读取

文件读取的三部曲：打开——操作——关闭
eg：

# The three steps of file access: open, operate, close.
f=open('/tmp/passwd')   # open the file (no mode is given, so the default mode applies)
print(f)
content=f.read()        # read the file's contents
print(content)
f.close()                # close the file

这里写图片描述
- 文件读取模式的选择

r：

只能读，不能写；

读取的文件不存在时会报错

r+：

可以执行读写操作

文件不存在，报错

默认情况下，从文件指针所在位置开始写入；刚打开时指针位于文件开头，因此会覆盖开头的原有内容

w：

只写

文件不存在，不报错，会新建该文件

会清空文件内容

w+：

可读可写

文件不存在，不报错

会清空文件内容

a：

只写

文件不存在，不报错

不会清空文件内容

a+：

可读可写

文件不存在，不报错

不会清空文件内容

eg:可通过以下代码测试出这些模式的功能

f = open('/tmp/passwd1', mode='w+') # first argument: file name; second argument: open mode
print(f.read())    # raises if the chosen mode does not allow reading
# write() returns the length of the content that was written
print(f.write("123"))     # raises if the chosen mode does not allow writing
f.close()

非纯文本文件的读取

如果读取图片、音乐或者视频(非文本文件)，需要通过二进制的方式进行读取与写入，即在打开模式中加上 b；

读取二进制文件
rb:rb+:wb:wb+:ab:ab+:
读取文本文件
rt:rt+:wt:wt+:at:at+ 等价于 r:r+:w:w+:a:a+
eg：

# Read the binary file's contents first and keep them in `content`.
f1 = open("frame1.png", mode='rb')
content = f1.read()
print(content)
f1.close()
# Write the same bytes to a second file, which produces a copy.
f2 = open('hello.png', mode='wb')
f2.write(content)
f2.close()

这里写图片描述

文件的常用属性

# buffer: the underlying buffer object
print(f.buffer)
# closed: whether the file object has been closed (True once it is closed)
print(f.closed)
# fileno(): the file descriptor number
print(f.fileno())
# name: the name of the file
print(f.name)
# printing the object itself shows a summary of the file object
print(f)

文件常用方法

文件读取
f.read():
默认情况读取文件的所有内容, 小的文件，直接用read读取即可,
如果是一个大文件时(文件大小>内存大小)，不能通过read一次性读取所有内容;
f.readline(): 每次读取一行内容
f.readlines() 将读取的文件内容的每一行存到列表中
f.readable() 判断是否可读
f.read(3)类似于head -c 3 /tmp/passwd命令

f = open('/mnt/passwd', 'r+')
# read(3) returns the first 3 characters, similar to `head -c 3 /tmp/passwd`
print(f.read(3))
# readline() continues from the current position and returns up to the end of that line
print(f.readline(), end='')
print(f.readline(), end='')
# readlines() returns the remaining lines as a list, one element per line
print(f.readlines())
# To drop the trailing '\n' of every line, use a list comprehension or map:
# print([line.strip() for line in f.readlines()])
# print(list(map(lambda  x: x.strip(), f.readlines())))
f.close()

这里写图片描述

文件的写入操作
f.write() 从指针所在位置写入, 写入的是字符串内容
f.writelines() 将列表里面的每个元素依次写入文件中（不会自动添加换行符）
f.writable()查看文件是否可写

# 'a+' keeps the existing contents of the file.
f=open("/mnt/cooffee",mode="a+")
# write() takes a single string.
f.write('cooffee')
# writelines() writes every element of the list; each element carries its own "\n".
li=['user'+str(i)+"\n" for i in range(10)]
f.writelines(li)
f.close()

这里写图片描述

指针位置操作
print(f.tell())告诉当前指针所在位置
f.seek(0, 0)将文件指针移动到文件最开始
f.seek(0,2)将指针移动到文件最后

# NOTE: 'w+' empties the file on open.
f = open("/mnt/passwd",'w+')
# Position right after opening.
print(f.tell())
f.write("cooffee")
# Position after the write.
print(f.tell())
# seek(0, 0): move the pointer to the very beginning of the file.
f.seek(0,0)
print(f.tell())
# seek(0, 2): move the pointer to the end of the file.
f.seek(0,2)
print(f.tell())
f.close()

这里写图片描述

文件上下文管理器之with语句

上下文管理器:打开文件，执行完with语句内容之后，自动关闭文件对象

# The context manager closes the file automatically when the with block ends.
with open('/mnt/passwd') as f:
    print("with语句里边：", f.closed)
    print(f.read(5))
print('with语句外边：', f.closed)     # f.closed reports whether the file is closed

# One with statement can manage several files at once.
with open('/mnt/passwd') as f1, \
        open('/mnt/passwdBack', 'w+') as f2:
    # Copy the contents of the first file into the second one.
    f2.write(f1.read())
    # Move the pointer back to the start so the data just written can be read.
    f2.seek(0, 0)
    print(f2.read())

# Interpreters older than Python 2.7 / 3.1 do not accept several context
# managers in one with statement; there the copy has to be written as:
# with open('/tmp/passwd') as f1:
#     content = f1.read()
# with open('/tmp/passwdBack', 'w+') as f2:
#     f2.write(content)

这里写图片描述

yield实现读取大文本文件

eg：1. 文件操作
1). 创建文件data.txt, 文件共100000行, 每行存放一个1～100之间的整数.

 import random
 # Mode 'w' starts from an empty file, so re-running the snippet always leaves
 # exactly 100000 lines (append mode would keep growing the file).
 with open('data.txt', mode='w') as f:
     for _ in range(100000):
         # One random integer between 1 and 100 (inclusive) per line.
         f.write(str(random.randint(1,100))+'\n')
 # Use yield to hand out the file one line at a time.
 def byLineReader(filename):
     """Generate the lines of *filename* one by one.

     :param filename: path of the text file to read
     :return: a generator yielding each line, line ending included
     """
     with open(filename) as source:
         # Iterating a file object reads one line per step and stops at EOF.
         yield from source

 # Calling the generator function returns a generator object.
 read = byLineReader('data.txt')
 # 1) Pull lines on demand with next().
 print(next(read))
 print(next(read))
 # 2) Or loop over the generator with a for loop:
 # for item in read:
 #     print(item)

这里写图片描述

文件对象的迭代

文件对象可以用for循环遍历，每次遍历得到文件的一行内容；这种方式不会一次性把整个文件读入内存，因此节省内存空间。

# The ABCs live in collections.abc; the old `collections.Iterable` alias was
# removed in Python 3.10.
from collections.abc import Iterable

# A file object is iterable: looping over it yields one line at a time.
with open('data.txt') as f:
    print(isinstance(f, Iterable))
    for i, item in enumerate(f):
        # Only show the first three lines.
        if i == 3:
            break
        print(i, item)

这里写图片描述

os之环境变量函数

import os
# 1) Operating-system type: 'posix' means Linux, 'nt' means Windows
print(os.name)
# 2) Detailed information about the operating system
info = os.uname()
print(info)
# 3) The system environment variables
print(os.environ)
# 4) Look up the value of an environment variable by its key
print(os.environ.get('PATH'))
print(os.getenv('PATH'))

这里写图片描述

os之文件操作函数

import os
# 1. Check whether a path is absolute
print(os.path.isabs('/tme/hello'))
print(os.path.isabs('hello'))
# 2. Build an absolute path
print(os.path.abspath("/tmp/hello"))
print(os.path.abspath('hello.peng'))
# 3. Join a directory and a file name
print(os.path.join('/home/kiosk','hello.peng'))
print(os.path.join(os.path.abspath('.'),'hello.peng'))   # absolute path: absolute path of the current directory + file/directory name
# 4. Extract the file name or the directory name
filename='/home/kiosk/Desktop/python1/day9-1/hello.peng'
print(os.path.basename(filename))
print(os.path.dirname(filename))

这里写图片描述

# 5. Create / remove directories
os.mkdir('floating')
os.makedirs('floating/films')
#os.rmdir('floating')
# 6. Create / remove files
os.mknod('coffee.txt')
#os.remove("cooffee.txt")
# 7. Rename a file (like `mv`)
os.rename("data.txt",'data1.txt')
# 8. Check whether a file or directory exists
print(os.path.exists('floating'))
print(os.path.exists('data1.txt'))
# 9. Split a name into (name, extension)
print(os.path.splitext('hello.peng'))
# 10. Split a path into (directory, file name)
print(os.path.split("/tmp/hello/hello.peng"))

这里写图片描述

遍历指定文件目录下的所有内容

import os
from os.path import join
# Visit every directory under /var/log and print the path of each file found.
for current_dir, subdirs, filenames in os.walk('/var/log'):
    print(current_dir, subdirs, filenames)
    for entry in filenames:
        # Prefix the file name with the directory it was found in.
        print(join(current_dir, entry))

这里写图片描述

sys模块的常用方法

import  sys

# sys.argv is a list whose first element is the script name
print(sys.argv)
# The n-th argument passed to the script is sys.argv[n]

这里写图片描述

Counter对文件的中重复字符的统计(大文件）

生成一个大文件ips.txt，要求120000行，每行随机为172.25.254.0/24段的ip
读取ips.txt文件，统计该文件中出现频率排前10的ip

import random
from collections import Counter
def create_file(filename='ips.txt', lines=120000):
    """Write random IPs from 172.25.254.0/24 to *filename*, one per line.

    :param filename: path of the file to create; an existing file is overwritten
    :param lines: number of IP lines to write (defaults to 120000)
    :return: None
    """
    # A generator expression keeps memory use flat however many lines are requested.
    ips = ('172.25.254.' + str(random.randint(1, 254)) + '\n' for _ in range(lines))
    with open(filename, 'w') as f:
        f.writelines(ips)
def sort_by_ip_count(filename='ips.txt',count=10):
    """Return the *count* most frequent IPs in *filename*, most frequent first.

    :param filename: text file holding one IP per line
    :param count: how many of the most frequent IPs to return
    :return: list of IP strings without surrounding whitespace
    """
    with open(filename) as f:
        # Strip before counting so that a final line without '\n' is tallied
        # together with its duplicates, and skip blank lines entirely.
        ipcount = Counter(ip for ip in map(str.strip, f) if ip)
    return [ip for ip, _ in ipcount.most_common(count)]
# Generate the sample file, then print the 10 most frequent IPs found in it.
create_file()
print(sort_by_ip_count('ips.txt',10))

这里写图片描述

批量建文件及修改后缀名

  1. 在当前目录新建目录img, 里面包含100个文件, 100个文件名各不相同(X4G5.png)
  2. 将当前img目录所有以.png结尾的后缀名改为.jpg.

import optparse
import os
import random
import string
from os.path import exists, splitext, join
def gen_code(len=4):
    """Return a random code of *len* characters taken from ASCII letters and digits.

    :param len: number of characters in the code (defaults to 4)
    :return: the code as a string
    """
    alphabet = string.ascii_letters + string.digits
    # random.sample draws without replacement, so no character repeats in a code.
    return ''.join(random.sample(alphabet, len))
def create_files(dirname='floating', count=100):
    """Create *count* empty, uniquely named ``.png`` files inside *dirname*.

    :param dirname: target directory; it is created when it does not exist yet
    :param count: number of files to create (defaults to 100)
    :return: None
    :raises FileExistsError: if a generated file name already exists in *dirname*
    """
    # Two random codes can collide, and a set would then silently hold fewer
    # names than requested, so keep drawing until enough unique names exist.
    li = set()
    while len(li) < count:
        li.add(gen_code())
    print(li)
    os.makedirs(dirname, exist_ok=True)
    for name in li:
        # Mode 'x' creates the file and fails if it already exists, as os.mknod
        # does, but it works on every platform and needs no privileges.
        with open(join(dirname, name + '.png'), 'x'):
            pass
try:
    from collections.abc import Iterable
except ImportError:
    from collections import Iterable
import sys
def modify_suffix(dirname,old_suffix,new_suffix):
    '''
    Rename every entry of a directory that ends with one suffix to end with another.

    :param dirname: directory to operate on
    :param old_suffix: suffix to replace, e.g. '.png' or '.tar.gz'
    :param new_suffix: replacement suffix, e.g. '.jpg'
    :return: None; the outcome is reported on stdout
    '''
    # 1. Report a missing directory instead of raising.
    if not exists(dirname):
        print("%s不存在" %(dirname))
        return
    # 2. Collect the matching names up front so that renaming cannot disturb
    #    the directory listing while it is being consumed.
    matches = [name for name in os.listdir(dirname) if name.endswith(old_suffix)]
    for name in matches:
        # 3. Cut off exactly old_suffix. os.path.splitext would only drop the
        #    last extension and break multi-part suffixes such as '.tar.gz'.
        stem = name[:len(name) - len(old_suffix)]
        # 4. Rename; both names need the directory prefix.
        oldname = join(dirname, name)
        newname = join(dirname, stem + new_suffix)
        os.rename(oldname, newname)
        print("%s重命名%s成功" %(oldname,newname))
# Run the demo only when this file is executed as a script, not when it is imported.
if __name__=='__main__':
    # Create the sample .png files, then rename all of them to .jpg.
    create_files()
    modify_suffix('floating','.png','.jpg')

这里写图片描述

python对文件的操作及相关的os

文件的读取

r：

r+：

w：

w+：

a：

a+：

非纯文本文件的读取

文件的常用属性

文件常用方法

文件上下文管理器之with语句

yield实现读取大文本文件

文件对象的迭代

os之环境变量函数

os之文件操作函数

遍历指定文件目录下的所有内容

sys模块的常用方法

Counter对文件的中重复字符的统计(大文件）

批量建文件及修改后缀名

猜你喜欢