tarfile模块

读写tar存档文件

tarfile.open()
Signature: tarfile.open(name=None, mode='r', fileobj=None, bufsize=10240, **kwargs)
打开tar存档进行读、写或追加
返回一个合适的TarFile对象（TarFile类的实例）

import os
import tarfile
import urllib
import urllib.request

DOWNLOAD_ROOT = "https://raw.githubusercontent.com/ageron/handson-ml2/master/"
HOUSING_PATH = os.path.join("datasets", "housing")  # local directory that holds the dataset
HOUSING_URL = DOWNLOAD_ROOT + "datasets/housing/housing.tgz"

def fetch_housing_data(housing_url=HOUSING_URL, housing_path=HOUSING_PATH):
    """Download the housing archive and extract it into *housing_path*.

    The archive is saved as ``housing.tgz`` inside *housing_path* and then
    unpacked into that same directory.

    Args:
        housing_url: URL of the tar archive to download.
        housing_path: Destination directory; created (including parents)
            when it does not exist yet.
    """
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(housing_path, exist_ok=True)
    tgz_path = os.path.join(housing_path, "housing.tgz")
    # Download the archive straight to tgz_path (not to a temporary file).
    urllib.request.urlretrieve(housing_url, tgz_path)
    # The context manager closes the archive even if extraction raises.
    with tarfile.open(tgz_path) as housing_tgz:
        if hasattr(tarfile, "data_filter"):
            # The archive comes from the network: where extraction filters
            # are available, refuse members that would escape housing_path.
            housing_tgz.extractall(path=housing_path, filter="data")
        else:
            housing_tgz.extractall(path=housing_path)

读取文件

fetch_housing_data() # download and extract the dataset using the default URL and path
import pandas as pd
def load_housing_data(housing_path=HOUSING_PATH):
    """Read ``housing.csv`` from *housing_path* and return it via pandas.

    Args:
        housing_path: Directory containing ``housing.csv``.

    Returns:
        The object produced by ``pd.read_csv`` for that file.
    """
    return pd.read_csv(os.path.join(housing_path, "housing.csv"))

# Load the extracted CSV using the default dataset directory.
housing = load_housing_data()
# Preview the first rows; the result is only shown in an interactive
# session (e.g. a notebook), since the return value is not used here.
housing.head()

在这里插入图片描述
官方文档

发布了50 篇原创文章 · 获赞 51 · 访问量 2473

猜你喜欢

转载自blog.csdn.net/hezuijiudexiaobai/article/details/104574991