Pandas的常用数据类型
- Series:一维,带标签(索引)的数组
- DataFrame:二维,可以看作由多个 Series 组成的容器
一、Series
Series对象本质上是由两个数组组成:一个数组构成对象的键(index,索引),另一个数组构成对象的值(values)
# Tour of pandas.Series basics: construction, selection, and the
# index/values pair. The expected output of each print() call is shown in
# the comment lines that follow it.
import string

import numpy as np
import pandas as pd

# --- Creating a Series -------------------------------------------------
# The index can be given explicitly; without one it defaults to 0, 1, 2, ...
t1 = pd.Series(np.arange(5), index=list("abcde"))
print(t1)
# a    0
# b    1
# c    2
# d    3
# e    4
# dtype: int64
print(type(t1))  # <class 'pandas.core.series.Series'>

# Creating a Series from a dict: the keys become the index labels.
a = {string.ascii_uppercase[i]: i for i in range(5)}
print(pd.Series(a))
# A    0
# B    1
# C    2
# D    3
# E    4
# dtype: int64

# When an explicit index is passed alongside a dict, values are looked up by
# label; labels missing from the dict become NaN, which forces a float dtype.
print(pd.Series(a, index=list("CDEFG")))
# C    2.0
# D    3.0
# E    4.0
# F    NaN
# G    NaN
# dtype: float64

# --- Selecting data ----------------------------------------------------
# Positional selection goes through .iloc. Plain [] with integers on a
# label-indexed Series is deprecated in recent pandas releases, where the
# integers are treated as labels instead of positions.
print(t1.iloc[0:4:2])
# a    0
# c    2
# dtype: int64
by_position = t1.iloc[[2, 3, 4]]
print(by_position)
# c    2
# d    3
# e    4
# dtype: int64

# Boolean mask: keep only the entries whose value is greater than 2.
print(t1[t1 > 2])
# d    3
# e    4
# dtype: int64

# Label-based access to a single element.
print(t1["b"])  # 1

# Selecting a list of labels that includes a missing one ("f"):
# t1[["a", "e", "f"]] raises KeyError on pandas >= 1.0, so reindex() is used
# to obtain NaN for the missing label.
with_missing = t1.reindex(["a", "e", "f"])
print(with_missing)
# a    0.0
# e    4.0
# f    NaN
# dtype: float64

# --- Index and values --------------------------------------------------
print(t1.index)  # Index(['a', 'b', 'c', 'd', 'e'], dtype='object')
print(type(t1.index))  # <class 'pandas.core.indexes.base.Index'>
print(t1.values)  # [0 1 2 3 4]
print(type(t1.values))  # <class 'numpy.ndarray'>