前言
自己手动构造dataframe的两种方法
字典dict方法
这种方法使用字典(dict)来构造 DataFrame:字典的 key 是 DataFrame 的列名(列索引),value 是该列的数据(以 list 的形式给出,各列的 list 长度必须一致)。
import pandas as pd
import numpy as np
# Start from a dict: each key becomes a column label of the DataFrame,
# and the list stored under that key supplies that column's values.
data_dict = {
    "age": [18, 30, np.nan, 40, np.nan, 30],
    "city": ["Bei Jing", "Shang Hai", "Guang Zhou", "Shen Zhen", np.nan, " "],
    "sex": [None, "male", "femal", "male", np.nan, "unkonwn"],
    "birth": ["2002-02-10", "1998-10-17", None, "1978-08-08", np.nan, "1988-10-17"],
}
data_df = pd.DataFrame(data_dict)
data_df

# Next, give every row a label by installing a named pd.Index.
index = pd.Index(["Tom", "Bob", "Mary", "James", "Smith", "John"], name="姓名")
data_df = data_df.set_index(index)
data_df

# A pd.Series is accepted as the new index as well.
index = pd.Series(["张三", "李四", "王五", "赵六", "宋江", "李逵"], name="姓名")
data_df = data_df.set_index(index)
data_df
# Alternatively, do everything in one step: pass the row labels to the
# constructor so the DataFrame is created with its index already set.
data_dict = {
    "age": [18, 30, np.nan, 40, np.nan, 30],
    "city": ["Bei Jing", "Shang Hai", "Guang Zhou", "Shen Zhen", np.nan, " "],
    "sex": [None, "male", "femal", "male", np.nan, "unkonwn"],
    "birth": ["2002-02-10", "1998-10-17", None, "1978-08-08", np.nan, "1988-10-17"],
}
myindex = pd.Series(["张三", "李四", "王五", "赵六", "宋江", "李逵"], name="姓名")
data_df = pd.DataFrame(data_dict, index=myindex)
data_df
二维数组构建法
# A DataFrame can also be built from a 2-D array: each inner row becomes
# one DataFrame row, and the column labels are passed in separately.
data = np.array(
    [
        [98, 2],
        [3, 104],
        [1, 81],
        [99, 5],
        [101, 10],
        [2, 100],
    ]
)
data_df = pd.DataFrame(data, columns=['打斗镜头', '约会镜头'])
data_df
总结
(如果您发现我写的有错误,欢迎在评论区批评指正)