# Some common pandas operations


# coding: utf-8

# In[38]:


# Import the required libraries
import numpy as np
import pandas as pd


# In[39]:


# Row labels: one entry per person; the index itself is named "name".
index = pd.Index(["Tom", "Bob", "Mary", "James"], name="name")

# Column-oriented records: each key becomes one column of the frame.
data = dict(
    age=[18, 30, 25, 40],
    city=["BeiJing", "ShangHai", "GuangZhou", "ShenZhen"],
    sex=["male", "male", "female", "male"],
)

# Demo table used by every cell below.
user_info = pd.DataFrame(data, index=index)
user_info  # last expression of the cell: the notebook displays the frame


# In[40]:


# Print a concise summary of the frame (index, columns, dtypes, non-null counts).
user_info.info()


# In[41]:


# Dimensions of the frame as a (rows, columns) tuple.
user_info.shape


# In[42]:


# Transposed copy: the people become columns and the fields become rows.
user_info.transpose()


# In[43]:


# Transposing returned a new object, so the original frame is unchanged.
user_info


# # To get the underlying data held by a DataFrame, use .values; the object it returns is a NumPy ndarray.

# In[44]:


# Underlying data of the frame as a NumPy ndarray.
user_info.values


# In[45]:


# Largest value in the "age" column.
user_info["age"].max()


# In[46]:


# Descriptive statistics (count, mean, std, quartiles, ...) for the frame.
user_info.describe()


# In[47]:


# How many times each distinct value occurs in the "sex" column.
user_info["sex"].value_counts()


# In[48]:


# Alternative (left disabled): pd.cut(user_info.age, 3) would split the age
# range into 3 equal-width bins instead of using explicit edges.


# In[49]:


# Bucket ages using explicit bin edges and give each bucket a readable label.
# With pd.cut's default right-closed intervals the buckets are
# (1, 18], (18, 30] and (30, 50].
pd.cut(
    user_info["age"],
    bins=[1, 18, 30, 50],
    labels=["childhood", "youth", "middle"],
)


# In[50]:


# Show the frame again for reference before reordering its columns.
user_info


# In[51]:


# Sort the column labels in descending order; returns a new frame.
user_info.sort_index(axis="columns", ascending=False)


# In[52]:


# Element-wise transform: flag each user by whether they are at least 30.
user_info["age"].map(lambda years: "yes" if years >= 30 else "no")


# In[53]:


# Lookup table from city name to the region label it should be replaced with.
city_map = dict(
    BeiJing="north",
    ShangHai="south",
    GuangZhou="south",
    ShenZhen="south",
)

# Series.map also accepts a dict: each city is looked up in city_map.
user_info["city"].map(city_map)


# In[54]:


# Show the frame again for reference before renaming its labels.
user_info


# In[55]:


# New frame with capitalised column labels; rename() does not modify
# user_info because the result is not assigned back.
user_info.rename(columns=dict(age="Age", city="City", sex="Sex"))


# In[56]:


# New frame with two row labels lower-cased; labels missing from the mapping
# ("Mary", "James") are kept as they are.
user_info.rename({"Tom": "tom", "Bob": "bob"}, axis="index")


# In[57]:


# Count how many columns there are of each dtype.
# DataFrame.get_dtype_counts() was deprecated in pandas 0.25 and removed in
# pandas 1.0, so calling it raises AttributeError on current versions;
# .dtypes.value_counts() is the replacement named by the deprecation notice.
user_info.dtypes.value_counts()


# In[58]:


# astype() returns a new Series converted to float. The result is not
# assigned back, so the "age" column of user_info keeps its original dtype.
user_info.age.astype(float)
# Display the frame, which is unchanged by the conversion above.
user_info


# In[65]:


# Write the frame (index included) to "data.csv" in the current working
# directory, using a single space as the field separator instead of a comma.
user_info.to_csv(path_or_buf="data.csv", sep=" ")


# In[ ]:
# (End of notebook. The trailing text "You may also like" was web-page residue, not code.)