Python 数据加工 之 数组(numpy)
《Python编程:从数据分析到数据科学》摘抄及拓展
随机数
- 生成一个随机数
import random
random.seed(3)
a = random.randint(1,100) #生成一个[1,100]的随机整数
print(a) #31
a = random.uniform(-10,10)#[-10,10]之间的随机浮点数
print(a) #1.852818212543312
- 生成一个随机数组
*先下载numpy库,在cmd命令行中输入以下命令:
pip install numpy -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com
import numpy as np
rand = np.random.RandomState(32)
x = rand.randint(0, 10, (3, 6))
print(x)
'''
[[7 5 6 8 3 7]
[9 3 5 9 4 1]
[3 1 2 3 8 2]]
'''
----------------------------------------------------------------------------------------------
rand = np.random.RandomState(1)
x = rand.rand(5) *10
print(x)
'''
[4.17022005e+00 7.20324493e+00 1.14374817e-03 3.02332573e+00
1.46755891e+00]
'''
#生成等距数列
x = np.linspace(0,10,20)
print(x)
'''
[ 0. 0.52631579 1.05263158 1.57894737 2.10526316 2.63157895
3.15789474 3.68421053 4.21052632 4.73684211 5.26315789 5.78947368
6.31578947 6.84210526 7.36842105 7.89473684 8.42105263 8.94736842
9.47368421 10. ]
'''
数组
- 数组的创建
如下5种方法
import numpy as np
myArray1 = np.arange(1, 20)
myArray2 = np.array([1, 2, 3, 4, 5, 6])
myArray3 = np.zeros((5, 5))#创建全0
myArray4 = np.full((3, 5), 2)#创建全为2
rand = np.random.RandomState(30)
myArray5 = rand.randint(1, 10, (3, 5))
print(myArray1) # [ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19]
print(myArray2) # [1 2 3 4 5 6]
print(myArray3)
'''
[[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]]
'''
print(myArray4)
'''
[[2 2 2 2 2]
[2 2 2 2 2]
[2 2 2 2 2]]
'''
print(myArray5)
'''
[[6 6 5 8 3]
[6 2 4 8 8]
[2 2 4 3 3]]
'''
print(type(myArray1)) # <class 'numpy.ndarray'>
- 特征
- shape代表数组的形状
- dtype代表数组的数据类型
import numpy as np
myArray1 = np.zeros(shape=(2,15),dtype=float)
print(myArray1)
'''
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
'''
* 切片的操作和列表类似;此外数组还支持用下标列表取值(花式索引)和用布尔条件取值(布尔索引),这是列表不具备的
import numpy as np
myArray1 = np.arange(1, 20)
print(myArray1) # [ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19]
print(myArray1[[1, 3, 5]]) # [2 4 6]
print(myArray1[myArray1 < 5]) # [1 2 3 4]
print(myArray1[::-1]) # [19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1]
- 规整化处理
np.newaxis的操作,给数组加一个维度
np.newaxis 与 None 等价
import numpy as np
myArray = np.arange(1, 10)
print(myArray.shape) # (9,)
myArray1 = myArray[:, np.newaxis]
print(myArray1)
'''
[[1]
[2]
[3]
[4]
[5]
[6]
[7]
[8]
[9]]
'''
print(myArray1.shape) # (9, 1)
print(myArray1[:, np.newaxis].shape) # (9, 1, 1)
print(myArray1[np.newaxis,:].shape) # (1, 9, 1)
- 更改形状 reshape
- 切片
import numpy as np
myArray = np.arange(1, 21).reshape([5, 4])
print(myArray[[0, 2], 1]) # [ 2 10]
- 深浅拷贝:直接赋值与列表一样,只是引用同一个对象;但数组切片返回的是视图(与原数组共享数据,这一点与列表切片不同),需要独立副本时要用 .copy() 方法(深拷贝)
- reshape 和 resize :
- reshape 返回一个新形状的数组(通常是原数据的视图),不修改数组本身;ndarray.resize() 方法则直接修改数组本身(np.resize() 函数才是返回一个新的数组)
- 矩阵的转置: swapaxes()和transpose() 交换维度
- 可以看下面的网页
https://blog.csdn.net/qq1483661204/article/details/70543952
- 多维数组转为一维数组 :flatten()
- 多维数组转化为嵌套列表: tolist()
- 重设数组元素数据类型: astype()
- 数组的三个常用属性,shape,ndim,size
ndarray的计算
- 乘法(*):逐元素相乘,不是矩阵乘法;数组乘以一个数时,每个元素都乘以该数。矩阵乘法要用 @ 或 np.dot()
- 横向拆分 np.split()(下面的例子用于一维数组;对多维数组默认沿 axis=0 拆分,按列拆分可用 np.hsplit())
- 纵向拆分 np.vsplit()
import numpy as np
myArray = np.arange(1, 21).reshape([5, 4])
print(myArray)
# 求数组的维数(轴的个数);注意这不是线性代数中矩阵的秩,后者用 np.linalg.matrix_rank()
print(myArray.ndim) # 2
# 横向拆分
print(np.split(myArray.flatten(), [3, 6]))
# [array([1, 2, 3]), array([4, 5, 6]), array([ 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20])]
# 纵向拆分
print(np.vsplit(myArray, [1, 2]))
'''
[array([[1, 2, 3, 4]]), array([[5, 6, 7, 8]]), array([[ 9, 10, 11, 12],
[13, 14, 15, 16],
[17, 18, 19, 20]])]
'''
- 合并
axis=0 表示沿第0轴(纵向)拼接:行数增加,列数必须相同
axis=1 表示沿第1轴(横向)拼接:列数增加,行数必须相同
import numpy as np
myArray = np.array([[3, 4, 5], [6, 7, 8]])
myArray1 = np.array([[1, 2, 3], [2, 3, 4]])
print(myArray.shape) # (2, 3)
print(myArray1.shape) # (2, 3)
myArray2 = np.concatenate((myArray, myArray1), axis=0)
print(myArray2)
'''
[[3 4 5]
[6 7 8]
[1 2 3]
[2 3 4]]
'''
print(myArray2.shape) # (4, 3)
myArray2 = np.concatenate((myArray, myArray1), axis=1)
print(myArray2)
'''
[[3 4 5 1 2 3]
[6 7 8 2 3 4]]
'''
print(myArray2.shape) # (2, 6)
- 纵向合并 vstack :列的个数相同
- 横向合并 hstack :行的个数相同
通俗的讲就是
m * n 可以和m * k横向合并
m * n 可以和 k * n 纵向合并
- numpy中的函数计算
以下示例摘自 NumPy 官方 API 文档(np.sum)
>>> np.sum([0.5, 1.5])
2.0
>>> np.sum([0.5, 0.7, 0.2, 1.5], dtype=np.int32)
1 #这里是1,因为先把所有的数据转成int32类型再相加
>>> np.sum([[0, 1], [0, 5]])
6 #什么都不写就是全加起来
>>> np.sum([[0, 1], [0, 5]], axis=0)
array([0, 6]) #axis = 0代表列相加
>>> np.sum([[0, 1], [0, 5]], axis=1)
array([1, 5]) #axis = 1代表行相加
- 元素类型
a1 = np.array([1, 2, 3, None])
print(a1) # [1 2 3 None]
a1 = np.array([1, 2, 3, None, np.nan])
print(a1) # [1 2 3 None nan]
print(a1.dtype) # object
- 插入与删除
np.delete 下标删除
import numpy as np
myArray1 = np.array([11, 12, 13, 14, 15, 16, 17])
a = np.delete(myArray1, 2)
print(a) # [11 12 14 15 16 17]
print(myArray1) # [11 12 13 14 15 16 17]
insert 下标插入
a = np.insert(myArray1,2,97)
print(a) # [11 12 97 13 14 15 16 17]
print(myArray1) # [11 12 13 14 15 16 17]
- 缺失值处理(np.nan)
即 np.nan 是 float 类型,可以参加算术运算(结果仍为 nan),None 却不行
- 判断是否有缺失值 np.isnan()
- np.any() 是否至少有1个
- np.all() 是否全部
import numpy as np
a = np.array([1, np.nan, 2, 3, 4, 5, 6])
print(np.isnan(a)) # [False True False False False False False]
print(np.any(np.isnan(a))) # True
print(np.all(np.isnan(a))) #False
print(np.nansum(a)) #21.0 可以求和操作,类型为float
print(np.sum(a)) #nan
- ndarray的广播规则
如果列数一样但行数不一样(其中一个只有一行),进行以行为单位的广播操作
import numpy as np
a = np.array([1, 2, 3, 4])
b = np.arange(1, 13).reshape(3, 4)
print(a + b)
'''
[[ 2 4 6 8]
[ 6 8 10 12]
[10 12 14 16]]
'''
- 排序
import numpy as np
a = np.array([1, 2, 3, 5, 7, 4, 6, 8, 10])
'排序返回结果'
print(np.sort(a)) # [ 1 2 3 4 5 6 7 8 10]
'排序返回下标'
print(np.argsort(a)) # [0 1 2 5 3 6 4 7 8]
'按行排序'
a = a.reshape(3, 3)
print(a)
'''
[[ 1 2 3]
[ 5 7 4]
[ 6 8 10]]
'''
# 按行排序
print(np.sort(a, axis=1))
'''
[[ 1 2 3]
[ 4 5 7]
[ 6 8 10]]
'''