数组的创建、访问、运算、IO
创建
#一维
>>> import numpy as np >>> list_1 = [1, 2, 3, 4] >>> array_1 = np.array(list_1) >>> array_1
# 结果 array([1, 2, 3, 4])
#二维
>>> list_2 = [5, 6, 7, 8] >>> array_2 = np.array([list_1, list_2]) >>> array_2
# 结果 array([[1, 2, 3, 4], [5, 6, 7, 8]])
#形状和类型
>>> array_2.shape (2, 4)
#类型 >>> array_2.dtype dtype('int32')
#混合类型以最高级别为准 >>> array_3 = np.array([[1.0,2.0,3.0], [4.0, 3, 4]]) >>> array_3.dtype dtype('float64') # int和float则都转换为float
#用arange按步长生成一维数组
>>> array_4 = np.arange(1, 10, 2) >>> array_4 array([1, 3, 5, 7, 9])
#0数组
>>> np.zeros(5) array([0., 0., 0., 0., 0.])
>>> np.zeros([2,3]) array([[0., 0., 0.], [0., 0., 0.]])#单位矩阵
>>> np.eye(5) array([[1., 0., 0., 0., 0.], [0., 1., 0., 0., 0.], [0., 0., 1., 0., 0.], [0., 0., 0., 1., 0.], [0., 0., 0., 0., 1.]]) >>> np.eye(5).dtype dtype('float64')
# 数组查询
>>> a = np.arange(1, 10) >>> a array([1, 2, 3, 4, 5, 6, 7, 8, 9])# 切片查询
>>> a[1:5] array([2, 3, 4, 5])# 下标查询
>>> b = np.array([[1,2,4],[4,5,6]]) >>> b array([[1, 2, 4], [4, 5, 6]]) >>> b[1] array([4, 5, 6]) >>> b[1][0] 4# 行列查询
>>> c = np.array([[1,2,3],[4,5,6],[7,8,9]]) >>> c[:2,1:] # 第0,1行,第1,2列 array([[2, 3], [5, 6]])
数组与矩阵运算
# 快速创建数组
>>> import numpy as np >>> np.random.randn(10) # 服从标准正态分布的随机数 array([ 0.14678535, -0.33388966, -0.15975914, -0.67999109, -0.31777257, -0.54772809, -0.80154849, 0.53322627, 0.01623595, -0.38386958])# 一维
>>> np.random.randint(10, size=20) array([6, 1, 9, 0, 3, 1, 9, 4, 7, 4, 9, 2, 2, 1, 3, 5, 5, 3, 3, 5])# 二维
>>> np.random.randint(10, size=(2,3)) array([[4, 2, 7], [1, 5, 5]])# 指定形状
>>> np.random.randint(10, size = 20).reshape(4,5) array([[4, 8, 6, 3, 4], [9, 2, 4, 4, 2], [4, 1, 8, 9, 4], [9, 4, 7, 6, 5]])#数组运算,不要用除法,可能会除到0
>>> a = np.random.randint(10, size=(4,5)) >>> b = np.random.randint(10, size=(4,5)) >>> a array([[5, 6, 6, 2, 7], [0, 4, 0, 2, 9], [6, 1, 2, 2, 5], [7, 7, 3, 0, 3]]) >>> b array([[5, 2, 4, 6, 5], [2, 9, 2, 6, 7], [5, 6, 1, 1, 6], [9, 9, 0, 1, 1]]) >>> a+b array([[10, 8, 10, 8, 12], [ 2, 13, 2, 8, 16], [11, 7, 3, 3, 11], [16, 16, 3, 1, 4]]) >>> a*b array([[25, 12, 24, 12, 35], [ 0, 36, 0, 12, 63], [30, 6, 2, 2, 30], [63, 63, 0, 0, 3]]) >>> a-b array([[ 0, 4, 2, -4, 2], [-2, -5, -2, -4, 2], [ 1, -5, 1, 1, -1], [-2, -2, 3, -1, 2]])# 矩阵 mat可以把数组转换为矩阵
>>> x =np.mat([[1,2,3],[4,5,6]]) >>> x matrix([[1, 2, 3], [4, 5, 6]])
# 矩阵求逆
>>> import numpy.linalg as nlg >>> a = np.random.rand(3,3) >>> a = np.mat(a) >>> ia = nlg.inv(a) >>> a # 原来的矩阵 matrix([[0.33966834, 0.45563803, 0.48934208], [0.63242106, 0.40535562, 0.64414694], [0.11362688, 0.03605258, 0.54176002]]) >>> ia # 逆矩阵 matrix([[-2.91202691, 3.39872493, -1.41077406], [ 3.99517492, -1.90420285, -1.34454505], [ 0.34489107, -0.58611764, 2.23120227]])# 求特征值和特征向量:
>>> a = np.random.randint(10, size=9).reshape(3,3) >>> ia = nlg.inv(a) >>> a array([[0, 0, 2], [1, 0, 3], [3, 2, 4]]) >>> ia array([[-1.5 , 1. , 0. ], [ 1.25, -1.5 , 0.5 ], [ 0.5 , 0. , 0. ]]) >>> eig_value, eig_vector = nlg.eig(a) # 特征值和特征向量 >>> eig_value # 特征值 array([ 6.08139 , -0.38854339, -1.69284661]) >>> eig_vector # 特征向量 array([[-0.27717773, -0.67564396, -0.62705837], [-0.46134462, 0.72544913, -0.57017102], [-0.84281294, 0.1312585 , 0.53075682]])# 按列拼接两个向量成一个矩阵:
>>> a = np.array((1,2,3)) >>> b = np.array((2,3,4)) >>> np.column_stack((a,b)) array([[1, 2], [2, 3], [3, 4]])# 在循环处理某些数据得到结果后,将结果拼接成一个矩阵是十分有用的,可以通过vstack和hstack完成:
>>> a = np.random.rand(2,2) >>> b = np.random.rand(2,2) >>> a array([[0.02417352, 0.1751437 ], [0.7935174 , 0.49437038]]) >>> b array([[0.07896746, 0.49799295], [0.85114061, 0.59149098]]) >>> c = np.hstack([a,b]) # 水平拼接 >>> d = np.vstack([a,b]) # 垂直拼接 >>> c array([[0.02417352, 0.1751437 , 0.07896746, 0.49799295], [0.7935174 , 0.49437038, 0.85114061, 0.59149098]]) >>> d array([[0.02417352, 0.1751437 ], [0.7935174 , 0.49437038], [0.07896746, 0.49799295], [0.85114061, 0.59149098]])# 缺失值: 缺失值在分析中也是信息的一种,NumPy提供nan作为缺失值的记录,通过isnan判定。
>>> a = np.random.rand(2,2) >>> a[0][1] = np.nan >>> np.isnan(a) array([[False, True], [False, False]]) >>> a array([[0.40745658, nan], [0.34749342, 0.77278907]]) >>> np.nan_to_num(a) array([[0.40745658, 0. ], [0.34749342, 0.77278907]])# Array常用函数
>>> a = np.random.randint(10, size=20).reshape(4,5) >>> a array([[9, 2, 3, 8, 3], [5, 3, 2, 2, 8], [2, 0, 2, 1, 5], [3, 6, 9, 2, 2]]) >>> np.unique(a) # 去重函数 array([0, 1, 2, 3, 5, 6, 8, 9]) >>> >>> sum(a) # 求各列的和 array([19, 11, 16, 13, 18]) >>> >>> sum(a[0]) # 求第0行的和 25 >>> sum(a[:,0]) # 求第0列的和 19 >>> max(a[0]) # 求第0行的最大值 9 >>> min(a[0]) # 求第0行的最小值 2 >>> max(a[:,1]) # 求第1列的最大值 6 >>> min(a[:,1]) # 求第1列的最小值 0 >>> a.max() # 矩阵的最大值 9 >>> a.min() # 矩阵的最小值 0
Array的input和output
# 使用pickle序列化numpy array:
>>> import pickle >>> x = np.arange(10) >>> x array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) >>> f = open('x.pkl','wb') # 打开或者创建一个文件 >>> pickle.dump(x,f) # 序列化x到f中 >>> f.close() # 关闭文件,确保数据已写入磁盘 >>> f = open('x.pkl','rb') # 打开x.pkl文件 >>> pickle.load(f) # 加载 array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) >>> f.close() # 读取完毕后关闭文件
# 使用numpy:
>>> import pickle >>> x = np.arange(10) >>> x array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) >>> np.save('one_array',x) # 保存单个数组 >>> np.load('one_array.npy') # 加载 array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) >>> y = np.arange(20) >>> y array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]) # 打包(np.savez不压缩;需要压缩请用np.savez_compressed) >>> np.savez('two_array.npz', a=x, b=y) # 将多个数组保存到同一个.npz文件 >>> np.load('two_array.npz') # 直接加载返回的是NpzFile对象(显示的是对象地址) <numpy.lib.npyio.NpzFile object at 0x02CD0870> # 加载 >>> c = np.load('two_array.npz') >>> c['a'] array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) >>> c['b'] array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19])