numpy基础
import numpy as np
定义array
In [156]: np.ones(3) Out[156]: array([1., 1., 1.]) In [157]: np.ones((3,5)) Out[157]: array([[1., 1., 1., 1., 1.], [1., 1., 1., 1., 1.], [1., 1., 1., 1., 1.]]) In [158]: In [158]: np.zeros(4) Out[158]: array([0., 0., 0., 0.]) In [159]: np.zeros((2,5)) Out[159]: array([[0., 0., 0., 0., 0.], [0., 0., 0., 0., 0.]]) In [160]: In [146]: a = np.array([[1,3,5,2],[4,2,6,1]]) In [147]: print(a) [[1 3 5 2] [4 2 6 1]] In [148]: In [161]: np.arange(10) Out[161]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) In [162]: np.arange(3,13) Out[162]: array([ 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]) In [163]: np.arange(3,13).reshape((2,5)) Out[163]: array([[ 3, 4, 5, 6, 7], [ 8, 9, 10, 11, 12]]) In [164]: In [169]: np.arange(2,25,2) Out[169]: array([ 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24]) In [170]: np.arange(2,25,2).reshape(3,4) Out[170]: array([[ 2, 4, 6, 8], [10, 12, 14, 16], [18, 20, 22, 24]]) In [171]: In [176]: np.linspace(1,10,4) Out[176]: array([ 1., 4., 7., 10.]) In [177]:
array基本运算
In [7]: a = np.array([[1,2],[3,4]]) In [8]: b = np.arange(5,9).reshape((2,2)) In [10]: print(a) [[1 2] [3 4]] In [11]: print(b) [[5 6] [7 8]] In [12]: In [12]: a+b Out[12]: array([[ 6, 8], [10, 12]]) In [13]: a-b Out[13]: array([[-4, -4], [-4, -4]]) In [14]: a*b # 对应元素相乘 Out[14]: array([[ 5, 12], [21, 32]]) In [17]: a/b # 此处为 Python 2 下的整数整除结果;Python 3 下 a/b 得到浮点数,整除请用 a//b Out[17]: array([[0, 0], [0, 0]]) In [18]: In [18]: a**2 Out[18]: array([[ 1, 4], [ 9, 16]]) In [19]: In [15]: np.dot(a,b) # 矩阵乘法 Out[15]: array([[19, 22], [43, 50]]) In [16]: a.dot(b) Out[16]: array([[19, 22], [43, 50]]) In [17]: In [54]: print(a) [[ 2 3 4 5] [ 6 7 8 9] [10 11 12 13]] In [55]: np.sum(a) Out[55]: 90 In [56]: np.min(a) Out[56]: 2 In [57]: np.max(a) Out[57]: 13 In [58]: In [58]: np.sum(a,axis=1) Out[58]: array([14, 30, 46]) In [59]: np.sum(a,axis=0) Out[59]: array([18, 21, 24, 27]) In [60]: # 三角函数结合random生成一组随机数据 In [74]: N = 10 In [75]: t = np.linspace(0, 2*np.pi, N) In [76]: print(t) [0. 0.6981317 1.3962634 2.0943951 2.7925268 3.4906585 4.1887902 4.88692191 5.58505361 6.28318531] In [77]: y = np.sin(t) + 0.02*np.random.randn(N) In [78]: print(y) [-0.00947902 0.64196198 0.96567468 0.89394571 0.33830193 -0.3015316 -0.86943758 -0.95954123 -0.62526393 0.02872202] In [79]: M = 3 In [80]: for ii, vv in zip(np.random.rand(M)*N, np.random.randn(M)): ...: y[int(ii):] += vv ...: In [81]: print(y) [-0.00947902 0.64196198 1.47685437 1.55309848 0.99745469 0.35762117 -0.21028481 -0.30038846 -0.29746375 0.35652221] In [82]: In [101]: a = np.arange(2,14).reshape((3,4)) In [102]: print(a) [[ 2 3 4 5] [ 6 7 8 9] [10 11 12 13]] In [103]: print(np.argmin(a)) # 最小值的索引 0 In [104]: print(np.argmax(a)) # 最大值的索引 11 In [105]: np.cumsum(a) # 从0元素开始的累计和 Out[105]: array([ 2, 5, 9, 14, 20, 27, 35, 44, 54, 65, 77, 90]) In [106]: np.cumprod(a) # 从1元素开始的累计乘 Out[106]: array([ 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800, 39916800, 479001600, 6227020800]) In [107]: In [129]: a Out[129]: array([[ 2, 3, 4, 5], [ 6, 7, 8, 9], [10, 11, 12, 13]]) In [130]: 
np.cumsum(a,axis=1) Out[130]: array([[ 2, 5, 9, 14], [ 6, 13, 21, 30], [10, 21, 33, 46]]) In [131]: np.cumsum(a,axis=0) Out[131]: array([[ 2, 3, 4, 5], [ 8, 10, 12, 14], [18, 21, 24, 27]]) In [132]: In [133]: np.cumprod(a,axis=1) Out[133]: array([[ 2, 6, 24, 120], [ 6, 42, 336, 3024], [ 10, 110, 1320, 17160]]) In [134]: np.cumprod(a,axis=0) Out[134]: array([[ 2, 3, 4, 5], [ 12, 21, 32, 45], [120, 231, 384, 585]]) In [135]: In [146]: a = np.array([[1,3,5,2],[4,2,6,1]]) In [147]: print(a) [[1 3 5 2] [4 2 6 1]] In [148]: a.shape Out[148]: (2, 4) In [149]: a.ndim Out[149]: 2 In [150]: a.size Out[150]: 8 In [151]: np.diff(a) # 累差运算 Out[151]: array([[ 2, 2, -3], [-2, 4, -5]]) In [152]: np.diff(a,axis=1) Out[152]: array([[ 2, 2, -3], [-2, 4, -5]]) In [153]: np.diff(a,axis=0) Out[153]: array([[ 3, -1, 1, -1]]) In [154]: In [108]: a = np.array([10,7,11,9,8,13,12,9]) In [109]: a.ndim Out[109]: 1 In [110]: a.shape Out[110]: (8,) In [111]: a.size Out[111]: 8 In [112]: a.mean() # 均值 Out[112]: 9.875 In [113]: a.var() # 方差 Out[113]: 3.609375 In [114]: a.std() # 标准差 Out[114]: 1.899835519196333 In [115]: In [117]: np.median(a) # 中位数 Out[117]: 9.5 In [118]: In [138]: z = (a-a.mean())/a.std() # z-score In [139]: print(z) [ 0.06579517 -1.5132889 0.59215653 -0.46056619 -0.98692754 1.64487924 1.11851788 -0.46056619] In [140]: In [198]: a = np.arange(-3,3).reshape((2,3)) In [199]: a Out[199]: array([[-3, -2, -1], [ 0, 1, 2]]) In [200]: np.nonzero(a) # 查找非0元素 Out[200]: (array([0, 0, 0, 1, 1]), array([0, 1, 2, 1, 2])) In [201]: print(np.nonzero(a)) (array([0, 0, 0, 1, 1]), array([0, 1, 2, 1, 2])) In [202]: In [207]: a = np.arange(14,2,-1).reshape((3,4)) In [208]: print(a) [[14 13 12 11] [10 9 8 7] [ 6 5 4 3]] In [209]: np.sort(a) # 排序 Out[209]: array([[11, 12, 13, 14], [ 7, 8, 9, 10], [ 3, 4, 5, 6]]) In [210]: In [210]: np.sort(a,axis=1) Out[210]: array([[11, 12, 13, 14], [ 7, 8, 9, 10], [ 3, 4, 5, 6]]) In [211]: np.sort(a,axis=0) Out[211]: array([[ 6, 5, 4, 3], [10, 9, 8, 7], [14, 13, 12, 
11]]) In [212]: # 矩阵的转置 In [212]: a = np.arange(14,2,-1).reshape((3,4)) In [213]: print(a) [[14 13 12 11] [10 9 8 7] [ 6 5 4 3]] In [214]: In [215]: print(np.transpose(a)) [[14 10 6] [13 9 5] [12 8 4] [11 7 3]] In [216]: a.T Out[216]: array([[14, 10, 6], [13, 9, 5], [12, 8, 4], [11, 7, 3]]) In [217]: In [220]: a.T.dot(a) # 先转置,再进行矩阵乘法 Out[220]: array([[332, 302, 272, 242], [302, 275, 248, 221], [272, 248, 224, 200], [242, 221, 200, 179]]) In [221]: # 矩阵的clip,处理最大值和最小值 In [221]: print(a) [[14 13 12 11] [10 9 8 7] [ 6 5 4 3]] In [222]: np.clip(a,5,11) Out[222]: array([[11, 11, 11, 11], [10, 9, 8, 7], [ 6, 5, 5, 5]]) In [223]:
卷积运算
numpy.convolve(weight,array) weight = [a,b,c] array = [i,j,k,m,n] Result:[ai, bi+aj, ci+bj+ak, cj+bk+am, ck+bm+an, cm+bn, cn][N-1:-N+1](其中 N = len(weight),此处 N=3;切片后只保留窗口完全重叠的部分,即 [ci+bj+ak, cj+bk+am, ck+bm+an]) 针对移动平均算法来预测下一个数据,越接近待预测点的数据权重越大, 那么就需要让 i, j, k, m, n 的系数逐渐增大即可;即让 a > b > c ,并且 a+b+c=1 。 示例: In [223]: weight = np.ones(3)/3 In [224]: print(weight) [0.33333333 0.33333333 0.33333333] In [225]: arr = np.array([8,11,9,7,10]) In [226]: np.convolve(weight,arr) Out[226]: array([2.66666667, 6.33333333, 9.33333333, 9. , 8.66666667, 5.66666667, 3.33333333]) In [227]: In [227]: weight = np.array([0.8,0.1,0.1]) In [228]: np.convolve(weight,arr) Out[228]: array([6.4, 9.6, 9.1, 7.6, 9.6, 1.7, 1. ]) In [229]:
random常用操作
# 生成随机浮点数,范围是[0.0, 1.0)(左闭右开) In [19]: a = np.random.random((2,3)) In [20]: print(a) [[0.02185901 0.69585563 0.04555439] [0.37331857 0.32903986 0.62448246]] In [21]: # 生成随机整数,可指定起止范围(左闭右开,即[low, high)) In [48]: np.random.randint(3) Out[48]: 2 In [49]: np.random.randint(low=3,high=9) Out[49]: 6 In [50]: np.random.randint(low=3,high=9,size=(3,4)) Out[50]: array([[5, 6, 7, 8], [8, 7, 3, 8], [5, 4, 5, 5]]) In [51]: In [68]: np.random.randint(low=-5,high=2,size=(3,4)) Out[68]: array([[-4, -4, -2, 1], [ 1, 0, 0, 1], [-4, -3, 1, -5]]) In [69]: # 生成正态分布,又名高斯分布(Gaussian distribution)随机数 In [64]: np.random.normal() Out[64]: -0.5399414561419419 In [65]: np.random.normal(loc=0,scale=1,size=(2,3)) Out[65]: array([[-0.50318082, -0.38614219, 0.30450427], [ 0.41711087, 0.29990928, -0.7843322 ]]) In [66]: In [66]: np.random.normal(loc=2,scale=3,size=(2,3)) Out[66]: array([[ 3.37067379, 6.23517315, 2.3267659 ], [ 6.46832646, -2.76363304, 5.77883853]]) In [67]: # 生成标准正态分布("standard normal" distribution)随机数,标准正态分布的平均值为0,方差为1,服从N(0,1)分布。 In [83]: np.random.randn() Out[83]: 0.502482341264108 In [84]: np.random.randn(3,4) Out[84]: array([[ 0.34507555, -0.26868132, -0.56103417, 0.86176617], [-0.16535555, -0.38045904, 0.48176385, -1.09005206], [-0.60780266, 1.74113117, -0.72427329, -0.51232408]]) In [85]: # 生成[0, 1)间均匀分布的随机数 In [99]: np.random.rand() Out[99]: 0.607701127768974 In [100]: np.random.rand(3,4) Out[100]: array([[0.73020695, 0.53993878, 0.46693879, 0.82611629], [0.76117076, 0.16522599, 0.85129611, 0.74448772], [0.6450236 , 0.49994053, 0.04115063, 0.30081311]]) In [101]: