python中numpy库使用

转自 http://www.jb51.net/article/137235.htm和https://blog.csdn.net/jiasudu1234/article/details/68944954

import numpy as np#导入numpy
# --- Array creation, dtype conversion, indexing and slicing ---
data = [[1, 2, 3], [4, 5, 6]]  # nested list (not a tuple): 2 rows x 3 columns
arr = np.array(data)  # convert the nested list into a 2-D ndarray
arr.ndim  # number of dimensions -> 2
arr.shape  # (rows, columns) -> (2, 3)
arr.dtype  # element type; the platform default integer (int32 or int64)
np.zeros(10)  # 1-D array of ten zeros
np.zeros((3, 6))  # 3x6 array of zeros
np.ones(5)  # array([1., 1., 1., 1., 1.])
np.ones((3, 4))  # 3x4 array of ones
ar1 = np.arange(10)  # array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
np.eye(3)  # 3x3 identity matrix
np.identity(3)  # 3x3 identity matrix
np.ones_like(arr)  # ones with the same shape as arr
np.zeros_like(arr)  # zeros with the same shape as arr
arr_f = arr.astype(np.float64)  # copy of arr converted to float64
# np.string_ was removed in NumPy 2.0; np.bytes_ is the same byte-string type.
Nstring = np.array(['1.2', '3', '4'], dtype=np.bytes_)
# The np.float alias was removed in NumPy 1.24; it meant float64, so name that
# type explicitly when parsing the byte strings into numbers.
N_float = Nstring.astype(np.float64)

# --- Element-wise arithmetic ---
arr * arr  # every element squared
arr * 5  # every element multiplied by 5

# --- Indexing and slicing ---
ar1_s = ar1[5:8]  # a basic slice is a view onto ar1, not a copy
ar1[5:8] = 12  # writing through ar1 is visible through ar1_s
ar1_s[:] = 100  # writing through the view changes ar1 as well
ar1_s1 = ar1[5:8].copy()  # independent copy; later edits do not touch ar1
aro = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])  # 2-D array, shape (3, 3)
aro[2]  # third row -> array([7, 8, 9])
aro[0][2]  # row 0, column 2 -> 3; equivalent to aro[0, 2]
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])  # 3-D array, shape (2, 2, 3)
# --- Boolean indexing ---
names = np.array(['bob', 'joe', 'will', 'bob', 'will', 'joe', 'joe'])
data = np.random.randn(7, 4)
names == 'bob'  # element-wise comparison -> boolean array
# Select the rows whose mask entry is True; the mask length must equal the
# length of the axis being indexed. (Comparison is `==`, not `=`.)
data[names == 'bob']
data[names == 'bob', 2:]  # matching rows, columns from index 2 onward
data[names == 'bob', 3]  # matching rows, column index 3 only
data[names == 'bob'][1, :]  # second of the matching rows (counting from 0)
names != 'bob'  # mask of every name that is not 'bob'
# Same mask as names != 'bob'. Use ~ to invert: unary minus on a boolean
# array raises TypeError in current NumPy.
~(names == 'bob')
# Element-wise OR is |, AND is &; the Python keywords and/or do not work on arrays.
mask = (names == 'bob') | (names == 'will')
data[data < 0] = 0  # clamp every negative entry to 0
data[names != 'joe'] = 7  # overwrite every row not belonging to 'joe' with 7
# --- Fancy (integer-array) indexing ---
arr0 = np.empty((8, 4))  # allocate only; contents are arbitrary until assigned
for i in range(8):
    arr0[i] = i  # fill row i with the value i
arr0[[2, 3]]  # rows 2 and 3
arr0[[-7, -5]]  # negative indices count from the end -> rows 1 and 3
arr0[[-1, 2]]  # positive and negative indices can be mixed -> rows 7 and 2
arr1 = np.arange(32).reshape((8, 4))  # 0..31 laid out row by row as 8x4
# Paired index lists pick individual elements: (1,0), (5,3), (4,1), (2,2).
arr1[[1, 5, 4, 2], [0, 3, 1, 2]]
# Rows 1,5,4,2 first, then columns 0,3,1,2 of that result -> 4x4 sub-matrix.
arr1[[1, 5, 4, 2]][:, [0, 3, 1, 2]]
# np.ix_ builds the same 4x4 row/column selection in a single indexing step.
arr1[np.ix_([1, 5, 4, 2], [0, 3, 1, 2])]
arr1T = np.dot(arr1.T, arr1)  # matrix product (arr1 transposed) @ arr1, shape (4, 4)
'''通用函数 ufunc'''
# Universal functions operate element by element on whole arrays.
arr2 = np.arange(10)
# Square root of every element.
np.sqrt(arr2)
# e raised to every element.
np.exp(arr2)

# Two independent draws of eight standard-normal samples.
x = np.random.randn(8)
y = np.random.randn(8)
# Pairwise comparison of x and y, keeping the larger value at each position.
np.maximum(x, y)

# Ten evenly spaced integers: -5, -4, ..., 4 (step 1, end value excluded).
points = np.arange(-5, 5)
# xs repeats points along every row, ys repeats points down every column.
xs, ys = np.meshgrid(points, points)

import matplotlib.pyplot as plt
z = np.sqrt(xs**2 + ys**2)  # sqrt(x^2 + y^2) evaluated at every grid point
plt.imshow(z, cmap=plt.cm.gray)  # render z as a grayscale image
plt.colorbar()
# Raw string: "\s" is not a valid escape sequence, so the LaTeX backslash
# must not be interpreted by Python. The resulting text is unchanged.
plt.title(r'Imange plot of $\sqrt{x^2+y^2}$ for a grid of values')
plt.show()  # display the figure
# --- np.where: vectorised form of the expression "x if condition else y" ---
xarr = np.array([1, 2, 3, 4, 5])
yarr = np.array([6, 7, 8, 9, 10])
cond = np.array([True, False, True, True, False])
result = np.where(cond, xarr, yarr)  # [1, 7, 3, 4, 10]: xarr where cond is True, else yarr
arrc = np.random.randn(4, 4)  # 4x4 standard-normal samples (unlike zeros/ones, values are random)
np.where(arrc > 0, 2, -2)  # 2 where the element is positive, -2 everywhere else

# --- Basic statistics ---
arr = np.arange(16).reshape((2, 8))  # the integers 0..15 as a 2x8 array
arr.mean()  # mean of all elements
np.mean(arr)  # same, function form
arr.sum()  # sum of all elements
np.sum(arr)  # same, function form
arr.sum(axis=0)  # collapse the rows: one sum per column
arr.sum(axis=1)  # collapse the columns: one sum per row
np.sum(arr, axis=1)  # same, function form

# --- Sorting ---
arr = np.random.randn(8)
arr.sort()  # in-place sort; arr itself is now ordered
arr = np.random.randn(5, 3)
arr.sort(1)  # in-place sort along axis 1: every row is sorted independently
np.sort(arr, 0)  # sorted copy along axis 0 (every column); arr is left unchanged
large_arr = np.random.randn(1000)  # 1000 standard-normal samples
large_arr.sort()
large_arr[int(0.05 * len(large_arr))]  # element at the 5% position of the sorted data
# --- Set routines for 1-D arrays ---
names = np.array(['bob', 'joe', 'will', 'bob', 'will', 'joe', 'joe'])
np.unique(names)  # sorted unique values
sorted(set(names))  # pure-Python equivalent
values = np.array([6, 0, 0, 3, 2, 5, 6])
# np.isin replaces np.in1d, which is deprecated since NumPy 2.0.
np.isin(values, [2, 3, 6])  # per element of values: is it one of 2, 3, 6? -> boolean array
np.isin([2, 3, 6], values)  # per element of [2, 3, 6]: does it occur in values? -> all True
x = np.array([1, 2, 7, 6, 3])
y = np.array([2, 4, 8, 3, 9, 7, 7])

np.intersect1d(x, y)  # sorted values present in both x and y
np.union1d(x, y)  # sorted union without duplicates
np.isin(x, y)  # per element of x: does it occur in y? -> boolean array
np.setdiff1d(x, y)  # set difference: values in x that are not in y
np.setxor1d(x, y)  # symmetric difference: values in exactly one of the two arrays
# --- File input/output ---
# NumPy can read and write binary or text data on disk; pandas can load
# tabular data into memory.
# np.save / np.load write and read a single array in .npy format.
arr = np.arange(10)
np.save('some_arr', arr)  # writes some_arr.npy (the extension is appended)
np.load("some_arr.npy")  # read the array back from disk
arr2 = np.arange(-5, 5, 0.1)
# np.savez stores several arrays in one .npz archive, each under its keyword
# name. The archive is NOT compressed; np.savez_compressed does that.
np.savez('arr_ar.npz', a=arr, b=arr2)
ach = np.load("arr_ar.npz")  # dict-like object; arrays are loaded on access
ach['a']  # look an array up by the keyword it was saved under
arr = np.loadtxt("code_public.txt", delimiter=',')  # parse a comma-separated text file
# Raw string so the backslashes of the Windows path are taken literally.
arrc = np.loadtxt(r"d:\code\code_public.txt", delimiter=',')  # same, from drive D:
# --- Linear algebra ---
x = np.array([[1, 2, 3], [4, 5, 6]])
y = np.array([[6, 23], [-1, 7], [8, 9]])
x.dot(y)  # matrix product of x (2x3) and y (3x2) -> 2x2
samples = np.random.normal(size=(4, 4))  # 4x4 matrix of normally distributed samples
# Matrix routines (determinant, inverse, eigen-decomposition, ...). The
# wildcard import is kept from the original; the calls below are written
# fully qualified and do not rely on it.
from numpy.linalg import *
np.linalg.det(samples)  # determinant
np.linalg.inv(samples)  # matrix inverse
np.diag(samples)  # main-diagonal elements
np.linalg.eig(samples)  # eigenvalues and eigenvectors
import timeit  # timing utilities

# "timeit <stmt>" is IPython magic syntax and a SyntaxError in plain Python;
# timeit.timeit is the standard-library equivalent. It returns the total
# seconds taken by `number` calls. size must be given to draw two samples.
timeit.timeit(lambda: np.random.normal(size=2), number=1000)

where（）的用法

首先强调一下，where()函数对于不同的输入，返回的值是不同的。

1当数组是一维数组时，返回的值是一维的索引，所以只有一组索引数组

2当数组是二维数组时，满足条件的数组值返回的是值的位置索引，因此会有两组索引数组来表示值的位置

例如

 
    >>> b = np.arange(10)
    >>> b
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    >>> np.where(b > 5)
    (array([6, 7, 8, 9], dtype=int64),)
         

             
         
 
    >>> a = np.reshape(np.arange(20), (4, 5))
    >>> a
    array([[ 0,  1,  2,  3,  4],
           [ 5,  6,  7,  8,  9],
           [10, 11, 12, 13, 14],
           [15, 16, 17, 18, 19]])
    >>> np.where(a > 10)
    (array([2, 2, 2, 2, 3, 3, 3, 3, 3], dtype=int64),
     array([1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=int64))
         

对numpy标准库里的解释做一个介绍：

1	`numpy.where(condition[, x, y])`

基于条件condition，返回值来自x或者y.

如果只给出condition，则返回condition.nonzero()，即condition中为true的元素的位置索引。

参数:	condition : 数组，bool值 When True, yield x, otherwise yield y. x, y : array_like, 可选 x与y的shape要相同，当condition中的值是true时返回x对应位置的值，false时返回y对应位置的值
返回值:	out : ndarray or tuple of ndarrays ①如果参数有condition，x和y，它们三个参数的shape是相同的。那么，当condition中的值是true时返回x对应位置的值，false时返回y对应位置的值。 ②如果参数只有condition的话，返回值是condition中元素值为true的位置索引，且是以元组形式返回，元组的元素是ndarray数组，表示位置的索引

参数:

condition : 数组，bool值

When True, yield x, otherwise yield y.

x, y : array_like, 可选

x与y的shape要相同，当condition中的值是true时返回x对应位置的值，false时返回y对应位置的值

返回值:

out : ndarray or tuple of ndarrays

①如果参数有condition，x和y，它们三个参数的shape是相同的。那么，当condition中的值是true时返回x对应位置的值，false时返回y对应位置的值。

②如果参数只有condition的话，返回值是condition中元素值为true的位置索引，且是以元组形式返回，元组的元素是ndarray数组，表示位置的索引

 
    >>> np.where([[True, False], [True, True]],
    ...          [[1, 2], [3, 4]],
    ...          [[9, 8], [7, 6]])
    array([[1, 8],
           [3, 4]])

    >>> np.where([[0, 1], [1, 0]])
    (array([0, 1]), array([1, 0]))
         
 
    >>> x = np.arange(9.).reshape(3, 3)
    >>> np.where(x > 5)
    (array([2, 2, 2]), array([0, 1, 2]))
    >>> x[np.where(x > 3.0)]               # Note: result is 1D.
    array([4., 5., 6., 7., 8.])
    >>> np.where(x < 5, x, -1)             # Note: broadcasting.
    array([[ 0.,  1.,  2.],
           [ 3.,  4., -1.],
           [-1., -1., -1.]])
         
 
Find the indices of elements of x that are in goodvalues.

    >>> goodvalues = [3, 4, 7]
    >>> ix = np.in1d(x.ravel(), goodvalues).reshape(x.shape)
    >>> ix
    array([[False, False, False],
           [ True,  True, False],
           [False,  True, False]], dtype=bool)
    >>> np.where(ix)
    (array([1, 1, 2]), array([0, 1, 1]))
         

两种方法的示例代码

第一种用法

np.where(conditions,x,y)

if （conditions成立）：

　　数组变x

else:

　　数组变y

 
          import numpy as np

          # Warm-up examples that the original listing kept disabled inside a
          # triple-quoted string; preserved here as comments.
          #
          # x = np.random.randn(4, 4)
          # print(np.where(x > 0, 2, -2))
          # # try it out
          # xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
          # yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
          # zarr = np.array([True, False, True, True, False])
          # result = [(x if c else y) for x, y, c in zip(xarr, yarr, zarr)]
          # print(result)
          # # np.where does the same job as the comprehension above
          # result = np.where(zarr, xarr, yarr)
          # print(result)

          # Something fun: encode a pair of boolean arrays as one code per position.
          #   cond1 True  and cond2 True  -> 0
          #   cond1 True  and cond2 False -> 1
          #   cond1 False and cond2 True  -> 2
          #   cond1 False and cond2 False -> 3
          cond2 = np.array([True, False, True, False])
          cond1 = np.array([True, True, False, False])

          # Approach 1: an explicit Python loop (long and clumsy).
          result = []
          for c1, c2 in zip(cond1, cond2):
              if c1 and c2:
                  result.append(0)
              elif c1:
                  result.append(1)
              elif c2:
                  result.append(2)
              else:
                  result.append(3)
          print(result)

          # Approach 2: nested np.where calls (fast and convenient).
          result = np.where(cond1 & cond2, 0,
                            np.where(cond1, 1,
                                     np.where(cond2, 2, 3)))
          print(result)

          # Approach 3: plain boolean arithmetic, no np.where involved at all.
          # ~ inverts a boolean array; unary minus on a boolean array raises
          # TypeError in current NumPy.
          result = 1 * (cond1 & ~cond2) + 2 * (cond2 & ~cond1) + 3 * (~(cond1 | cond2))
          print(result)

第二种用法

where(conditions)

相当于给出数组的下标

 
          x = np.arange(16)
          # Indexing x with the tuple returned by np.where yields the matching
          # VALUES, so this prints a plain array rather than an index tuple.
          print(x[np.where(x > 5)])
          # Output: [ 6  7  8  9 10 11 12 13 14 15]

          x = np.arange(16).reshape(-1, 4)  # -1 lets NumPy infer the row count (4)
          print(np.where(x > 5))
          # Output: (array([1, 1, 2, 2, 2, 2, 3, 3, 3, 3]), array([2, 3, 0, 1, 2, 3, 0, 1, 2, 3]))
          # (some platforms also print a dtype=int64 suffix)
          # Note: the first array holds the axis-0 (row) index of every match and
          # the second array the axis-1 (column) index.

 
          # A 3x3 boolean matrix; np.where reports where its True entries are.
          ix = np.array(
              [
                  [False, False, False],
                  [True, True, False],
                  [False, True, False],
              ],
              dtype=bool,
          )

          print(np.where(ix))
          # Output: (array([1, 1, 2]), array([0, 1, 1]))
          # (some platforms also print a dtype=int64 suffix)
          # i.e. the True entries sit at (1, 0), (1, 1) and (2, 1).
         

python中numpy库使用

猜你喜欢