导入numpy并查看版本
import numpy as np
np.__version__
'1.13.1'
什么是numpy?
即Numerical Python,Python经过扩展以后可以支持数组和矩阵类型,并包含大量的矩阵和数组的计算函数
numpy框架是后面机器学习和数据挖掘的基础,pandas、scipy、matplotlib等都是基于numpy
一、创建ndarray及查看数据类型
numpy中最基础数据结构就是ndarray:即数组
1. 使用np.array()由python list创建
data = [1,2,3]
nd = np.array(data)
nd
array([1, 2, 3])
type(data),type(nd)
(list, numpy.ndarray)
# 查看nd中的元素的类型
nd.dtype
dtype('int32')
nd2 = np.array([1,3,4.6,"fdsaf",True])
nd2
array(['1', '3', '4.6', 'fdsaf', 'True'], dtype='<U32')
nd2.dtype
dtype('<U32')
【注意】
1、数组中所有元素的类型都相同
2、如果数组是由列表来创建的,列表中元素类型不同的时候会被统一成某个类型 (优先级:str>float>int)
图片与array数组的关系
# 注:图片在numpy中也是一个数组
# 导入一张图片
import matplotlib.pyplot as plt  # 这个工具是数据可视化分析工具,在这里我用来导入图片
girl = plt.imread("./source/girl.jpg")
type(girl)  # 图片导入后是array类型的数组
numpy.ndarray
# 查看数组的形状
girl.shape  # shape属性是一个元组,元组的每一个元素代表了数组girl在这个维度上的元素个数
(900, 1440, 3)
girl
array([[[225, 231, 231], [229, 235, 235], [222, 228, 228], ..., [206, 213, 162], [211, 213, 166], [217, 220, 173]], [[224, 230, 230], [229, 235, 235], [223, 229, 229], ..., [206, 213, 162], [211, 213, 166], [217, 220, 173]], [[224, 230, 230], [229, 235, 235], [223, 229, 229], ..., [206, 213, 162], [211, 213, 166], [219, 221, 174]], ..., [[175, 187, 213], [180, 192, 218], [175, 187, 213], ..., [155, 162, 180], [153, 160, 178], [156, 163, 181]], [[175, 187, 213], [180, 192, 218], [174, 186, 212], ..., [155, 162, 180], [153, 160, 178], [155, 162, 180]], [[177, 189, 215], [181, 193, 219], [174, 186, 212], ..., [155, 162, 180], [153, 160, 178], [156, 163, 181]]], dtype=uint8)
# 用plt工具来显示一下图片
plt.imshow(girl)
plt.show()
创建一张图片
# 创建一张图片
boy = np.array([[[0.4,0.5,0.6],[0.8,0.8,0.2],[0.6,0.9,0.5]],
                [[0.12,0.32,0.435],[0.22,0.45,0.9],[0.1,0.2,0.3]],
                [[0.12,0.32,0.435],[0.12,0.32,0.435],[0.12,0.32,0.435]],
                [[0.12,0.32,0.435],[0.12,0.32,0.435],[0.12,0.32,0.435]]])
boy
array([[[ 0.4 , 0.5 , 0.6 ], [ 0.8 , 0.8 , 0.2 ], [ 0.6 , 0.9 , 0.5 ]], [[ 0.12 , 0.32 , 0.435], [ 0.22 , 0.45 , 0.9 ], [ 0.1 , 0.2 , 0.3 ]], [[ 0.12 , 0.32 , 0.435], [ 0.12 , 0.32 , 0.435], [ 0.12 , 0.32 , 0.435]], [[ 0.12 , 0.32 , 0.435], [ 0.12 , 0.32 , 0.435], [ 0.12 , 0.32 , 0.435]]])
plt.imshow(boy)
plt.show()
二维数组也可以表示一张图片,二维的图片是灰度级的
# 二维数组也可以表示一张图片,二维的图片是灰度级的
boy2 = np.array([[0.1,0.2,0.3,0.4],
                 [0.6,0.3,0.2,0.5],
                 [0.9,0.8,0.3,0.2]])
boy2
array([[ 0.1, 0.2, 0.3, 0.4], [ 0.6, 0.3, 0.2, 0.5], [ 0.9, 0.8, 0.3, 0.2]])
plt.imshow(boy2,cmap="gray")
plt.show()
图片切割:取出图片一部分
# 切图片
g = girl[:200,:300]
plt.imshow(g)
plt.show()
2. 使用np的常用函数创建
1)np.ones(shape,dtype=None,order='C')
np.ones((2,3,3,4,5))
# shape参数代表的是数组的形状,要求传一个元组或者列表,
# 元组的每一个元素代表创建出来的数组在该维度上的元素的个数
array([[[[[ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.]], [[ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.]], [[ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.]]], [[[ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.]], [[ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.]], [[ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.]]], [[[ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.]], [[ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.]], [[ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.]]]], [[[[ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.]], [[ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.]], [[ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.]]], [[[ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.]], [[ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.]], [[ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.]]], [[[ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.]], [[ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.]], [[ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.], [ 1., 1., 1., 1., 1.]]]]])
ones = np.ones((168,233,3))
plt.imshow(ones)
plt.show()
2)np.zeros(shape,dtype=float,order='C')
np.zeros((1,2,3))
array([[[ 0., 0., 0.], [ 0., 0., 0.]]])
3)np.full(shape,fill_value,dtype=None)
np.full((2,3),12)
array([[12, 12, 12], [12, 12, 12]])
4)np.eye(N,M=None,k=0,dtype=float)
np.eye(6)
array([[ 1., 0., 0., 0., 0., 0.], [ 0., 1., 0., 0., 0., 0.], [ 0., 0., 1., 0., 0., 0.], [ 0., 0., 0., 1., 0., 0.], [ 0., 0., 0., 0., 1., 0.], [ 0., 0., 0., 0., 0., 1.]])
np.eye(3,4)
array([[ 1., 0., 0., 0.], [ 0., 1., 0., 0.], [ 0., 0., 1., 0.]])
np.eye(5,4)
array([[ 1., 0., 0., 0.], [ 0., 1., 0., 0.], [ 0., 0., 1., 0.], [ 0., 0., 0., 1.], [ 0., 0., 0., 0.]])
5)np.linspace(start,stop,num=50)
np.linspace(1,10,num=100) # 在[start,stop]区间内等间隔地取num个点(默认包含两个端点,相邻两点间隔为(stop-start)/(num-1))
array([ 1. , 1.09090909, 1.18181818, 1.27272727, 1.36363636, 1.45454545, 1.54545455, 1.63636364, 1.72727273, 1.81818182, 1.90909091, 2. , 2.09090909, 2.18181818, 2.27272727, 2.36363636, 2.45454545, 2.54545455, 2.63636364, 2.72727273, 2.81818182, 2.90909091, 3. , 3.09090909, 3.18181818, 3.27272727, 3.36363636, 3.45454545, 3.54545455, 3.63636364, 3.72727273, 3.81818182, 3.90909091, 4. , 4.09090909, 4.18181818, 4.27272727, 4.36363636, 4.45454545, 4.54545455, 4.63636364, 4.72727273, 4.81818182, 4.90909091, 5. , 5.09090909, 5.18181818, 5.27272727, 5.36363636, 5.45454545, 5.54545455, 5.63636364, 5.72727273, 5.81818182, 5.90909091, 6. , 6.09090909, 6.18181818, 6.27272727, 6.36363636, 6.45454545, 6.54545455, 6.63636364, 6.72727273, 6.81818182, 6.90909091, 7. , 7.09090909, 7.18181818, 7.27272727, 7.36363636, 7.45454545, 7.54545455, 7.63636364, 7.72727273, 7.81818182, 7.90909091, 8. , 8.09090909, 8.18181818, 8.27272727, 8.36363636, 8.45454545, 8.54545455, 8.63636364, 8.72727273, 8.81818182, 8.90909091, 9. , 9.09090909, 9.18181818, 9.27272727, 9.36363636, 9.45454545, 9.54545455, 9.63636364, 9.72727273, 9.81818182, 9.90909091, 10. ])
np.logspace(1,10,num=10)
# 先在指数1到10之间等间隔取10个点(对应的分别是1、2、3...10)
# 再以10为底取幂:lg x = 1, lg x = 2, lg x = 3 ... => 返回值为10^1、10^2 .... 10^10
array([ 1.00000000e+01, 1.00000000e+02, 1.00000000e+03, 1.00000000e+04, 1.00000000e+05, 1.00000000e+06, 1.00000000e+07, 1.00000000e+08, 1.00000000e+09, 1.00000000e+10])
6)np.arange([start,]stop,[step,]dtype=None) "[]"中是可选项
np.arange(10)
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
np.arange(2,12)
array([ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
np.arange(2,12,2)
array([ 2, 4, 6, 8, 10])
7)np.random.randint(low,high=None,size=None,dtype='l')
np.random.randint(3,10,size=(10,10,3)) # 随机生成整数数组
array([[[4, 6, 6], [5, 9, 4], [5, 9, 6], [4, 6, 4], [7, 4, 9], [5, 9, 4], [8, 6, 3], [7, 5, 8], [8, 3, 4], [5, 4, 8]], [[6, 5, 8], [9, 3, 5], [8, 4, 4], [5, 9, 8], [8, 5, 6], [9, 4, 6], [5, 8, 8], [5, 7, 6], [3, 7, 9], [5, 5, 7]], [[4, 7, 5], [9, 4, 9], [3, 3, 4], [8, 4, 8], [3, 6, 3], [4, 4, 3], [4, 4, 5], [5, 5, 4], [5, 7, 9], [4, 4, 9]], [[6, 3, 8], [5, 9, 6], [5, 6, 7], [3, 8, 6], [3, 7, 8], [6, 9, 7], [6, 7, 3], [7, 5, 4], [3, 3, 6], [9, 9, 7]], [[3, 5, 6], [7, 4, 6], [5, 3, 7], [3, 6, 3], [8, 3, 8], [7, 9, 7], [8, 7, 9], [4, 7, 5], [8, 8, 6], [4, 5, 4]], [[4, 4, 9], [9, 8, 7], [6, 6, 6], [4, 9, 5], [6, 9, 6], [9, 4, 8], [4, 7, 9], [9, 4, 9], [6, 9, 3], [8, 5, 9]], [[7, 6, 3], [4, 5, 4], [5, 6, 7], [7, 3, 4], [7, 4, 8], [7, 5, 6], [4, 9, 9], [4, 4, 8], [9, 3, 6], [3, 6, 9]], [[7, 7, 4], [8, 6, 3], [3, 8, 7], [5, 6, 9], [5, 8, 4], [9, 4, 4], [3, 6, 6], [6, 7, 4], [4, 8, 8], [4, 6, 3]], [[7, 4, 9], [5, 3, 7], [5, 9, 4], [5, 7, 9], [7, 6, 6], [6, 3, 3], [9, 4, 4], [5, 3, 4], [5, 7, 9], [3, 3, 5]], [[7, 3, 8], [7, 6, 8], [5, 7, 4], [4, 4, 7], [4, 5, 9], [8, 3, 5], [5, 9, 9], [6, 3, 7], [9, 5, 7], [8, 5, 9]]])
8)np.random.randn(d0,d1,…,dn)
生成一个形状为(d0,d1,…,dn)的数组(每个参数是对应维度上的元素个数),数组中的数字服从标准正态分布
np.random.randn(2,3,10) # N(0,1)
array([[[-0.03414751, -1.01771263, 1.12067965, -0.43953023, -1.82364645, -0.0971702 , -0.65734554, -0.10303229, 1.52904104, -0.48624526], [-0.29295679, -1.09430988, 0.07499788, 0.31664607, 0.3500672 , -0.18508775, 1.75620537, 0.71531162, 0.6161491 , -1.22053836], [ 0.7323965 , 0.20671506, -0.58314419, -0.16540522, -0.23903187, 1.27785655, 0.26691062, -1.45973265, -0.27273178, -1.02878312]], [[ 0.07655004, -0.35616184, -0.46353849, -1.8515281 , -0.26543777, 0.76412627, 0.83337437, 0.04521198, -2.10686009, 0.84883742], [ 0.22188875, 0.63737544, 0.26173337, -0.11475485, -1.30431707, 1.25062924, 2.03032414, 0.13742253, -0.98713219, 1.19711129], [ 0.69212245, 0.70550039, -1.15995398, -0.95507681, -0.39439139, 2.76551965, 0.56088858, 0.54709151, 1.17615801, 0.17744971]]])
9)np.random.normal(loc=0.0,scale=1.0,size=None)
np.random.normal(175,20,size=100) # 服从均值loc=175、标准差scale=20的正态分布,即N(175,20²),生成100条数据
array([ 174.44281329, 177.66402876, 162.76426831, 210.11244283, 161.26671985, 209.52372115, 159.92703726, 197.83048917, 190.60230978, 170.27114821, 202.67422923, 203.04492988, 171.13235245, 175.64710565, 200.40533303, 207.930948 , 141.09792492, 158.87495159, 176.74197674, 164.57884322, 181.22386631, 156.26287142, 133.37408465, 178.07588597, 187.50842048, 186.35236779, 153.61560634, 145.53831704, 232.55949685, 142.01340562, 195.22465693, 188.922162 , 170.02159668, 167.74728882, 173.27258287, 187.68132279, 217.7260755 , 158.28833839, 155.11568289, 200.26945864, 178.91552559, 149.21007505, 200.6454259 , 169.37529856, 201.18878627, 184.37773296, 196.67909536, 144.10223051, 184.63682023, 167.86858875, 191.08394709, 169.98017168, 204.05198975, 199.65286793, 176.22452948, 181.17515804, 178.81440955, 176.79845708, 189.50950157, 136.05787608, 199.35198398, 162.43654974, 155.61396415, 172.22147069, 181.91161368, 192.82571507, 203.70689642, 190.79312957, 204.48924027, 180.48880551, 176.81359193, 145.87844077, 190.13853094, 160.22281705, 200.04783678, 165.19927728, 184.10218694, 178.27524256, 191.58148162, 141.4792985 , 208.4723939 , 163.70082179, 142.70675324, 189.25398816, 183.53849685, 150.86998696, 172.04187127, 207.12343336, 190.10648007, 188.18995666, 175.43040298, 183.79396855, 172.60260342, 195.1083776 , 194.70719705, 163.10904061, 146.78089275, 195.2271401 , 201.60339544, 164.91176955])
10)np.random.random(size=None)
np.random.random(size=(12,1)) # [0,1)区间内的随机浮点数
array([[ 0.54080763], [ 0.95618258], [ 0.19457156], [ 0.12198452], [ 0.3423529 ], [ 0.01716331], [ 0.28061005], [ 0.51960339], [ 0.60122982], [ 0.26462352], [ 0.85645091], [ 0.32352418]])
练习:用随机数生成一张图片
boy = np.random.random(size=(667,568,3))
plt.imshow(boy)
plt.show()
二、ndarray的常用属性
数组的常用属性:
维度 ndim, 大小 size, 形状 shape, 元素类型 dtype, 每项大小 itemsize, 数据 data
tigger = plt.imread("./source/tigger.jpg")
# 1、维度
tigger.ndim
3
# 2、大小,指的是一个数组中具体有多少个数字
tigger.size
2829600
# 3、形状
tigger.shape
(786, 1200, 3)
# 4、数据的类型
tigger.dtype
dtype('uint8')
# 5、每个数字的大小(占的字节数)
tigger.itemsize
1
t = tigger / 255.0
t.dtype
dtype('float64')
t.itemsize
8
# 6、data
tigger.data
<memory at 0x000001AA3A0D8138>
三、ndarray的基本操作
1、索引
l = [1,2,3,4,5,6]
l[5]
l[-1]
l[0]
l[-6]  # 正着数从0开始,倒着数从-1开始
1
nd = np.random.randint(0,10,size=(4))
nd
array([9, 6, 1, 7])
nd[0]
nd[1]
nd[-3]
6
lp = [[1,2,3],
      [4,5,6],
      [7,8]]
lp[1][2]
6
np.array(lp)
array([list([1, 2, 3]), list([4, 5, 6]), list([7, 8])], dtype=object)
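np.array(lp)
# 如果二维列表中,某个维度上的元素个数不一致,numpy会把不一致的那一层整体打包成列表对象(得到dtype=object的一维数组)
# 【注意】要得到真正的多维数组,每个维度的元素的个数必须一样;
# 较新的NumPy版本(1.24起)对这种不规则列表会直接抛出ValueError,除非显式指定dtype=object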
array([list([1, 2, 3]), list([4, 5, 6]), list([7, 8])], dtype=object)
nd = np.random.randint(0,10,size=(4,4)) nd #[[2,2,1],[1,2,1]]
array([[7, 9, 2, 3], [0, 2, 7, 3], [1, 9, 0, 1], [4, 1, 2, 8]])
nd[1][3] # 多次索引:首先找最前面的维度得到子数组,然后从得到的子数组中继续索引
3
区别于列表
nd[1,3] # 一次索引:直接按照(1,3)这个次序来找
3
lp[1,3] # 列表不能这样找
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-64-8b65614beafa> in <module>()
----> 1 lp[1,3] # 列表不能这样找

TypeError: list indices must be integers or slices, not tuple
nd[[1,1,2,3,1,2]] # 用列表来做索引:按照列表中指定的次序来遍历数组
array([[0, 2, 7, 3], [0, 2, 7, 3], [1, 9, 0, 1], [4, 1, 2, 8], [0, 2, 7, 3], [1, 9, 0, 1]])
lp[[1,1]] # 列表的索引不能是列表
lp[[1,1]] # 列表的索引不能是列表
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-66-e9ca25f0b661> in <module>()
----> 1 lp[[1,1]] # 列表的索引不能是列表

TypeError: list indices must be integers or slices, not list
nd[[1,2,2,2]][[0,1,2]]
array([[0, 2, 7, 3], [1, 9, 0, 1], [1, 9, 0, 1]])
nd[[2,2,1]]
array([[1, 9, 0, 1], [1, 9, 0, 1], [0, 2, 7, 3]])
nd[[2,2,1,1],[1,2,1,1]]
array([9, 0, 2, 2])