1. Numpy的基本操作

导入numpy

1	import numpy as np

1.1 array数组

定义数组array = [1,2,3,4,5]

1
2
3

array = np.array([1,2,3,4,5])
array2 = array + 1
array2

array([2, 3, 4, 5, 6])

1	array2 +array

array([ 3,  5,  7,  9, 11])

1	array2 * array

array([ 2,  6, 12, 20, 30])

1.2 数组特性

查看数组维度信息

1	array.shape

(5,)

定义二维数组

1 2	np.array([[1,2,3], [4,5,6]])

array([[1, 2, 3],
       [4, 5, 6]])

列表转为numpy数组

1
2
3

list1 = [1,2,3,4,5]
array1 = np.array(list1)
array1

array([1, 2, 3, 4, 5])

列表中元素的数据类型不同时，NumPy会自动向上转换为统一的类型：int→float→str

1
2
3

list1 = [1,2,3,4,'5']
array1 = np.array(list1)
array1

array(['1', '2', '3', '4', '5'], dtype='<U11')

1
2
3

list1 = [1,2,3,4,5.0]
array1 = np.array(list1)
array1

array([1., 2., 3., 4., 5.])

1.3 数组属性操作

#打印当前数据格式
type(array1)

numpy.ndarray

#打印当前数据类型
array1.dtype

dtype('float64')

#打印当前数组元素个数
array1.size

#打印当前数组维度
array1.ndim

2. 索引与切片

2.1 数值索引

array1[1:3]
array1[-1]

array([7, 8, 9])

array1 = np.array([[1,2,3],
                  [4,5,6],
                  [7,8,9]])
array1[1][1] = 10
array1

array([[ 1,  2,  3],
       [ 4, 10,  6],
       [ 7,  8,  9]])

#取第二行数据
array1[1]

array([ 4, 10,  6])

#取所有行的第二列
array1[:,1]

array([ 2, 10,  8])

2.2 bool索引

array = np.arange(0,100,10)
array

array([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

1	mask = np.array([0,0,0,1,1,1,0,0,1,1],dtype=bool)

#取所有位置为True的元素
array[mask]

array([30, 40, 50, 80, 90])

1
2
3

#在[0,1)区间随机生成10个数
random_array = np.random.rand(10)
random_array

array([0.14346121, 0.10379705, 0.75719041, 0.3301816 , 0.02267718,
       0.90297419, 0.51393754, 0.99605731, 0.75787412, 0.90175181])

mask = random_array > 0.5
mask

array([False, False,  True, False, False,  True,  True,  True,  True,
        True])

1
2
3

array = np.array([10,20,30,40,50])
#找到符合要求的索引位置
np.where(array > 30)

(array([3, 4], dtype=int64),)

#数组对比

1
2
3

x = np.array([1,1,1,2])
y = np.array([1,1,1,4])
x ==y

array([ True,  True,  True, False])

#逻辑判断
np.logical_and(x,y)

array([ True,  True,  True,  True])

1	np.logical_or(x,y)

array([ True,  True,  True,  True])

3. 数据类型与数值计算

3.1 数据类型

#指定数据类型
array = np.array([1,2,3,4,5],dtype=np.float32)

array

array([1., 2., 3., 4., 5.], dtype=float32)

1	array.dtype

dtype('float32')

1
2
3

#NumPy默认把字符串存为定长Unicode类型（dtype形如'<U3'）；这里显式指定dtype=object，元素按Python对象保存
array = np.array(['1','10','3.5','str'],dtype=object)
array

array(['1', '10', '3.5', 'str'], dtype=object)

#对创建好的数据进行数据类型转换
array = np.array([1,2,3,4,5])
array2 = np.asarray(array,dtype=np.float32)
array2

array([1., 2., 3., 4., 5.], dtype=float32)

3.2 复制与赋值

1
2
3

#等号赋值，对其中一个变量修改，另一个也发生变化
array2 = array
array2

array([1, 2, 3, 4, 5])

array[1] = 100
array

array([  1, 100,   3,   4,   5])

array2

array([  1, 100,   3,   4,   5])

#使用copy赋值
array2 = array.copy()
array2[1] = 1000
array2

array([   1, 1000,    3,    4,    5])

array

array([  1, 100,   3,   4,   5])

3.3 数值计算

1
2
3

#所有元素求和
array = np.array([[1,2,3],[4,5,6]])
np.sum(array)

#按列求和
np.sum(array,axis=0)

array([5, 7, 9])

#按行求和
np.sum(array,axis=1)

array([ 6, 15])

#各个元素累乘
array.prod()

1	array.prod(axis=0)

array([ 4, 10, 18])

1	array.prod(axis=1)

array([  6, 120])

#求元素的最小值
array.min()

1	array.min(axis=0)

array([1, 2, 3])

1	array.min(axis=1)

array([1, 4])

#求均值
array.mean()

3.5

1	array.mean(axis=0)

array([2.5, 3.5, 4.5])

1	array.mean(axis=1)

array([2., 5.])

#求标准差
array.std()

1.707825127659933

#求方差
array.var()

2.9166666666666665

#比2小的全部为2，比4大的全部为4
array.clip(2,4)

array([[2, 2, 3],
       [4, 4, 4]])

1
2
3

#四舍五入（注意：小数部分恰好为.5时，NumPy会舍入到最近的偶数）
array = np.array([1.2,3.56,6.41])
array.round()

array([1., 4., 6.])

#指定精度
array.round(decimals=1)

array([1.2, 3.6, 6.4])

#最小值索引位置
array.argmin()

1	array.argmin(axis=0)

3.4 矩阵乘法

#对应元素相乘
x = np.array([2,4])
y = np.array([3,5])
np.multiply(x,y)

array([ 6, 20])

#矩阵乘法（x、y都是一维数组时，np.dot计算的是两个向量的内积）
np.dot(x,y)

4. 常用功能模块

4.1 排序操作

array = np.array([[1.2,5.2,3.4],[5.7,3.1,7.1]])
np.sort(array)

array([[1.2, 3.4, 5.2],
       [3.1, 5.7, 7.1]])

#排序后显示索引位置
np.argsort(array)

array([[0, 2, 1],
       [1, 0, 2]], dtype=int64)

1
2
3

#在[0,10]区间内生成10个等间距的数（linspace生成的是等差序列，不是随机数）
array = np.linspace(0,10,10)
array

array([ 0.        ,  1.11111111,  2.22222222,  3.33333333,  4.44444444,
        5.55555556,  6.66666667,  7.77777778,  8.88888889, 10.        ])

values = np.array([3.5,2.7,5.8])
np.searchsorted(array,values)

array([4, 3, 6], dtype=int64)

1
2
3

array = np.array([[1,2,3],[1,0,3],[2,45,2],[2,6,8]])
index = np.lexsort([-1*array[:,0]])
index

array([2, 3, 0, 1], dtype=int64)

4.2 数组形状

array = np.arange(0,10)
array

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

1	array.shape

(10,)

1	array.shape = 2,5

array

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

array = np.arange(0,10)
array = array[np.newaxis,:]

1	array.shape

(1, 10)

1	array.squeeze()

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

1	array.shape

(1, 10)

1
2
3

#数组转置
array = np.array([[1,2,3],[2,5,3],[6,4,1]])
array

array([[1, 2, 3],
       [2, 5, 3],
       [6, 4, 1]])

1
2
3

#transpose()返回转置后的结果，并不会原地修改array；需要接收返回值（如 array = array.transpose()）才能看到变化
array.transpose()
array

array([[1, 2, 3],
       [2, 5, 3],
       [6, 4, 1]])

array.T

array([[1, 2, 6],
       [2, 5, 4],
       [3, 3, 1]])

4.3 数组的拼接

1
2
3

a = np.array([[1,2,3],[4,5,6]])
b = np.array([[7,8,9],[10,11,12]])
np.concatenate((a,b))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

1	np.concatenate((a,b),axis=1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

#另一种拼接方法,直接增加一个维度
np.stack((a,b))

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

4.4 创建数组函数

1	np.arange(0,20,2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

#快速创建行向量
np.r_[0:5:1]

array([0, 1, 2, 3, 4])

#快速创建列向量
np.c_[0:5:1]

array([[0],
       [1],
       [2],
       [3],
       [4]])

#创建零矩阵
np.zeros(3)

array([0., 0., 0.])

1	np.zeros((3,3))

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

1	np.ones((3,3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

1	np.ones((3,3)) * 8

array([[8., 8., 8.],
       [8., 8., 8.],
       [8., 8., 8.]])

1
2
3

a = np.empty(6)
a.fill(1)
a

array([1., 1., 1., 1., 1., 1.])

1	np.identity(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

4.5 随机模块

1	np.random.rand(3,2)

array([[0.58755743, 0.58962059],
       [0.03991693, 0.00247106],
       [0.44385432, 0.09460355]])

#返回区间[0,10)内的随机整数（包含0，不包含10）
np.random.randint(10,size = (5,4))

array([[0, 2, 9, 2],
       [3, 5, 9, 1],
       [2, 4, 0, 9],
       [9, 8, 1, 1],
       [5, 2, 2, 1]])

1	np.random.randint(0,10,3)

array([8, 2, 4])

#指定随机种子，固定随机结果
np.random.seed(100)
array = np.random.rand(10)
array

array([0.54340494, 0.27836939, 0.42451759, 0.84477613, 0.00471886,
       0.12156912, 0.67074908, 0.82585276, 0.13670659, 0.57509333])

4.6 文件读写

#Notebook写文件
#%%writefile array.txt
#注意：%%writefile 是单元格魔法命令，必须写在单元格的第一行，前面不能有注释或其他代码
data =  open ('array.txt','w')
data.write("4575\n1234")
data.close()

#python读取数据比较复杂
data = []
with open('array.txt') as f:
    for line in f.readlines():
        fileds = line.split()
        cur_data = [float(x) for x in fileds]
        data.append(cur_data)
data = np.array(data)
data

array([[4575.],
       [1234.]])

1
2
3

#Numpy读取数据
data = np.loadtxt("array.txt")
data

array([4575., 1234.])

%%writefile array2.txt

1,2,3,4
5,6,7,8

Writing array2.txt

1
2
3

#数据中带有分隔符','，读取数据时需要用delimiter指明，否则报错
data = np.loadtxt("array2.txt",delimiter = ',')
data

array([[1., 2., 3., 4.],
       [5., 6., 7., 8.]])

%%writefile array2.txt
a,b,c,d,e,f
1,2,3,4,5,6
7,8,9,10,11,12

Overwriting array2.txt

1
2
3

#可以指定读取数据时去掉前几行元素
data = np.loadtxt("array2.txt",delimiter = ',',skiprows = 1)
data

array([[ 1.,  2.,  3.,  4.,  5.,  6.],
       [ 7.,  8.,  9., 10., 11., 12.]])

#查看np.loadtxt的功能
print(help(np.loadtxt))

Help on function loadtxt in module numpy:

loadtxt(fname, dtype=<class 'float'>, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0, encoding='bytes', max_rows=None)
    Load data from a text file.

    Each row in the text file must have the same number of values.

    Parameters
    ----------
    fname : file, str, or pathlib.Path
        File, filename, or generator to read.  If the filename extension is
        ``.gz`` or ``.bz2``, the file is first decompressed. Note that
        generators should return byte strings for Python 3k.
    dtype : data-type, optional
        Data-type of the resulting array; default: float.  If this is a
        structured data-type, the resulting array will be 1-dimensional, and
        each row will be interpreted as an element of the array.  In this
        case, the number of columns used must match the number of fields in
        the data-type.
    comments : str or sequence of str, optional
        The characters or list of characters used to indicate the start of a
        comment. None implies no comments. For backwards compatibility, byte
        strings will be decoded as 'latin1'. The default is '#'.
    delimiter : str, optional
        The string used to separate values. For backwards compatibility, byte
        strings will be decoded as 'latin1'. The default is whitespace.
    converters : dict, optional
        A dictionary mapping column number to a function that will parse the
        column string into the desired value.  E.g., if column 0 is a date
        string: ``converters = {0: datestr2num}``.  Converters can also be
        used to provide a default value for missing data (but see also
        `genfromtxt`): ``converters = {3: lambda s: float(s.strip() or 0)}``.
        Default: None.
    skiprows : int, optional
        Skip the first `skiprows` lines; default: 0.
    usecols : int or sequence, optional
        Which columns to read, with 0 being the first. For example,
        ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
        The default, None, results in all columns being read.

        .. versionchanged:: 1.11.0
            When a single column has to be read it is possible to use
            an integer instead of a tuple. E.g ``usecols = 3`` reads the
            fourth column the same way as ``usecols = (3,)`` would.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = loadtxt(...)``.  When used with a structured
        data-type, arrays are returned for each field.  Default is False.
    ndmin : int, optional
        The returned array will have at least `ndmin` dimensions.
        Otherwise mono-dimensional axes will be squeezed.
        Legal values: 0 (default), 1 or 2.

        .. versionadded:: 1.6.0
    encoding : str, optional
        Encoding used to decode the inputfile. Does not apply to input streams.
        The special value 'bytes' enables backward compatibility workarounds
        that ensures you receive byte arrays as results if possible and passes
        'latin1' encoded strings to converters. Override this value to receive
        unicode arrays and pass strings as input to converters.  If set to None
        the system default is used. The default value is 'bytes'.

        .. versionadded:: 1.14.0
    max_rows : int, optional
        Read `max_rows` lines of content after `skiprows` lines. The default
        is to read all the lines.

        .. versionadded:: 1.16.0

    Returns
    -------
    out : ndarray
        Data read from the text file.

    See Also
    --------
    load, fromstring, fromregex
    genfromtxt : Load data with missing values handled as specified.
    scipy.io.loadmat : reads MATLAB data files

    Notes
    -----
    This function aims to be a fast reader for simply formatted files.  The
    `genfromtxt` function provides more sophisticated handling of, e.g.,
    lines with missing values.

    .. versionadded:: 1.10.0

    The strings produced by the Python float.hex method can be used as
    input for floats.

    Examples
    --------
    >>> from io import StringIO   # StringIO behaves like a file object
    >>> c = StringIO(u"0 1\n2 3")
    >>> np.loadtxt(c)
    array([[ 0.,  1.],
           [ 2.,  3.]])

    >>> d = StringIO(u"M 21 72\nF 35 58")
    >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'),
    ...                      'formats': ('S1', 'i4', 'f4')})
    array([('M', 21, 72.0), ('F', 35, 58.0)],
          dtype=[('gender', '|S1'), ('age', '<i4'), ('weight', '<f4')])

    >>> c = StringIO(u"1,0,2\n3,0,4")
    >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True)
    >>> x
    array([ 1.,  3.])
    >>> y
    array([ 2.,  4.])

None

#Numpy写文件操作
np.savetxt("array4.txt", array, delimiter = ',')

#将数据文件保存为Numpy数组格式".npy",可以将机器学习训练的模型参数保存为此格式
array = np.array([[1,2,3],[4,5,6]])
np.save("array.npy", array)
np.load("array.npy")

array([[1, 2, 3],
       [4, 5, 6]])

1
2