NumPy, Pandas, MatPlotLib
Beginning Python Programming
Makzan, 2020 April.
NumPy, Pandas, MatPlotLib
- NumPy for array and matrix calculation
- Pandas for data frame processing
- MatPlotLib for plotting graphs
NumPy
- NumPy array creation
- arange
- linspace
- reshape
- shape
- zeros
- ones
- rand
- seed
- dot
- broadcast
Numpy array creation
import numpy as np
# Build a 1-D integer array directly from a Python list literal.
arr1 = np.array([1, 2, 3, 4, 5])
print(arr1)
# Output: [1 2 3 4 5]
Numpy: array from range
import numpy as np
# np.array also accepts a range object; this one supplies the integers 0 through 9.
arr2 = np.array(range(10))
print(arr2)
# [0 1 2 3 4 5 6 7 8 9]
Numpy: arange
import numpy as np
# np.arange(n) builds the same 0..n-1 sequence directly as an array,
# without going through a Python range first.
arr2b = np.arange(10)
print(arr2b)
# [0 1 2 3 4 5 6 7 8 9]
Numpy: arange
import numpy as np
# Two arguments mean (start, stop): start is included, stop is excluded.
arr2c = np.arange(10, 20)
print(arr2c)
# Output: [10 11 12 13 14 15 16 17 18 19]
Numpy: arange
import numpy as np
# A third argument sets the step: every second integer from 1 up to (not including) 20.
arr2d = np.arange(1, 20, step=2)
print(arr2d)
# Output: [ 1  3  5  7  9 11 13 15 17 19]
Numpy: dtype
import numpy as np
# dtype forces the element type: the integers 0..9 are stored as floats.
arr3 = np.array(range(10), dtype=float)
print(arr3)
# Output: [0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]
Numpy: linspace
import numpy as np
# linspace(start, stop, num): num evenly spaced points, both endpoints included.
arr4 = np.linspace(0, 10, num=3)
print(arr4)
# Output: [ 0.  5. 10.]
Numpy: linspace
import numpy as np
# Five evenly spaced points from 0 to 100, i.e. a spacing of 25.
arr4b = np.linspace(0, 100, num=5)
print(arr4b)
# Output: [  0.  25.  50.  75. 100.]
Numpy: linspace
import numpy as np
# Four points across [0, 1]: the spacing is 1/3, so the result is not integral.
arr4c = np.linspace(0, 1, num=4)
print(arr4c)
# Output: [0.         0.33333333 0.66666667 1.        ]
Numpy: reshape
import numpy as np
# Lay the 12 integers 1..12 out as 3 rows by 4 columns, then inspect the shape.
arr5 = np.arange(1, 13).reshape(3, 4)
print(arr5.shape)
# Output: (3, 4)
Numpy: zeros
import numpy as np
# Ten zeros; with no dtype given the elements are floats.
arr6 = np.zeros(shape=10)
print(arr6)
# Output: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Numpy: zeros
import numpy as np
# Same as above, but asking for integer elements instead of the float default.
arr6b = np.zeros(10, dtype=int)
print(arr6b)
# Output: [0 0 0 0 0 0 0 0 0 0]
Numpy: ones
import numpy as np
# Ten ones, explicitly typed as floats.
arr7 = np.ones(10, dtype=float)
print(arr7)
# Output: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Numpy: full
import numpy as np
# np.full fills an array of the given length with one constant value.
arr8 = np.full(shape=3, fill_value=3.14)
print(arr8)
# Output: [3.14 3.14 3.14]
Numpy: full
import numpy as np
# A tuple shape gives a multi-dimensional result: 3 rows by 5 columns of 3.14.
arr9 = np.full(shape=(3, 5), fill_value=3.14)
print(arr9)
# Output:
# [[3.14 3.14 3.14 3.14 3.14]
#  [3.14 3.14 3.14 3.14 3.14]
#  [3.14 3.14 3.14 3.14 3.14]]
Numpy: rand
import numpy as np
# Draw 100 uniform random samples from [0, 1).
# No seed is set in this snippet, so the printed values differ from run to run;
# the numbers below are only a sample.
arr10 = np.random.rand(100)
print(arr10)
# [0.4236548  0.64589411 0.43758721 0.891773   0.96366276 0.38344152
#  0.79172504 0.52889492 0.56804456 0.92559664 0.07103606 0.0871293
#  0.0202184  0.83261985 0.77815675 0.87001215 0.97861834 0.79915856
#  ...
Numpy: rand
import numpy as np
# Each argument to rand is one dimension: this gives a 3x3 grid of uniform samples.
# No seed is set in this snippet, so the values below are only a sample.
arr10b = np.random.rand(3,3)
print(arr10b)
# [[0.5488135  0.71518937 0.60276338]
#  [0.54488318 0.4236548  0.64589411]
#  [0.43758721 0.891773   0.96366276]]
Numpy: random seed
import numpy as np
# Seeding the global generator makes the "random" numbers reproducible:
# with seed 0 the sequence below comes out the same on every run.
np.random.seed(0)
# A 4x1 column of uniform samples.
arr11 = np.random.rand(4,1)
print(arr11)
# [[0.5488135 ]
#  [0.71518937]
#  [0.60276338]
#  [0.54488318]]
Numpy: dot product
import numpy as np
# Seed first so the random factor, and therefore the product, is reproducible.
np.random.seed(0)

# Left operand: the integers 1..12 as 3 rows by 4 columns.
grid = np.arange(1, 13).reshape(3, 4)
print(grid)
print(grid.shape)

# Right operand: 4 rows by 2 columns of uniform random samples.
grid2 = np.random.rand(4, 2)
print(grid2)
print(grid2.shape)

# Matrix product: (3, 4) dot (4, 2) gives (3, 2); the inner dimensions must match.
print(np.dot(grid, grid2))
# [[ 4.7756535   7.30973007]
#  [12.82692906 18.50068873]
#  [20.87820462 29.69164738]]
Numpy: operations
import numpy as np
# Element-wise arithmetic between two arrays of the same shape.
grid = np.arange(1, 10).reshape(3, 3)
print(grid)
# [[1 2 3]
#  [4 5 6]
#  [7 8 9]]

grid2 = np.arange(1, 4)
print(grid2)
# [1 2 3]

# Repeat the row three times so grid2 has the same 3x3 shape as grid.
grid2 = np.tile(grid2, reps=(3, 1))
print(grid2)
# [[1 2 3]
#  [1 2 3]
#  [1 2 3]]

print("----")

# Every operator below works element by element.
print(grid + grid2)
# [[ 2  4  6]
#  [ 5  7  9]
#  [ 8 10 12]]

print(grid - grid2)
# [[0 0 0]
#  [3 3 3]
#  [6 6 6]]

print(grid * grid2)
# [[ 1  4  9]
#  [ 4 10 18]
#  [ 7 16 27]]

# True division always produces floats.
print(grid / grid2)
# [[1.  1.  1. ]
#  [4.  2.5 2. ]
#  [7.  4.  3. ]]

# Floor division keeps the integer dtype.
print(grid // grid2)
# [[1 1 1]
#  [4 2 2]
#  [7 4 3]]

print(grid ** grid2)
# [[  1   4  27]
#  [  4  25 216]
#  [  7  64 729]]
Numpy: broadcast
import numpy as np
# Broadcasting: a scalar operand is applied to every element of the array.
grid = np.arange(1, 13).reshape(3, 4)
print(grid.shape)
# (3, 4)

print(grid + 3)
# [[ 4  5  6  7]
#  [ 8  9 10 11]
#  [12 13 14 15]]
Numpy: more on broadcast
import numpy as np
# Seed up front so the random statistics at the end are reproducible.
np.random.seed(0)

grid = np.arange(1, 10).reshape(3, 3)
print(grid)
# [[1 2 3]
#  [4 5 6]
#  [7 8 9]]

# --- Scalar broadcast: the scalar is combined with every element. ---
print(grid * 3)
# [[ 3  6  9]
#  [12 15 18]
#  [21 24 27]]

print(grid / 10)
# [[0.1 0.2 0.3]
#  [0.4 0.5 0.6]
#  [0.7 0.8 0.9]]

print(grid / 3)
# [[0.33333333 0.66666667 1.        ]
#  [1.33333333 1.66666667 2.        ]
#  [2.33333333 2.66666667 3.        ]]

print(grid // 3)
# [[0 0 1]
#  [1 1 2]
#  [2 2 3]]

print(grid + 1)
# [[ 2  3  4]
#  [ 5  6  7]
#  [ 8  9 10]]

# --- Row broadcast: the 1-D array [1 2 3] is added to each row of grid. ---
grid2 = np.arange(1, 4)
print(grid + grid2)
# [[ 2  4  6]
#  [ 5  7  9]
#  [ 8 10 12]]

print(grid ** 2)
# [[ 1  4  9]
#  [16 25 36]
#  [49 64 81]]

print(grid % 5)
# [[1 2 3]
#  [4 0 1]
#  [2 3 4]]

# --- Aggregates over 10,000 uniform samples from [0, 1). ---
arr = np.random.random(10000)
print(arr)
# [0.5488135  0.71518937 0.60276338 ... 0.75842952 0.02378743 0.81357508]

print(np.sum(arr))
# 4964.588916200894

print(np.max(arr))
# 0.9999779517807228

print(np.min(arr))
# 7.2449638492178e-05

print(np.mean(arr))
# 0.49645889162008944

print(np.median(arr))
# 0.49350103035904186

# A comparison yields a boolean mask; indexing with it keeps the matching elements.
print(len(arr[arr < 0.2]))
# 2060

# Combine masks with & (each comparison needs its own parentheses).
print(len(arr[(arr > 0.2) & (arr < 0.3)]))
# 995
NumPy
- NumPy array creation
- arange
- linspace
- reshape
- shape
- zeros
- ones
- rand
- seed
- dot
- broadcast
Summary
NumPy: Slicing
[i, j]
[i, :]
[:, j]
[i_start:i_end, j_start:j_end]
NumPy: Slicing
Slicing a NumPy array does NOT make a copy: the slice is a view, so writing through it changes the original array.
Numpy: Slicing
import numpy as np
grid = np.arange(1, 13).reshape(3, 4)
print(grid)

# First row, first column, then columns 1 and 2 of every row.
print(grid[0, :])
print(grid[:, 0])
print(grid[:, 1:3])

# A full slice is a view onto grid, not an independent copy...
grid2 = grid[:, :]

# ...so a write to grid is visible through grid2 as well.
grid[0, 0] = 100
print(grid)
print(grid2)

# Assigning a scalar to a slice overwrites the whole region, again in both names.
grid[:, 1:3] = 99
print(grid)
print(grid2)
NumPy: Reading CSV
- genfromtxt
Numpy: Reading CSV
import numpy as np
# Parse the CSV into a structured array with two named fields.
# skip_header=1 drops the file's header row; `names` supplies the field names.
# The count column is read as uint32 rather than uint8: uint8 tops out at 255
# and the sample data already reaches 218, so one busier day would not fit.
data = np.genfromtxt(
    'visitors.csv',
    delimiter=',',
    dtype='datetime64[D],uint32',
    skip_header=1,
    names=('date', 'visitors'),
)
print(data)
# [('2018-12-17', 0) ('2018-12-16', 4) ('2018-12-15', 218)
# ('2018-12-14', 11) ('2018-12-13', 11) ('2018-12-12', 14)
# ('2018-12-11', 4) ('2018-12-10', 5) ('2018-12-09', 15)
# ('2018-12-08', 104) ('2018-12-07', 19) ('2018-12-06', 8)
# ('2018-12-05', 3) ('2018-12-04', 24) ('2018-12-03', 66)
# ('2018-12-02', 40) ('2018-12-01', 69) ('2018-11-30', 8)
# ('2018-11-29', 13) ('2018-11-28', 10) ('2018-11-27', 18)
# ('2018-11-26', 72) ('2018-11-25', 31) ('2018-11-24', 146)
# ('2018-11-23', 42) ('2018-11-22', 56) ('2018-11-21', 19)
# ('2018-11-20', 76) ('2018-11-19', 11) ('2018-11-18', 0)
# ('2018-11-17', 0) ('2018-11-16', 6) ('2018-11-15', 7)
# ('2018-11-14', 32) ('2018-11-13', 102) ('2018-11-12', 198)
# ('2018-11-11', 22) ('2018-11-10', 82) ('2018-11-09', 213)
# ('2018-11-08', 52) ('2018-11-07', 13) ('2018-11-06', 0)
# ('2018-11-05', 6) ('2018-11-04', 0) ('2018-11-03', 7)
# ('2018-11-02', 25) ('2018-11-01', 29) ('2018-10-31', 9)
# ('2018-10-30', 14) ('2018-10-29', 4) ('2018-10-28', 4)]
Numpy: Reading CSV
import numpy as np
# Parse the CSV into a structured array, then select one field by name.
# skip_header=1 drops the file's header row; `names` supplies the field names.
# The count column is read as uint32 rather than uint8, because uint8 cannot
# hold a daily count above 255.
data = np.genfromtxt(
    'visitors.csv',
    delimiter=',',
    dtype='datetime64[D],uint32',
    skip_header=1,
    names=('date', 'visitors'),
)
# Indexing with a field name returns that column as a plain 1-D array.
print(data['date'])
# ['2018-12-17' '2018-12-16' '2018-12-15' '2018-12-14' '2018-12-13'
# '2018-12-12' '2018-12-11' '2018-12-10' '2018-12-09' '2018-12-08'
# '2018-12-07' '2018-12-06' '2018-12-05' '2018-12-04' '2018-12-03'
# '2018-12-02' '2018-12-01' '2018-11-30' '2018-11-29' '2018-11-28'
# '2018-11-27' '2018-11-26' '2018-11-25' '2018-11-24' '2018-11-23'
# '2018-11-22' '2018-11-21' '2018-11-20' '2018-11-19' '2018-11-18'
# '2018-11-17' '2018-11-16' '2018-11-15' '2018-11-14' '2018-11-13'
# '2018-11-12' '2018-11-11' '2018-11-10' '2018-11-09' '2018-11-08'
# '2018-11-07' '2018-11-06' '2018-11-05' '2018-11-04' '2018-11-03'
# '2018-11-02' '2018-11-01' '2018-10-31' '2018-10-30' '2018-10-29'
# '2018-10-28']
Numpy: Reading CSV
import numpy as np
# Parse the CSV into a structured array, then select the visitor counts.
# skip_header=1 drops the file's header row; `names` supplies the field names.
# The count column is read as uint32 rather than uint8: uint8 tops out at 255
# and the values below already reach 218, so one busier day would not fit.
data = np.genfromtxt(
    'visitors.csv',
    delimiter=',',
    dtype='datetime64[D],uint32',
    skip_header=1,
    names=('date', 'visitors'),
)
print(data['visitors'])
# [ 0 4 218 11 11 14 4 5 15 104 19 8 3 24 66 40 69 8
# 13 10 18 72 31 146 42 56 19 76 11 0 0 6 7 32 102 198
# 22 82 213 52 13 0 6 0 7 25 29 9 14 4 4]
Pandas
- read_excel
- read_csv
- dataframe
Pandas: Reading Excel
import pandas as pd
# Load the spreadsheet into a DataFrame and show all of it.
data = pd.read_excel('visitors.xlsx')
print(data)

# Boolean-mask filter: keep only the rows whose 'Visitors' value exceeds 30.
print(data.loc[data['Visitors'] > 30])
Pandas: Reading CSV
import numpy as np
import pandas as pd
# The file's first line is a header row (the NumPy examples skip it with
# skip_header=1). header=0 tells pandas to consume that line and replace it
# with `names`; without it the header text is read as a data row and
# pd.to_datetime fails on it.
data = pd.read_csv(
    'visitors.csv',
    delimiter=',',
    header=0,
    names=['date', 'visitors'],
)
# Convert the date strings into real datetime64 values.
data['date'] = pd.to_datetime(data['date'])
print(data)
Pandas: DataFrame
MatPlotLib
- plotting line chart
- plotting bar chart
- plotting pie chart
- plotting 3d chart
- configuring chart styles
Python: Numpy, Pandas & Matplotlib
By makzan
Python: Numpy, Pandas & Matplotlib
- 413