NumPy, Pandas, Matplotlib

Beginning Python Programming

Makzan, 2020 April.

NumPy, Pandas, Matplotlib

  • NumPy for array and matrix calculation
  • Pandas for data frame processing
  • Matplotlib for plotting graphs

NumPy

  • NumPy array creation
  • arange
  • linspace
  • reshape
  • shape
  • zeros
  • ones
  • random
  • seed
  • dot
  • broadcast

NumPy array creation

import numpy as np

# Build a one-dimensional array straight from a Python list.
arr1 = np.array([1, 2, 3, 4, 5])

print(arr1)
# Output:
# [1 2 3 4 5]

Numpy: array from range

import numpy as np

# Any iterable works as the data source; here a built-in range object.
arr2 = np.array(range(10))

print(arr2)
# Output:
# [0 1 2 3 4 5 6 7 8 9]

Numpy: arange

import numpy as np

# np.arange(stop) counts from 0 up to, but not including, stop.
arr2b = np.arange(10)

print(arr2b)
# Output:
# [0 1 2 3 4 5 6 7 8 9]

Numpy: arange

import numpy as np

# np.arange(start, stop): start is included, stop is excluded.
arr2c = np.arange(10, 20)

print(arr2c)
# Output:
# [10 11 12 13 14 15 16 17 18 19]

Numpy: arange

import numpy as np

# np.arange(start, stop, step): every second value from 1, stopping before 20.
arr2d = np.arange(1, 20, 2)

print(arr2d)
# Output:
# [ 1  3  5  7  9 11 13 15 17 19]

Numpy: dtype

import numpy as np

# dtype forces the element type; the integers from range() become floats,
# which is why each printed value carries a trailing dot.
arr3 = np.array(range(10), dtype='float')

print(arr3)
# Output:
# [0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]

Numpy: linspace

import numpy as np

# linspace(start, stop, num): num evenly spaced samples, both ends included.
arr4 = np.linspace(0, 10, num=3)

print(arr4)
# Output:
# [ 0.  5. 10.]

Numpy: linspace

import numpy as np

# Five evenly spaced samples covering 0 through 100 inclusive.
arr4b = np.linspace(0, 100, num=5)

print(arr4b)
# Output:
# [  0.  25.  50.  75. 100.]

Numpy: linspace

import numpy as np

# Four samples across [0, 1]; the spacing is 1/3, so the values are not round.
arr4c = np.linspace(0, 1, num=4)

print(arr4c)
# Output:
# [0.         0.33333333 0.66666667 1.        ]

Numpy: reshape

import numpy as np

# Twelve values (1..12) rearranged into 3 rows by 4 columns.
arr5 = np.arange(1, 13).reshape([3, 4])

# .shape reports the size along each dimension as a tuple.
print(arr5.shape)
# Output:
# (3, 4)

Numpy: zeros

import numpy as np

# Ten zeros; the printed trailing dots show the elements are floats.
arr6 = np.zeros(10)

print(arr6)
# Output:
# [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

Numpy: zeros

import numpy as np

# Same as before, but dtype requests integer zeros instead of floats.
arr6b = np.zeros(10, dtype='int')

print(arr6b)
# Output:
# [0 0 0 0 0 0 0 0 0 0]

Numpy: ones

import numpy as np

# Ten ones, explicitly typed as floats.
arr7 = np.ones(10, dtype='float')

print(arr7)
# Output:
# [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]

Numpy: full

import numpy as np

# np.full(shape, value): an array of the given length filled with one value.
arr8 = np.full(3, 3.14)

print(arr8)
# Output:
# [3.14 3.14 3.14]

Numpy: full

import numpy as np

# A tuple shape produces a multi-dimensional result: 3 rows by 5 columns.
arr9 = np.full((3, 5), 3.14)

print(arr9)
# Output:
# [[3.14 3.14 3.14 3.14 3.14]
#  [3.14 3.14 3.14 3.14 3.14]
#  [3.14 3.14 3.14 3.14 3.14]]

Numpy: rand

import numpy as np

# 100 random floats drawn uniformly from [0, 1).
# No seed is set here, so the numbers differ on every run; the output below
# is only a sample.
arr10 = np.random.rand(100)

print(arr10)
# Sample output:
# [0.4236548  0.64589411 0.43758721 0.891773   0.96366276 0.38344152
#  0.79172504 0.52889492 0.56804456 0.92559664 0.07103606 0.0871293
#  0.0202184  0.83261985 0.77815675 0.87001215 0.97861834 0.79915856
#  ...

Numpy: rand

import numpy as np

# rand takes one argument per dimension: here a 3x3 matrix of random floats.
# No seed is set, so every run prints different numbers. The sample below
# matches what a run seeded with np.random.seed(0) produces.
arr10b = np.random.rand(3, 3)

print(arr10b)
# Sample output:
# [[0.5488135  0.71518937 0.60276338]
#  [0.54488318 0.4236548  0.64589411]
#  [0.43758721 0.891773   0.96366276]]

Numpy: random seed

import numpy as np

# Fixing the seed makes the "random" sequence repeatable from run to run.
np.random.seed(0)

# A column vector: 4 rows, 1 column.
arr11 = np.random.rand(4, 1)

print(arr11)
# Output:
# [[0.5488135 ]
#  [0.71518937]
#  [0.60276338]
#  [0.54488318]]

Numpy: dot product

import numpy as np

# Seed first so grid2 (and therefore the product) is the same on every run.
np.random.seed(0)

# Left operand: 3 rows x 4 columns holding 1..12.
grid = np.arange(1, 13).reshape([3, 4])

print(grid)
# [[ 1  2  3  4]
#  [ 5  6  7  8]
#  [ 9 10 11 12]]

print(grid.shape)
# (3, 4)

# Right operand: 4 rows x 2 columns of random floats.
grid2 = np.random.rand(4, 2)
print(grid2)
# [[0.5488135  0.71518937]
#  [0.60276338 0.54488318]
#  [0.4236548  0.64589411]
#  [0.43758721 0.891773  ]]
print(grid2.shape)
# (4, 2)

# (3, 4) dot (4, 2) -> (3, 2): the inner dimensions (4 and 4) must match.
print(np.dot(grid, grid2))
# [[ 4.7756535   7.30973007]
#  [12.82692906 18.50068873]
#  [20.87820462 29.69164738]]

Numpy: operations

import numpy as np

# A 3x3 matrix holding 1..9.
grid = np.arange(1, 10).reshape([3, 3])

print(grid)
# [[1 2 3]
#  [4 5 6]
#  [7 8 9]]

# Start from a single row ...
grid2 = np.arange(1, 4)

print(grid2)
# [1 2 3]

# ... and repeat it three times down the rows so both operands are 3x3.
grid2 = np.tile(grid2, (3, 1))

print(grid2)
# [[1 2 3]
#  [1 2 3]
#  [1 2 3]]

print("----")

# Every arithmetic operator below works element by element.

print(grid + grid2)
# [[ 2  4  6]
#  [ 5  7  9]
#  [ 8 10 12]]

print(grid - grid2)
# [[0 0 0]
#  [3 3 3]
#  [6 6 6]]

# Note: * is element-wise multiplication, not a matrix product.
print(grid * grid2)
# [[ 1  4  9]
#  [ 4 10 18]
#  [ 7 16 27]]

# True division always yields floats.
print(grid / grid2)
# [[1.  1.  1. ]
#  [4.  2.5 2. ]
#  [7.  4.  3. ]]

# Floor division keeps the integer dtype.
print(grid // grid2)
# [[1 1 1]
#  [4 2 2]
#  [7 4 3]]

print(grid ** grid2)
# [[  1   4  27]
#  [  4  25 216]
#  [  7  64 729]]

Numpy: broadcast

import numpy as np

grid = np.arange(1, 13).reshape([3, 4])

print(grid.shape)
# (3, 4)

# The scalar 3 is broadcast: it is added to every element of the array.
print(grid + 3)
# [[ 4  5  6  7]
#  [ 8  9 10 11]
#  [12 13 14 15]]

Numpy: more on broadcast

import numpy as np
# Seed so the random sample further down is reproducible.
np.random.seed(0)

grid = np.arange(1, 10).reshape([3, 3])

print(grid)
# [[1 2 3]
#  [4 5 6]
#  [7 8 9]]

# --- Scalar operands are applied to every element ---

print(grid * 3)
# [[ 3  6  9]
#  [12 15 18]
#  [21 24 27]]

print(grid / 10)
# [[0.1 0.2 0.3]
#  [0.4 0.5 0.6]
#  [0.7 0.8 0.9]]

print(grid / 3)
# [[0.33333333 0.66666667 1.        ]
#  [1.33333333 1.66666667 2.        ]
#  [2.33333333 2.66666667 3.        ]]

print(grid // 3)
# [[0 0 1]
#  [1 1 2]
#  [2 2 3]]

print(grid + 1)
# [[ 2  3  4]
#  [ 5  6  7]
#  [ 8  9 10]]

# --- A 1-D row of length 3 is broadcast across each row of the 3x3 grid ---
grid2 = np.arange(1, 4)
print(grid + grid2)
# [[ 2  4  6]
#  [ 5  7  9]
#  [ 8 10 12]]

print(grid ** 2)
# [[ 1  4  9]
#  [16 25 36]
#  [49 64 81]]

print(grid % 5)
# [[1 2 3]
#  [4 0 1]
#  [2 3 4]]

# --- Aggregations over a large random sample ---
arr = np.random.random(10000)
print(arr)
# [0.5488135  0.71518937 0.60276338 ... 0.75842952 0.02378743 0.81357508]

print(np.sum(arr))
# 4964.588916200894
print(np.max(arr))
# 0.9999779517807228
print(np.min(arr))
# 7.2449638492178e-05
print(np.mean(arr))
# 0.49645889162008944
print(np.median(arr))
# 0.49350103035904186

# --- Boolean masks: a comparison yields a True/False array that selects
# the matching elements when used as an index ---
print(len(arr[arr < 0.2]))
# 2060

# Combine masks with & (each comparison needs its own parentheses).
print(len(arr[(arr > 0.2) & (arr < 0.3)]))
# 995

NumPy

  • NumPy array creation
  • arange
  • linspace
  • reshape
  • shape
  • zeros
  • ones
  • rand
  • seed
  • dot
  • broadcast

Summary

NumPy: Slicing

[i, j]
[i, :]
[:, j]
[i_start:i_end, j_start:j_end]

NumPy: Slicing

Slicing a NumPy array does NOT make a copy: the slice is a view that shares its data with the original array.

NumPy: Slicing

import numpy as np

grid = np.arange(1, 13).reshape([3, 4])

print(grid)
# [[ 1  2  3  4]
#  [ 5  6  7  8]
#  [ 9 10 11 12]]

# First row, all columns.
print(grid[0, :])
# [1 2 3 4]

# All rows, first column.
print(grid[:, 0])
# [1 5 9]

# All rows, columns 1 and 2 (the end index 3 is excluded).
print(grid[:, 1:3])
# [[ 2  3]
#  [ 6  7]
#  [10 11]]

# A full slice is a view onto the same data, not an independent copy.
grid2 = grid[:, :]

# Writing through `grid` ...
grid[0, 0] = 100

print(grid)
# [[100   2   3   4]
#  [  5   6   7   8]
#  [  9  10  11  12]]

# ... is visible through `grid2` as well.
print(grid2)
# [[100   2   3   4]
#  [  5   6   7   8]
#  [  9  10  11  12]]

# Assigning a scalar to a slice fills every selected element.
grid[:, 1:3] = 99

print(grid)
# [[100  99  99   4]
#  [  5  99  99   8]
#  [  9  99  99  12]]

print(grid2)
# [[100  99  99   4]
#  [  5  99  99   8]
#  [  9  99  99  12]]

NumPy: Reading CSV

  • genfromtxt

Numpy: Reading CSV

import numpy as np

# Load the CSV into a structured array with two named fields.
#   - skip_header=1 drops the first line of the file.
#   - dtype lists one type per column: a day-resolution date and an
#     unsigned 8-bit integer.
# NOTE(review): uint8 only holds 0-255; widen it if counts can exceed that.
data = np.genfromtxt(
    'visitors.csv',
    delimiter=',',
    dtype='datetime64[D],uint8',
    skip_header=1,
    names=('date', 'visitors'),
)

print(data)

# [('2018-12-17',   0) ('2018-12-16',   4) ('2018-12-15', 218)
#  ('2018-12-14',  11) ('2018-12-13',  11) ('2018-12-12',  14)
#  ('2018-12-11',   4) ('2018-12-10',   5) ('2018-12-09',  15)
#  ('2018-12-08', 104) ('2018-12-07',  19) ('2018-12-06',   8)
#  ('2018-12-05',   3) ('2018-12-04',  24) ('2018-12-03',  66)
#  ('2018-12-02',  40) ('2018-12-01',  69) ('2018-11-30',   8)
#  ('2018-11-29',  13) ('2018-11-28',  10) ('2018-11-27',  18)
#  ('2018-11-26',  72) ('2018-11-25',  31) ('2018-11-24', 146)
#  ('2018-11-23',  42) ('2018-11-22',  56) ('2018-11-21',  19)
#  ('2018-11-20',  76) ('2018-11-19',  11) ('2018-11-18',   0)
#  ('2018-11-17',   0) ('2018-11-16',   6) ('2018-11-15',   7)
#  ('2018-11-14',  32) ('2018-11-13', 102) ('2018-11-12', 198)
#  ('2018-11-11',  22) ('2018-11-10',  82) ('2018-11-09', 213)
#  ('2018-11-08',  52) ('2018-11-07',  13) ('2018-11-06',   0)
#  ('2018-11-05',   6) ('2018-11-04',   0) ('2018-11-03',   7)
#  ('2018-11-02',  25) ('2018-11-01',  29) ('2018-10-31',   9)
#  ('2018-10-30',  14) ('2018-10-29',   4) ('2018-10-28',   4)]

Numpy: Reading CSV

import numpy as np

# Same load as before: skip the first line, name the two columns, and give
# each column its own dtype.
data = np.genfromtxt(
    'visitors.csv',
    delimiter=',',
    dtype='datetime64[D],uint8',
    skip_header=1,
    names=('date', 'visitors'),
)

# Indexing a structured array by field name returns that whole column.
print(data['date'])

# ['2018-12-17' '2018-12-16' '2018-12-15' '2018-12-14' '2018-12-13'
#  '2018-12-12' '2018-12-11' '2018-12-10' '2018-12-09' '2018-12-08'
#  '2018-12-07' '2018-12-06' '2018-12-05' '2018-12-04' '2018-12-03'
#  '2018-12-02' '2018-12-01' '2018-11-30' '2018-11-29' '2018-11-28'
#  '2018-11-27' '2018-11-26' '2018-11-25' '2018-11-24' '2018-11-23'
#  '2018-11-22' '2018-11-21' '2018-11-20' '2018-11-19' '2018-11-18'
#  '2018-11-17' '2018-11-16' '2018-11-15' '2018-11-14' '2018-11-13'
#  '2018-11-12' '2018-11-11' '2018-11-10' '2018-11-09' '2018-11-08'
#  '2018-11-07' '2018-11-06' '2018-11-05' '2018-11-04' '2018-11-03'
#  '2018-11-02' '2018-11-01' '2018-10-31' '2018-10-30' '2018-10-29'
#  '2018-10-28']

Numpy: Reading CSV

import numpy as np

# Same load as before: skip the first line, name the two columns, and give
# each column its own dtype.
data = np.genfromtxt(
    'visitors.csv',
    delimiter=',',
    dtype='datetime64[D],uint8',
    skip_header=1,
    names=('date', 'visitors'),
)

# Select the numeric column by its field name.
print(data['visitors'])

# [  0   4 218  11  11  14   4   5  15 104  19   8   3  24  66  40  69   8
#   13  10  18  72  31 146  42  56  19  76  11   0   0   6   7  32 102 198
#   22  82 213  52  13   0   6   0   7  25  29   9  14   4   4]

Pandas

  • read_excel
  • read_csv
  • dataframe

Pandas: Reading Excel

import pandas as pd

# Read the workbook into a DataFrame.
data = pd.read_excel('visitors.xlsx')
print(data)

# Boolean filtering: keep only the rows whose 'Visitors' value exceeds 30.
print(data[data['Visitors'] > 30])

Pandas: Reading CSV

import numpy as np
import pandas as pd

# Load the visitor log into a DataFrame with our own column names.
# The file starts with a header row (the NumPy version of this example skips
# it with skip_header=1). header=0 tells pandas to consume that row and
# replace its labels with `names`; without it the header text would be read
# as the first data row and pd.to_datetime below would fail on it.
data = pd.read_csv(
    'visitors.csv',
    delimiter=',',
    header=0,
    names=('date', 'visitors'),
)

# Convert the date strings into proper datetime values.
data['date'] = pd.to_datetime(data['date'])

print(data)

Pandas: DataFrame

Matplotlib

  • plotting line chart
  • plotting bar chart
  • plotting pie chart
  • plotting 3d chart
  • configuring chart styles

Python: Numpy, Pandas & Matplotlib

By makzan

Python: Numpy, Pandas & Matplotlib

  • 438