In [2]:
import numpy as np
In [14]:
A = np.array([3,6,8,2,4,6,7,0,3])
print(A)
[3 6 8 2 4 6 7 0 3]
In [15]:
B = A.reshape((3,3))
print(B)
[[3 6 8]
 [2 4 6]
 [7 0 3]]

Subsetting with a Boolean mask

In [16]:
E = (B % 2 == 0 ) # this is my Boolean mask
print(E)
[[False  True  True]
 [ True  True  True]
 [False  True False]]
In [19]:
print(B[E])
[-99 -99 -99 -99 -99 -99]
In [20]:
B[E] = -99
print(B)
[[  3 -99 -99]
 [-99 -99 -99]
 [  7 -99   3]]

Getting size and shape and dimensions of a NumPy array

In [28]:
print(np.size(B))
print(B.size)
print(np.shape(B))
print(B.shape)
print(np.ndim(B))
print(B.ndim)
B.dtype
9
9
(3, 3)
(3, 3)
2
2
Out[28]:
dtype('int64')

Making copies of a NumPy array¶

In [39]:
M = np.reshape(np.arange(0,100,5),(2,10))
print(M)

M2 = M[:2,:2] # take the first two rows and columns
print(M2)  # note that the above only creates a "view" of M

M2[0,0] = 8
print(M2)

print(M)  # the same entry changed also in the original array.
[[ 0  5 10 15 20 25 30 35 40 45]
 [50 55 60 65 70 75 80 85 90 95]]
[[ 0  5]
 [50 55]]
[[ 8  5]
 [50 55]]
[[ 8  5 10 15 20 25 30 35 40 45]
 [50 55 60 65 70 75 80 85 90 95]]
In [46]:
M2_copy = M[:2,:2].copy()
print(M2_copy)

M2_copy[1,1] = 999
print(M2_copy)

print(M)
[[ 8  5]
 [50 55]]
[[  8   5]
 [ 50 999]]
[[ 8  5 10 15 20 25 30 35 40 45]
 [50 55 60 65 70 75 80 85 90 95]]

Generating numbers in NumPy¶

Initializing a random number generator (RNG):

In [47]:
rng = np.random.default_rng() # make a RNG
In [51]:
X = rng.poisson(lam = 2,size = 100)
print(X)
[0 1 1 2 0 3 3 1 1 1 2 4 3 2 1 3 2 3 3 1 1 4 2 4 2 1 0 6 4 3 3 3 5 3 1 1 1
 0 2 0 4 3 3 3 3 0 1 2 2 1 1 3 1 2 1 3 4 2 0 2 2 4 4 4 3 3 2 1 1 2 4 2 2 3
 3 3 0 0 4 1 0 1 1 3 2 1 4 0 0 2 2 0 2 1 2 1 3 3 1 2]
In [54]:
Z = rng.normal(0,1,(8,3))
print(Z)
[[-0.63685425  0.10884605  0.84342504]
 [-3.5298302  -0.00607159 -0.51247321]
 [-0.44573658  1.86171911 -0.36189318]
 [ 0.27308139  0.24874142  0.86605495]
 [ 0.56231027  0.42163472 -1.4017885 ]
 [-0.10643883 -1.28422539  1.82975347]
 [ 0.82420896  0.8745869   2.13902815]
 [-0.86321028  0.65659523 -1.29751656]]

combining NumPy arrays¶

In [60]:
x = np.array([1,3,6,7])
y = rng.poisson(lam=3,size = 4)
print(x)
print(y)
# put x and y into a big long one-dimensional array
xy = np.concatenate([x,y])
print(xy)
[1 3 6 7]
[1 2 4 1]
[1 3 6 7 1 2 4 1]
In [61]:
np.vstack([x,y])
Out[61]:
array([[1, 3, 6, 7],
       [1, 2, 4, 1]])
In [71]:
w = rng.binomial(10,1/2,(10,3))
print(w)
u = np.ones((10,1))
print(u)
np.hstack([w,u])
[[5 3 7]
 [5 7 6]
 [5 4 7]
 [3 6 3]
 [7 4 6]
 [6 4 4]
 [7 6 2]
 [4 4 5]
 [5 8 5]
 [5 6 5]]
[[1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]]
Out[71]:
array([[5., 3., 7., 1.],
       [5., 7., 6., 1.],
       [5., 4., 7., 1.],
       [3., 6., 3., 1.],
       [7., 4., 6., 1.],
       [6., 4., 4., 1.],
       [7., 6., 2., 1.],
       [4., 4., 5., 1.],
       [5., 8., 5., 1.],
       [5., 6., 5., 1.]])

arithmetic on Numpy Arrays¶

In [83]:
print(x)
print(y)
print(x + y)
print(x * y)
print(x / y)
print(x % y)
print(x // y) # floor division 
print(x ** y) 

# v = np.array([1,2]) # python does not recycle shorter vectors in the way R does.
[1 3 6 7]
[1 2 4 1]
[ 2  5 10  8]
[ 1  6 24  7]
[1.  1.5 1.5 7. ]
[0 1 2 0]
[1 1 1 7]
[   1    9 1296    7]
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[83], line 12
      8 print(x ** y) 
     10 v = np.array([1,2])
---> 12 x + v

ValueError: operands could not be broadcast together with shapes (4,) (2,) 

Summary statistics on NumPy arrays¶

In [85]:
X = rng.poisson(lam = 5, size = (20,4))
print(X)
[[ 9  3  6  5]
 [ 2  5  8  8]
 [ 5  6  5  7]
 [ 0  3  6  5]
 [ 2  4  4  2]
 [ 6  3  8  4]
 [ 5  2  6  6]
 [ 6  5  4  1]
 [ 4  5  2  1]
 [ 5  5  7  4]
 [ 9  7  4  5]
 [ 5 10  2  8]
 [ 6  6  6  4]
 [10  7  5  4]
 [ 3  3  7  5]
 [ 5  8  3  4]
 [ 5  0  4  6]
 [ 4  2  6  6]
 [ 8  5  7  6]
 [ 4  4  5  5]]
Out[85]:
<function ndarray.sum>
In [92]:
print(X.sum())
print(X.sum(axis = 1))  # row sums
print(X.sum(axis = 0))  # column sums
print(X.max(axis = 1)) # row maxima
print(X.min(axis = 0)) # col minima
397
[23 23 23 14 12 21 19 16 12 21 25 25 22 26 18 20 15 18 26 18]
[103  93 105  96]
[ 9  8  7  6  4  8  6  6  5  7  9 10  6 10  7  8  6  6  8  5]
[0 0 2 1]

Missing values in NumPy arrays¶

In [100]:
D = rng.beta(a = 2, b = 4, size = (10,3))
print(D)
[[0.15475788 0.23089473 0.492387  ]
 [0.06601769 0.41978107 0.57223595]
 [0.41535124 0.41073002 0.06715321]
 [0.63211541 0.58932949 0.30109428]
 [0.6719356  0.48659361 0.06631449]
 [0.29074745 0.32905397 0.21785407]
 [0.40025564 0.18485521 0.72320056]
 [0.4089255  0.35956024 0.54029206]
 [0.57521063 0.59567838 0.22432727]
 [0.10740836 0.27799437 0.73246314]]
In [110]:
D[D < 0.10] = None # this make a missing value in Python
print(D)
[[0.15475788 0.23089473 0.492387  ]
 [       nan 0.41978107 0.57223595]
 [0.41535124 0.41073002        nan]
 [0.63211541 0.58932949 0.30109428]
 [0.6719356  0.48659361        nan]
 [0.29074745 0.32905397 0.21785407]
 [0.40025564 0.18485521 0.72320056]
 [0.4089255  0.35956024 0.54029206]
 [0.57521063 0.59567838 0.22432727]
 [0.10740836 0.27799437 0.73246314]]
In [113]:
print(D.sum())
print(D.max())
print(np.nansum(D))
print(np.nanmax(D))
nan
nan
11.34503314600664
0.732463138543532