In [2]:
import numpy as np
In [14]:
A = np.array([3,6,8,2,4,6,7,0,3])
print(A)
[3 6 8 2 4 6 7 0 3]
In [15]:
B = A.reshape((3,3))
print(B)
[[3 6 8] [2 4 6] [7 0 3]]
Subsetting with a Boolean mask
In [16]:
E = (B % 2 == 0 ) # this is my Boolean mask
print(E)
[[False True True] [ True True True] [False True False]]
In [19]:
print(B[E])
[-99 -99 -99 -99 -99 -99]
In [20]:
B[E] = -99
print(B)
[[ 3 -99 -99] [-99 -99 -99] [ 7 -99 3]]
Getting size and shape and dimensions of a NumPy array
In [28]:
print(np.size(B))
print(B.size)
print(np.shape(B))
print(B.shape)
print(np.ndim(B))
print(B.ndim)
B.dtype
9 9 (3, 3) (3, 3) 2 2
Out[28]:
dtype('int64')
Making copies of a NumPy array¶
In [39]:
M = np.reshape(np.arange(0,100,5),(2,10))
print(M)
M2 = M[:2,:2] # take the first two rows and columns
print(M2) # note that the above only creates a "view" of M
M2[0,0] = 8
print(M2)
print(M) # the same entry changed also in the original array.
[[ 0 5 10 15 20 25 30 35 40 45] [50 55 60 65 70 75 80 85 90 95]] [[ 0 5] [50 55]] [[ 8 5] [50 55]] [[ 8 5 10 15 20 25 30 35 40 45] [50 55 60 65 70 75 80 85 90 95]]
In [46]:
M2_copy = M[:2,:2].copy()
print(M2_copy)
M2_copy[1,1] = 999
print(M2_copy)
print(M)
[[ 8 5] [50 55]] [[ 8 5] [ 50 999]] [[ 8 5 10 15 20 25 30 35 40 45] [50 55 60 65 70 75 80 85 90 95]]
Generating numbers in NumPy¶
Initializing a random number generator (RNG):
In [47]:
rng = np.random.default_rng() # make a RNG
In [51]:
X = rng.poisson(lam = 2,size = 100)
print(X)
[0 1 1 2 0 3 3 1 1 1 2 4 3 2 1 3 2 3 3 1 1 4 2 4 2 1 0 6 4 3 3 3 5 3 1 1 1 0 2 0 4 3 3 3 3 0 1 2 2 1 1 3 1 2 1 3 4 2 0 2 2 4 4 4 3 3 2 1 1 2 4 2 2 3 3 3 0 0 4 1 0 1 1 3 2 1 4 0 0 2 2 0 2 1 2 1 3 3 1 2]
In [54]:
Z = rng.normal(0,1,(8,3))
print(Z)
[[-0.63685425 0.10884605 0.84342504] [-3.5298302 -0.00607159 -0.51247321] [-0.44573658 1.86171911 -0.36189318] [ 0.27308139 0.24874142 0.86605495] [ 0.56231027 0.42163472 -1.4017885 ] [-0.10643883 -1.28422539 1.82975347] [ 0.82420896 0.8745869 2.13902815] [-0.86321028 0.65659523 -1.29751656]]
combining NumPy arrays¶
In [60]:
x = np.array([1,3,6,7])
y = rng.poisson(lam=3,size = 4)
print(x)
print(y)
# put x and y into a big long one-dimensional array
xy = np.concatenate([x,y])
print(xy)
[1 3 6 7] [1 2 4 1] [1 3 6 7 1 2 4 1]
In [61]:
np.vstack([x,y])
Out[61]:
array([[1, 3, 6, 7], [1, 2, 4, 1]])
In [71]:
w = rng.binomial(10,1/2,(10,3))
print(w)
u = np.ones((10,1))
print(u)
np.hstack([w,u])
[[5 3 7] [5 7 6] [5 4 7] [3 6 3] [7 4 6] [6 4 4] [7 6 2] [4 4 5] [5 8 5] [5 6 5]] [[1.] [1.] [1.] [1.] [1.] [1.] [1.] [1.] [1.] [1.]]
Out[71]:
array([[5., 3., 7., 1.], [5., 7., 6., 1.], [5., 4., 7., 1.], [3., 6., 3., 1.], [7., 4., 6., 1.], [6., 4., 4., 1.], [7., 6., 2., 1.], [4., 4., 5., 1.], [5., 8., 5., 1.], [5., 6., 5., 1.]])
arithmetic on Numpy Arrays¶
In [83]:
print(x)
print(y)
print(x + y)
print(x * y)
print(x / y)
print(x % y)
print(x // y) # floor division
print(x ** y)
# v = np.array([1,2]) # python does not recycle shorter vectors in the way R does.
[1 3 6 7] [1 2 4 1] [ 2 5 10 8] [ 1 6 24 7] [1. 1.5 1.5 7. ] [0 1 2 0] [1 1 1 7] [ 1 9 1296 7]
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[83], line 12 8 print(x ** y) 10 v = np.array([1,2]) ---> 12 x + v ValueError: operands could not be broadcast together with shapes (4,) (2,)
Summary statistics on NumPy arrays¶
In [85]:
X = rng.poisson(lam = 5, size = (20,4))
print(X)
[[ 9 3 6 5] [ 2 5 8 8] [ 5 6 5 7] [ 0 3 6 5] [ 2 4 4 2] [ 6 3 8 4] [ 5 2 6 6] [ 6 5 4 1] [ 4 5 2 1] [ 5 5 7 4] [ 9 7 4 5] [ 5 10 2 8] [ 6 6 6 4] [10 7 5 4] [ 3 3 7 5] [ 5 8 3 4] [ 5 0 4 6] [ 4 2 6 6] [ 8 5 7 6] [ 4 4 5 5]]
Out[85]:
<function ndarray.sum>
In [92]:
print(X.sum())
print(X.sum(axis = 1)) # row sums
print(X.sum(axis = 0)) # column sums
print(X.max(axis = 1)) # row maxima
print(X.min(axis = 0)) # col minima
397 [23 23 23 14 12 21 19 16 12 21 25 25 22 26 18 20 15 18 26 18] [103 93 105 96] [ 9 8 7 6 4 8 6 6 5 7 9 10 6 10 7 8 6 6 8 5] [0 0 2 1]
Missing values in NumPy arrays¶
In [100]:
D = rng.beta(a = 2, b = 4, size = (10,3))
print(D)
[[0.15475788 0.23089473 0.492387 ] [0.06601769 0.41978107 0.57223595] [0.41535124 0.41073002 0.06715321] [0.63211541 0.58932949 0.30109428] [0.6719356 0.48659361 0.06631449] [0.29074745 0.32905397 0.21785407] [0.40025564 0.18485521 0.72320056] [0.4089255 0.35956024 0.54029206] [0.57521063 0.59567838 0.22432727] [0.10740836 0.27799437 0.73246314]]
In [110]:
D[D < 0.10] = None # this make a missing value in Python
print(D)
[[0.15475788 0.23089473 0.492387 ] [ nan 0.41978107 0.57223595] [0.41535124 0.41073002 nan] [0.63211541 0.58932949 0.30109428] [0.6719356 0.48659361 nan] [0.29074745 0.32905397 0.21785407] [0.40025564 0.18485521 0.72320056] [0.4089255 0.35956024 0.54029206] [0.57521063 0.59567838 0.22432727] [0.10740836 0.27799437 0.73246314]]
In [113]:
print(D.sum())
print(D.max())
print(np.nansum(D))
print(np.nanmax(D))
nan nan 11.34503314600664 0.732463138543532