NumPy
NOTE: This page is currently being updated.
References:
Motivation
A matrix represents a set of values. Matrices are used in solving a system of equations, representing graphs, etc. The more concisely and clearly we represent matrices in scripts, the less time is required for debugging.
Assume X
and Y
represent matrices and vec
is a 1-D array.
np.add(X,Y) # Add
np.subtract(X,Y) # Subtract
np.divide(X,Y) # Divide
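# Multiply: matrix product, all the same for 2-D arrays (except np.multiply)
X @ Y # recommended
np.multiply(X,Y) # element-wise product (same as X * Y), NOT a matrix product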
np.matmul(X, Y)
np.dot(X, Y)
X.dot(Y)
Matrix operations
X.flatten() # Flatten
np.sqrt(X) # Square root all elements
np.sum(X) # Sum all elements
np.sum(X,axis=0) # Sum down the rows => one sum per column
np.sum(X,axis=1) # Sum across the columns => one sum per row
np.amax(X) # Single max value
np.amax(X, axis=0) # Get max in each column
np.amax(X, axis=1) # Get max in each row
np.mean(X) # Mean
np.std(X) # Standard deviation
np.var(X) # Variance
np.trace(X) # Sum of the elements on the diagonal
np.linalg.matrix_rank(X) # Rank of the matrix
np.linalg.det(X) # Determinant of the matrix
Slicing
1D slicing
vec = list(range(10)) # [0, ..., 9]
vec[4:8] # [4, 5, 6, 7]
vec[-5:-2] # 5th last up to (but excluding) 2nd last => [5, 6, 7]
# Get every Nth index value
vec[::2] # [0, 2, 4, 6, 8]
vec[::5] # [0, 5]
# Reverse
vec[::-1] # Returns a reversed sequence [9, 8, ... 1, 0]; vec is unchanged
vec.reverse() # Reverses vec in place
Boolean indexing
cols = X[0, :] > 1 # select col(s) where first row > 1
# => [False True True]
X[:, cols]
# => [[2 3]
# [5 6]
# [8 9]]
Slicing reminder: 1: means "from the second element onward"; in x:y, the element at index y is not included.
2D slicing
X = np.arange(1, 10).reshape((3, 3)) # [[1 2 3], [4 5 6], [7 8 9]]
X[1, :] # get second row
X[:, -1] # get last col
X[0:2, :] # get first two rows
X[[0, 2], :] # get first and third rows
X[:, 0:2] # get first two columns
X[:, [0, 2]] # get first and third columns
X[0:2, 0:2] # get submatrix of first two rows/columns
X[X > 5] # get elements greater than 5
# Advanced
X[:, ::-1] # reverse cols for each row
# => [[3 2 1]
# [6 5 4]
# [9 8 7]]
X[1:, ::-1] # same as above but skip first row
# => [[6 5 4]
# [9 8 7]]
[1]:
import numpy as np
import pandas as pd
[3]:
arr = np.array([[1, 2, 3, 4, 5],
[6, 7, 8, 9, 10]])
df = pd.DataFrame(arr)
print("Origin array:")
display(df)
print("[0:1, 1:4] ->")
display(df.iloc[0:1, 1:4])
print("[:1, 1:4] ->")
display(df.iloc[:1, 1:4])
print("[0:2, 2] ->")
display(df.iloc[0:2, 2].to_frame())
print("[0:2, 1:4] ->")
display(df.iloc[0:2, 1:4])
Origin array:
1 2 3 4 5
6 7 8 9 10
[0:1, 1:4] ->
2 3 4
[:1, 1:4] ->
2 3 4
[0:2, 2] ->
3
8
[0:2, 1:4] ->
2 3 4
7 8 9
Add new dimension
None
(an alias for np.newaxis) is used inside an index expression to insert a new axis (dimension) of length 1.
[4]:
arr = np.arange(10)
assert arr.shape == (10,)
# Add two new axes using [:, None, None]
reshaped = arr[:, None, None]
assert reshaped.shape == (10, 1, 1)
[5]:
arr2 = arr.reshape(2, 5)
assert arr2.shape == (2, 5)
# Add two new axes after the first axis ("row")
assert arr2[:, None, None].shape == (2, 1, 1, 5)
assert arr2[:, :, None, None].shape == (2, 5, 1, 1)
Create and copy tensor
# Create and reshape at once
np.matrix(np.arange(12).reshape((3,4)))
np.zeros((5,), dtype=int)
np.zeros((2, 1))
# Reshape
X = np.arange(6)
X = X.reshape((2, 3))
# Copy exactly
np.copy(X)
# Copy shape
np.ones_like(X) # Return 1's with (2,3) shape
np.zeros_like(X) # Return 0's with (2,3) shape
# Full
np.full((2, 2), 10) # Generate (2,2), all 10
np.full((2, 2), np.inf) # Generate (2,2), all inf
np.full((2, 2), [1, 2]) # Generate (2,2), each row of [1,2]
Broadcast
[6]:
a = np.array([1,2,3])
assert a.shape == (3,)
b = np.array([
[10],
[20],
[30]])
assert b.shape == (3,1)
# The values of a (1, 2, 3) are duplicated across new rows
# The values of b (10, 20, 30) are duplicated across new columns
# The two resulting (3,3) arrays are then added element-wise
expected = np.array([[11,12,13],
[21,22,23],
[31,32,33]])
assert np.array_equal(a + b, expected)
Advanced indexing
[7]:
X = np.arange(9).reshape(3,3)
assert np.array_equal(X, [[0, 1, 2],
[3, 4, 5],
[6, 7, 8]])
result = X[[0,1,2], [0,1,2]]
expected = np.array([0, 4, 8])
assert np.array_equal(result, expected)
Stacking
Axis 0 - rows
Axis 1 - columns
Axis 2 - depth
Axis 3 - so on..
[12]:
# Base arrays
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
# Stack across rows (Method 1/2)
stacked = np.stack([a, b], axis=0)
expected = np.array([[1,2,3],
[4,5,6]])
assert stacked.shape == (2,3)
assert np.array_equal(stacked, expected)
# Stack across rows (Method 2/2)
vstacked = np.vstack([a, b]) # shape (2,3)
expected = np.array([[1,2,3],
[4,5,6]])
assert np.array_equal(vstacked, expected)
# Stack across columns (imagine you rotate the matrix and new rows)
stacked_axis1 = np.stack([a, b], axis=1)
expected_axis1 = np.array([[1,4],
[2,5],
[3,6]])
assert stacked_axis1.shape == (3,2)
assert np.array_equal(stacked_axis1, expected_axis1)
# np.hstack (concatenate along columns)
hstacked = np.hstack([a, b]) # shape (6,)
expected = np.array([1,2,3,4,5,6])
assert np.array_equal(hstacked, expected)
# np.dstack (stack along depth / third axis)
c = np.array([7,8,9])
dstacked = np.dstack([a, b, c]) # shape (1,3,3)
expected = np.array([[[1,4,7],
[2,5,8],
[3,6,9]]])
assert np.array_equal(dstacked, expected)
Note that np.vstack is shorthand for vertical stacking: it behaves like np.concatenate(..., axis=0) after promoting 1-D inputs to rows.
np.stack joins the arrays along a new axis that you choose, so it’s more general.
Performance
Vectorization - use array operations instead of loops
Use
np.where
for conditional element selection:
np.where(X > 5, 1, 0) # Replace with 1 if >5 else 0
Missing data
import numpy as np
# Example array with NaN and Inf
arr = np.array([1.0, 2.0, np.nan, np.inf, -np.inf, 3.0])
# Count NaNs
assert np.isnan(arr).sum() == 1 # only one np.nan
# Count Infs
assert np.isinf(arr).sum() == 2 # +inf and -inf
# Mean ignoring NaNs
arr2 = np.array([1.0, 2.0, np.nan, 3.0])
assert np.nanmean(arr2) == 2.0 # (1+2+3)/3
# Replace NaN/Inf with finite values
cleaned = np.nan_to_num(arr, nan=0.0, posinf=999.0, neginf=-999.0)
expected = np.array([1.0, 2.0, 0.0, 999.0, -999.0, 3.0])
assert np.array_equal(cleaned, expected)
Other useful stuff
Print nicely
np.set_printoptions(
precision=3, # Set decimal places
suppress=True, # Avoid scientific notations
threshold=100, # Max number of elements to be printed
linewidth=80,
edgeitems=2 # Show two values per edge when truncated
)
Random generator
# Uniform [0,1)
arr1 = np.random.rand(3, 2)
assert arr1.shape == (3, 2)
assert np.all((arr1 >= 0) & (arr1 < 1))
# Standard normal (mean 0, std 1); samples are random, so only the shape is checked
arr2 = np.random.randn(3, 2)
assert arr2.shape == (3, 2)
# Values can be any real number, so no bound check
# Random integers between 0 and 9
arr3 = np.random.randint(0, 10, (2, 3))
assert arr3.shape == (2, 3)
assert np.all((arr3 >= 0) & (arr3 < 10))
# Sampling with replacement
arr4 = np.random.choice([1, 2, 3], size=5, replace=True)
assert arr4.shape == (5,)
assert np.all(np.isin(arr4, [1, 2, 3]))