NumPy

NOTE: This page is currently being updated.

References:

Motivation

A matrix is a two-dimensional array of values. Matrices are used for solving systems of equations, representing graphs, and so on. The more concisely and clearly we express matrix operations in scripts, the less time we spend debugging.

Assume X and Y represent matrices and vec is a 1-D array.

np.add(X,Y)       # Add
np.subtract(X,Y)  # Subtract
np.divide(X,Y)    # Divide

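# Multiply: matrix product (all equivalent for 2-D arrays, except np.multiply)
X @ Y             # recommended
np.multiply(X,Y)  # NOTE: element-wise product (same as X * Y), not the matrix product
np.matmul(X, Y)
np.dot(X, Y)
X.dot(Y)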

Matrix operations

X.flatten()        # Flatten
np.sqrt(X)         # Square root all elements
np.sum(X)          # Sum all elements
np.sum(X,axis=0)   # Sum down the rows => one sum per column
np.sum(X,axis=1)   # Sum across the columns => one sum per row
np.amax(X)         # Single max value
np.amax(X, axis=0) # Get max in each column
np.amax(X, axis=1) # Get max in each row
np.mean(X)         # Mean
np.std(X)          # Standard deviation
np.var(X)          # Variance
np.trace(X)        # Sum of the elements on the diagonal
np.linalg.matrix_rank(X)  # Rank of the matrix
np.linalg.det(X)   # Determinant of the matrix

Slicing

1D slicing

vec = list(range(10)) # [0, ..., 9]
vec[4:8]       # [4, 5, 6, 7]
vec[-5:-2]     # from 5th last up to (but excluding) 2nd last => [5, 6, 7]

# Get every Nth index value
vec[::2]      # [0, 2, 4, 6, 8]
vec[::5]      # [0, 5]

# Reverse
vec[::-1]     # Reversed copy [9, 8, ... 1, 0]; vec itself is unchanged
vec.reverse() # In-place reverse (list method)

Boolean indexing

# Here X is the 3x3 matrix [[1 2 3], [4 5 6], [7 8 9]]
cols = X[0, :] > 1  # select col(s) where first row > 1
# => [False  True  True]
X[:, cols]
# => [[2 3]
#     [5 6]
#     [8 9]]

Recall that a slice x:y starts at index x and stops just before index y (y itself is not included). For example, 1: means "from the second element to the end".

2D slicing

X = np.arange(1, 10).reshape((3, 3))  # [[1 2 3], [4 5 6], [7 8 9]]
X[1, :]       # get second row
X[:, -1]      # get last col
X[0:2, :]     # get first two rows
X[[0, 2], :]  # get first and third rows
X[:, 0:2]     # get first two columns
X[:, [0, 2]]  # get first and third columns
X[0:2, 0:2]   # get submatrix of first two rows/columns
X[X > 5]      # get elements greater than 5

# Advanced
X[:, ::-1]    # reverse cols for each row
# => [[3 2 1]
#     [6 5 4]
#     [9 8 7]]
X[1:, ::-1]   # same as above but skip first row
# => [[6 5 4]
#     [9 8 7]]
[1]:
import numpy as np
import pandas as pd
[3]:
arr = np.array([[1, 2, 3, 4, 5],
                [6, 7, 8, 9, 10]])

df = pd.DataFrame(arr)
print("Origin array:")
display(df)
print("[0:1, 1:4] ->")
display(df.iloc[0:1, 1:4])

print("[:1, 1:4] ->")
display(df.iloc[:1, 1:4])

print("[0:2, 2] ->")
display(df.iloc[0:2, 2].to_frame())

print("[0:2, 1:4] ->")
display(df.iloc[0:2, 1:4])

Origin array:
 1 2 3 4  5
 6 7 8 9 10

[0:1, 1:4] ->
 2 3 4

[:1, 1:4] ->
 2 3 4

[0:2, 2] ->
 3
 8

[0:2, 1:4] ->
 2 3 4
 7 8 9

Add new dimension

None (an alias for np.newaxis) is used inside an index expression to insert a new axis (dimension) of length 1.

[4]:
arr = np.arange(10)
assert arr.shape == (10,)
# Add two new axes using [:, None, None]
reshaped = arr[:, None, None]
assert reshaped.shape == (10, 1, 1)
[5]:
arr2 = arr.reshape(2, 5)
assert arr2.shape == (2, 5)
# Add two new axes after the first axis ("row")
assert arr2[:, None, None].shape == (2, 1, 1, 5)
assert arr2[:, :, None, None].shape == (2, 5, 1, 1)

Create and copy tensor

# Create and reshape at once
np.matrix(np.arange(12).reshape((3,4)))  # NOTE: np.matrix is no longer recommended; prefer a plain ndarray
np.zeros((5,), dtype=int)
np.zeros((2, 1))

# Reshape
X = np.arange(6)
X = X.reshape((2, 3))

# Copy exactly
np.copy(X)

# Copy shape
np.ones_like(X)         # Return 1's with (2,3) shape
np.zeros_like(X)        # Return 0's with (2,3) shape

# Full
np.full((2, 2), 10)     # Generate (2,2), all 10
np.full((2, 2), np.inf) # Generate (2,2), all inf
np.full((2, 2), [1, 2]) # Generate (2,2), each row of [1,2]

Broadcast

[6]:
a = np.array([1,2,3])
assert a.shape == (3,)
b = np.array([
    [10],
    [20],
    [30]])
assert b.shape == (3,1)

# In a, 1, 2, 3 duplicated across new rows
# In b, 10, 20, 30 duplicated across new columns
# And then those are added
expected = np.array([[11,12,13],
                     [21,22,23],
                     [31,32,33]])

assert np.array_equal(a + b, expected)

Advanced indexing

[7]:
X = np.arange(9).reshape(3,3)
assert np.array_equal(X, [[0, 1, 2],
                          [3, 4, 5],
                          [6, 7, 8]])
result = X[[0,1,2], [0,1,2]]
expected = np.array([0, 4, 8])
assert np.array_equal(result, expected)

Stacking

  • Axis 0 - rows

  • Axis 1 - columns

  • Axis 2 - depth

  • Axis 3 - and so on…

[12]:
# Base arrays
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

# Stack across rows (Method 1/2)
stacked = np.stack([a, b], axis=0)
expected = np.array([[1,2,3],
                     [4,5,6]])
assert stacked.shape == (2,3)
assert np.array_equal(stacked, expected)

# Stack across rows (Method 2/2)
vstacked = np.vstack([a, b])   # shape (2,3)
expected = np.array([[1,2,3],
                     [4,5,6]])
assert np.array_equal(vstacked, expected)

# Stack along axis 1: each input array becomes a column of the result
stacked_axis1 = np.stack([a, b], axis=1)
expected_axis1 = np.array([[1,4],
                           [2,5],
                           [3,6]])
assert stacked_axis1.shape == (3,2)
assert np.array_equal(stacked_axis1, expected_axis1)

# np.hstack (concatenate horizontally; 1-D inputs are joined end to end)
hstacked = np.hstack([a, b])   # shape (6,)
expected = np.array([1,2,3,4,5,6])
assert np.array_equal(hstacked, expected)

# np.dstack (stack along depth / third axis)
c = np.array([7,8,9])
dstacked = np.dstack([a, b, c])  # shape (1,3,3)
expected = np.array([[[1,4,7],
                      [2,5,8],
                      [3,6,9]]])
assert np.array_equal(dstacked, expected)

Note that np.vstack is shorthand for vertical stacking: it behaves like np.concatenate(..., axis=0) after promoting 1-D inputs of length N to shape (1, N). np.stack always creates a new axis and lets you choose where it goes, so it’s more general.

Performance

  • Vectorization - use array operations instead of Python loops

  • Use np.where for conditional element selection: np.where(X > 5, 1, 0)  # 1 where X > 5, else 0

Missing data

import numpy as np

# Example array with NaN and Inf
arr = np.array([1.0, 2.0, np.nan, np.inf, -np.inf, 3.0])

# Count NaNs
assert np.isnan(arr).sum() == 1   # only one np.nan

# Count Infs
assert np.isinf(arr).sum() == 2   # +inf and -inf

# Mean ignoring NaNs
arr2 = np.array([1.0, 2.0, np.nan, 3.0])
assert np.nanmean(arr2) == 2.0    # (1+2+3)/3

# Replace NaN/Inf with finite values
cleaned = np.nan_to_num(arr, nan=0.0, posinf=999.0, neginf=-999.0)
expected = np.array([1.0, 2.0, 0.0, 999.0, -999.0, 3.0])
assert np.array_equal(cleaned, expected)

Other useful stuff

Random generator

# Uniform [0,1)
arr1 = np.random.rand(3, 2)
assert arr1.shape == (3, 2)
assert np.all((arr1 >= 0) & (arr1 < 1))

# Standard normal (mean ≈ 0, std ≈ 1, but here just shape check)
arr2 = np.random.randn(3, 2)
assert arr2.shape == (3, 2)
# Values can be any real number, so no bound check

# Random integers from 0 to 9 (the upper bound 10 is exclusive)
arr3 = np.random.randint(0, 10, (2, 3))
assert arr3.shape == (2, 3)
assert np.all((arr3 >= 0) & (arr3 < 10))

# Sampling with replacement
arr4 = np.random.choice([1, 2, 3], size=5, replace=True)
assert arr4.shape == (5,)
assert np.all(np.isin(arr4, [1, 2, 3]))