Mathematical Foundations of AI & ML
Unit 2: Linear Algebra for Machine Learning
FAU Erlangen-Nürnberg
By the end of this lecture, you will be able to:
Geometric Plot

//| panel: input
viewof a11 = Inputs.range([-2, 2], {value: 1, step: 0.1, label: "A11"})
viewof a12 = Inputs.range([-2, 2], {value: 0, step: 0.1, label: "A12"})
viewof a21 = Inputs.range([-2, 2], {value: 0, step: 0.1, label: "A21"})
viewof a22 = Inputs.range([-2, 2], {value: 1, step: 0.1, label: "A22"})//| fig-align: center
Plot.plot({
grid: true,
x: {domain: [-3, 3]},
y: {domain: [-3, 3]},
aspectRatio: 1,
marks: [
Plot.ruleX([0]),
Plot.ruleY([0]),
Plot.line(origSq, {x: "x", y: "y", stroke: "lightgray", strokeWidth: 2}),
Plot.line(transSq, {x: "x", y: "y", stroke: "dodgerblue", strokeWidth: 3})
]
})Matrix \(\mathbf{A}\): \(\begin{bmatrix} 1 & 2 \\ 2 & d \end{bmatrix}\)
When d=4, the matrix is Singular. The output space squashes to a 1D line!
//| output: false
ptsGridNull = {
let res = [];
for(let i=-3; i<=3; i+=0.5) {
for(let j=-3; j<=3; j+=0.5) {
res.push({x0: i, y0: j});
}
}
return res;
}
mappedNull = ptsGridNull.map(p => ({
x: 1 * p.x0 + 2 * p.y0,
y: 2 * p.x0 + dVal * p.y0,
is_null: Math.abs(1 * p.x0 + 2 * p.y0) < 0.1 && Math.abs(2 * p.x0 + dVal * p.y0) < 0.1
}))Move \(\mathbf{y}\) (blue) and watch its projection \(\hat{\mathbf{y}}\) (green) drop orthogonally (red dashed) onto the subspace (gray).
//| fig-align: center
Plot.plot({
grid: true, x: {domain: [-5, 5]}, y: {domain: [-5, 5]}, aspectRatio: 1,
marks: [
Plot.ruleX([0]), Plot.ruleY([0]),
Plot.line([[-6*Math.cos(thetaProj), -6*Math.sin(thetaProj)], [6*Math.cos(thetaProj), 6*Math.sin(thetaProj)]], {stroke: "lightgray", strokeWidth: 4}),
Plot.arrow([{x1: 0, y1: 0, x2: yProj.x, y2: yProj.y}], {x1: "x1", y1: "y1", x2: "x2", y2: "y2", stroke: "green", strokeWidth: 4}),
Plot.arrow([{x1: 0, y1: 0, x2: y1, y2: y2}], {x1: "x1", y1: "y1", x2: "x2", y2: "y2", stroke: "blue", strokeWidth: 2}),
Plot.line([[yProj.x, yProj.y], [y1, y2]], {stroke: "red", strokeDasharray: "4", strokeWidth: 2}),
Plot.text([{x: y1, y: y2+0.5, text: "y"}], {x: "x", y: "y", text: "text", fill: "blue"}),
Plot.text([{x: yProj.x, y: yProj.y-0.5, text: "y_hat"}], {x: "x", y: "y", text: "text", fill: "green"})
]
})//| fig-align: center
Plot.plot({
grid: true, x: {domain: [-4, 4]}, y: {domain: [-4, 4]}, aspectRatio: 1,
marks: [
Plot.ruleX([0]), Plot.ruleY([0]),
Plot.line(circlePts, {x: "x", y: "y", stroke: "gray", strokeDasharray: "4"}),
Plot.line(ellipsePts, {x: "x", y: "y", stroke: "dodgerblue", strokeWidth: 2}),
Plot.arrow(eigenVectors, {x1: 0, y1: 0, x2: "x", y2: "y", stroke: "red", strokeWidth: 3})
]
})//| output: false
circlePts = Array.from({length: 100}, (_, i) => {
let t = i * 2 * Math.PI / 99; return {x: Math.cos(t), y: Math.sin(t)};
});
ellipsePts = circlePts.map(p => ({
x: s11*p.x + s12*p.y, y: s12*p.x + s22*p.y
}));
eigenVectors = {
let T = s11 + s22; let D = s11*s22 - s12*s12;
let L1 = T/2 + Math.sqrt(Math.max(0, T*T/4 - D));
let L2 = T/2 - Math.sqrt(Math.max(0, T*T/4 - D));
if (Math.abs(s12) < 0.001) return [{x: L1, y: 0}, {x: 0, y: L2}];
let v1x = L1 - s22, v1y = s12;
let n1 = Math.sqrt(v1x*v1x + v1y*v1y) || 1;
let v2x = L2 - s22, v2y = s12;
let n2 = Math.sqrt(v2x*v2x + v2y*v2y) || 1;
return [
{x: (v1x/n1)*L1, y: (v1y/n1)*L1},
{x: (v2x/n2)*L2, y: (v2y/n2)*L2}
];
}//| output: false
validCov = Math.max(-Math.sqrt(varX*varY)*0.99, Math.min(Math.sqrt(varX*varY)*0.99, covXY));
randomData = {
let pts = [];
let L11 = Math.sqrt(varX);
let L21 = validCov / L11;
let L22 = Math.sqrt(Math.max(0, varY - L21*L21));
for(let i=0; i<400; i++) {
let u1 = Math.random(), u2 = Math.random();
let z0 = Math.sqrt(-2.0 * Math.log(u1)) * Math.cos(2.0 * Math.PI * u2);
let z1 = Math.sqrt(-2.0 * Math.log(u1)) * Math.sin(2.0 * Math.PI * u2);
pts.push({x: L11*z0, y: L21*z0 + L22*z1});
}
return pts;
}
covarianceEllipse = {
let pts = [];
let T = varX + varY; let D = varX*varY - validCov*validCov;
let L1 = T/2 + Math.sqrt(Math.max(0, T*T/4 - D));
let L2 = T/2 - Math.sqrt(Math.max(0, T*T/4 - D));
let v1x = L1 - varY, v1y = validCov;
let n1 = Math.sqrt(v1x*v1x + v1y*v1y) || 1; v1x/=n1; v1y/=n1;
let v2x = L2 - varY, v2y = validCov;
let n2 = Math.sqrt(v2x*v2x + v2y*v2y) || 1; v2x/=n2; v2y/=n2;
for(let i=0; i<=100; i++) {
let t = i * 2 * Math.PI / 100;
let c = Math.cos(t), s = Math.sin(t);
// Multiply by 2.0 to show the 2-sigma ellipse
pts.push({
x: 2.0 * (Math.sqrt(L1)*v1x*c + Math.sqrt(L2)*v2x*s),
y: 2.0 * (Math.sqrt(L1)*v1y*c + Math.sqrt(L2)*v2y*s)
});
}
return pts;
}Projected Variance:
Rotate the line (gray). Watch the variance of the projected points (red) change! It peaks when aligned with the principal component.
//| fig-align: center
Plot.plot({
grid: true, x: {domain: [-6, 6]}, y: {domain: [-6, 6]}, aspectRatio: 1,
marks: [
Plot.ruleX([0]), Plot.ruleY([0]),
Plot.link(pcaLinkData, {x1: "x1", y1: "y1", x2: "x2", y2: "y2", stroke: "pink", strokeWidth: 1}),
Plot.dot(pcaData, {x: "x", y: "y", fill: "lightgray", r: 3}),
Plot.line([[-7*Math.cos(pcaTheta), -7*Math.sin(pcaTheta)], [7*Math.cos(pcaTheta), 7*Math.sin(pcaTheta)]], {stroke: "gray", strokeWidth: 2}),
Plot.dot(pcaProjData, {x: "x", y: "y", fill: "red", r: 4})
]
})//| output: false
pcaData = {
let pts = [];
let ang = Math.PI / 6;
let s_major = 3.0, s_minor = 0.5;
for (let i = 0; i < 150; i++) {
let u1 = Math.max(0.0001, (Math.sin(i * 123.456) + 1)/2);
let u2 = Math.max(0.0001, (Math.cos(i * 789.123) + 1)/2);
let z0 = Math.sqrt(-2.0 * Math.log(u1)) * Math.cos(2.0 * Math.PI * u2);
let z1 = Math.sqrt(-2.0 * Math.log(u1)) * Math.sin(2.0 * Math.PI * u2);
let x_raw = s_major * z0; let y_raw = s_minor * z1;
pts.push({ x: x_raw * Math.cos(ang) - y_raw * Math.sin(ang), y: x_raw * Math.sin(ang) + y_raw * Math.cos(ang) });
}
return pts;
}
pcaProjData = pcaData.map(p => {
let c = Math.cos(pcaTheta), s = Math.sin(pcaTheta);
let proj = p.x * c + p.y * s;
return {x: proj * c, y: proj * s, proj_val: proj};
})
pcaLinkData = pcaData.map((p, i) => ({
x1: p.x, y1: p.y, x2: pcaProjData[i].x, y2: pcaProjData[i].y
}))
pcaVar = d3.variance(pcaProjData, d => d.proj_val)%%| echo: false
%%| fig-align: center
%%{init: {'theme': 'dark', 'themeVariables': { 'darkMode': true, 'background': 'transparent' }}}%%
graph LR
X["X"] --> Eq["="]
Eq["="] --> U["U"]
U["U"] --> Sigma["Σ"]
Sigma["Σ"] --> VT["V^T"]
subgraph Dimensions
X---D1["N x D"]
U---D2["N x N"]
Sigma---D3["N x D"]
VT---D4["D x D"]
end
style Dimensions fill:#1e1e1e,stroke:#555,stroke-width:2px,color:#fff

Increase C (weaker regularization, \(\lambda \to 0\)) to see the constraint regions expand to meet the unconstrained optimum (gray ellipses).
//| fig-align: center
Plot.plot({
grid: true, x: {domain: [-2, 3]}, y: {domain: [-2, 3]}, aspectRatio: 1,
marks: [
Plot.ruleX([0]), Plot.ruleY([0]),
Plot.line(l2Boundary, {x: "x", y: "y", stroke: "blue", strokeWidth: 2}),
Plot.line(l1Boundary, {x: "x", y: "y", stroke: "red", strokeWidth: 2}),
Plot.dot([{x: 1.5, y: 1.0}], {x: "x", y: "y", r: 4, fill: "black"}),
Plot.text([{x: 1.5, y: 1.0, text: "w* (OLS)"}], {x: "x", y: "y", text: "text", dy: -10}),
Plot.line(lossContour1, {x: "x", y: "y", stroke: "gray"}),
Plot.line(lossContour2, {x: "x", y: "y", stroke: "gray"}),
Plot.line(lossContour3, {x: "x", y: "y", stroke: "gray"})
]
})//| output: false
l2Boundary = Array.from({length: 100}, (_, i) => {
let t = i * 2 * Math.PI / 99; return {x: constraintC * Math.cos(t), y: constraintC * Math.sin(t)};
});
l1Boundary = [
{x: constraintC, y: 0}, {x: 0, y: constraintC}, {x: -constraintC, y: 0},
{x: 0, y: -constraintC}, {x: constraintC, y: 0}
];
makeContour = (r) => {
return Array.from({length: 100}, (_, i) => {
let t = i * 2 * Math.PI / 99;
return {
x: 1.5 + r * 1.5 * Math.cos(t) + r * 0.5 * Math.sin(t),
y: 1.0 + r * 0.5 * Math.cos(t) + r * 0.8 * Math.sin(t)
};
});
};
lossContour1 = makeContour(0.4);
lossContour2 = makeContour(0.8);
lossContour3 = makeContour(1.3);import numpy as np
import pandas as pd
from scipy.linalg import svd
# 1. Collect process data (N batches, D sensors)
data = {
'Temp_Zone1': [1050, 1052, 1048, 1055],
'Pressure': [2.1, 2.15, 2.05, 2.2],
'Ni_content': [0.45, 0.44, 0.46, 0.45],
}
df = pd.DataFrame(data)
# 2. Form the Process Matrix X and center it
X = df.values
X_centered = X - np.mean(X, axis=0)
# 3. SVD -> principal directions of variation
U, Sigma, Vt = svd(X_centered, full_matrices=False)
# 4. Interpret the result
explained = Sigma**2 / np.sum(Sigma**2)
print("Singular values: ", np.round(Sigma, 3))
print("Explained variance: ", np.round(explained, 3))
print("Primary direction: ", np.round(Vt[0], 2))
# 5. Rank-1 approximation = "stable processing" subspace
X_rank1 = U[:, :1] @ np.diag(Sigma[:1]) @ Vt[:1, :]#| echo: true
#| eval: false
import numpy as np
from sklearn.decomposition import PCA
# X is our dataset of N flattened TEM images: shape (N, 1048576)
X = np.load("tem_images.npy")
# Fit PCA and extract the top 50 components
pca = PCA(n_components=50)
X_reduced = pca.fit_transform(X)
# Check how much morphological variance is captured
var_explained = np.sum(pca.explained_variance_ratio_)
print(f"Top 50 components explain {var_explained * 100:.1f}% of variance.")
# The 'eigen-microstructures' are the principal components
eigen_microstructures = pca.components_ # shape (50, 1048576)#| echo: true
#| eval: false
import numpy as np
# Basis matrix: columns are pure phase spectra (e.g., Austenite, Martensite)
# shape: (n_angles, n_phases)
X_basis = np.load("pure_phases_basis.npy")
# Mixed measured XRD spectrum vector
# shape: (n_angles,)
y_mixed = np.load("mixed_spectrum.npy")
# Solve Xw = y using Ordinary Least Squares
# Finds fractions 'w' that minimize reconstruction error ||Xw - y||^2
fractions, residuals, rank, s = np.linalg.lstsq(X_basis, y_mixed, rcond=None)
# Normalize the fractions to sum to 100%
volume_fractions = fractions / np.sum(fractions)
print(f"Austenite: {volume_fractions[0]*100:.1f}%")
print(f"Martensite: {volume_fractions[1]*100:.1f}%")
# The reconstructed spectrum is the projection onto the basis span
y_reconstructed = X_basis @ fractions#| echo: true
#| eval: false
from sklearn.decomposition import NMF
# X_mixed: rows are different sample spots, columns are XRD angles
# We suspect there are 2 pure phases (components)
nmf = NMF(n_components=2, init='nndsvd', random_state=42)
# W: The discovered physical concentrations (N samples x 2 phases)
W = nmf.fit_transform(X_mixed)
# H: The discovered pure phase spectra (2 phases x D angles)
H = nmf.components_