import numpy as np
import scipy.linalg
np.random.seed(1)
"""
Equation:
3x + 6y + 5z = 30
4x - 2y + 1z = 3
2x + 4y - 3z = 1
[3  6  5] [x]   [30]
[4 -2  1] [y] = [ 3]
[2  4 -3] [z]   [ 1]
    ↑      ↑      ↑
    A      x      b
"""
A = [[3, 6, 5],
     [4, -2, 1],
     [2, 4, -3]]
b = [30, 3, 1]
print("[x, y, z] = ", np.linalg.solve(A, b))
[x, y, z] = [1. 2. 3.]
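As a quick sanity check (a sketch; `x` here is a hypothetical name for the solution vector), the computed solution should satisfy the original system:

# Sanity check: the solution returned by np.linalg.solve satisfies A x = b.
x = np.linalg.solve(A, b)
print(np.allclose(np.array(A) @ x, b))  # True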
combined = np.c_[A, b]
print(scipy.linalg.lu(combined)[-1])
[[  4.          -2.           1.           3.        ]
 [  0.           7.5          4.25        27.75      ]
 [  0.           0.          -6.33333333 -19.        ]]
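The last matrix returned by `scipy.linalg.lu` is the upper-triangular factor $U$ of the augmented system, so the same solution can be recovered by back substitution. A minimal sketch, assuming the final column of $U$ is the transformed right-hand side:

# Back substitution on U[:, :3] x = U[:, 3], solving from the last row up.
U = scipy.linalg.lu(combined)[-1]
x = np.zeros(3)
for i in range(2, -1, -1):
    x[i] = (U[i, 3] - U[i, i+1:3] @ x[i+1:]) / U[i, i]
print(x)  # [1. 2. 3.]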
Matrix multiplication is not commutative. The multiplicative identity $I$ satisfies $A = AI = IA$; it is essentially the number "1" for matrices. The inverse $A^{-1}$ of $A$ satisfies $AA^{-1} = A^{-1}A = I$. In other words, right/left multiplication by the inverse acts like right/left division.
A = [[1, 2],
     [3, 4]]
B = [[2, 3],
     [5, 7]]
A = np.array(A)
B = np.array(B)
A_inv = np.linalg.inv(A)
with np.printoptions(precision=3, suppress=True):
print("AB =\n", A @ B, "\n")
print("BA =\n", B @ A, "\n")
print("A⁻¹ =\n", A_inv, "\n")
print("A⁻¹A = AA⁻¹ =\n", A_inv @ A)
AB =
 [[12 17]
 [26 37]]

BA =
 [[11 16]
 [26 38]]

A⁻¹ =
 [[-2.   1. ]
 [ 1.5 -0.5]]

A⁻¹A = AA⁻¹ =
 [[1. 0.]
 [0. 1.]]
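Since multiplying by $A^{-1}$ acts like division, a linear system can also be solved as $x = A^{-1}b$. A sketch with a hypothetical right-hand side `b2`; in practice `np.linalg.solve` is preferred, since forming the inverse explicitly is slower and less accurate:

b2 = np.array([5, 11])   # hypothetical right-hand side
print(A_inv @ b2)        # [1. 2.], since A @ [1, 2] = [5, 11]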
We compute eigenvalues using the finite difference matrix $$A = \begin{bmatrix} -2&1&0&0&\cdots&0\\ 1&-2&1&0&\cdots&0\\ 0&1&-2&1&\cdots&0\\ 0&0&1&-2&\cdots&0\\ \vdots&\vdots&\vdots&\vdots&\ddots&\vdots\\ 0&0&0&0&\cdots&-2\\ \end{bmatrix},$$ which encodes the one-dimensional second derivative.
n = 5
A = np.zeros((n, n))
A += np.diag(-2 * np.ones(n))      # main diagonal: -2
A += np.diag(np.ones(n-1), -1)     # subdiagonal: 1
A += np.diag(np.ones(n-1), 1)      # superdiagonal: 1
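As a sanity check (a sketch with a hypothetical grid spacing `h`), applying $A$ to samples of $f(x) = x^2$ should give roughly $h^2 f''(x) = 2h^2$ away from the boundary:

h = 0.1                             # hypothetical grid spacing
u = (h * np.arange(1, n + 1))**2    # samples of x^2 on the grid
# ≈ 2*h**2 = 0.02 in every row but the last (boundary) one;
# the first row matches too because x**2 vanishes at the left boundary.
print(A @ u)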
def eigenvalues(A):
    # QR iteration: A_{k+1} = R_k Q_k has the same eigenvalues as A and
    # (for these examples) converges to an upper-triangular matrix whose
    # diagonal holds them.
    Q, R = np.linalg.qr(A)
    while not np.allclose(A, np.triu(A)):
        A = R @ Q
        Q, R = np.linalg.qr(A)
    return np.diag(A)
I = np.identity(n)
for eig in eigenvalues(A):
print(f"λ = {eig:.3f} -> |A-λI| = {np.linalg.det(A-eig*I) : .3f}")
λ = -3.732 -> |A-λI| =  0.000
λ = -3.000 -> |A-λI| =  0.000
λ = -2.000 -> |A-λI| = -0.000
λ = -1.000 -> |A-λI| =  0.000
λ = -0.268 -> |A-λI| =  0.000
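A quick cross-check of the QR iteration against NumPy's built-in symmetric eigensolver (applicable here since $A$ is symmetric); a sketch:

# Both should print the same eigenvalues, up to floating-point error.
print(np.sort(eigenvalues(A)))
print(np.sort(np.linalg.eigvalsh(A)))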
# P(x) = x^4 - 10x^3 + 35x^2 - 50x + 24 = (x-1)(x-2)(x-3)(x-4)
P = np.array([-10, 35, -50, 24][::-1])   # coefficients, constant term first
# Companion matrix of P: its eigenvalues are exactly the roots of P.
A = np.zeros((len(P), len(P)))
A += np.diag(np.ones(len(P)-1), 1)       # ones on the superdiagonal
A[-1] = -P                               # last row: negated coefficients
print("Roots of x^4 - 10x^3 + 35x^2 - 50x + 24:")
with np.printoptions(precision=5):
    print(eigenvalues(A))
Roots of x^4 - 10x^3 + 35x^2 - 50x + 24:
[4. 3. 2. 1.]
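For comparison, `np.roots` uses the same companion-matrix idea internally (coefficients given highest degree first):

print(np.roots([1, -10, 35, -50, 24]))   # the roots 1, 2, 3, 4 (in some order)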
Finding a root of $$f(x, y, z) = \begin{bmatrix}x^2-y^2\\x^2-z^2\\ 1 + yz\end{bmatrix}$$ using Newton's method.
def f(x, y, z):
    return np.array([x**2 - y**2, x**2 - z**2, 1 + y*z])

def Jacobian(x, y, z):
    return np.array([[2*x, -2*y,    0],
                     [2*x,    0, -2*z],
                     [  0,    z,    y]])
x, y, z = 1, 2, 3
print(f"x, y, z = {x}, {y}, {z}", "\n")
print("f(x, y, z) = ", f(x, y, z), "\n")
print("J(f) (x, y, z) = \n", Jacobian(x, y, z))
x, y, z = 1, 2, 3

f(x, y, z) =  [-3 -8  7]

J(f) (x, y, z) =
 [[ 2 -4  0]
 [ 2  0 -6]
 [ 0  3  2]]
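To confirm the hand-derived Jacobian, we can compare it against central finite differences; `numerical_jacobian` below is a hypothetical helper, not part of the original code:

def numerical_jacobian(x, y, z, h=1e-6):
    # Central differences, column by column:
    # J[:, j] ≈ (f(p + h e_j) - f(p - h e_j)) / 2h
    p = np.array([x, y, z], dtype=float)
    J = np.zeros((3, 3))
    for j in range(3):
        e = np.zeros(3)
        e[j] = h
        J[:, j] = (f(*(p + e)) - f(*(p - e))) / (2 * h)
    return J

print(np.allclose(numerical_jacobian(1, 2, 3), Jacobian(1, 2, 3)))  # True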
def Newton(f, threshold=1e-5):
    x, y, z = 1 + np.random.random(3)  # random starting point with 1 < x, y, z < 2
    while np.linalg.norm(f(x, y, z)) > threshold:
        # Solve J·d = f for the Newton step rather than inverting J.
        J = Jacobian(x, y, z)
        dx, dy, dz = np.linalg.solve(J, f(x, y, z))
        x -= dx; y -= dy; z -= dz
    return np.array([x, y, z])
with np.printoptions(precision=3, suppress=True):
print("x, y, z = ", Newton(f))
x, y, z = [-1. 1. -1.]
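By construction, the residual at the returned point is below the threshold; a quick check (sketch):

root = Newton(f)
print(np.linalg.norm(f(*root)))   # < 1e-5 by the stopping criterion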
Broyden's method approximates the Jacobian using previous iterations via the secant condition $$J_n\Delta x_n \approx \Delta f_n.$$ However, this alone isn't enough information to determine $J_n$.
Example: for $$f = \begin{bmatrix}x+y\\x-y\end{bmatrix},$$ taking $$\Delta x = \Delta y = 1\implies \Delta f = \begin{bmatrix}2\\0\end{bmatrix}.$$ Both $$\begin{bmatrix}2&0\\0&0\end{bmatrix}\text{ and }\begin{bmatrix}1&1\\1&-1\end{bmatrix}$$ could be $J$.
We impose an additional condition: for any vector $y$ perpendicular to $\Delta x_n$, we require $J_n y = J_{n-1} y$, meaning we only modify $J$ in directions where we have new information. This forces $$J_n = J_{n-1} + (\dots)\Delta x_n^T,$$ and plugging back into the secant condition yields $$J_n = J_{n-1} + \frac{\Delta f_n - J_{n-1}\Delta x_n}{\Delta x_n^T \Delta x_n}\Delta x_n^T.$$ If we instead work with the inverse of the Jacobian, we get $$J_n^{-1} = J_{n-1}^{-1} + \frac{\Delta x_n - J_{n-1}^{-1}\Delta f_n}{\Delta f_n^T \Delta f_n}\Delta f_n^T.$$
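A quick numerical check of the update formula (with hypothetical random $J_{n-1}$, $\Delta x$, $\Delta f$): after the rank-one update, the secant condition $J_n \Delta x_n = \Delta f_n$ holds exactly, and $J_n$ agrees with $J_{n-1}$ on vectors perpendicular to $\Delta x_n$:

J_prev = np.random.random((3, 3))          # hypothetical previous Jacobian
dx = np.random.random(3)                   # hypothetical step Δx
df = np.random.random(3)                   # hypothetical change Δf
J_new = J_prev + np.outer((df - J_prev @ dx) / (dx @ dx), dx)
print(np.allclose(J_new @ dx, df))         # secant condition: True
y = np.cross(dx, np.random.random(3))      # a vector perpendicular to dx
print(np.allclose(J_new @ y, J_prev @ y))  # unchanged off the step direction: True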
def Broyden(f, threshold=1e-5):
    x, y, z = np.ones(3)
    J_inv = np.identity(3)      # initial guess for the inverse Jacobian
    f_curr = f(x, y, z)
    while np.linalg.norm(f_curr) > threshold:
        delta = -J_inv @ f_curr  # quasi-Newton step
        dx, dy, dz = delta
        x += dx; y += dy; z += dz
        f_prev, f_curr = f_curr, f(x, y, z)
        df = f_curr - f_prev
        # Rank-one update of the inverse Jacobian (derived above).
        J_inv += np.outer((delta - J_inv @ df) / (df @ df), df)
    return np.array([x, y, z])
with np.printoptions(precision=3, suppress=True):
print("x, y, z = ", Broyden(f))
x, y, z = [ 1. 1. -1.]
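Broyden's iteration here converges to a different root of the same system; a quick residual check (sketch):

print(np.linalg.norm(f(1, 1, -1)))   # exactly 0: (1, 1, -1) is a root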