import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# The bundled seaborn styles were renamed to 'seaborn-v0_8-*' in Matplotlib 3.6
# and the old aliases were later removed; use whichever name this install has.
plt.style.use(
    'seaborn-v0_8-talk'
    if 'seaborn-v0_8-talk' in plt.style.available
    else 'seaborn-talk'
)
from sklearn.datasets import load_iris
from scipy import linalg
# Load the Iris dataset; the code below reads the feature matrix from
# `iris.data` and the class labels from `iris.target`.
iris = load_iris()
# Bare expressions like this one only display a value in a notebook cell;
# they have no effect when the file runs as a plain script.
iris.data[:5]
from sklearn.preprocessing import StandardScaler
# Standardise every feature (zero mean, unit variance) so that no single
# measurement dominates the principal components because of its scale.
X_scaled = StandardScaler().fit_transform(iris.data)
X_scaled[:5]
from sklearn.decomposition import PCA
# Reference result from scikit-learn: project the standardised data onto the
# first two principal components.
X_pca2 = PCA(n_components=2).fit_transform(X_scaled)
# Scatter of PC1 against PC2, coloured by class label.
fig, ax = plt.subplots(figsize=(12, 9))
ax.scatter(X_pca2[:, 0], X_pca2[:, 1], c=iris.target)
X_pca2[:5]
# PCA by hand, step 1: covariance matrix of the standardised features.
# np.cov treats each ROW as one variable, hence the transpose.
features = X_scaled.T
cov_matrix = np.cov(features)
cov_matrix[:5]

# Step 2: eigen-decomposition. Column i of `vectors` is the eigenvector that
# belongs to values[i].
# NOTE(review): np.linalg.eig does not guarantee any ordering of the
# eigenvalues; the projection code further down assumes the largest ones come
# first - confirm by inspecting `values`.
values, vectors = np.linalg.eig(cov_matrix)
values[:5]

# Step 3: share of the total variance carried by each eigenvalue.
# The total is computed once instead of on every loop iteration.
total_variance = np.sum(values)
explained_variances = [value / total_variance for value in values]
print(np.sum(explained_variances), '\n', explained_variances)
# Project the standardised samples onto the first two eigenvectors.
# `vectors.T[k]` is column k of `vectors`, i.e. the k-th eigenvector.
projected_1 = X_scaled.dot(vectors.T[0])
projected_2 = X_scaled.dot(vectors.T[1])

# Collect both projections and the class label in one frame for plotting.
res = pd.DataFrame(projected_1, columns=['PC1'])
res['PC2'] = projected_2
# The original referenced an undefined name `y`; the labels live in iris.target.
res['Y'] = iris.target
res.head()
import matplotlib.pyplot as plt
import seaborn as sns

# One-dimensional view: PC1 on the x-axis, every point at y = 0, coloured by
# class. x and y are passed by keyword because current seaborn releases no
# longer accept them positionally.
plt.figure(figsize=(20, 10))
sns.scatterplot(x=res['PC1'], y=[0] * len(res), hue=res['Y'], s=200)

# NOTE(review): this second figure is identical to the first apart from the
# marker size; it may have been meant to show PC2 on the y-axis - confirm.
plt.figure(figsize=(20, 10))
sns.scatterplot(x=res['PC1'], y=[0] * len(res), hue=res['Y'], s=100)
# Fit a full PCA (no component limit) on the RAW, unscaled measurements.
pca = PCA()
# The transformed data is discarded here; the call is made for its fitting
# side effect on `pca`.
pca.fit_transform(iris.data)
# Principal axes (one per row) and the fraction of variance each one explains.
pca.components_, pca.explained_variance_ratio_
# Feature-by-feature product matrix of the raw data. The data is NOT centred
# here, so this is not a covariance matrix.
M = iris.data.T@iris.data
M
# scipy.linalg.svd returns (U, s, Vh). With the names used here that means:
#   S -> U  (left singular vectors, as columns)
#   V -> s  (singular values, a 1-D array)
#   D -> Vh (right singular vectors, already transposed)
# Note that `D` is reassigned later by the LU/LDU cell.
S, V, D = linalg.svd(M, full_matrices=False)
S, V, D
# Left-over scratch lines: shape inspection and a reconstruction of M from
# the three factors.
#S.shape, V.shape, D.shape
#S@np.eye(len(V))*V@D
# LU factorisation with pivoting: scipy returns P, L, U with M == P @ L @ U.
P, L, U = linalg.lu(M)
print(P, L, U, sep='\n')
# Equals M only when the permutation P is the identity.
L @ U

# Turn LU into LDU: pull the diagonal of U out into D and rescale every row
# of U by its own pivot so that U_ has a unit diagonal.
D = np.diag(U)
U_ = U / D.reshape(-1, 1)
D = D * np.eye(D.size)
L @ D @ U_

# Euclidean norm of each row of L, kept as a column vector.
L_norm = np.sqrt(np.square(L).sum(axis=1, keepdims=True))
L_norm
# Centre every feature on its mean (no scaling this time).
X = iris.data - iris.data.mean(axis=0)
# Element-wise ratio of the scatter matrix to the sample covariance. np.cov
# divides by (n_samples - 1) by default, so every entry of this ratio should
# be that same constant.
(X.T@X) / np.cov(X.T)
# `linalg` here is scipy.linalg (see the imports at the top of the file).
linalg.eig(np.cov(X.T))
# The scatter matrix is the covariance matrix times a constant, so it has the
# same eigenvectors and proportionally scaled eigenvalues.
val, vec = linalg.eig(X.T@X)
val, vec
# Share of the eigenvalue total per component; the scaling constant cancels.
val / val.sum()
# Scatter matrix of the centred data (symmetric by construction).
A = X.T@X
A
# Sanity check: a single QR factorisation multiplies back to A.
Q, R = np.linalg.qr(A)
Q@R

# QR decomposition: https://en.wikipedia.org/wiki/QR_decomposition
# Unshifted QR algorithm. With B = Q @ R, the next iterate R @ Q equals
# Q.T @ B @ Q, so every step keeps the eigenvalues of A; the diagonal of B is
# then read off below as the eigenvalue estimates.
# This loop used to sit AFTER the first use of B, which made the file fail
# with a NameError when run top to bottom; it now runs first.
B = A.copy()
for i in range(20):
    print(B)
    Q, R = np.linalg.qr(B)
    B = R@Q

B.sum(axis=0)
#np.sqrt((B**2).sum(axis=0))
# Zero out numerically negligible entries so the diagonal is easy to read.
C = B.copy()
C[abs(C) < 0.00001] = 0
np.diag(C)

# Eigenvalue to eigenvector: https://en.wikipedia.org/wiki/Inverse_iteration
# Identities: https://arxiv.org/abs/1908.03795
# Inverse iteration: starting from a random vector, repeatedly solve
# (A - mu*I) x = b and renormalise, with mu taken from the diagonal above.
mu = np.diag(C)[0]
mu
b = np.random.rand(4)
for i in range(20):
    # Solve the linear system directly instead of forming an explicit inverse.
    part = np.linalg.solve(A - np.eye(len(b))*mu, b)
    b = part/np.sqrt((part**2).sum())
    #if i % 10 == 0:
    #    print(b)
# NOTE(review): loop indentation was lost in this file; the final print is
# assumed to belong after the loop - confirm.
print(b)
# NOTE(review): `/` and `@` share precedence and associate left to right, so
# this evaluates as (B / Q) @ R, not B / (Q @ R) - confirm which was intended.
B / Q@R