-
Notifications
You must be signed in to change notification settings - Fork 0
/
LDA
74 lines (61 loc) · 2.14 KB
/
LDA
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#coding:utf-8
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
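'''
LDA (Linear Discriminant Analysis): after dimensionality reduction, the distance
between class centers is maximized (between-class scatter matrix) while the
covariance inside each class is minimized (within-class scatter matrix).
LDA is a supervised dimensionality-reduction method: it uses the class labels.
Underlying formula: the Rayleigh quotient.
https://www.cnblogs.com/pinard/p/6244265.html
LDA relies on the sample (class) means when reducing dimensionality, whereas
PCA relies on the sample variance.
'''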
def lda(data, target, n_dim):
    """Project ``data`` onto its ``n_dim`` most discriminative LDA directions.

    Builds the within-class scatter matrix ``Sw`` and the between-class
    scatter matrix ``Sb``, then keeps the eigenvectors of ``pinv(Sw) . Sb``
    with the largest eigenvalues as the projection matrix. The samples are
    projected as given; they are not centered first.

    :param data: array-like of shape (n_samples, n_features)
    :param target: array-like of shape (n_samples,) with each sample's class label
    :param n_dim: number of output dimensions, at most ``n_classes - 1``
    :return: ndarray of shape (n_samples, n_dim)
    :raises ValueError: if ``n_dim`` is larger than ``n_classes - 1``
    """
    data = np.asarray(data, dtype=float)
    target = np.asarray(target)

    clusters = np.unique(target)
    if n_dim > len(clusters) - 1:
        # Raise instead of print + exit(0): a library function must not
        # terminate the calling process, least of all with a success status.
        raise ValueError(
            "n_dim must be at most n_classes - 1 = %d, got %d"
            % (len(clusters) - 1, n_dim)
        )

    n_features = data.shape[1]
    overall_mean = data.mean(0)
    Sw = np.zeros((n_features, n_features))  # within-class scatter matrix
    Sb = np.zeros((n_features, n_features))  # between-class scatter matrix
    for label in clusters:
        members = data[target == label]
        class_mean = members.mean(0)

        centered = members - class_mean
        Sw += centered.T.dot(centered)

        offset = class_mean - overall_mean
        Sb += members.shape[0] * np.outer(offset, offset)

    # The pseudo-inverse equals the inverse when Sw is invertible and still
    # gives a usable result when Sw is singular (e.g. collinear features).
    eig_vals, eig_vecs = np.linalg.eig(np.linalg.pinv(Sw).dot(Sb))

    # pinv(Sw).Sb is not symmetric, so eig() may return a complex dtype even
    # though the spectrum is real in theory; drop the numerical-noise
    # imaginary parts so the projection stays real-valued.
    eig_vals = np.real(eig_vals)
    eig_vecs = np.real(eig_vecs)

    # Indices of the n_dim largest eigenvalues, largest first.
    order = np.argsort(eig_vals)[::-1][:n_dim]
    w = eig_vecs[:, order]
    return data.dot(w)
if __name__ == '__main__':
    # Demo: project the iris data to 2-D with the hand-written lda() and with
    # scikit-learn's LinearDiscriminantAnalysis, and plot both side by side.
    dataset = load_iris()
    features, labels = dataset.data, dataset.target

    own_projection = lda(features, labels, 2)
    sklearn_projection = LinearDiscriminantAnalysis(n_components=2).fit_transform(
        features, labels
    )

    plt.figure(figsize=(8, 4))
    panels = (
        (121, "my_LDA", own_projection),
        (122, "sklearn_LDA", sklearn_projection),
    )
    for position, title, projected in panels:
        plt.subplot(position)
        plt.title(title)
        # Color every point by its class label.
        plt.scatter(projected[:, 0], projected[:, 1], c=labels)

    # Save the comparison figure, then display it.
    plt.savefig("LDA.png")
    plt.show()