-
Notifications
You must be signed in to change notification settings - Fork 1
/
LandmarkNystroem.py
188 lines (148 loc) · 5.61 KB
/
LandmarkNystroem.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
"""
The :mod:`sklearn.kernel_approximation` module implements several
approximate kernel feature maps base on Fourier transforms.
"""
# Author: Andreas Mueller <[email protected]>
#
# License: BSD 3 clause
import warnings
import numpy as np
import scipy.sparse as sp
from scipy.linalg import svd
from sklearn.base import BaseEstimator
from sklearn.base import TransformerMixin
from sklearn.utils import check_array, check_random_state, as_float_array
from sklearn.utils.extmath import safe_sparse_dot
from sklearn.utils.validation import check_is_fitted
from sklearn.metrics.pairwise import pairwise_kernels
import collections
class Nystroem(BaseEstimator, TransformerMixin):
"""Approximate a kernel map using a subset of the training data.
Constructs an approximate feature map for an arbitrary kernel
using a subset of the data as basis.
Parameters
----------
kernel : string or callable, default="rbf"
Kernel map to be approximated. A callable should accept two arguments
and the keyword arguments passed to this object as kernel_params, and
should return a floating point number.
n_components : int
Number of features to construct.
How many data points will be used to construct the mapping.
gamma : float, default=None
Gamma parameter for the RBF, polynomial, exponential chi2 and
sigmoid kernels. Interpretation of the default value is left to
the kernel; see the documentation for sklearn.metrics.pairwise.
Ignored by other kernels.
degree : float, default=3
Degree of the polynomial kernel. Ignored by other kernels.
coef0 : float, default=1
Zero coefficient for polynomial and sigmoid kernels.
Ignored by other kernels.
kernel_params : mapping of string to any, optional
Additional parameters (keyword arguments) for kernel function passed
as callable object.
random_state : {int, RandomState}, optional
If int, random_state is the seed used by the random number generator;
if RandomState instance, random_state is the random number generator.
Attributes
----------
components_ : array, shape (n_components, n_features)
Subset of training points used to construct the feature map.
component_indices_ : array, shape (n_components)
Indices of ``components_`` in the training set.
normalization_ : array, shape (n_components, n_components)
Normalization matrix needed for embedding.
Square root of the kernel matrix on ``components_``.
References
----------
* Williams, C.K.I. and Seeger, M.
"Using the Nystroem method to speed up kernel machines",
Advances in neural information processing systems 2001
* T. Yang, Y. Li, M. Mahdavi, R. Jin and Z. Zhou
"Nystroem Method vs Random Fourier Features: A Theoretical and Empirical
Comparison",
Advances in Neural Information Processing Systems 2012
See also
--------
RBFSampler : An approximation to the RBF kernel using random Fourier
features.
sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.
"""
def __init__(self, basis = None, kernel="rbf", gamma=None, coef0=1, degree=3, kernel_params=None, n_components=100, random_state=None):
self.kernel = kernel
self.gamma = gamma
self.coef0 = coef0
self.degree = degree
self.kernel_params = kernel_params
self.n_components = n_components
self.random_state = random_state
self.basis = basis
def fit(self, X, y=None):
"""Fit estimator to data.
Samples a subset of training points, computes kernel
on these and computes normalization matrix.
Parameters
----------
X : array-like, shape=(n_samples, n_feature)
Training data.
"""
X = check_array(X, accept_sparse='csr')
rnd = check_random_state(self.random_state)
n_samples = X.shape[0]
# get basis vectors
basis = self.basis
print("Here's what goes into landmarknystroem:")
print((np.shape(X)))
print((np.shape(basis)))
if basis is not None:
if self.n_components > n_samples:
# XXX should we just bail?
n_components = n_samples
else:
n_components = self.n_components
n_components = min(n_samples, n_components)
inds = rnd.permutation(n_samples)
basis_inds = inds[:n_components]
basis = X[basis_inds]
self.components_ = basis
self.component_indices_ = inds
else:
n_components = self.n_components
self.components_ = basis
basis_kernel = pairwise_kernels(basis, metric=self.kernel, filter_params=True,**self._get_kernel_params())
# sqrt of kernel matrix on basis vectors
U, S, V = svd(basis_kernel)
S = np.maximum(S, 1e-12)
self.normalization_ = np.dot(U * 1. / np.sqrt(S), V)
return self
def transform(self, X):
"""Apply feature map to X.
Computes an approximate feature map using the kernel
between some training points and X.
Parameters
----------
X : array-like, shape=(n_samples, n_features)
Data to transform.
Returns
-------
X_transformed : array, shape=(n_samples, n_components)
Transformed data.
"""
check_is_fitted(self, 'components_')
X = check_array(X, accept_sparse='csr')
kernel_params = self._get_kernel_params()
embedded = pairwise_kernels(X, self.components_,
metric=self.kernel,
filter_params=True,
**kernel_params)
return np.dot(embedded, self.normalization_.T)
def _get_kernel_params(self):
params = self.kernel_params
if params is None:
params = {}
if not isinstance(self.kernel, collections.Callable):
params['gamma'] = self.gamma
params['degree'] = self.degree
params['coef0'] = self.coef0
return params