SHOGUN  3.2.1
 All  Namespaces  Files  Functions  Variables  Typedefs  Enumerations  Enumerator  Friends  Macros
PCA.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2008 Gunnar Raetsch
8  * Written (W) 1999-2008,2011 Soeren Sonnenburg
9  * Written (W) 2014 Parijat Mazumdar
10  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
11  * Copyright (C) 2011 Berlin Institute of Technology
12  */
#include <shogun/lib/config.h>

#ifdef HAVE_EIGEN3
#include <string.h>
#include <stdlib.h>

#include <shogun/features/DenseFeatures.h>
#include <shogun/io/SGIO.h>
#include <shogun/lib/common.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/preprocessor/PCA.h>
25 
26 using namespace shogun;
27 using namespace Eigen;
28 
29 CPCA::CPCA(bool do_whitening, EPCAMode mode, float64_t thresh, EPCAMethod method, EPCAMemoryMode mem_mode)
31 {
32  init();
33  m_whitening = do_whitening;
34  m_mode = mode;
35  m_thresh = thresh;
36  m_mem_mode = mem_mode;
37  m_method = method;
38 }
39 
40 CPCA::CPCA(EPCAMethod method, bool do_whitening, EPCAMemoryMode mem_mode)
42 {
43  init();
44  m_whitening = do_whitening;
45  m_mem_mode = mem_mode;
46  m_method = method;
47 }
48 
49 void CPCA::init()
50 {
54  num_dim = 0;
55  m_initialized = false;
56  m_whitening = false;
58  m_thresh = 1e-6;
60  m_method = AUTO;
61 
62  SG_ADD(&m_transformation_matrix, "transformation_matrix",
63  "Transformation matrix (Eigenvectors of covariance matrix).",
65  SG_ADD(&m_mean_vector, "mean_vector", "Mean Vector.", MS_NOT_AVAILABLE);
66  SG_ADD(&m_eigenvalues_vector, "eigenvalues_vector",
67  "Vector with Eigenvalues.", MS_NOT_AVAILABLE);
68  SG_ADD(&m_initialized, "initalized", "True when initialized.",
70  SG_ADD(&m_whitening, "whitening", "Whether data shall be whitened.",
71  MS_AVAILABLE);
72  SG_ADD((machine_int_t*) &m_mode, "mode", "PCA Mode.", MS_AVAILABLE);
73  SG_ADD(&m_thresh, "m_thresh", "Cutoff threshold.", MS_AVAILABLE);
74  SG_ADD((machine_int_t*) &m_mem_mode, "m_mem_mode",
75  "Memory mode (in-place or reallocation).", MS_NOT_AVAILABLE);
76  SG_ADD((machine_int_t*) &m_method, "m_method",
77  "Method used for PCA calculation", MS_NOT_AVAILABLE);
78 }
79 
81 {
82 }
83 
84 bool CPCA::init(CFeatures* features)
85 {
86  if (!m_initialized)
87  {
88  REQUIRE(features->get_feature_class()==C_DENSE, "PCA only works with dense features")
89  REQUIRE(features->get_feature_type()==F_DREAL, "PCA only works with real features")
90 
91  SGMatrix<float64_t> feature_matrix = ((CDenseFeatures<float64_t>*)features)
92  ->get_feature_matrix();
93  int32_t num_vectors = feature_matrix.num_cols;
94  int32_t num_features = feature_matrix.num_rows;
95  SG_INFO("num_examples: %ld num_features: %ld \n", num_vectors, num_features)
96 
97  // max target dim allowed
98  int32_t max_dim_allowed = CMath::min(num_vectors, num_features);
99  num_dim=0;
100 
101  REQUIRE(m_target_dim<=max_dim_allowed,
102  "target dimension should be less or equal to than minimum of N and D")
103 
104  // center data
105  Map<MatrixXd> fmatrix(feature_matrix.matrix, num_features, num_vectors);
106  m_mean_vector = SGVector<float64_t>(num_features);
107  Map<VectorXd> data_mean(m_mean_vector.vector, num_features);
108  data_mean = fmatrix.rowwise().sum()/(float64_t) num_vectors;
109  fmatrix = fmatrix.colwise()-data_mean;
110 
111  m_eigenvalues_vector = SGVector<float64_t>(max_dim_allowed);
112  Map<VectorXd> eigenValues(m_eigenvalues_vector.vector, max_dim_allowed);
113 
114  if (m_method == AUTO)
115  m_method = (num_vectors>num_features) ? EVD : SVD;
116 
117  if (m_method == EVD)
118  {
119  // covariance matrix
120  MatrixXd cov_mat(num_features, num_features);
121  cov_mat = fmatrix*fmatrix.transpose();
122  cov_mat /= (num_vectors-1);
123 
124  SG_INFO("Computing Eigenvalues ... ")
125  // eigen value computed
126  SelfAdjointEigenSolver<MatrixXd> eigenSolve =
127  SelfAdjointEigenSolver<MatrixXd>(cov_mat);
128  eigenValues = eigenSolve.eigenvalues().tail(max_dim_allowed);
129 
130  // target dimension
131  switch (m_mode)
132  {
133  case FIXED_NUMBER :
135  break;
136 
137  case VARIANCE_EXPLAINED :
138  {
139  float64_t eig_sum = eigenValues.sum();
140  float64_t com_sum = 0;
141  for (int32_t i=num_features-1; i<-1; i++)
142  {
143  num_dim++;
144  com_sum += m_eigenvalues_vector.vector[i];
145  if (com_sum/eig_sum>=m_thresh)
146  break;
147  }
148  }
149  break;
150 
151  case THRESHOLD :
152  for (int32_t i=num_features-1; i<-1; i++)
153  {
155  num_dim++;
156  else
157  break;
158  }
159  break;
160  };
161  SG_INFO("Done\nReducing from %i to %i features..", num_features, num_dim)
162 
164  Map<MatrixXd> transformMatrix(m_transformation_matrix.matrix,
165  num_features, num_dim);
166  num_old_dim = num_features;
167 
168  // eigenvector matrix
169  transformMatrix = eigenSolve.eigenvectors().block(0,
170  num_features-num_dim, num_features,num_dim);
171  if (m_whitening)
172  {
173  for (int32_t i=0; i<num_dim; i++)
174  transformMatrix.col(i) /=
175  sqrt(eigenValues[i+max_dim_allowed-num_dim]);
176  }
177  }
178 
179  else
180  {
181  // compute SVD of data matrix
182  JacobiSVD<MatrixXd> svd(fmatrix.transpose(), ComputeThinU | ComputeThinV);
183 
184  // compute non-negative eigen values from singular values
185  eigenValues = svd.singularValues();
186  eigenValues = eigenValues.cwiseProduct(eigenValues)/(num_vectors-1);
187 
188  // target dimension
189  switch (m_mode)
190  {
191  case FIXED_NUMBER :
193  break;
194 
195  case VARIANCE_EXPLAINED :
196  {
197  float64_t eig_sum = eigenValues.sum();
198  float64_t com_sum = 0;
199  for (int32_t i=0; i<num_features; i++)
200  {
201  num_dim++;
202  com_sum += m_eigenvalues_vector.vector[i];
203  if (com_sum/eig_sum>=m_thresh)
204  break;
205  }
206  }
207  break;
208 
209  case THRESHOLD :
210  for (int32_t i=0; i<num_features; i++)
211  {
213  num_dim++;
214  else
215  break;
216  }
217  break;
218  };
219  SG_INFO("Done\nReducing from %i to %i features..", num_features, num_dim)
220 
221  // right singular vectors form eigenvectors
223  Map<MatrixXd> transformMatrix(m_transformation_matrix.matrix, num_features, num_dim);
224  num_old_dim = num_features;
225  transformMatrix = svd.matrixV().block(0, 0, num_features, num_dim);
226  if (m_whitening)
227  {
228  for (int32_t i=0; i<num_dim; i++)
229  transformMatrix.col(i) /= sqrt(eigenValues[i]);
230  }
231  }
232 
233  // restore feature matrix
234  fmatrix = fmatrix.colwise()+data_mean;
235  m_initialized = true;
236  return true;
237  }
238 
239  return false;
240 }
241 
243 {
247  m_initialized = false;
248 }
249 
251 {
253  SGMatrix<float64_t> m = ((CDenseFeatures<float64_t>*) features)->get_feature_matrix();
254  int32_t num_vectors = m.num_cols;
255  int32_t num_features = m.num_rows;
256 
257  SG_INFO("Transforming feature matrix\n")
258  Map<MatrixXd> transform_matrix(m_transformation_matrix.matrix,
260 
261  if (m_mem_mode == MEM_IN_PLACE)
262  {
263  if (m.matrix)
264  {
265  SG_INFO("Preprocessing feature matrix\n")
266  Map<MatrixXd> feature_matrix(m.matrix, num_features, num_vectors);
267  VectorXd data_mean = feature_matrix.rowwise().sum()/(float64_t) num_vectors;
268  feature_matrix = feature_matrix.colwise()-data_mean;
269 
270  feature_matrix.block(0,0,num_dim,num_vectors) =
271  transform_matrix.transpose()*feature_matrix;
272 
273  SG_INFO("Form matrix of target dimension")
274  for (int32_t col=0; col<num_vectors; col++)
275  {
276  for (int32_t row=0; row<num_dim; row++)
277  m.matrix[row*num_dim+col] = feature_matrix(row,col);
278  }
279  m.num_rows = num_dim;
280  m.num_cols = num_vectors;
281  }
282 
283  ((CDenseFeatures<float64_t>*) features)->set_feature_matrix(m);
284  return m;
285  }
286  else
287  {
288  SGMatrix<float64_t> ret(num_dim, num_vectors);
289  Map<MatrixXd> ret_matrix(ret.matrix, num_dim, num_vectors);
290  if (m.matrix)
291  {
292  SG_INFO("Preprocessing feature matrix\n")
293  Map<MatrixXd> feature_matrix(m.matrix, num_features, num_vectors);
294  VectorXd data_mean = feature_matrix.rowwise().sum()/(float64_t) num_vectors;
295  feature_matrix = feature_matrix.colwise()-data_mean;
296 
297  ret_matrix = transform_matrix.transpose()*feature_matrix;
298  }
299  ((CDenseFeatures<float64_t>*) features)->set_feature_matrix(ret);
300  return ret;
301  }
302 }
303 
305 {
307  Map<VectorXd> resultVec(result.vector, num_dim);
308  Map<VectorXd> inputVec(vector.vector, vector.vlen);
309 
310  Map<VectorXd> mean(m_mean_vector.vector, m_mean_vector.vlen);
311  Map<MatrixXd> transformMat(m_transformation_matrix.matrix,
313 
314  inputVec = inputVec-mean;
315  resultVec = transformMat.transpose()*inputVec;
316  inputVec = inputVec+mean;
317 
318  return result;
319 }
320 
322 {
324 }
325 
327 {
328  return m_eigenvalues_vector;
329 }
330 
332 {
333  return m_mean_vector;
334 }
335 
337 {
338  return m_mem_mode;
339 }
340 
342 {
343  m_mem_mode = e;
344 }
345 
346 #endif // HAVE_EIGEN3

SHOGUN Machine Learning Toolbox - Project documentation