Main MRPT website > C++ reference
MRPT logo

kmeans.h

Go to the documentation of this file.
00001 /* +---------------------------------------------------------------------------+
00002    |          The Mobile Robot Programming Toolkit (MRPT) C++ library          |
00003    |                                                                           |
00004    |                   http://mrpt.sourceforge.net/                            |
00005    |                                                                           |
00006    |   Copyright (C) 2005-2011  University of Malaga                           |
00007    |                                                                           |
00008    |    This software was written by the Machine Perception and Intelligent    |
00009    |      Robotics Lab, University of Malaga (Spain).                          |
00010    |    Contact: Jose-Luis Blanco  <jlblanco@ctima.uma.es>                     |
00011    |                                                                           |
00012    |  This file is part of the MRPT project.                                   |
00013    |                                                                           |
00014    |     MRPT is free software: you can redistribute it and/or modify          |
00015    |     it under the terms of the GNU General Public License as published by  |
00016    |     the Free Software Foundation, either version 3 of the License, or     |
00017    |     (at your option) any later version.                                   |
00018    |                                                                           |
00019    |   MRPT is distributed in the hope that it will be useful,                 |
00020    |     but WITHOUT ANY WARRANTY; without even the implied warranty of        |
00021    |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         |
00022    |     GNU General Public License for more details.                          |
00023    |                                                                           |
00024    |     You should have received a copy of the GNU General Public License     |
00025    |     along with MRPT.  If not, see <http://www.gnu.org/licenses/>.         |
00026    |                                                                           |
00027    +---------------------------------------------------------------------------+ */
00028 #ifndef  mrpt_math_kmeans_H
00029 #define  mrpt_math_kmeans_H
00030 
00031 #include <mrpt/math/CMatrixTemplateNumeric.h>
00032 #include <mrpt/math/CMatrixFixedNumeric.h>
00033 
00034 namespace mrpt
00035 {
00036         namespace math
00037         {
00038                 namespace detail {
00039                         // Auxiliary method: templatized for working with float/double's.
00040                         template <typename SCALAR>
00041                         double BASE_IMPEXP internal_kmeans(
00042                                 const bool use_kmeansplusplus_method,
00043                                 const size_t nPoints,
00044                                 const size_t k,
00045                                 const size_t dims,
00046                                 const SCALAR *points,
00047                                 const size_t attempts,
00048                                 SCALAR* out_center,
00049                                 int *out_assignments
00050                                 );
00051 
00052                         // Auxiliary method, the actual code of the two front-end functions offered to the user below.
00053                         template <class LIST_OF_VECTORS1,class LIST_OF_VECTORS2>
00054                         double stub_kmeans(
00055                                 const bool use_kmeansplusplus_method,
00056                                 const size_t k,
00057                                 const LIST_OF_VECTORS1 & points,
00058                                 std::vector<int>  &assignments,
00059                                 LIST_OF_VECTORS2 *out_centers,
00060                                 const size_t attempts
00061                                 )
00062                         {
00063                                 MRPT_START
00064                                 ASSERT_(k>=1)
00065                                 const size_t N = points.size();
00066                                 assignments.resize(N);
00067                                 if (out_centers) out_centers->clear();
00068                                 if (!N)
00069                                         return 0;       // No points, we're done.
00070                                 // Parse to required format:
00071                                 size_t dims=0;
00072                                 const typename LIST_OF_VECTORS1::const_iterator it_first=points.begin();
00073                                 const typename LIST_OF_VECTORS1::const_iterator it_end  =points.end();
00074                                 typedef typename LIST_OF_VECTORS1::value_type TInnerVector;
00075                                 typedef typename LIST_OF_VECTORS2::value_type TInnerVectorCenters;
00076                                 std::vector<typename TInnerVector::value_type> raw_vals;
00077                                 typename TInnerVector::value_type *trg_ptr=NULL;
00078                                 for (typename LIST_OF_VECTORS1::const_iterator it=it_first;it!=it_end;++it)
00079                                 {
00080                                         if (it==it_first)
00081                                         {
00082                                                 dims = it->size();
00083                                                 ASSERTMSG_(dims>0,"Dimensionality of points can't be zero.")
00084                                                 raw_vals.resize(N*dims);
00085                                                 trg_ptr = &raw_vals[0];
00086                                         }
00087                                         else
00088                                         {
00089                                                 ASSERTMSG_(size_t(dims)==size_t(it->size()),"All points must have the same dimensionality.")
00090                                         }
00091 
00092 					::memcpy(trg_ptr, &(*it)[0], dims*sizeof(typename TInnerVector::value_type));
00093                                         trg_ptr+=dims;
00094                                 }
00095                                 // Call the internal implementation:
00096                                 std::vector<typename TInnerVectorCenters::value_type> centers(dims*k);
00097                                 const double ret = detail::internal_kmeans(false,N,k,points.begin()->size(),&raw_vals[0],attempts,&centers[0],&assignments[0]);
00098                                 // Centers:
00099                                 if (out_centers)
00100                                 {
00101                                         const typename TInnerVectorCenters::value_type *center_ptr = &centers[0];
00102                                         for (size_t i=0;i<k;i++)
00103                                         {
00104                                                 TInnerVectorCenters c;
00105                                                 c.resize(dims);
00106                                                 for (size_t j=0;j<dims;j++) c[j]= *center_ptr++;
00107                                                 out_centers->push_back(c);
00108                                         }
00109                                 }
00110                                 return ret;
00111                                 MRPT_END
00112                         }
00113                 } // end detail namespace
00114 
00115 
00116                 /** @name k-means algorithms
00117                 @{ */
00118 
00119                 /** k-means algorithm to cluster a list of N points of arbitrary dimensionality into exactly K clusters.
00120                   *   The list of input points can be any template CONTAINER<POINT> with:
00121                   *             - CONTAINER can be: Any STL container: std::vector,std::list,std::deque,...
00122                   *             - POINT can be:
00123                   *                     - std::vector<double/float>
00124                   *                     - CArrayDouble<N> / CArrayFloat<N>
00125                   *
00126                   *  \param k [IN] Number of cluster to look for.
00127                   *  \param points [IN] The list of N input points. It can be any STL-like containers of std::vector<float/double>, for example a std::vector<vector_double>, a std::list<vector_float>, etc...
00128                   *  \param assignments [OUT] At output it will have a number [0,k-1] for each of the N input points.
00129                   *  \param out_centers [OUT] If not NULL, at output will have the centers of each group. Can be of any of the supported types of "points", but the basic coordinates should be float or double exactly as in "points".
00130                   *  \param attempts [IN] Number of attempts.
00131                   *
00132                   * \sa A more advanced algorithm, see: kmeanspp
00133                   * \note Uses the kmeans++ implementation by David Arthur (2009, http://www.stanford.edu/~darthur/kmpp.zip).
00134                   */
00135                 template <class LIST_OF_VECTORS1,class LIST_OF_VECTORS2>
00136                 inline double kmeans(
00137                         const size_t k,
00138                         const LIST_OF_VECTORS1 & points,
00139                         std::vector<int>  &assignments,
00140                         LIST_OF_VECTORS2 *out_centers = NULL,
00141                         const size_t attempts = 3
00142                         )
00143                 {
00144                         return detail::stub_kmeans(false /* standard method */, k,points,assignments,out_centers,attempts);
00145                 }
00146 
00147                 /** k-means++ algorithm to cluster a list of N points of arbitrary dimensionality into exactly K clusters.
00148                   *   The list of input points can be any template CONTAINER<POINT> with:
00149                   *             - CONTAINER can be: Any STL container: std::vector,std::list,std::deque,...
00150                   *             - POINT can be:
00151                   *                     - std::vector<double/float>
00152                   *                     - CArrayDouble<N> / CArrayFloat<N>
00153                   *
00154                   *  \param k [IN] Number of cluster to look for.
00155                   *  \param points [IN] The list of N input points. It can be any STL-like containers of std::vector<float/double>, for example a std::vector<vector_double>, a std::list<vector_float>, etc...
00156                   *  \param assignments [OUT] At output it will have a number [0,k-1] for each of the N input points.
00157                   *  \param out_centers [OUT] If not NULL, at output will have the centers of each group. Can be of any of the supported types of "points", but the basic coordinates should be float or double exactly as in "points".
00158                   *  \param attempts [IN] Number of attempts.
00159                   *
00160                   * \sa The standard kmeans algorithm, see: kmeans
00161                   * \note Uses the kmeans++ implementation by David Arthur (2009, http://www.stanford.edu/~darthur/kmpp.zip).
00162                   */
00163                 template <class LIST_OF_VECTORS1,class LIST_OF_VECTORS2>
00164                 inline double kmeanspp(
00165                         const size_t k,
00166                         const LIST_OF_VECTORS1 & points,
00167                         std::vector<int>  &assignments,
00168                         LIST_OF_VECTORS2 *out_centers = NULL,
00169                         const size_t attempts = 3
00170                         )
00171                 {
00172                         return detail::stub_kmeans(true /* kmeans++ algorithm*/, k,points,assignments,out_centers,attempts);
00173                 }
00174 
00175                 /** @} */
00176 
00177         } // End of MATH namespace
00178 } // End of namespace
00179 #endif



Page generated by Doxygen 1.7.3 for MRPT 0.9.4 SVN: at Sat Mar 26 06:40:17 UTC 2011