
Memory.h

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com>
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
// Copyright (C) 2010 Thomas Capricelli <orzel@freehackers.org>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.


/*****************************************************************************
*** Platform checks for aligned malloc functions                           ***
*****************************************************************************/

#ifndef EIGEN_MEMORY_H
#define EIGEN_MEMORY_H

// On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see:
//   http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html
// This is true at least since glibc 2.8.
// This leaves the question how to detect 64-bit. According to this document,
//   http://gcc.fyxm.net/summit/2003/Porting%20to%2064%20bit.pdf
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
 && defined(__LP64__)
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
#endif

// FreeBSD 6 seems to have 16-byte aligned malloc
//   See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup
// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures
//   See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
#if defined(__FreeBSD__) && !defined(__arm__) && !defined(__mips__)
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
#endif

#if defined(__APPLE__) \
 || defined(_WIN64) \
 || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \
 || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
  #define EIGEN_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_MALLOC_ALREADY_ALIGNED 0
#endif

#if ((defined __QNXNTO__) || (defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) \
 && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
  #define EIGEN_HAS_POSIX_MEMALIGN 1
#else
  #define EIGEN_HAS_POSIX_MEMALIGN 0
#endif

#ifdef EIGEN_VECTORIZE_SSE
  #define EIGEN_HAS_MM_MALLOC 1
#else
  #define EIGEN_HAS_MM_MALLOC 0
#endif

namespace internal {

/*****************************************************************************
*** Implementation of handmade aligned functions                           ***
*****************************************************************************/

/* ----- Hand made implementations of aligned malloc/free and realloc ----- */

/** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
  * Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
  */
inline void* handmade_aligned_malloc(size_t size)
{
  void *original = std::malloc(size+16);
  if (original == 0) return 0;
  void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
  *(reinterpret_cast<void**>(aligned) - 1) = original;
  return aligned;
}

/** \internal Frees memory allocated with handmade_aligned_malloc */
inline void handmade_aligned_free(void *ptr)
{
  if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
}

/** \internal
  * \brief Reallocates aligned memory.
  * Since we know that our handmade version is based on std::realloc
  * we can use std::realloc to implement efficient reallocation.
  */
inline void* handmade_aligned_realloc(void* ptr, size_t size, size_t = 0)
{
  if (ptr == 0) return handmade_aligned_malloc(size);
  void *original = *(reinterpret_cast<void**>(ptr) - 1);
  original = std::realloc(original,size+16);
  if (original == 0) return 0;
  void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
  *(reinterpret_cast<void**>(aligned) - 1) = original;
  return aligned;
}
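
/* Usage sketch for the handmade trio above: the returned pointer is 16-byte
 * aligned and must be released with handmade_aligned_free, since the block
 * obtained from std::malloc actually starts up to 16 bytes earlier. Assumes
 * <cassert> is available:
 * \code
 * void* p = Eigen::internal::handmade_aligned_malloc(100);
 * assert((reinterpret_cast<size_t>(p) & 15) == 0);
 * p = Eigen::internal::handmade_aligned_realloc(p, 200);
 * Eigen::internal::handmade_aligned_free(p);
 * \endcode
 */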

/*****************************************************************************
*** Implementation of generic aligned realloc (when no realloc can be used)***
*****************************************************************************/

void* aligned_malloc(size_t size);
void  aligned_free(void *ptr);

/** \internal
  * \brief Reallocates aligned memory.
  * Allows reallocation with aligned ptr types. This implementation will
  * always create a new memory chunk and copy the old data.
  */
inline void* generic_aligned_realloc(void* ptr, size_t size, size_t old_size)
{
  if (ptr==0)
    return aligned_malloc(size);

  if (size==0)
  {
    aligned_free(ptr);
    return 0;
  }

  void* newptr = aligned_malloc(size);
  if (newptr == 0)
  {
    #ifdef EIGEN_HAS_ERRNO
    errno = ENOMEM; // according to the standard
    #endif
    return 0;
  }

  if (ptr != 0)
  {
    std::memcpy(newptr, ptr, std::min(size,old_size));
    aligned_free(ptr);
  }

  return newptr;
}

/*****************************************************************************
*** Implementation of portable aligned versions of malloc/free/realloc     ***
*****************************************************************************/

/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 bytes alignment.
  * On allocation error, the returned pointer is null, and if exceptions are enabled then a std::bad_alloc is thrown.
  */
inline void* aligned_malloc(size_t size)
{
  #ifdef EIGEN_NO_MALLOC
    eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
  #endif

  void *result;
  #if !EIGEN_ALIGN
    result = std::malloc(size);
  #elif EIGEN_MALLOC_ALREADY_ALIGNED
    result = std::malloc(size);
  #elif EIGEN_HAS_POSIX_MEMALIGN
    if(posix_memalign(&result, 16, size)) result = 0;
  #elif EIGEN_HAS_MM_MALLOC
    result = _mm_malloc(size, 16);
  #elif (defined _MSC_VER)
    result = _aligned_malloc(size, 16);
  #else
    result = handmade_aligned_malloc(size);
  #endif

  #ifdef EIGEN_EXCEPTIONS
    if(result == 0)
      throw std::bad_alloc();
  #endif
  return result;
}

/** \internal Frees memory allocated with aligned_malloc. */
inline void aligned_free(void *ptr)
{
  #if !EIGEN_ALIGN
    std::free(ptr);
  #elif EIGEN_MALLOC_ALREADY_ALIGNED
    std::free(ptr);
  #elif EIGEN_HAS_POSIX_MEMALIGN
    std::free(ptr);
  #elif EIGEN_HAS_MM_MALLOC
    _mm_free(ptr);
  #elif defined(_MSC_VER)
    _aligned_free(ptr);
  #else
    handmade_aligned_free(ptr);
  #endif
}
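
/* Usage sketch: aligned_malloc and aligned_free form a matched pair. Both
 * dispatch on the same set of macros, so memory obtained from one backend is
 * never released through another:
 * \code
 * float* buf = static_cast<float*>(Eigen::internal::aligned_malloc(64 * sizeof(float)));
 * // ... buf is 16-byte aligned (when EIGEN_ALIGN is enabled) ...
 * Eigen::internal::aligned_free(buf);
 * \endcode
 */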

/**
* \internal
* \brief Reallocates an aligned block of memory.
* \throws std::bad_alloc if EIGEN_EXCEPTIONS are defined.
**/
inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
{
  EIGEN_UNUSED_VARIABLE(old_size);

  void *result;
#if !EIGEN_ALIGN
  result = std::realloc(ptr,new_size);
#elif EIGEN_MALLOC_ALREADY_ALIGNED
  result = std::realloc(ptr,new_size);
#elif EIGEN_HAS_POSIX_MEMALIGN
  result = generic_aligned_realloc(ptr,new_size,old_size);
#elif EIGEN_HAS_MM_MALLOC
  // The defined(_mm_free) is just here to verify that this MSVC version
  // implements _mm_malloc/_mm_free based on the corresponding _aligned_
  // functions. This may not always be the case and we just try to be safe.
  #if defined(_MSC_VER) && defined(_mm_free)
    result = _aligned_realloc(ptr,new_size,16);
  #else
    result = generic_aligned_realloc(ptr,new_size,old_size);
  #endif
#elif defined(_MSC_VER)
  result = _aligned_realloc(ptr,new_size,16);
#else
  result = handmade_aligned_realloc(ptr,new_size,old_size);
#endif

#ifdef EIGEN_EXCEPTIONS
  if (result==0 && new_size!=0)
    throw std::bad_alloc();
#endif
  return result;
}
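
/* Usage sketch: growing an aligned buffer. The old_size argument only matters
 * on the generic_aligned_realloc path, where the old contents are copied into
 * a fresh aligned allocation:
 * \code
 * void* p = Eigen::internal::aligned_malloc(100);
 * p = Eigen::internal::aligned_realloc(p, 200, 100); // first 100 bytes preserved
 * Eigen::internal::aligned_free(p);
 * \endcode
 */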

/*****************************************************************************
*** Implementation of conditionally aligned functions                      ***
*****************************************************************************/

/** \internal Allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned.
  * On allocation error, the returned pointer is null, and if exceptions are enabled then a std::bad_alloc is thrown.
  */
template<bool Align> inline void* conditional_aligned_malloc(size_t size)
{
  return aligned_malloc(size);
}

template<> inline void* conditional_aligned_malloc<false>(size_t size)
{
  #ifdef EIGEN_NO_MALLOC
    eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
  #endif

  void *result = std::malloc(size);
  #ifdef EIGEN_EXCEPTIONS
    if(!result) throw std::bad_alloc();
  #endif
  return result;
}

/** \internal Frees memory allocated with conditional_aligned_malloc */
template<bool Align> inline void conditional_aligned_free(void *ptr)
{
  aligned_free(ptr);
}

template<> inline void conditional_aligned_free<false>(void *ptr)
{
  std::free(ptr);
}

template<bool Align> inline void* conditional_aligned_realloc(void* ptr, size_t new_size, size_t old_size)
{
  return aligned_realloc(ptr, new_size, old_size);
}

template<> inline void* conditional_aligned_realloc<false>(void* ptr, size_t new_size, size_t)
{
  return std::realloc(ptr, new_size);
}
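
/* Usage sketch: the Align template flag is resolved at compile time, so a
 * caller can select aligned or plain allocation from a trait without any
 * runtime branch:
 * \code
 * const bool UseAligned = true; // e.g. derived from packet_traits
 * void* p = Eigen::internal::conditional_aligned_malloc<UseAligned>(256);
 * Eigen::internal::conditional_aligned_free<UseAligned>(p);
 * \endcode
 */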

/*****************************************************************************
*** Construction/destruction of array elements                             ***
*****************************************************************************/

/** \internal Constructs the elements of an array.
  * The \a size parameter gives the number of objects on which to call the constructor of T.
  */
template<typename T> inline T* construct_elements_of_array(T *ptr, size_t size)
{
  for (size_t i=0; i < size; ++i) ::new (ptr + i) T;
  return ptr;
}

/** \internal Destructs the elements of an array.
  * The \a size parameter gives the number of objects on which to call the destructor of T.
  */
template<typename T> inline void destruct_elements_of_array(T *ptr, size_t size)
{
  // always destruct an array starting from the end.
  if(ptr)
    while(size) ptr[--size].~T();
}
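
/* Usage sketch: these two functions split allocation from object lifetime in
 * the style of placement new; raw aligned bytes in, constructed objects out,
 * and the reverse on teardown. Assumes <string> for the element type:
 * \code
 * void* raw = Eigen::internal::aligned_malloc(10 * sizeof(std::string));
 * std::string* s = Eigen::internal::construct_elements_of_array(
 *                      static_cast<std::string*>(raw), 10);
 * Eigen::internal::destruct_elements_of_array(s, 10);
 * Eigen::internal::aligned_free(raw);
 * \endcode
 */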

/*****************************************************************************
*** Implementation of aligned new/delete-like functions                    ***
*****************************************************************************/

/** \internal Allocates \a size objects of type T. The returned pointer is guaranteed to have 16 bytes alignment.
  * On allocation error, the returned pointer is undefined, but if exceptions are enabled then a std::bad_alloc is thrown.
  * The default constructor of T is called.
  */
template<typename T> inline T* aligned_new(size_t size)
{
  T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size));
  return construct_elements_of_array(result, size);
}

template<typename T, bool Align> inline T* conditional_aligned_new(size_t size)
{
  T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
  return construct_elements_of_array(result, size);
}

/** \internal Deletes objects constructed with aligned_new
  * The \a size parameter gives the number of objects on which to call the destructor of T.
  */
template<typename T> inline void aligned_delete(T *ptr, size_t size)
{
  destruct_elements_of_array<T>(ptr, size);
  aligned_free(ptr);
}

/** \internal Deletes objects constructed with conditional_aligned_new
  * The \a size parameter gives the number of objects on which to call the destructor of T.
  */
template<typename T, bool Align> inline void conditional_aligned_delete(T *ptr, size_t size)
{
  destruct_elements_of_array<T>(ptr, size);
  conditional_aligned_free<Align>(ptr);
}

template<typename T, bool Align> inline T* conditional_aligned_realloc_new(T* pts, size_t new_size, size_t old_size)
{
  T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
  if (new_size > old_size)
    construct_elements_of_array(result+old_size, new_size-old_size);
  return result;
}
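
/* Usage sketch: aligned_new combines the two steps above (aligned allocation
 * followed by element construction), and aligned_delete reverses them:
 * \code
 * Eigen::Vector4f* v = Eigen::internal::aligned_new<Eigen::Vector4f>(8);
 * // ... all 8 vectors default-constructed, storage 16-byte aligned ...
 * Eigen::internal::aligned_delete(v, 8);
 * \endcode
 */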

/****************************************************************************/

/** \internal Returns the index of the first element of the array that is well aligned for vectorization.
  *
  * \param array the address of the start of the array
  * \param size the size of the array
  *
  * \note If no element of the array is well aligned, the size of the array is returned. Typically,
  * for example with SSE, "well aligned" means 16-byte-aligned. If vectorization is disabled or if the
  * packet size for the given scalar type is 1, then everything is considered well-aligned.
  *
  * \note If the scalar type is vectorizable, we rely on the following assumptions: sizeof(Scalar) is a
  * power of 2, the packet size in bytes is also a power of 2, and is a multiple of sizeof(Scalar). On the
  * other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails for
  * example with Scalar=double on certain 32-bit platforms, see bug #79.
  *
  * There is also the variant first_aligned(const MatrixBase&) defined in DenseCoeffsBase.h.
  */
template<typename Scalar, typename Index>
inline static Index first_aligned(const Scalar* array, Index size)
{
  typedef typename packet_traits<Scalar>::type Packet;
  enum { PacketSize = packet_traits<Scalar>::size,
         PacketAlignedMask = PacketSize-1
  };

  if(PacketSize==1)
  {
    // Either there is no vectorization, or a packet consists of exactly 1 scalar so that all elements
    // of the array have the same alignment.
    return 0;
  }
  else if(size_t(array) & (sizeof(Scalar)-1))
  {
    // There is vectorization for this scalar type, but the array is not aligned to the size of a single scalar.
    // Consequently, no element of the array is well aligned.
    return size;
  }
  else
  {
    return std::min<Index>( (PacketSize - (Index((size_t(array)/sizeof(Scalar))) & PacketAlignedMask))
                           & PacketAlignedMask, size);
  }
}
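
/* Usage sketch: the usual pattern peels off the leading unaligned elements
 * with scalar code and runs the vectorized kernel from the returned index
 * onward. A simplified version, assuming a pointer data to n elements of some
 * vectorizable Scalar type:
 * \code
 * int first = Eigen::internal::first_aligned(data, n);
 * for(int i = 0; i < first; ++i) data[i] = Scalar(0); // scalar prologue
 * for(int i = first; i < n; ++i) data[i] = Scalar(0); // aligned region, vectorizable
 * \endcode
 */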

} // end namespace internal

/*****************************************************************************
*** Implementation of runtime stack allocation (falling back to malloc)    ***
*****************************************************************************/

/** \internal
  * Allocates an aligned buffer of SIZE bytes on the stack if SIZE is smaller than
  * EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
  * (currently, this is Linux only). Otherwise the memory is allocated on the heap.
  * Data allocated with ei_aligned_stack_alloc \b must be freed by calling
  * ei_aligned_stack_free(PTR,SIZE).
  * \code
  * float* data = reinterpret_cast<float*>(ei_aligned_stack_alloc(sizeof(float)*array.size()));
  * // ...
  * ei_aligned_stack_free(data, sizeof(float)*array.size());
  * \endcode
  */
#if (defined __linux__)
  #define ei_aligned_stack_alloc(SIZE) (SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) \
                                    ? alloca(SIZE) \
                                    : Eigen::internal::aligned_malloc(SIZE)
  #define ei_aligned_stack_free(PTR,SIZE) if(SIZE>EIGEN_STACK_ALLOCATION_LIMIT) Eigen::internal::aligned_free(PTR)
#elif defined(_MSC_VER)
  #define ei_aligned_stack_alloc(SIZE) (SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) \
                                    ? _alloca(SIZE) \
                                    : Eigen::internal::aligned_malloc(SIZE)
  #define ei_aligned_stack_free(PTR,SIZE) if(SIZE>EIGEN_STACK_ALLOCATION_LIMIT) Eigen::internal::aligned_free(PTR)
#else
  #define ei_aligned_stack_alloc(SIZE) Eigen::internal::aligned_malloc(SIZE)
  #define ei_aligned_stack_free(PTR,SIZE) Eigen::internal::aligned_free(PTR)
#endif

#define ei_aligned_stack_new(TYPE,SIZE) Eigen::internal::construct_elements_of_array(reinterpret_cast<TYPE*>(ei_aligned_stack_alloc(sizeof(TYPE)*SIZE)), SIZE)
#define ei_aligned_stack_delete(TYPE,PTR,SIZE) do {Eigen::internal::destruct_elements_of_array<TYPE>(PTR, SIZE); \
                                                   ei_aligned_stack_free(PTR,sizeof(TYPE)*SIZE);} while(0)
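
/* Usage sketch for the typed helpers just defined: ei_aligned_stack_new
 * constructs SIZE objects in the (possibly stack-based) buffer, and
 * ei_aligned_stack_delete destroys them and releases it, so the two calls
 * must match in TYPE and SIZE:
 * \code
 * float* tmp = ei_aligned_stack_new(float, n);
 * // ... use tmp[0..n-1] ...
 * ei_aligned_stack_delete(float, tmp, n);
 * \endcode
 */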


/*****************************************************************************
*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF]                ***
*****************************************************************************/

#if EIGEN_ALIGN
  #ifdef EIGEN_EXCEPTIONS
    #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      void* operator new(size_t size, const std::nothrow_t&) throw() { \
        try { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
        catch (...) { return 0; } \
        return 0; \
      }
  #else
    #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      void* operator new(size_t size, const std::nothrow_t&) throw() { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      }
  #endif

  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
      void *operator new(size_t size) { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      } \
      void *operator new[](size_t size) { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      } \
      void operator delete(void * ptr) throw() { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      void operator delete[](void * ptr) throw() { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      /* In-place new and delete: since no actual memory is allocated here,   */ \
      /* we can safely let the default implementation handle this particular  */ \
      /* case. */ \
      static void *operator new(size_t size, void *ptr) { return ::operator new(size,ptr); } \
      void operator delete(void * memory, void *ptr) throw() { return ::operator delete(memory,ptr); } \
      /* nothrow-new (returns zero instead of throwing std::bad_alloc) */ \
      EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      void operator delete(void *ptr, const std::nothrow_t&) throw() { \
        Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
      } \
      typedef void eigen_aligned_operator_new_marker_type;
#else
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
#endif

#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%16==0))
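
/* Usage sketch: a class holding a fixed-size vectorizable Eigen member pulls
 * in these operators so that heap-allocated instances stay 16-byte aligned:
 * \code
 * struct MyState {
 *   Eigen::Matrix4f pose;
 *   EIGEN_MAKE_ALIGNED_OPERATOR_NEW
 * };
 * MyState* s = new MyState; // goes through conditional_aligned_malloc<true>
 * delete s;
 * \endcode
 */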

/****************************************************************************/

/** \class aligned_allocator
* \ingroup Core_Module
*
* \brief STL compatible allocator to use with 16 byte aligned types
*
* Example:
* \code
* // Matrix4f requires 16 bytes alignment:
* std::map< int, Matrix4f, std::less<int>,
*           aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4;
* // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
* std::map< int, Vector3f > my_map_vec3;
* \endcode
*
* \sa \ref TopicStlContainers.
*/
template<class T>
class aligned_allocator
{
public:
    typedef size_t    size_type;
    typedef std::ptrdiff_t difference_type;
    typedef T*        pointer;
    typedef const T*  const_pointer;
    typedef T&        reference;
    typedef const T&  const_reference;
    typedef T         value_type;

    template<class U>
    struct rebind
    {
        typedef aligned_allocator<U> other;
    };

    pointer address( reference value ) const
    {
        return &value;
    }

    const_pointer address( const_reference value ) const
    {
        return &value;
    }

    aligned_allocator() throw()
    {
    }

    aligned_allocator( const aligned_allocator& ) throw()
    {
    }

    template<class U>
    aligned_allocator( const aligned_allocator<U>& ) throw()
    {
    }

    ~aligned_allocator() throw()
    {
    }

    size_type max_size() const throw()
    {
        return std::numeric_limits<size_type>::max();
    }

    pointer allocate( size_type num, const_pointer* hint = 0 )
    {
        static_cast<void>( hint ); // suppress unused variable warning
        return static_cast<pointer>( internal::aligned_malloc( num * sizeof(T) ) );
    }

    void construct( pointer p, const T& value )
    {
        ::new( p ) T( value );
    }

    void destroy( pointer p )
    {
        p->~T();
    }

    void deallocate( pointer p, size_type /*num*/ )
    {
        internal::aligned_free( p );
    }

    bool operator!=(const aligned_allocator<T>& ) const
    { return false; }

    bool operator==(const aligned_allocator<T>& ) const
    { return true; }
};
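
/* A further usage sketch: the same allocator is what makes contiguous
 * containers of fixed-size vectorizable types safe, e.g.:
 * \code
 * std::vector<Eigen::Vector4f, Eigen::aligned_allocator<Eigen::Vector4f> > vecs(100);
 * // the vector's storage is obtained via aligned_malloc, hence 16-byte aligned
 * \endcode
 */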

//---------- Cache sizes ----------

#if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) )
#  if defined(__PIC__) && defined(__i386__)
     // Case for x86 with PIC
#    define EIGEN_CPUID(abcd,func,id) \
       __asm__ __volatile__ ("xchgl %%ebx, %%esi;cpuid; xchgl %%ebx,%%esi": "=a" (abcd[0]), "=S" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
#  else
     // Case for x86_64 or x86 w/o PIC
#    define EIGEN_CPUID(abcd,func,id) \
       __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id) );
#  endif
#elif defined(_MSC_VER)
#  if (_MSC_VER > 1500) /* newer than MSVC++ 9.0 */ || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729) /* MSVC++ 9.0 with SP1*/
#    define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
#  endif
#endif

namespace internal {

#ifdef EIGEN_CPUID

inline bool cpuid_is_vendor(int abcd[4], const char* vendor)
{
  return abcd[1]==((int*)(vendor))[0] && abcd[3]==((int*)(vendor))[1] && abcd[2]==((int*)(vendor))[2];
}

inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
{
  int abcd[4];
  l1 = l2 = l3 = 0;
  int cache_id = 0;
  int cache_type = 0;
  do {
    abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
    EIGEN_CPUID(abcd,0x4,cache_id);
    cache_type  = (abcd[0] & 0x0F) >> 0;
    if(cache_type==1||cache_type==3) // data or unified cache
    {
      int cache_level = (abcd[0] & 0xE0) >> 5;  // A[7:5]
      int ways        = (abcd[1] & 0xFFC00000) >> 22; // B[31:22]
      int partitions  = (abcd[1] & 0x003FF000) >> 12; // B[21:12]
      int line_size   = (abcd[1] & 0x00000FFF) >>  0; // B[11:0]
      int sets        = (abcd[2]);                    // C[31:0]

      int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);

      switch(cache_level)
      {
        case 1: l1 = cache_size; break;
        case 2: l2 = cache_size; break;
        case 3: l3 = cache_size; break;
        default: break;
      }
    }
    cache_id++;
  } while(cache_type>0 && cache_id<16);
}

inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3)
{
  int abcd[4];
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
  l1 = l2 = l3 = 0;
  EIGEN_CPUID(abcd,0x00000002,0);
  unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
  bool check_for_p2_core2 = false;
  for(int i=0; i<14; ++i)
  {
    switch(bytes[i])
    {
      case 0x0A: l1 = 8; break;    // 0Ah   data L1 cache, 8 KB, 2 ways, 32 byte lines
      case 0x0C: l1 = 16; break;   // 0Ch   data L1 cache, 16 KB, 4 ways, 32 byte lines
      case 0x0E: l1 = 24; break;   // 0Eh   data L1 cache, 24 KB, 6 ways, 64 byte lines
      case 0x10: l1 = 16; break;   // 10h   data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
      case 0x15: l1 = 16; break;   // 15h   code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
      case 0x2C: l1 = 32; break;   // 2Ch   data L1 cache, 32 KB, 8 ways, 64 byte lines
      case 0x30: l1 = 32; break;   // 30h   code L1 cache, 32 KB, 8 ways, 64 byte lines
      case 0x60: l1 = 16; break;   // 60h   data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
      case 0x66: l1 = 8; break;    // 66h   data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
      case 0x67: l1 = 16; break;   // 67h   data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
      case 0x68: l1 = 32; break;   // 68h   data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
      case 0x1A: l2 = 96; break;   // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)
      case 0x22: l3 = 512; break;   // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored
      case 0x23: l3 = 1024; break;   // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x25: l3 = 2048; break;   // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x29: l3 = 4096; break;   // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x39: l2 = 128; break;   // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored
      case 0x3A: l2 = 192; break;   // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored
      case 0x3B: l2 = 128; break;   // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored
      case 0x3C: l2 = 256; break;   // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored
      case 0x3D: l2 = 384; break;   // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored
      case 0x3E: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored
      case 0x40: l2 = 0; break;   // no integrated L2 cache (P6 core) or L3 cache (P4 core)
      case 0x41: l2 = 128; break;   // code and data L2 cache, 128 KB, 4 ways, 32 byte lines
      case 0x42: l2 = 256; break;   // code and data L2 cache, 256 KB, 4 ways, 32 byte lines
      case 0x43: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 32 byte lines
      case 0x44: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines
      case 0x45: l2 = 2048; break;   // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines
      case 0x46: l3 = 4096; break;   // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines
      case 0x47: l3 = 8192; break;   // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines
      case 0x48: l2 = 3072; break;   // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines
      case 0x49: if(l2!=0) l3 = 4096; else {check_for_p2_core2=true; l3 = l2 = 4096;} break;// code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2
      case 0x4A: l3 = 6144; break;   // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines
      case 0x4B: l3 = 8192; break;   // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines
      case 0x4C: l3 = 12288; break;   // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines
      case 0x4D: l3 = 16384; break;   // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines
      case 0x4E: l2 = 6144; break;   // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines
      case 0x78: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines
      case 0x79: l2 = 128; break;   // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7A: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7B: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7C: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7D: l2 = 2048; break;   // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines
      case 0x7E: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64)
      case 0x7F: l2 = 512; break;   // code and data L2 cache, 512 KB, 2 ways, 64 byte lines
      case 0x80: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 64 byte lines
      case 0x81: l2 = 128; break;   // code and data L2 cache, 128 KB, 8 ways, 32 byte lines
      case 0x82: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 32 byte lines
      case 0x83: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 32 byte lines
      case 0x84: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines
      case 0x85: l2 = 2048; break;   // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines
      case 0x86: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 64 byte lines
      case 0x87: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines
      case 0x88: l3 = 2048; break;   // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)
      case 0x89: l3 = 4096; break;   // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)
      case 0x8A: l3 = 8192; break;   // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)
      case 0x8D: l3 = 3072; break;   // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)

      default: break;
    }
  }
  if(check_for_p2_core2 && l2 == l3)
    l3 = 0;
  l1 *= 1024;
  l2 *= 1024;
  l3 *= 1024;
}

inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
{
  if(max_std_funcs>=4)
    queryCacheSizes_intel_direct(l1,l2,l3);
  else
    queryCacheSizes_intel_codes(l1,l2,l3);
}

inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
{
  int abcd[4];
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
  EIGEN_CPUID(abcd,0x80000005,0);
  l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
  EIGEN_CPUID(abcd,0x80000006,0);
  l2 = (abcd[2] >> 16) * 1024; // C[31:16] = l2 cache size in KB
  l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31:18] = l3 cache size in units of 512 KB
}
#endif

/** \internal
 * Queries and returns the cache sizes in Bytes of the L1, L2, and L3 data caches respectively */
inline void queryCacheSizes(int& l1, int& l2, int& l3)
{
  #ifdef EIGEN_CPUID
  int abcd[4];

  // identify the CPU vendor
  EIGEN_CPUID(abcd,0x0,0);
  int max_std_funcs = abcd[0]; // CPUID leaf 0 returns the highest standard function number in EAX
  if(cpuid_is_vendor(abcd,"GenuineIntel"))
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
  else if(cpuid_is_vendor(abcd,"AuthenticAMD") || cpuid_is_vendor(abcd,"AMDisbetter!"))
    queryCacheSizes_amd(l1,l2,l3);
  else
    // by default let's use Intel's API
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);

  // here is the list of other vendors:
//   ||cpuid_is_vendor(abcd,"VIA VIA VIA ")
//   ||cpuid_is_vendor(abcd,"CyrixInstead")
//   ||cpuid_is_vendor(abcd,"CentaurHauls")
//   ||cpuid_is_vendor(abcd,"GenuineTMx86")
//   ||cpuid_is_vendor(abcd,"TransmetaCPU")
//   ||cpuid_is_vendor(abcd,"RiseRiseRise")
//   ||cpuid_is_vendor(abcd,"Geode by NSC")
//   ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
//   ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
//   ||cpuid_is_vendor(abcd,"NexGenDriven")
  #else
  l1 = l2 = l3 = -1;
  #endif
}

/** \internal
 * \returns the size in Bytes of the L1 data cache */
inline int queryL1CacheSize()
{
  int l1(-1), l2, l3;
  queryCacheSizes(l1,l2,l3);
  return l1;
}

/** \internal
 * \returns the size in Bytes of the L2 or L3 cache if the latter is present */
inline int queryTopLevelCacheSize()
{
  int l1, l2(-1), l3(-1);
  queryCacheSizes(l1,l2,l3);
  return std::max(l2,l3);
}

} // end namespace internal
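
/* Usage sketch: the query functions degrade gracefully; without EIGEN_CPUID
 * they report -1 rather than guessing:
 * \code
 * int l1  = Eigen::internal::queryL1CacheSize();       // bytes, or -1 if unknown
 * int top = Eigen::internal::queryTopLevelCacheSize(); // max(L2, L3) in bytes, or -1
 * \endcode
 */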

#endif // EIGEN_MEMORY_H


