00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H
00026 #define EIGEN_SELFADJOINT_MATRIX_VECTOR_H
00027
00028 namespace internal {
00029
00030
00031
00032
00033
00034
00035 template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs>
00036 static EIGEN_DONT_INLINE void product_selfadjoint_vector(
00037 Index size,
00038 const Scalar* lhs, Index lhsStride,
00039 const Scalar* _rhs, Index rhsIncr,
00040 Scalar* res, Scalar alpha)
00041 {
00042 typedef typename packet_traits<Scalar>::type Packet;
00043 typedef typename NumTraits<Scalar>::Real RealScalar;
00044 const Index PacketSize = sizeof(Packet)/sizeof(Scalar);
00045
00046 enum {
00047 IsRowMajor = StorageOrder==RowMajor ? 1 : 0,
00048 IsLower = UpLo == Lower ? 1 : 0,
00049 FirstTriangular = IsRowMajor == IsLower
00050 };
00051
00052 conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0;
00053 conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1;
00054 conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex, ConjugateRhs> cjd;
00055
00056 conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0;
00057 conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1;
00058
00059 Scalar cjAlpha = ConjugateRhs ? conj(alpha) : alpha;
00060
00061
00062
00063 const Scalar* EIGEN_RESTRICT rhs = _rhs;
00064 if (rhsIncr!=1)
00065 {
00066 Scalar* r = ei_aligned_stack_new(Scalar, size);
00067 const Scalar* it = _rhs;
00068 for (Index i=0; i<size; ++i, it+=rhsIncr)
00069 r[i] = *it;
00070 rhs = r;
00071 }
00072
00073 Index bound = std::max(Index(0),size-8) & 0xfffffffe;
00074 if (FirstTriangular)
00075 bound = size - bound;
00076
00077 for (Index j=FirstTriangular ? bound : 0;
00078 j<(FirstTriangular ? size : bound);j+=2)
00079 {
00080 register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
00081 register const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
00082
00083 Scalar t0 = cjAlpha * rhs[j];
00084 Packet ptmp0 = pset1<Packet>(t0);
00085 Scalar t1 = cjAlpha * rhs[j+1];
00086 Packet ptmp1 = pset1<Packet>(t1);
00087
00088 Scalar t2 = 0;
00089 Packet ptmp2 = pset1<Packet>(t2);
00090 Scalar t3 = 0;
00091 Packet ptmp3 = pset1<Packet>(t3);
00092
00093 size_t starti = FirstTriangular ? 0 : j+2;
00094 size_t endi = FirstTriangular ? j : size;
00095 size_t alignedStart = (starti) + first_aligned(&res[starti], endi-starti);
00096 size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
00097
00098
00099 res[j] += cjd.pmul(internal::real(A0[j]), t0);
00100 res[j+1] += cjd.pmul(internal::real(A1[j+1]), t1);
00101 if(FirstTriangular)
00102 {
00103 res[j] += cj0.pmul(A1[j], t1);
00104 t3 += cj1.pmul(A1[j], rhs[j]);
00105 }
00106 else
00107 {
00108 res[j+1] += cj0.pmul(A0[j+1],t0);
00109 t2 += cj1.pmul(A0[j+1], rhs[j+1]);
00110 }
00111
00112 for (size_t i=starti; i<alignedStart; ++i)
00113 {
00114 res[i] += t0 * A0[i] + t1 * A1[i];
00115 t2 += conj(A0[i]) * rhs[i];
00116 t3 += conj(A1[i]) * rhs[i];
00117 }
00118
00119
00120 const Scalar* EIGEN_RESTRICT a0It = A0 + alignedStart;
00121 const Scalar* EIGEN_RESTRICT a1It = A1 + alignedStart;
00122 const Scalar* EIGEN_RESTRICT rhsIt = rhs + alignedStart;
00123 Scalar* EIGEN_RESTRICT resIt = res + alignedStart;
00124 for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize)
00125 {
00126 Packet A0i = ploadu<Packet>(a0It); a0It += PacketSize;
00127 Packet A1i = ploadu<Packet>(a1It); a1It += PacketSize;
00128 Packet Bi = ploadu<Packet>(rhsIt); rhsIt += PacketSize;
00129 Packet Xi = pload <Packet>(resIt);
00130
00131 Xi = pcj0.pmadd(A0i,ptmp0, pcj0.pmadd(A1i,ptmp1,Xi));
00132 ptmp2 = pcj1.pmadd(A0i, Bi, ptmp2);
00133 ptmp3 = pcj1.pmadd(A1i, Bi, ptmp3);
00134 pstore(resIt,Xi); resIt += PacketSize;
00135 }
00136 for (size_t i=alignedEnd; i<endi; i++)
00137 {
00138 res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1);
00139 t2 += cj1.pmul(A0[i], rhs[i]);
00140 t3 += cj1.pmul(A1[i], rhs[i]);
00141 }
00142
00143 res[j] += alpha * (t2 + predux(ptmp2));
00144 res[j+1] += alpha * (t3 + predux(ptmp3));
00145 }
00146 for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++)
00147 {
00148 register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
00149
00150 Scalar t1 = cjAlpha * rhs[j];
00151 Scalar t2 = 0;
00152
00153 res[j] += cjd.pmul(internal::real(A0[j]), t1);
00154 for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++)
00155 {
00156 res[i] += cj0.pmul(A0[i], t1);
00157 t2 += cj1.pmul(A0[i], rhs[i]);
00158 }
00159 res[j] += alpha * t2;
00160 }
00161
00162 if(rhsIncr!=1)
00163 ei_aligned_stack_delete(Scalar, const_cast<Scalar*>(rhs), size);
00164 }
00165
00166 }
00167
00168
00169
00170
00171
00172 namespace internal {
00173 template<typename Lhs, int LhsMode, typename Rhs>
00174 struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> >
00175 : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs> >
00176 {};
00177 }
00178
00179 template<typename Lhs, int LhsMode, typename Rhs>
00180 struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
00181 : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs >
00182 {
00183 EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
00184
00185 enum {
00186 LhsUpLo = LhsMode&(Upper|Lower)
00187 };
00188
00189 SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
00190
00191 template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
00192 {
00193 eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
00194
00195 const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
00196 const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
00197
00198 Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
00199 * RhsBlasTraits::extractScalarFactor(m_rhs);
00200
00201 eigen_assert(dst.innerStride()==1 && "not implemented yet");
00202
00203 internal::product_selfadjoint_vector<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>
00204 (
00205 lhs.rows(),
00206 &lhs.coeffRef(0,0), lhs.outerStride(),
00207 &rhs.coeffRef(0), rhs.innerStride(),
00208 &dst.coeffRef(0),
00209 actualAlpha
00210 );
00211 }
00212 };
00213
00214 namespace internal {
00215 template<typename Lhs, typename Rhs, int RhsMode>
00216 struct traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> >
00217 : traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> >
00218 {};
00219 }
00220
00221 template<typename Lhs, typename Rhs, int RhsMode>
00222 struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
00223 : public ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs >
00224 {
00225 EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
00226
00227 enum {
00228 RhsUpLo = RhsMode&(Upper|Lower)
00229 };
00230
00231 SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
00232
00233 template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
00234 {
00235 eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
00236
00237 const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
00238 const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
00239
00240 Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
00241 * RhsBlasTraits::extractScalarFactor(m_rhs);
00242
00243 eigen_assert(dst.innerStride()==1 && "not implemented yet");
00244
00245
00246 internal::product_selfadjoint_vector<Scalar, Index, (internal::traits<_ActualRhsType>::Flags&RowMajorBit) ? ColMajor : RowMajor, int(RhsUpLo)==Upper ? Lower : Upper,
00247 bool(RhsBlasTraits::NeedToConjugate), bool(LhsBlasTraits::NeedToConjugate)>
00248 (
00249 rhs.rows(),
00250 &rhs.coeffRef(0,0), rhs.outerStride(),
00251 &lhs.coeffRef(0), lhs.innerStride(),
00252 &dst.coeffRef(0),
00253 actualAlpha
00254 );
00255 }
00256 };
00257
00258
00259 #endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H