SelfadjointMatrixMatrix.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.

#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H

namespace Eigen {

namespace internal {

// pack a selfadjoint block diagonal for use with the gebp_kernel
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder>
struct symm_pack_lhs
{
  template<int BlockRows> inline
  void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
  {
    // normal copy
    for(Index k=0; k<i; k++)
      for(Index w=0; w<BlockRows; w++)
        blockA[count++] = lhs(i+w,k);           // normal
    // symmetric copy
    Index h = 0;
    for(Index k=i; k<i+BlockRows; k++)
    {
      for(Index w=0; w<h; w++)
        blockA[count++] = conj(lhs(k, i+w));    // transposed

      blockA[count++] = real(lhs(k,k));         // real (diagonal)

      for(Index w=h+1; w<BlockRows; w++)
        blockA[count++] = lhs(i+w, k);          // normal
      ++h;
    }
    // transposed copy
    for(Index k=i+BlockRows; k<cols; k++)
      for(Index w=0; w<BlockRows; w++)
        blockA[count++] = conj(lhs(k, i+w));    // transposed
  }
  void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
  {
    const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
    Index count = 0;
    Index peeled_mc = (rows/Pack1)*Pack1;
    for(Index i=0; i<peeled_mc; i+=Pack1)
    {
      pack<Pack1>(blockA, lhs, cols, i, count);
    }

    if(rows-peeled_mc>=Pack2)
    {
      pack<Pack2>(blockA, lhs, cols, peeled_mc, count);
      peeled_mc += Pack2;
    }

    // do the same with mr==1
    for(Index i=peeled_mc; i<rows; i++)
    {
      for(Index k=0; k<i; k++)
        blockA[count++] = lhs(i, k);            // normal

      blockA[count++] = real(lhs(i, i));        // real (diagonal)

      for(Index k=i+1; k<cols; k++)
        blockA[count++] = conj(lhs(k, i));      // transposed
    }
  }
};
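
// Illustrative note (not part of the original file): both packers rely on the
// self-adjointness rule A(r,c) == conj(A(c,r)) to synthesize the triangle that
// is not stored. A hypothetical standalone helper expressing that rule would
// look like:
//
//   template<typename Mat>
//   typename Mat::Scalar coeffFromLower(const Mat& A, typename Mat::Index r, typename Mat::Index c)
//   {
//     // read the stored lower triangle directly; mirror and conjugate above it
//     return r>=c ? A(r,c) : internal::conj(A(c,r));
//   }
//
// The "transposed" branches above are exactly this mirror step, and the
// diagonal goes through real() since a self-adjoint matrix has a real diagonal.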

template<typename Scalar, typename Index, int nr, int StorageOrder>
struct symm_pack_rhs
{
  enum { PacketSize = packet_traits<Scalar>::size };
  void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
  {
    Index end_k = k2 + rows;
    Index count = 0;
    const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
    Index packet_cols = (cols/nr)*nr;

    // first part: normal case
    for(Index j2=0; j2<k2; j2+=nr)
    {
      for(Index k=k2; k<end_k; k++)
      {
        blockB[count+0] = rhs(k,j2+0);
        blockB[count+1] = rhs(k,j2+1);
        if (nr==4)
        {
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
        }
        count += nr;
      }
    }

    // second part: diagonal block
    for(Index j2=k2; j2<(std::min)(k2+rows,packet_cols); j2+=nr)
    {
      // again we can split vertically into three different parts (transposed, symmetric, normal)
      // transposed
      for(Index k=k2; k<j2; k++)
      {
        blockB[count+0] = conj(rhs(j2+0,k));
        blockB[count+1] = conj(rhs(j2+1,k));
        if (nr==4)
        {
          blockB[count+2] = conj(rhs(j2+2,k));
          blockB[count+3] = conj(rhs(j2+3,k));
        }
        count += nr;
      }
      // symmetric
      Index h = 0;
      for(Index k=j2; k<j2+nr; k++)
      {
        // normal
        for (Index w=0 ; w<h; ++w)
          blockB[count+w] = rhs(k,j2+w);

        blockB[count+h] = real(rhs(k,k));

        // transposed
        for (Index w=h+1 ; w<nr; ++w)
          blockB[count+w] = conj(rhs(j2+w,k));
        count += nr;
        ++h;
      }
      // normal
      for(Index k=j2+nr; k<end_k; k++)
      {
        blockB[count+0] = rhs(k,j2+0);
        blockB[count+1] = rhs(k,j2+1);
        if (nr==4)
        {
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
        }
        count += nr;
      }
    }

    // third part: transposed
    for(Index j2=k2+rows; j2<packet_cols; j2+=nr)
    {
      for(Index k=k2; k<end_k; k++)
      {
        blockB[count+0] = conj(rhs(j2+0,k));
        blockB[count+1] = conj(rhs(j2+1,k));
        if (nr==4)
        {
          blockB[count+2] = conj(rhs(j2+2,k));
          blockB[count+3] = conj(rhs(j2+3,k));
        }
        count += nr;
      }
    }

    // copy the remaining columns one at a time (i.e. the same as above with nr==1)
    for(Index j2=packet_cols; j2<cols; ++j2)
    {
      // transposed
      Index half = (std::min)(end_k,j2);
      for(Index k=k2; k<half; k++)
      {
        blockB[count] = conj(rhs(j2,k));
        count += 1;
      }

      if(half==j2 && half<k2+rows)
      {
        blockB[count] = real(rhs(j2,j2));
        count += 1;
      }
      else
        half--;

      // normal
      for(Index k=half+1; k<k2+rows; k++)
      {
        blockB[count] = rhs(k,j2);
        count += 1;
      }
    }
  }
};
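
// Layout handled above, read off the loops (illustrative summary): for the
// current k-panel, source rows are [k2, k2+rows) and the packed columns fall
// into three regions:
//
//   [0, k2)          -> copied as stored:      blockB <- rhs(k, j)
//   [k2, k2+rows)    -> diagonal block, mixed normal/real-diagonal/mirrored
//   [k2+rows, cols)  -> mirrored + conjugated: blockB <- conj(rhs(j, k))
//
// Only one triangle of the self-adjoint rhs is ever dereferenced, so the
// gebp kernel downstream can remain completely generic.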

/* Optimized selfadjoint matrix * matrix (_SYMM) product built on top of
 * the general matrix matrix product.
 */
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
          int ResStorageOrder>
struct product_selfadjoint_matrix;
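
// At the user level this kernel is reached through selfadjointView();
// a minimal usage sketch (assuming <Eigen/Dense> is included and
// `using namespace Eigen;`):
//
//   MatrixXcd A = MatrixXcd::Random(64,64), B = MatrixXcd::Random(64,32);
//   MatrixXcd C = A.selfadjointView<Lower>() * B;           // self-adjoint lhs
//   MatrixXcd D = B.adjoint() * A.selfadjointView<Lower>(); // self-adjoint rhs
//
// Only the referenced triangle of A is read; the other half is reconstructed
// on the fly by the packing routines above.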

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
{
  static EIGEN_STRONG_INLINE void run(
    Index rows, Index cols,
    const Scalar* lhs, Index lhsStride,
    const Scalar* rhs, Index rhsStride,
    Scalar* res, Index resStride,
    Scalar alpha)
  {
    product_selfadjoint_matrix<Scalar, Index,
      EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
      EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
      ColMajor>
      ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha);
  }
};
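
// Rationale (a short derivation, not in the original comments): storing the
// result C row-major is the same as storing C^T col-major, and
// C^T = (L*R)^T = R^T * L^T, so this specialization swaps the two operands,
// flips their storage orders, and forwards to the col-major kernel. For a
// self-adjoint operand, transposition reduces to conjugation (A^T == conj(A)
// when A == A^*), which is what the EIGEN_LOGICAL_XOR terms on the
// conjugation flags encode.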

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>
{
  static EIGEN_DONT_INLINE void run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res, Index resStride,
    Scalar alpha)
  {
    Index size = rows;

    const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
    const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);

    typedef gebp_traits<Scalar,Scalar> Traits;

    Index kc = size;  // cache block size along the K direction
    Index mc = rows;  // cache block size along the M direction
    Index nc = cols;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
    // kc must be smaller than mc
    kc = (std::min)(kc,mc);

    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
    Scalar* blockB = allocatedBlockB + sizeW;

    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
    symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
    gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
    gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;

    for(Index k2=0; k2<size; k2+=kc)
    {
      const Index actual_kc = (std::min)(k2+kc,size)-k2;

      // we have selected one row panel of rhs and one column panel of lhs;
      // pack rhs's panel into a sequential chunk of memory
      // and expand each coeff to a constant packet for further reuse
      pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);

      // the selected lhs panel has to be split into three different parts:
      //  1 - the transposed panel above the diagonal block => transposed packed copy
      //  2 - the diagonal block => special packed copy
      //  3 - the panel below the diagonal block => generic packed copy
      for(Index i2=0; i2<k2; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,k2)-i2;
        // transposed packed copy
        pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
      // the block diagonal
      {
        const Index actual_mc = (std::min)(k2+kc,size)-k2;
        // symmetric packed copy
        pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }

      for(Index i2=k2+kc; i2<size; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,size)-i2;
        gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
          (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
  }
};
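
// Picture of the split performed in the k2 loop above (illustrative): with a
// column panel lhs(:, k2..k2+kc) of the self-adjoint lhs,
//
//   rows [0, k2)        -> part 1: above the diagonal block, packed from the
//                          stored opposite triangle (pack_lhs_transposed)
//   rows [k2, k2+kc)    -> part 2: the diagonal block itself (symm_pack_lhs)
//   rows [k2+kc, size)  -> part 3: below the diagonal block, a plain
//                          gemm-style copy (gemm_pack_lhs)
//
// All three produce the same packed blockA format, so one gebp_kernel
// consumes them uniformly.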

// matrix * selfadjoint product
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
{
  static EIGEN_DONT_INLINE void run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res, Index resStride,
    Scalar alpha)
  {
    Index size = cols;

    const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);

    typedef gebp_traits<Scalar,Scalar> Traits;

    Index kc = size;  // cache block size along the K direction
    Index mc = rows;  // cache block size along the M direction
    Index nc = cols;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
    Scalar* blockB = allocatedBlockB + sizeW;

    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
    gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
    symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;

    for(Index k2=0; k2<size; k2+=kc)
    {
      const Index actual_kc = (std::min)(k2+kc,size)-k2;

      pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);

      // => GEPP
      for(Index i2=0; i2<rows; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,rows)-i2;
        pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
  }
};
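
// Note the asymmetry with the lhs-self-adjoint case: symm_pack_rhs is handed
// the raw _rhs pointer together with the absolute offset k2 rather than
// &rhs(k2,0), since it needs global column indices to locate the rhs diagonal
// block and decide where the mirrored region starts.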

} // end namespace internal

/***************************************************************************
* Wrapper to product_selfadjoint_matrix
***************************************************************************/

namespace internal {
template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> >
  : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> >
{};
}

template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
  : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs >
{
  EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)

  SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}

  enum {
    LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
    LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
    RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
    RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
  };

  template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
  {
    eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());

    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);

    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
                               * RhsBlasTraits::extractScalarFactor(m_rhs);

    internal::product_selfadjoint_matrix<Scalar, Index,
      EIGEN_LOGICAL_XOR(LhsIsUpper,
                        internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
      EIGEN_LOGICAL_XOR(RhsIsUpper,
                        internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
      internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
      ::run(
        lhs.rows(), rhs.cols(),                 // sizes
        &lhs.coeffRef(0,0), lhs.outerStride(),  // lhs info
        &rhs.coeffRef(0,0), rhs.outerStride(),  // rhs info
        &dst.coeffRef(0,0), dst.outerStride(),  // result info
        actualAlpha                             // alpha
      );
  }
};
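
// A minimal sketch of how scaleAndAddTo() is reached (assuming <Eigen/Dense>
// and `using namespace Eigen;`; sizes are hypothetical):
//
//   MatrixXd A = MatrixXd::Random(100,100), B = MatrixXd::Random(100,40);
//   MatrixXd dst = MatrixXd::Zero(100,40);
//   dst.noalias() += 2.0 * A.selfadjointView<Upper>() * B;
//
// The accumulation and the factor 2.0 are folded into the single run() call
// via `alpha`; no temporary holding the product is materialized.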

} // end namespace Eigen

#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H