Redux.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6 //
7 // Eigen is free software; you can redistribute it and/or
8 // modify it under the terms of the GNU Lesser General Public
9 // License as published by the Free Software Foundation; either
10 // version 3 of the License, or (at your option) any later version.
11 //
12 // Alternatively, you can redistribute it and/or
13 // modify it under the terms of the GNU General Public License as
14 // published by the Free Software Foundation; either version 2 of
15 // the License, or (at your option) any later version.
16 //
17 // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
18 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
20 // GNU General Public License for more details.
21 //
22 // You should have received a copy of the GNU Lesser General Public
23 // License and a copy of the GNU General Public License along with
24 // Eigen. If not, see <http://www.gnu.org/licenses/>.
25 
26 #ifndef EIGEN_REDUX_H
27 #define EIGEN_REDUX_H
28 
29 namespace Eigen {
30 
31 namespace internal {
32 
33 // TODO
34 // * implement other kind of vectorization
35 // * factorize code
36 
37 /***************************************************************************
38 * Part 1 : the logic deciding a strategy for vectorization and unrolling
39 ***************************************************************************/
40 
41 template<typename Func, typename Derived>
42 struct redux_traits
43 {
44 public:
45  enum {
46  PacketSize = packet_traits<typename Derived::Scalar>::size,
47  InnerMaxSize = int(Derived::IsRowMajor)
48  ? Derived::MaxColsAtCompileTime
49  : Derived::MaxRowsAtCompileTime
50  };
51 
52  enum {
53  MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
54  && (functor_traits<Func>::PacketAccess),
55  MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit),
56  MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize
57  };
58 
59 public:
60  enum {
61  Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
62  : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
64  };
65 
66 public:
67  enum {
68  Cost = ( Derived::SizeAtCompileTime == Dynamic
69  || Derived::CoeffReadCost == Dynamic
70  || (Derived::SizeAtCompileTime!=1 && functor_traits<Func>::Cost == Dynamic)
71  ) ? Dynamic
72  : Derived::SizeAtCompileTime * Derived::CoeffReadCost
73  + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
74  UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
75  };
76 
77 public:
78  enum {
79  Unrolling = Cost != Dynamic && Cost <= UnrollingLimit
81  : NoUnrolling
82  };
83 };
84 
85 /***************************************************************************
86 * Part 2 : unrollers
87 ***************************************************************************/
88 
89 /*** no vectorization ***/
90 
91 template<typename Func, typename Derived, int Start, int Length>
92 struct redux_novec_unroller
93 {
94  enum {
95  HalfLength = Length/2
96  };
97 
98  typedef typename Derived::Scalar Scalar;
99 
100  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
101  {
102  return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
103  redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
104  }
105 };
106 
107 template<typename Func, typename Derived, int Start>
108 struct redux_novec_unroller<Func, Derived, Start, 1>
109 {
110  enum {
111  outer = Start / Derived::InnerSizeAtCompileTime,
112  inner = Start % Derived::InnerSizeAtCompileTime
113  };
114 
115  typedef typename Derived::Scalar Scalar;
116 
117  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
118  {
119  return mat.coeffByOuterInner(outer, inner);
120  }
121 };
122 
123 // This is actually dead code and will never be called. It is required
124 // to prevent false warnings regarding failed inlining though
125 // for 0 length run() will never be called at all.
126 template<typename Func, typename Derived, int Start>
127 struct redux_novec_unroller<Func, Derived, Start, 0>
128 {
129  typedef typename Derived::Scalar Scalar;
130  static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
131 };
132 
133 /*** vectorization ***/
134 
135 template<typename Func, typename Derived, int Start, int Length>
136 struct redux_vec_unroller
137 {
138  enum {
139  PacketSize = packet_traits<typename Derived::Scalar>::size,
140  HalfLength = Length/2
141  };
142 
143  typedef typename Derived::Scalar Scalar;
144  typedef typename packet_traits<Scalar>::type PacketScalar;
145 
146  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
147  {
148  return func.packetOp(
149  redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
150  redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
151  }
152 };
153 
154 template<typename Func, typename Derived, int Start>
155 struct redux_vec_unroller<Func, Derived, Start, 1>
156 {
157  enum {
158  index = Start * packet_traits<typename Derived::Scalar>::size,
159  outer = index / int(Derived::InnerSizeAtCompileTime),
160  inner = index % int(Derived::InnerSizeAtCompileTime),
161  alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
162  };
163 
164  typedef typename Derived::Scalar Scalar;
165  typedef typename packet_traits<Scalar>::type PacketScalar;
166 
167  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
168  {
169  return mat.template packetByOuterInner<alignment>(outer, inner);
170  }
171 };
172 
173 /***************************************************************************
174 * Part 3 : implementation of all cases
175 ***************************************************************************/
176 
177 template<typename Func, typename Derived,
178  int Traversal = redux_traits<Func, Derived>::Traversal,
179  int Unrolling = redux_traits<Func, Derived>::Unrolling
180 >
181 struct redux_impl;
182 
183 template<typename Func, typename Derived>
184 struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
185 {
186  typedef typename Derived::Scalar Scalar;
187  typedef typename Derived::Index Index;
188  static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
189  {
190  eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
191  Scalar res;
192  res = mat.coeffByOuterInner(0, 0);
193  for(Index i = 1; i < mat.innerSize(); ++i)
194  res = func(res, mat.coeffByOuterInner(0, i));
195  for(Index i = 1; i < mat.outerSize(); ++i)
196  for(Index j = 0; j < mat.innerSize(); ++j)
197  res = func(res, mat.coeffByOuterInner(i, j));
198  return res;
199  }
200 };
201 
202 template<typename Func, typename Derived>
203 struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
204  : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
205 {};
206 
207 template<typename Func, typename Derived>
208 struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
209 {
210  typedef typename Derived::Scalar Scalar;
211  typedef typename packet_traits<Scalar>::type PacketScalar;
212  typedef typename Derived::Index Index;
213 
214  static Scalar run(const Derived& mat, const Func& func)
215  {
216  const Index size = mat.size();
217  eigen_assert(size && "you are using an empty matrix");
218  const Index packetSize = packet_traits<Scalar>::size;
219  const Index alignedStart = internal::first_aligned(mat);
220  enum {
221  alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
222  ? Aligned : Unaligned
223  };
224  const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
225  const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
226  const Index alignedEnd2 = alignedStart + alignedSize2;
227  const Index alignedEnd = alignedStart + alignedSize;
228  Scalar res;
229  if(alignedSize)
230  {
231  PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart);
232  if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
233  {
234  PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize);
235  for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
236  {
237  packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index));
238  packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize));
239  }
240 
241  packet_res0 = func.packetOp(packet_res0,packet_res1);
242  if(alignedEnd>alignedEnd2)
243  packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2));
244  }
245  res = func.predux(packet_res0);
246 
247  for(Index index = 0; index < alignedStart; ++index)
248  res = func(res,mat.coeff(index));
249 
250  for(Index index = alignedEnd; index < size; ++index)
251  res = func(res,mat.coeff(index));
252  }
253  else // too small to vectorize anything.
254  // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
255  {
256  res = mat.coeff(0);
257  for(Index index = 1; index < size; ++index)
258  res = func(res,mat.coeff(index));
259  }
260 
261  return res;
262  }
263 };
264 
265 template<typename Func, typename Derived>
266 struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
267 {
268  typedef typename Derived::Scalar Scalar;
269  typedef typename packet_traits<Scalar>::type PacketScalar;
270  typedef typename Derived::Index Index;
271 
272  static Scalar run(const Derived& mat, const Func& func)
273  {
274  eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
275  const Index innerSize = mat.innerSize();
276  const Index outerSize = mat.outerSize();
277  enum {
278  packetSize = packet_traits<Scalar>::size
279  };
280  const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
281  Scalar res;
282  if(packetedInnerSize)
283  {
284  PacketScalar packet_res = mat.template packet<Unaligned>(0,0);
285  for(Index j=0; j<outerSize; ++j)
286  for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
287  packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned>(j,i));
288 
289  res = func.predux(packet_res);
290  for(Index j=0; j<outerSize; ++j)
291  for(Index i=packetedInnerSize; i<innerSize; ++i)
292  res = func(res, mat.coeffByOuterInner(j,i));
293  }
294  else // too small to vectorize anything.
295  // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
296  {
297  res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
298  }
299 
300  return res;
301  }
302 };
303 
304 template<typename Func, typename Derived>
305 struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
306 {
307  typedef typename Derived::Scalar Scalar;
308  typedef typename packet_traits<Scalar>::type PacketScalar;
309  enum {
310  PacketSize = packet_traits<Scalar>::size,
311  Size = Derived::SizeAtCompileTime,
312  VectorizedSize = (Size / PacketSize) * PacketSize
313  };
314  static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
315  {
316  eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
317  Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
318  if (VectorizedSize != Size)
319  res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
320  return res;
321  }
322 };
323 
324 } // end namespace internal
325 
326 /***************************************************************************
327 * Part 4 : public API
328 ***************************************************************************/
329 
330 
338 template<typename Derived>
339 template<typename Func>
340 EIGEN_STRONG_INLINE typename internal::result_of<Func(typename internal::traits<Derived>::Scalar)>::type
341 DenseBase<Derived>::redux(const Func& func) const
342 {
343  typedef typename internal::remove_all<typename Derived::Nested>::type ThisNested;
344  return internal::redux_impl<Func, ThisNested>
345  ::run(derived(), func);
346 }
347 
350 template<typename Derived>
351 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
353 {
354  return this->redux(Eigen::internal::scalar_min_op<Scalar>());
355 }
356 
359 template<typename Derived>
360 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
362 {
363  return this->redux(Eigen::internal::scalar_max_op<Scalar>());
364 }
365 
370 template<typename Derived>
371 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
373 {
374  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
375  return Scalar(0);
376  return this->redux(Eigen::internal::scalar_sum_op<Scalar>());
377 }
378 
383 template<typename Derived>
384 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
386 {
387  return Scalar(this->redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
388 }
389 
397 template<typename Derived>
398 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
400 {
401  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
402  return Scalar(1);
403  return this->redux(Eigen::internal::scalar_product_op<Scalar>());
404 }
405 
412 template<typename Derived>
413 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
415 {
416  return derived().diagonal().sum();
417 }
418 
419 } // end namespace Eigen
420 
421 #endif // EIGEN_REDUX_H