25 #ifndef EIGEN_PACKET_MATH_ALTIVEC_H
26 #define EIGEN_PACKET_MATH_ALTIVEC_H
// Fallback value, used only when EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD has not
// already been defined before this header is read.
32 #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
33 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
// Fallback value for EIGEN_HAS_FUSE_CJMADD when it is not already defined.
// NOTE(review): presumably advertises that a fused multiply-add is available
// (vec_madd is used further down) -- confirm against the code that tests it.
36 #ifndef EIGEN_HAS_FUSE_CJMADD
37 #define EIGEN_HAS_FUSE_CJMADD 1
// Fallback value for EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS when it is not
// already defined.
41 #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
42 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
// Declares a Packet4f named p4f_<NAME> from the integer splat vec_splat_s32(X).
// The result of the splat is cast to Packet4f; an AltiVec vector cast keeps the
// bit pattern, so the lanes hold the bits of X, not X converted to float.
// vec_splat_s32 only accepts a small signed literal (5-bit, -16..15).
55 #define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \
56 Packet4f p4f_##NAME = (Packet4f) vec_splat_s32(X)
// Declares a Packet4i named p4i_<NAME> with every lane set to X via
// vec_splat_s32. X must be a small signed literal (5-bit, -16..15).
58 #define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
59 Packet4i p4i_##NAME = vec_splat_s32(X)
// Declares a Packet4f named p4f_<NAME> initialized with pset1<Packet4f>(X).
// NOTE(review): pset1 is defined outside the visible lines; presumably it
// broadcasts X to all four lanes -- confirm.
61 #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
62 Packet4f p4f_##NAME = pset1<Packet4f>(X)
// Declares a Packet4f named p4f_<NAME> whose lanes carry the bit pattern of the
// integer X (for example a mask), not the value X converted to float.
// The integer packet is built with pset1<Packet4i> and then cast to Packet4f;
// an AltiVec vector cast reinterprets the bits without converting them, which
// is the same form _EIGEN_DECLARE_CONST_FAST_Packet4f uses.
// vreinterpretq_f32_u32 must not be used here: it is an ARM NEON intrinsic and
// is not available when targeting AltiVec.
64 #define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
65 Packet4f p4f_##NAME = (Packet4f) pset1<Packet4i>(X)
// Declares a Packet4i named p4i_<NAME> initialized with pset1<Packet4i>(X).
// Unlike the FAST variant this does not go through vec_splat_s32, so X is not
// limited to that intrinsic's small literal range.
67 #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
68 Packet4i p4i_##NAME = pset1<Packet4i>(X)
// Packs three fields into one integer: size in bits 24 and up, count in bits
// 16 and up, stride in the low bits. The fields are OR-ed without masking, so
// the caller must keep count below 256 and stride below 65536 to avoid overlap.
// NOTE(review): presumably the control word for the AltiVec data-stream
// prefetch intrinsics (vec_dst family) -- no use is visible here; confirm.
71 #define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride))
// Lane values 3, 2, 1, 0 (float and int versions).
// NOTE(review): their users are outside the visible lines.
74 static Packet4f p4f_COUNTDOWN = { 3.0, 2.0, 1.0, 0.0 };
75 static Packet4i p4i_COUNTDOWN = { 3, 2, 1, 0 };
// Byte-index table listing the four 4-byte groups in reverse order
// (12-15, 8-11, 4-7, 0-3).
// NOTE(review): presumably a vec_perm control vector that reverses the four
// 32-bit lanes -- its use is not visible here; confirm.
76 static Packet16uc p16uc_REVERSE = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
// vec_lvsl on a 16-byte-aligned address (here the null address) yields the byte
// indices 0..15, i.e. an identity permute control vector.
77 static Packet16uc p16uc_FORWARD = vec_lvsl(0, (
float*)0);
// Byte-index table used with vec_perm(p, p, p16uc_DUPLICATE) further down: it
// copies the first 4-byte lane into lanes 0 and 1 and the second into lanes 2 and 3.
78 static Packet16uc p16uc_DUPLICATE = {0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7};
// Converts the integer lanes of p4i_ONE to float with vec_ctf (scale 0).
// p4i_ONE is declared outside the visible lines and must be initialized before
// this definition, since both are statics initialized in declaration order.
85 static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0);
88 template<>
struct packet_traits<float> : default_packet_traits
104 template<>
struct packet_traits<
int> : default_packet_traits
// Reverse trait for Packet4f: the scalar element type is float and the packet
// holds 4 elements.
115 template<>
struct unpacket_traits<
Packet4f> {
typedef float type;
enum {size=4}; };
// Reverse trait for Packet4i: the scalar element type is int and the packet
// holds 4 elements.
116 template<>
struct unpacket_traits<
Packet4i> {
typedef int type;
enum {size=4}; };
167 vc = vec_splat(vc, 0);
175 vc = vec_splat(vc, 0);
236 t = vec_nmsub(y_0, b, p4f_ONE);
237 y_1 = vec_madd(y_0, t, y_0);
239 res = vec_madd(a, y_1, p4f_ZERO);
244 {
eigen_assert(
false &&
"packet integer division are not supported by AltiVec");
280 MSQ = vec_ld(0, (
unsigned char *)from);
281 LSQ = vec_ld(15, (
unsigned char *)from);
282 mask = vec_lvsl(0, from);
283 return (
Packet4f) vec_perm(MSQ, LSQ, mask);
292 MSQ = vec_ld(0, (
unsigned char *)from);
293 LSQ = vec_ld(15, (
unsigned char *)from);
294 mask = vec_lvsl(0, from);
295 return (
Packet4i) vec_perm(MSQ, LSQ, mask);
303 return vec_perm(p, p, p16uc_DUPLICATE);
310 return vec_perm(p, p, p16uc_DUPLICATE);
324 MSQ = vec_ld(0, (
unsigned char *)to);
325 LSQ = vec_ld(15, (
unsigned char *)to);
326 edgeAlign = vec_lvsl(0, to);
327 edges=vec_perm(LSQ,MSQ,edgeAlign);
328 align = vec_lvsr( 0, to );
331 vec_st( LSQ, 15, (
unsigned char *)to );
332 vec_st( MSQ, 0, (
unsigned char *)to );
342 MSQ = vec_ld(0, (
unsigned char *)to);
343 LSQ = vec_ld(15, (
unsigned char *)to);
344 edgeAlign = vec_lvsl(0, to);
345 edges=vec_perm(LSQ, MSQ, edgeAlign);
346 align = vec_lvsr( 0, to );
347 MSQ = vec_perm(edges, (
Packet16uc) from, align);
348 LSQ = vec_perm((
Packet16uc) from, edges, align);
349 vec_st( LSQ, 15, (
unsigned char *)to );
350 vec_st( MSQ, 0, (
unsigned char *)to );
370 b = (
Packet4f) vec_sld(sum, sum, 4);
371 sum = vec_add(sum, b);
382 v[0] = vec_mergeh(vecs[0], vecs[2]);
383 v[1] = vec_mergel(vecs[0], vecs[2]);
384 v[2] = vec_mergeh(vecs[1], vecs[3]);
385 v[3] = vec_mergel(vecs[1], vecs[3]);
387 sum[0] = vec_mergeh(v[0], v[2]);
388 sum[1] = vec_mergel(v[0], v[2]);
389 sum[2] = vec_mergeh(v[1], v[3]);
390 sum[3] = vec_mergel(v[1], v[3]);
394 sum[0] = vec_add(sum[0], sum[1]);
396 sum[1] = vec_add(sum[2], sum[3]);
398 sum[0] = vec_add(sum[0], sum[1]);
406 sum = vec_sums(a, p4i_ZERO);
407 sum = vec_sld(sum, p4i_ZERO, 12);
418 v[0] = vec_mergeh(vecs[0], vecs[2]);
419 v[1] = vec_mergel(vecs[0], vecs[2]);
420 v[2] = vec_mergeh(vecs[1], vecs[3]);
421 v[3] = vec_mergel(vecs[1], vecs[3]);
423 sum[0] = vec_mergeh(v[0], v[2]);
424 sum[1] = vec_mergel(v[0], v[2]);
425 sum[2] = vec_mergeh(v[1], v[3]);
426 sum[3] = vec_mergel(v[1], v[3]);
430 sum[0] = vec_add(sum[0], sum[1]);
432 sum[1] = vec_add(sum[2], sum[3]);
434 sum[0] = vec_add(sum[0], sum[1]);
452 return aux[0] * aux[1] * aux[2] * aux[3];
459 b = vec_min(a, vec_sld(a, a, 8));
460 res = vec_min(b, vec_sld(b, b, 4));
467 b = vec_min(a, vec_sld(a, a, 8));
468 res = vec_min(b, vec_sld(b, b, 4));
476 b = vec_max(a, vec_sld(a, a, 8));
477 res = vec_max(b, vec_sld(b, b, 4));
484 b = vec_max(a, vec_sld(a, a, 8));
485 res = vec_max(b, vec_sld(b, b, 4));
495 first = vec_sld(first, second, Offset*4);
505 first = vec_sld(first, second, Offset*4);
513 #endif // EIGEN_PACKET_MATH_ALTIVEC_H