HMSBEAGLE  1.0.0
SSEDefinitions.h
1 /*
2  * SSEDefinitions.h
3  * BEAGLE
4  *
5  * Copyright 2009 Phylogenetic Likelihood Working Group
6  *
7  * This file is part of BEAGLE.
8  *
9  * BEAGLE is free software: you can redistribute it and/or modify
10  * it under the terms of the GNU Lesser General Public License as
11  * published by the Free Software Foundation, either version 3 of
12  * the License, or (at your option) any later version.
13  *
14  * BEAGLE is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with BEAGLE. If not, see
21  * <http://www.gnu.org/licenses/>.
22  *
23  * @author Marc Suchard
24  */
25 
26 #ifndef __SSEDefinitions__
27 #define __SSEDefinitions__
28 
29 #ifdef HAVE_CONFIG_H
30 #include "libhmsbeagle/config.h"
31 #endif
32 
33 #define DLS_USE_SSE2
34 
35 #if defined(DLS_USE_SSE2)
36 # if !defined(DLS_MACOS)
37 # include <emmintrin.h>
38 # endif
39 # include <xmmintrin.h>
40 #endif
/* Alias for double; not referenced elsewhere in the visible part of this header. */
typedef double VecEl_t;

/*
 * ALIGN16: declaration prefix requesting 16-byte alignment for the object
 * being declared, e.g.  ALIGN16 double buf[2];
 *
 * GCC-compatible compilers and MSVC use their native spellings. Any other
 * compiler falls back to the standard C++11 / C11 alignment specifier when
 * the language level provides one, and only as a last resort to the
 * __declspec spelling that was previously used for every non-GCC compiler.
 */
#if defined(__GNUC__)
#define ALIGN16 __attribute__((aligned(16)))
#elif defined(_MSC_VER)
#define ALIGN16 __declspec(align(16))
#elif defined(__cplusplus) && __cplusplus >= 201103L
#define ALIGN16 alignas(16)
#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define ALIGN16 _Alignas(16)
#else
#define ALIGN16 __declspec(align(16))
#endif
48 
/*
 * Precision selection.
 *
 * USE_DOUBLE_PREC is defined unconditionally right here, so the
 * double-precision branch is the one that gets compiled; the single-precision
 * branch is only reachable if that define is removed.
 *
 * RealType      scalar element type
 * V_Real        SSE vector type holding REALS_PER_VEC elements
 * VEC_*         thin wrappers over the matching SSE/SSE2 intrinsics
 *
 * All wrappers parenthesise their arguments. VEC_SWAP expands its argument
 * twice, so do not pass an expression with side effects.
 */
#define USE_DOUBLE_PREC
#if defined(USE_DOUBLE_PREC)
typedef double RealType;
typedef __m128d V_Real;
# define REALS_PER_VEC 2 /* number of elements per vector */
/* Load two doubles from a 16-byte aligned address. */
# define VEC_LOAD(a) _mm_load_pd((a))
/* Load one double and broadcast it to both lanes. */
# define VEC_LOAD_SCALAR(a) _mm_load1_pd((a))
/* Store both lanes to a 16-byte aligned address. */
# define VEC_STORE(a, b) _mm_store_pd((a), (b))
/* Store only the low lane of b to *a. */
# define VEC_STORE_SCALAR(a, b) _mm_store_sd((a), (b))
# define VEC_MULT(a, b) _mm_mul_pd((a), (b))
# define VEC_DIV(a, b) _mm_div_pd((a), (b))
/* Multiply-then-add as two separate operations: (a * b) + c. */
# define VEC_MADD(a, b, c) _mm_add_pd(_mm_mul_pd((a), (b)), (c))
/* Broadcast the scalar a to every lane. */
# define VEC_SPLAT(a) _mm_set1_pd((a))
# define VEC_ADD(a, b) _mm_add_pd((a), (b))
/* Exchange the low and high lanes of a. */
# define VEC_SWAP(a) _mm_shuffle_pd((a), (a), _MM_SHUFFLE2(0, 1))
# define VEC_SETZERO() _mm_setzero_pd()
/* Despite the name this is NOT a broadcast: low lane = a, high lane = 0. */
# define VEC_SET1(a) _mm_set_sd((a))
/* High lane = a, low lane = b. */
# define VEC_SET(a, b) _mm_set_pd((a), (b))
/* Low lane taken from b, high lane taken from a. */
# define VEC_MOVE(a, b) _mm_move_sd((a), (b))
#else
typedef float RealType;
typedef __m128 V_Real;
# define REALS_PER_VEC 4 /* number of elements per vector */
/*
 * Single-precision counterparts of the wrappers above. VEC_SWAP and VEC_SET
 * are intentionally left undefined: their two-lane meaning has no single
 * obvious four-lane equivalent.
 */
# define VEC_LOAD(a) _mm_load_ps((a))
# define VEC_LOAD_SCALAR(a) _mm_load1_ps((a))
# define VEC_STORE(a, b) _mm_store_ps((a), (b))
# define VEC_STORE_SCALAR(a, b) _mm_store_ss((a), (b))
# define VEC_MULT(a, b) _mm_mul_ps((a), (b))
# define VEC_DIV(a, b) _mm_div_ps((a), (b))
# define VEC_MADD(a, b, c) _mm_add_ps(_mm_mul_ps((a), (b)), (c))
# define VEC_SPLAT(a) _mm_set1_ps((a))
# define VEC_ADD(a, b) _mm_add_ps((a), (b))
# define VEC_SETZERO() _mm_setzero_ps()
# define VEC_SET1(a) _mm_set_ss((a))
# define VEC_MOVE(a, b) _mm_move_ss((a), (b))
#endif
77 typedef union /* for copying individual elements to and from vector floats */
78  {
79  RealType x[REALS_PER_VEC];
80  V_Real vx;
81  }
82  VecUnion;
83 
#ifdef __GNUC__
/*
 * cpuid(func, ax, bx, cx, dx)
 *
 * Executes the x86 CPUID instruction with `func` loaded into EAX and writes
 * the resulting EAX, EBX, ECX and EDX into the lvalues ax, bx, cx and dx.
 * ECX is not initialised on entry.
 *
 * The expansion is a single statement WITHOUT a trailing semicolon, so the
 * caller supplies it: `cpuid(0, a, b, c, d);`. That keeps the macro usable
 * in an unbraced if/else.
 *
 * NOTE(review): the "=b" output constraint may be rejected by older GCC
 * releases when building 32-bit position-independent code -- confirm if that
 * configuration matters.
 */
#define cpuid(func, ax, bx, cx, dx) \
    __asm__ __volatile__("cpuid" \
                         : "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx) \
                         : "a"(func))
#endif
89 
90 #ifdef _WIN32
91 
92 #endif
93 
/**
 * Report whether the SSE code paths may be used on this CPU.
 *
 * This is a stub: no runtime detection is performed and the function
 * unconditionally answers "yes".
 * TODO(review): replace with a real CPUID-based feature query if this header
 * is ever built for targets where SSE2 is not guaranteed.
 *
 * Declared `static inline` because the definition lives in a header; an
 * externally linked definition here would be emitted once per including
 * translation unit and collide at link time.
 *
 * @return always 1.
 */
static inline int CPUSupportsSSE(void) {
    return 1;
}
100 
101 #endif // __SSEDefinitions__