26 #ifndef __BeagleCPU4StateSSEImpl__
27 #define __BeagleCPU4StateSSEImpl__
30 #include "libhmsbeagle/config.h"
33 #include "libhmsbeagle/CPU/BeagleCPU4StateImpl.h"
// Shorthand for the compiler-specific `__restrict` pointer qualifier.
37 #define RESTRICT __restrict
// Default padding amounts for the 4-state SSE code path.
39 #define T_PAD_4_SSE_DEFAULT 2 // Pad transition matrix with 2 rows for SSE
40 #define P_PAD_4_SSE_DEFAULT 0 // Partials padding not needed for 4 states SSE
// Template-argument lists of the form <real type>, T_PAD, P_PAD for the
// single-precision (float) and double-precision (double) variants. T_PAD and
// P_PAD are the names introduced by BEAGLE_CPU_4_SSE_TEMPLATE.
42 #define BEAGLE_CPU_4_SSE_FLOAT float, T_PAD, P_PAD
43 #define BEAGLE_CPU_4_SSE_DOUBLE double, T_PAD, P_PAD
// Template header with two int parameters, T_PAD and P_PAD (presumably the
// transition-matrix and partials padding amounts -- confirm at the use sites).
44 #define BEAGLE_CPU_4_SSE_TEMPLATE template <int T_PAD, int P_PAD>
53 BEAGLE_CPU_4_SSE_TEMPLATE
// Returns a C string; presumably the human-readable name of this
// implementation -- confirm against the definition.
75 virtual const char* getName();
// Returns a long; presumably a bitmask of the capability flags this
// implementation supports -- TODO confirm against the definition.
// NOTE(review): `const` on a by-value return type has no effect for callers.
77 virtual const long getFlags();
// Returns an int; judging by the name, the modulus to which the pattern count
// is padded for this implementation -- TODO confirm against the definition.
80 virtual int getPaddedPatternsModulus();
// Single-precision (float) kernel declaration; by its name, the case where
// both inputs are given as states rather than partials -- TODO confirm.
// NOTE(review): the embedded numbering jumps 84 -> 86 -> 88, so parameters
// may be missing from this view of the signature.
//   destP      only non-const pointer; presumably the output buffer
//   matrices1  read-only float data, presumably the first input's matrices
//   matrices2  read-only float data, presumably the second input's matrices
84 virtual void calcStatesStates(
float* destP,
86 const float* matrices1,
88 const float* matrices2);
// Single-precision (float) kernel declaration; by its name, the case where
// one input is given as states and the other as partials -- TODO confirm.
// NOTE(review): the embedded numbering skips 91, so a parameter may be
// missing from this view of the signature.
//   destP      only non-const pointer; presumably the output buffer
//   matrices1  read-only, declared non-aliasing (__restrict)
//   partials2  read-only, declared non-aliasing (__restrict)
//   matrices2  read-only, declared non-aliasing (__restrict)
90 virtual void calcStatesPartials(
float* destP,
92 const float* __restrict matrices1,
93 const float* __restrict partials2,
94 const float* __restrict matrices2);
// Single-precision (float) states/partials kernel declaration that also
// receives a read-only scaleFactors array; presumably applies caller-supplied
// scale factors to the result -- TODO confirm against the definition.
// NOTE(review): the embedded numbering skips 97, so a parameter may be
// missing from this view of the signature.
//   destP         only non-const pointer; presumably the output buffer
//   matrices1     read-only, declared non-aliasing (__restrict)
//   partials2     read-only, declared non-aliasing (__restrict)
//   matrices2     read-only, declared non-aliasing (__restrict)
//   scaleFactors  read-only, declared non-aliasing (__restrict)
96 virtual void calcStatesPartialsFixedScaling(
float* destP,
98 const float* __restrict matrices1,
99 const float* __restrict partials2,
100 const float* __restrict matrices2,
101 const float* __restrict scaleFactors);
// Single-precision (float) kernel declaration; by its name, the case where
// both inputs are given as partials -- TODO confirm against the definition.
// Every pointer is declared __restrict, i.e. the caller promises that the
// buffers do not alias one another.
//   destP      only non-const pointer; presumably the output buffer
//   partials1  read-only partials for the first input
//   matrices1  read-only matrices for the first input
//   partials2  read-only partials for the second input
//   matrices2  read-only matrices for the second input
103 virtual void calcPartialsPartials(
float* __restrict destP,
104 const float* __restrict partials1,
105 const float* __restrict matrices1,
106 const float* __restrict partials2,
107 const float* __restrict matrices2);
// Single-precision (float) partials/partials kernel declaration that also
// receives a read-only scaleFactors array; presumably applies caller-supplied
// scale factors to the result -- TODO confirm against the definition.
// Every pointer is declared __restrict (no aliasing between buffers).
//   destP           only non-const pointer; presumably the output buffer
//   child0Partials  read-only partials for child 0
//   child0TransMat  read-only data for child 0 (presumably its transition matrix)
//   child1Partials  read-only partials for child 1
//   child1TransMat  read-only data for child 1 (presumably its transition matrix)
//   scaleFactors    read-only scale factors
109 virtual void calcPartialsPartialsFixedScaling(
float* __restrict destP,
110 const float* __restrict child0Partials,
111 const float* __restrict child0TransMat,
112 const float* __restrict child1Partials,
113 const float* __restrict child1TransMat,
114 const float* __restrict scaleFactors);
// Single-precision (float) partials/partials kernel declaration with an extra
// non-const int* activateScaling; presumably an out-flag through which the
// kernel reports that scaling should be switched on -- TODO confirm.
// The float pointers are declared __restrict; activateScaling is not.
//   destP            non-const float pointer; presumably the output buffer
//   partials1        read-only partials for the first input
//   matrices1        read-only matrices for the first input
//   partials2        read-only partials for the second input
//   matrices2        read-only matrices for the second input
//   activateScaling  non-const int pointer (see note above)
116 virtual void calcPartialsPartialsAutoScaling(
float* __restrict destP,
117 const float* __restrict partials1,
118 const float* __restrict matrices1,
119 const float* __restrict partials2,
120 const float* __restrict matrices2,
121 int* activateScaling);
// Takes six integer buffer/index selectors and a double* out-parameter and
// returns an int (presumably a status/error code -- TODO confirm). By its
// name, computes log likelihoods across the edge between a parent and a child
// buffer; the sum is presumably written to *outSumLogLikelihood.
// Note that the out-parameter is double even though the neighbouring
// declarations in this group use float buffers.
//   parentBufferIndex      index selecting the parent buffer
//   childBufferIndex       index selecting the child buffer
//   probabilityIndex       index selecting the probability data
//   categoryWeightsIndex   index selecting the category weights
//   stateFrequenciesIndex  index selecting the state frequencies
//   scalingFactorsIndex    index selecting the scaling factors
//   outSumLogLikelihood    non-const double pointer; presumably the result
123 virtual int calcEdgeLogLikelihoods(
const int parentBufferIndex,
124 const int childBufferIndex,
125 const int probabilityIndex,
126 const int categoryWeightsIndex,
127 const int stateFrequenciesIndex,
128 const int scalingFactorsIndex,
129 double* outSumLogLikelihood);
134 BEAGLE_CPU_4_SSE_TEMPLATE
// Returns a C string; presumably the human-readable name of this
// implementation -- confirm against the definition.
156 virtual const char* getName();
// Returns a long; presumably a bitmask of the capability flags this
// implementation supports -- TODO confirm against the definition.
// NOTE(review): `const` on a by-value return type has no effect for callers.
158 virtual const long getFlags();
// Returns an int; judging by the name, the modulus to which the pattern count
// is padded for this implementation -- TODO confirm against the definition.
161 virtual int getPaddedPatternsModulus();
// Double-precision kernel declaration; by its name, the case where both
// inputs are given as states rather than partials -- TODO confirm.
// NOTE(review): the embedded numbering jumps 165 -> 167 -> 169, so parameters
// may be missing from this view of the signature.
//   destP      only non-const pointer; presumably the output buffer
//   matrices1  read-only double data, presumably the first input's matrices
//   matrices2  read-only double data, presumably the second input's matrices
165 virtual void calcStatesStates(
double* destP,
167 const double* matrices1,
169 const double* matrices2);
// Double-precision kernel declaration; by its name, the case where one input
// is given as states and the other as partials -- TODO confirm.
// NOTE(review): the embedded numbering skips 172, so a parameter may be
// missing from this view of the signature.
//   destP      only non-const pointer; presumably the output buffer
//   matrices1  read-only, declared non-aliasing (__restrict)
//   partials2  read-only, declared non-aliasing (__restrict)
//   matrices2  read-only, declared non-aliasing (__restrict)
171 virtual void calcStatesPartials(
double* destP,
173 const double* __restrict matrices1,
174 const double* __restrict partials2,
175 const double* __restrict matrices2);
// Double-precision states/partials kernel declaration that also receives a
// read-only scaleFactors array; presumably applies caller-supplied scale
// factors to the result -- TODO confirm against the definition.
// NOTE(review): the embedded numbering skips 178, so a parameter may be
// missing from this view of the signature.
//   destP         only non-const pointer; presumably the output buffer
//   matrices1     read-only, declared non-aliasing (__restrict)
//   partials2     read-only, declared non-aliasing (__restrict)
//   matrices2     read-only, declared non-aliasing (__restrict)
//   scaleFactors  read-only, declared non-aliasing (__restrict)
177 virtual void calcStatesPartialsFixedScaling(
double* destP,
179 const double* __restrict matrices1,
180 const double* __restrict partials2,
181 const double* __restrict matrices2,
182 const double* __restrict scaleFactors);
// Double-precision kernel declaration; by its name, the case where both
// inputs are given as partials -- TODO confirm against the definition.
// Every pointer is declared __restrict, i.e. the caller promises that the
// buffers do not alias one another.
//   destP      only non-const pointer; presumably the output buffer
//   partials1  read-only partials for the first input
//   matrices1  read-only matrices for the first input
//   partials2  read-only partials for the second input
//   matrices2  read-only matrices for the second input
184 virtual void calcPartialsPartials(
double* __restrict destP,
185 const double* __restrict partials1,
186 const double* __restrict matrices1,
187 const double* __restrict partials2,
188 const double* __restrict matrices2);
// Double-precision partials/partials kernel declaration that also receives a
// read-only scaleFactors array; presumably applies caller-supplied scale
// factors to the result -- TODO confirm against the definition.
// Every pointer is declared __restrict (no aliasing between buffers).
//   destP           only non-const pointer; presumably the output buffer
//   child0Partials  read-only partials for child 0
//   child0TransMat  read-only data for child 0 (presumably its transition matrix)
//   child1Partials  read-only partials for child 1
//   child1TransMat  read-only data for child 1 (presumably its transition matrix)
//   scaleFactors    read-only scale factors
190 virtual void calcPartialsPartialsFixedScaling(
double* __restrict destP,
191 const double* __restrict child0Partials,
192 const double* __restrict child0TransMat,
193 const double* __restrict child1Partials,
194 const double* __restrict child1TransMat,
195 const double* __restrict scaleFactors);
// Double-precision partials/partials kernel declaration with an extra
// non-const int* activateScaling; presumably an out-flag through which the
// kernel reports that scaling should be switched on -- TODO confirm.
// The double pointers are declared __restrict; activateScaling is not.
//   destP            non-const double pointer; presumably the output buffer
//   partials1        read-only partials for the first input
//   matrices1        read-only matrices for the first input
//   partials2        read-only partials for the second input
//   matrices2        read-only matrices for the second input
//   activateScaling  non-const int pointer (see note above)
197 virtual void calcPartialsPartialsAutoScaling(
double* __restrict destP,
198 const double* __restrict partials1,
199 const double* __restrict matrices1,
200 const double* __restrict partials2,
201 const double* __restrict matrices2,
202 int* activateScaling);
// Takes six integer buffer/index selectors and a double* out-parameter and
// returns an int (presumably a status/error code -- TODO confirm). By its
// name, computes log likelihoods across the edge between a parent and a child
// buffer; the sum is presumably written to *outSumLogLikelihood.
//   parentBufferIndex      index selecting the parent buffer
//   childBufferIndex       index selecting the child buffer
//   probabilityIndex       index selecting the probability data
//   categoryWeightsIndex   index selecting the category weights
//   stateFrequenciesIndex  index selecting the state frequencies
//   scalingFactorsIndex    index selecting the scaling factors
//   outSumLogLikelihood    non-const double pointer; presumably the result
204 virtual int calcEdgeLogLikelihoods(
const int parentBufferIndex,
205 const int childBufferIndex,
206 const int probabilityIndex,
207 const int categoryWeightsIndex,
208 const int stateFrequenciesIndex,
209 const int scalingFactorsIndex,
210 double* outSumLogLikelihood);
215 BEAGLE_CPU_FACTORY_TEMPLATE
219 int partialsBufferCount,
220 int compactBufferCount,
223 int eigenBufferCount,
224 int matrixBufferCount,
226 int scaleBufferCount,
228 long preferenceFlags,
229 long requirementFlags,
// Returns a C string; presumably the name of the implementation that the
// enclosing type (not visible in this view) produces -- TODO confirm.
232 virtual const char* getName();
// Returns a long; presumably a bitmask of supported capability flags for the
// enclosing type (not visible in this view) -- TODO confirm.
// NOTE(review): `const` on a by-value return type has no effect for callers.
233 virtual const long getFlags();
240 #include "libhmsbeagle/CPU/BeagleCPU4StateSSEImpl.hpp"
243 #endif // __BeagleCPU4StateSSEImpl__