31 #ifndef BEAGLE_CPU_4STATE_IMPL_HPP
32 #define BEAGLE_CPU_4STATE_IMPL_HPP
35 #include "libhmsbeagle/config.h"
45 #include "libhmsbeagle/beagle.h"
46 #include "libhmsbeagle/CPU/BeagleCPUImpl.h"
47 #include "libhmsbeagle/CPU/BeagleCPU4StateImpl.h"
49 #define EXPERIMENTAL_OPENMP
52 #define OFFSET (4 + T_PAD) // For easy conversion between 4/5
54 #define PREFETCH_MATRIX(num,matrices,w) \
55 REALTYPE m##num##00, m##num##01, m##num##02, m##num##03, \
56 m##num##10, m##num##11, m##num##12, m##num##13, \
57 m##num##20, m##num##21, m##num##22, m##num##23, \
58 m##num##30, m##num##31, m##num##32, m##num##33; \
59 m##num##00 = matrices[w + OFFSET*0 + 0]; \
60 m##num##01 = matrices[w + OFFSET*0 + 1]; \
61 m##num##02 = matrices[w + OFFSET*0 + 2]; \
62 m##num##03 = matrices[w + OFFSET*0 + 3]; \
63 m##num##10 = matrices[w + OFFSET*1 + 0]; \
64 m##num##11 = matrices[w + OFFSET*1 + 1]; \
65 m##num##12 = matrices[w + OFFSET*1 + 2]; \
66 m##num##13 = matrices[w + OFFSET*1 + 3]; \
67 m##num##20 = matrices[w + OFFSET*2 + 0]; \
68 m##num##21 = matrices[w + OFFSET*2 + 1]; \
69 m##num##22 = matrices[w + OFFSET*2 + 2]; \
70 m##num##23 = matrices[w + OFFSET*2 + 3]; \
71 m##num##30 = matrices[w + OFFSET*3 + 0]; \
72 m##num##31 = matrices[w + OFFSET*3 + 1]; \
73 m##num##32 = matrices[w + OFFSET*3 + 2]; \
74 m##num##33 = matrices[w + OFFSET*3 + 3];
76 #define PREFETCH_PARTIALS(num,partials,v) \
77 REALTYPE p##num##0, p##num##1, p##num##2, p##num##3; \
78 p##num##0 = partials[v + 0]; \
79 p##num##1 = partials[v + 1]; \
80 p##num##2 = partials[v + 2]; \
81 p##num##3 = partials[v + 3];
105 #define DO_INTEGRATION(num) \
106 REALTYPE sum##num##0, sum##num##1, sum##num##2, sum##num##3; \
107 sum##num##0 = m##num##00 * p##num##0 + \
108 m##num##01 * p##num##1 + \
109 m##num##02 * p##num##2 + \
110 m##num##03 * p##num##3; \
112 sum##num##1 = m##num##10 * p##num##0 + \
113 m##num##11 * p##num##1 + \
114 m##num##12 * p##num##2 + \
115 m##num##13 * p##num##3; \
117 sum##num##2 = m##num##20 * p##num##0 + \
118 m##num##21 * p##num##1 + \
119 m##num##22 * p##num##2 + \
120 m##num##23 * p##num##3; \
122 sum##num##3 = m##num##30 * p##num##0 + \
123 m##num##31 * p##num##1 + \
124 m##num##32 * p##num##2 + \
125 m##num##33 * p##num##3;
131 BEAGLE_CPU_FACTORY_TEMPLATE
132 inline const char* getBeagleCPU4StateName(){
return "CPU-4State-Unknown"; };
135 inline const char* getBeagleCPU4StateName<double>(){
return "CPU-4State-Double"; };
138 inline const char* getBeagleCPU4StateName<float>(){
return "CPU-4State-Single"; };
141 BeagleCPU4StateImpl<BEAGLE_CPU_GENERIC>::~BeagleCPU4StateImpl() {
154 void BeagleCPU4StateImpl<BEAGLE_CPU_GENERIC>::calcStatesStates(REALTYPE* destP,
156 const REALTYPE* matrices1,
158 const REALTYPE* matrices2) {
160 #pragma omp parallel for num_threads(kCategoryCount)
161 for (
int l = 0; l < kCategoryCount; l++) {
162 int v = l*4*kPaddedPatternCount;
165 for (
int k = 0; k < kPatternCount; k++) {
167 const int state1 = states1[k];
168 const int state2 = states2[k];
170 destP[v ] = matrices1[w + state1] *
171 matrices2[w + state2];
172 destP[v + 1] = matrices1[w + OFFSET*1 + state1] *
173 matrices2[w + OFFSET*1 + state2];
174 destP[v + 2] = matrices1[w + OFFSET*2 + state1] *
175 matrices2[w + OFFSET*2 + state2];
176 destP[v + 3] = matrices1[w + OFFSET*3 + state1] *
177 matrices2[w + OFFSET*3 + state2];
184 void BeagleCPU4StateImpl<BEAGLE_CPU_GENERIC>::calcStatesStatesFixedScaling(REALTYPE* destP,
186 const REALTYPE* matrices1,
188 const REALTYPE* matrices2,
189 const REALTYPE* scaleFactors) {
191 #pragma omp parallel for num_threads(kCategoryCount)
192 for (
int l = 0; l < kCategoryCount; l++) {
193 int v = l*4*kPaddedPatternCount;
196 for (
int k = 0; k < kPatternCount; k++) {
198 const int state1 = states1[k];
199 const int state2 = states2[k];
200 const REALTYPE scaleFactor = scaleFactors[k];
202 destP[v ] = matrices1[w + state1] *
203 matrices2[w + state2] / scaleFactor;
204 destP[v + 1] = matrices1[w + OFFSET*1 + state1] *
205 matrices2[w + OFFSET*1 + state2] / scaleFactor;
206 destP[v + 2] = matrices1[w + OFFSET*2 + state1] *
207 matrices2[w + OFFSET*2 + state2] / scaleFactor;
208 destP[v + 3] = matrices1[w + OFFSET*3 + state1] *
209 matrices2[w + OFFSET*3 + state2] / scaleFactor;
219 void BeagleCPU4StateImpl<BEAGLE_CPU_GENERIC>::calcStatesPartials(REALTYPE* destP,
221 const REALTYPE* matrices1,
222 const REALTYPE* partials2,
223 const REALTYPE* matrices2) {
225 #pragma omp parallel for num_threads(kCategoryCount)
226 for (
int l = 0; l < kCategoryCount; l++) {
227 int u = l*4*kPaddedPatternCount;
230 PREFETCH_MATRIX(2,matrices2,w);
232 for (
int k = 0; k < kPatternCount; k++) {
234 const int state1 = states1[k];
236 PREFETCH_PARTIALS(2,partials2,u);
240 destP[u ] = matrices1[w + state1] * sum20;
241 destP[u + 1] = matrices1[w + OFFSET*1 + state1] * sum21;
242 destP[u + 2] = matrices1[w + OFFSET*2 + state1] * sum22;
243 destP[u + 3] = matrices1[w + OFFSET*3 + state1] * sum23;
251 void BeagleCPU4StateImpl<BEAGLE_CPU_GENERIC>::calcStatesPartialsFixedScaling(REALTYPE* destP,
253 const REALTYPE* matrices1,
254 const REALTYPE* partials2,
255 const REALTYPE* matrices2,
256 const REALTYPE* scaleFactors) {
258 #pragma omp parallel for num_threads(kCategoryCount)
259 for (
int l = 0; l < kCategoryCount; l++) {
260 int u = l*4*kPaddedPatternCount;
263 PREFETCH_MATRIX(2,matrices2,w);
265 for (
int k = 0; k < kPatternCount; k++) {
267 const int state1 = states1[k];
268 const REALTYPE scaleFactor = scaleFactors[k];
270 PREFETCH_PARTIALS(2,partials2,u);
274 destP[u ] = matrices1[w + state1] * sum20 / scaleFactor;
275 destP[u + 1] = matrices1[w + OFFSET*1 + state1] * sum21 / scaleFactor;
276 destP[u + 2] = matrices1[w + OFFSET*2 + state1] * sum22 / scaleFactor;
277 destP[u + 3] = matrices1[w + OFFSET*3 + state1] * sum23 / scaleFactor;
285 void BeagleCPU4StateImpl<BEAGLE_CPU_GENERIC>::calcPartialsPartials(REALTYPE* destP,
286 const REALTYPE* partials1,
287 const REALTYPE* matrices1,
288 const REALTYPE* partials2,
289 const REALTYPE* matrices2) {
292 #pragma omp parallel for num_threads(kCategoryCount)
293 for (
int l = 0; l < kCategoryCount; l++) {
294 int u = l*4*kPaddedPatternCount;
297 PREFETCH_MATRIX(1,matrices1,w);
298 PREFETCH_MATRIX(2,matrices2,w);
299 for (
int k = 0; k < kPatternCount; k++) {
300 PREFETCH_PARTIALS(1,partials1,u);
301 PREFETCH_PARTIALS(2,partials2,u);
307 destP[u ] = sum10 * sum20;
308 destP[u + 1] = sum11 * sum21;
309 destP[u + 2] = sum12 * sum22;
310 destP[u + 3] = sum13 * sum23;
319 void BeagleCPU4StateImpl<BEAGLE_CPU_GENERIC>::calcPartialsPartialsAutoScaling(REALTYPE* destP,
320 const REALTYPE* partials1,
321 const REALTYPE* matrices1,
322 const REALTYPE* partials2,
323 const REALTYPE* matrices2,
324 int* activateScaling) {
327 #pragma omp parallel for num_threads(kCategoryCount)
328 for (
int l = 0; l < kCategoryCount; l++) {
329 int u = l*4*kPaddedPatternCount;
332 PREFETCH_MATRIX(1,matrices1,w);
333 PREFETCH_MATRIX(2,matrices2,w);
334 for (
int k = 0; k < kPatternCount; k++) {
335 PREFETCH_PARTIALS(1,partials1,u);
336 PREFETCH_PARTIALS(2,partials2,u);
342 destP[u ] = sum10 * sum20;
343 destP[u + 1] = sum11 * sum21;
344 destP[u + 2] = sum12 * sum22;
345 destP[u + 3] = sum13 * sum23;
347 if (*activateScaling == 0) {
350 frexp(destP[u], &expMax);
351 frexp(destP[u + 1], &expTmp);
352 if (abs(expTmp) > abs(expMax))
354 frexp(destP[u + 2], &expTmp);
355 if (abs(expTmp) > abs(expMax))
357 frexp(destP[u + 3], &expTmp);
358 if (abs(expTmp) > abs(expMax))
361 if(abs(expMax) > scalingExponentThreshhold) {
362 *activateScaling = 1;
374 void BeagleCPU4StateImpl<BEAGLE_CPU_GENERIC>::calcPartialsPartialsFixedScaling(REALTYPE* destP,
375 const REALTYPE* partials1,
376 const REALTYPE* matrices1,
377 const REALTYPE* partials2,
378 const REALTYPE* matrices2,
379 const REALTYPE* scaleFactors) {
381 #pragma omp parallel for num_threads(kCategoryCount)
382 for (
int l = 0; l < kCategoryCount; l++) {
383 int u = l*4*kPaddedPatternCount;
386 PREFETCH_MATRIX(1,matrices1,w);
387 PREFETCH_MATRIX(2,matrices2,w);
389 for (
int k = 0; k < kPatternCount; k++) {
392 const REALTYPE scaleFactor = scaleFactors[k];
394 PREFETCH_PARTIALS(1,partials1,u);
395 PREFETCH_PARTIALS(2,partials2,u);
401 destP[u ] = sum10 * sum20 / scaleFactor;
402 destP[u + 1] = sum11 * sum21 / scaleFactor;
403 destP[u + 2] = sum12 * sum22 / scaleFactor;
404 destP[u + 3] = sum13 * sum23 / scaleFactor;
412 int inline BeagleCPU4StateImpl<BEAGLE_CPU_GENERIC>::integrateOutStatesAndScale(
const REALTYPE* integrationTmp,
413 const int stateFrequenciesIndex,
414 const int scalingFactorsIndex,
415 double* outSumLogLikelihood) {
417 int returnCode = BEAGLE_SUCCESS;
419 register REALTYPE freq0, freq1, freq2, freq3;
420 freq0 = gStateFrequencies[stateFrequenciesIndex][0];
421 freq1 = gStateFrequencies[stateFrequenciesIndex][1];
422 freq2 = gStateFrequencies[stateFrequenciesIndex][2];
423 freq3 = gStateFrequencies[stateFrequenciesIndex][3];
426 for(
int k = 0; k < kPatternCount; k++) {
428 freq0 * integrationTmp[u ] +
429 freq1 * integrationTmp[u + 1] +
430 freq2 * integrationTmp[u + 2] +
431 freq3 * integrationTmp[u + 3];
435 outLogLikelihoodsTmp[k] = log(sumOverI);
438 if (scalingFactorsIndex != BEAGLE_OP_NONE) {
439 const REALTYPE* scalingFactors = gScaleBuffers[scalingFactorsIndex];
440 for(
int k=0; k < kPatternCount; k++) {
441 outLogLikelihoodsTmp[k] += scalingFactors[k];
445 *outSumLogLikelihood = 0.0;
446 for(
int k=0; k < kPatternCount; k++) {
447 *outSumLogLikelihood += outLogLikelihoodsTmp[k] * gPatternWeights[k];
450 if (!(*outSumLogLikelihood - *outSumLogLikelihood == 0.0))
451 returnCode = BEAGLE_ERROR_FLOATING_POINT;
457 #define FAST_MAX(x,y) (x > y ? x : y)
463 void BeagleCPU4StateImpl<BEAGLE_CPU_GENERIC>::rescalePartials(REALTYPE* destP,
464 REALTYPE* scaleFactors,
465 REALTYPE* cumulativeScaleFactors,
466 const int fillWithOnes) {
468 bool useLogScalars = kFlags & BEAGLE_FLAG_SCALERS_LOG;
470 for (
int k = 0; k < kPatternCount; k++) {
472 const int patternOffset = k * 4;
473 for (
int l = 0; l < kCategoryCount; l++) {
474 int offset = l * kPaddedPatternCount * 4 + patternOffset;
476 #ifdef BEAGLE_TEST_OPTIMIZATION
477 REALTYPE max01 = FAST_MAX(destP[offset + 0], destP[offset + 1]);
478 REALTYPE max23 = FAST_MAX(destP[offset + 2], destP[offset + 3]);
479 max = FAST_MAX(max, max01);
480 max = FAST_MAX(max, max23);
483 for (
int i = 0; i < 4; i++) {
484 if(destP[offset] > max)
494 REALTYPE oneOverMax = REALTYPE(1.0) / max;
495 for (
int l = 0; l < kCategoryCount; l++) {
496 int offset = l * kPaddedPatternCount * 4 + patternOffset;
498 for (
int i = 0; i < 4; i++)
499 destP[offset++] *= oneOverMax;
503 REALTYPE logMax = log(max);
504 scaleFactors[k] = logMax;
505 if( cumulativeScaleFactors != NULL )
506 cumulativeScaleFactors[k] += logMax;
508 scaleFactors[k] = max;
509 if( cumulativeScaleFactors != NULL )
510 cumulativeScaleFactors[k] += log(max);
516 int BeagleCPU4StateImpl<BEAGLE_CPU_GENERIC>::calcEdgeLogLikelihoods(
const int parIndex,
517 const int childIndex,
519 const int categoryWeightsIndex,
520 const int stateFrequenciesIndex,
521 const int scalingFactorsIndex,
522 double* outSumLogLikelihood) {
525 assert(parIndex >= kTipCount);
527 const REALTYPE* partialsParent = gPartials[parIndex];
528 const REALTYPE* transMatrix = gTransitionMatrices[probIndex];
529 const REALTYPE* wt = gCategoryWeights[categoryWeightsIndex];
532 memset(integrationTmp, 0, (kPatternCount * kStateCount)*
sizeof(REALTYPE));
534 if (childIndex < kTipCount && gTipStates[childIndex]) {
536 const int* statesChild = gTipStates[childIndex];
539 for(
int l = 0; l < kCategoryCount; l++) {
541 const REALTYPE weight = wt[l];
542 for(
int k = 0; k < kPatternCount; k++) {
544 const int stateChild = statesChild[k];
546 integrationTmp[u ] += transMatrix[w + stateChild] * partialsParent[v ] * weight;
547 integrationTmp[u + 1] += transMatrix[w + OFFSET*1 + stateChild] * partialsParent[v + 1] * weight;
548 integrationTmp[u + 2] += transMatrix[w + OFFSET*2 + stateChild] * partialsParent[v + 2] * weight;
549 integrationTmp[u + 3] += transMatrix[w + OFFSET*3 + stateChild] * partialsParent[v + 3] * weight;
556 v += 4 * kExtraPatterns;
561 const REALTYPE* partialsChild = gPartials[childIndex];
566 for(
int l = 0; l < kCategoryCount; l++) {
569 int v = l*kPaddedPatternCount*4;
571 const REALTYPE weight = wt[l];
573 PREFETCH_MATRIX(1,transMatrix,w);
575 for(
int k = 0; k < kPatternCount; k++) {
577 const REALTYPE* partials1 = partialsChild;
579 PREFETCH_PARTIALS(1,partials1,v);
583 integrationTmp[u ] += sum10 * partialsParent[v ] * weight;
584 integrationTmp[u + 1] += sum11 * partialsParent[v + 1] * weight;
585 integrationTmp[u + 2] += sum12 * partialsParent[v + 2] * weight;
586 integrationTmp[u + 3] += sum13 * partialsParent[v + 3] * weight;
594 v += 4 * kExtraPatterns;
599 return integrateOutStatesAndScale(integrationTmp, stateFrequenciesIndex, scalingFactorsIndex, outSumLogLikelihood);
603 int BeagleCPU4StateImpl<BEAGLE_CPU_GENERIC>::calcRootLogLikelihoods(
const int bufferIndex,
604 const int categoryWeightsIndex,
605 const int stateFrequenciesIndex,
606 const int scalingFactorsIndex,
607 double* outSumLogLikelihood) {
609 const REALTYPE* rootPartials = gPartials[bufferIndex];
610 assert(rootPartials);
611 const REALTYPE* wt = gCategoryWeights[categoryWeightsIndex];
615 const REALTYPE wt0 = wt[0];
616 for (
int k = 0; k < kPatternCount; k++) {
617 integrationTmp[v ] = rootPartials[v ] * wt0;
618 integrationTmp[v + 1] = rootPartials[v + 1] * wt0;
619 integrationTmp[v + 2] = rootPartials[v + 2] * wt0;
620 integrationTmp[v + 3] = rootPartials[v + 3] * wt0;
623 for (
int l = 1; l < kCategoryCount; l++) {
625 const REALTYPE wtl = wt[l];
626 for (
int k = 0; k < kPatternCount; k++) {
627 integrationTmp[u ] += rootPartials[v ] * wtl;
628 integrationTmp[u + 1] += rootPartials[v + 1] * wtl;
629 integrationTmp[u + 2] += rootPartials[v + 2] * wtl;
630 integrationTmp[u + 3] += rootPartials[v + 3] * wtl;
635 v += 4 * kExtraPatterns;
638 return integrateOutStatesAndScale(integrationTmp, stateFrequenciesIndex, scalingFactorsIndex, outSumLogLikelihood);
642 int BeagleCPU4StateImpl<BEAGLE_CPU_GENERIC>::calcRootLogLikelihoodsMulti(
const int* bufferIndices,
643 const int* categoryWeightsIndices,
644 const int* stateFrequenciesIndices,
645 const int* scaleBufferIndices,
647 double* outSumLogLikelihood) {
649 int returnCode = BEAGLE_SUCCESS;
651 std::vector<int> indexMaxScale(kPatternCount);
652 std::vector<REALTYPE> maxScaleFactor(kPatternCount);
654 for (
int subsetIndex = 0 ; subsetIndex < count; ++subsetIndex ) {
655 const int rootPartialIndex = bufferIndices[subsetIndex];
656 const REALTYPE* rootPartials = gPartials[rootPartialIndex];
657 const REALTYPE* frequencies = gStateFrequencies[stateFrequenciesIndices[subsetIndex]];
658 const REALTYPE* wt = gCategoryWeights[categoryWeightsIndices[subsetIndex]];
662 const REALTYPE wt0 = wt[0];
663 for (
int k = 0; k < kPatternCount; k++) {
664 integrationTmp[v ] = rootPartials[v ] * wt0;
665 integrationTmp[v + 1] = rootPartials[v + 1] * wt0;
666 integrationTmp[v + 2] = rootPartials[v + 2] * wt0;
667 integrationTmp[v + 3] = rootPartials[v + 3] * wt0;
670 for (
int l = 1; l < kCategoryCount; l++) {
672 const REALTYPE wtl = wt[l];
673 for (
int k = 0; k < kPatternCount; k++) {
674 integrationTmp[u ] += rootPartials[v ] * wtl;
675 integrationTmp[u + 1] += rootPartials[v + 1] * wtl;
676 integrationTmp[u + 2] += rootPartials[v + 2] * wtl;
677 integrationTmp[u + 3] += rootPartials[v + 3] * wtl;
682 v += 4 * kExtraPatterns;
685 register REALTYPE freq0, freq1, freq2, freq3;
686 freq0 = frequencies[0];
687 freq1 = frequencies[1];
688 freq2 = frequencies[2];
689 freq3 = frequencies[3];
692 for (
int k = 0; k < kPatternCount; k++) {
694 freq0 * integrationTmp[u ] +
695 freq1 * integrationTmp[u + 1] +
696 freq2 * integrationTmp[u + 2] +
697 freq3 * integrationTmp[u + 3];
702 if (scaleBufferIndices[0] != BEAGLE_OP_NONE || (kFlags & BEAGLE_FLAG_SCALING_ALWAYS)) {
703 int cumulativeScalingFactorIndex;
704 if (kFlags & BEAGLE_FLAG_SCALING_ALWAYS)
705 cumulativeScalingFactorIndex = rootPartialIndex - kTipCount;
707 cumulativeScalingFactorIndex = scaleBufferIndices[subsetIndex];
709 const REALTYPE* cumulativeScaleFactors = gScaleBuffers[cumulativeScalingFactorIndex];
711 if (subsetIndex == 0) {
712 indexMaxScale[k] = 0;
713 maxScaleFactor[k] = cumulativeScaleFactors[k];
714 for (
int j = 1; j < count; j++) {
715 REALTYPE tmpScaleFactor;
716 if (kFlags & BEAGLE_FLAG_SCALING_ALWAYS)
717 tmpScaleFactor = gScaleBuffers[bufferIndices[j] - kTipCount][k];
719 tmpScaleFactor = gScaleBuffers[scaleBufferIndices[j]][k];
721 if (tmpScaleFactor > maxScaleFactor[k]) {
722 indexMaxScale[k] = j;
723 maxScaleFactor[k] = tmpScaleFactor;
728 if (subsetIndex != indexMaxScale[k])
729 sum *= exp((REALTYPE)(cumulativeScaleFactors[k] - maxScaleFactor[k]));
732 if (subsetIndex == 0) {
733 outLogLikelihoodsTmp[k] = sum;
734 }
else if (subsetIndex == count - 1) {
735 REALTYPE tmpSum = outLogLikelihoodsTmp[k] + sum;
737 outLogLikelihoodsTmp[k] = log(tmpSum);
739 outLogLikelihoodsTmp[k] += sum;
744 if (scaleBufferIndices[0] != BEAGLE_OP_NONE || (kFlags & BEAGLE_FLAG_SCALING_ALWAYS)) {
745 for(
int i=0; i<kPatternCount; i++)
746 outLogLikelihoodsTmp[i] += maxScaleFactor[i];
749 *outSumLogLikelihood = 0.0;
750 for (
int i = 0; i < kPatternCount; i++) {
751 *outSumLogLikelihood += outLogLikelihoodsTmp[i] * gPatternWeights[i];
754 if (!(*outSumLogLikelihood - *outSumLogLikelihood == 0.0))
755 returnCode = BEAGLE_ERROR_FLOATING_POINT;
763 const char* BeagleCPU4StateImpl<BEAGLE_CPU_GENERIC>::getName() {
764 return getBeagleCPU4StateName<BEAGLE_CPU_FACTORY_GENERIC>();
770 BEAGLE_CPU_FACTORY_TEMPLATE
771 BeagleImpl* BeagleCPU4StateImplFactory<BEAGLE_CPU_FACTORY_GENERIC>::createImpl(
int tipCount,
772 int partialsBufferCount,
773 int compactBufferCount,
776 int eigenBufferCount,
777 int matrixBufferCount,
779 int scaleBufferCount,
781 long preferenceFlags,
782 long requirementFlags,
785 if (stateCount != 4) {
789 BeagleImpl* impl =
new BeagleCPU4StateImpl<REALTYPE, T_PAD_DEFAULT, P_PAD_DEFAULT>();
792 if (impl->createInstance(tipCount, partialsBufferCount, compactBufferCount, stateCount,
793 patternCount, eigenBufferCount, matrixBufferCount,
794 categoryCount,scaleBufferCount, resourceNumber,
795 preferenceFlags, requirementFlags) == 0)
799 if (DEBUGGING_OUTPUT)
800 std::cerr <<
"exception in initialize\n";
810 BEAGLE_CPU_FACTORY_TEMPLATE
811 const char* BeagleCPU4StateImplFactory<BEAGLE_CPU_FACTORY_GENERIC>::getName() {
812 return getBeagleCPU4StateName<BEAGLE_CPU_FACTORY_GENERIC>();
815 BEAGLE_CPU_FACTORY_TEMPLATE
816 const long BeagleCPU4StateImplFactory<BEAGLE_CPU_FACTORY_GENERIC>::getFlags() {
817 long flags = BEAGLE_FLAG_COMPUTATION_SYNCH |
818 BEAGLE_FLAG_SCALING_MANUAL | BEAGLE_FLAG_SCALING_ALWAYS | BEAGLE_FLAG_SCALING_AUTO |
819 BEAGLE_FLAG_THREADING_NONE |
820 BEAGLE_FLAG_PROCESSOR_CPU |
821 BEAGLE_FLAG_VECTOR_NONE |
822 BEAGLE_FLAG_SCALERS_LOG | BEAGLE_FLAG_SCALERS_RAW |
823 BEAGLE_FLAG_EIGEN_COMPLEX | BEAGLE_FLAG_EIGEN_REAL |
824 BEAGLE_FLAG_INVEVEC_STANDARD | BEAGLE_FLAG_INVEVEC_TRANSPOSED;
826 if (DOUBLE_PRECISION)
827 flags |= BEAGLE_FLAG_PRECISION_DOUBLE;
829 flags |= BEAGLE_FLAG_PRECISION_SINGLE;
836 #endif // BEAGLE_CPU_4STATE_IMPL_HPP