I am trying to implement a function which calculates the weightings and abscissae for the Gauss-Laguerre numerical integration method using C++AMP to parallelize the process and when running it I am getting a DXGI_ERROR_DEVICE_HUNG error.
This is my helper method for computing the logarithm of the gamma function on the GPU:
template <typename T>
T gammaln_fast( T tArg ) restrict( amp )
{
const T tCoefficients[] = { T( 57.1562356658629235f ), T( -59.5979603554754912f ),
T( 14.1360979747417471f ), T( -0.491913816097620199f ), T( 0.339946499848118887E-4f ),
T( 0.465236289270485756E-4f ), T( -0.983744753048795646E-4f ), T( 0.158088703224912494E-3f ),
T( -0.210264441724104883E-3f ), T( 0.217439618115212643E-3f ), T( -0.164318106536763890E-3f ),
T( 0.844182239838527433E-4f ), T( -0.261908384015814087E-4f ), T( 0.386991826595316234E-5f ) };
T y = tArg, tTemp = tArg + T( 5.2421875f );
tTemp = (tArg + T( 0.5f )) * concurrency::fast_math::log( tTemp ) - tTemp;
T tSer = T( 0.999999999999997092f );
for( std::size_t s = 0; s < (sizeof( tCoefficients ) / sizeof( T )); ++s )
{
tSer += tCoefficients[s] / ++y;
}
return tTemp + concurrency::fast_math::log( T( 2.5066282746310005f ) * tSer / tArg );
}
And here is my function which computes the weights and abscissae:
template <typename T>
ArrayPair<T> CalculateGaussLaguerreWeights_fast( const T tExponent, const std::size_t sNumPoints, T tEps = std::numeric_limits<T>::epsilon() )
{
static_assert(std::is_floating_point<T>::value, "You can only instantiate this function with a floating point data type");
static_assert(!std::is_same<T, long double>::value, "You can not instantiate this function with long double type"); // The long double type is not currently supported by C++AMP
T tCurrentGuess, tFatherGuess, tGrandFatherGuess;
std::vector<T> vecInitialGuesses( sNumPoints );
for( std::size_t s = 0; s < sNumPoints; ++s )
{
if( s == 0 )
{
tCurrentGuess = (T( 1.0f ) + tExponent) * (T( 3.0f ) + T( 0.92f ) * tExponent) / (T( 1.0f ) + T( 2.4f ) * sNumPoints + T( 1.8f ) * tExponent);
}
else if( s == 1 )
{
tFatherGuess = tCurrentGuess;
tCurrentGuess += (T( 15.0f ) + T( 6.25f ) * tExponent) / (T( 1.0f ) + T( 0.9f ) * tExponent + T( 2.5f ) * sNumPoints);
}
else
{
tGrandFatherGuess = tFatherGuess;
tFatherGuess = tCurrentGuess;
std::size_t sDec = s - 1U;
tCurrentGuess += ((T( 1.0f ) + T( 2.55f ) * sDec) / (T( 1.9f ) * sDec) + T( 1.26f ) * sDec * tExponent
/ (T( 1.0f ) + T( 3.5f ) * sDec)) * (tCurrentGuess - tGrandFatherGuess) / (T( 1.0f ) + T( 0.3f ) * tExponent);
}
vecInitialGuesses[s] = tCurrentGuess;
}
concurrency::array<T> arrWeights( sNumPoints ), arrAbsciasses( sNumPoints, std::begin(vecInitialGuesses) );
try {
concurrency::parallel_for_each( arrAbsciasses.extent, [=, &arrAbsciasses, &arrWeights]( concurrency::index<1> index ) restrict( amp ) {
T tVal = arrAbsciasses[index], tIntermediate;
T tPolynomial1 = T( 1.0f ), tPolynomial2 = T( 0.0f ), tPolynomial3, tDerivative;
std::size_t sIterationNum = 0;
do {
tPolynomial1 = T( 1.0f ), tPolynomial2 = T( 0.0f );
for( std::size_t s = 0; s < sNumPoints; ++s )
{
tPolynomial3 = tPolynomial2;
tPolynomial2 = tPolynomial1;
tPolynomial1 = ((2 * s + 1 + tExponent - tVal) * tPolynomial2 - (s + tExponent) * tPolynomial3) / (s + 1);
}
tDerivative = (sNumPoints * tPolynomial1 - (sNumPoints + tExponent) * tPolynomial2) / tVal;
tIntermediate = tVal;
tVal = tIntermediate - tPolynomial1 / tDerivative;
++sIterationNum;
} while( concurrency::fast_math::fabs( tVal - tIntermediate ) > tEps || sIterationNum < 10 );
arrAbsciasses[index] = tVal;
arrWeights[index] = -concurrency::fast_math::exp( gammaln_fast( tExponent + sNumPoints ) - gammaln_fast( T( sNumPoints ) ) ) / (tDerivative * sNumPoints * tPolynomial2);
} );
}
catch( concurrency::runtime_exception& e )
{
std::cerr << "Runtime error, code: " << e.get_error_code() << "; message: " << e.what() << std::endl;
}
return std::make_pair( std::move( arrAbsciasses ), std::move( arrWeights ) );
}
And here is the full trace from the debug console:
D3D11: Removing Device. D3D11 ERROR: ID3D11Device::RemoveDevice: Device removal has been triggered for the following reason (DXGI_ERROR_DEVICE_HUNG: The Device took an unreasonable amount of time to execute its commands, or the hardware crashed/hung. As a result, the TDR (Timeout Detection and Recovery) mechanism has been triggered. The current Device Context was executing commands when the hang occurred. The application may want to respawn and fallback to less aggressive use of the display hardware). [ EXECUTION ERROR #378: DEVICE_REMOVAL_PROCESS_AT_FAULT] D3D11 ERROR: ID3D11DeviceContext::Map: Returning DXGI_ERROR_DEVICE_REMOVED, when a Resource was trying to be mapped with READ or READWRITE. [ RESOURCE_MANIPULATION ERROR #2097214: RESOURCE_MAP_DEVICEREMOVED_RETURN]
My apologies for not being able to produce a small reproducible example; I hope that this is still an acceptable question, as I am unable to solve this by myself.