Clean up the CUDA executor code.
This commit is contained in:
parent
2f6d1607c8
commit
6af5ac7e27
@ -234,16 +234,11 @@ struct EigenMetaKernelEval<Evaluator, Index, true> {
|
||||
template <typename Evaluator, typename Index>
|
||||
__global__ void
|
||||
__launch_bounds__(1024)
|
||||
EigenMetaKernel(Evaluator memcopied_eval, Index size) {
|
||||
EigenMetaKernel(Evaluator eval, Index size) {
|
||||
|
||||
const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const Index step_size = blockDim.x * gridDim.x;
|
||||
|
||||
// Cuda memcopies the kernel arguments. That's fine for POD, but for more
|
||||
// complex types such as evaluators we should really conform to the C++
|
||||
// standard and call a proper copy constructor.
|
||||
Evaluator eval(memcopied_eval);
|
||||
|
||||
const bool vectorizable = Evaluator::PacketAccess & Evaluator::IsAligned;
|
||||
EigenMetaKernelEval<Evaluator, Index, vectorizable>::run(eval, first_index, size, step_size);
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user