// NOTE(review): this excerpt looks like an extracted source listing; the bare
// integers (25, 55, 56, ...) appear to be the original file's line numbers
// fused into the text, and several original lines (opening brace, dim3
// declarations, kernel argument lists, closing braces) are not visible here.
//
// build_scrunched_profiles: host-side driver that, for every candidate,
// launches normalize_kernel once and profile_kernel twice on a CUDA stream
// taken from exec_stream.
//
// Parameters (as far as this excerpt shows):
//   ncandidates  - number of iterations of the per-candidate launch loop.
//   max_phases   - used as threadsPerBlock.x and to size the dynamic shared
//                  memory (max_phases * sizeof(float)) for profile_kernel.
//   nsubbands    - not used in the visible lines; presumably a kernel
//                  argument or grid dimension - TODO confirm.
//   nsubints     - used as blocksPerGrid.x.
//   mean         - logged as "raw time series mean"; any other use is not
//                  visible here.
//   d_folded, d_weight, d_outfprof, d_outprof
//                - pointers whose use is not visible in this excerpt;
//                  presumably device buffers passed to the kernels - TODO
//                  confirm against the full source.
//   exec_stream  - streams used for the launches; candidate i uses
//                  exec_stream[i % exec_stream.size()].
25 #include "cheetah/fldo/cuda/detail/FldoUtils.h" 55 void build_scrunched_profiles(
size_t ncandidates,
size_t max_phases,
56 size_t nsubbands,
size_t nsubints,
float mean,
float *d_folded,
float *d_weight,
57 float *d_outfprof,
float* d_outprof, std::vector<util::GpuStream>&exec_stream)
// Bytes of dynamic shared memory requested per block for profile_kernel:
// one float per phase bin.
// NOTE(review): the size_t product is narrowed to int here.
62 int shared_memsize = max_phases *
sizeof(float);
// Debug logging of the launch configuration and of the supplied mean (the
// remainder of the first log statement is not visible in this excerpt).
63 PANDA_LOG_DEBUG <<
"Calling kernel to build profiles: threads= (" 72 PANDA_LOG_DEBUG <<
"raw time series mean: " << mean;
// One pass per candidate.
74 for (
size_t ncand = 0; ncand < ncandidates; ++ncand) {
// Select a stream by candidate index modulo the number of streams.
// NOTE(review): an empty exec_stream would make this a modulo by zero -
// confirm callers always supply at least one stream.
76 int nstream = ncand % exec_stream.size();
// Launch geometry for normalize_kernel: max_phases threads per block in x,
// nsubints blocks in x.
// NOTE(review): max_phases is used directly as the block size, so it must not
// exceed the device's threads-per-block limit - confirm it is bounded upstream.
78 threadsPerBlock.x = max_phases;
79 threadsPerBlock.y = 1;
80 blocksPerGrid.x = nsubints;
// normalize_kernel is launched with no dynamic shared memory on the selected
// stream (its argument list is not visible in this excerpt).
82 normalize_kernel<<<blocksPerGrid, threadsPerBlock, 0, exec_stream[nstream].stream() >>>
// Surface any launch-configuration error from the preceding launch.
89 CUDA_ERROR_CHECK(cudaGetLastError());
// Same geometry for the first profile_kernel launch, which additionally
// requests shared_memsize bytes of dynamic shared memory.
91 threadsPerBlock.x = max_phases;
92 threadsPerBlock.y = 1;
93 blocksPerGrid.x = nsubints;
95 profile_kernel<<< blocksPerGrid, threadsPerBlock, shared_memsize, exec_stream[nstream].stream() >>>
101 CUDA_ERROR_CHECK(cudaGetLastError());
// Second profile_kernel launch on the same stream.
// NOTE(review): no blocksPerGrid assignment is visible before this launch;
// original lines 105-106 are missing from this excerpt, so the grid size used
// here cannot be determined from it.
103 threadsPerBlock.x = max_phases;
104 threadsPerBlock.y = 1;
107 profile_kernel<<< blocksPerGrid, threadsPerBlock, shared_memsize, exec_stream[nstream].stream() >>>
113 CUDA_ERROR_CHECK(cudaGetLastError());
Some limits and constants for FLDO.