1 #include "cheetah/tdas/cuda/Tdas.cuh" 2 #include "cheetah/cuda_utils/nvtx.h" 12 : utils::AlgorithmBase<
Config, tdas::
Config>(config, algo_config)
25 PUSH_NVTX_RANGE(
"tdas_cuda_Tdas_process",0);
26 PANDA_LOG <<
"cuda::Tdas::process() invoked (on device "<< device.device_id() <<
")";
35 PUSH_NVTX_RANGE(
"tdas_cuda_Tdas_submodule_creation",0);
42 fft::Fft fft_c2r(_impl_config.fft_config());
43 fft::Fft fft_r2c(_impl_config.fft_config());
46 auto candidate_list_ptr = std::make_shared<data::Ccl>();
49 TimeSeriesType tdrt_input;
50 TimeSeriesType tdrt_output;
51 FourierSeriesType r2cfft_output;
52 FourierSeriesType dred_output;
53 PowerSeriesType pwft_output;
54 std::vector<PowerSeriesType> hrms_output(nharmonics);
57 PUSH_NVTX_RANGE(
"tdas_cuda_Tdas_process_dm_trial",3);
58 for(
auto dm_trial_iterator = data.cbegin();
59 dm_trial_iterator!=data.cend();
62 auto const& dm_trial = *dm_trial_iterator;
64 if (dm_trial.number_of_samples() < _algo_config.
minimum_size())
66 PANDA_LOG_WARN <<
"Number of samples in DM trial is lower than configured minimum size" 67 <<
"("<<dm_trial.number_of_samples()<<
" < "<<_algo_config.
minimum_size()<<
")";
69 return candidate_list_ptr;
73 data::DedispersionMeasureType<float>
const dm = dm_trial.dm();
74 PANDA_LOG_DEBUG <<
"Processing dispersion measure: " << dm;
80 PUSH_NVTX_RANGE(
"tdas_cuda_Tdas_copy_trial_to_device",1);
81 auto it = dm_trial.copy_to(copy_buffer);
85 std::size_t ncopied = std::distance(copy_buffer.
begin(),it);
88 PUSH_NVTX_RANGE(
"tdas_cuda_Tdas_prepare_and_deredden",2);
90 tdrt_input.resize(ncopied);
93 tdrt_input.sampling_interval(dm_trial.sampling_interval());
96 thrust::copy(copy_buffer.
begin(),copy_buffer.
begin()+ncopied,tdrt_input.begin());
100 if (ncopied!=_algo_config.
size())
103 T padding_value = thrust::reduce(thrust::cuda::par,tdrt_input.begin(),tdrt_input.end())/ncopied;
105 tdrt_input.resize(_algo_config.
size(),padding_value);
109 fft_r2c.process(device,tdrt_input,r2cfft_output);
115 brdz.process<cheetah::Cuda, T,
typename FourierSeriesType::AllocatorType>(device,dred_output);
118 fft_c2r.process(device, dred_output, tdrt_input);
124 tdrt_input.size(),tdrt_input.sampling_interval().value());
125 for (
auto accel: acc_list)
127 PANDA_LOG_DEBUG <<
"Processing acceleration: " << accel;
128 PUSH_NVTX_RANGE(
"tdas_cuda_Tdas_process_acceleration_trial",4);
130 tdrt.process(device,tdrt_input,tdrt_output,accel);
133 fft_r2c.process(device,tdrt_output,r2cfft_output);
136 pwft.process_nn(device,r2cfft_output,pwft_output);
139 hrms.process(device,pwft_output,hrms_output);
142 std::size_t harmonic = 1;
144 (device,pwft_output,*candidate_list_ptr,dm,accel,harmonic);
148 for (
auto const& series: hrms_output)
151 (device,series,*candidate_list_ptr,dm,accel,harmonic);
158 PANDA_LOG <<
"cuda::Tdas::process() complete (on device "<< device.device_id() <<
")";
160 return candidate_list_ptr;
163 template <
typename T>
165 std::shared_ptr<DmTimeSliceType>
const& data)
std::shared_ptr< data::Ccl > operator()(panda::PoolResource< cheetah::Cuda > &gpu, std::shared_ptr< DmTimeSliceType > const &data)
Version of process method for async calls.
Time Domain Resampler CUDA version / Transform / Module.
AccListGenConfig const & acceleration_list_generator() const
Return acceleration list generator configuration.
A class for performing FFTs.
Class that wraps a subset of DMs from a DmTime object.
Class for implementing spectral dereddening.
Tdas(Config const &config, tdas::Config const &algo_config)
Construct a new Tdas instance.
Power Spectrum Fourier Transform version / Transform / Module.
std::size_t size() const
The size of the transform to use for the search.
A container of Fourier series data.
Some limits and constants for FLDO.
ConstIterator begin() const
Iterators to device memory.
std::vector< data::AccelerationType > acceleration_list(data::DedispersionMeasureType< float > dm, std::size_t number_of_samples, double sampling_interval) const
Generate a list of accelerations.
A class for performing harmonic summing.
Class for power series (detected FrequencySeries).
std::size_t minimum_size() const
The minimum timeseries length that will be searched.
Class for time series data.
void number_of_harmonic_sums(std::size_t nharmonics)
Number of harmonic sums to perform in the search.
CUDA/Thrust implementation of the Tdas module.
Time Domain Spectral Peak Detection and Candidate List Output.
void resize(std::size_t size)
Resize the data.
Class for performing birdie zapping.
data::AccelerationType magnitude() const
Return the maximum absolute acceleration magnitude.
std::shared_ptr< data::Ccl > process(panda::PoolResource< cheetah::Cuda > &gpu, DmTimeSliceType const &data)
Search a DmTimeSlice for significant periodic signals at a range of accelerations.