Cheetah - SKA - PSS - Prototype Time Domain Search Pipeline
Tdas.cu
1 #include "cheetah/tdas/cuda/Tdas.cuh"
2 #include "cheetah/cuda_utils/nvtx.h"
3 #include "panda/Log.h"
4 
5 namespace ska {
6 namespace cheetah {
7 namespace tdas {
8 namespace cuda {
9 
10 template <typename T>
11 Tdas<T>::Tdas(Config const& config, tdas::Config const& algo_config)
12  : utils::AlgorithmBase<Config, tdas::Config>(config, algo_config)
13 {
14 }
15 
16 template <typename T>
18 {
19 }
20 
21 template <typename T>
22 std::shared_ptr<data::Ccl> Tdas<T>::process(panda::PoolResource<Cuda>& device,
23  DmTimeSliceType const& data)
24 {
25  PUSH_NVTX_RANGE("tdas_cuda_Tdas_process",0);
26  PANDA_LOG << "cuda::Tdas::process() invoked (on device "<< device.device_id() << ")";
28  typedef data::TimeSeries<cheetah::Cuda,T> TimeSeriesType;
29  typedef data::PowerSeries<cheetah::Cuda,T> PowerSeriesType;
30 
31  //Thread safe processing object creation
32  //Note that this is memory inefficient but gets us around multi
33  //GPU and multi thread issues
34 
35  PUSH_NVTX_RANGE("tdas_cuda_Tdas_submodule_creation",0);
36  dred::Dred<T> dred(_impl_config.dred_config());
37  brdz::Brdz brdz(_impl_config.brdz_config());
38  tdrt::Tdrt tdrt(_impl_config.tdrt_config());
39  tdao::Tdao tdao(_impl_config.tdao_config());
40  pwft::Pwft pwft(_impl_config.pwft_config());
41  hrms::Hrms hrms(_impl_config.hrms_config());
42  fft::Fft fft_c2r(_impl_config.fft_config());
43  fft::Fft fft_r2c(_impl_config.fft_config());
44  POP_NVTX_RANGE; // tdas_cuda_Tdas_submodule_creation
45 
46  auto candidate_list_ptr = std::make_shared<data::Ccl>();
47  std::size_t nharmonics = _algo_config.number_of_harmonic_sums();
49  TimeSeriesType tdrt_input;
50  TimeSeriesType tdrt_output;
51  FourierSeriesType r2cfft_output;
52  FourierSeriesType dred_output;
53  PowerSeriesType pwft_output;
54  std::vector<PowerSeriesType> hrms_output(nharmonics);
55 
56 
57  PUSH_NVTX_RANGE("tdas_cuda_Tdas_process_dm_trial",3);
58  for(auto dm_trial_iterator = data.cbegin();
59  dm_trial_iterator!=data.cend();
60  ++dm_trial_iterator)
61  {
62  auto const& dm_trial = *dm_trial_iterator;
63  //Test if the dm trial meets the minimum criteria for Tdas execution
64  if (dm_trial.number_of_samples() < _algo_config.minimum_size())
65  {
66  PANDA_LOG_WARN << "Number of samples in DM trial is lower than configured minimum size"
67  << "("<<dm_trial.number_of_samples()<<" < "<<_algo_config.minimum_size()<<")";
68  //Abort Tdas and return empty candidate list
69  return candidate_list_ptr;
70  }
71 
72  //Try to get the DM value from the trial
73  data::DedispersionMeasureType<float> const dm = dm_trial.dm();
74  PANDA_LOG_DEBUG << "Processing dispersion measure: " << dm;
75 
76  //Resize the copy_buffer to the configured Tdas transform size in the config
77  copy_buffer.resize(_algo_config.size());
78 
79  //Copy single trial from DmTimeSlice to device
80  PUSH_NVTX_RANGE("tdas_cuda_Tdas_copy_trial_to_device",1);
81  auto it = dm_trial.copy_to(copy_buffer);
82  POP_NVTX_RANGE; // tdas_cuda_Tdas_copy_trial_to_device
83 
84  //Find how many samples were copied
85  std::size_t ncopied = std::distance(copy_buffer.begin(),it);
86 
87 
88  PUSH_NVTX_RANGE("tdas_cuda_Tdas_prepare_and_deredden",2);
89  //Resize the Tdrt input the size of the valid copied data
90  tdrt_input.resize(ncopied);
91 
92  //Set the sampling rate on the input
93  tdrt_input.sampling_interval(dm_trial.sampling_interval());
94 
95  //Copy from the copy buffer to the Tdrt input, implicit uint8_t to T (i.e. float/double) conversion.
96  thrust::copy(copy_buffer.begin(),copy_buffer.begin()+ncopied,tdrt_input.begin());
97 
98  //If the copied size is not the same as the desired transform size it is necessary
99  //to pad the data with the mean of the input.
100  if (ncopied!=_algo_config.size())
101  {
102  //Calcute the mean of the valid data
103  T padding_value = thrust::reduce(thrust::cuda::par,tdrt_input.begin(),tdrt_input.end())/ncopied;
104  //Extent the Tdrt input filling the new memory with the padding value
105  tdrt_input.resize(_algo_config.size(),padding_value);
106  }
107 
108  //Perform forward FFT to prepare data for dereddening
109  fft_r2c.process(device,tdrt_input,r2cfft_output);
110 
111  //Perform dereddening
112  dred.process(device, r2cfft_output, dred_output, _algo_config.acceleration_list_generator().magnitude());
113 
114  //Zap birdies
115  brdz.process<cheetah::Cuda, T, typename FourierSeriesType::AllocatorType>(device,dred_output);
116 
117  //Transform data back to time domain
118  fft_c2r.process(device, dred_output, tdrt_input);
119 
120  POP_NVTX_RANGE; // tdas_cuda_Tdas_prepare_and_deredden
121  //for each dm retreive the corresponding acceleration list and iterate
122  //over each acceleration
123  auto acc_list = _algo_config.acceleration_list_generator().acceleration_list(dm,
124  tdrt_input.size(),tdrt_input.sampling_interval().value());
125  for (auto accel: acc_list)
126  {
127  PANDA_LOG_DEBUG << "Processing acceleration: " << accel;
128  PUSH_NVTX_RANGE("tdas_cuda_Tdas_process_acceleration_trial",4);
129  //Resample to the acceleration
130  tdrt.process(device,tdrt_input,tdrt_output,accel);
131 
132  //Perform forward FFT
133  fft_r2c.process(device,tdrt_output,r2cfft_output);
134 
135  //Form Spectrum
136  pwft.process_nn(device,r2cfft_output,pwft_output);
137 
138  //Calculate harmonic sums
139  hrms.process(device,pwft_output,hrms_output);
140 
141  //Search for signals in the non-summed spectrum
142  std::size_t harmonic = 1;
143  tdao.process<cheetah::Cuda,T,typename data::PowerSeries<cheetah::Cuda,T>::AllocatorType>
144  (device,pwft_output,*candidate_list_ptr,dm,accel,harmonic);
145 
146  //For each harmonically summed spectrum search for significant signals
147  ++harmonic;
148  for (auto const& series: hrms_output)
149  {
150  tdao.process<cheetah::Cuda,T,typename data::PowerSeries<cheetah::Cuda,T>::AllocatorType>
151  (device,series,*candidate_list_ptr,dm,accel,harmonic);
152  ++harmonic;
153  }
154  POP_NVTX_RANGE; // tdas_cuda_Tdas_prepare_and_deredden
155  }
156  }
157  POP_NVTX_RANGE; // tdas_cuda_Tdas_process_dm_trial
158  PANDA_LOG << "cuda::Tdas::process() complete (on device "<< device.device_id() << ")";
159  POP_NVTX_RANGE; //tdas_cuda_Tdas_process
160  return candidate_list_ptr;
161 }
162 
163 template <typename T>
164 std::shared_ptr<data::Ccl> Tdas<T>::operator()(panda::PoolResource<Cuda>& device,
165  std::shared_ptr<DmTimeSliceType> const& data)
166 {
167  return process(device,*data);
168 }
169 
170 template class Tdas<float>;
171 template class Tdas<double>;
172 
173 } // namespace cuda
174 } // namespace tdas
175 } // namespace cheetah
176 } // namespace ska
std::shared_ptr< data::Ccl > operator()(panda::PoolResource< cheetah::Cuda > &gpu, std::shared_ptr< DmTimeSliceType > const &data)
Version of process method for async calls.
Time Domain Resampler CUDA version / Transform / Module.
Definition: Tdrt.h:44
AccListGenConfig const & acceleration_list_generator() const
Return acceleration list generator configuration.
Definition: Config.cpp:82
A class for performing FFTs.
Definition: Fft.h:50
Class that wraps a subset of DMs from a DmTime object.
Definition: DmTimeSlice.h:54
Class for implementing spectral dereddening.
Definition: Dred.h:46
Tdas(Config const &config, tdas::Config const &algo_config)
Construct a new Tdas instance.
Definition: Tdas.cu:11
Power Spectrum Fourier Transform version / Transform / Module.
Definition: Pwft.h:48
std::size_t size() const
The size for transform to use for the search.
Definition: Config.cpp:112
A container of Fourier series data.
Some limits and constants for FLDO.
Definition: Brdz.h:35
ConstIterator begin() const
Iterators to device memory.
Definition: Series.cpp:67
std::vector< data::AccelerationType > acceleration_list(data::DedispersionMeasureType< float > dm, std::size_t number_of_samples, double sampling_interval) const
Generate a list of accelerations.
A class for performing harmonic summing.
Definition: Hrms.h:45
Class for power series (detected FrequencySeries).
Definition: PowerSeries.h:58
std::size_t minimum_size() const
The minimum timeseries length that will be searched.
Definition: Config.cpp:102
Class for time series data.
Definition: TimeSeries.h:47
void number_of_harmonic_sums(std::size_t nharmonics)
Number of harmonic sums to perform in the search.
Definition: Config.cpp:122
CUDA/Thrust implementation of the Tdas module.
Definition: Tdas.cuh:34
Time Domain Spectral Peak Detection and Candidate List Output.
Definition: Tdao.h:53
void resize(std::size_t size)
resize the data
Definition: Series.cpp:115
Class for performing birdie zapping.
Definition: Brdz.h:47
data::AccelerationType magnitude() const
return the maximum absolute acceleration magnitude
std::shared_ptr< data::Ccl > process(panda::PoolResource< cheetah::Cuda > &gpu, DmTimeSliceType const &data)
Search a DmTimeSlice for significant periodic signals at a range of accelerations.
Definition: Tdas.cu:22