Cheetah - SKA - PSS - Prototype Time Domain Search Pipeline
Public Types | Public Member Functions | List of all members
ska::cheetah::tdas::cuda::Tdas< T > Class Template Reference

CUDA/Thrust implementation of the Tdas module. More...

#include <cheetah/tdas/cuda/Tdas.h>

Inheritance diagram for ska::cheetah::tdas::cuda::Tdas< T >:
Inheritance graph
Collaboration diagram for ska::cheetah::tdas::cuda::Tdas< T >:
Collaboration graph

Public Types

typedef cheetah::Cuda Architecture
 
typedef panda::nvidia::DeviceCapability< 2, 0, panda::nvidia::giga/2 > ArchitectureCapability
 
- Public Types inherited from ska::cheetah::utils::Mock< cheetah::Cuda, Config const &, tdas::Config const &>
typedef cheetah::Cuda Architecture
 

Public Member Functions

 Tdas (Config const &config, tdas::Config const &algo_config)
 Construct a new Tdas instance. More...
 
 Tdas (Tdas const &)=delete
 
 Tdas (Tdas &&)=default
 
std::shared_ptr< data::Cclprocess (panda::PoolResource< cheetah::Cuda > &gpu, DmTimeSliceType const &data)
 Search a DmTimeSlice for significant periodic signals at a range of accelerations. More...
 
std::shared_ptr< data::Ccloperator() (panda::PoolResource< cheetah::Cuda > &gpu, std::shared_ptr< DmTimeSliceType > const &data)
 Version of process method for async calls.
 
- Public Member Functions inherited from ska::cheetah::utils::Mock< cheetah::Cuda, Config const &, tdas::Config const &>
 Mock (Args &&...)
 
void operator() (OperatorArgs &&...)
 
- Public Member Functions inherited from ska::cheetah::utils::AlgorithmBase< Config, tdas::Config >
 AlgorithmBase (Config const &impl_config, tdas::Config const &algo_config)
 
 AlgorithmBase (AlgorithmBase const &)=delete
 
 AlgorithmBase (AlgorithmBase &&)=default
 

Additional Inherited Members

- Protected Attributes inherited from ska::cheetah::utils::AlgorithmBase< Config, tdas::Config >
Config const & _impl_config
 
tdas::Config const & _algo_config
 

Detailed Description

template<typename T>
class ska::cheetah::tdas::cuda::Tdas< T >

CUDA/Thrust implementation of the Tdas module.

Mock interface to Tdas to support non-CUDA builds.

The Tdas module supports time domain acceleration search.

Template Parameters
TThe internal value type to use for processing (float or double)
TThe internal value type

Definition at line 34 of file Tdas.cuh.

Constructor & Destructor Documentation

◆ Tdas()

template<typename T >
ska::cheetah::tdas::cuda::Tdas< T >::Tdas ( Config const &  config,
tdas::Config const &  algo_config 
)

Construct a new Tdas instance.

Parameters
configThe implementation configuration
algo_configThe algorithm configuration

Definition at line 11 of file Tdas.cu.

12  : utils::AlgorithmBase<Config, tdas::Config>(config, algo_config)
13 {
14 }

Member Function Documentation

◆ process()

template<typename T >
std::shared_ptr< data::Ccl > ska::cheetah::tdas::cuda::Tdas< T >::process ( panda::PoolResource< cheetah::Cuda > &  gpu,
DmTimeSliceType const &  data 
)

Search a DmTimeSlice for significant periodic signals at a range of accelerations.

Parameters
gpuThe gpu to process on
dataThe DmTimeSlice to search
Returns
A shared pointer to a list of candidate signals

Definition at line 22 of file Tdas.cu.

24 {
25  PUSH_NVTX_RANGE("tdas_cuda_Tdas_process",0);
26  PANDA_LOG << "cuda::Tdas::process() invoked (on device "<< device.device_id() << ")";
27  typedef data::FrequencySeries<cheetah::Cuda,thrust::complex<T>> FourierSeriesType;
28  typedef data::TimeSeries<cheetah::Cuda,T> TimeSeriesType;
29  typedef data::PowerSeries<cheetah::Cuda,T> PowerSeriesType;
30 
31  //Thread safe processing object creation
32  //Note that this is memory inefficient but gets us around multi
33  //GPU and multi thread issues
34 
35  PUSH_NVTX_RANGE("tdas_cuda_Tdas_submodule_creation",0);
36  dred::Dred<T> dred(_impl_config.dred_config());
37  brdz::Brdz brdz(_impl_config.brdz_config());
38  tdrt::Tdrt tdrt(_impl_config.tdrt_config());
39  tdao::Tdao tdao(_impl_config.tdao_config());
40  pwft::Pwft pwft(_impl_config.pwft_config());
41  hrms::Hrms hrms(_impl_config.hrms_config());
42  fft::Fft fft_c2r(_impl_config.fft_config());
43  fft::Fft fft_r2c(_impl_config.fft_config());
44  POP_NVTX_RANGE; // tdas_cuda_Tdas_submodule_creation
45 
46  auto candidate_list_ptr = std::make_shared<data::Ccl>();
47  std::size_t nharmonics = _algo_config.number_of_harmonic_sums();
48  data::TimeSeries<cheetah::Cuda,float> copy_buffer;
49  TimeSeriesType tdrt_input;
50  TimeSeriesType tdrt_output;
51  FourierSeriesType r2cfft_output;
52  FourierSeriesType dred_output;
53  PowerSeriesType pwft_output;
54  std::vector<PowerSeriesType> hrms_output(nharmonics);
55 
56 
57  PUSH_NVTX_RANGE("tdas_cuda_Tdas_process_dm_trial",3);
58  for(auto dm_trial_iterator = data.cbegin();
59  dm_trial_iterator!=data.cend();
60  ++dm_trial_iterator)
61  {
62  auto const& dm_trial = *dm_trial_iterator;
63  //Test if the dm trial meets the minimum criteria for Tdas execution
64  if (dm_trial.number_of_samples() < _algo_config.minimum_size())
65  {
66  PANDA_LOG_WARN << "Number of samples in DM trial is lower than configured minimum size"
67  << "("<<dm_trial.number_of_samples()<<" < "<<_algo_config.minimum_size()<<")";
68  //Abort Tdas and return empty candidate list
69  return candidate_list_ptr;
70  }
71 
72  //Try to get the DM value from the trial
73  data::DedispersionMeasureType<float> const dm = dm_trial.dm();
74  PANDA_LOG_DEBUG << "Processing dispersion measure: " << dm;
75 
76  //Resize the copy_buffer to the configured Tdas transform size in the config
77  copy_buffer.resize(_algo_config.size());
78 
79  //Copy single trial from DmTimeSlice to device
80  PUSH_NVTX_RANGE("tdas_cuda_Tdas_copy_trial_to_device",1);
81  auto it = dm_trial.copy_to(copy_buffer);
82  POP_NVTX_RANGE; // tdas_cuda_Tdas_copy_trial_to_device
83 
84  //Find how many samples were copied
85  std::size_t ncopied = std::distance(copy_buffer.begin(),it);
86 
87 
88  PUSH_NVTX_RANGE("tdas_cuda_Tdas_prepare_and_deredden",2);
89  //Resize the Tdrt input the size of the valid copied data
90  tdrt_input.resize(ncopied);
91 
92  //Set the sampling rate on the input
93  tdrt_input.sampling_interval(dm_trial.sampling_interval());
94 
95  //Copy from the copy buffer to the Tdrt input, implicit uint8_t to T (i.e. float/double) conversion.
96  thrust::copy(copy_buffer.begin(),copy_buffer.begin()+ncopied,tdrt_input.begin());
97 
98  //If the copied size is not the same as the desired transform size it is necessary
99  //to pad the data with the mean of the input.
100  if (ncopied!=_algo_config.size())
101  {
102  //Calcute the mean of the valid data
103  T padding_value = thrust::reduce(thrust::cuda::par,tdrt_input.begin(),tdrt_input.end())/ncopied;
104  //Extent the Tdrt input filling the new memory with the padding value
105  tdrt_input.resize(_algo_config.size(),padding_value);
106  }
107 
108  //Perform forward FFT to prepare data for dereddening
109  fft_r2c.process(device,tdrt_input,r2cfft_output);
110 
111  //Perform dereddening
112  dred.process(device, r2cfft_output, dred_output, _algo_config.acceleration_list_generator().magnitude());
113 
114  //Zap birdies
115  brdz.process<cheetah::Cuda, T, typename FourierSeriesType::AllocatorType>(device,dred_output);
116 
117  //Transform data back to time domain
118  fft_c2r.process(device, dred_output, tdrt_input);
119 
120  POP_NVTX_RANGE; // tdas_cuda_Tdas_prepare_and_deredden
121  //for each dm retreive the corresponding acceleration list and iterate
122  //over each acceleration
123  auto acc_list = _algo_config.acceleration_list_generator().acceleration_list(dm,
124  tdrt_input.size(),tdrt_input.sampling_interval().value());
125  for (auto accel: acc_list)
126  {
127  PANDA_LOG_DEBUG << "Processing acceleration: " << accel;
128  PUSH_NVTX_RANGE("tdas_cuda_Tdas_process_acceleration_trial",4);
129  //Resample to the acceleration
130  tdrt.process(device,tdrt_input,tdrt_output,accel);
131 
132  //Perform forward FFT
133  fft_r2c.process(device,tdrt_output,r2cfft_output);
134 
135  //Form Spectrum
136  pwft.process_nn(device,r2cfft_output,pwft_output);
137 
138  //Calculate harmonic sums
139  hrms.process(device,pwft_output,hrms_output);
140 
141  //Search for signals in the non-summed spectrum
142  std::size_t harmonic = 1;
143  tdao.process<cheetah::Cuda,T,typename data::PowerSeries<cheetah::Cuda,T>::AllocatorType>
144  (device,pwft_output,*candidate_list_ptr,dm,accel,harmonic);
145 
146  //For each harmonically summed spectrum search for significant signals
147  ++harmonic;
148  for (auto const& series: hrms_output)
149  {
150  tdao.process<cheetah::Cuda,T,typename data::PowerSeries<cheetah::Cuda,T>::AllocatorType>
151  (device,series,*candidate_list_ptr,dm,accel,harmonic);
152  ++harmonic;
153  }
154  POP_NVTX_RANGE; // tdas_cuda_Tdas_prepare_and_deredden
155  }
156  }
157  POP_NVTX_RANGE; // tdas_cuda_Tdas_process_dm_trial
158  PANDA_LOG << "cuda::Tdas::process() complete (on device "<< device.device_id() << ")";
159  POP_NVTX_RANGE; //tdas_cuda_Tdas_process
160  return candidate_list_ptr;
161 }
AccListGenConfig const & acceleration_list_generator() const
Return acceleration list generator configuration.
Definition: Config.cpp:82
std::size_t size() const
The size for transform to use for the search.
Definition: Config.cpp:112
std::vector< data::AccelerationType > acceleration_list(data::DedispersionMeasureType< float > dm, std::size_t number_of_samples, double sampling_interval) const
Generate a list of accelerations.
std::size_t minimum_size() const
The minimum timeseries length that will be searched.
Definition: Config.cpp:102
void number_of_harmonic_sums(std::size_t nharmonics)
Number of harmonic sums to perform in the search.
Definition: Config.cpp:122
data::AccelerationType magnitude() const
return the maximum absolute acceleration magnitude
Here is the call graph for this function:

The documentation for this class was generated from the following files: