Cheetah - SKA - PSS - Prototype Time Domain Search Pipeline
Tdao.cu
1 #include "cheetah/tdao/cuda/Tdao.cuh"
2 #include "cheetah/data/Candidate.h"
3 #include "cheetah/data/Units.h"
4 #include "cheetah/cuda_utils/nvtx.h"
5 #include "panda/Log.h"
6 
7 #include <iostream>
8 
9 namespace ska {
10 namespace cheetah {
11 namespace tdao {
12 namespace cuda {
13 namespace detail {
14 
15 template <typename T>
16 struct GreaterThanThreshold: thrust::unary_function<thrust::tuple<unsigned,T>,bool>
17 {
18  public:
19  GreaterThanThreshold(float thresh);
20  inline __host__ __device__
21  bool operator()(thrust::tuple<int,T> t);
22 
23  private:
24  float _threshold;
25  };
26 
27 template <typename T>
29  : _threshold(threshold)
30 {
31 }
32 
33 template <typename T>
34 inline __host__ __device__
35 bool GreaterThanThreshold<T>::operator()(thrust::tuple<int,T> t)
36 {
37  return thrust::get<1>(t) > _threshold;
38 }
39 
40 } //namespace detail
41 
42 using boost::math::complement;
43 using boost::math::quantile;
44 using boost::math::cdf;
45 
46 template <typename T>
47 std::size_t Tdao::_execute(typename thrust::device_vector<T>::const_iterator in, std::size_t size, size_t offset, float threshold)
48 {
49 
50  typedef thrust::tuple<unsigned,T> PeakType;
51  typedef thrust::device_vector<unsigned>::iterator IdxIterator;
52  typedef typename thrust::device_vector<T>::iterator PowerIterator;
53  typedef thrust::tuple< thrust::counting_iterator<unsigned>, PowerIterator> PeakTupleInputIterator;
54  typedef typename thrust::tuple< IdxIterator, PowerIterator > PeakTupleOutputIterator;
55 
56  using thrust::tuple;
57  using thrust::counting_iterator;
58  using thrust::zip_iterator;
59 
60  counting_iterator<unsigned> index_counter(offset);
61  auto input_zip = make_zip_iterator(make_tuple(index_counter,in));
62  auto output_zip = make_zip_iterator(make_tuple(_idxs.begin(),_powers.begin()));
63 
64 #ifdef CUDA_BROKEN_THRUST_WITH_ALLOCATOR
65  int num_copied = thrust::copy_if(thrust::cuda::par, input_zip, input_zip+size, output_zip,
66  detail::GreaterThanThreshold<T>(threshold)) - output_zip;
67 #else
68  int num_copied = thrust::copy_if(thrust::cuda::par(_allocator), input_zip, input_zip+size, output_zip,
69  detail::GreaterThanThreshold<T>(threshold)) - output_zip;
70 #endif
71  return (std::size_t) num_copied;
72 }
73 
74 template <typename PowerSeriesType>
75 data::Ccl::CandidateType Tdao::_make_candidate(
76  unsigned idx,
77  float power,
78  PowerSeriesType const& input,
79  data::DedispersionMeasureType<float> dm,
80  double accel_fact,
81  std::size_t nharmonics)
82 {
83  double sigma = input.power_to_equiv_sigma(power);
84  auto period = data::Ccl::CandidateType::MsecTimeType(input.bin_to_period(idx));
85  auto pdot = data::Ccl::CandidateType::SecPerSecType(accel_fact * period.value()/1000.0);
86  auto width = data::Ccl::CandidateType::MsecTimeType(period/(2.0*nharmonics));
87  PANDA_LOG_DEBUG << "Candidate -> Period=" <<period<< ", Pdot="<<pdot<<", Width="<<width<<", Sigma="<<sigma;
88  return data::Ccl::CandidateType(period,pdot,dm,width,sigma);
89 }
90 
91 template <typename PowerSeriesType>
92 void Tdao::_filter_unique(PowerSeriesType const& input,
93  data::Ccl& output,
94  std::size_t num_copied,
95  data::DedispersionMeasureType<float> dm,
96  data::AccelerationType acc,
97  std::size_t nharmonics)
98 {
99  if (num_copied<1)
100  {
101  return;
102  }
103 
104  //should be panda async copy when this exists
105  thrust::copy(_idxs.begin(),_idxs.begin()+num_copied,_h_idxs.begin());
106  thrust::copy(_powers.begin(),_powers.begin()+num_copied,_h_powers.begin());
107  //synchronize stream
108 
109  output.reserve(output.size() + num_copied);
110  double accel_fact = (acc / (boost::units::si::constants::codata::c * input.frequency_step())).value();
111 
112  std::size_t distance;
113  std::size_t idx = 0;
114  float cpeak = _h_powers[idx];
115  int cpeakidx = _h_idxs[idx];
116  idx++;
117  std::size_t init_size = output.size();
118  while(idx < num_copied)
119  {
120  distance = _h_idxs[idx]-_h_idxs[idx-1];
121  if (distance > 1)
122  {
123  output.push_back(_make_candidate(cpeakidx,cpeak,input,dm,accel_fact,nharmonics));
124  cpeak = _h_powers[idx];
125  cpeakidx = _h_idxs[idx];
126  }
127  else if (_h_powers[idx] > cpeak)
128  {
129  cpeak = _h_powers[idx];
130  cpeakidx = _h_idxs[idx];
131  }
132  idx++;
133  }
134  output.push_back(_make_candidate(cpeakidx,cpeak,input,dm,accel_fact,nharmonics));
135  std::size_t final_size = output.size();
136  PANDA_LOG_DEBUG << "Found " << final_size-init_size << " candidates above threshold of "
137  << _algo_config.significance_threshold() <<" (DM="<< dm <<", Acc="<<acc<<", filtered)";
138 }
139 
140 template <typename T, typename Alloc>
141 void Tdao::process(ResourceType& gpu,
143  data::Ccl& output,
144  data::DedispersionMeasureType<float> dm,
145  data::AccelerationType acc,
146  std::size_t nharmonics)
147 {
148  PUSH_NVTX_RANGE("cuda_Tdao_process",0);
149  PANDA_LOG_DEBUG << "GPU ID: "<<gpu.device_id();
150  _prepare(input.size());
151  float power_threshold = input.equiv_sigma_to_power(_algo_config.significance_threshold());
152  std::size_t end_bin = (std::size_t) (_algo_config.maximum_frequency()/input.frequency_step()).value();
153  std::size_t start_bin = (std::size_t) (_algo_config.minimum_frequency()/input.frequency_step()).value();
154  std::size_t size = std::min(end_bin,input.size());
155  PANDA_LOG_DEBUG << "Bin range to be searched -> ("<<start_bin<<", "<<size<<")";
156  PUSH_NVTX_RANGE("cuda_Tdao_process_execute_kernels",1);
157  std::size_t num_copied = _execute<T>(input.begin(),size,start_bin,power_threshold);
158  POP_NVTX_RANGE; // cuda_Tdao_process_execute_kernels
159  PANDA_LOG_DEBUG << "Found " << num_copied << " candidates above power threshold of "
160  << power_threshold <<" (DM="<< dm <<", Acc="<<acc<<", unfiltered)";
161  PUSH_NVTX_RANGE("cuda_Tdao_process_filter_unique",2);
162  _filter_unique(input,output,num_copied,dm,acc,nharmonics);
163  POP_NVTX_RANGE; // cuda_Tdao_process_filter_unique
164  POP_NVTX_RANGE; // cuda_Tdao_process
165 }
166 
167 } // namespace cuda
168 } // namespace tdao
169 } // namespace cheetah
170 } // namespace ska
FourierFrequencyType const & frequency_step() const
Retrieve the frequency step of the series.
void process(ResourceType &gpu, data::PowerSeries< Architecture, T, Alloc >const &input, data::Ccl &output, data::DedispersionMeasureType< float > dm, data::AccelerationType acc, std::size_t nharmonics)
Find peaks in a power series above a statistical threshold.
Definition: Tdao.cu:141
std::size_t size() const
Retrieve the size of the underlying vector.
Definition: VectorLike.cpp:61
Some limits and constants for FLDO.
Definition: Brdz.h:35
ConstIterator begin() const
Iterators to device memory.
Definition: Series.cpp:67
Class for power series (detected FrequencySeries).
Definition: PowerSeries.h:58
Candidate list.
Definition: Ccl.h:45
void push_back(ValueType const &value)
Appends element to end of vector.
Definition: VectorLike.cpp:145
void reserve(std::size_t size)
Reserve space for this many elements.
Definition: VectorLike.cpp:158
float equiv_sigma_to_power(float sigma) const
Compute the power that corresponds to a Gaussian equivalent sigma.
Definition: PowerSeries.cpp:83
std::size_t size() const
the size of the series
Definition: Series.cpp:109
A simple record to hold &#39;candidate&#39; proprerties.
Definition: Candidate.h:60