Cheetah - SKA - PSS - Prototype Time Domain Search Pipeline
FldoCuda.h
1 /*
2  * The MIT License (MIT)
3  *
4  * Copyright (c) 2016 The SKA organisation
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef SKA_CHEETAH_FLDO_CUDA_FLDOCUDA_H
25 #define SKA_CHEETAH_FLDO_CUDA_FLDOCUDA_H
26 
27 #ifdef ENABLE_CUDA
28 
29 #include "cheetah/fldo/Config.h"
30 #include "cheetah/fldo/cuda/detail/FldoUtils.h"
31 #include "cheetah/fldo/Types.h"
32 
33 namespace ska {
34 namespace cheetah {
35 namespace fldo {
36 namespace cuda {
37 
/**
 * @brief Cuda/GPU implementation of the Fldo (folding & optimisation) module.
 *
 * @details Folds the input time-frequency data at the periods of the supplied
 *          search candidates on an nvidia GPU, optimises the folded profiles,
 *          and produces an optimised candidate list (data::Ocld).
 *          NOTE(review): this header contains declarations only; per-method
 *          semantics below are inferred from names/parameters — confirm
 *          against the .cu implementation.
 *
 * @tparam NumericalT the numerical representation of a single
 *                    time-frequency sample (e.g. uint8_t).
 */
template<typename NumericalT>
class FldoCuda
{
    public:
        typedef cheetah::Cuda Architecture;
        typedef panda::nvidia::DeviceCapability<2,0, panda::nvidia::giga/2> ArchitectureCapabilty; // minimum device requirements
        typedef panda::PoolResource<Architecture> ResourceType;
        typedef boost::units::quantity<data::MegaHertz, double> FrequencyType;
        typedef NumericalT NumericalRep;
        typedef data::TimeType TimeType;
        typedef data::TimeFrequency<Cpu, NumericalRep> TimeFrequencyType;
        typedef boost::units::quantity<boost::units::si::time, int> ScanTimeType;
        typedef data::Candidate<Cpu, float> CandidateType;

    public:
        /**
         * @brief Construct the module from its configuration.
         *        Only a const reference is taken (see _config) — the Config
         *        object must outlive this instance.
         */
        FldoCuda(fldo::Config const& config);

        /**
         * @brief Fold and optimise the given candidates on the supplied
         *        device resource.
         *
         * @param device           the GPU pool resource to execute on
         * @param data             the time-frequency data chunks to fold
         * @param input_candidates the candidate list to fold and optimise
         *
         * @return the optimised candidate list
         */
        std::shared_ptr<data::Ocld> operator()(ResourceType& device
                                             , std::vector<std::shared_ptr<TimeFrequencyType>> const& data
                                             , data::Scl const& input_candidates);

    private:
        /**
         * @brief Fill @p rebin with per-candidate rebinning parameters.
         *        Presumably derived from each candidate's period and the
         *        sampling time @p tsamp — TODO confirm in the implementation.
         */
        void rebin_candidates(std::vector<util::CandidateRebin>& rebin, TimeType const tsamp, data::Scl &scl_data);

        /**
         * @brief Load per-candidate parameters (channel frequency offsets,
         *        phase-bin counts, rebin settings) into device constant memory.
         */
        void load_constant_devmem(std::vector<util::CandidateRebin>& rebin, std::vector<double> delta_freqs,
                                  data::Scl const &input_candidates, std::vector <int> &nbins);

        /**
         * @brief Allocate the device (and host-side staging) buffers used by
         *        folding: input samples, folded profiles, weights and output
         *        profile arrays. Out-parameters receive the new allocations;
         *        free_dev_memory() is the matching release — TODO confirm.
         */
        void allocate_global_devmem(size_t ncand, std::vector<util::CandidateRebin> &rebin, NumericalRep **d_in,
                                    NumericalRep ** h_in, float ** d_outfprof, float **d_outprof, float ** d_folded,
                                    float ** d_weight);

        /**
         * @brief Fold one sub-integration (@p isubint) of the input data into
         *        the per-candidate profile buffers, using one GPU stream per
         *        entry of @p exec_stream. @p gpu_time accumulates the measured
         *        GPU execution time.
         */
        void folding(ResourceType& gpu, std::vector<util::CandidateRebin> &rebin, NumericalRep *d_in, float *d_folded,
                     float *d_weight, int *nbins, int isubint, float *data_mean, float *data_sigma,
                     float &gpu_time, std::vector<util::GpuStream>& exec_stream);

        /**
         * @brief Build the final (frequency- and time-scrunched) profiles for
         *        all candidates from the folded data and weights.
         */
        void profiles(size_t ncandidates, float mean, float *d_folded, float *d_weight, float *d_outfprof,
                      float* d_outprof, float & gpu_time, std::vector<util::GpuStream> &exec_stream);

        /**
         * @brief Optimise the candidate parameters around their nominal
         *        values, writing the refined candidates to @p opt_data.
         *        The *_trial/*_rf pairs set the number of trial steps and the
         *        refinement factor for period, period-derivative and DM
         *        searches respectively — TODO confirm against the caller.
         */
        void run_optimization(data::Scl const& ori_data, data::Scl & opt_data, float *d_folded,
                              float* d_perturbed, float *d_outfprof,
                              std::vector<util::CandidateRebin> &rebin, std::vector<double> const &time_sum,
                              std::vector <double> const & freq_sum,float &total_gpu_time,
                              std::vector<util::GpuStream>& exec_stream, int p_trial, int p_rf,
                              int pdot_trial, int pdot_rf, int dm_trial, int dm_rf, float sigma, int verbose);

        /**
         * @brief Release the buffers obtained from allocate_global_devmem().
         */
        void free_dev_memory(NumericalRep *h_in, float *d_folded, float *d_weight, float *d_outprof, float * d_outfprof);

        /**
         * @brief Run the whole process (folding + optimization).
         */
        std::shared_ptr<data::Ocld> run_folding(ResourceType& device,
                                                std::vector<std::shared_ptr<TimeFrequencyType>> const& data,
                                                data::Scl input_candidates);

        /**
         * @brief Reorganise the input data chunks into the layout expected by
         *        the folding kernels — TODO confirm the exact reshape
         *        (e.g. sub-band/sub-integration ordering) in the .cu file.
         */
        void data_reshape(std::vector<std::shared_ptr<TimeFrequencyType>> const& input_data,
                          std::vector<std::shared_ptr<TimeFrequencyType>> const& data);

    private:
        fldo::Config const& _config;
        bool _enable_split;     ///< flag to enable/disable phase shift in folding algorithm
        size_t _mfold_phases;   ///< max number of phases (bins) to use in folding
        size_t _nchannels;      ///< the number of frequency channels
        uint64_t _nsamp;        ///< the total number of samples
        size_t _nsubbands;      ///< the number of sub-bands used by folding procedure
        size_t _nsubints;       ///< the number of sub-integrations used by folding procedure
        double _tobs;           ///< the total observing time (in sec.)
        double _tsamp;          ///< the sampling time (in sec.)
};
147 
148 } // namespace cuda
149 } // namespace fldo
150 } // namespace cheetah
151 } // namespace ska
152 
153 #endif //ENABLE_CUDA
154 
#endif // SKA_CHEETAH_FLDO_CUDA_FLDOCUDA_H
Some limits and constants for FLDO.
Definition: Brdz.h:35