24 #ifndef SKA_CHEETAH_FLDO_CUDA_FLDOCUDA_H 25 #define SKA_CHEETAH_FLDO_CUDA_FLDOCUDA_H 29 #include "cheetah/fldo/Config.h" 30 #include "cheetah/fldo/cuda/detail/FldoUtils.h" 31 #include "cheetah/fldo/Types.h" 44 template<
typename NumericalT>
48 typedef cheetah::Cuda Architecture;
49 typedef panda::nvidia::DeviceCapability<2,0, panda::nvidia::giga/2> ArchitectureCapabilty;
50 typedef panda::PoolResource<Architecture> ResourceType;
51 typedef boost::units::quantity<data::MegaHertz, double> FrequencyType;
52 typedef NumericalT NumericalRep;
53 typedef data::TimeType TimeType;
54 typedef data::TimeFrequency<Cpu, NumericalRep> TimeFrequencyType;
55 typedef boost::units::quantity<boost::units::si::time, int> ScanTimeType;
56 typedef data::Candidate<Cpu, float> CandidateType;
59 FldoCuda(fldo::Config
const& config);
64 std::shared_ptr<data::Ocld> operator()(ResourceType& device
65 , std::vector<std::shared_ptr<TimeFrequencyType>>
const& data
66 , data::Scl
const& input_candidates);
72 void rebin_candidates(std::vector<util::CandidateRebin>& rebin, TimeType
const tsamp, data::Scl &scl_data);
77 void load_constant_devmem(std::vector<util::CandidateRebin>& rebin, std::vector<double> delta_freqs,
78 data::Scl
const &input_candidates, std::vector <int> &nbins);
// Declaration of allocate_global_devmem(): takes the number of candidates,
// the rebinning descriptors and a set of pointer-to-pointer arguments.
// NOTE(review): presumably the pointer-to-pointer arguments are out-parameters
// that receive the allocated buffers - confirm in the definition.
// NOTE(review): this declaration is truncated in this copy of the header: the
// parameter list stops after `d_folded,` with no closing `);`, and the bare
// integers "83" / "84" are residue from a rendered source listing rather than
// code. Restore the full declaration from the original header before building.
83 void allocate_global_devmem(
size_t ncand, std::vector<util::CandidateRebin> &rebin, NumericalRep **d_in,
84 NumericalRep ** h_in,
float ** d_outfprof,
float **d_outprof,
float ** d_folded,
91 void folding(ResourceType& gpu, std::vector<util::CandidateRebin> &rebin, NumericalRep *d_in,
float *d_folded,
92 float *d_weight,
int *nbins,
int isubint,
float *data_mean,
float *data_sigma,
93 float &gpu_time, std::vector<util::GpuStream>& exec_stream);
98 void profiles(
size_t ncandidates,
float mean,
float *d_folded,
float *d_weight,
float *d_outfprof,
99 float* d_outprof,
float & gpu_time, std::vector<util::GpuStream> &exec_stream);
103 void run_optimization(data::Scl
const& ori_data, data::Scl & opt_data,
float *d_folded,
104 float* d_perturbed,
float *d_outfprof,
105 std::vector<util::CandidateRebin> &rebin, std::vector<double>
const &time_sum,
106 std::vector <double>
const & freq_sum,
float &total_gpu_time,
107 std::vector<util::GpuStream>& exec_stream,
int p_trial,
int p_rf,
108 int pdot_trial,
int pdot_rf,
int dm_trial,
int dm_rf,
float sigma,
int verbose);
112 void free_dev_memory(NumericalRep *h_in,
float *d_folded,
float *d_weight,
float *d_outprof,
float * d_outfprof);
117 std::shared_ptr<data::Ocld> run_folding(ResourceType& device,
118 std::vector<std::shared_ptr<TimeFrequencyType>>
const& data,
119 data::Scl input_candidates);
133 void data_reshape(std::vector<std::shared_ptr<TimeFrequencyType>>
const& input_data,
134 std::vector<std::shared_ptr<TimeFrequencyType>>
const& data);
137 fldo::Config
const& _config;
139 size_t _mfold_phases;
#endif // SKA_CHEETAH_FLDO_CUDA_FLDOCUDA_H