24 #include "cheetah/cuda_utils/cuda_errorhandling.h" 25 #include "cheetah/data/TimeFrequency.h" 26 #include "cheetah/data/Units.h" 27 #include "panda/Resource.h" 28 #include "panda/Log.h" 36 namespace astroaccelerate{
38 template<
typename NumericalRep,
typename OptimizationParameterT>
39 DedispersionStrategy<NumericalRep , OptimizationParameterT>::DedispersionStrategy(
const data::TimeFrequency<Cpu,NumericalRep>& chunk,
const ddtr::DedispersionTrialPlan& plan, std::size_t gpu_memory)
41 _nsamp = chunk.number_of_spectra();
42 _tsamp = chunk.sample_interval();
43 _nchans = chunk.number_of_channels();
44 auto t = chunk.low_high_frequencies();
46 _foff = (t.second-t.first)/((
double)_nchans);
47 _dm_constant = plan.dm_constant();
48 for(
auto it = plan.begin_range(); it!=plan.end_range(); ++it)
50 _user_dm_low.push_back(it->dm_start());
51 _user_dm_high.push_back(it->dm_end());
52 _user_dm_step.push_back(it->dm_step());
54 _out_bin.push_back(1);
56 _range = _user_dm_low.size();
57 _SPS_mem_requirement = memory_requirement_of_SPS();
59 make_strategy(gpu_memory);
62 template<
typename NumericalRep,
typename OptimizationParameterT>
63 DedispersionStrategy<NumericalRep , OptimizationParameterT>::~DedispersionStrategy()
67 template<
typename NumericalRep,
typename OptimizationParameterT>
73 template<
typename NumericalRep,
typename OptimizationParameterT>
79 template<
typename NumericalRep,
typename OptimizationParameterT>
85 template<
typename NumericalRep,
typename OptimizationParameterT>
91 template<
typename NumericalRep,
typename OptimizationParameterT>
97 template<
typename NumericalRep,
typename OptimizationParameterT>
103 template<
typename NumericalRep,
typename OptimizationParameterT>
109 template<
typename NumericalRep,
typename OptimizationParameterT>
115 template<
typename NumericalRep,
typename OptimizationParameterT>
121 template<
typename NumericalRep,
typename OptimizationParameterT>
127 template<
typename NumericalRep,
typename OptimizationParameterT>
133 template<
typename NumericalRep,
typename OptimizationParameterT>
139 template<
typename NumericalRep,
typename OptimizationParameterT>
145 template<
typename NumericalRep,
typename OptimizationParameterT>
151 template<
typename NumericalRep,
typename OptimizationParameterT>
157 template<
typename NumericalRep,
typename OptimizationParameterT>
163 template<
typename NumericalRep,
typename OptimizationParameterT>
166 return sizeof(NumericalRep)*8;
169 template<
typename NumericalRep,
typename OptimizationParameterT>
175 template<
typename NumericalRep,
typename OptimizationParameterT>
181 template<
typename NumericalRep,
typename OptimizationParameterT>
187 template<
typename NumericalRep,
typename OptimizationParameterT>
193 template<
typename NumericalRep,
typename OptimizationParameterT>
199 template<
typename NumericalRep,
typename OptimizationParameterT>
205 template<
typename NumericalRep,
typename OptimizationParameterT>
211 template<
typename NumericalRep,
typename OptimizationParameterT>
217 template<
typename NumericalRep,
typename OptimizationParameterT>
220 return((std::size_t) (5.5*
sizeof(
float) + 2*
sizeof(ushort)));
226 template<
typename NumericalRep,
typename OptimizationParameterT>
230 _gpu_memory = gpu_memory;
234 unsigned int maxshift_high = 0;
238 _dm_low.resize(_range,0);
239 _dm_high.resize(_range,0);
240 _dm_step.resize(_range,0);
241 _ndms.resize(_range,0);
242 _dmshifts.resize(_nchans,0);
244 for (
unsigned int c = 0; c < _nchans; c++)
246 _dmshifts[c] = std::abs((
float)(_dm_constant.value() * ((1.0 / pow(_fch1.value()-_foff.value(), 2.0f )) - (1.0 / pow(_fch1.value(), 2.0f)))));
249 for (
unsigned int i = 0; i < _range; i++)
251 modff(( ( (
int) ( ( _user_dm_high[i] - _user_dm_low[i] ) / _user_dm_step[i] ) + OptimizationParameterT::SDIVINDM ) / OptimizationParameterT::SDIVINDM ), &n);
252 _ndms[i] = (
unsigned int) ( (
unsigned int) n * OptimizationParameterT::SDIVINDM );
253 if (_max_ndms < _ndms[i])
254 _max_ndms = _ndms[i];
255 _total_ndms = _total_ndms + _ndms[i];
258 _dm_low[0] = _user_dm_low[0];
259 _dm_high[0] = _ndms[0] * _user_dm_step[0];
260 _dm_step[0] = _user_dm_step[0];
262 float modulo = (float)(2 * OptimizationParameterT::SNUMREG * OptimizationParameterT::SDIVINT);
263 for (
unsigned int i = 1; i < _range; i++)
265 _dm_low[i] = _dm_high[i - 1];
266 _dm_high[i] = _dm_low[i] + _ndms[i] * _user_dm_step[i];
267 _dm_step[i] = _user_dm_step[i];
269 if (_in_bin[i - 1] > 1) {
270 _maxshift = (int) ceil(( ( _dm_low[i - 1].value() + _dm_step[i - 1].value() * _ndms[i - 1] ) * _dmshifts[_nchans - 1] ) / _tsamp.value());
271 _maxshift = (int)ceil(((
float)_maxshift / (float)_in_bin[i - 1] + modulo) / modulo);
272 _maxshift = (int)(_maxshift * modulo * _in_bin[i - 1]);
273 if (_maxshift > maxshift_high)
274 maxshift_high = _maxshift;
278 if (_in_bin[_range - 1] > 1) {
279 _maxshift = (
unsigned int) ceil(( ( _dm_low[_range - 1].value() + _dm_step[_range - 1].value() * _ndms[_range - 1] ) * _dmshifts[_nchans - 1] ) / _tsamp.value());
280 _maxshift = (int)ceil(((
float)_maxshift / (float)_in_bin[_range - 1] + modulo) / modulo);
281 _maxshift = (
unsigned int)(_maxshift * modulo * _in_bin[_range - 1]);
282 if (_maxshift > maxshift_high)
283 maxshift_high = _maxshift;
286 if (maxshift_high == 0) {
287 maxshift_high = (
unsigned int) ceil(( ( _dm_low[_range - 1].value() + _dm_step[_range - 1].value() * ( _ndms[_range - 1] ) ) * _dmshifts[_nchans - 1] ) / _tsamp.value());
289 _max_dm = ceil(_dm_high[_range - 1]);
290 _maxshift = ( maxshift_high + ( OptimizationParameterT::SNUMREG *
sizeof(NumericalRep) * OptimizationParameterT::SDIVINT ) );
292 if (_maxshift >= _nsamp) {
293 PANDA_LOG_ERROR<<
"ERROR!! Your maximum DM trial exceeds the number of samples you have.\nReduce your maximum DM trial \n";
297 unsigned int max_tsamps;
300 _t_processed.resize(_range);
303 if (_nchans < _max_ndms ) {
308 max_tsamps = (
unsigned int) ( (gpu_memory) / (
sizeof(NumericalRep)*_nchans +
sizeof(
float)*(_max_ndms) +
309 (
size_t)(_SPS_mem_requirement*OptimizationParameterT::MIN_DMS_PER_SPS_RUN )));
312 if (_maxshift > max_tsamps) {
313 PANDA_LOG_ERROR<<
"\nERROR!! Your GPU doesn't have enough memory for this number of dispersion trials. \n" 314 <<
"Reduce your maximum dm or increase the size of your dm step";
319 if (_nsamp < max_tsamps) {
322 unsigned int local_t_processed = (
unsigned int) floor(( (
float) ( _nsamp - _maxshift ) / (
float) _in_bin[_range - 1] ) / (float) ( OptimizationParameterT::SDIVINT * 2 * OptimizationParameterT::SNUMREG ));
323 local_t_processed = local_t_processed * ( OptimizationParameterT::SDIVINT *
sizeof(NumericalRep) * OptimizationParameterT::SNUMREG ) * _in_bin[_range - 1];
324 for (
unsigned int i = 0; i < _range; i++) {
325 _t_processed[i].resize(1);
326 _t_processed[i][0] = (int) floor(( (
float) ( local_t_processed ) / (
float) _in_bin[i] ) / (float) ( OptimizationParameterT::SDIVINT * 2 * OptimizationParameterT::SNUMREG ));
327 _t_processed[i][0] = _t_processed[i][0] * ( OptimizationParameterT::SDIVINT *
sizeof(NumericalRep) * OptimizationParameterT::SNUMREG );
335 int samp_block_size = max_tsamps - _maxshift;
343 int local_t_processed = (int) floor(( (
float) ( samp_block_size ) / (
float) _in_bin[_range - 1] ) / (float) ( OptimizationParameterT::SDIVINT * 2 * OptimizationParameterT::SNUMREG ));
344 unsigned int num_blocks;
345 if(local_t_processed == 0) {
349 local_t_processed = local_t_processed * ( OptimizationParameterT::SDIVINT *
sizeof(NumericalRep) * OptimizationParameterT::SNUMREG ) * _in_bin[_range - 1];
350 num_blocks = (int) floor(( (
float) _nsamp - ((float)_maxshift) )) / ( (float) ( local_t_processed ) );
354 int remainder = _nsamp - ( num_blocks * local_t_processed ) - _maxshift;
355 remainder = (int) floor((
float) remainder / (float) _in_bin[_range - 1]) / (float) ( OptimizationParameterT::SDIVINT *
sizeof(NumericalRep) * OptimizationParameterT::SNUMREG );
356 remainder = remainder * ( OptimizationParameterT::SDIVINT *
sizeof(NumericalRep) * OptimizationParameterT::SNUMREG ) * _in_bin[_range - 1];
358 for (
unsigned int i = 0; i < _range; i++) {
360 _t_processed[i].resize(num_blocks+1);
362 for (
unsigned j = 0; j < num_blocks ; j++) {
363 _t_processed[i][j] = (int) floor(( (
float) ( local_t_processed ) / (
float) _in_bin[i] ) / (float) ( OptimizationParameterT::SDIVINT *
sizeof(NumericalRep) * OptimizationParameterT::SNUMREG ));
364 _t_processed[i][j] = _t_processed[i][j] * ( OptimizationParameterT::SDIVINT *
sizeof(NumericalRep) * OptimizationParameterT::SNUMREG );
367 _t_processed[i][num_blocks] = (int) floor(( (
float) ( remainder ) / (
float) _in_bin[i] ) / (float) ( OptimizationParameterT::SDIVINT *
sizeof(NumericalRep) * OptimizationParameterT::SNUMREG ));
368 _t_processed[i][num_blocks] = _t_processed[i][num_blocks] * ( OptimizationParameterT::SDIVINT *
sizeof(NumericalRep) * OptimizationParameterT::SNUMREG );
370 _num_tchunks = num_blocks + 1;
378 max_tsamps = (
unsigned int) ( ( gpu_memory ) / ( _nchans * (
sizeof(float) +
sizeof(
unsigned short) )+ _SPS_mem_requirement*OptimizationParameterT::MIN_DMS_PER_SPS_RUN ));
381 if (_maxshift > max_tsamps) {
382 PANDA_LOG_ERROR<<
"\nERROR!! Your GPU doens't have enough memory for this number of dispersion trials." 383 <<
"\nReduce your maximum dm or increase the size of your dm step";
388 if (_nsamp < max_tsamps) {
391 int local_t_processed = (int) floor(( (
float) ( _nsamp - _maxshift ) / (
float) _in_bin[_range - 1] ) / (float) ( OptimizationParameterT::SDIVINT *
sizeof(NumericalRep) * OptimizationParameterT::SNUMREG ));
392 local_t_processed = local_t_processed * ( OptimizationParameterT::SDIVINT *
sizeof(NumericalRep) * OptimizationParameterT::SNUMREG ) * _in_bin[_range - 1];
393 for (
unsigned int i = 0; i < _range; i++) {
394 _t_processed[i].resize(1);
395 _t_processed[i][0] = (int) floor(( (
float) ( local_t_processed ) / (
float) _in_bin[i] ) / (float) ( OptimizationParameterT::SDIVINT *
sizeof(NumericalRep) * OptimizationParameterT::SNUMREG ));
396 _t_processed[i][0] = _t_processed[i][0] * ( OptimizationParameterT::SDIVINT *
sizeof(NumericalRep) * OptimizationParameterT::SNUMREG );
404 unsigned int samp_block_size = max_tsamps - _maxshift;
411 unsigned int local_t_processed = (
unsigned int) floor(( (
float) ( samp_block_size ) / (
float) _in_bin[_range - 1] ) / (float) ( OptimizationParameterT::SDIVINT * 2 * OptimizationParameterT::SNUMREG ));
412 local_t_processed = local_t_processed * ( OptimizationParameterT::SDIVINT *
sizeof(NumericalRep) * OptimizationParameterT::SNUMREG ) * _in_bin[_range - 1];
415 unsigned int num_blocks = (
unsigned int) floor(( (
float) _nsamp - (float) _maxshift ) / ( (float) local_t_processed ));
418 unsigned int remainder = _nsamp - ( num_blocks * local_t_processed ) - _maxshift;
420 for (
unsigned int i = 0; i < _range; i++) {
422 _t_processed[i].resize(num_blocks + 1);
424 for (
unsigned int j = 0; j < num_blocks; j++) {
425 _t_processed[i][j] = (int) floor(( (
float) ( local_t_processed ) / (
float) _in_bin[i] ) / (float) ( OptimizationParameterT::SDIVINT * 2 * OptimizationParameterT::SNUMREG ));
426 _t_processed[i][j] = _t_processed[i][j] * ( OptimizationParameterT::SDIVINT *
sizeof(NumericalRep) * OptimizationParameterT::SNUMREG );
429 _t_processed[i][num_blocks] = (int) floor(( (
float) ( remainder ) / (
float) _in_bin[i] ) / (float) ( OptimizationParameterT::SDIVINT * 2 * OptimizationParameterT::SNUMREG ));
430 _t_processed[i][num_blocks] = _t_processed[i][num_blocks] * ( OptimizationParameterT::SDIVINT *
sizeof(NumericalRep) * OptimizationParameterT::SNUMREG );
432 _num_tchunks = num_blocks + 1;
437 _dedispersed_time_samples = 0;
438 for (
unsigned t = 0; t < _num_tchunks; ++t)
440 _dedispersed_time_samples += (_t_processed[0][t] * _in_bin[0]);
unsigned int nifs() const
The number of IF channels.
unsigned int total_ndms() const
The total number of dm.
FrequencyType foff() const
Frequency channel width.
std::vector< unsigned int > ndms() const
An array containing the number of dms for each range.
std::vector< Dm > user_dm_step() const
An array containing dm step of each dm range, specified by the user.
unsigned int num_tchunks() const
The number of chunks the data are divided in.
unsigned int maxshift() const
Value used to make sure that dms from dm_low to dm_high are used.
static constexpr unsigned int nbits()
The number of bits of the input data.
std::vector< float > dmshifts() const
An array containing a constant associated with each channel to perform dedispersion algorithm...
std::vector< Dm > dm_step() const
An array containing the step size of each dm range.
std::vector< Dm > dm_high() const
An array containing the highest bound of each dm range.
unsigned int range() const
The number of dm ranges.
FrequencyType fch1() const
Frequency of the first channel.
Some limits and constants for FLDO.
std::vector< Dm > dm_low() const
An array containing the lowest bound of each dm range.
unsigned int max_ndms() const
The maximum number of dm.
TimeType tsamp() const
Time sample value.
unsigned int nsamp() const
The number of time samples.
Dm max_dm() const
The highest dm value.
std::vector< Dm > user_dm_low() const
An array containing lowest band of each dm range, specified by the user.
std::vector< std::vector< int > > t_processed() const
The number of time samples required to search for a dm in each dm range.
std::vector< Dm > user_dm_high() const
An array containing highest band of each dm range, specified by the user.
unsigned int nchans() const
The number of frequency channels.