Cheetah - SKA - PSS - Prototype Time Domain Search Pipeline
SpsCuda.cpp
1 /*
2  * The MIT License (MIT)
3  *
4  * Copyright (c) 2016 The SKA organisation
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "cheetah/sps/astroaccelerate/detail/SpsCuda.h"
25 #include "cheetah/cuda_utils/cuda_errorhandling.h"
26 #include "cheetah/data/DmTrialsMetadata.h"
27 #include "cheetah/data/DmTrials.h"
28 #include "cheetah/data/SpCcl.h"
29 #include "cheetah/data/TimeFrequency.h"
30 #include "cheetah/data/Units.h"
31 #include "cheetah/data/DedispersionMeasure.h"
32 #include "cheetah/cuda_utils/nvtx.h"
33 #include "panda/Resource.h"
34 #include "panda/Log.h"
35 #include "panda/Error.h"
36 #include <vector>
37 #include <memory>
38 #include <iostream>
39 
40 namespace ska {
41 namespace cheetah {
42 namespace sps {
43 namespace astroaccelerate {
44 
45 
46 #ifdef ENABLE_ASTROACCELERATE
47 template<typename DmHandler, typename SpHandler>
48 void SpsCuda::operator()(panda::PoolResource<panda::nvidia::Cuda>& gpu
49  , BufferType& agg_buf
50  , DmHandler& dm_handler
51  , SpHandler& sp_handler
52  )
53 {
54  PUSH_NVTX_RANGE("sps_astroaccelerate_SpsCuda_operator",0);
55  PANDA_LOG << "astroaccelerate::SpsCuda::operator() invoked (on device "<< gpu.device_id() << ")";
56  /* NOTE: This code is now pointless/wrong. The maxshift it not a good predictor of the minimum
57  * processable data length. This requires a new method on ::astroaccelerate::DedispersionStrategy
58  */
59  if (agg_buf.data_size() < (std::size_t) _dedispersion_strategy->get_maxshift())
60  {
61  const std::string msg("SpsCuda: data buffer size < maxshift ");
62  PANDA_LOG_ERROR << msg << "(" << agg_buf.data_size() << "<" << _dedispersion_strategy->get_maxshift() << ")";
63  throw panda::Error(msg);
64  }
65 
66  auto new_samples = agg_buf.data_size()/ _dedispersion_strategy->get_nchans();
67 
68  if(new_samples != _samples)
69  {
70  // TODO make thread safe (dedispersion_strategy can be changed by another thread)
71  PANDA_LOG_WARN << "Aggregation buffer size has changed from "
72  << _samples << " to " << new_samples << " samples";
73  _dedispersion_strategy->resize(new_samples, _dedispersion_strategy->get_gpu_memory());
74  calculate_internals(_dm_trial_metadata->fundamental_sampling_interval());
75  _samples = new_samples;
76  PANDA_LOG_DEBUG << "SpsCuda will now output "
77  << _dedispersion_strategy->get_dedispersed_time_samples()
78  << " time samples per call";
79  }
80 
84  data::DimensionIndex<data::Time> offset_samples(agg_buf.offset_first_block()/_dedispersion_strategy->get_nchans());
85  auto const& start_time = agg_buf.composition().front()->start_time(offset_samples);
86  PANDA_LOG_DEBUG << "Start time of current buffer: " << start_time;
87 
91  auto dmtrials = data::DmTrials<Cpu,float>::make_shared(_dm_trial_metadata, start_time);
92  std::vector<float> sps_cands;
93  DataType* tf_data = reinterpret_cast<DataType*>(agg_buf.data());
94 
95  ::astroaccelerate::AstroAccelerate<void> sps_astroaccelerate(*_dedispersion_strategy);
96  PANDA_LOG_DEBUG << "Calling astroaccelerate::AstroAccelerate::run_dedispersion_sps";
97  PUSH_NVTX_RANGE("astroaccelerate_run_dedispersion_sps",1);
98 
99  sps_astroaccelerate.run_dedispersion_sps(
100  gpu.device_id()
101  ,tf_data
102  ,*_dm_time
103  ,sps_cands);
104 
105  POP_NVTX_RANGE; //astroaccelerate_run_dedispersion_sps
106  PANDA_LOG_DEBUG << "astroaccelerate::AstroAccelerate::run_dedispersion_sps complete";
107 
108 
109  //Copy and convert SPS output candidates into SpCandidate instances
110  PUSH_NVTX_RANGE("astroaccelerate_cands_to_cheetah_cands",2);
111  auto sp_candidate_list = std::make_shared<data::SpCcl<uint8_t>>(agg_buf.composition(), offset_samples);
112  sp_candidate_list->reserve(sps_cands.size()/4);
113  for (std::size_t idx=0; idx<sps_cands.size(); idx+=4)
114  {
115  if(sps_cands[idx] == 0 && sps_cands[idx + 1 ]==0 && sps_cands[idx+2] ==0) break;
116  sp_candidate_list->emplace_back(
117  sps_cands[idx] * data::parsecs_per_cube_cm
118  , data::SpCcl<uint8_t>::SpCandidateType::MsecTimeType(sps_cands[idx+1] * boost::units::si::seconds) // tstart
119  , data::SpCcl<uint8_t>::SpCandidateType::MsecTimeType(sps_cands[idx+3] * boost::units::si::seconds) // width
120  , sps_cands[idx+2] // sigma
121  );
122  }
123 
124  POP_NVTX_RANGE; //astroaccelerate_cands_to_cheetah_cands
125 
126  PANDA_LOG_DEBUG << "run_dedispersion_sps returned "
127  << sp_candidate_list->size() << " candidates";
128 
129  //Copy and convert DmTime data into DmTrials object
130  PUSH_NVTX_RANGE("astroaccelerate_trials_to_cheetah_trials",3);
131  PANDA_LOG_DEBUG << "Copying astroaccelerate::DmTime object contents into DmTrials instance";
132  std::size_t true_trial = 0;
133  for (std::size_t range=0; range < _dm_time->number_of_dm_ranges(); ++range)
134  {
135  for (int trial=0; trial < _dedispersion_strategy->get_ndms()[range]; ++trial)
136  {
137  assert((*dmtrials)[true_trial].size() == _dm_time->nsamples()[range]);
138  std::copy((*_dm_time)[range][trial],
139  (*_dm_time)[range][trial]+_dm_time->nsamples()[range],
140  (*dmtrials)[true_trial].begin());
141  ++true_trial;
142  }
143  }
144  POP_NVTX_RANGE; // astroaccelerate_trials_to_cheetah_trials
145  dm_handler(dmtrials);
146  sp_handler(sp_candidate_list);
147  PANDA_LOG << "astroaccelerate::SpsCuda::operator() complete";
148  POP_NVTX_RANGE; //sps_astroaccelerate_SpsCuda_operator
149 }
150 #endif // ENABLE_ASTROACCELERATE
151 
152 } // namespace astroaccelerate
153 } // namespace sps
154 } // namespace cheetah
155 } // namespace ska
Some limits and constants for FLDO.
Definition: Brdz.h:35