Cheetah - SKA - PSS - Prototype Time Domain Search Pipeline
DdtrWorker.cpp
/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2016 The SKA organisation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "cheetah/ddtr/fpga/DdtrWorker.h"
#include "cheetah/ddtr/fpga/Ddtr.h"
#include "panda/Error.h"
#include "panda/Log.h"
#ifdef ENABLE_SKA_RABBIT
#include "panda/arch/altera/DevicePointer.h"
#include "panda/arch/altera/Aocx.h"
#include "panda/arch/altera/Kernel.h"
#include "panda/arch/altera/DeviceCopy.h"
#include "rabbit/utils/ImageManager.h"
#include "rabbit/ddtr/Ddtr.h"
#include "rabbit/ddtr/opencl/input_params.h"
#include "rabbit/ddtr/opencl/kernel_params.h"
#endif // ENABLE_SKA_RABBIT

namespace ska {
namespace cheetah {
namespace ddtr {
namespace fpga {


#ifdef ENABLE_SKA_RABBIT
template<typename BufferType, typename DmTrialsType>
DdtrWorker<BufferType, DmTrialsType>::DdtrWorker(ddtr::Config const& config, panda::PoolResource<Fpga> const& device)
    : _config(config)
    , _device(device)
{
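    // Set-up flow: the compiled OpenCL image for this device is looked up from the rabbit
    // ImageManager, keyed on the device name truncated just before the ':' separator (the
    // character immediately preceding the ':' is dropped as well, presumably a trailing space).
    // The .aocx image is then programmed onto the FPGA, and two command queues plus two
    // instances of the "dedisperse" kernel are created: one queue drives kernel execution and
    // host-to-device transfers, the other handles device-to-host output transfers.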
    ska::rabbit::ddtr::Ddtr ddtr;
    std::string str = _device.device_name();
    str.resize(str.find(':') - 1);
    std::string filename = ddtr.image_manager().image_path(str);
    PANDA_LOG << "\nDevice: " << _device.device_name();
    PANDA_LOG << "\nCompiled binary: " << filename << "\n";
    ska::panda::altera::Aocx aocx(filename);
    ska::panda::altera::Program program(aocx, _device);
    _command_queue.reset(new panda::altera::CommandQueue(_device));
    _command_queue_2.reset(new panda::altera::CommandQueue(_device));
    _kernel.reset(new panda::altera::Kernel("dedisperse", program));
    _kernel_2.reset(new panda::altera::Kernel("dedisperse", program));

    PANDA_LOG << "DdtrWorker constructed";
}
#else // ENABLE_SKA_RABBIT
template<typename BufferType, typename DmTrialsType>
DdtrWorker<BufferType, DmTrialsType>::DdtrWorker(ddtr::Config const&, panda::PoolResource<Fpga> const&)
{
}
#endif // ENABLE_SKA_RABBIT

template<typename BufferType, typename DmTrialsType>
DdtrWorker<BufferType, DmTrialsType>::~DdtrWorker()
{
}

#ifdef ENABLE_SKA_RABBIT
template<typename BufferType, typename DmTrialsType>
std::shared_ptr<DmTrialsType> DdtrWorker<BufferType, DmTrialsType>::operator()(BufferType const& data, std::vector<double> _dm_factors, std::size_t _max_delay)
{
    auto const& tf_obj = *(data.composition().front());
    std::size_t nchans = tf_obj.number_of_channels();
    std::size_t nsamples = data.data_size() / nchans;
    data::DimensionIndex<data::Time> offset_samples(data.offset_first_block() / (nchans * sizeof(NumericalRep)));
    auto const& start_time = tf_obj.start_time(offset_samples);

    // generate _dm_trial_metadata based on the samples available
    _dm_trial_metadata.reset(new data::DmTrialsMetadata(tf_obj.sample_interval(), nsamples - _max_delay));
    for (auto dm : this->_config.dm_trials())
    {
        _dm_trial_metadata->emplace_back(dm, 1);
    }

    // get DM details from the config object
    std::size_t dm_start = this->_config.dm_trials()[0].value();
    std::size_t dm_step = this->_config.dm_trials()[1].value() - this->_config.dm_trials()[0].value();
    std::size_t dm_size = this->_config.dm_trials().size();
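    // Note: dm_step is derived from the difference between the first two configured trials,
    // which assumes the DM trial list is uniformly spaced; storing these values as
    // std::size_t truncates any fractional DM values.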

    // create host buffers
    panda::Buffer<float> dm_shift_Buffer_1(nchans);
    panda::Buffer<float> dm_shift_Buffer_2(nchans);
    size_t output_total = 3 * nsamples * TDMS * sizeof(unsigned int); // sized larger than the two kernel outputs
    panda::Buffer<unsigned char> output_buffer(output_total);
    std::fill(output_buffer.begin(), output_buffer.end(), 0); // zero the output buffer

    // create device pointers with device memory buffers, used to copy host to device
    panda::altera::DevicePointer<NumericalRep> buff_in(_device, nsamples * nchans * sizeof(unsigned char), *_command_queue);
    panda::copy(data.cbegin(), data.cend(), buff_in.begin());
    panda::altera::DevicePointer<NumericalRep> buff_1(_device, sizeof(float) * nchans, *_command_queue);
    panda::altera::DevicePointer<NumericalRep> buff_2(_device, sizeof(float) * nchans, *_command_queue);

    // queue used to copy data from device to host
    panda::altera::DevicePointer<NumericalRep> buff_out(_device, nsamples * TDMS * sizeof(unsigned int), *_command_queue_2);
    panda::altera::DevicePointer<NumericalRep> buff_out_2(_device, nsamples * TDMS * sizeof(unsigned int), *_command_queue_2);

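    // The dedispersion is performed as two back-to-back kernel launches, each producing TDMS
    // DM trials. The first launch's output is copied to the start of output_buffer and the
    // second launch's output is appended directly after it, so the host buffer holds the two
    // result blocks contiguously.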
    for (int k = 0; k < 2; ++k) // two kernels called one by one
    {
        int dsamp = 1;
        float dm1 = dm_start, dm2 = (TDMS/2) * dm_step;
        for (std::size_t c = 0; c < nchans; ++c) {
            float shift = _dm_factors[c]; // calculated based on freq information
            *(dm_shift_Buffer_1.begin() + c) = shift * ((float) dm_step * (float) (1/TSAMP) / (float) dsamp);
            *(dm_shift_Buffer_2.begin() + c) = shift * ((float) dm_step * (float) (1/TSAMP) / (float) dsamp);
        }

        // copy host buffer data to device buffers
        panda::copy(dm_shift_Buffer_1.begin(), dm_shift_Buffer_1.end(), buff_1.begin());
        panda::copy(dm_shift_Buffer_2.begin(), dm_shift_Buffer_2.end(), buff_2.begin());

        if (k == 0) {
            // single work-item/thread, two arrays of 500 DMs, (TDMS/2)
            (*_kernel)(*_command_queue, 1, 1, (&*buff_1), (&*buff_2), (&*buff_in), (&*buff_out), dm1, dm2, dsamp);
            // copy device buffer data to host, into the start of output_buffer
            panda::copy(buff_out.begin(), buff_out.end(), output_buffer.begin());
        }
        else {
            // single work-item/thread, two arrays of 500 DMs, (TDMS/2)
            (*_kernel_2)(*_command_queue, 1, 1, (&*buff_1), (&*buff_2), (&*buff_in), (&*buff_out_2), dm1, dm2, dsamp);
            // copy the second kernel's output to host, immediately after the first kernel's output
            panda::copy(buff_out_2.begin(), buff_out_2.end(), output_buffer.begin() + nsamples * TDMS * sizeof(unsigned int));
        }
    }
    PANDA_LOG << "DM trials completed";

    // create DmTrialsType instance
    std::shared_ptr<DmTrialsType> dmtime = DmTrialsType::make_shared(_dm_trial_metadata, start_time);
    DmTrialsType& dmtrials = *(dmtime);
    NumericalRep const* tf_data_fpga = static_cast<NumericalRep const*>(output_buffer.data()); // fpga output

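    // Output layout (as implied by the indexing below): within each kernel's result block the
    // data are grouped into chunks of NUM_REGS time samples; inside a chunk, the NUM_REGS
    // values for each of the TDMS DM trials are stored contiguously, DM trial by DM trial.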
    // save host buffer data into the dmtrials object
    int start_count = (int) dm_start, end_count = (int) dm_size, samp_idx = 0;
    for (int i = 0; i < 2; ++i) // two kernels called
    {
        for (int dm_count = 0; dm_count < TDMS; ++dm_count) {
            samp_idx = 0;
            for (int j = 0; j < (nsamples - _max_delay) / NUM_REGS; ++j) {
                for (int r = 0; r < NUM_REGS; r++) {
                    std::size_t input_idx = i * nsamples * TDMS + (j * (TDMS * NUM_REGS)) + (dm_count * NUM_REGS) + r;
                    float acc = (float) tf_data_fpga[input_idx] / (float) nchans;
                    if( ((dm_count + i * TDMS) >= start_count) && ((dm_count + i * TDMS) < end_count) )
                    {
                        dmtrials[dm_count][samp_idx] = acc;
                        samp_idx = samp_idx + 1;
                    }
                }
            }
        }
    }
    PANDA_LOG << "DM trials copied";
    return dmtime;
}
#else // ENABLE_SKA_RABBIT
template<typename BufferType, typename DmTrialsType>
std::shared_ptr<DmTrialsType> DdtrWorker<BufferType, DmTrialsType>::operator()(BufferType const&, std::vector<double>, std::size_t)
{
    PANDA_LOG_ERROR << "call to Ddtr::fpga module without compiling in the fpga modules";
    std::shared_ptr<DmTrialsType> dmtime;
    return dmtime;
}
#endif // ENABLE_SKA_RABBIT

} // namespace fpga
} // namespace ddtr
} // namespace cheetah
} // namespace ska