// File includes followed by the FPGA-enabled (ENABLE_SKA_RABBIT) constructor.
// NOTE(review): this chunk is a lossy extraction -- original source line
// numbers are fused into the text, and the constructor's member-initialiser
// list plus opening brace (original lines 48-51) are missing from view.
// Code below is kept byte-identical; only comments are added.
25 #include "cheetah/ddtr/fpga/DdtrWorker.h" 26 #include "cheetah/ddtr/fpga/Ddtr.h" 27 #include "panda/Error.h" 28 #include "panda/Log.h" 29 #ifdef ENABLE_SKA_RABBIT 30 #include "panda/arch/altera/DevicePointer.h" 31 #include "panda/arch/altera/Aocx.h" 32 #include "panda/arch/altera/Kernel.h" 33 #include "panda/arch/altera/DeviceCopy.h" 34 #include "rabbit/utils/ImageManager.h" 35 #include "rabbit/ddtr/Ddtr.h" 36 #include "rabbit/ddtr/opencl/input_params.h" 37 #include "rabbit/ddtr/opencl/kernel_params.h" 38 #endif // ENABLE_SKA_RABBIT 46 #ifdef ENABLE_SKA_RABBIT 47 template<
typename BufferType,
typename DmTrialsType>
// Constructor (FPGA build): resolves the compiled FPGA image for this device,
// loads and builds the OpenCL program, then creates two command queues and
// two instances of the "dedisperse" kernel used by operator().
48 DdtrWorker<BufferType, DmTrialsType>::DdtrWorker(ddtr::Config
const& config, panda::PoolResource<Fpga>
const& device)
52 ska::rabbit::ddtr::Ddtr ddtr;
// Derive the image lookup key from the device name by cutting at the ':'.
53 std::string str=_device.device_name();
// NOTE(review): resize(find(':')-1) also drops the character immediately
// before the ':' -- confirm this off-by-one is intended (vs. resize(find(':'))).
54 str.resize(str.find(
':')-1);
// Look up the path of the pre-compiled .aocx image for this device.
55 std::string filename=ddtr.image_manager().image_path(str);
56 PANDA_LOG <<
"\nDevice: "<<_device.device_name();
57 PANDA_LOG<<
"\nCompiled binary: "<<filename<<
"\n";
// Load the binary image and build an OpenCL program for the device.
58 ska::panda::altera::Aocx aocx(filename);
59 ska::panda::altera::Program program(aocx,_device);
// Two command queues: the second is used for the output device buffers.
60 _command_queue.reset(
new panda::altera::CommandQueue(_device));
61 _command_queue_2.reset(
new panda::altera::CommandQueue(_device));
// Two handles to the same "dedisperse" kernel entry point.
62 _kernel.reset(
new panda::altera::Kernel(
"dedisperse", program));
63 _kernel_2.reset(
new panda::altera::Kernel(
"dedisperse", program));
65 PANDA_LOG <<
"DdtrWorker constructed";
67 #else // ENABLE_SKA_RABBIT 68 template<
typename BufferType,
typename DmTrialsType>
// Constructor (non-FPGA build): no-op stub; parameters intentionally unnamed.
// NOTE(review): the (presumably empty) body braces (original lines 70-71)
// are missing from this extraction.
69 DdtrWorker<BufferType, DmTrialsType>::DdtrWorker(ddtr::Config
const&, panda::PoolResource<Fpga>
const& )
72 #endif // ENABLE_SKA_RABBIT 74 template<
typename BufferType,
typename DmTrialsType>
// Destructor: default cleanup; owned queues/kernels are released via their
// smart pointers. (Body braces, original lines 76-78, missing from view.)
75 DdtrWorker<BufferType, DmTrialsType>::~DdtrWorker()
79 #ifdef ENABLE_SKA_RABBIT 80 template<
typename BufferType,
typename DmTrialsType>
// Run dedispersion on the FPGA for one input block.
// - data: input time/frequency buffer (aggregated blocks).
// - _dm_factors: per-channel dispersion shift factors.
// - _max_delay: samples lost to the maximum dispersion delay.
// Returns a DmTrialsType filled from the FPGA output.
// NOTE(review): extraction is lossy here too -- braces and a few statements
// (e.g. original lines 161-167, which presumably increment samp_idx) are
// missing from view; code is kept byte-identical except the copy fix below.
81 std::shared_ptr<DmTrialsType> DdtrWorker<BufferType, DmTrialsType>::operator()(BufferType
const& data, std::vector<double> _dm_factors, std::size_t _max_delay)
83 auto const& tf_obj = *(data.composition().front());
84 std::size_t nchans = tf_obj.number_of_channels();
85 std::size_t nsamples = data.data_size() / nchans;
// Convert the byte offset of the first block into a sample index.
86 data::DimensionIndex<data::Time> offset_samples(data.offset_first_block()/(nchans *
sizeof(NumericalRep)));
87 auto const& start_time = tf_obj.start_time(offset_samples);
// Metadata describing the DM trials produced from this block.
90 _dm_trial_metadata.reset(
new data::DmTrialsMetadata(tf_obj.sample_interval(), nsamples - _max_delay));
91 for (
auto dm: this->_config.dm_trials())
93 _dm_trial_metadata->emplace_back(dm, 1);
// DM grid parameters; assumes dm_trials() is a uniformly spaced list.
97 std::size_t dm_start=this->_config.dm_trials()[0].value();
98 std::size_t dm_step=this->_config.dm_trials()[1].value()-this->_config.dm_trials()[0].value();
99 std::size_t dm_size=this->_config.dm_trials().size();
// Host-side staging buffers; output holds 3 chunks of nsamples*TDMS uints.
102 panda::Buffer<float> dm_shift_Buffer_1(nchans);
103 panda::Buffer<float> dm_shift_Buffer_2(nchans);
104 size_t output_total = 3*nsamples * TDMS *
sizeof(
unsigned int);
105 panda::Buffer<unsigned char> output_buffer(output_total);
106 std::fill(output_buffer.begin(),output_buffer.end(),0);
// Device buffers: input spectra, two per-channel shift tables, two outputs.
109 panda::altera::DevicePointer<NumericalRep> buff_in(_device, nsamples*nchans*
sizeof(
unsigned char),*_command_queue);
110 panda::copy(data.cbegin(), data.cend(), buff_in.begin());
111 panda::altera::DevicePointer<NumericalRep> buff_1(_device,
sizeof(
float)*nchans,*_command_queue);
112 panda::altera::DevicePointer<NumericalRep> buff_2(_device,
sizeof(
float)*nchans,*_command_queue);
115 panda::altera::DevicePointer<NumericalRep> buff_out(_device, nsamples*TDMS*
sizeof(
unsigned int),*_command_queue_2);
116 panda::altera::DevicePointer<NumericalRep> buff_out_2(_device, nsamples*TDMS*
sizeof(
unsigned int),*_command_queue_2);
120 int dsamp=1;
float dm1=dm_start, dm2=(TDMS/2)*dm_step;
// Populate per-channel shift tables.
// NOTE(review): both stores write through .begin() (element 0) every
// iteration rather than element c -- confirm against the kernel's layout.
121 for (
int c = 0; c < nchans; ++c) {
122 float shift = _dm_factors[c];
123 *(dm_shift_Buffer_1.begin()) = shift * ((
float) dm_step * (float) (1/TSAMP) / (float) dsamp);
124 *(dm_shift_Buffer_2.begin()) = shift * ((
float) dm_step * (float) (1/TSAMP) / (float) dsamp);
// Upload shift tables to the device.
128 panda::copy(dm_shift_Buffer_1.begin(), dm_shift_Buffer_1.end(), buff_1.begin());
129 panda::copy(dm_shift_Buffer_2.begin(), dm_shift_Buffer_2.end(), buff_2.begin());
// First half of the DM range.
133 (*_kernel)(*_command_queue,1, 1, (&*buff_1), (&*buff_2),(&*buff_in),(&*buff_out), dm1, dm2, dsamp);
135 panda::copy(buff_out.begin(), buff_out.end(), output_buffer.begin());
// Second half of the DM range.
// NOTE(review): enqueued on *_command_queue, not *_command_queue_2 --
// confirm whether the second queue was intended here.
139 (*_kernel_2)(*_command_queue,1, 1, (&*buff_1), (&*buff_2),(&*buff_in),(&*buff_out_2), dm1, dm2, dsamp);
// BUGFIX: the original copied the range [buff_out_2.begin(), buff_out.end())
// -- iterators from two DIFFERENT device buffers (an invalid range) -- and
// wrote the result starting at output_buffer.end(), i.e. past the end of the
// host buffer (undefined behaviour). Copy the whole of buff_out_2 into the
// second chunk of output_buffer, directly after the first kernel's output.
// NOTE(review): the unpack loop below indexes chunks as i*nsamples*TDMS in
// NumericalRep units; verify that offset agrees with this byte offset.
141 panda::copy(buff_out_2.begin(), buff_out_2.end(), output_buffer.begin() + nsamples*TDMS*sizeof(unsigned int));
144 PANDA_LOG <<
"DM trials completed";
// Unpack the FPGA output (register-interleaved layout) into the DM trials.
147 std::shared_ptr<DmTrialsType> dmtime = DmTrialsType::make_shared(_dm_trial_metadata, start_time);
148 DmTrialsType& dmtrials = *(dmtime);
149 NumericalRep
const* tf_data_fpga =
static_cast<NumericalRep const*
>(output_buffer.data());
152 int start_count=(int)dm_start, end_count=(
int)dm_size, samp_idx=0;
// i selects the kernel-output chunk; dm_count the trial within the chunk;
// j/r walk NUM_REGS-wide register groups across the sample axis.
153 for(
int i = 0; i < 2; ++i)
154 for (
int dm_count = 0; dm_count < TDMS; ++dm_count) {
156 for(
int j = 0; j < (nsamples - _max_delay)/NUM_REGS; ++j) {
157 for (
int r = 0; r < NUM_REGS; r++) {
158 std::size_t input_idx = i*nsamples*TDMS + (j * (TDMS * NUM_REGS)) + (dm_count*NUM_REGS) + r;
// Normalise the accumulated value by the number of channels.
159 float acc=(float) tf_data_fpga[input_idx] / (
float) nchans;
// Keep only trials inside the configured DM range.
160 if( ((dm_count+i*TDMS) >= start_count) && ((dm_count+i*TDMS) < end_count) )
162 dmtrials[dm_count][samp_idx] = acc;
168 PANDA_LOG <<
"DM trials copied";
171 #else // ENABLE_SKA_RABBIT 172 template<
typename BufferType,
typename DmTrialsType>
// operator() (non-FPGA build): logs an error and returns an empty (null)
// DmTrials pointer. (The return statement and closing brace, original
// lines 177-178, are missing from this extraction.)
173 std::shared_ptr<DmTrialsType> DdtrWorker<BufferType, DmTrialsType>::operator()(BufferType
const&, std::vector<double>, std::size_t)
175 PANDA_LOG_ERROR <<
"call to Ddtr::fpga module without compiling in the fpga modules";
176 std::shared_ptr<DmTrialsType> dmtime;
179 #endif // ENABLE_SKA_RABBIT Some limits and constants for FLDO.