// NOTE(review): this text looks like an extraction in which the original file's line numbers
// are fused into the code and some original lines (the matching #ifdef, the function braces,
// original lines 50-52, ...) are not visible. The comments below describe only what is visible.
//
// FftWorker::operator() (ENABLE_OPENCL build): runs the FPGA FFT kernel chain on `input` and
// hands `output` to the last kernel.
//
// Template parameters:
//   T           - element type of the input series; device buffers hold Complex<T>.
//   InputAlloc  - allocator type of the input TimeSeries.
//   OutputAlloc - allocator type of the output FrequencySeries.
// Parameters:
//   input  - time series on the cheetah::Fpga architecture; passed to the first kernel.
//   output - frequency series on the cheetah::Fpga architecture; passed to the last kernel.
// Errors:
//   a panda::Error is constructed when input.size() exceeds the twiddle table size; how it is
//   completed/raised is not visible in this view.
24 #include "cheetah/fft/altera/detail/FftWorker.h" 25 #include "cheetah/fft/altera/Fft.h" 26 #include "panda/Error.h" 27 #include "panda/Log.h" 28 #include "panda/Copy.h" 30 #include "panda/arch/altera/DevicePointer.h" 31 #include "panda/arch/altera/DeviceCopy.h" 32 #endif // ENABLE_OPENCL 42 template <
typename T,
typename InputAlloc,
typename OutputAlloc>
43 void FftWorker::operator()(data::TimeSeries<cheetah::Fpga, T, InputAlloc>
const& input
44 , data::FrequencySeries<cheetah::Fpga, FftWorker::Complex<T>
45 , OutputAlloc>& output)
// Twiddle table of the "eight million point" transform object; its size drives every buffer
// size computed below.
47 auto const& twiddles=_cxft.eight_million_point().twiddles();
// Size guard: the input may not be longer than the twiddle table.
// NOTE(review): only the construction of the error object is visible here; the lines that
// presumably finish the message and throw it are missing from this view.
48 if(input.size() > twiddles.size()){
49 panda::Error e(
"Invalid FFT size! Kernel compiled for the size: ");
// half_fft_size is half the twiddle count; sqrt_half_fft is its square root truncated to int.
53 size_t half_fft_size=twiddles.size()/2, sqrt_half_fft=int(sqrt(half_fft_size));
// Device buffers of half_fft_size Complex<T> elements each. dev_in_ev / dev_in_od are passed to
// the first kernel and later to the fetch kernel (names suggest even/odd halves - TODO confirm).
55 panda::altera::DevicePointer<Complex<T>> dev_in_ev(_device, half_fft_size, *_first_queue);
56 panda::altera::DevicePointer<Complex<T>> dev_in_od(_device, half_fft_size, *_first_queue);
// dev_temp is the transpose kernel's target when i == 0 and the fetch kernel's source when i == 1.
57 panda::altera::DevicePointer<Complex<T>> dev_temp(_device
58 , half_fft_size,*_data_queue);
// dev_sig_ev / dev_sig_od are the transpose kernel's targets when i == 1 and are handed to the
// last kernel.
59 panda::altera::DevicePointer<Complex<T>> dev_sig_ev(_device
60 , half_fft_size, *_data_queue);
61 panda::altera::DevicePointer<Complex<T>> dev_sig_od(_device
62 , half_fft_size, *_data_queue);
// Device buffer sized for the whole twiddle table, filled by the copy that follows.
63 panda::altera::DevicePointer<Complex<T>> dev_twd(_device
64 , twiddles.size(), *_last_queue);
65 panda::copy(twiddles.cbegin(), twiddles.cend(), dev_twd.begin());
// First stage: invoke _first_kernel on _first_queue with the input buffer, the two dev_in_*
// buffers and the twiddle count, then call finish() on that queue before continuing.
71 (*_first_kernel)(*_first_queue, 1, 1,
static_cast<cl_mem
>(input.begin())
72 , (&*dev_in_ev), (&*dev_in_od), twiddles.size());
73 (*_first_queue).finish();
// Scalar kernel arguments shared by every kernel invocation in the loops below.
82 cl_int mangle_int=0, twidle_int=1, inverse_int=0;
83 cl_uint rows_arg=sqrt_half_fft, columns_arg=sqrt_half_fft;
// log2() results are truncated to cl_int.
// NOTE(review): rows/columns below equal rows_arg/columns_arg only if sqrt_half_fft is an
// exact power of two - confirm that this always holds for the supported sizes.
84 cl_int log_rows_arg=log2(sqrt_half_fft), log_columns_arg=log2(sqrt_half_fft);
85 int columns = (1 << log_columns_arg);
86 int rows = (1 << log_rows_arg);
// delta_const = -2*pi / (columns * rows); passed only to the transpose kernel.
87 float delta_const = -2.0f * (float)M_PI / (columns * rows);
// Outer loop: j selects the buffer pair (j == 0 -> dev_in_ev / dev_sig_ev,
// j == 1 -> dev_in_od / dev_sig_od).
88 for (
int j = 0; j < 2; ++j){
// Inner loop: two passes per j. Pass i == 0 goes from dev_in_* to dev_temp, pass i == 1 goes
// from dev_temp to dev_sig_*.
89 for (
int i = 0; i < 2; ++i){
// Each pass invokes five kernels, each on its own queue: fetch, fetch_mwt, fft, transpose_mwt
// and transpose.
91 (*_fetch_kernel)(*_fetch_queue, 1, 1
92 , i == 0 ? (j == 0 ?(&*dev_in_ev):(&*dev_in_od)) : (&*dev_temp)
93 , mangle_int, twidle_int
94 , log_rows_arg, log_columns_arg, rows_arg, columns_arg);
95 (*_fetch_mwt_kernel)(*_fetch_mwt_queue,1, 1, log_rows_arg, log_columns_arg, twidle_int);
96 (*_fft_kernel)(*_fft_queue,1, 1, inverse_int
97 , log_rows_arg, log_columns_arg, rows_arg, columns_arg);
98 (*_transpose_mwt_kernel)(*_transpose_mwt_queue, 1, 1, log_rows_arg, log_columns_arg);
99 (*_transpose_kernel)(*_transpose_queue, 1, 1
100 , i == 0 ? (&*dev_temp) : (j == 0 ? (&*dev_sig_ev) :(&*dev_sig_od))
101 , mangle_int, twidle_int, inverse_int
102 , log_rows_arg, log_columns_arg, rows_arg, columns_arg, delta_const);
// finish() is called on the two transpose queues only.
// NOTE(review): the closing braces of the loops are not visible, so it cannot be told from
// here whether these calls run once per pass or once after the loops.
105 (*_transpose_mwt_queue).finish();
106 (*_transpose_queue).finish();
// Final stage: invoke _last_kernel on the device's default queue with both dev_sig_* buffers,
// the device twiddles, the output buffer and half_fft_size.
// NOTE(review): no finish() on that queue is visible after this call - confirm whether the
// caller is expected to synchronise before reading `output`.
112 (*_last_kernel)(*&(_device.default_queue()), 1, 1
113 , (&*dev_sig_ev), (&*dev_sig_od), (&*dev_twd)
114 ,
static_cast<cl_mem
>(output.begin()), half_fft_size);
// Alternative definition of FftWorker::operator() on the #else side of the ENABLE_OPENCL
// conditional. Both parameters are unnamed, so the visible signature gives the body no way to
// refer to them.
// NOTE(review): the function body (original lines 121-122) is not visible in this view.
//
// Template parameters:
//   T           - element type of the input series.
//   InputAlloc  - allocator type of the input TimeSeries.
//   OutputAlloc - allocator type of the output FrequencySeries.
117 #else // ENABLE_OPENCL 118 template <
typename T,
typename InputAlloc,
typename OutputAlloc>
119 void FftWorker::operator()(data::TimeSeries<cheetah::Fpga , T, InputAlloc>
const&,
120 data::FrequencySeries<cheetah::Fpga, FftWorker::Complex<T>, OutputAlloc>&)
123 #endif // ENABLE_OPENCL Some limits and constants for FLDO.
// NOTE(review): the text after the "#endif // ENABLE_OPENCL" marker on the line above does not
// appear to belong to this file (possibly residue from a neighbouring document) - confirm.