Cheetah - SKA - PSS - Prototype Time Domain Search Pipeline
RebinInputData.cu
1 
2 /*
3  * The MIT License (MIT)
4  *
5  * Copyright (c) 2016 The SKA organisation
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in all
15  * copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  */
#include "cheetah/fldo/cuda/detail/FldoUtils.h"
#include <stdexcept>
#include <vector>
27 
28 
29 namespace ska {
30 namespace cheetah {
31 namespace fldo {
32 namespace cuda {
33 namespace util {
34 
/*
 *
 * void rebin_input_data(int start, int current, int nchannels, std::vector<CandidateRebin> const &rebin)
 *
 * @brief Sum-up adjacent time samples depending on the rebin value.
 *
 * Launches the binning kernel that fills rebin[current].d_out starting from
 * rebin[start].d_out. The number of samples to combine is the integer ratio
 * rebin[current].rebin / rebin[start].rebin. The kernel is queued on
 * rebin[current].stream and rebin[current].event is recorded on the same
 * stream right after the launch, so the call returns without waiting for the
 * kernel to finish.
 *
 * Launch layout: 32x32 thread blocks; the grid x dimension covers
 * rebin[start].pitch_dim and the grid y dimension covers nchannels (both
 * rounded up to a whole number of tiles). When TRANSPOSE_SHFL is not defined
 * the kernel is given 32 * 32 * sizeof(float) bytes of dynamic shared memory.
 *
 * @param start the starting rebinning index (must be a valid index of rebin)
 * @param current the current rebinning index (must be a valid index of rebin)
 * @param nchannels the number of freq. channels
 * @param rebin the vector with the rebinning structures
 *
 * @return Nothing. Throws std::runtime_error if start or current is not a
 *         valid index of rebin, or if rebin[start].rebin is zero. CUDA
 *         failures are reported through CUDA_ERROR_CHECK (documented here as
 *         throwing a runtime_error -- confirm against the macro definition).
 */
void rebin_input_data(int start, int current, int nchannels, std::vector<CandidateRebin> const &rebin)
{
    // std::vector::operator[] performs no bounds checking, so reject invalid
    // indices up front instead of reading past the end of the vector.
    int const nrebin = static_cast<int>(rebin.size());
    if (start < 0 || start >= nrebin || current < 0 || current >= nrebin)
    {
        throw std::runtime_error("rebin_input_data: rebin index out of range");
    }

    CandidateRebin const &source = rebin[start];
    CandidateRebin const &target = rebin[current];

    // The prebin ratio below divides by the start rebin value: a zero divisor
    // would crash the host before any CUDA error handling could report it.
    if (source.rebin == 0)
    {
        throw std::runtime_error("rebin_input_data: start rebin value is zero");
    }

    int const tile_dimx = 32;   // tile dimension in x = num of threads in x
    int const tile_dimy = 32;   // tile dimension in y = num of threads in y

    // Ceiling divisions: enough tiles to cover every channel (y) and the
    // whole pitch of the start buffer (x).
    int const nblock_y = (nchannels + (tile_dimy - 1)) / tile_dimy;
    int const nblock_x = (source.pitch_dim + (tile_dimx - 1)) / tile_dimx;
    dim3 grid(nblock_x, nblock_y);
    dim3 threads(tile_dimx, tile_dimy);

    PANDA_LOG_DEBUG << "Call to bin input data: threads = ("
                    << tile_dimx
                    << ", "
                    << tile_dimy
                    << ") blocks = ("
                    << nblock_x
                    << " , "
                    << nblock_y
                    << ")";
    PANDA_LOG_DEBUG << "rebin_input_data: start binning: "
                    << start
                    << "("
                    << source.rebin
                    << ") current binning: "
                    << current
                    << "("
                    << target.rebin
                    << ")";

    // Ratio between the current and the start prebin value.
    int const prebin = target.rebin / source.rebin;

    // The binning kernel is launched in a stream different from the default
    // one. The folding of the candidates belonging to the current prebin list
    // has to wait for the end of this kernel: an event is recorded right
    // after the launch to signal its completion.
#ifdef TRANSPOSE_SHFL
    binInputKernel_shfl<<<grid, threads, 0, target.stream>>>
                            (target.d_out,
                             source.d_out,
                             source.pitch_dim,
                             target.pitch_dim,
                             prebin);
#else
    // One float per thread of the tile.
    int const shared_memory_size = static_cast<int>(tile_dimx * tile_dimy * sizeof(float));
    binInputKernel<<<grid, threads, shared_memory_size, target.stream>>>
                            (target.d_out,
                             source.d_out,
                             nchannels,
                             source.pitch_dim,
                             target.pitch_dim,
                             prebin);
#endif

    // cudaGetLastError() only reports launch (configuration) errors; faults
    // raised while the kernel runs surface at a later synchronizing call.
    CUDA_ERROR_CHECK(cudaGetLastError());
    CUDA_ERROR_CHECK(cudaEventRecord(target.event, target.stream));
    PANDA_LOG_DEBUG << "Succes in rebinning kernel execution";
}
107 } // namespace util
108 } // namespace cuda
109 } // namespace fldo
110 } // namespace cheetah
111 } // namespace ska
Some limits and constants for FLDO.
Definition: Brdz.h:35