Cheetah - SKA - PSS - Prototype Time Domain Search Pipeline
FldoUtils.h
1 /*
2  * The MIT License (MIT)
3  *
4  * Copyright (c) 2016 The SKA organisation
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef SKA_CHEETAH_FLDO_TEST_UTILS_FLDOUTILS_H
25 #define SKA_CHEETAH_FLDO_TEST_UTILS_FLDOUTILS_H
26 
27 #ifdef ENABLE_CUDA
28 
29 #include "cheetah/fldo/Types.h"
30 #include "cheetah/cuda_utils/cuda_errorhandling.h"
31 #include "panda/Error.h"
32 #include "panda/Log.h"
33 #include "panda/arch/nvidia/CudaDevicePointer.h"
34 
35 namespace ska {
36 namespace cheetah {
37 namespace fldo {
38 namespace cuda {
39 namespace util {
40 
46 class CandidateRebin
47 {
48  public:
49  uint64_t msize; // memory size in bytes
50  int rebin; // rebinning factor: 1, 2, 4, 8,.. (power of 2)
51  int first; // index of the first candidate with current rebinning factor
52  int last; // index of the last candidate with current rebinning factor
53  int pitch_dim; // pitch dim to allocate device memory
54  float *d_out; // pointer to memory with transposed rebinned matrix
55  cudaStream_t stream;// cuda stream associated to the current rebin factor
56  cudaEvent_t event; // cuda event associated to the current stream
57  public:
58  CandidateRebin()
59  {
60  first = -1;
61  last = -1;
62  pitch_dim = -1;
63  msize = 0;
64  d_out = NULL;
65  //associate a CUDA event and a CUDA steam to each rebin object
66  CUDA_ERROR_CHECK(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
67  CUDA_ERROR_CHECK(cudaStreamCreate(&stream));
68  }
69  ~CandidateRebin() {
70  CUDA_ERROR_CHECK(cudaEventDestroy(event));
71  CUDA_ERROR_CHECK(cudaStreamDestroy(stream));
72  if (d_out) {
73  cudaFree(d_out);
74  }
75  };
76 };
77 
78 /*
79  * class GpuEvent
80  *
81  * Simple interface for start/stop timing events.
82  */
83 class GpuEvent
84 {
85  public:
86  GpuEvent()
87  {
88  CUDA_ERROR_CHECK(cudaEventCreate(&_start_kernel));
89  CUDA_ERROR_CHECK(cudaEventCreate(&_stop_kernel));
90  }
91  ~GpuEvent()
92  {
93  CUDA_ERROR_CHECK(cudaEventDestroy(_start_kernel));
94  CUDA_ERROR_CHECK(cudaEventDestroy(_stop_kernel));
95  }
100  void gpu_event_start()
101  {
102  CUDA_ERROR_CHECK(cudaEventRecord(_start_kernel, 0));
103  }
108  void gpu_event_stop()
109  {
110  CUDA_ERROR_CHECK(cudaEventRecord(_stop_kernel, 0));
111  }
116  float gpu_elapsed_time()
117  {
118  float gpu_time;
119  CUDA_ERROR_CHECK(cudaEventSynchronize(_stop_kernel));
120  cudaEventElapsedTime(&gpu_time, _start_kernel, _stop_kernel);
121  return gpu_time;
122  }
123 
124  private:
125  cudaEvent_t _start_kernel; // event to record kernel start
126  cudaEvent_t _stop_kernel; // event to record kernel stop
127 };
128 
129 /*
130  * class GpuStream
131  *
132  * Simple interface to handle kernel streams different from the default
133  * one.
134  */
135 class GpuStream
136 {
137  public:
138  GpuStream()
139  {
140  CUDA_ERROR_CHECK(cudaStreamCreate(&_stream));
141  }
142 
143  ~GpuStream()
144  {
145  CUDA_ERROR_CHECK(cudaStreamDestroy(_stream));
146  }
147 
153  cudaStream_t const &stream() const
154  {
155  return _stream;
156  }
157  private:
158  cudaStream_t _stream; // kernel stream different from the default
159 };
160 
161 typedef data::Candidate<Cpu, float> CandidateType;
162 typedef data::TimeType TimeType;
163 
/**
 * @brief Declaration only; the definition is not part of this header.
 *
 * @details NOTE(review): judging by the name and arguments this presumably
 *          transfers the listed per-candidate and observation constants to the
 *          device - confirm against the implementation. The array arguments
 *          are presumably sized by ncandidates (or by the number of
 *          channels/subbands for delta_freq) - TODO confirm.
 */
void load_constant_data(double *delta_freq,
                        double *nu,
                        double *nudot,
                        float *dm,
                        int *nbins,
                        int nchannels,
                        int nchan_per_subband,
                        size_t nsubint,
                        double tsamp,
                        int ncandidates);
167 
168 void rebin_input_data(int start, int current, int nchannels, std::vector<CandidateRebin> const &rebin);
169 
170 void fold_input_data(cudaDeviceProp properties, float *d_folded, float *d_weight, int *nbins,
171  CandidateRebin const &rebin, int ncand, int isubint, int nchannels,
172  int nsubbands, uint64_t nsamp_subslot, int default_max_phase, double tobs,
173  bool enable_split, cudaStream_t const exec_stream);
174 
175 void statistics_float(float *raw_in, int N, float *mean, float *rms, cudaStream_t stream);
176 
177 } // util
178 } // namespace cuda
179 } // namespace fldo
180 } // namespace cheetah
181 } // namespace ska
182 
183 #endif //ENABLE_CUDA
184 
185 #endif // SKA_CHEETAH_FLDO_TEST_UTILS_FLDOUTILS_H
Some limits and constants for FLDO.
Definition: Brdz.h:35