Cheetah - SKA - PSS - Prototype Time Domain Search Pipeline
CornerTurn.cu
1 /*
2  * The MIT License (MIT)
3  *
4  * Copyright (c) 2016 The SKA organisation
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25  #ifdef ENABLE_CUDA
26 
27 #include "cheetah/data/cuda/src/kernels/corner_turn_kernels.cu"
28 #include "cheetah/data/cuda/CornerTurn.h"
29 #include <cuda.h>
30 #include <cuda_runtime.h>
31 
32 namespace ska {
33 namespace cheetah {
34 namespace data {
35 namespace cuda {
36 
37 template <typename InputNumericalT, typename OutputNumericalT>
38 void corner_turn_impl(const InputNumericalT* d_input, OutputNumericalT* d_output, std::size_t first_dimension, std::size_t second_dimension)
39 {
40  int max_threads_per_block;
41  int device;
42  cudaGetDevice(&device);
43  cudaDeviceGetAttribute(&max_threads_per_block,
44  cudaDevAttrMaxThreadsPerBlock, device);
45  int num_blocks = (first_dimension*second_dimension)/max_threads_per_block;
46  if((first_dimension*second_dimension)%max_threads_per_block != 0) num_blocks++;
47  simple_corner_turn_kernel<<<num_blocks, max_threads_per_block>>>(d_input, d_output, first_dimension, second_dimension);
48  cudaDeviceSynchronize();
49 }
50 
51 void corner_turn(const uint8_t* d_input, uint8_t* d_output, std::size_t first_dimension, std::size_t second_dimension)
52 {
53  corner_turn_impl(d_input, d_output, first_dimension, second_dimension);
54 }
55 
56 void corner_turn(const uint8_t* d_input, uint16_t* d_output, std::size_t first_dimension, std::size_t second_dimension)
57 {
58  corner_turn_impl(d_input, d_output, first_dimension, second_dimension);
59 }
60 
61 void corner_turn(const uint8_t* d_input, float* d_output, std::size_t first_dimension, std::size_t second_dimension)
62 {
63  corner_turn_impl(d_input, d_output, first_dimension, second_dimension);
64 }
65 
66 void corner_turn(const uint16_t* d_input, uint8_t* d_output, std::size_t first_dimension, std::size_t second_dimension)
67 {
68  corner_turn_impl(d_input, d_output, first_dimension, second_dimension);
69 }
70 
71 void corner_turn(const uint16_t* d_input, uint16_t* d_output, std::size_t first_dimension, std::size_t second_dimension)
72 {
73  corner_turn_impl(d_input, d_output, first_dimension, second_dimension);
74 }
75 
76 void corner_turn(const uint16_t* d_input, float* d_output, std::size_t first_dimension, std::size_t second_dimension)
77 {
78  corner_turn_impl(d_input, d_output, first_dimension, second_dimension);
79 }
80 
81 void corner_turn(const float* d_input, uint8_t* d_output, std::size_t first_dimension, std::size_t second_dimension)
82 {
83  corner_turn_impl(d_input, d_output, first_dimension, second_dimension);
84 }
85 
86 void corner_turn(const float* d_input, uint16_t* d_output, std::size_t first_dimension, std::size_t second_dimension)
87 {
88  corner_turn_impl(d_input, d_output, first_dimension, second_dimension);
89 }
90 
91 void corner_turn(const float* d_input, float* d_output, std::size_t first_dimension, std::size_t second_dimension)
92 {
93  corner_turn_impl(d_input, d_output, first_dimension, second_dimension);
94 }
95 
96 } //namespace cuda
97 } //namespace data
98 } //namespace cheetah
99 } //namespace ska
100 
101 #endif //ENABLE_CUDA
Some limits and constants for FLDO.
Definition: Brdz.h:35