#include "cheetah/data/cuda/src/kernels/corner_turn_kernels.cu"
#include "cheetah/data/cuda/CornerTurn.h"

#include <cuda_runtime.h>
#include <stdexcept>
#include <string>

/**
 * @brief Launch simple_corner_turn_kernel over first_dimension * second_dimension
 *        elements and block until the device has finished.
 *
 * Launch layout: a 1D grid of 1D blocks. The block size is the current device's
 * cudaDevAttrMaxThreadsPerBlock and the grid holds ceil(elements / block size)
 * blocks, so the last block may be only partly populated.
 * NOTE(review): this relies on simple_corner_turn_kernel bounds-checking its
 * global index -- the kernel is not visible here, please confirm.
 *
 * @param d_input           source buffer handed to the kernel (presumably device
 *                          memory holding first_dimension * second_dimension elements)
 * @param d_output          destination buffer handed to the kernel (presumably
 *                          device memory of the same element count)
 * @param first_dimension   extent of the first axis, forwarded to the kernel
 * @param second_dimension  extent of the second axis, forwarded to the kernel
 *
 * @throws std::runtime_error if the element count overflows std::size_t, if the
 *         required grid exceeds the device's maximum x grid dimension, or if any
 *         CUDA runtime call / the kernel itself reports an error.
 *
 * An empty input (either dimension zero) returns immediately without touching
 * the device: a zero-block launch is an invalid configuration.
 */
template <typename InputNumericalT, typename OutputNumericalT>
void corner_turn_impl(const InputNumericalT* d_input,
                      OutputNumericalT* d_output,
                      std::size_t first_dimension,
                      std::size_t second_dimension)
{
    if (first_dimension == 0 || second_dimension == 0) {
        return;
    }

    // Guard the product: a wrapped element count would launch too few threads
    // and silently leave part of the output unwritten.
    std::size_t const total_elements = first_dimension * second_dimension;
    if (total_elements / first_dimension != second_dimension) {
        throw std::runtime_error("corner_turn: first_dimension * second_dimension overflows std::size_t");
    }

    // Local checker so that no CUDA status code is silently dropped.
    auto check = [](cudaError_t status, char const* what) {
        if (status != cudaSuccess) {
            throw std::runtime_error(std::string("corner_turn: ") + what + " failed: "
                                     + cudaGetErrorString(status));
        }
    };

    int device = 0;
    int max_threads_per_block = 0;
    int max_grid_dim_x = 0;
    check(cudaGetDevice(&device), "cudaGetDevice");
    check(cudaDeviceGetAttribute(&max_threads_per_block, cudaDevAttrMaxThreadsPerBlock, device),
          "cudaDeviceGetAttribute(cudaDevAttrMaxThreadsPerBlock)");
    check(cudaDeviceGetAttribute(&max_grid_dim_x, cudaDevAttrMaxGridDimX, device),
          "cudaDeviceGetAttribute(cudaDevAttrMaxGridDimX)");
    if (max_threads_per_block <= 0 || max_grid_dim_x <= 0) {
        throw std::runtime_error("corner_turn: device reported non-positive launch limits");
    }

    // Ceil-divide in std::size_t; written as quotient + remainder test so the
    // arithmetic itself cannot overflow.
    std::size_t const threads_per_block = static_cast<std::size_t>(max_threads_per_block);
    std::size_t num_blocks = total_elements / threads_per_block;
    if (total_elements % threads_per_block != 0) {
        ++num_blocks;
    }

    // Refuse rather than narrow: truncating the block count would produce a
    // valid-looking but incomplete launch.
    if (num_blocks > static_cast<std::size_t>(max_grid_dim_x)) {
        throw std::runtime_error("corner_turn: input too large for a single 1D grid on this device");
    }

    simple_corner_turn_kernel<<<static_cast<unsigned int>(num_blocks),
                                static_cast<unsigned int>(max_threads_per_block)>>>(
        d_input, d_output, first_dimension, second_dimension);

    // Launch-configuration errors surface here ...
    check(cudaGetLastError(), "simple_corner_turn_kernel launch");
    // ... and faults raised while the kernel runs surface at the synchronisation.
    check(cudaDeviceSynchronize(), "simple_corner_turn_kernel execution");
}
/**
 * @brief Corner turn from uint8_t input to uint8_t output.
 *
 * Forwards all arguments unchanged to corner_turn_impl<uint8_t, uint8_t>.
 */
void corner_turn(const uint8_t* d_input,
                 uint8_t* d_output,
                 std::size_t first_dimension,
                 std::size_t second_dimension)
{
    corner_turn_impl<uint8_t, uint8_t>(d_input, d_output, first_dimension, second_dimension);
}
/**
 * @brief Corner turn from uint8_t input to uint16_t output.
 *
 * Forwards all arguments unchanged to corner_turn_impl<uint8_t, uint16_t>.
 */
void corner_turn(const uint8_t* d_input,
                 uint16_t* d_output,
                 std::size_t first_dimension,
                 std::size_t second_dimension)
{
    corner_turn_impl<uint8_t, uint16_t>(d_input, d_output, first_dimension, second_dimension);
}
/**
 * @brief Corner turn from uint8_t input to float output.
 *
 * Forwards all arguments unchanged to corner_turn_impl<uint8_t, float>.
 */
void corner_turn(const uint8_t* d_input,
                 float* d_output,
                 std::size_t first_dimension,
                 std::size_t second_dimension)
{
    corner_turn_impl<uint8_t, float>(d_input, d_output, first_dimension, second_dimension);
}
/**
 * @brief Corner turn from uint16_t input to uint8_t output.
 *
 * Forwards all arguments unchanged to corner_turn_impl<uint16_t, uint8_t>.
 */
void corner_turn(const uint16_t* d_input,
                 uint8_t* d_output,
                 std::size_t first_dimension,
                 std::size_t second_dimension)
{
    corner_turn_impl<uint16_t, uint8_t>(d_input, d_output, first_dimension, second_dimension);
}
/**
 * @brief Corner turn from uint16_t input to uint16_t output.
 *
 * Forwards all arguments unchanged to corner_turn_impl<uint16_t, uint16_t>.
 */
void corner_turn(const uint16_t* d_input,
                 uint16_t* d_output,
                 std::size_t first_dimension,
                 std::size_t second_dimension)
{
    corner_turn_impl<uint16_t, uint16_t>(d_input, d_output, first_dimension, second_dimension);
}
/**
 * @brief Corner turn from uint16_t input to float output.
 *
 * Forwards all arguments unchanged to corner_turn_impl<uint16_t, float>.
 */
void corner_turn(const uint16_t* d_input,
                 float* d_output,
                 std::size_t first_dimension,
                 std::size_t second_dimension)
{
    corner_turn_impl<uint16_t, float>(d_input, d_output, first_dimension, second_dimension);
}
/**
 * @brief Corner turn from float input to uint8_t output.
 *
 * Forwards all arguments unchanged to corner_turn_impl<float, uint8_t>.
 */
void corner_turn(const float* d_input,
                 uint8_t* d_output,
                 std::size_t first_dimension,
                 std::size_t second_dimension)
{
    corner_turn_impl<float, uint8_t>(d_input, d_output, first_dimension, second_dimension);
}
/**
 * @brief Corner turn from float input to uint16_t output.
 *
 * Forwards all arguments unchanged to corner_turn_impl<float, uint16_t>.
 */
void corner_turn(const float* d_input,
                 uint16_t* d_output,
                 std::size_t first_dimension,
                 std::size_t second_dimension)
{
    corner_turn_impl<float, uint16_t>(d_input, d_output, first_dimension, second_dimension);
}
/**
 * @brief Corner turn from float input to float output.
 *
 * Forwards all arguments unchanged to corner_turn_impl<float, float>.
 */
void corner_turn(const float* d_input,
                 float* d_output,
                 std::size_t first_dimension,
                 std::size_t second_dimension)
{
    corner_turn_impl<float, float>(d_input, d_output, first_dimension, second_dimension);
}
// Some limits and constants for FLDO.