Cheetah - SKA - PSS - Prototype Time Domain Search Pipeline
RfiGenerator.cpp
1 /*
2  * The MIT License (MIT)
3  *
4  * Copyright (c) 2016 The SKA organisation
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "../RfiGenerator.h"
25 #include <cmath>
26 #pragma GCC diagnostic push
27 #pragma GCC diagnostic ignored "-Wunused-parameter"
28 #include <boost/accumulators/accumulators.hpp>
29 #include <boost/accumulators/statistics/stats.hpp>
30 #include <boost/accumulators/statistics/mean.hpp>
31 #include <boost/accumulators/statistics/variance.hpp>
32 #pragma GCC diagnostic pop
33 
34 namespace ska {
35 namespace cheetah {
36 namespace generators {
37 
38 
39 template<typename T>
40 RfiGenerator<T>::RfiGenerator()
41  : TimeFrequencyGenerator<T>()
42 {
43 }
44 
45 template<typename T>
46 RfiGenerator<T>::~RfiGenerator()
47 {
48 }
49 
50 template<typename T>
51 void RfiGenerator<T>::next(DataType& data)
52 {
53  FlaggedDataType flagged_data(data);
54  this->next(flagged_data);
55 }
56 
57 template<typename T>
58 void RfiGenerator<T>::rfi_gaussian_block(FlaggedDataType& flagged_data
59  , std::size_t min_channel
60  , std::size_t max_channel
61  , std::size_t min_sample_number
62  , std::size_t max_sample_number
63  , float sigma
64  )
65 {
66  DataType& data = static_cast<DataType&>(flagged_data);
67  boost::accumulators::accumulator_set<double, boost::accumulators::stats<boost::accumulators::tag::variance,boost::accumulators::tag::mean> > acc;
68  for_each(data.begin(), data.end(), std::bind<void>(std::ref(acc), std::placeholders::_1));
69 
70  float mean = boost::accumulators::mean(acc);
71  float variance = boost::accumulators::variance(acc);
72  float stddev = sqrtf(variance);
73 
74  // Choose a peak value. This should depend on the size of the data, because the highest expected stochastic outlier
75  // obviously increases when you draw more samples from a normal distribution.
76  // General rule is sigma>=10 for strong RFI and less for weak. That should surely be detected and will very seldom be a noise
77  // peak, i.e. falsely flagged.
78  float max = mean + sigma * stddev;
79 
80  // Determine centre of data chunk.
81  int centre_sample = int((max_sample_number + min_sample_number)/2);
82  int centre_channel = int((max_channel + min_channel)/2);
83  float channel_gaussian_width = (max_channel - min_channel)/2.0;
84  float time_gaussian_width = ((max_sample_number - min_sample_number)/2.0);
85  float channel_gaussian_variance = powf(channel_gaussian_width/2.355, 2.0f);
86  float time_gaussian_variance = powf(time_gaussian_width/2.355, 2.0f);
87  auto & flags = flagged_data.rfi_flags();
88 
89  for (unsigned time_sample = min_sample_number; time_sample < max_sample_number; ++time_sample)
90  {
91  auto spectrum = data.spectrum(time_sample);
92  auto channel_it = spectrum.begin();
93  channel_it += min_channel;
94  unsigned channel_number = min_channel;
95  auto flag_slice = flags.spectrum(time_sample);
96  auto flag_it = flag_slice.begin();
97  flag_it += min_channel;
98  while(channel_number <= max_channel)
99  {
100  *channel_it += max * std::exp(-( (powf(float(channel_number) - float(centre_channel), 2.0f)/(2.0f*channel_gaussian_variance))
101  + (powf(float(time_sample) - float(centre_sample), 2.0f)/(2.0f*time_gaussian_variance))));
102  if(time_sample<centre_sample+std::floor(time_gaussian_width)+1 &&
103  time_sample>centre_sample-std::floor(time_gaussian_width)-1 &&
104  channel_number<centre_channel+std::floor(channel_gaussian_width)+1 &&
105  channel_number>centre_channel-std::floor(channel_gaussian_width)-1)
106  {
107  *flag_it = true;
108  }
109 
110  ++flag_it;
111  ++channel_it;
112  ++channel_number;
113  }
114  }
115 }
116 
117 template<typename T>
118 void RfiGenerator<T>::rfi_ramp_block(FlaggedDataType& flagged_data
119  , std::size_t min_channel
120  , std::size_t max_channel
121  , std::size_t min_sample_number
122  , std::size_t max_sample_number)
123 {
124  DataType& data = static_cast<DataType&>(flagged_data);
125  boost::accumulators::accumulator_set<double, boost::accumulators::stats<boost::accumulators::tag::variance,boost::accumulators::tag::mean> > acc;
126  for_each(data.begin(), data.end(), std::bind<void>(std::ref(acc), std::placeholders::_1));
127 
128  float mean = boost::accumulators::mean(acc);
129  float variance = boost::accumulators::variance(acc);
130  float stddev = sqrtf(variance);
131  // Choose a peak value. This should depend on the size of the data, because the highest expected stochastic outlier
132  // obviously increases when you draw more samples from a normal distribution.
133  // For now, I have chosen 10 standard deviations. That should surely be detected and will very seldom be a noise
134  // peak, i.e. falsely flagged.
135  float max = mean + 10.0f * stddev;
136  float slope = 1.0f/float(max_channel - min_channel);
137 
138  auto & flags = flagged_data.rfi_flags();
139 
140  for (unsigned time_sample = min_sample_number; time_sample < max_sample_number; ++time_sample)
141  {
142  auto spectrum = data.spectrum(time_sample);
143  auto channel_it = spectrum.begin();
144  channel_it += min_channel;
145  unsigned channel_number = min_channel;
146  auto flag_slice = flags.spectrum(time_sample);
147  auto flag_it = flag_slice.begin();
148  flag_it += min_channel;
149  while(channel_number <= max_channel)
150  {
151  *channel_it += max * slope * (channel_number - min_channel);
152  *flag_it=true;
153  ++flag_it;
154  ++channel_it;
155  ++channel_number;
156  }
157  }
158 }
159 
160 template<typename T>
161 void RfiGenerator<T>::rfi_broadband_block(FlaggedDataType& flagged_data
162  , std::size_t min_sample_number
163  , std::size_t max_sample_number
164  , float sigma
165  )
166 {
167  DataType& data = static_cast<DataType&>(flagged_data);
168  boost::accumulators::accumulator_set<double, boost::accumulators::stats<boost::accumulators::tag::variance,boost::accumulators::tag::mean> > acc;
169  for_each(data.begin(), data.end(), std::bind<void>(std::ref(acc), std::placeholders::_1));
170 
171  float mean = boost::accumulators::mean(acc);
172  float variance = boost::accumulators::variance(acc);
173  float stddev = sqrtf(variance);
174  std::size_t min_channel = 1;
175  std::size_t max_channel = flagged_data.number_of_channels()-1;
176 
177  //width of the gaussian pulse is taken as the half of the given time range. We can compute the width of the gaussian
178  float width = float((max_sample_number-min_sample_number)/2.0f);
179  float variance_of_gaussian = powf(width/2.355, 2.0f);
180 
181  // Choose a peak value. This should depend on the size of the data, because the highest expected stochastic outlier
182  // obviously increases when you draw more samples from a normal distribution.
183  // General rule is sigma>=10 for strong RFI and less for weak. That should surely be detected and will very seldom be a noise
184  // peak, i.e. falsely flagged.
185  float max = mean + sigma * stddev;
186 
187  // Determine centre of data chunk.
188  int centre_sample = int((max_sample_number + min_sample_number)/2);
189 
190  auto & flags = flagged_data.rfi_flags();
191 
192  for (unsigned time_sample = min_sample_number; time_sample < max_sample_number; ++time_sample)
193  {
194  auto spectrum = data.spectrum(time_sample);
195  auto channel_it = spectrum.begin();
196  channel_it += min_channel;
197  unsigned channel_number = min_channel;
198  auto flag_slice = flags.spectrum(time_sample);
199  auto flag_it = flag_slice.begin();
200  flag_it += min_channel;
201  while(channel_number <= max_channel)
202  {
203  *channel_it += max * std::exp(-(powf(float(time_sample) - float(centre_sample), 2.0f))/(2.0f*variance_of_gaussian));
204  if(time_sample<centre_sample+std::floor(width)+1 &&
205  time_sample>centre_sample-std::floor(width)-1)
206  {
207  *flag_it = true;
208  }
209 
210  ++flag_it;
211  ++channel_it;
212  ++channel_number;
213  }
214  }
215 }
216 
217 } // namespace generators
218 } // namespace cheetah
219 } // namespace ska
virtual void next(DataType &data) override
do nothing with the passed data
Some limits and constants for FLDO.
Definition: Brdz.h:35