15 template <
typename T >
class Data;
25 template <
typename T,
typename R >
39 template <
typename T,
typename R >
46 template <
typename T >
53 template <
typename T,
typename R >
68 template <
typename T,
typename R >
77 template <
typename T >
86 template <
typename T >
95 template <
typename T >
102 template <
typename T,
typename R >
104 assert(p.
size() > 0);
108 template <
typename T>
110 assert(feat < data.
dim());
112 for(
size_t i = 0; i < data.
size(); i++){
113 sum += (*data[i])[feat];
115 return (data.
size()>0)?sum/data.
size():0.0;
118 template <
typename T,
typename R >
120 assert(p.
size() > 0);
124 template <
typename T >
126 int i, size = data.
size();
128 std::vector<std::shared_ptr<Point< T > > > points = data.
points();
130 if(size == 1)
return 0.0;
134 for(sd = 0.0, i = 0; i < data.
size(); ++i){
135 sd += (points[i]->X()[feat] - avg)*(points[i]->X()[feat] - avg);
138 return std::sqrt(sd/(data.
size() - 1));
141 template <
typename T,
typename R >
143 assert(p.
size() > 0);
147 template <
typename T>
150 int dim = data.
dim(), size = data.
size();
152 std::vector<double> avg(dim);
153 std::vector<std::shared_ptr<Point< T > > > points = data.
points();
155 for(j = 0; j < dim; ++j){
156 if(feat < 0 || fnames[j] != feat){
158 for(i = 0; i < size; ++i){
159 avg[j] += points[i]->X()[j];
161 avg[j] = avg[j] / size;
166 for(i = 0; i < size; ++i){
168 for(j = 0; j < dim; ++j){
169 if(feat < 0 || fnames[j] != feat){
170 norm += std::pow(avg[j] - points[i]->X()[j], 2);
179 template <
typename T,
typename R >
185 template <
typename T >
187 int i = 0, j = 0, dim = data.
dim(), size = data.
size();
191 std::vector<double> avg(dim, 0.0);
192 std::vector<std::shared_ptr<Point< T > > > points = data.
points();
195 for(j = 0; j < dim; ++j){
196 if(feat < 0 || fnames[j] != feat){
198 for(i = 0; i < size; ++i){
199 avg[j] += points[i]->X()[j];
201 avg[j] = avg[j] / size;
205 for(
max = 0, i = 0; i < size; ++i){
206 for(norm = 0, j = 0; j < dim; ++j){
207 if(feat < 0 || fnames[j] != feat){
208 norm += std::pow(avg[j] - points[i]->X()[j], 2);
211 norm = std::sqrt(norm);
212 if(
max < norm)
max = norm;
216 for(
max = 0, i = 0; i < size; ++i){
217 for(j = 0; j < dim; ++j){
218 if(feat < 0 || fnames[j] != feat)
219 if(
max < fabs(points[i]->X()[j]))
220 max = fabs(points[i]->X()[j]);
228 template <
typename T >
230 int i = 0, j = 0, dim = data.
dim(), size = data.
size();
232 int size_pos = 0, size_neg = 0;
234 std::vector<double> avg_pos(dim, 0.0), avg_neg(dim, 0.0);
235 std::vector<std::shared_ptr<Point< T > > > points = data.
points();
237 for(size_pos = 0, size_neg = 0, i = 0; i < size; ++i){
238 if(points[i]->Y() == 1) size_pos++;
242 for(j = 0; j < dim; ++j){
243 for(i = 0; i < size; ++i){
244 if(points[i]->Y() == 1){
245 avg_pos[j] += points[i]->X()[j];
247 avg_neg[j] += points[i]->X()[j];
250 avg_pos[j] /= (double)size_pos;
251 avg_neg[j] /= (double)size_neg;
254 for(dist = 0.0, j = 0; j < dim; ++j){
255 if(feat < 0 || fnames[j] != feat)
256 dist += std::pow(avg_pos[j] - avg_neg[j], 2);
259 return std::sqrt(dist);
262 template <
typename T >
264 int i = 0, j = 0, dim = data.
dim(), size = data.
size();
266 int size_pos = 0, size_neg = 0, featsize = feats.size();
268 std::vector<double> avg_pos(dim, 0.0), avg_neg(dim, 0.0);
269 std::vector<std::shared_ptr<Point< T > > > points = data.
points();
271 for(size_pos = 0, size_neg = 0, i = 0; i < size; ++i){
272 if(points[i]->Y() == 1) size_pos++;
276 for(j = 0; j < dim; ++j){
277 for(i = 0; i < size; ++i){
278 if(points[i]->Y() == 1)
279 avg_pos[j] += points[i]->X()[j];
281 avg_neg[j] += points[i]->X()[j];
284 avg_pos[j] /= (double) size_pos;
285 avg_neg[j] /= (double) size_neg;
288 for(dist = 0.0, j = 0; j < dim; ++j){
289 for(i = 0; i < featsize; ++i){
290 if(fnames[j] == feats[i])
291 dist -= std::pow(avg_pos[j] - avg_neg[j], 2);
295 return std::sqrt(std::fabs(dist));
size_t size() const
Returns the size of the dataset.
Definition: Data.hpp:208
std::vector< int > getFeaturesNames() const
Returns the features names.
Definition: Data.hpp:1675
size_t dim() const
Returns the dimension of the dataset.
Definition: Data.hpp:213
std::vector< std::shared_ptr< Point< T > > > points()
Returns the vector of shared pointers to the Points of the sample.
Definition: Data.hpp:1685
Wrapper for the point data.
Definition: Point.hpp:42
T sum(const std::function< T(T)> &f=[](T const &t) { return t;}) const
Compute the sum of the components of the point.
Definition: Point.hpp:285
std::size_t size() const
Returns the dimension of the point.
Definition: Point.hpp:133
Namespace for statistical methods.
Definition: Statistics.hpp:19
double distCentersWithoutFeats(const Data< T > &data, const std::vector< int > &feats, int index)
Compute the distance between the centers of binary classes without given features.
Definition: Statistics.hpp:263
double mean(const mltk::Point< T, R > &p)
Compute the mean (average) of a point.
Definition: Statistics.hpp:103
double distCenters(const Data< T > &data, int feat)
Compute the distance between the centers of the binary classes, ignoring the given feature (no feature is ignored when feat < 0).
Definition: Statistics.hpp:229
double covar(const mltk::Point< T, R > &p, const mltk::Point< T, R > &p1)
Compute the covariance between two points.
Definition: Statistics.hpp:180
double var(const mltk::Point< T, R > &p)
Compute the variance of a point.
Definition: Statistics.hpp:142
double std_dev(const mltk::Point< T, R > &p)
Compute the standard deviation of a point.
Definition: Statistics.hpp:119
double radius(const Data< T > &data, int feat, double q)
Returns the radius of the ball that circumscribes the data.
Definition: Statistics.hpp:186
UFJF-MLTK main namespace for core functionalities.
Definition: classifier/Classifier.hpp:11
T max(const Point< T, R > &p)
Returns the max value of the point.
Definition: Point.hpp:544