UFJF - Machine Learning Toolkit  0.51.8
valid/Validation.hpp
Go to the documentation of this file.
1 
6 #ifndef VALIDATION__HPP
7 #define VALIDATION__HPP
8 #pragma once
9 
13 #include "ufjfmltk/core/Data.hpp"
15 
17 namespace mltk::validation {
21  struct ValidationReport : public Solution {
22  // Attributes
24  double accuracy = 0.0;
25  double error = 0.0;
27  double precision = 0.0;
29  double recall = 0.0;
30  double sensitivity{0.0}, specificity{0.0};
31  double fscore{0.0};
32  size_t errors{0};
33  size_t tp{0}, tn{0}, fp{0}, fn{0};
35  double tnrate = 0.0;
37  double tprate = 0.0;
39  size_t falsePositive = 0;
41  size_t falseNegative = 0;
43  size_t trueNegative = 0;
45  size_t truePositive = 0;
46 
47  void print() const{
48  std::cout << "True positives: " << tp << std::endl;
49  std::cout << "True negatives: " << tn << std::endl;
50  std::cout << "False positives: " << fp << std::endl;
51  std::cout << "False negatives: " << fn << std::endl;
52  std::cout << "Precision: " << precision << std::endl;
53  std::cout << "Recall: " << precision << std::endl;
54  std::cout << "F1-score: " << precision << std::endl;
55  std::cout << "Accuracy: " << accuracy << std::endl;
56  }
57  };
58 
63  size_t qtde = 0;
64  int fold = 0;
65  int jump = 0;
66  std::vector<unsigned int> seed;
67 
68  double initial_error = 0.0;
69  double actual_error = 0.0;
70  double limit_error = 0.0;
71  };
72 
76  template <typename T>
77  struct TrainTestPair{
82  size_t fold = 0;
83  size_t execution = 0;
84 
85  TrainTestPair() = default;
87  this->train.copy(train);
88  this->test.copy(test);
89  }
90  };
91 
98  template< typename T >
99  std::vector<std::vector<size_t> > generateConfusionMatrix(Data< T > &samples, Learner< T > &learner);
100 
109  template <typename T>
110  inline ValidationReport metricsReport(const Data<T>& data, const std::vector<std::vector<size_t> > &cfm,
111  std::vector<int> positive_labels = std::vector<int>());
112 
113  /*
114  * \brief computes the classifier accuracy based on the data passed.
115  * \param data Data to make predictions on.
116  * \param model Classifier model to make predictions.
117  * \param trained if set to false, will train the classifier model.
118  * \return Accuracy of the model on passed data.
119  */
120  template<typename T, typename Classifier>
121  inline double accuracy(const Data<T>& data, Classifier& model, bool trained = true);
122 
128  inline double confusionMatrixAccuracy(const std::vector<std::vector<size_t> > &conf_matrix);
129 
130  inline ValidationReport classificationReport(const Point<int> &real, const Point<int> &predicted);
131 
141  template< typename T >
142  std::vector<TrainTestPair<T>> kfoldsplit(Data<T> &samples, size_t folds=5, bool stratified=true, bool keepIndex=true,
143  size_t seed=0);
144 
155  template< typename T >
156  std::vector<TrainTestPair<T>> kfoldsplit(Data<T> &samples, size_t folds, size_t qtde,
157  bool stratified=true, bool keepIndex=true, size_t seed=0);
158 
166  template<typename T>
167  TrainTestPair<T> partTrainTest(Data<T> &data, size_t fold, bool stratified=true, bool keepIndex=true, size_t seed=0);
168 
177  template <typename T>
178  ValidationReport kfold (Data<T> sample, classifier::Classifier<T> &classifier, size_t fold,
179  bool stratified=true, size_t seed=0, int verbose=0);
180 
192  template <typename T>
193  ValidationReport kkfold(Data<T> samples, classifier::Classifier<T> &classifier, size_t qtde,
194  size_t fold, bool stratified = true, size_t seed = 0, int verbose = 0);
195 
196  /*********************************************
197  * Implementation *
198  *********************************************/
199 
200  template <typename T>
201  ValidationReport kkfold(Data<T> samples, classifier::Classifier<T> &classifier, const size_t qtde,
202  const size_t fold, bool stratified, const size_t seed, const int verbose){
203  size_t _seed = (seed == 0) ? std::random_device{}() : seed;
204  auto valid_pair = partTrainTest(samples, fold, true, true, _seed);
205  int i;
206  size_t fp = 0, fn = 0, tp = 0, tn = 0, erro=0;
207  double error = 0, errocross = 0, func = 0.0, margin = 0, bias;
208  std::vector<double> w;
209  auto classes = samples.classes();
210  ValidationReport solution;
211 
212  //sample = train_sample;
213 
214  /*cross-validation*/
215  if(qtde > 0)
216  {
217  for(errocross = 0, i = 0; i < qtde; i++)
218  {
219  if(verbose) std::cout << "\nExecucao " << i + 1 << " / " << qtde << ":\n";
220  errocross += kfold(samples, classifier, fold, stratified, _seed + i, verbose).error;
221  }
222  if(verbose >= 1)std::cout << "\n\nErro " << fold << "-Fold Cross Validation: " << errocross/qtde << "%\n";
223  solution.accuracy = 100.0 - errocross/qtde;
224  solution.error = 100.0 - solution.accuracy;
225  solution.precision /= qtde*fold;
226  solution.recall /= qtde*fold;
227  solution.tnrate /= qtde*fold;
228  solution.falseNegative /= qtde*fold;
229  solution.falsePositive /= qtde*fold;
230  solution.trueNegative /= qtde*fold;
231  solution.truePositive /= qtde*fold;
232  }
233 
234  /*start final validation*/
235  if(verbose)
236  {
237  std::cout << "\nFinal Validation:\n";
238  std::cout << "Pts de Treino: " << valid_pair.train.size() << "\n";
239  std::cout << "Pts de Teste: " << valid_pair.test.size() << "\n";
240  }
241  //training
242  valid_pair.train.setClasses(samples.classes());
243  classifier.setSamples(mltk::make_data<T>(valid_pair.train));
244  classifier.setVerbose(0);
245 
246  bool isDual = (classifier.getFormulationString() == "Dual");
247  classifier.setSeed(seed);
248  if(!isDual){
249  if(!classifier.train()){
250  if(verbose)
251  std::cerr << "Validation error: The convergency wasn't reached in the training set!\n";
252  }
253  Solution s = classifier.getSolution();
254  w = s.w.X();
255  bias = s.bias;
256 
257  i = 0;
258  for(auto it = valid_pair.test.begin(); it != valid_pair.test.end(); it++, i++){
259  auto point = (*it);
260  double _y = classifier.evaluate(*point);
261 
262  if(point->Y() != _y){
263  if(verbose > 1)
264  std::cerr << "[" << i+1 << "x] function: " << _y << ", y: " << point->Y() << std::endl;
265  erro++;
266  if(classes.size() == 2 && point->Y() == -1) fp++; else fn++;
267  }else{
268  if(verbose > 1)
269  std::cerr << "[" << i+1 << "] function: " << _y << ", y: " << point->Y() << std::endl;
270  if(classes.size() == 2 && point->Y() == -1) tn++; else tp++;
271  }
272  }
273  }else{
274  /*testing imadual and smo*/
275  classifier::DualClassifier< T > *dual = dynamic_cast<classifier::DualClassifier< T > *>(&classifier);
276  dMatrix matrix;
277  std::shared_ptr<Data< T > > traintest_sample(mltk::make_data<T>());
278 
279  *traintest_sample = valid_pair.test;
280  traintest_sample->join(valid_pair.train);
281  traintest_sample->setClasses(classes);
282  dual->setSamples(traintest_sample);
283  if(!dual->train()){
284  if(verbose)
285  std::cerr << "Validation error: The convergency wasn't reached in the training set!\n";
286  }
287 
288  size_t i = 0;
289  for(auto it = valid_pair.test.begin(); it != valid_pair.test.end(); it++, i++){
290  auto point = (*it);
291  double _y = dual->evaluate(*point);
292 
293  if(point->Y() != _y){
294  if(verbose > 1)
295  std::cerr << "[" << i+1 << "x] function: " << _y << ", y: " << point->Y() << std::endl;
296  erro++;
297  if(point->Y() == -1 && classes.size() == 2) fp++; else fn++;
298  }else{
299  if(verbose > 1)
300  std::cerr << "[" << i+1 << "] function: " << _y << ", y: " << point->Y() << std::endl;
301  if(point->Y() == -1 && classes.size() == 2) tn++; else tp++;
302  }
303  }
304  if(verbose) std::cout.flush();
305  }
306 
307  if(verbose >= 1) std::cout << "Validation Error: " << erro << " -- " << ((double)erro/(double) valid_pair.test.size()) * 100.0f << "%\n";
308  return solution;
309  }
310 
311  template <typename T>
312  ValidationReport kfold (Data<T> sample, classifier::Classifier<T> &classifier, const size_t fold,
313  bool stratified, const size_t seed, const int verbose){
314  double error = 0.0;
315  std::vector<double> error_arr(fold);
316  auto classes = sample.classes();
317  size_t _seed = (seed == 0) ? std::random_device{}() : seed;
318  sample.shuffle(_seed);
319  std::vector<TrainTestPair<T>> folds = kfoldsplit(sample, fold, stratified, true, seed);
320  ValidationReport solution;
321 
322  //Start cross-validation
323  for(size_t fp = 0, fn = 0, tp = 0, tn = 0, j = 0; j < fold; ++j){
324  auto _test_sample = folds[j].test;
325  auto _train_sample = folds[j].train;
326 
327  if(verbose){
328  std::cout << "\nCross-Validation " << j + 1 << ": \n";
329  std::cout << "Train points: " << _train_sample.size() << std::endl;
330  std::cout << "Train distribution: " << mltk::Point<size_t>(_train_sample.classesDistribution()) << std::endl;
331  std::cout << "Test points: " << _test_sample.size() << std::endl;
332  std::cout << "Test distribution: " << mltk::Point<size_t>(_test_sample.classesDistribution()) << std::endl;
333  std::cout << std::endl;
334  }
335 
336  // Training phase
337  classifier.setSamples(_train_sample);
338  Solution s = classifier.getSolution();
339  bool isDual = classifier.getFormulationString() == "Dual";
340  classifier.setSeed(_seed);
341  if(!isDual){
342  if(!classifier.train()){
343  if(verbose){
344  std::cerr << "Error at " << fold << "-fold: The convergency wasn't reached at the set " << j+1 << "!\n";
345  }
346  }
347 
348  size_t i = 0;
349  for(auto it = _test_sample.begin(); it != _test_sample.end(); it++, i++){
350  auto point = (*it);
351  double _y = classifier.evaluate(*point);
352 
353  if(point->Y() != _y){
354  if(verbose > 1)
355  std::cerr << "[" << i+1 << "x] function: " << _y << ", y: " << point->Y() << std::endl;
356  error_arr[j]++;
357  if(classes.size() == 2 && point->Y() == -1) fp++; else fn++;
358  }else{
359  if(verbose > 1)
360  std::cerr << "[" << i+1 << "] function: " << _y << ", y: " << point->Y() << std::endl;
361  if(classes.size() == 2 && point->Y() == -1) tn++; else tp++;
362  }
363  }
364  }else{
365  classifier::DualClassifier< T > *dual = dynamic_cast<classifier::DualClassifier< T > *>(&classifier);
366  std::shared_ptr<Data< T > > traintest_sample(std::make_shared<Data< T > >());
367  traintest_sample = mltk::make_data<T>(_test_sample);
368  traintest_sample->join(_train_sample);
369  traintest_sample->setClasses(classes);
370  dual->setSamples(traintest_sample);
371  dual->recomputeKernel();
372  if(!dual->train()){
373  if(verbose)
374  std::cerr << "Validation error: The convergency wasn't reached in the training set!\n";
375  }
376 
377  size_t i = 0;
378  for(auto it = _test_sample.begin(); it != _test_sample.end(); it++, i++){
379  auto point = (*it);
380  double _y = dual->evaluate(*point);
381  if(point->Y() != _y){
382  if(verbose > 1)
383  std::cerr << "[" << i+1 << "x] function: " << _y << ", y: " << point->Y() << std::endl;
384  error_arr[j]++;
385  if(point->Y() == -1 && classes.size() == 2) fp++; else fn++;
386  }else{
387  if(verbose > 1)
388  std::cerr << "[" << i+1 << "] function: " << _y << ", y: " << point->Y() << std::endl;
389  if(point->Y() == -1 && classes.size() == 2) tn++; else tp++;
390  }
391  }
392 
393  }
394 
395  if(verbose) std::cout << "Error " << j + 1 << ": " << error_arr[j] << " -- " << ((double)error_arr[j]/(double) _test_sample.size()) * 100.0f << "%\n";
396  error += ((double)error_arr[j]/(double) _test_sample.size()) * 100.0f;
397  if(classes.size() == 2){
398  solution.accuracy += (double)(tp + tn)/(double)(tp + tn + fp + fn);
399  solution.precision += (double)tp/(double)(tp + fp);
400  solution.recall += (double)tp/(double)(tp + fn);
401  solution.tnrate += (double)tn/(double)(tn + fp);
402  solution.falseNegative += fn;
403  solution.falsePositive += fp;
404  solution.trueNegative += tn;
405  solution.truePositive += tp;
406  }
407  }
408  solution.error = (((double)error)/(double)fold);
409  solution.accuracy = 100.0 - solution.error;
410  return solution;
411  }
412 
413  template<typename T>
414  TrainTestPair<T> partTrainTest(Data<T> &data, const size_t fold, bool stratified, bool keepIndex, const size_t seed) {
415  mltk::Data<T> samples = data.copy();
416  size_t _seed = (seed == 0) ? std::random_device{}() : seed;
417  std::vector<Data<T> > folds = samples.splitSample(fold, stratified, keepIndex, seed);
418  TrainTestPair<T> result;
419 
420  for(auto it = folds.begin(); it != folds.end()-1; it++){
421  auto _data = *it;
422  for(auto p = _data.begin(); p != _data.end(); p++){
423  auto point = *p;
424  result.train.insertPoint(point, keepIndex);
425  }
426  }
427  size_t last_fold = folds.size()-1;
428  for(auto it = folds[last_fold].begin(); it != folds[last_fold].end(); it++){
429  auto point = *it;
430  result.test.insertPoint(point, keepIndex);
431  }
432 
433  result.train.setName(data.name()+"_train");
434  result.train.shuffle(_seed);
435  result.test.setName(data.name()+"_test");
436  result.test.shuffle(_seed);
437 
438  return result;
439  }
440 
441  template< typename T >
442  std::vector<TrainTestPair<T>> kfoldsplit(Data<T> &samples, const size_t folds, const size_t qtde,
443  bool stratified, bool keepIndex, const size_t seed) {
444  std::vector<TrainTestPair<T> > kkfold_split;
445  size_t _seed = (seed == 0) ? std::random_device{}() : seed;
446 
447  kkfold_split.reserve(qtde*folds);
448  for(int i = 0; i < qtde; i++){
449  size_t other_seed = (seed == 0) ? std::random_device{}() : _seed+i;
450  auto kfold_split = kfoldsplit(samples, folds, stratified, keepIndex, other_seed);
451 
452  for(size_t j = 0; j < kfold_split.size(); j++){
453  kfold_split[j].execution = i+1;
454  }
455 
456  kkfold_split.insert(kkfold_split.end(), kfold_split.begin(), kfold_split.end());
457  }
458  return kkfold_split;
459  }
460 
461  template< typename T >
462  std::vector<TrainTestPair<T>> kfoldsplit(Data<T> &data, const size_t folds, bool stratified, bool keepIndex,
463  const size_t seed){
464  mltk::Data<T> samples = data.copy();
465  size_t _seed = (seed == 0) ? std::random_device{}() : seed;
466 
467  std::vector<Data<T> > data_folds = samples.splitSample(folds, stratified, keepIndex, seed);
468  std::vector<TrainTestPair<T> > kfold_split;
469 
470  kfold_split.reserve(folds);
471 
472  for(int i = 0; i < folds; i++){
473  Data<T> train;
474  train.join(data_folds[i]);
475 
476  int gone = 0;
477  int next_j=(i+2) % folds;
478 
479  for(int j = (i+1) % folds; gone < folds-2; gone++, j = (j+1) % folds){
480  train.join(data_folds[j]);
481  next_j = (j+1) % folds;
482  }
483 
484  auto test = data_folds[(next_j)%folds];
485 
486  train.shuffle(_seed+i);
487  test.shuffle(_seed+i);
488 
489  train.setName(data.name() + "_train_fold_"+std::to_string(i));
490  test.setName(data.name() + "_test_fold_"+std::to_string(i));
491 
492  train.resetIndex();
493  test.resetIndex();
494 
495  train.setName(data.name()+"_train_fold_"+std::to_string(i+1));
496  test.setName(data.name()+"_test_fold_"+std::to_string(i+1));
497 
498  kfold_split.emplace_back(train, test);
499  kfold_split.back().fold = i+1;
500  }
501  return kfold_split;
502  }
503 
/**
 * \brief Compute the accuracy based on a confusion matrix.
 *
 * Every off-diagonal entry counts as an error; the accuracy is the share of the remaining
 * entries over the sum of all entries.
 *
 * \param conf_matrix Confusion matrix (rows may have any length).
 * \return Accuracy as a percentage in [0, 100]; 0 when the matrix holds no samples.
 */
inline double confusionMatrixAccuracy(const std::vector<std::vector<size_t> > &conf_matrix){
    double errors = 0, total = 0;
    for(size_t i = 0; i < conf_matrix.size(); i++){
        for(size_t j = 0; j < conf_matrix[i].size(); j++){
            if(i != j){
                errors += conf_matrix[i][j];
            }
            total += conf_matrix[i][j];
        }
    }
    // An empty or all-zero matrix would otherwise produce 0/0 = NaN.
    if(total == 0){
        return 0.0;
    }
    return (1 - errors/total)*100;
}
516 
517  template<typename T, typename Classifier>
518  inline double accuracy(const Data<T>& data, Classifier& model, bool trained){
519  if(!trained){
520  model.train();
521  }
522  double acc = 0.0;
523  for(int i = 0; i < data.size(); i++){
524  auto point = data(i);
525  if(model.evaluate(point) == point.Y()){
526  acc++;
527  }
528  }
529  return acc/data.size();
530  }
531 
532  template <typename T>
533  inline ValidationReport metricsReport(const Data<T>& data, const std::vector<std::vector<size_t> > &cfm,
534  std::vector<int> positive_labels){
535  auto classes = data.classes();
536  ValidationReport report;
537 
538  std::vector<char> is_positive;
539 
540  if(positive_labels.empty()){
541  is_positive.resize(classes.size(), true);
542  }else{
543  is_positive.resize(classes.size(), false);
544  std::for_each(positive_labels.begin(), positive_labels.end(), [&](const int& label){
545  int pos = std::find(classes.begin(), classes.end(), label) - classes.begin();
546  assert((pos < classes.size()) && "Label not found.");
547  is_positive[pos] = true;
548  });
549  }
550 
551  for(int i = 0; i < cfm.size(); i++){
552  for(int j = 0; j < cfm.size(); j++){
553  if(i == j){
554  if(is_positive[i]){
555  report.tp += cfm[i][j];
556  }else{
557  report.tn += cfm[i][j];
558  }
559  }else{
560  if(is_positive[i]){
561  report.fn += cfm[i][j];
562  }else{
563  report.fp += cfm[i][j];
564  }
565  }
566  }
567  }
568 
569  report.accuracy = (double)report.tp / data.size();
570  report.error = 1.0 - report.accuracy;
571  report.errors = report.fp+report.fn;
572  report.sensitivity = (double)report.tp/(report.tp + report.fn);
573  report.specificity = (double)report.tn/(report.tn + report.fp);
574  report.precision = (double)report.tp/(report.tp + report.fp);
575  report.recall = report.sensitivity;
576  report.fscore = 2*report.precision*report.recall/(report.precision + report.recall);
577 
578  return report;
579  }
580 
581  template< typename T >
582  std::vector<std::vector<size_t> > generateConfusionMatrix(Data< T > &samples, Learner< T > &learner){
583  auto classes = samples.classes();
584  size_t size = samples.size(), i, j, idp, idy, n_classes = classes.size();
585  std::vector<std::vector<size_t> > confusion_m(n_classes, std::vector<size_t>(n_classes, 0));
586  double acc = 0.0;
587 
588  for(i = 0; i < size; i++){
589  int pred = learner.evaluate(*(samples[i]));
590  for(j = 0, idp = 0, idy = 0; j < n_classes; j++) {
591  if (classes[j] == pred) {
592  idp = j;
593  }
594  if (classes[j] == samples[i]->Y()) {
595  idy = j;
596  }
597  if (idp && idy) break;
598  }
599  confusion_m[idp][idy]++;
600  }
601  return confusion_m;
602  }
603 
604  ValidationReport classificationReport(const Point<int> &real, const Point<int> &predicted){
605  assert((real.size() == predicted.size()) && "Desired and predicted values points of different sizes. ");
606  ValidationReport report;
607  std::set<int> unique_labels;
608  for(int i = 0; i < real.size(); i++){
609  unique_labels.insert(real[i]);
610  unique_labels.insert(predicted[i]);
611  }
612  for(int i = 0; i < real.size(); i++){
613  if(real[i] == predicted[i]){
614  report.accuracy++;
615  if(unique_labels.size() == 2){
616  if(real[i] == 1){
617  report.tp++;
618  }else{
619  report.tn++;
620  }
621  }
622  }else if(unique_labels.size() == 2){
623  if(real[i] == 1){
624  report.fn++;
625  }else{
626  report.fp++;
627  }
628  }
629  }
630  report.accuracy /= real.size();
631  report.sensitivity = report.tp/(report.tp+report.fn);
632  report.specificity = report.tn/(report.fn+report.tn);
633  report.precision = report.tp/(report.tp+report.fp);
634  report.recall = report.tp/(report.tp+report.fn);
635  report.fscore = 2*(report.precision*report.recall)/(report.precision + report.recall);
636  return report;
637  }
638 }
639 #endif
bool insertPoint(const Data< T > &samples, int _index, bool keepIndex=false)
Insert a point to the data from another sample.
Definition: Data.hpp:1437
std::vector< Data< T > > splitSample(const std::size_t &split_size, bool stratified=true, bool keepIndex=false, size_t seed=0)
Split the data by a given size.
Definition: Data.hpp:1855
void join(const Data< T > &data)
Merge one dataset with another.
Definition: Data.hpp:1598
size_t size() const
Returns the size of the dataset.
Definition: Data.hpp:208
void resetIndex()
Reset the index vector.
Definition: Data.hpp:1759
std::string name() const
The name of the dataset is defined as the name of the file from which it was loaded.
Definition: Data.hpp:203
const std::vector< int > classes() const
Returns a vector containing the numeric values of the classes.
Definition: Data.hpp:1831
void setName(const std::string &name)
Set the name of the dataset.
Definition: Data.hpp:277
mltk::Data< T > copy() const
Returns a copy of itself.
Definition: Data.hpp:1551
void shuffle(const size_t &seed=42)
Shuffle the data with a given seed.
Definition: Data.hpp:1349
virtual void setSamples(const Data< T > &data)
setSamples Set the samples used by the Learner.
Definition: Learner.hpp:150
void setSeed(const size_t _seed)
Set the seed to be used by the learner.
Definition: Learner.hpp:145
virtual bool train()=0
Function that execute the training phase of a Learner.
virtual std::string getFormulationString()=0
getFormulationString Returns a string that represents the formulation of the learner (Primal or Dual)...
virtual double evaluate(const Point< T > &p, bool raw_value=false)=0
Returns the class of a feature point based on the trained Learner.
void setVerbose(int _verbose)
Set the level of verbose.
Definition: Learner.hpp:175
Wrapper for the point data.
Definition: Point.hpp:42
Rep const & X() const
Returns the attributes representation of the point (std::vector by default).
Definition: Point.hpp:139
std::size_t size() const
Returns the dimension of the point.
Definition: Point.hpp:133
Definition: Solution.hpp:13
double bias
Bias of the solution.
Definition: Solution.hpp:23
mltk::Point< double > w
Weights vector.
Definition: Solution.hpp:17
Definition: classifier/Classifier.hpp:17
Solution getSolution() const
getSolution Returns the solution of the classifier.
Definition: classifier/Classifier.hpp:52
Definition: DualClassifier.hpp:16
virtual double evaluate(const Point< T > &p, bool raw_value=false) override
Returns the class of a feature point based on the trained Learner.
Definition: DualClassifier.hpp:28
Validation methods namespace.
Definition: valid/Validation.hpp:17
double confusionMatrixAccuracy(const std::vector< std::vector< size_t > > &conf_matrix)
Compute the accuracy based on a confusion matrix.
Definition: valid/Validation.hpp:504
std::vector< std::vector< size_t > > generateConfusionMatrix(Data< T > &samples, Learner< T > &learner)
Compute the confusion matrix for a given trained classifier.
Definition: valid/Validation.hpp:582
ValidationReport metricsReport(const Data< T > &data, const std::vector< std::vector< size_t > > &cfm, std::vector< int > positive_labels=std::vector< int >())
Generates a report with classifiers metrics.
Definition: valid/Validation.hpp:533
TrainTestPair< T > partTrainTest(Data< T > &data, size_t fold, bool stratified=true, bool keepIndex=true, size_t seed=0)
Divide the samples in training and test set.
Definition: valid/Validation.hpp:414
std::vector< TrainTestPair< T > > kfoldsplit(Data< T > &samples, size_t folds=5, bool stratified=true, bool keepIndex=true, size_t seed=0)
Split the data in k folds.
Definition: valid/Validation.hpp:462
ValidationReport kkfold(Data< T > samples, classifier::Classifier< T > &classifier, size_t qtde, size_t fold, bool stratified=true, size_t seed=0, int verbose=0)
Executes the validation with several executions of the k fold algorithm.
Definition: valid/Validation.hpp:201
ValidationReport kfold(Data< T > sample, classifier::Classifier< T > &classifier, size_t fold, bool stratified=true, size_t seed=0, int verbose=0)
Executes k-fold stratified cross-validation.
Definition: valid/Validation.hpp:312
Structure to manage cross validation.
Definition: valid/Validation.hpp:62
A struct representing a pair with training and test data.
Definition: valid/Validation.hpp:77
Data< T > test
Test data.
Definition: valid/Validation.hpp:81
Data< T > train
Train data.
Definition: valid/Validation.hpp:79
Solution for the validation of a ML method.
Definition: valid/Validation.hpp:21
size_t trueNegative
Number of true negatives generated by the classifier in the evaluation.
Definition: valid/Validation.hpp:43
double precision
Precision of the validated model.
Definition: valid/Validation.hpp:27
double recall
Recall of the validated model.
Definition: valid/Validation.hpp:29
double tprate
True positive rate.
Definition: valid/Validation.hpp:37
size_t truePositive
Number of true positives generated by the classifier in the evaluation.
Definition: valid/Validation.hpp:45
double accuracy
Accuracy of the validated model.
Definition: valid/Validation.hpp:24
size_t falseNegative
Number of false negatives generated by the classifier in the evaluation.
Definition: valid/Validation.hpp:41
double tnrate
True negative rate.
Definition: valid/Validation.hpp:35
size_t falsePositive
Number of false positives generated by the classifier in the evaluation.
Definition: valid/Validation.hpp:39