UFJF - Machine Learning Toolkit  0.51.8
BaggingClassifier.hpp
1 //
2 // Created by mateuscmarim on 19/11/2020.
3 //
4 
5 #pragma once
6 
7 #include "Ensemble.hpp"
8 
9 namespace mltk {
10  namespace ensemble {
11  template<typename T>
12  class BaggingClassifier : public Ensemble<T>, public classifier::Classifier<T> {
13  private:
14  size_t n_estimators = 10;
15  size_t seed = 0;
16 
17  public:
18  BaggingClassifier() = default;
19 
20  template<class Estimator>
21  explicit BaggingClassifier(const Data<T> &samples, const Estimator &estimator, size_t n_estimators = 10,
22  size_t seed = 0)
23  : n_estimators(n_estimators), seed(seed) {
24  this->samples = mltk::make_data<T>(samples);
25  this->m_learners.resize(n_estimators);
26  for (size_t i = 0; i < n_estimators; i++) {
27  this->m_learners[i] = std::make_shared<Estimator>(estimator);
28  }
29  }
30 
31  bool train() override {
32  seed = (seed == 0) ? std::random_device()() : seed;
33 
34  size_t samp_size = this->samples->size() / n_estimators;
35  for (size_t i = 0; i < n_estimators; i++) {
36  size_t salt = (seed == 0) ? std::random_device()() : i;
37  this->m_learners[i]->setSeed(seed+salt);
38  this->m_learners[i]->setSamples(this->samples->sampling(samp_size, true, seed+salt));
39  this->m_learners[i]->train();
40  }
41  return true;
42  }
43 
44  double evaluate(const Point<T> &p, bool raw_value = false) override {
45  auto classes = this->samples->classes();
46  Point<int> votes(classes.size());
47  for (size_t i = 0; i < n_estimators; i++) {
48  int pred = this->m_learners[i]->evaluate(p);
49  size_t pred_pos = std::find(classes.begin(), classes.end(), pred) - classes.begin();
50  votes[pred_pos]++;
51  }
52  return classes[std::max_element(votes.X().begin(), votes.X().end()) - votes.X().begin()];
53  }
54 
55  std::string getFormulationString() override {
56  return this->m_learners[0]->getFormulationString();
57  }
58  };
59  }
60 }
size_t size() const
Returns the size of the dataset.
Definition: Data.hpp:208
Data< T > sampling(const size_t &samp_size, bool with_replacement=true, const int &seed=0)
Sample the dataset with the given size.
Definition: Data.hpp:2031
const std::vector< int > classes() const
Returns a vector containing the numeric values of the classes.
Definition: Data.hpp:1831
std::shared_ptr< Data< T > > samples
Samples used in the model training.
Definition: Learner.hpp:21
Rep const & X() const
Returns the attributes representation of the point (std::vector by default).
Definition: Point.hpp:139
Definition: classifier/Classifier.hpp:17
Definition: BaggingClassifier.hpp:12
bool train() override
Function that executes the training phase of a Learner.
Definition: BaggingClassifier.hpp:31
double evaluate(const Point< T > &p, bool raw_value=false) override
Returns the class of a feature point based on the trained Learner.
Definition: BaggingClassifier.hpp:44
std::string getFormulationString() override
getFormulationString Returns a string that represents the formulation of the learner (Primal or Dual)...
Definition: BaggingClassifier.hpp:55
Namespace for ensemble methods.
Definition: ensemble/Ensemble.hpp:16
std::vector< LearnerPointer< T > > m_learners
Pointers to the base learners used by the ensemble method.
Definition: ensemble/Ensemble.hpp:22
UFJF-MLTK main namespace for core functionalities.
Definition: classifier/Classifier.hpp:11