7 #include "VotingClassifier.hpp"
8 #include <ufjfmltk/Validation.hpp>
15 size_t p_size = 1000, fold = 5;
16 double F = 0.8, CR = 0.9;
17 std::vector<double> best_weights;
20 bool use_simplex =
false;
21 std::mt19937 generator;
22 std::pair<Point<double>,
double> Sbest;
23 size_t best_generation = 0;
25 template<
template<
typename...>
class WeakLearner,
26 template<
typename...>
class... WeakLearners>
27 void fillLearnersVector(WeakLearner<T> flearner) {
28 this->
m_learners.push_back(std::make_shared<WeakLearner<T> >(flearner));
31 template<
template<
typename...>
class WeakLearner,
32 template<
typename...>
class... WeakLearners>
33 void fillLearnersVector(WeakLearner<T> flearner, WeakLearners<T>... weak_learners) {
34 this->
m_learners.push_back(std::make_shared<WeakLearner<T> >(flearner));
35 fillLearnersVector(weak_learners...);
40 double that = 0, tmpsum = 0, tmax = 0;
45 std::sort(u.
X().begin(), u.
X().end());
48 for (
size_t j = i+1; j < n; j++) {
51 if (t[i] != 0) t[i] = t[i]/double(n - i);
53 tmax = (tmpsum-1)/
double(n-i);
63 that = (tmpsum+u[n-1]-1)/n;
65 for(
int j = 0; j < x.size(); j++){
67 if(x[j] < 0) x[j] = 0;
75 for(
auto const& p: data){
81 return double(acc)/ data.size();
85 double tp = 0, tn = 0, fp = 0, fn = 0;
87 voter.setWeights(x.
X());
105 return 100*compute_acc(valid_pair.
test, voter);
108 std::vector<Point<double>> init_population(){
109 std::vector<Point<double>> P;
110 std::uniform_real_distribution<double> dist(0., 1.);
113 for(
size_t i = 0; i < p_size; i++){
114 std::vector<double> w(this->
m_learners.size());
120 P[P.size()-1] = simplex_projection(P[P.size()-1]);
122 P[P.size()-1] = mltk::abs(P[P.size()-1]);
128 std::vector<double> eval_population(std::vector<
Point<double>>
const& population){
129 std::vector<double> costs(population.size());
131 for(
size_t i = 0; i < population.size(); i++){
132 costs[i] = objective_function(population[i]);
137 std::pair<Point<double>,
double> get_best_solution(std::vector<
Point<double>>
const& population, std::vector<double>
const& costs){
138 auto best_pos = std::max_element(costs.begin(), costs.end()) - costs.begin();
139 return std::make_pair(population[best_pos], costs[best_pos]);
143 std::uniform_int_distribution<size_t> dist(0, population.size()-1);
150 pos = dist(generator);
151 P1 = population[pos];
155 pos = dist(generator);
156 P2 = population[pos];
157 }
while(P2 == P0 || P2 == P1);
161 pos = dist(generator);
162 P3 = population[pos];
163 }
while(P3 == P0 || P3 == P1 || P3 == P2);
165 std::uniform_real_distribution<double> distCR(0., 1.);
166 std::uniform_int_distribution<size_t> distNP(0, this->
m_learners.size() - 1);
167 size_t cut_point = distNP(generator);
169 for(
int i = 0; i < S.
size(); i++){
170 double _cr = distCR(generator);
172 if(i == cut_point || _cr < CR){
173 S[i] = P3[i] + F * (P1[i]-P2[i]);
180 S = simplex_projection(S);
182 S = mltk::abs(S/S.
norm());
191 template<
template<
typename...>
class WeakLearner,
192 template<
typename...>
class... WeakLearners>
194 this->samples = std::make_shared<Data<T> >(
samples);
195 this->use_simplex = simplex;
196 fillLearnersVector(flearner, weak_learners...);
201 voter.setVotingType(
"soft");
205 generator.seed(this->
seed);
207 auto population = init_population();
208 auto p_costs = eval_population(population);
209 Sbest = get_best_solution(population, p_costs);
215 for(
int G = 0; G < this->
MAX_IT; G++){
216 std::clog <<
"\nGeneration " << G <<std::endl;
217 std::vector<Point<double> > new_population;
218 for(
size_t i = 0; i < population.size(); i++){
219 auto Si = new_sample(Sbest.first, population);
220 double si_cost = objective_function(Si);
221 if(si_cost <= p_costs[i]){
222 new_population.push_back(Si);
224 new_population.push_back(population[i]);
227 population = new_population;
228 p_costs = eval_population(population);
229 for(
size_t i = 0; i < population.size(); i++){
230 std::clog << population[i] <<
", cost: " << p_costs[i] << std::endl;
232 auto candidate = get_best_solution(population, p_costs);
233 if(Sbest.second < candidate.second){
239 if(Sbest.second == 1){
243 best_weights = Sbest.first.X();
244 voter.setWeights(best_weights);
245 std::cout << Sbest.first <<
" Best weights cost: " << objective_function(Sbest.first) << std::endl;
250 voter.setWeights(best_weights);
251 return voter.
evaluate(p, raw_value);
255 return this->
m_learners[0]->getFormulationString();
262 size_t getBestGeneration() {
return best_generation; }
264 VotingClassifier<double> getVoter(){
return voter; }
265 auto getValidPair(){
return valid_pair; }
size_t size() const
Returns the size of the dataset.
Definition: Data.hpp:208
int MAX_IT
Max number of iterations.
Definition: Learner.hpp:37
std::shared_ptr< Data< T > > samples
Samples used in the model training.
Definition: Learner.hpp:21
virtual double evaluate(const Point< T > &p, bool raw_value=false)=0
Returns the class of a feature point based on the trained Learner.
size_t seed
Seed for random operations.
Definition: Learner.hpp:46
double norm(int p=NormType::NORM_L2) const
Returns the p-norm of the point.
Definition: Point.hpp:651
Rep const & X() const
Returns the attributes representation of the point (std::vector by default).
Definition: Point.hpp:139
std::size_t size() const
Returns the dimension of the point.
Definition: Point.hpp:133
Definition: classifier/Classifier.hpp:17
Definition: AutoWeightedVoting.hpp:13
bool train() override
Function that executes the training phase of a Learner.
Definition: AutoWeightedVoting.hpp:199
double evaluate(const Point< T > &p, bool raw_value=false) override
Returns the class of a feature point based on the trained Learner.
Definition: AutoWeightedVoting.hpp:249
std::string getFormulationString() override
getFormulationString: returns a string that represents the formulation of the learner (Primal or Dual)...
Definition: AutoWeightedVoting.hpp:254
Namespace for ensemble methods.
Definition: ensemble/Ensemble.hpp:16
void setSamples(DataPointer< T > samples) override
setSamples: sets the samples used by the Learner.
Definition: ensemble/Ensemble.hpp:56
std::vector< LearnerPointer< T > > m_learners
Pointer to base learner used by the ensemble method.
Definition: ensemble/Ensemble.hpp:22
Definition: VotingClassifier.hpp:13
bool train() override
Function that executes the training phase of a Learner.
Definition: VotingClassifier.hpp:28
double evaluate(const Point< T > &p, bool raw_value=false) override
Returns the class of a feature point based on the trained Learner.
Definition: VotingClassifier.hpp:38
TrainTestPair< T > partTrainTest(Data< T > &data, size_t fold, bool stratified=true, bool keepIndex=true, size_t seed=0)
Divide the samples in training and test set.
Definition: valid/Validation.hpp:414
UFJF-MLTK main namespace for core functionalities.
Definition: classifier/Classifier.hpp:11
A struct representing a pair with training and test data.
Definition: valid/Validation.hpp:77
Data< T > test
Test data.
Definition: valid/Validation.hpp:81
Data< T > train
Train data.
Definition: valid/Validation.hpp:79