UFJF - Machine Learning Toolkit  0.51.8
AOS.hpp
1 //
2 // Created by Mateus Coutinho Mari on 8/20/2018.
3 //
4 
5 #pragma once
6 #define MAX_HEAP 500000
7 #define NUM_ERROR_EPS 0.05
8 #define FIRST_DECAY 0.5
9 
10 #include <algorithm>
11 #include <functional>
12 #include "FeatureSelection.hpp"
13 #include "ufjfmltk/Core.hpp"
14 
15 namespace mltk{
16  namespace featselect {
17  template<typename T = double>
18  class AOS : public FeatureSelection<T> {
19  public:
20  struct select_weight {
21  int fname{0};
22  int indice{0};
23  double w{0};
24  double val{0};
25  double pmargin{0};
26  double radius{-1};
27  double dcents{-1};
28  double golub{-1};
29  double fisher{-1};
30 
31  bool operator==(AOS::select_weight other) const;
32  };
33 
34  struct select_gamma {
35  std::vector<int> fnames;
36  int level{0};
37  int sv{0};
38  int train{0};
39  double value{0}; /*valor usado como criterio de escolha*/
40  double pgamma{0}; /*projected gamma*/
41  double rgamma{0}; /*real gamma p/ display*/
42  double radius{-1}; /*raio*/
43  double dcents{-1}; /*distancia entre os centros*/
44  double golub{-1}; /*golub - estatistica*/
45  double fisher{-1}; /*fisher - estatistica*/
46  std::vector<double> w;
47  double bias{0};
48 
49  bool operator==(AOS::select_gamma other) const;
50  };
51 
52 
53  class Hash {
54  private:
55  size_t length = 0, width = 0, conthash = 0;
56  AOS::select_gamma ***elements;
57 
58  public:
59  Hash(size_t length, size_t width);
60 
61  bool add(AOS::select_gamma *elmt);
62 
63  //void set_null(AOS::select_gamma *elmt);
64 
65  //void print(int dim);
66 
67  size_t getLength() const;
68 
69  void setLength(size_t length);
70 
71  unsigned int getWidth() const;
72 
73  void setWidth(size_t width);
74 
75  unsigned int getConthash() const;
76 
77  void setConthash(size_t conthash);
78 
79  ~Hash();
80  };
81 
82  class Heap {
83  private:
84  size_t size = 0, contheap = 0, contheapreins = 0, maxheapsize = 0, contprooning = 0;
85  AOS::select_gamma **elements;
86 
87  public:
88  Heap();
89 
90  select_gamma **getElements() const;
91 
92  size_t getMaxheapsize() const;
93 
94  size_t getContheap() const;
95 
96  size_t getContheapreins() const;
97 
98  size_t getContprooning() const;
99 
100  size_t getSize() const;
101 
102  bool insert(AOS::select_gamma *tok, int cont);
103 
104  AOS::select_gamma *pop();
105 
106  void print();
107 
108  size_t projected();
109 
110  void percolate(size_t i);
111 
112  void cut(std::shared_ptr<AOS::Hash> hash, int levelat, int cut, double g_margin, int verbose);
113 
114  ~Heap();
115  };
116 
117  void setBreadth(int breadth);
118 
119  void setCut(int cut);
120 
121  void setSortingShape(int sortingShape);
122 
123  void setChoiceShape(int choiceShape);
124 
125  void setLookAheadDepth(int lookAheadDepth);
126 
127  private:
128  const size_t HASH_SIZE = 161387, HASH_WIDTH = 100;
129  int breadth = 0, cut = 0, sorting_shape = 0, choice_shape = 0, startover = 0, look_ahead_depth = 0, ftime = 0, lool = 0;
130  int dim_orig = 1, startdim;
131  int contheap_parcial, contheapreins_parcial, conthash_parcial, contprooning_parcial, maxheapsize_parcial,
132  contnaoheap_parcial, conthashnaoheap_parcial, contexpandidos_parcial, contprojetados_parcial,
133  contprojtreinados_parcial, sobraprojecoes_parcial;
134  int contexpanded = 0, contnotheap = 0, contprojected = 0, contprojtrained = 0, conthashnotheap = 0;
135  double bonus = 0.0, n0 = 1, max_time = 0, kfolderror = 0, g_margin = 0;
136  double partial_margin, partial_svs, partial_time, partial_dim;
137  double START_TIME, initial_time, max_time_orig;
138  bool doleave_oo = false;
139  double mult_tempo = 2;
140  public:
141  void setMultTempo(double multTempo) {
142  mult_tempo = multTempo;
143  }
144 
145  private:
146  int q = 2;
147  mltk::KernelType kernel_type = INNER_PRODUCT;
148 
149  std::string filename;
150  std::unique_ptr<Heap> heap;
151  std::shared_ptr<Hash> hash;
152  std::shared_ptr<Data<T> > stmp_partial{nullptr};
153 
154  private:
155  static bool select_gamma_equal(const select_gamma *a, const select_gamma *b);
156 
157  public:
158  AOS() = default;
160  typename validation::CrossValidation *cv = nullptr,
161  int breadth = 3, int sorting_shape = 1,
162  int choice_shape = 1, int look_ahead_depth = 0, int cut = 0, int skip = 1, double bonus = 0,
163  int startover = 999999, double g_margin = 0, bool doleave_oo = 0, int verbose = 0);
164 
165  Data<T> selectFeatures() override;
166 
167  void mainLoop(Data<T>& sample);
168 
169  double lookAhead(Data<T>& sample, std::vector<int> fnames_orig, std::vector<double> w_orig, int level_orig);
170 
171  void setQ(int q);
172  };
173 
174  template<typename T>
175  AOS<T>::AOS(const Data<T>& samples, classifier::Classifier<T> *classifier, int final_dim,
176  typename validation::CrossValidation *cv, int breadth,
177  int sorting_shape, int choice_shape, int look_ahead_depth, int cut, int skip, double bonus,
178  int startover, double g_margin, bool doleave_oo, int verbose) {
179  if (cv == nullptr) {
180  this->cv = new validation::CrossValidation;
181  }
182  this->samples = mltk::make_data<T>(samples);
183  this->classifier = classifier;
184  this->cv = cv;
185  if(!this->cv){
186  this->cv = new validation::CrossValidation();
187  this->cv->seed = std::vector<unsigned int>(1, 0);
188  this->cv->fold = 10;
189  this->cv->qtde = 0;
190  this->cv->jump = this->jump;
191  }
192  this->breadth = breadth;
193  this->depth = this->samples->dim()-final_dim;
194  this->bonus = bonus;
195  this->cut = cut;
196  this->look_ahead_depth = look_ahead_depth;
197  this->skip = skip;
198  this->startover = startover;
199  this->g_margin = g_margin;
200  this->leave_one_out = (bool) doleave_oo;
201  this->sorting_shape = sorting_shape;
202  this->choice_shape = choice_shape;
203  this->verbose = verbose;
204  if (classifier)
205  classifier->setVerbose(0);
206  }
207 
208 
209  template<typename T>
211  int i = 0;
212  int tbreadth = 0;
213  int level = 0;
214  int dim = this->samples->dim();
215  int startdim = dim_orig = dim;
216  std::vector<int> fnames;
217  int ftime = 1;
218  int lool = 0; /*leave one out level*/
219  double g_margin = 0;
220  std::shared_ptr<Data<T>> stmp = make_data<T>();
221  max_time_orig = this->max_time;
222  this->startdim = dim;
223 
224  int parcial = 0; //verifica se �ltima solu��o � uma solu��o recuperada (parcial)
225 
226  this->timer.reset();
227  this->initial_time = this->timer.elapsed();
228 
229  /*checking arguments*/
230  if (this->breadth > this->MAX_BREATH) { this->breadth = this->MAX_BREATH; }
231  if (this->depth > dim - 1) { this->depth = dim - 1; }
232  if (this->depth > this->MAX_DEPTH) { this->depth = this->MAX_DEPTH; }
233 
234  /*create a hash*/
235  this->hash = std::make_shared<AOS<T>::Hash>(this->HASH_SIZE, this->HASH_WIDTH);
236  this->heap = std::make_unique<AOS<T>::Heap>();
237 
238  /*inicializando o cross-validation*/
239  if (this->cv->qtde > 0) {
240  //utils_initialize_random();
241  this->cv->seed.resize(this->cv->qtde);
242  for (i = 0; i < this->cv->qtde; i++)
243  this->cv->seed[i] = (size_t) i; //rand();
244  this->cv->initial_error = 0;
245  this->cv->actual_error = 0;
246  }
247 
248  stmp_partial = mltk::make_data<T>(this->samples->copy());
249  this->ftime = ftime;
250 
251  /*do this while my depth permits*/
252  while (1) {
253  /*first problem to solve, when heap is empty*/
254  if (this->heap->getSize() == 0) {
255  /*quit end of heap found*/
256  if (ftime == 0) /*first time = false -- nao eh a primeira vez, o processo falhou!*/
257  {
258  std::cout << "End of Heap, recovering last dimension...\n\n";
259 
260  /*pegar os dados do ultimo lool, uma vez que foi a ultima dimensao fechada*/
261  std::cout << "---------------\n :: FINAL :: \n---------------\n";
262  std::cout << "Chosen Features: ";
263  std::vector<int> fnamesp = this->stmp_partial->getFeaturesNames();
264  for (i = 0; i < this->stmp_partial->dim() - 1; ++i)
265  std::cout << fnamesp[i] << ", ";
266  std::cout << fnamesp[i] << std::endl;
267 
268 
269  if (this->cv->qtde > 0) {
270  if (level % this->cv->jump != 0) {
271  for (this->cv->actual_error = 0, i = 0; i < this->cv->qtde; i++) {
272  this->cv->actual_error += 100-validation::kfold(*this->stmp_partial, *this->classifier,
273  this->cv->fold, true, this->cv->seed[i], 0).accuracy;
274  }
275  this->kfolderror = this->cv->actual_error / this->cv->qtde;
276  }
277  std::cout << "Dim: " << this->partial_dim << ", Margin: " << this->partial_margin
278  << ", SVs: " << this->partial_svs
279  << ", Error " << this->cv->fold << "-fold: " << this->kfolderror << "%\n";
280  } else {
281  std::cout << "Dim: " << this->partial_dim << ", Margin: -" << this->partial_margin
282  << ", SVs: " << this->partial_svs
283  << std::endl;
284  }
285  std::cout << "Total insertions in Heap: " << this->contheap_parcial << std::endl;
286  std::cout << "Total reinsertions in Heap: " << this->contheapreins_parcial << std::endl;
287  std::cout << "Max size of the Heap: " << this->maxheapsize_parcial << std::endl;
288  std::cout << "Total prunes in the Heap: " << this->contprooning_parcial << std::endl;
289  std::cout << "Expanded nodes: " << this->contexpandidos_parcial << std::endl;
290  std::cout << "Not inserted in Heap: " << this->contnaoheap_parcial << std::endl;
291  std::cout << "Total of projections: " << this->contprojetados_parcial << std::endl;
292  std::cout << "Total of trained projections: " << this->contprojtreinados_parcial << std::endl;
293  std::cout << "Total of non-trained projections: " << this->contprojetados_parcial
294  - this->contprojtreinados_parcial
295  << std::endl;
296  //printf("Sobra de projecoes no Heap: %d\n", sobraprojecoes_parcial);
297  std::cout << "Equal nodes in Hash that didn't entered in the Heap: "
298  << this->conthashnaoheap_parcial
299  << std::endl;
300  std::cout << "Hash size: " << this->conthash_parcial << std::endl;
301  std::cout << "Total time: " << this->partial_time << std::endl;
302  parcial = 1;
303  /*Save data to file*/
304 
305  //data_write(filename, stmp_partial, 0);
306  break;
307  }
308  /*check breadth*/
309  if (this->breadth > dim) this->breadth = dim;
310 
311  /*run select*/
312  this->mainLoop(*this->samples);
313  std::cout << this->heap->getSize() << std::endl;
314  if (this->heap->getSize() == 0) {
315  std::cout << "Initial training failed!!!\n\n";
316  break;
317  }
318  ftime = 0;
319  }
320  /*subsequent problems (heap not empty)*/
321  else {
322  /*create new data struct*/
323  level = this->heap->getElements()[1]->level;
324  fnames = this->heap->getElements()[1]->fnames;
325  stmp->copy(*this->samples);
326  stmp->removeFeatures(fnames);
327 
328  if (level == 1)
329  this->n0 = this->max_time *= FIRST_DECAY;
330  else if (level > 1)
331  this->max_time =
332  this->n0 * std::exp(-stmp->getTime_mult() * ((double) startdim / (startdim - level)));
333 
334  /*stop criterium*/
335  if (this->samples->dim() == dim - this->depth && this->heap->getElements()[1]->rgamma > 0) {
336  std::cout << "---------------\n :: FINAL :: \n---------------\n";
337  std::cout << "Chosen Features: ";
338  std::vector<int> fnamesp = stmp->getFeaturesNames();
339  for (i = 0; i < stmp->dim() - 1; ++i)
340  std::cout << fnamesp[i] << ",";
341  std::cout << fnamesp[i] << std::endl;
342 
343  std::cout << "---------------\nRemoved Features: ";
344  for (i = 0; i < this->samples->dim() - stmp->dim() - 1; ++i)
345  std::cout << this->heap->getElements()[1]->fnames[i] << ", ";
346  std::cout << this->heap->getElements()[1]->fnames[i] << std::endl;
347 
348  if (this->cv->qtde > 0) {
349  for (this->cv->actual_error = 0, i = 0; i < this->cv->qtde; i++)
350  this->cv->actual_error += 100-validation::kfold(*stmp, *this->classifier, this->cv->fold, true,
351  this->cv->seed[i], 0).accuracy;
352  this->kfolderror = this->cv->actual_error / this->cv->qtde;
353  std::cout << "Dim: " << stmp->dim() << ", Margin: "
354  << this->heap->getElements()[1]->rgamma
355  << ", SVs: " << this->heap->getElements()[1]->sv << ", Error " << this->cv->fold
356  << "-fold: " << this->kfolderror << "%\n";
357  } else {
358  std::cout << "Dim: " << stmp->dim() << ", Margin: "
359  << this->heap->getElements()[1]->rgamma
360  << ", SVs: " << this->heap->getElements()[1]->sv << "\n";
361  }
362  std::cout << "Total insertions in the Heap: " << this->heap->getContheap() << std::endl;
363  std::cout << "Total reinsertions in the Heap: " << this->heap->getContheapreins() << std::endl;
364  std::cout << "Max size of the Heap: " << this->heap->getMaxheapsize() << std::endl;
365  std::cout << "Total prune in the Heap: " << this->heap->getContprooning() << std::endl;
366  std::cout << "Expanded nodes: " << this->contexpanded << std::endl;
367  std::cout << "Not inserted in Heap: " << this->contnotheap << std::endl;
368  std::cout << "Total of projections: " << this->contprojected << std::endl;
369  std::cout << "Total of trained projections: " << this->contprojtrained << std::endl;
370  std::cout << "Total of trained projections: " << this->contprojected - this->contprojtrained
371  << std::endl;
372  //printf("Sobra de projecoes no Heap: %d\n", aos_select_heap_projected(heap));
373  std::cout << "Equal nodes in Hash that didn't entered in the Heap: " << this->conthashnotheap
374  << std::endl;
375  std::cout << "Hash size: " << this->hash->getConthash() << std::endl;
376  std::cout << "Total time: "
377  << (((100.0f * clock() / CLOCKS_PER_SEC) - this->initial_time) / 100.0f) << std::endl;
378 
379  /*Save data to file*/
380  stmp->write(this->filename, "data");
381  break;
382  }
383 
384  /*some verbose*/
385  if (this->verbose && level <= 50) {
386  std::cout << "-- Testing node - Features (";
387  for (i = 0; i < level - 1; ++i)
388  std::cout << fnames[i] << ",";
389  std::cout << fnames[i] << ")\n";
390  std::cout << "------------------------------------------\n";
391  }
392 
393  /*check breadth*/
394  tbreadth = this->breadth;
395  if (tbreadth > stmp->dim()) tbreadth = stmp->dim();
396 
397  /*run select*/
398  auto stmp_copy = stmp->copy();
399  this->mainLoop(stmp_copy);
400 
401  /*free stuff*/
402  stmp.reset(new Data<T>());
403  }
404 
405  /*verbose*/
406  if (this->verbose) {
407  if (this->verbose > 1) {
408  this->heap->print();
409  std::cout.flush();
410  } else
411  std::cout << "-- Heap Size: " << this->heap->getSize();
412  std::cout << "--------\n";
413  }
414  }
415 
416  /*free stuff*/
417  if (parcial) {
418  stmp.reset();
419  return *this->stmp_partial;
420  } else {
421  this->stmp_partial.reset();
422  return *stmp;
423  }
424  }
425 
426  /*----------------------------------------------------------*
427  * Run A* feature selection main loop *
428  *----------------------------------------------------------*/
429 
430  template<typename T>
431  void AOS<T>::mainLoop(Data<T>& sample) {
432  std::vector<int> ofnames;
433  std::vector<double> w, w_manut;
434  size_t i = 0, j = 0, k = 0;
435  int level = 0;
436  int svcount = 0;
437  //int trained = 0;
438  double margin = 0;
439  double omargin = 0;
440  double wnorm = 0;
441  double tpmargin = 0;
442  double sumnorm = 0;
443  double leave_oo = -1;
444  size_t dim = sample.dim();
445  size_t size = sample.size();
446  AOS<T>::select_gamma *gtmp = nullptr;
447  std::vector<AOS<T>::select_weight> weight(dim);
448  bool isPrimal = this->classifier->getFormulationString() == "Primal";
449  auto data_copy = sample.copy();
450 
451  //double q = sample->q;
452 
453  int loolflag = 0; //fechar uma dimensao
454 
455  this->classifier->setSamples(sample);
456 
457  if (this->heap->getSize() == 0) {
458  if(!this->classifier->train()){
459  std::cerr << "Training failed!" << std::endl;
460  }
461  auto solution = this->classifier->getSolution();
462  w = solution.w.X();
463  margin = solution.margin;
464  svcount = solution.svs;
465  if(ftime){
466  max_time /= mult_tempo;
467  }
468  } else {
469  gtmp = this->heap->pop();
470 
471  if (gtmp->level > this->depth) { /*eliminar nodo com nivel maior que a profundidade desejada*/
472  gtmp = nullptr;
473  return;
474  }
475 
476  w = gtmp->w;
477  if (gtmp->rgamma > 0) {
478  //trained = heap->elements[1]->train;
479  margin = gtmp->rgamma;
480  svcount = gtmp->sv;
481  } else { /*Training*/
482  margin = gtmp->pgamma;
483 
484  this->classifier->getSolutionRef()->bias = gtmp->bias;
485  this->classifier->setSamples(sample);
486 
487  if (!this->classifier->train()) { /*training failed, remove this option*/
488  gtmp = nullptr;
489  if (this->verbose > 1) std::cout << "Training failed!\n";
490  return;
491  }
492 
493  auto solution = this->classifier->getSolution();
494  w = solution.w.X();
495  margin = solution.margin;
496  svcount = solution.svs;
497 
498  if (this->choice_shape == 2)
499  gtmp->value = margin * mltk::stats::distCenters(sample, -1);
500  else
501  gtmp->value = margin;
502  gtmp->rgamma = margin;
503  gtmp->train = 1;
504 
505  this->contprojtrained++;
506 
507  /*verifica se realmente eh o melhor ou se jah chegou na profundidade desejada*/
508  if ((gtmp->value < this->heap->getElements()[1]->value) ||
509  (gtmp->level == this->depth)) { /*reinsert the node into heap*/
510  gtmp->w = w;
511  gtmp->sv = svcount;
512  this->heap->insert(gtmp, 0);
513  return;
514  }
515  }
516  /*this is the best solution, continue*/
517  this->contexpanded++;
518  //ofnames = gtmp->fnames;
519  ofnames.resize((size_t) gtmp->level);
520  ofnames = gtmp->fnames;
521  level = gtmp->level;
522  omargin = gtmp->pgamma;
523 
524  gtmp = nullptr;
525  if (level > this->lool) {
526  this->lool = level;
527  loolflag = 1; //fechou a dimensao
528  }
529  }
530 
531  /*some verbose*/
532  if (this->verbose)
533  if (level > 0) {
534  std::cout << "-> Expanding features (";
535  for (i = 0; i < level - 1; ++i)
536  std::cout << ofnames[i] << ",";
537  std::cout << ofnames[i];
538  std::cout << ") -- Margin: " << margin << ", pMargin: " << omargin << ", Level: " << level << "\n";
539  }
540 
541  /*calculating leave one out, if it's the first to hit this level*/
542  if ((this->lool) == level && (loolflag == 1 || level == 0)) {
543  loolflag = 0;
544  if (this->cv->qtde > 0) {
545  if (level == 0) {
546  for (this->cv->initial_error = 0, i = 0; i < this->cv->qtde; i++)
547  this->cv->initial_error += 100-validation::kfold(sample, *this->classifier,
548  this->cv->fold, true, this->cv->seed[i], 0).accuracy;
549  kfolderror = this->cv->initial_error / this->cv->qtde;
550  } else if (level % this->cv->jump == 0) {
551  for (this->cv->actual_error = 0, i = 0; i < this->cv->qtde; i++)
552  this->cv->actual_error += 100-validation::kfold(sample, *this->classifier,
553  this->cv->fold, true, this->cv->seed[i], 0).accuracy;
554  kfolderror = this->cv->actual_error / this->cv->qtde;
555  }
556  }
557 
558  if (this->doleave_oo) {
559  //leave_oo = utils_leave_one_out(sample, train, skip, 0);
560  std::cout << "Leave One Out -- Dim: " << this->startdim - level << ", Margin: " << margin
561  << ", LeaveOO: "
562  << leave_oo << ", SVs: " << svcount << ", Time: "
563  << ((100.0f * clock() / CLOCKS_PER_SEC) - this->initial_time) / 100.0f << "\n";
564  } else {
565  leave_oo = -1;
566  std::cout << "--- --- --- --- --- --- --- ---\n";
567  if (this->cv->qtde > 0 && level % this->cv->jump == 0)
568  std::cout << "Dim: " << (this->startdim - level) << ", Margin: " << margin << ", SVs: "
569  << svcount
570  << ", Error " << this->cv->fold << "-fold: " << this->kfolderror << ", Time: "
571  << ((100.0f * clock() / CLOCKS_PER_SEC) - this->initial_time) / 100.0f;
572  else
573  std::cout << "Dim: " << (this->startdim - level) << ", Margin: " << margin << ", SVs: "
574  << svcount
575  << ", Time: " << ((100.0f * clock() / CLOCKS_PER_SEC) - this->initial_time) / 100.0f;
576  if (level > 0) {
577  std::cout << " - Removed features: ";
578  for (j = 0; j < level - 1; j++)
579  std::cout << ofnames[j] << ",";
580  std::cout << ofnames[j] << std::endl;
581  }
582  std::cout << "\nIns: " << this->heap->getContheap() << " / ";
583  std::cout << "ReIns: " << this->heap->getContheapreins() << " / ";
584  std::cout << "Max: " << this->heap->getMaxheapsize() << " / ";
585  std::cout << "Proonings: " << this->heap->getContprooning() << " / ";
586  std::cout << "Hash: " << this->hash->getConthash() << " / ";
587  std::cout << "Expanded: " << this->contexpanded << " / ";
588  std::cout << "Not trained: " << this->contprojected - this->contprojtrained << " / ";
589  std::cout << "\n--- --- --- --- --- --- --- ---\n";
590  }
591 
592  /*salvando os dados da ultima dimensao fechada*/
593  this->partial_margin = margin;
594  this->partial_svs = svcount;
595  this->partial_time = ((100.0f * clock() / CLOCKS_PER_SEC) - this->initial_time) / 100.0f;
596  this->partial_dim = startdim - level;
597  this->contheap_parcial = this->heap->getContheap();
598  this->contheapreins_parcial = this->heap->getContheapreins();
599  this->conthash_parcial = this->hash->getConthash();
600  this->contprooning_parcial = this->heap->getContprooning();
601  this->maxheapsize_parcial = this->heap->getMaxheapsize();
602  this->contnaoheap_parcial = this->contnotheap;
603  this->conthashnaoheap_parcial = this->conthashnotheap;
604  this->contexpandidos_parcial = this->contexpanded;
605  this->contprojetados_parcial = this->contexpanded;
606  this->contprojtreinados_parcial = this->contprojtrained;
607  //sobraprojecoes_parcial = aos_select_heap_projected(heap);
608  this->stmp_partial = mltk::make_data<T>();
609  this->stmp_partial->copy(data_copy);
610  if (look_ahead_depth > 0) {
611  if (isPrimal) {
612  /*criar um w de manutencao pra volta do look_ahead*/
613  w_manut.resize(dim);
614  w_manut = w;
615  }
616 
617  /*get new look ahead margin*/
618  g_margin = this->lookAhead(sample, ofnames, w, level);
619 
620  if (isPrimal) {
621  w = w_manut;
622  }
623  }
624  /*cut heap based on its level and look ahead margin*/
625  this->heap->cut(this->hash, lool, cut, g_margin, this->verbose);
626 
627  if (this->verbose > 2) {
628  std::cout << " (" << this->heap->getSize() << ")\t";
629  for (j = 0; j < level; j++)
630  std::cout << ofnames[j] << " ";
631  std::cout << "\n";
632  }
633  }
634  for(i = 0; i < weight.size(); i++){
635  weight[i].w = w[i];
636  weight[i].indice = i;
637  weight[i].fname = sample.getFeaturesNames()[i];
638  weight[i].radius = -1;
639  weight[i].dcents = -1;
640  weight[i].golub = -1;
641  weight[i].fisher = -1;
642  }
643 
644  if(sorting_shape == 2){
645  for(i = 0; i < dim; i++){
646  weight[i].dcents = mltk::stats::distCenters(sample, weight[i].fname);
647  }
648  }else if(sorting_shape == 3){
649  for(i = 0; i < dim; i++){
650  weight[i].radius = mltk::stats::radius(sample, weight[i].fname, q);
651  weight[i].dcents = mltk::stats::distCenters(sample, weight[i].fname);
652  }
653  }else if(sorting_shape == 4){
654  for(i = 0; i < dim; i++){
655  weight[i].radius = mltk::stats::radius(sample, weight[i].fname, q);;
656  }
657  }else if(sorting_shape == 5 || sorting_shape == 6){
658  int num_pos = 0, num_neg = 0;
659 
660  auto classes = sample.classes();
661  auto neg_id = std::find(classes.begin(), classes.end(), -1) - classes.begin();
662  auto pos_id = std::find(classes.begin(), classes.end(), 1) - classes.begin();
663 
664  num_pos = sample.classesDistribution()[pos_id];
665  num_neg = sample.classesDistribution()[neg_id];
666 
667  std::vector<double> avg_pos(dim), avg_neg(dim), sd_pos(dim), sd_neg(dim);
668 
669  // calc average
670  for(i = 0; i < dim; i++){
671  avg_neg[i] = 0; avg_pos[i] = 0;
672  for(j = 0; j < size; j++){
673  if(sample.point(j)->Y() == -1){
674  avg_neg[i] += sample.point(j)->X()[i];
675  }else{
676  avg_pos[i] += sample.point(j)->X()[i];
677  }
678  }
679  avg_neg[i] /= num_neg;
680  avg_pos[i] /= num_pos;
681  }
682  // calc variance
683  for(i = 0; i < dim; i++){
684  sd_neg[i] = 0; sd_pos[i] = 0;
685  for(j = 0; j < size; j++){
686  if(sample.point(j)->Y() == -1){
687  sd_neg[i] += std::pow(sample.point(j)->X()[i]-avg_neg[i], 2);
688  }else{
689  sd_pos[i] += std::pow(sample.point(j)->X()[i]-avg_pos[i], 2);
690  }
691  }
692  }
693 
694  for(i = 0; i < dim; i++){
695  weight[i].golub = std::fabs(avg_pos[i] - avg_pos[i])/(std::sqrt(sd_pos[i]/num_pos-1)) +
696  std::sqrt(sd_neg[i]/num_neg-1);
697  weight[i].fisher = std::pow(avg_pos[i] - avg_neg[i], 2)/(sd_pos[i] + sd_neg[i]);
698  }
699  }
700 
701  // sorting shape
702  if(sorting_shape == 6){ // w* fisher
703  std::sort(weight.begin(), weight.end(), [](const auto& a, const auto& b){
704  return (((std::fabs(a.w * a.fisher) > std::fabs(b.w * b.fisher)) -
705  (std::fabs(a.w * a.fisher) < std::fabs(b.w * b.fisher)))<=0)?false:true;
706  });
707  }else if(sorting_shape == 5){ // w * golub
708  std::sort(weight.begin(), weight.end(), [](const auto& a, const auto& b){
709  return (((std::fabs(a.w * a.golub) > std::fabs(b.w * b.golub)) -
710  (std::fabs(a.w * a.golub) < std::fabs(b.w * b.golub))) <= 0)?false:true;
711  });
712  }else if(sorting_shape == 4){ // w * radius
713  std::sort(weight.begin(), weight.end(), [](const auto& a, const auto& b){
714  return (((std::fabs(a.w * a.radius) > std::fabs(b.w * b.radius)) -
715  (std::fabs(a.w * a.radius) < std::fabs(b.w * b.radius))) <= 0)?false:true;
716  });
717  }else if(sorting_shape == 3){ // w * radius / distcenter
718  std::sort(weight.begin(), weight.end(), [](const auto& a, const auto& b){
719  return ((std::fabs((a.w * a.radius)/a.dcents) > std::fabs((b.w * b.radius)/b.dcents)) -
720  (std::fabs((a.w * a.radius)/a.dcents) < std::fabs((b.w * b.radius)/b.dcents))<=0)?false:true;
721  });
722  }else if(sorting_shape == 2){ // w / distcenter
723  std::sort(weight.begin(), weight.end(), [](const auto& a, const auto& b){
724  return ((std::fabs(a.w / a.dcents) > std::fabs(b.w / b.dcents)) -
725  (std::fabs(a.w / a.dcents) < std::fabs(b.w / b.dcents)) <= 0)?false:true;
726  });
727  }else{
728  std::sort(weight.begin(), weight.end(), [](const auto& a, const auto& b){
729  return (((std::fabs(a.w)>std::fabs(b.w)) - (std::fabs(a.w)<std::fabs(b.w))) <= 0)?false:true;
730  });
731  }
732 
733  if(kernel_type == INNER_PRODUCT){
734  wnorm = mltk::norm(mltk::Point<double>(w), q);
735  }else{
736  mltk::Kernel<T> kernel(INNER_PRODUCT);
737  kernel.compute(make_data<T>(sample));
738  wnorm = kernel.featureSpaceNorm(make_data<T>(sample));
739  }
740 
741  // branching nodes
742  for(i = 0; i < breadth; i++){
743  if(kernel_type == INNER_PRODUCT){
744  // projected margin computation on IMA primal
745  if(dim > 1){
746  for(sumnorm = 0, j = 0; j < dim; j++){
747  if(i != j){
748  sumnorm += std::pow(std::fabs(weight[i].w/wnorm)*margin, q);
749  }
750  }
751  tpmargin = std::pow(sumnorm, 1.0/q);
752  }
753  }else{ // projected margin computation on IMA dual and SMO
754  mltk::Kernel<T> kernel;
755  auto Hk = kernel.generateMatrixHwithoutDim(make_data<T>(sample), weight[i].indice);
756  std::vector<double> alphaaux(size);
757 
758  for(k = 0; k < size; ++k){
759  for(alphaaux[k] = 0, j = 0; j < size; ++j){
760  alphaaux[k] += sample.point(j)->Alpha() * (*Hk)[k][j];
761  }
762  }
763 
764  for(sumnorm = 0, k = 0; k < size; k++){
765  sumnorm += alphaaux[k] * sample.point(k)->Alpha();
766  }
767 
768  tpmargin = std::sqrt(sumnorm)/wnorm*margin;
769  }
770  // creating nodes for heap
771  gtmp = new select_gamma;
772 
773  // setting values
774  if(choice_shape == 2){
775  gtmp->value = tpmargin*weight[i].dcents;
776  }else{
777  gtmp->value = tpmargin;
778  }
779  gtmp->pgamma = tpmargin;
780  gtmp->rgamma = -1;
781  gtmp->train = 0;
782  gtmp->sv = 0;
783  gtmp->level = level+1;
784  gtmp->radius = weight[i].radius;
785  gtmp->dcents = weight[i].dcents;
786  gtmp->golub = weight[i].golub;
787  gtmp->fisher = weight[i].fisher;
788 
789  // father node W maintenance
790  if(kernel_type == INNER_PRODUCT){
791  gtmp->w.resize(dim);
792 
793  for(k = 0, j = 0; k < dim; k++){
794  if(sample.getFeaturesNames()[j] != weight[i].fname){
795  gtmp->w[k++] = w[j];
796  }
797  }
798  }
799 
800  // resizing fnames array
801  gtmp->fnames.resize(level+1);
802 
803  for(j = 0; j < level; j++){
804  gtmp->fnames[j] = ofnames[j];
805  }
806  gtmp->fnames[j] = weight[i].fname;
807 
808  // sorting new feature array
809  std::sort(gtmp->fnames.begin(), gtmp->fnames.end(), [](const auto& a, const auto& b){
810  return (((a>b)-(a < b)) <= 0)?false:true;
811  });
812  if(choice_shape == 2){
813  if(i != 0 && tpmargin*weight[i].dcents < (1-NUM_ERROR_EPS)*(g_margin)){
814  hash->add(gtmp);
815  contnotheap++; continue;
816  }
817  }else{
818  if(i != 0 && tpmargin < (1-NUM_ERROR_EPS)*g_margin){
819  hash->add(gtmp);
820  contnotheap++; continue;
821  }
822  }
823 
824  if(this->verbose){
825  if(sorting_shape == 5 || sorting_shape == 6){
826  std::cout << " -- New Node - Feature " << weight[i].fname << ", Value: " << gtmp->value <<
827  ", pMargin: " << gtmp->pgamma << ", DCent: " << gtmp->dcents << ", Radius: " << gtmp->radius <<
828  ", Golub: " << gtmp->golub << ", Fisher: " << gtmp->fisher << ", Level: " << gtmp->level <<
829  std::endl;
830  }else{
831  std::cout << " -- New Node - Feature " << weight[i].fname << ", Value: " << gtmp->value <<
832  ", pMargin: " << gtmp->pgamma << ", DCent: " << gtmp->dcents << ", Radius: " << gtmp->radius <<
833  gtmp->level << std::endl;
834  }
835  }
836 
837  // push node into heap if it is not redundant in hash
838  if(hash->add(gtmp)){
839  heap->insert(gtmp, 1);
840  contprojected++;
841  }else{
842  conthashnotheap++;
843  }
844  }
845  }
846 
847  /*----------------------------------------------------------*
848  * look ahead for pruning value *
849  *----------------------------------------------------------*/
850 
851  template<typename T>
852  double AOS<T>::lookAhead(Data<T>& sample, std::vector<int> fnames_orig, std::vector<double> w_orig, int level_orig) {
853  size_t i = 0, j = 0;
854  int level = level_orig;
855  int svcount = 0;
856  int count = 0;
857  int feat = 0;
858  bool isPrimal = this->classifier->getFormulationString() == "Primal";
859  std::vector<int> features((unsigned int) look_ahead_depth + 1, 0);
860  double min = 0;
861  double margin = 0;
862  double g_margin = 0;
863  std::vector<double> w = w_orig;
864  std::vector<double> novo_w;
865  auto stmp = make_data<T>(sample.copy());
866  select_gamma *gtmp = nullptr;
867  double distcents = 0;
868  Solution solution;
869 
870  while (true) {
871  /*stopping criterion*/
872  if (count == this->look_ahead_depth || count == sample.dim() - 1 ||
873  stmp->dim() == sample.dim() - this->depth || level == this->depth)
874  break;
875  if (this->choice_shape == 2) {
876  /*selecting one feature with least w / dist. centers*/
877  min = fabs(w[0]) / mltk::stats::distCenters(*stmp, stmp->getFeaturesNames()[0]);
878  feat = stmp->getFeaturesNames()[0];
879  for (i = 1; i < stmp->dim(); i++) {
880  distcents = mltk::stats::distCenters(*stmp, stmp->getFeaturesNames()[i]);
881 // std::cout << "feat: " << feat << " feati: " << stmp->getFeaturesNames()[i] << " w[i]: " << w[i] << std::endl;
882 // std::cout << "distcenters: " << distcents << std::endl;
883  if (fabs(w[i]) / distcents < min) {
884  min = fabs(w[i]) / distcents;
885  feat = stmp->getFeaturesNames()[i];
886  }
887  }
888  } else {
889  /*selecting one feature with least w*/
890  min = fabs(w[0]);
891  feat = stmp->getFeaturesNames()[0];
892  for (i = 1; i < stmp->dim(); i++)
893  if (fabs(w[i]) < min) {
894  min = fabs(w[i]);
895  feat = stmp->getFeaturesNames()[i];
896  }
897  }
898 
899  /*manutencao do w do pai para o IMA Primal*/
900  if (isPrimal) {
901  size_t dim = sample.dim() - 1;
902  std::vector<int> fnames = sample.getFeaturesNames();
903  novo_w.reserve(dim - 1);
904  for (i = 0, j = 0; j < dim; ++j)
905  if (fnames[j] != feat)
906  novo_w.push_back(w[j]);
907  }
908 
909  /*saving removed feature name*/
910  features[count] = feat;
911 
912  /*removing old data sample*/
913  if (*stmp != sample) stmp.reset();
914 
915  /*get temp data struct*/
916  stmp = make_data<T>(sample.removeFeatures(features, count+1));
917 
918  if (level == 0)
919  this->n0 = this->max_time = max_time_orig * FIRST_DECAY;
920  else
921  this->max_time = n0 * std::exp(
922  -stmp->getTime_mult() * ((double) this->dim_orig / (this->dim_orig - level - 1)));
923 
924  /*creating new nodes for heap*/
925  gtmp = new AOS<T>::select_gamma;
926  if (gtmp == nullptr) {
927  std::cerr << "Error: Out of memory 9\n" << std::endl;
928  exit(1);
929  }
930 
931  /*setting values*/
932  gtmp->value = -1;
933  gtmp->pgamma = solution.margin;
934  gtmp->rgamma = -1;
935  gtmp->train = 0;
936  gtmp->sv = 0;
937  gtmp->w.clear();
938  gtmp->bias = 0;
939  gtmp->level = level + 1;
940  gtmp->radius = -1; //data_get_radius(stmp, -1, stmp->q);
941  if (this->choice_shape == 2)
942  gtmp->dcents = mltk::stats::distCenters(*stmp, -1);
943  else
944  gtmp->dcents = -1;
945  gtmp->fisher = -1;
946  gtmp->golub = -1;
947 
948  /*creating new fnames array*/
949  gtmp->fnames.resize((size_t) level + 1);
950 
951  for (j = 0; j < level_orig; ++j) gtmp->fnames[j] = fnames_orig[j];
952  for (j = 0; j < count + 1; ++j) gtmp->fnames[level_orig + j] = features[j];
953 
954  /*sorting new feature array*/
955  std::sort(gtmp->fnames.begin(), gtmp->fnames.end(), std::greater<int>());
956 
957  if (this->verbose) {
958  std::cout << " -- New look-ahead node - Features: " << std::endl;
959  for (i = 0; i < count; i++)
960  std::cout << features[i] << ", ";
961  std::cout << features[i] << std::endl;
962  }
963 
964  /*push node into heap if it is not redundant in hash*/
965  if (hash->add(gtmp)) {
966  /*training sample*/
967  svcount = 0;
968  solution.margin = gtmp->pgamma;
969  solution.bias = gtmp->bias;
970  solution.w = novo_w;
971  this->classifier->setSolution(solution);
972  if (!this->classifier->train()) {
973  if (this->verbose) std::cout << "Training failed!\n";
974  break;
975  }
976 
977  gtmp->w = this->classifier->getSolutionRef()->w.X();
978  gtmp->bias = this->classifier->getSolutionRef()->bias;
979  gtmp->sv = this->classifier->getSolutionRef()->svs;
980  gtmp->rgamma = this->classifier->getSolutionRef()->margin;
981  gtmp->train = 1;
982  if (this->choice_shape == 2)
983  gtmp->value = this->classifier->getSolutionRef()->margin * gtmp->dcents;
984  else
985  gtmp->value = this->classifier->getSolutionRef()->margin;
986  g_margin = gtmp->value;
987  this->heap->insert(gtmp, 1);
988  }
989  if (isPrimal) {
990  w.resize(stmp->dim());
991  w = this->classifier->getSolutionRef()->w.X();
992  }
993 
994  /*increment*/
995  level++;
996  count++;
997  }
998 
999  w_orig = w;
1000 
1001  /*free stuff*/
1002  if (*stmp != sample) stmp.reset();
1003 
1004  return 0;
1005  }
1006 
1007  template<typename T>
1008  void AOS<T>::setBreadth(int breadth) {
1009  AOS::breadth = breadth;
1010  }
1011 
1012  template<typename T>
1013  void AOS<T>::setCut(int cut) {
1014  AOS::cut = cut;
1015  }
1016 
1017  template<typename T>
1018  void AOS<T>::setSortingShape(int sortingShape) {
1019  sorting_shape = sortingShape;
1020  }
1021 
1022  template<typename T>
1023  void AOS<T>::setChoiceShape(int choiceShape) {
1024  choice_shape = choiceShape;
1025  }
1026 
1027  template<typename T>
1028  void AOS<T>::setLookAheadDepth(int lookAheadDepth) {
1029  look_ahead_depth = lookAheadDepth;
1030  }
1031 
1032  template<typename T>
1033  void AOS<T>::setQ(int q) {
1034  AOS::q = q;
1035  }
1036 
1037  template<typename T>
1038  bool AOS<T>::select_gamma_equal(const AOS::select_gamma *a, const AOS::select_gamma *b) {
1039  int i = 0;
1040  int eq = 0;
1041 
1042  if(a->level != b->level){
1043  return false;
1044  }
1045  for(i = 0; i < a->level; i++){
1046  if(a->fnames[i] == b->fnames[i]) eq++;
1047  else break;
1048  }
1049  return (eq == a->level);
1050  }
1051 
1052  /***********************************************************
1053  * HASH FUNCTIONS *
1054  ***********************************************************/
1055 
1056  /*----------------------------------------------------------*
1057  * creates a fresh new hash *
1058  *----------------------------------------------------------*/
1059 
1060  template<typename T>
1061  AOS<T>::Hash::Hash(size_t length, size_t width) {
1062  size_t i, j;
1063 
1064  this->length = length;
1065  this->width = width;
1066 
1067  elements = new AOS<T>::select_gamma **[length];
1068 
1069  for (i = 0; i < length; i++) {
1070  elements[i] = new AOS<T>::select_gamma *[width];
1071 
1072  for(j = 0; j < width; j++){
1073  elements[i][j] = nullptr;
1074  }
1075  }
1076  }
1077 
1078  /*----------------------------------------------------------*
1079  * insert an element into my hash *
1080  *----------------------------------------------------------*/
1081 
1082  template<typename T>
1083  bool AOS<T>::Hash::add(AOS<T>::select_gamma *elmt) {
1084  unsigned int i = 0;
1085  int index = 0;
1086  double func = 0;
1087 
1088  /*error check*/
1089  if (elmt == nullptr || this->elements == nullptr)
1090  return false;
1091 
1092  /*hashing function*/
1093  func = 0;
1094  for (i = 0; i < elmt->level; ++i)
1095  func += std::pow(elmt->fnames[i], 2);
1096 
1097  index = (int) fmod(func, this->length);
1098 
1099  /*skiping equals*/
1100  i = 0;
1101  while (i < this->width && this->elements[index][i] != nullptr) {
1102  /*check equality between nodes*/
1103  if (select_gamma_equal(elmt, this->elements[index][i])) {
1104  /*this node is identical to some other node*/
1105  /*check if this node has real gamma*/
1106  if (this->elements[index][i]->rgamma < 0) {
1107  /*keep node with highest projected value*/
1108  if (this->elements[index][i]->value < elmt->value) {
1109  this->elements[index][i]->pgamma = elmt->pgamma;
1110  this->elements[index][i]->value = elmt->value;
1111  }
1112  }
1113 
1114  /*free stuff*/
1115  if(elmt) delete elmt;
1116  elmt = nullptr;
1117  return false;
1118  } else {
1119  /*
1120  printf("CRASH! (%d) ",index);
1121  printf("[");
1122  for(j = 0; j< hash->elements[index][i]->level; ++j)
1123  printf("%d,",hash->elements[index][i]->fnames[j]);
1124  printf("]\n");
1125  */
1126  }
1127  /*increment*/
1128  i++;
1129  }
1130 
1131  /*adding element*/
1132  if (i >= this->width) {
1133  int filled = 0;
1134  for (i = 0; i < this->length; ++i)
1135  if (this->elements[i][0] == nullptr) filled++;
1136 
1137  std::cerr << "NEED RE-HASH! (just failed) (" << filled << "/" << this->length << ") = "
1138  << ((double) filled) /
1139  (this->length) * 100.0 << "%\n";
1140  exit(1);
1141  } else {
1142  this->elements[index][i] = elmt;
1143  conthash++;
1144  }
1145  return true;
1146  }
1147 
1148  /*----------------------------------------------------------*
1149  * erase an element from my hash *
1150  *----------------------------------------------------------*/
1151 
1152 // template<typename T>
1153 // void AOS<T>::Hash::set_null(AOS<T>::select_gamma *elmt) {
1154 // unsigned int i = 0, j = 0;
1155 // int index = 0;
1156 // double func = 0;
1157 //
1158 // /*error check*/
1159 // if (elmt == nullptr || this->elements == nullptr) return;
1160 //
1161 // /*hashing function*/
1162 // for (i = 0; i < elmt->level; ++i)
1163 // func += std::pow(elmt->fnames[i], 2);
1164 //
1165 // index = (unsigned int) fmod(func, this->length);
1166 //
1167 // /*finding it*/
1168 // i = 0;
1169 // while (i < this->width && this->elements[index][i] != nullptr) {
1170 // /*check equality between nodes*/
1171 // if (select_gamma_equal(elmt, this->elements[index][i])) {
1172 // /*shift elements*/
1173 // j = i + 1;
1174 // while (j < this->width && this->elements[index][j] != nullptr) {
1175 // this->elements[index][j - 1] = this->elements[index][j];
1176 // j++;
1177 // }
1178 // /*setting last element as null*/
1179 // this->elements[index][j - 1] = nullptr;
1180 //
1181 // break;
1182 // }
1183 // i++;
1184 // }
1185 // }
1186 
1187  /*----------------------------------------------------------*
1188  * print hash table *
1189  *----------------------------------------------------------*/
1190 
1191 // template<typename T>
1192 // void AOS<T>::Hash::print(int dim) {
1193 // size_t i = 0, j = 0, k = 0, d = 0;
1194 // int cont = 0;
1195 //
1196 // for (i = 0; i < this->length; ++i)
1197 // for (j = 0; j < this->width; ++j)
1198 // if (this->elements[i][j] != nullptr) {
1199 // cont++;
1200 // }
1201 // std::cout << "Cont = " << cont << "\n";
1202 // if (dim <= 10)
1203 // for (d = 0; d < dim; d++) {
1204 // for (i = 0; i < this->length; ++i)
1205 // for (j = 0; j < this->width; ++j)
1206 // if (this->elements[i][j] != nullptr)
1207 // if (this->elements[i][j]->level == d) {
1208 // for (k = 0; k < (this->elements[i][j]->level) - 1; ++k)
1209 // std::cout << this->elements[i][j]->fnames[k] << ",";
1210 // std::cout << this->elements[i][j]->fnames[k] << "\n";
1211 // }
1212 // std::cout << "\n";
1213 // }
1214 // }
1215 
1216  /*---------------------------------q-------------------------*
1217  * frees hash table *
1218  *----------------------------------------------------------*/
1219 
1220  template<typename T>
1221  AOS<T>::Hash::~Hash() {
1222  size_t i = 0;
1223 
1224  for (i = 0; i < this->length; i++) {
1225  delete[] this->elements[i];
1226  }
1227 
1228  delete[] this->elements;
1229  this->elements = nullptr;
1230  }
1231 
1232  template<typename T>
1233  size_t AOS<T>::Hash::getLength() const {
1234  return length;
1235  }
1236 
1237  template<typename T>
1238  void AOS<T>::Hash::setLength(size_t length) {
1239  AOS<T>::Hash::length = length;
1240  }
1241 
1242  template<typename T>
1243  unsigned int AOS<T>::Hash::getWidth() const {
1244  return width;
1245  }
1246 
1247  template<typename T>
1248  void AOS<T>::Hash::setWidth(size_t width) {
1249  AOS<T>::Hash::width = width;
1250  }
1251 
1252  template<typename T>
1253  unsigned int AOS<T>::Hash::getConthash() const {
1254  return conthash;
1255  }
1256 
1257  template<typename T>
1258  void AOS<T>::Hash::setConthash(size_t conthash) {
1259  AOS<T>::Hash::conthash = conthash;
1260  }
1261 
1262  template<typename T>
1263  bool AOS<T>::select_gamma::operator==(const AOS<T>::select_gamma other) const {
1264  unsigned int i = 0;
1265  int eq = 0;
1266 
1267  if (this->level != other.level)
1268  return false;
1269 
1270  for (i = 0; i < this->level; ++i) {
1271  if (this->fnames[i] == other.fnames[i]) eq++;
1272  else break;
1273  }
1274  return (eq == this->level);
1275  }
1276 
1277  /***********************************************************
1278  * HEAP FUNCTIONS *
1279  ***********************************************************/
1280 
1281  /*----------------------------------------------------------*
1282  * creates a heap *
1283  *----------------------------------------------------------*/
1284 
1285  template<typename T>
1286  AOS<T>::Heap::Heap() {
1287  size_t i = 0;
1288 
1289  this->elements = new AOS<T>::select_gamma *[MAX_HEAP + 1];
1290 
1291  if (this->elements == nullptr) {
1292  std::cerr << "Out of mem! Heap 2\n";
1293  exit(1);
1294  }
1295 
1296  for (i = 0; i < MAX_HEAP + 1; ++i) this->elements[i] = nullptr;
1297  this->size = 0;
1298  }
1299 
1300  /*----------------------------------------------------------*
1301  * Insert into heap *
1302  *----------------------------------------------------------*/
1303 
1304  template<typename T>
1305  bool AOS<T>::Heap::insert(AOS<T>::select_gamma *tok, int cont) {
1306  int i = 0;
1307  double val = 0;
1308 
1309  if (this->size == MAX_HEAP) {
1310  if (tok->value > this->elements[MAX_HEAP]->value)
1311  i = MAX_HEAP;
1312  else return false;
1313  } else i = ++(this->size);
1314 
1315  val = (this->elements[i / 2] != nullptr) ? this->elements[i / 2]->value : 0;
1316 
1317  while (i > 1 && val < tok->value) {
1318  this->elements[i] = this->elements[i / 2];
1319  i /= 2;
1320  val = (this->elements[i / 2] != nullptr) ? this->elements[i / 2]->value : 0;
1321  }
1322  this->elements[i] = tok;
1323 
1324  if (this->size > this->maxheapsize)
1325  this->maxheapsize = this->size;
1326 
1327  if (cont) this->contheap++;
1328  else this->contheapreins++;
1329  return true;
1330  }
1331 
1332  /*----------------------------------------------------------*
1333  * Returns top element *
1334  *----------------------------------------------------------*/
1335 
1336  template<typename T>
1337  typename AOS<T>::select_gamma *AOS<T>::Heap::pop() {
1338  AOS<T>::select_gamma *min_element = nullptr;
1339 
1340  if (this->size == 0) {
1341  std::cerr << "Tried to pop an empty heap!\n";
1342  return nullptr;
1343  }
1344 
1345  min_element = this->elements[1];
1346 
1347  this->size--;
1348  this->percolate(1);
1349 
1350  return min_element;
1351  }
1352 
1353  /*----------------------------------------------------------*
1354  * Prints heap *
1355  *----------------------------------------------------------*/
1356 
1357  template<typename T>
1358  void AOS<T>::Heap::print() {
1359  int i = 0, j = 0;
1360  std::vector<int> fnames;
1361  AOS<T>::select_gamma *curr = nullptr;
1362 
1363  if (this->elements == nullptr) return;
1364 
1365  for (i = 1; i <= this->size; ++i) {
1366  curr = this->elements[i];
1367  fnames = curr->fnames;
1368  std::cout << "Heap[" << i << "] --";
1369  if (curr->level <= 50) {
1370  std::cout << " (";
1371  for (j = 0; j < curr->level - 1; ++j)
1372  std::cout << fnames[j] << ",";
1373  std::cout << fnames[j] << ")";
1374  }
1375  std::cout << " Value: " << curr->value << ",";
1376  std::cout << " rMargem: " << curr->rgamma << ",";
1377  std::cout << " pMargem: " << curr->pgamma << ",";
1378  std::cout << " Nivel: " << curr->level << "\n";
1379  }
1380  }
1381 
1382  /*----------------------------------------------------------*
1383  * Cont projected margin nodes in heap *
1384  *----------------------------------------------------------*/
1385 
1386  template<typename T>
1387  size_t AOS<T>::Heap::projected() {
1388  size_t i = 0, projected = 0;
1389  AOS<T>::select_gamma *curr = nullptr;
1390 
1391  if (this->elements == nullptr) return 0;
1392 
1393  for (i = 1; i <= this->size; ++i) {
1394  curr = this->elements[i];
1395  if (curr->rgamma < 0)
1396  projected++;
1397  }
1398  return projected;
1399  }
1400 
1401  /*----------------------------------------------------------*
1402  * percolates *
1403  *----------------------------------------------------------*/
1404 
1405  template<typename T>
1406  void AOS<T>::Heap::percolate(size_t i) {
1407  size_t child = i * 2;
1408  AOS<T>::select_gamma *last_element = nullptr;
1409  last_element = this->elements[this->size + 1];
1410 
1411  if (child > this->size) {
1412  this->elements[i] = last_element;
1413  return;
1414  }
1415 
1416  if (child != this->size && this->elements[child + 1]->value > this->elements[child]->value)
1417  child++;
1418 
1419  /*percolate one level*/
1420  if (last_element->value < this->elements[child]->value) {
1421  this->elements[i] = this->elements[child];
1422  this->percolate(child);
1423  } else this->elements[i] = last_element;
1424  }
1425 
1426  /*----------------------------------------------------------*
1427  * removes old levels *
1428  *----------------------------------------------------------*/
1429 
1430  template<typename T>
1431  void AOS<T>::Heap::cut(std::shared_ptr<AOS<T>::Hash> hash, int levelat, int cut, double g_margin, int verbose) {
1432  size_t i = 0, count = 0;
1433  AOS<T>::select_gamma *curr = nullptr;
1434 
1435  for (i = this->size; i > 0; --i) {
1436  curr = this->elements[i];
1437  if (curr->level < levelat)
1438  if (curr->value < (1 - NUM_ERROR_EPS) * g_margin || levelat - curr->level >= cut) {
1439  /*errase it from hash*/
1440  if(curr) delete curr;
1441  curr = nullptr;
1442  /*percolate heap*/
1443  this->size--;
1444  this->percolate(i);
1445  count++;
1446  }
1447  }
1448  if (verbose) std::cout << " [Removed nodes from pruning: " << count << "]\n";
1449 
1450  contprooning += count;
1451  }
1452 
1453  /*----------------------------------------------------------*
1454  * Frees heap *
1455  *----------------------------------------------------------*/
1456 
1457  template<typename T>
1458  AOS<T>::Heap::~Heap() {
1459  size_t i = 0;
1460  for (i = 1; i <= this->size; ++i) {
1461  if(this->elements[i])delete this->elements[i];
1462  this->elements[i] = nullptr;
1463  }
1464  delete[] this->elements;
1465  this->elements = nullptr;
1466  }
1467 
1468  template<typename T>
1469  size_t AOS<T>::Heap::getSize() const {
1470  return size;
1471  }
1472 
1473  template<typename T>
1474  typename AOS<T>::select_gamma **AOS<T>::Heap::getElements() const {
1475  return elements;
1476  }
1477 
1478  template<typename T>
1479  size_t AOS<T>::Heap::getContheap() const {
1480  return contheap;
1481  }
1482 
1483  template<typename T>
1484  size_t AOS<T>::Heap::getContheapreins() const {
1485  return contheapreins;
1486  }
1487 
1488  template<typename T>
1489  size_t AOS<T>::Heap::getContprooning() const {
1490  return contprooning;
1491  }
1492 
1493  template<typename T>
1494  size_t AOS<T>::Heap::getMaxheapsize() const {
1495  return maxheapsize;
1496  }
1497 
1498  template<typename T>
1499  bool AOS<T>::select_weight::operator==(AOS::select_weight other) const {
1500  return (fabs(other.w) > fabs(other.w)) - (fabs(other.w) < fabs(other.w));
1501  }
1502  }
1503 }
PointPointer< T > point(int index) const
Returns a shared pointer to the point with the given index.
Definition: Data.hpp:1510
size_t size() const
Returns the size of the dataset.
Definition: Data.hpp:208
const std::vector< int > classes() const
Returns a vector containing the numeric values of the classes.
Definition: Data.hpp:1831
std::vector< int > getFeaturesNames() const
Returns the features names.
Definition: Data.hpp:1675
size_t dim() const
Returns the dimension of the dataset.
Definition: Data.hpp:213
mltk::Data< T > copy() const
Returns a copy of itself.
Definition: Data.hpp:1551
std::vector< size_t > classesDistribution() const
Returns a vector containing the frequency of the classes. Only valid for classification datasets.
Definition: Data.hpp:1826
mltk::dMatrix * generateMatrixHwithoutDim(std::shared_ptr< Data< T > > samples, int dim)
compute Compute the H matrix without a dimension, with the computed kernel matrix and given samples.
Definition: Kernel.hpp:196
Definition: classifier/Classifier.hpp:17
Definition: AOS.hpp:53
Definition: AOS.hpp:82
Definition: AOS.hpp:18
Data< T > selectFeatures() override
Function that executes the feature selection phase.
Definition: AOS.hpp:210
Definition: featselect/FeatureSelection.hpp:17
int skip
Number of levels to be skipped.
Definition: featselect/FeatureSelection.hpp:44
int verbose
Verbose level.
Definition: featselect/FeatureSelection.hpp:48
classifier::Classifier< double > * classifier
Classifier used by the method.
Definition: featselect/FeatureSelection.hpp:23
int final_dim
Final dimension.
Definition: featselect/FeatureSelection.hpp:40
validation::CrossValidation * cv
Structure to hold the cross-validation result.
Definition: featselect/FeatureSelection.hpp:25
const double NUM_ERROR_EPS
Error tolerance constant.
Definition: featselect/FeatureSelection.hpp:36
std::shared_ptr< Data< double > > samples
Attributes.
Definition: featselect/FeatureSelection.hpp:21
A helper class to measure execution time for benchmarking purposes.
Definition: ThreadPool.hpp:503
double distCenters(const Data< T > &data, int feat)
Compute the distance between the centers of binary classes without given features.
Definition: Statistics.hpp:229
double radius(const Data< T > &data, int feat, double q)
Returns radius of the ball that circ. the data.
Definition: Statistics.hpp:186
ValidationReport kfold(Data< T > sample, classifier::Classifier< T > &classifier, size_t fold, bool stratified=true, size_t seed=0, int verbose=0)
Executes k-fold stratified cross-validation.
Definition: valid/Validation.hpp:312
UFJF-MLTK main namespace for core functionalities.
Definition: classifier/Classifier.hpp:11
T min(const Point< T, R > &p)
Returns the min value of the point.
Definition: Point.hpp:557
Structure to manage cross validation.
Definition: valid/Validation.hpp:62
double accuracy
Accuracy of the validated model.
Definition: valid/Validation.hpp:24