6 #define MAX_HEAP 500000
7 #define NUM_ERROR_EPS 0.05
8 #define FIRST_DECAY 0.5
12 #include "FeatureSelection.hpp"
13 #include "ufjfmltk/Core.hpp"
16 namespace featselect {
17 template<
typename T =
double>
35 std::vector<int> fnames;
46 std::vector<double> w;
55 size_t length = 0, width = 0, conthash = 0;
59 Hash(
size_t length,
size_t width);
67 size_t getLength()
const;
69 void setLength(
size_t length);
71 unsigned int getWidth()
const;
73 void setWidth(
size_t width);
75 unsigned int getConthash()
const;
77 void setConthash(
size_t conthash);
84 size_t size = 0, contheap = 0, contheapreins = 0, maxheapsize = 0, contprooning = 0;
92 size_t getMaxheapsize()
const;
94 size_t getContheap()
const;
96 size_t getContheapreins()
const;
98 size_t getContprooning()
const;
100 size_t getSize()
const;
110 void percolate(
size_t i);
112 void cut(std::shared_ptr<AOS::Hash> hash,
int levelat,
int cut,
double g_margin,
int verbose);
117 void setBreadth(
int breadth);
119 void setCut(
int cut);
121 void setSortingShape(
int sortingShape);
123 void setChoiceShape(
int choiceShape);
125 void setLookAheadDepth(
int lookAheadDepth);
128 const size_t HASH_SIZE = 161387, HASH_WIDTH = 100;
129 int breadth = 0, cut = 0, sorting_shape = 0, choice_shape = 0, startover = 0, look_ahead_depth = 0, ftime = 0, lool = 0;
130 int dim_orig = 1, startdim;
131 int contheap_parcial, contheapreins_parcial, conthash_parcial, contprooning_parcial, maxheapsize_parcial,
132 contnaoheap_parcial, conthashnaoheap_parcial, contexpandidos_parcial, contprojetados_parcial,
133 contprojtreinados_parcial, sobraprojecoes_parcial;
134 int contexpanded = 0, contnotheap = 0, contprojected = 0, contprojtrained = 0, conthashnotheap = 0;
135 double bonus = 0.0, n0 = 1, max_time = 0, kfolderror = 0, g_margin = 0;
136 double partial_margin, partial_svs, partial_time, partial_dim;
137 double START_TIME, initial_time, max_time_orig;
138 bool doleave_oo =
false;
139 double mult_tempo = 2;
141 void setMultTempo(
double multTempo) {
142 mult_tempo = multTempo;
147 mltk::KernelType kernel_type = INNER_PRODUCT;
149 std::string filename;
150 std::unique_ptr<Heap> heap;
151 std::shared_ptr<Hash> hash;
152 std::shared_ptr<Data<T> > stmp_partial{
nullptr};
155 static bool select_gamma_equal(
const select_gamma *a,
const select_gamma *b);
161 int breadth = 3,
int sorting_shape = 1,
162 int choice_shape = 1,
int look_ahead_depth = 0,
int cut = 0,
int skip = 1,
double bonus = 0,
163 int startover = 999999,
double g_margin = 0,
bool doleave_oo = 0,
int verbose = 0);
167 void mainLoop(
Data<T>& sample);
169 double lookAhead(
Data<T>& sample, std::vector<int> fnames_orig, std::vector<double> w_orig,
int level_orig);
175 AOS<T>::AOS(
const Data<T>& samples, classifier::Classifier<T> *classifier,
int final_dim,
176 typename validation::CrossValidation *cv,
int breadth,
177 int sorting_shape,
int choice_shape,
int look_ahead_depth,
int cut,
int skip,
double bonus,
178 int startover,
double g_margin,
bool doleave_oo,
int verbose) {
180 this->cv =
new validation::CrossValidation;
182 this->samples = mltk::make_data<T>(samples);
183 this->classifier = classifier;
186 this->cv =
new validation::CrossValidation();
187 this->cv->seed = std::vector<unsigned int>(1, 0);
190 this->cv->jump = this->jump;
192 this->breadth = breadth;
193 this->depth = this->samples->dim()-final_dim;
196 this->look_ahead_depth = look_ahead_depth;
198 this->startover = startover;
199 this->g_margin = g_margin;
200 this->leave_one_out = (bool) doleave_oo;
201 this->sorting_shape = sorting_shape;
202 this->choice_shape = choice_shape;
203 this->verbose = verbose;
205 classifier->setVerbose(0);
214 int dim = this->samples->
dim();
215 int startdim = dim_orig = dim;
216 std::vector<int> fnames;
220 std::shared_ptr<Data<T>> stmp = make_data<T>();
221 max_time_orig = this->max_time;
222 this->startdim = dim;
227 this->initial_time = this->
timer.elapsed();
230 if (this->breadth > this->MAX_BREATH) { this->breadth = this->MAX_BREATH; }
231 if (this->depth > dim - 1) { this->depth = dim - 1; }
232 if (this->depth > this->MAX_DEPTH) { this->depth = this->MAX_DEPTH; }
235 this->hash = std::make_shared<AOS<T>::Hash>(this->HASH_SIZE, this->HASH_WIDTH);
236 this->heap = std::make_unique<AOS<T>::Heap>();
239 if (this->cv->qtde > 0) {
241 this->cv->seed.resize(this->cv->qtde);
242 for (i = 0; i < this->cv->qtde; i++)
243 this->cv->seed[i] = (
size_t) i;
244 this->cv->initial_error = 0;
245 this->cv->actual_error = 0;
248 stmp_partial = mltk::make_data<T>(this->samples->copy());
254 if (this->heap->getSize() == 0) {
258 std::cout <<
"End of Heap, recovering last dimension...\n\n";
261 std::cout <<
"---------------\n :: FINAL :: \n---------------\n";
262 std::cout <<
"Chosen Features: ";
263 std::vector<int> fnamesp = this->stmp_partial->getFeaturesNames();
264 for (i = 0; i < this->stmp_partial->dim() - 1; ++i)
265 std::cout << fnamesp[i] <<
", ";
266 std::cout << fnamesp[i] << std::endl;
269 if (this->cv->qtde > 0) {
270 if (level % this->cv->jump != 0) {
271 for (this->cv->actual_error = 0, i = 0; i < this->cv->qtde; i++) {
272 this->cv->actual_error += 100-
validation::kfold(*this->stmp_partial, *this->classifier,
273 this->cv->fold,
true, this->cv->seed[i], 0).
accuracy;
275 this->kfolderror = this->cv->actual_error / this->cv->qtde;
277 std::cout <<
"Dim: " << this->partial_dim <<
", Margin: " << this->partial_margin
278 <<
", SVs: " << this->partial_svs
279 <<
", Error " << this->cv->fold <<
"-fold: " << this->kfolderror <<
"%\n";
281 std::cout <<
"Dim: " << this->partial_dim <<
", Margin: -" << this->partial_margin
282 <<
", SVs: " << this->partial_svs
285 std::cout <<
"Total insertions in Heap: " << this->contheap_parcial << std::endl;
286 std::cout <<
"Total reinsertions in Heap: " << this->contheapreins_parcial << std::endl;
287 std::cout <<
"Max size of the Heap: " << this->maxheapsize_parcial << std::endl;
288 std::cout <<
"Total prunes in the Heap: " << this->contprooning_parcial << std::endl;
289 std::cout <<
"Expanded nodes: " << this->contexpandidos_parcial << std::endl;
290 std::cout <<
"Not inserted in Heap: " << this->contnaoheap_parcial << std::endl;
291 std::cout <<
"Total of projections: " << this->contprojetados_parcial << std::endl;
292 std::cout <<
"Total of trained projections: " << this->contprojtreinados_parcial << std::endl;
293 std::cout <<
"Total of non-trained projections: " << this->contprojetados_parcial
294 - this->contprojtreinados_parcial
297 std::cout <<
"Equal nodes in Hash that didn't entered in the Heap: "
298 << this->conthashnaoheap_parcial
300 std::cout <<
"Hash size: " << this->conthash_parcial << std::endl;
301 std::cout <<
"Total time: " << this->partial_time << std::endl;
309 if (this->breadth > dim) this->breadth = dim;
312 this->mainLoop(*this->samples);
313 std::cout << this->heap->getSize() << std::endl;
314 if (this->heap->getSize() == 0) {
315 std::cout <<
"Initial training failed!!!\n\n";
323 level = this->heap->getElements()[1]->level;
324 fnames = this->heap->getElements()[1]->fnames;
325 stmp->copy(*this->samples);
326 stmp->removeFeatures(fnames);
329 this->n0 = this->max_time *= FIRST_DECAY;
332 this->n0 * std::exp(-stmp->getTime_mult() * ((
double) startdim / (startdim - level)));
335 if (this->samples->dim() == dim - this->depth && this->heap->getElements()[1]->rgamma > 0) {
336 std::cout <<
"---------------\n :: FINAL :: \n---------------\n";
337 std::cout <<
"Chosen Features: ";
338 std::vector<int> fnamesp = stmp->getFeaturesNames();
339 for (i = 0; i < stmp->dim() - 1; ++i)
340 std::cout << fnamesp[i] <<
",";
341 std::cout << fnamesp[i] << std::endl;
343 std::cout <<
"---------------\nRemoved Features: ";
344 for (i = 0; i < this->samples->dim() - stmp->dim() - 1; ++i)
345 std::cout << this->heap->getElements()[1]->fnames[i] <<
", ";
346 std::cout << this->heap->getElements()[1]->fnames[i] << std::endl;
348 if (this->cv->qtde > 0) {
349 for (this->cv->actual_error = 0, i = 0; i < this->cv->qtde; i++)
350 this->cv->actual_error += 100-
validation::kfold(*stmp, *this->classifier, this->cv->fold,
true,
352 this->kfolderror = this->cv->actual_error / this->cv->qtde;
353 std::cout <<
"Dim: " << stmp->dim() <<
", Margin: "
354 << this->heap->getElements()[1]->rgamma
355 <<
", SVs: " << this->heap->getElements()[1]->sv <<
", Error " << this->cv->fold
356 <<
"-fold: " << this->kfolderror <<
"%\n";
358 std::cout <<
"Dim: " << stmp->dim() <<
", Margin: "
359 << this->heap->getElements()[1]->rgamma
360 <<
", SVs: " << this->heap->getElements()[1]->sv <<
"\n";
362 std::cout <<
"Total insertions in the Heap: " << this->heap->getContheap() << std::endl;
363 std::cout <<
"Total reinsertions in the Heap: " << this->heap->getContheapreins() << std::endl;
364 std::cout <<
"Max size of the Heap: " << this->heap->getMaxheapsize() << std::endl;
365 std::cout <<
"Total prune in the Heap: " << this->heap->getContprooning() << std::endl;
366 std::cout <<
"Expanded nodes: " << this->contexpanded << std::endl;
367 std::cout <<
"Not inserted in Heap: " << this->contnotheap << std::endl;
368 std::cout <<
"Total of projections: " << this->contprojected << std::endl;
369 std::cout <<
"Total of trained projections: " << this->contprojtrained << std::endl;
370 std::cout <<
"Total of trained projections: " << this->contprojected - this->contprojtrained
373 std::cout <<
"Equal nodes in Hash that didn't entered in the Heap: " << this->conthashnotheap
375 std::cout <<
"Hash size: " << this->hash->getConthash() << std::endl;
376 std::cout <<
"Total time: "
377 << (((100.0f * clock() / CLOCKS_PER_SEC) - this->initial_time) / 100.0f) << std::endl;
380 stmp->write(this->filename,
"data");
385 if (this->verbose && level <= 50) {
386 std::cout <<
"-- Testing node - Features (";
387 for (i = 0; i < level - 1; ++i)
388 std::cout << fnames[i] <<
",";
389 std::cout << fnames[i] <<
")\n";
390 std::cout <<
"------------------------------------------\n";
394 tbreadth = this->breadth;
395 if (tbreadth > stmp->dim()) tbreadth = stmp->dim();
398 auto stmp_copy = stmp->copy();
399 this->mainLoop(stmp_copy);
407 if (this->verbose > 1) {
411 std::cout <<
"-- Heap Size: " << this->heap->getSize();
412 std::cout <<
"--------\n";
419 return *this->stmp_partial;
421 this->stmp_partial.reset();
432 std::vector<int> ofnames;
433 std::vector<double> w, w_manut;
434 size_t i = 0, j = 0, k = 0;
443 double leave_oo = -1;
444 size_t dim = sample.
dim();
445 size_t size = sample.
size();
447 std::vector<AOS<T>::select_weight> weight(dim);
448 bool isPrimal = this->classifier->getFormulationString() ==
"Primal";
449 auto data_copy = sample.
copy();
455 this->classifier->setSamples(sample);
457 if (this->heap->getSize() == 0) {
458 if(!this->classifier->train()){
459 std::cerr <<
"Training failed!" << std::endl;
461 auto solution = this->classifier->getSolution();
463 margin = solution.margin;
464 svcount = solution.svs;
466 max_time /= mult_tempo;
469 gtmp = this->heap->pop();
471 if (gtmp->level > this->depth) {
477 if (gtmp->rgamma > 0) {
479 margin = gtmp->rgamma;
482 margin = gtmp->pgamma;
484 this->classifier->getSolutionRef()->bias = gtmp->bias;
485 this->classifier->setSamples(sample);
487 if (!this->classifier->train()) {
489 if (this->verbose > 1) std::cout <<
"Training failed!\n";
493 auto solution = this->classifier->getSolution();
495 margin = solution.margin;
496 svcount = solution.svs;
498 if (this->choice_shape == 2)
501 gtmp->value = margin;
502 gtmp->rgamma = margin;
505 this->contprojtrained++;
508 if ((gtmp->value < this->heap->getElements()[1]->value) ||
509 (gtmp->level == this->depth)) {
512 this->heap->insert(gtmp, 0);
517 this->contexpanded++;
519 ofnames.resize((
size_t) gtmp->level);
520 ofnames = gtmp->fnames;
522 omargin = gtmp->pgamma;
525 if (level > this->lool) {
534 std::cout <<
"-> Expanding features (";
535 for (i = 0; i < level - 1; ++i)
536 std::cout << ofnames[i] <<
",";
537 std::cout << ofnames[i];
538 std::cout <<
") -- Margin: " << margin <<
", pMargin: " << omargin <<
", Level: " << level <<
"\n";
542 if ((this->lool) == level && (loolflag == 1 || level == 0)) {
544 if (this->cv->qtde > 0) {
546 for (this->cv->initial_error = 0, i = 0; i < this->cv->qtde; i++)
548 this->cv->fold,
true, this->cv->seed[i], 0).
accuracy;
549 kfolderror = this->cv->initial_error / this->cv->qtde;
550 }
else if (level % this->cv->jump == 0) {
551 for (this->cv->actual_error = 0, i = 0; i < this->cv->qtde; i++)
553 this->cv->fold,
true, this->cv->seed[i], 0).
accuracy;
554 kfolderror = this->cv->actual_error / this->cv->qtde;
558 if (this->doleave_oo) {
560 std::cout <<
"Leave One Out -- Dim: " << this->startdim - level <<
", Margin: " << margin
562 << leave_oo <<
", SVs: " << svcount <<
", Time: "
563 << ((100.0f * clock() / CLOCKS_PER_SEC) - this->initial_time) / 100.0f <<
"\n";
566 std::cout <<
"--- --- --- --- --- --- --- ---\n";
567 if (this->cv->qtde > 0 && level % this->cv->jump == 0)
568 std::cout <<
"Dim: " << (this->startdim - level) <<
", Margin: " << margin <<
", SVs: "
570 <<
", Error " << this->cv->fold <<
"-fold: " << this->kfolderror <<
", Time: "
571 << ((100.0f * clock() / CLOCKS_PER_SEC) - this->initial_time) / 100.0f;
573 std::cout <<
"Dim: " << (this->startdim - level) <<
", Margin: " << margin <<
", SVs: "
575 <<
", Time: " << ((100.0f * clock() / CLOCKS_PER_SEC) - this->initial_time) / 100.0f;
577 std::cout <<
" - Removed features: ";
578 for (j = 0; j < level - 1; j++)
579 std::cout << ofnames[j] <<
",";
580 std::cout << ofnames[j] << std::endl;
582 std::cout <<
"\nIns: " << this->heap->getContheap() <<
" / ";
583 std::cout <<
"ReIns: " << this->heap->getContheapreins() <<
" / ";
584 std::cout <<
"Max: " << this->heap->getMaxheapsize() <<
" / ";
585 std::cout <<
"Proonings: " << this->heap->getContprooning() <<
" / ";
586 std::cout <<
"Hash: " << this->hash->getConthash() <<
" / ";
587 std::cout <<
"Expanded: " << this->contexpanded <<
" / ";
588 std::cout <<
"Not trained: " << this->contprojected - this->contprojtrained <<
" / ";
589 std::cout <<
"\n--- --- --- --- --- --- --- ---\n";
593 this->partial_margin = margin;
594 this->partial_svs = svcount;
595 this->partial_time = ((100.0f * clock() / CLOCKS_PER_SEC) - this->initial_time) / 100.0f;
596 this->partial_dim = startdim - level;
597 this->contheap_parcial = this->heap->getContheap();
598 this->contheapreins_parcial = this->heap->getContheapreins();
599 this->conthash_parcial = this->hash->getConthash();
600 this->contprooning_parcial = this->heap->getContprooning();
601 this->maxheapsize_parcial = this->heap->getMaxheapsize();
602 this->contnaoheap_parcial = this->contnotheap;
603 this->conthashnaoheap_parcial = this->conthashnotheap;
604 this->contexpandidos_parcial = this->contexpanded;
605 this->contprojetados_parcial = this->contexpanded;
606 this->contprojtreinados_parcial = this->contprojtrained;
608 this->stmp_partial = mltk::make_data<T>();
609 this->stmp_partial->copy(data_copy);
610 if (look_ahead_depth > 0) {
618 g_margin = this->lookAhead(sample, ofnames, w, level);
625 this->heap->cut(this->hash, lool, cut, g_margin, this->verbose);
627 if (this->verbose > 2) {
628 std::cout <<
" (" << this->heap->getSize() <<
")\t";
629 for (j = 0; j < level; j++)
630 std::cout << ofnames[j] <<
" ";
634 for(i = 0; i < weight.size(); i++){
636 weight[i].indice = i;
638 weight[i].radius = -1;
639 weight[i].dcents = -1;
640 weight[i].golub = -1;
641 weight[i].fisher = -1;
644 if(sorting_shape == 2){
645 for(i = 0; i < dim; i++){
648 }
else if(sorting_shape == 3){
649 for(i = 0; i < dim; i++){
653 }
else if(sorting_shape == 4){
654 for(i = 0; i < dim; i++){
657 }
else if(sorting_shape == 5 || sorting_shape == 6){
658 int num_pos = 0, num_neg = 0;
660 auto classes = sample.
classes();
661 auto neg_id = std::find(classes.begin(), classes.end(), -1) - classes.begin();
662 auto pos_id = std::find(classes.begin(), classes.end(), 1) - classes.begin();
667 std::vector<double> avg_pos(dim), avg_neg(dim), sd_pos(dim), sd_neg(dim);
670 for(i = 0; i < dim; i++){
671 avg_neg[i] = 0; avg_pos[i] = 0;
672 for(j = 0; j < size; j++){
673 if(sample.
point(j)->Y() == -1){
674 avg_neg[i] += sample.
point(j)->X()[i];
676 avg_pos[i] += sample.
point(j)->X()[i];
679 avg_neg[i] /= num_neg;
680 avg_pos[i] /= num_pos;
683 for(i = 0; i < dim; i++){
684 sd_neg[i] = 0; sd_pos[i] = 0;
685 for(j = 0; j < size; j++){
686 if(sample.
point(j)->Y() == -1){
687 sd_neg[i] += std::pow(sample.
point(j)->X()[i]-avg_neg[i], 2);
689 sd_pos[i] += std::pow(sample.
point(j)->X()[i]-avg_pos[i], 2);
694 for(i = 0; i < dim; i++){
695 weight[i].golub = std::fabs(avg_pos[i] - avg_pos[i])/(std::sqrt(sd_pos[i]/num_pos-1)) +
696 std::sqrt(sd_neg[i]/num_neg-1);
697 weight[i].fisher = std::pow(avg_pos[i] - avg_neg[i], 2)/(sd_pos[i] + sd_neg[i]);
702 if(sorting_shape == 6){
703 std::sort(weight.begin(), weight.end(), [](
const auto& a,
const auto& b){
704 return (((std::fabs(a.w * a.fisher) > std::fabs(b.w * b.fisher)) -
705 (std::fabs(a.w * a.fisher) < std::fabs(b.w * b.fisher)))<=0)?false:true;
707 }
else if(sorting_shape == 5){
708 std::sort(weight.begin(), weight.end(), [](
const auto& a,
const auto& b){
709 return (((std::fabs(a.w * a.golub) > std::fabs(b.w * b.golub)) -
710 (std::fabs(a.w * a.golub) < std::fabs(b.w * b.golub))) <= 0)?false:true;
712 }
else if(sorting_shape == 4){
713 std::sort(weight.begin(), weight.end(), [](
const auto& a,
const auto& b){
714 return (((std::fabs(a.w * a.radius) > std::fabs(b.w * b.radius)) -
715 (std::fabs(a.w * a.radius) < std::fabs(b.w * b.radius))) <= 0)?false:true;
717 }
else if(sorting_shape == 3){
718 std::sort(weight.begin(), weight.end(), [](
const auto& a,
const auto& b){
719 return ((std::fabs((a.w * a.radius)/a.dcents) > std::fabs((b.w * b.radius)/b.dcents)) -
720 (std::fabs((a.w * a.radius)/a.dcents) < std::fabs((b.w * b.radius)/b.dcents))<=0)?false:true;
722 }
else if(sorting_shape == 2){
723 std::sort(weight.begin(), weight.end(), [](
const auto& a,
const auto& b){
724 return ((std::fabs(a.w / a.dcents) > std::fabs(b.w / b.dcents)) -
725 (std::fabs(a.w / a.dcents) < std::fabs(b.w / b.dcents)) <= 0)?false:true;
728 std::sort(weight.begin(), weight.end(), [](
const auto& a,
const auto& b){
729 return (((std::fabs(a.w)>std::fabs(b.w)) - (std::fabs(a.w)<std::fabs(b.w))) <= 0)?false:true;
733 if(kernel_type == INNER_PRODUCT){
737 kernel.compute(make_data<T>(sample));
738 wnorm = kernel.featureSpaceNorm(make_data<T>(sample));
742 for(i = 0; i < breadth; i++){
743 if(kernel_type == INNER_PRODUCT){
746 for(sumnorm = 0, j = 0; j < dim; j++){
748 sumnorm += std::pow(std::fabs(weight[i].w/wnorm)*margin, q);
751 tpmargin = std::pow(sumnorm, 1.0/q);
756 std::vector<double> alphaaux(size);
758 for(k = 0; k < size; ++k){
759 for(alphaaux[k] = 0, j = 0; j < size; ++j){
760 alphaaux[k] += sample.
point(j)->Alpha() * (*Hk)[k][j];
764 for(sumnorm = 0, k = 0; k < size; k++){
765 sumnorm += alphaaux[k] * sample.
point(k)->Alpha();
768 tpmargin = std::sqrt(sumnorm)/wnorm*margin;
771 gtmp =
new select_gamma;
774 if(choice_shape == 2){
775 gtmp->value = tpmargin*weight[i].dcents;
777 gtmp->value = tpmargin;
779 gtmp->pgamma = tpmargin;
783 gtmp->level = level+1;
784 gtmp->radius = weight[i].radius;
785 gtmp->dcents = weight[i].dcents;
786 gtmp->golub = weight[i].golub;
787 gtmp->fisher = weight[i].fisher;
790 if(kernel_type == INNER_PRODUCT){
793 for(k = 0, j = 0; k < dim; k++){
801 gtmp->fnames.resize(level+1);
803 for(j = 0; j < level; j++){
804 gtmp->fnames[j] = ofnames[j];
806 gtmp->fnames[j] = weight[i].fname;
809 std::sort(gtmp->fnames.begin(), gtmp->fnames.end(), [](
const auto& a,
const auto& b){
810 return (((a>b)-(a < b)) <= 0)?false:true;
812 if(choice_shape == 2){
813 if(i != 0 && tpmargin*weight[i].dcents < (1-NUM_ERROR_EPS)*(g_margin)){
815 contnotheap++;
continue;
818 if(i != 0 && tpmargin < (1-NUM_ERROR_EPS)*g_margin){
820 contnotheap++;
continue;
825 if(sorting_shape == 5 || sorting_shape == 6){
826 std::cout <<
" -- New Node - Feature " << weight[i].fname <<
", Value: " << gtmp->value <<
827 ", pMargin: " << gtmp->pgamma <<
", DCent: " << gtmp->dcents <<
", Radius: " << gtmp->radius <<
828 ", Golub: " << gtmp->golub <<
", Fisher: " << gtmp->fisher <<
", Level: " << gtmp->level <<
831 std::cout <<
" -- New Node - Feature " << weight[i].fname <<
", Value: " << gtmp->value <<
832 ", pMargin: " << gtmp->pgamma <<
", DCent: " << gtmp->dcents <<
", Radius: " << gtmp->radius <<
833 gtmp->level << std::endl;
839 heap->insert(gtmp, 1);
852 double AOS<T>::lookAhead(Data<T>& sample, std::vector<int> fnames_orig, std::vector<double> w_orig,
int level_orig) {
854 int level = level_orig;
858 bool isPrimal = this->classifier->getFormulationString() ==
"Primal";
859 std::vector<int> features((
unsigned int) look_ahead_depth + 1, 0);
863 std::vector<double> w = w_orig;
864 std::vector<double> novo_w;
865 auto stmp = make_data<T>(sample.copy());
866 select_gamma *gtmp =
nullptr;
867 double distcents = 0;
872 if (count == this->look_ahead_depth || count == sample.dim() - 1 ||
873 stmp->dim() == sample.dim() - this->depth || level == this->depth)
875 if (this->choice_shape == 2) {
878 feat = stmp->getFeaturesNames()[0];
879 for (i = 1; i < stmp->dim(); i++) {
883 if (fabs(w[i]) / distcents <
min) {
884 min = fabs(w[i]) / distcents;
885 feat = stmp->getFeaturesNames()[i];
891 feat = stmp->getFeaturesNames()[0];
892 for (i = 1; i < stmp->dim(); i++)
893 if (fabs(w[i]) <
min) {
895 feat = stmp->getFeaturesNames()[i];
901 size_t dim = sample.dim() - 1;
902 std::vector<int> fnames = sample.getFeaturesNames();
903 novo_w.reserve(dim - 1);
904 for (i = 0, j = 0; j < dim; ++j)
905 if (fnames[j] != feat)
906 novo_w.push_back(w[j]);
910 features[count] = feat;
913 if (*stmp != sample) stmp.reset();
916 stmp = make_data<T>(sample.removeFeatures(features, count+1));
919 this->n0 = this->max_time = max_time_orig * FIRST_DECAY;
921 this->max_time = n0 * std::exp(
922 -stmp->getTime_mult() * ((
double) this->dim_orig / (this->dim_orig - level - 1)));
925 gtmp =
new AOS<T>::select_gamma;
926 if (gtmp ==
nullptr) {
927 std::cerr <<
"Error: Out of memory 9\n" << std::endl;
933 gtmp->pgamma = solution.margin;
939 gtmp->level = level + 1;
941 if (this->choice_shape == 2)
949 gtmp->fnames.resize((
size_t) level + 1);
951 for (j = 0; j < level_orig; ++j) gtmp->fnames[j] = fnames_orig[j];
952 for (j = 0; j < count + 1; ++j) gtmp->fnames[level_orig + j] = features[j];
955 std::sort(gtmp->fnames.begin(), gtmp->fnames.end(), std::greater<int>());
958 std::cout <<
" -- New look-ahead node - Features: " << std::endl;
959 for (i = 0; i < count; i++)
960 std::cout << features[i] <<
", ";
961 std::cout << features[i] << std::endl;
965 if (hash->add(gtmp)) {
968 solution.margin = gtmp->pgamma;
969 solution.bias = gtmp->bias;
971 this->classifier->setSolution(solution);
972 if (!this->classifier->train()) {
973 if (this->verbose) std::cout <<
"Training failed!\n";
977 gtmp->w = this->classifier->getSolutionRef()->w.X();
978 gtmp->bias = this->classifier->getSolutionRef()->bias;
979 gtmp->sv = this->classifier->getSolutionRef()->svs;
980 gtmp->rgamma = this->classifier->getSolutionRef()->margin;
982 if (this->choice_shape == 2)
983 gtmp->value = this->classifier->getSolutionRef()->margin * gtmp->dcents;
985 gtmp->value = this->classifier->getSolutionRef()->margin;
986 g_margin = gtmp->value;
987 this->heap->insert(gtmp, 1);
990 w.resize(stmp->dim());
991 w = this->classifier->getSolutionRef()->w.X();
1002 if (*stmp != sample) stmp.reset();
1007 template<
typename T>
1008 void AOS<T>::setBreadth(
int breadth) {
1009 AOS::breadth = breadth;
1012 template<
typename T>
1013 void AOS<T>::setCut(
int cut) {
1017 template<
typename T>
1018 void AOS<T>::setSortingShape(
int sortingShape) {
1019 sorting_shape = sortingShape;
1022 template<
typename T>
1023 void AOS<T>::setChoiceShape(
int choiceShape) {
1024 choice_shape = choiceShape;
1027 template<
typename T>
1028 void AOS<T>::setLookAheadDepth(
int lookAheadDepth) {
1029 look_ahead_depth = lookAheadDepth;
1032 template<
typename T>
1033 void AOS<T>::setQ(
int q) {
1037 template<
typename T>
1038 bool AOS<T>::select_gamma_equal(
const AOS::select_gamma *a,
const AOS::select_gamma *b) {
1042 if(a->level != b->level){
1045 for(i = 0; i < a->level; i++){
1046 if(a->fnames[i] == b->fnames[i]) eq++;
1049 return (eq == a->level);
1060 template<
typename T>
1061 AOS<T>::Hash::Hash(
size_t length,
size_t width) {
1064 this->length = length;
1065 this->width = width;
1067 elements =
new AOS<T>::select_gamma **[length];
1069 for (i = 0; i < length; i++) {
1070 elements[i] =
new AOS<T>::select_gamma *[width];
1072 for(j = 0; j < width; j++){
1073 elements[i][j] =
nullptr;
1082 template<
typename T>
1083 bool AOS<T>::Hash::add(AOS<T>::select_gamma *elmt) {
1089 if (elmt ==
nullptr || this->elements ==
nullptr)
1094 for (i = 0; i < elmt->level; ++i)
1095 func += std::pow(elmt->fnames[i], 2);
1097 index = (int) fmod(func, this->length);
1101 while (i < this->width && this->elements[index][i] !=
nullptr) {
1103 if (select_gamma_equal(elmt, this->elements[index][i])) {
1106 if (this->elements[index][i]->rgamma < 0) {
1108 if (this->elements[index][i]->value < elmt->value) {
1109 this->elements[index][i]->pgamma = elmt->pgamma;
1110 this->elements[index][i]->value = elmt->value;
1115 if(elmt)
delete elmt;
1132 if (i >= this->width) {
1134 for (i = 0; i < this->length; ++i)
1135 if (this->elements[i][0] ==
nullptr) filled++;
1137 std::cerr <<
"NEED RE-HASH! (just failed) (" << filled <<
"/" << this->length <<
") = "
1138 << ((double) filled) /
1139 (this->length) * 100.0 <<
"%\n";
1142 this->elements[index][i] = elmt;
1220 template<
typename T>
1221 AOS<T>::Hash::~Hash() {
1224 for (i = 0; i < this->length; i++) {
1225 delete[] this->elements[i];
1228 delete[] this->elements;
1229 this->elements =
nullptr;
1232 template<
typename T>
1233 size_t AOS<T>::Hash::getLength()
const {
1237 template<
typename T>
1238 void AOS<T>::Hash::setLength(
size_t length) {
1239 AOS<T>::Hash::length = length;
1242 template<
typename T>
1243 unsigned int AOS<T>::Hash::getWidth()
const {
1247 template<
typename T>
1248 void AOS<T>::Hash::setWidth(
size_t width) {
1249 AOS<T>::Hash::width = width;
1252 template<
typename T>
1253 unsigned int AOS<T>::Hash::getConthash()
const {
1257 template<
typename T>
1258 void AOS<T>::Hash::setConthash(
size_t conthash) {
1259 AOS<T>::Hash::conthash = conthash;
1262 template<
typename T>
1263 bool AOS<T>::select_gamma::operator==(
const AOS<T>::select_gamma other)
const {
1267 if (this->level != other.level)
1270 for (i = 0; i < this->level; ++i) {
1271 if (this->fnames[i] == other.fnames[i]) eq++;
1274 return (eq == this->level);
1285 template<
typename T>
1286 AOS<T>::Heap::Heap() {
1289 this->elements =
new AOS<T>::select_gamma *[MAX_HEAP + 1];
1291 if (this->elements ==
nullptr) {
1292 std::cerr <<
"Out of mem! Heap 2\n";
1296 for (i = 0; i < MAX_HEAP + 1; ++i) this->elements[i] =
nullptr;
1304 template<
typename T>
1305 bool AOS<T>::Heap::insert(AOS<T>::select_gamma *tok,
int cont) {
1309 if (this->size == MAX_HEAP) {
1310 if (tok->value > this->elements[MAX_HEAP]->value)
1313 }
else i = ++(this->size);
1315 val = (this->elements[i / 2] !=
nullptr) ? this->elements[i / 2]->value : 0;
1317 while (i > 1 && val < tok->value) {
1318 this->elements[i] = this->elements[i / 2];
1320 val = (this->elements[i / 2] !=
nullptr) ? this->elements[i / 2]->value : 0;
1322 this->elements[i] = tok;
1324 if (this->size > this->maxheapsize)
1325 this->maxheapsize = this->size;
1327 if (cont) this->contheap++;
1328 else this->contheapreins++;
1336 template<
typename T>
1337 typename AOS<T>::select_gamma *AOS<T>::Heap::pop() {
1338 AOS<T>::select_gamma *min_element =
nullptr;
1340 if (this->size == 0) {
1341 std::cerr <<
"Tried to pop an empty heap!\n";
1345 min_element = this->elements[1];
1357 template<
typename T>
1358 void AOS<T>::Heap::print() {
1360 std::vector<int> fnames;
1361 AOS<T>::select_gamma *curr =
nullptr;
1363 if (this->elements ==
nullptr)
return;
1365 for (i = 1; i <= this->size; ++i) {
1366 curr = this->elements[i];
1367 fnames = curr->fnames;
1368 std::cout <<
"Heap[" << i <<
"] --";
1369 if (curr->level <= 50) {
1371 for (j = 0; j < curr->level - 1; ++j)
1372 std::cout << fnames[j] <<
",";
1373 std::cout << fnames[j] <<
")";
1375 std::cout <<
" Value: " << curr->value <<
",";
1376 std::cout <<
" rMargem: " << curr->rgamma <<
",";
1377 std::cout <<
" pMargem: " << curr->pgamma <<
",";
1378 std::cout <<
" Nivel: " << curr->level <<
"\n";
1386 template<
typename T>
1387 size_t AOS<T>::Heap::projected() {
1388 size_t i = 0, projected = 0;
1389 AOS<T>::select_gamma *curr =
nullptr;
1391 if (this->elements ==
nullptr)
return 0;
1393 for (i = 1; i <= this->size; ++i) {
1394 curr = this->elements[i];
1395 if (curr->rgamma < 0)
1405 template<
typename T>
1406 void AOS<T>::Heap::percolate(
size_t i) {
1407 size_t child = i * 2;
1408 AOS<T>::select_gamma *last_element =
nullptr;
1409 last_element = this->elements[this->size + 1];
1411 if (child > this->size) {
1412 this->elements[i] = last_element;
1416 if (child != this->size && this->elements[child + 1]->value > this->elements[child]->value)
1420 if (last_element->value < this->elements[child]->value) {
1421 this->elements[i] = this->elements[child];
1422 this->percolate(child);
1423 }
else this->elements[i] = last_element;
1430 template<
typename T>
1431 void AOS<T>::Heap::cut(std::shared_ptr<AOS<T>::Hash> hash,
int levelat,
int cut,
double g_margin,
int verbose) {
1432 size_t i = 0, count = 0;
1433 AOS<T>::select_gamma *curr =
nullptr;
1435 for (i = this->size; i > 0; --i) {
1436 curr = this->elements[i];
1437 if (curr->level < levelat)
1438 if (curr->value < (1 -
NUM_ERROR_EPS) * g_margin || levelat - curr->level >= cut) {
1440 if(curr)
delete curr;
1448 if (
verbose) std::cout <<
" [Removed nodes from pruning: " << count <<
"]\n";
1450 contprooning += count;
1457 template<
typename T>
1458 AOS<T>::Heap::~Heap() {
1460 for (i = 1; i <= this->size; ++i) {
1461 if(this->elements[i])
delete this->elements[i];
1462 this->elements[i] =
nullptr;
1464 delete[] this->elements;
1465 this->elements =
nullptr;
1468 template<
typename T>
1469 size_t AOS<T>::Heap::getSize()
const {
1473 template<
typename T>
1474 typename AOS<T>::select_gamma **AOS<T>::Heap::getElements()
const {
1478 template<
typename T>
1479 size_t AOS<T>::Heap::getContheap()
const {
1483 template<
typename T>
1484 size_t AOS<T>::Heap::getContheapreins()
const {
1485 return contheapreins;
1488 template<
typename T>
1489 size_t AOS<T>::Heap::getContprooning()
const {
1490 return contprooning;
1493 template<
typename T>
1494 size_t AOS<T>::Heap::getMaxheapsize()
const {
1498 template<
typename T>
1499 bool AOS<T>::select_weight::operator==(AOS::select_weight other)
const {
1500 return (fabs(other.w) > fabs(other.w)) - (fabs(other.w) < fabs(other.w));
PointPointer< T > point(int index) const
Returns a shared pointer to the point with the given index.
Definition: Data.hpp:1510
size_t size() const
Returns the size of the dataset.
Definition: Data.hpp:208
const std::vector< int > classes() const
Returns a vector containing the numeric values of the classes.
Definition: Data.hpp:1831
std::vector< int > getFeaturesNames() const
Returns the features names.
Definition: Data.hpp:1675
size_t dim() const
Returns the dimension of the dataset.
Definition: Data.hpp:213
mltk::Data< T > copy() const
Returns a copy of itself.
Definition: Data.hpp:1551
std::vector< size_t > classesDistribution() const
Returns a vector containing the frequency of the classes. Only valid for classification datasets.
Definition: Data.hpp:1826
mltk::dMatrix * generateMatrixHwithoutDim(std::shared_ptr< Data< T > > samples, int dim)
compute Compute the H matrix without a dimension, with the computed kernel matrix and given samples.
Definition: Kernel.hpp:196
Definition: classifier/Classifier.hpp:17
Data< T > selectFeatures() override
Function that executes the feature selection phase.
Definition: AOS.hpp:210
Definition: featselect/FeatureSelection.hpp:17
int skip
Number of levels to be skipped.
Definition: featselect/FeatureSelection.hpp:44
int verbose
Verbose level.
Definition: featselect/FeatureSelection.hpp:48
classifier::Classifier< double > * classifier
Classifier used by the method.
Definition: featselect/FeatureSelection.hpp:23
int final_dim
Final dimension.
Definition: featselect/FeatureSelection.hpp:40
validation::CrossValidation * cv
Structure to hold the cross-validation result.
Definition: featselect/FeatureSelection.hpp:25
const double NUM_ERROR_EPS
Error tolerance constant.
Definition: featselect/FeatureSelection.hpp:36
std::shared_ptr< Data< double > > samples
Attributes.
Definition: featselect/FeatureSelection.hpp:21
A helper class to measure execution time for benchmarking purposes.
Definition: ThreadPool.hpp:503
double distCenters(const Data< T > &data, int feat)
Compute the distance between the centers of binary classes without given features.
Definition: Statistics.hpp:229
double radius(const Data< T > &data, int feat, double q)
Returns radius of the ball that circ. the data.
Definition: Statistics.hpp:186
ValidationReport kfold(Data< T > sample, classifier::Classifier< T > &classifier, size_t fold, bool stratified=true, size_t seed=0, int verbose=0)
Executes k-fold stratified cross-validation.
Definition: valid/Validation.hpp:312
UFJF-MLTK main namespace for core functionalities.
Definition: classifier/Classifier.hpp:11
T min(const Point< T, R > &p)
Returns the min value of the point.
Definition: Point.hpp:557
Structure to manage cross validation.
Definition: valid/Validation.hpp:62
double accuracy
Accuracy of the validated model.
Definition: valid/Validation.hpp:24