6 #include "FeatureSelection.hpp"
// NOTE(review): lines are missing from this excerpt between the template
// header, the struct and the static declaration, so their exact nesting cannot
// be confirmed from here.
10 template<
typename T =
double>
// Record type for one feature's ranking entry; the selection code reads and
// writes `.score` and `.fname` on objects of this type (the member declarations
// themselves are not visible in this excerpt).
15 struct golub_select_score {
// Comparison callback over two score records; it is the comparator handed to
// std::sort on the score table.
// NOTE(review): the name says "greater" — check it against the comparison body
// before relying on the resulting order.
20 static int golub_select_compare_score_greater(
const golub_select_score &a,
const golub_select_score &b);
// Keep the inputs on the object: the samples go through mltk::make_data<T>,
// while the classifier and `number` are stored as given.
// NOTE(review): the enclosing signature is missing from this excerpt; `number`
// is presumably the count of features to keep, since the selection loop runs
// (dim - number) removal steps — confirm.
32 this->samples = mltk::make_data<T>(samples);
33 this->classifier = classifier;
34 this->number = number;
// Counters and sizes. The class counters start at zero; the lines that
// increment num_pos / num_neg are not visible in this excerpt.
40 size_t num_pos = 0, num_neg = 0, svs = 0, dim = this->samples->
dim(), size = this->samples->size();
// `remove` has one slot per feature to drop (dim - number entries, all 0 at
// construction); `fnames` receives the dataset's feature names further down.
// NOTE(review): dim is size_t, so dim - number presumably wraps around when
// number > dim — confirm callers guarantee number <= dim.
43 std::vector<int> remove(dim - number), fnames;
// Per-feature class means and standard deviations, zero-initialised; `w` is
// only ever cleared in the visible lines.
44 std::vector<double> avg_neg(dim), avg_pos(dim), sd_neg(dim), sd_pos(dim), w;
// One score record per feature.
45 std::vector<golub_select_score> scores(dim);
// stmp is the working dataset that loses features step by step; stmp_partial
// is a second dataset that is re-copied from stmp inside the removal loop and
// is the object dereferenced by the visible return statement.
46 std::shared_ptr<Data<T> > stmp(std::make_shared<
Data<T> >()), stmp_partial(std::make_shared<
Data<T> >());
// Pass 1 — class means: for every feature i, sum the feature value over all
// samples, using the negative accumulator when the label is -1 and the
// positive accumulator otherwise (the `else` line is not visible here), then
// divide each sum by its class count.
50 for (i = 0; i < dim; ++i) {
55 for (j = 0; j < size; ++j) {
56 if ((*this->samples)[j]->Y() == -1) {
57 avg_neg[i] += (*this->samples)[j]->X()[i];
60 avg_pos[i] += (*this->samples)[j]->X()[i];
64 avg_neg[i] /= num_neg;
65 avg_pos[i] /= num_pos;
// Pass 2 — class standard deviations: accumulate squared deviations from the
// class mean, then take sqrt(sum / (count - 1)).
// NOTE(review): the counts are size_t, so a class with exactly one sample
// gives a zero divisor and an empty class makes (count - 1) wrap around.
69 for (i = 0; i < dim; ++i) {
72 for (j = 0; j < size; ++j) {
73 if ((*this->samples)[j]->Y() == -1)
74 sd_neg[i] += std::pow((*this->samples)[j]->X()[i] - avg_neg[i], 2);
75 else sd_pos[i] += std::pow((*this->samples)[j]->X()[i] - avg_pos[i], 2);
77 sd_neg[i] = sqrt(sd_neg[i] / (num_neg - 1));
78 sd_pos[i] = sqrt(sd_pos[i] / (num_pos - 1));
81 fnames = this->samples->getFeaturesNames();
// Score of feature i = |mean_pos - mean_neg| / (sd_pos + sd_neg), stored
// together with the feature's name.
// NOTE(review): the denominator is zero when both deviations are zero (e.g. a
// feature that is constant within each class), which yields inf or NaN.
84 for (i = 0; i < dim; ++i) {
85 scores[i].score = fabs(avg_pos[i] - avg_neg[i]) / (sd_pos[i] + sd_neg[i]);
86 scores[i].fname = fnames[i];
// The line guarding this print is not visible in this excerpt.
88 std::cout <<
"Score: " << scores[i].score <<
", Fname: " << scores[i].fname << std::endl;
90 if (this->verbose) std::cout <<
"----------------------------\n";
92 if (this->verbose) std::cout <<
"Dim: " << dim <<
" -- ";
// Baseline run: train the classifier on the full-dimensional samples with its
// own output silenced and gamma set to `margin`.
95 this->classifier->setVerbose(0);
96 this->classifier->setGamma(margin);
97 this->classifier->setSamples(this->samples);
98 if (!this->classifier->train()) {
99 w.erase(w.begin(), w.end());
100 if (this->verbose) std::cout <<
"Training failed!\n";
// Report margin and support-vector count of the solution just obtained; the
// lines separating this from the failure branch are not visible here.
103 sol = this->classifier->getSolution();
104 std::cout <<
"Training sucessful...\n";
105 std::cout <<
"Margin = " << sol.
margin <<
", Support Vectors = " << sol.
svs <<
"\n";
106 std::cout <<
"----------------------------\n";
// Order the score table with the comparison callback; the features at the
// front of the sorted table are the ones removed first by the loop below.
109 std::sort(scores.begin(), scores.end(), golub_select_compare_score_greater);
// Both working datasets start as copies of the original samples.
111 stmp_partial->copy(*this->samples);
112 stmp->copy(*this->samples);
// Removal loop: one feature per iteration, (dim - number) iterations in total.
114 for (i = 0; i < (dim - this->number); ++i) {
115 if (this->verbose) std::cout <<
"Score: " << scores[i].score <<
", Fname: " << scores[i].fname <<
"\n";
// Record the i-th ranked feature name and ask the working dataset to drop
// everything listed in `remove`.
// NOTE(review): the whole vector is passed each time, so entries not yet
// filled are still 0 — confirm removeFeatures ignores that value.
116 remove[i] = scores[i].fname;
117 stmp->removeFeatures(remove);
120 std::cout <<
"Dim: " << dim - i - 1 <<
" -- ";
// Retrain on the reduced dataset with the same gamma.
123 w.erase(w.begin(), w.end());
124 this->classifier->setGamma(margin);
125 this->classifier->setSamples(stmp);
126 if (!this->classifier->train()) {
127 if (this->verbose) std::cout <<
"Training failed!\n";
131 sol = this->classifier->getSolution();
132 std::cout <<
"Training sucessful...\n";
133 std::cout <<
"Margin = " << sol.
margin <<
", Support Vectors = " << sol.
svs <<
"\n";
134 std::cout <<
"----------------------------\n";
// Replace the snapshot with a fresh copy of the current reduced dataset.
136 stmp_partial.reset();
137 stmp_partial = std::make_shared<Data<T> >();
138 stmp_partial->copy(*stmp);
// Hand back the dataset held by stmp_partial.
// NOTE(review): the braces around these two statements are missing from this
// excerpt, so it is unclear on which path the reset below is reachable.
143 return *stmp_partial;
145 stmp_partial.reset();
// True when a's score is strictly smaller than b's; used as a sort comparator
// this yields ascending order by score.
159 return a.score < b.score;
size_t dim() const
Returns the dimension of the dataset.
Definition: Data.hpp:213
Definition: Solution.hpp:13
unsigned int svs
Number of support vectors.
Definition: Solution.hpp:31
double margin
Margin generated from the classifier that generated the solution.
Definition: Solution.hpp:27
Definition: classifier/Classifier.hpp:17
Definition: featselect/FeatureSelection.hpp:17
classifier::Classifier< double > * classifier
Classifier used by the method.
Definition: featselect/FeatureSelection.hpp:23
std::shared_ptr< Data< double > > samples
Attributes.
Definition: featselect/FeatureSelection.hpp:21
Data< T > selectFeatures() override
Function that executes the feature selection phase.
Definition: Golub.hpp:38
UFJF-MLTK main namespace for core functionalities.
Definition: classifier/Classifier.hpp:11