6 #define first_decay 0.25
8 #include "FeatureSelection.hpp"
12 namespace featselect {
13 template<
typename T =
double>
16 struct select_weight {
22 static int compare_weight_greater(
const select_weight &a,
const select_weight &b);
36 this->samples = mltk::make_data<T>(samples);
37 this->classifier = classifier;
38 this->depth = this->samples->
dim()-final_dim;
39 this->final_dim = final_dim;
45 this->cv->seed = std::vector<unsigned int>(1, 0);
48 this->cv->jump = this->jump;
50 this->leave_one_out = leave_one_out;
55 size_t dim = this->samples->
dim(), partial_dim = 0, i = 0, j = 0;
56 std::vector<int> features, partial_features, choosen_feats, fnames;
57 std::vector<double> w, new_w;
58 std::vector<select_weight> weight;
59 std::shared_ptr<Data<T> > stmp_partial, stmp(std::make_shared<
Data<T> >());
61 int svcount = 0, level = 0, leveljump = 0, partial_svs = 0;
64 double margin = 0, leave_oo = 0, kfolderror = 0, partial_time = 0, partial_margin = 0;
65 double START_TIME = 100.0f * clock() / CLOCKS_PER_SEC;
68 stmp->copy(*this->samples);
70 if (this->depth < 1 || this->depth >= dim) {
71 std::cerr <<
"Invalid depth!\n";
75 features.assign(this->depth, -1);
77 if (this->cv->qtde > 0) {
79 this->cv->seed.resize(this->cv->qtde);
80 for (i = 0; i < this->cv->qtde; i++)
81 this->cv->seed[i] = i;
82 this->cv->initial_error = 0;
83 this->cv->actual_error = 0;
94 n0 = max_time *= first_decay;
95 }
else if (level > 1) {
96 max_time = n0 * std::exp(-time_mult * ((
double) dim / (dim - level)));
104 if (!this->classifier->
train()) {
108 sol.
w.
X().erase(sol.
w.
X().begin(), sol.
w.
X().end());
110 if (this->verbose) std::cerr <<
"Training Failed!\n";
112 std::cout <<
"---------------\n :: FINAL :: \n---------------\n";
113 choosen_feats = stmp_partial->getFeaturesNames();
114 std::cout <<
"Choosen Features: ";
115 for (i = 0; i < stmp_partial->dim() - 1; ++i) std::cout << choosen_feats[i] <<
", ";
116 std::cout << choosen_feats[i] << std::endl;
118 if (this->cv->qtde > 0) {
119 if ((dim - partial_dim) % this->cv->jump != 0) {
120 for (this->cv->actual_error = 0, i = 0; i < this->cv->qtde; i++) {
122 this->cv->fold,
true, this->cv->seed[i], 0).
accuracy;
124 kfolderror = this->cv->actual_error / this->cv->qtde;
126 std::cout <<
"Dim: " << partial_dim <<
", Margin: " << partial_dim <<
", SVs: " << partial_svs
127 <<
", Error " << this->cv->fold <<
"-fold: " << kfolderror <<
"%\n";
129 std::cout <<
"Dim: " << partial_dim <<
", Margin: " << partial_dim <<
", SVs: " << partial_svs
131 std::cout <<
"---------------\nTotal time: " << partial_time <<
"\n\n";
141 partial_margin = margin;
142 partial_svs = svcount;
143 partial_time = (100.0f * clock() / CLOCKS_PER_SEC - START_TIME) / 100.0f;
144 partial_dim = dim - level;
146 stmp_partial.reset();
147 stmp_partial = std::make_shared<Data<T> >();
148 stmp_partial->copy(*this->samples);
150 partial_features.clear();
152 int levelminusjump = (level - this->jump);
154 if (levelminusjump > 0) {
155 partial_features.resize((
size_t) (level - this->jump));
158 for (i = 0; (i < levelminusjump && levelminusjump > 0); ++i) {
159 partial_features[i] = features[i];
162 if (this->cv->qtde > 0) {
164 for (this->cv->initial_error = 0, i = 0; i < this->cv->qtde; i++) {
165 auto report =
validation::kfold(*stmp, *this->classifier, this->cv->fold,
true, this->cv->seed[i], 0);
166 this->cv->initial_error = report.error;
168 kfolderror = this->cv->initial_error / this->cv->qtde;
169 }
else if (level % this->cv->jump == 0) {
170 for (this->cv->actual_error = 0, i = 0; i < this->cv->qtde; i++) {
171 this->cv->actual_error += 100-
validation::kfold(*stmp, *this->classifier, this->cv->fold,
true,
174 kfolderror = this->cv->actual_error / this->cv->qtde;
179 if (this->leave_one_out) {
181 std::cout <<
"LeaveOO -- Dim: " << (dim - level) <<
", Margin: " << margin <<
", LeaveOO: " << leave_oo
182 <<
", SVs: " << svcount << std::endl;
183 }
else if (this->verbose) {
184 if (this->cv->qtde > 0 && level % this->cv->jump == 0)
185 std::cout <<
"Dim: " << (dim - level) <<
", Margin: " << margin <<
", SVs: " << svcount <<
", Erro "
186 << this->cv->fold <<
"-fold: " << kfolderror <<
"%\n";
188 std::cout <<
"Dim: " << (dim - level) <<
", Margin: " << margin <<
", SVs: " << svcount << std::endl;
193 weight.resize(stmp->dim());
194 auto fnames = stmp->getFeaturesNames();
196 for (i = 0; i < stmp->dim(); ++i) {
198 weight[i].fname = fnames[i];
201 std::sort(weight.begin(), weight.end(), compare_weight_greater);
203 std::cout <<
"---------------------\n";
204 if (this->verbose > 1) {
205 for (i = 0; i < stmp->dim(); ++i)
206 std::cout << weight[i].fname <<
": " << weight[i].w << std::endl;
207 std::cout <<
"---------------------\n";
211 if (level >= this->depth ||
212 (this->cv->qtde > 0 && (this->cv->actual_error - this->cv->initial_error) > this->cv->limit_error)) {
213 std::cout <<
"---------------\n :: FINAL :: \n---------------\n";
214 choosen_feats = stmp->getFeaturesNames();
215 std::cout <<
"Choosen Features: ";
216 for (i = 0; i < stmp->dim() - 1; ++i) std::cout << choosen_feats[i] <<
",";
217 std::cout << choosen_feats[i] << std::endl;
219 std::cout <<
"---------------\nEliminated Features: ";
220 for (i = 0; i < leveljump - 1; ++i) std::cout << features[i] <<
",";
221 std::cout << features[i] << std::endl;
223 if (this->cv->qtde > 0) {
224 if (level % this->cv->jump != 0) {
225 for (this->cv->actual_error = 0, i = 0; i < this->cv->qtde; i++) {
226 this->cv->actual_error += 100-
validation::kfold(*stmp, *this->classifier, this->cv->fold,
true,
229 kfolderror = this->cv->actual_error / this->cv->qtde;
231 std::cout <<
"Dim: " << (dim - level) <<
", Margin: " << margin <<
", SVs: " << svcount <<
", Erro "
232 << this->cv->fold <<
"-fold: " << kfolderror <<
"%\n";
234 std::cout <<
"Dim: " << (dim - level) <<
", Margem: " << margin <<
", SVs: " << svcount << std::endl;
236 std::cout <<
"---------------\nTotal time: " << (100.0f * clock() / CLOCKS_PER_SEC - START_TIME) / 100.0f
244 if (level + this->jump > this->depth)
245 leveljump = this->depth;
247 leveljump = level + this->jump;
251 for (j = 0; j < stmp->dim(); ++j)
252 for (i = level; i < leveljump; ++i)
253 if (weight[i - level].w == w[j])
256 new_w.resize(dim - leveljump);
258 for (i = 0, j = 0; j < stmp->dim(); ++j)
274 for (i = level; i < leveljump; ++i) {
275 std::cout <<
"Removing w = " << weight[i - level].w <<
"\n";
276 features[i] = weight[i - level].fname;
278 std::cout <<
"---------------------\n";
282 if (level + this->jump > this->depth) {
290 stmp->removeFeatures(features);
294 if (this->cv->qtde > 0) this->cv->seed.clear();
299 return *stmp_partial;
301 stmp_partial.reset();
313 return fabs(a.w) < fabs(b.w);
double getTime_mult() const
Returns the time multiplier.
Definition: Data.hpp:1765
size_t dim() const
Returns the dimension of the dataset.
Definition: Data.hpp:213
virtual void setSamples(const Data< T > &data)
setSamples Set the samples used by the Learner.
Definition: Learner.hpp:150
virtual bool train()=0
Function that executes the training phase of a Learner.
double getMaxTime() const
getMaxTime Returns the maximum running time in the training phase of the Learner.
Definition: Learner.hpp:130
virtual std::string getFormulationString()=0
getFormulationString Returns a string that represents the formulation of the learner (Primal or Dual)...
void setVerbose(int _verbose)
Sets the verbosity level.
Definition: Learner.hpp:175
Rep const & X() const
Returns the attributes representation of the point (std::vector by default).
Definition: Point.hpp:139
Definition: Solution.hpp:13
unsigned int svs
Number of support vectors.
Definition: Solution.hpp:31
mltk::Point< double > w
Weights vector.
Definition: Solution.hpp:17
double margin
Margin generated from the classifier that generated the solution.
Definition: Solution.hpp:27
Definition: classifier/Classifier.hpp:17
Solution * getSolutionRef()
getSolutionRef Returns a pointer to the solution of the classifier.
Definition: classifier/Classifier.hpp:58
Solution getSolution() const
getSolution Returns the solution of the classifier.
Definition: classifier/Classifier.hpp:52
void setGamma(double gamma)
Sets the gamma (margin) of the classifier.
Definition: classifier/Classifier.hpp:67
void setSolution(Solution solution)
setSolution Sets a solution for the classifier.
Definition: classifier/Classifier.hpp:79
Definition: featselect/FeatureSelection.hpp:17
bool leave_one_out
Leave one out.
Definition: featselect/FeatureSelection.hpp:46
int skip
Number of levels to be skipped.
Definition: featselect/FeatureSelection.hpp:44
classifier::Classifier< double > * classifier
Classifier used by the method.
Definition: featselect/FeatureSelection.hpp:23
int final_dim
Final dimension.
Definition: featselect/FeatureSelection.hpp:40
validation::CrossValidation * cv
Structure to hold the cross-validation result.
Definition: featselect/FeatureSelection.hpp:25
std::shared_ptr< Data< double > > samples
Dataset samples used by the method.
Definition: featselect/FeatureSelection.hpp:21
int jump
Jump size.
Definition: featselect/FeatureSelection.hpp:42
Data< T > selectFeatures() override
Function that executes the feature selection phase.
Definition: RFE.hpp:54
ValidationReport kfold(Data< T > sample, classifier::Classifier< T > &classifier, size_t fold, bool stratified=true, size_t seed=0, int verbose=0)
Executes k-fold cross-validation (stratified by default).
Definition: valid/Validation.hpp:312
UFJF-MLTK main namespace for core functionalities.
Definition: classifier/Classifier.hpp:11
Structure to manage cross-validation.
Definition: valid/Validation.hpp:62
double accuracy
Accuracy of the validated model.
Definition: valid/Validation.hpp:24