UFJF - Machine Learning Toolkit  0.51.8
Perceptron.hpp
Go to the documentation of this file.
1 
6 #ifndef PERCEPTRONPRIMAL__HPP
7 #define PERCEPTRONPRIMAL__HPP
8 
#include "PrimalClassifier.hpp"
#include "DualClassifier.hpp"

#include <chrono>
#include <cmath>
#include <limits>
12 
13 namespace mltk{
14  namespace classifier {
15  using namespace std::chrono;
16 
20  template<typename T>
21  class PerceptronPrimal : public PrimalClassifier<T> {
22  public:
23  explicit PerceptronPrimal(std::shared_ptr<Data<T> > samples = nullptr, double q = 2, double rate = 0.5,
24  Solution *initial_solution = nullptr);
25  PerceptronPrimal(const Data<T>& samples, double q = 2, double rate = 0.5, Solution *initial_solution = nullptr);
26  bool train() override;
27 
28  double evaluate(const Point<T> &p, bool raw_value = false) override;
29  };
30 
34  template<typename T>
36  public:
37  PerceptronFixedMarginPrimal() = default;
38  explicit PerceptronFixedMarginPrimal(const mltk::Data<T>& samples, double gamma = 0,
39  double q = 2, double rate = 0.5, Solution *initial_solution = nullptr);
40 
41  bool train() override;
42 
43  double evaluate(const Point<T> &p, bool raw_value = false) override;
44  };
45 
49  template<typename T>
50  class PerceptronDual : public DualClassifier<T> {
51  private:
52  Solution *initial = nullptr;
53  public:
54  PerceptronDual() = default;
55  PerceptronDual(const Data<T>& samples,
56  KernelType kernel_type, double kernel_param = 0,
57  double rate = 0.5, Solution *initial_solution = nullptr);
58 
59  explicit PerceptronDual(const Data<T>& samples, double rate = 0.5,
60  Solution *initial_solution = nullptr);
61 
62  bool train() override;
63  };
64 
68  template<typename T>
70  public:
71  PerceptronFixedMarginDual() = default;
72  explicit PerceptronFixedMarginDual(const mltk::Data<T>& samples, KernelType kernel_type = KernelType::INNER_PRODUCT,
73  double kernel_param = 0, double gamma = 0, double rate = 0.5,
74  Solution *initial_solution = nullptr);
75 
76  bool train() override;
77  };
78 
79  template<typename T>
81  private:
82  Point<double> weights;
83  double bias = 0;
84 
85  public:
86  BalancedPerceptron() = default;
87 
88  explicit BalancedPerceptron(const Data<T> &samples, const size_t &seed = 0) {
89  this->samples = mltk::make_data<T>(samples);
90  this->seed = seed;
91  }
92 
93  bool train() override {
94  size_t epoch = 0, ite = 0;
95  bool stop = false;
97  size_t errors = 0;
98 
99  this->bias = 0;
100  mltk::random_init<double>(this->weights, this->samples->dim(), this->seed);
101  this->samples->shuffle(this->seed);
102  this->timer.reset();
103 
104  while (epoch < this->MAX_EPOCH) {
105  errors = 0;
106  for (auto it = this->samples->begin(); it != this->samples->end(); ++it) {
107  auto point = *it;
108  int pred = ((mltk::dot(weights, *point) + bias) >= 0) ? 1 : -1;
109 
110  if (point->Y() != pred) {
111  weights += this->rate * (*point) * point->Y();
112  bias += this->rate * bias;
113  ite++;
114  errors++;
115  }
116 
117  if (ite > this->MAX_UP) {
118  stop = true;
119  break;
120  }
121 
122  }
123  epoch++;
124  if (stop || this->timer.elapsed() > this->max_time || errors == 0) break;
125  }
126 
127  for (auto it = this->samples->begin(); it != this->samples->end(); ++it) {
128  auto point = *(*it);
129  double func = point.Y() * (mltk::dot(weights, point) + bias);
130 
131  if (point.Y() == 1 && func < gamma1) gamma1 = func;
132  if (point.Y() == -1 && func < gamma2) gamma2 = func;
133  }
134  double rmargin = (fabs(gamma1) > fabs(gamma2)) ? fabs(gamma2) : fabs(gamma1);
135  double mmargin = (fabs(gamma1) + fabs(gamma2)) / 2;
136  if(fabs(gamma2) > fabs(gamma1)){
137  bias += fabs(mmargin - rmargin);
138  }else{
139  bias -= fabs(mmargin - rmargin);
140  }
141  this->solution.w = weights;
142  this->solution.bias = bias;
143 
144  return (errors == 0);
145  }
146 
147  double evaluate(const Point<T> &p, bool raw_value) override {
148  if (raw_value) {
149  return mltk::dot(weights, p) + bias;
150  } else {
151  return ((mltk::dot(weights, p) + bias) >= 0) ? 1 : -1;
152  }
153  }
154 
155  std::string getFormulationString() override { return "Primal"; }
156  };
157 
158  template<typename T>
159  PerceptronPrimal<T>::PerceptronPrimal(std::shared_ptr<Data<T> > samples, double q, double rate,
160  Solution *initial_solution) {
161  this->samples = samples;
162  this->q = q;
163  this->rate = rate;
164  if (initial_solution)
165  this->solution = *initial_solution;
166  }
167 
168  template<typename T>
169  PerceptronPrimal<T>::PerceptronPrimal(const Data<T> &samples, double q, double rate,
170  Solution *initial_solution) {
171  this->samples = mltk::make_data<T>(samples);
172  this->q = q;
173  this->rate = rate;
174  if (initial_solution)
175  this->solution = *initial_solution;
176  }
177 
178 
179  template<typename T>
181  size_t size = this->samples->size(), dim = this->samples->dim();
182  int i, j, e = 0, idx;
183  double y, time = this->start_time + this->max_time;
184  bool cond;
185  std::vector<double> func(size, 0);
186  std::vector<T> x;
187  std::vector<int> index = this->samples->getIndex();
188 
189  if (this->solution.w.empty()) this->solution.w.resize(dim);
190  this->solution.bias = 0;
191  this->solution.norm = 0.0;
192 
193  this->timer.reset();
194  while (this->timer.elapsed() - time <= 0) {
195  for (e = 0, i = 0; i < size; ++i) {
196  idx = index[i];
197  auto p = this->samples->point(idx);
198  x = p->X();
199  y = p->Y();
200 
201  //calculating function
202  for (func[idx] = this->solution.bias, j = 0; j < dim; ++j)
203  func[idx] += this->solution.w[j] * x[j];
204 
205  //Checking if the point is a mistake
206  if (y * func[idx] <= 0.0) {
207  for (this->solution.norm = 0.0, j = 0; j < dim; ++j) {
208  this->solution.w[j] += this->rate * y * x[j];
209  this->solution.norm += this->solution.w[j] * this->solution.w[j];
210  }
211  this->solution.norm = sqrt(this->solution.norm);
212  this->solution.bias += this->rate * y;
213  this->ctot++;
214  e++;
215  } else if (this->steps > 0 && e > 1) break;
216  }
217  this->steps++; //Number of iterations update
218 
219  //stop criterion
220  if (e == 0) break;
221  if (this->steps > this->MAX_IT) break;
222  if (this->ctot > this->MAX_UP) break;
223  }
224 
225  return (e == 0);
226  }
227 
228  template<typename T>
229  double PerceptronPrimal<T>::evaluate(const Point<T> &p, bool raw_value) {
230  return PrimalClassifier<T>::evaluate(p, raw_value);
231  }
232 
233  template<typename T>
235  double q, double rate, Solution *initial_solution) {
236  this->samples = mltk::make_data<T>(samples);
237  this->q = q;
238  this->rate = rate;
239  this->gamma = gamma;
240  if (initial_solution)
241  this->solution = *initial_solution;
242  }
243 
244  template<typename T>
246  size_t i, j, k, e, s, r, dim = this->samples->dim();
247  size_t size = this->samples->size(), n = dim;
248  int idx, sign = 1, n_temp = 0, largn = 0;
249  double norm = this->solution.norm, lambda = 1.0, y, time = this->start_time + this->max_time;
250  double sumnorm = 0.0, bias = this->solution.bias, largw = 0.0, largw_temp = 0.0;
251  bool cond;
252  std::vector<double> func = this->solution.func.X(), w = this->solution.w.X();
253  std::vector<T> x;
254  std::vector<int> index = this->samples->getIndex();
255  std::shared_ptr<Point<T> > p;
256 
257  if (func.empty()) func.resize(size);
258  if (w.empty()) w.resize(dim);
259  e = s = 0;
260  this->timer.reset();
261  while (this->timer.elapsed() - time <= 0) {
262  for (e = 0, i = 0; i < size; ++i) {
263  idx = index[i];
264  p = this->samples->point(idx);
265  x = p->X();
266  y = p->Y();
267 
268  //calculating function
269  for (func[idx] = bias, j = 0; j < dim; ++j) {
270  func[idx] += w[j] * x[j];
271  }
272 
273  //Checking if the point is a mistake
274  if (y * func[idx] <= this->gamma * norm - this->samples->point(idx)->Alpha() * this->flexible) {
275  lambda = (norm != 0.0) ? (1 - this->rate * this->gamma / norm) : 1;
276  for (r = 0; r < size; ++r) {
277  std::shared_ptr<Point<T> > b = this->samples->point(r);
278  b->Alpha() *= lambda;
279  this->samples->setPoint(r, b);
280  }
281 
282  if (this->q == 1.0) { //Linf
283  for (sumnorm = 0.0, j = 0; j < dim; ++j) {
284  sign = (w[j] < 0) ? -1 : 1;
285  lambda = (norm > 0 && w[j] != 0) ? this->gamma * sign : 0;
286  w[j] += this->rate * (y * x[j] - lambda);
287  sumnorm += fabs(w[j]);
288  }
289  norm = sumnorm;
290  } else if (this->q == 2.0) { //L2
291  for (sumnorm = 0.0, j = 0; j < dim; ++j) {
292  lambda = (norm > 0 && w[j] != 0) ? w[j] * this->gamma / norm : 0;
293  w[j] += this->rate * (y * x[j] - lambda);
294  sumnorm += w[j] * w[j];
295  }
296  norm = sqrt(sumnorm);
297  } else if (this->q == -1.0) { //L1
298  largw_temp = fabs(w[0]);
299  n_temp = 1;
300  for (j = 0; j < dim; ++j) {
301  if (largw == 0 || fabs(largw - fabs(w[j])) / largw < this->EPS) {
302  sign = (w[j] < 0) ? -1 : 1;
303  lambda = (norm > 0 && w[j] != 0) ? this->gamma * sign / n : 0;
304  w[j] += this->rate * (y * x[j] - lambda);
305  } else
306  w[j] += this->rate * (y * x[j]);
307 
308  if (j > 0) {
309  if (fabs(largw_temp - fabs(w[j])) / largw_temp < this->EPS)
310  n_temp++;
311  else if (fabs(w[j]) > largw_temp) {
312  largw_temp = fabs(w[j]);
313  n_temp = 1;
314  }
315  }
316  }
317  largw = largw_temp;
318  n = n_temp;
319  norm = largw;
320  if (n > largn) largn = n;
321  } else { //Other Lp formulations
322  for (sumnorm = 0, j = 0; j < dim; ++j) {
323  lambda = (norm > 0 && w[j] != 0) ? w[j] * this->gamma *
324  std::pow(fabs(w[j]), this->q - 2.0) *
325  std::pow(norm, 1.0 - this->q) : 0;
326  w[j] += this->rate * (y * x[j] - lambda);
327  sumnorm += std::pow(fabs(w[j]), this->q);
328  }
329  norm = std::pow(sumnorm, 1.0 / this->q);
330  }
331  bias += this->rate * y;
332  p->Alpha() += this->rate;
333  this->samples->setPoint(idx, p);
334 
335  k = (i > s) ? s++ : e;
336  j = index[k];
337  index[k] = idx;
338  index[i] = j;
339  this->ctot++;
340  e++;
341  } else if (this->steps > 0 && e > 1 && i > s) break;
342  }
343  ++this->steps; //Number of iterations update
344 
345  //stop criterion
346  if (e == 0) break;
347  if (this->steps > this->MAX_IT) break;
348  if (this->ctot > this->MAX_UP) break;
349  }
350 
351  this->solution.norm = norm;
352  this->solution.bias = bias;
353  this->solution.w = w;
354  return (e == 0);
355  }
356 
357  template<typename T>
358  double PerceptronFixedMarginPrimal<T>::evaluate(const Point<T> &p, bool raw_value) {
359  double func = 0.0;
360  int i;
361  size_t dim = this->samples->dim();
362 
363  if (p.X().size() != dim) {
364  std::cerr << "The point must have the same dimension of the feature set!" << std::endl;
365  return 0;
366  }
367 
368  for (func = this->solution.bias, i = 0; i < dim; i++) {
369  func += this->solution.w[i] * p[i];
370  }
371 
372  if (!raw_value) return (func >= this->solution.margin * this->solution.norm) ? 1 : -1;
373  else return func;
374  }
375 
376  template<typename T>
377  PerceptronDual<T>::PerceptronDual(const Data<T>& samples, KernelType kernel_type,
378  double kernel_param, double rate, Solution *initial_solution) {
379  this->samples = mltk::make_data<T>(samples);
380  if (initial_solution) {
381  this->solution = *initial_solution;
382  this->alpha = (*initial_solution).alpha;
383  }
384  this->rate = rate;
385  this->kernel = new Kernel<T>(kernel_type, kernel_param);
386  }
387 
388  template<typename T>
389  PerceptronDual<T>::PerceptronDual(const Data<T>& samples, double rate, Solution *initial_solution) {
390  this->samples = mltk::make_data<T>(samples);
391  if (initial_solution) {
392  this->solution = *initial_solution;
393  this->alpha = (*initial_solution).alpha;
394  }
395  this->initial = initial_solution;
396  this->rate = rate;
397  this->kernel = nullptr;
398  }
399 
400 
401  template<typename T>
403  size_t e, i, j, idx, r, size = this->samples->size(), dim = this->samples->dim();
404  double norm = this->solution.norm, time = this->start_time + this->max_time;
405  double bias = this->solution.bias, f;
406  const double sqrate = this->rate * this->rate;
407  const double tworate = 2 * this->rate;
408  std::vector<int> index(size); //= this->samples->getIndex();
409  std::vector<double> func(size, 0.0), Kv;
410  std::vector<std::shared_ptr<Point<T> > > points = this->samples->points();
411  if(!this->kernel) this->kernel = new Kernel<T>(mltk::KernelType::INNER_PRODUCT);
412  this->kernel->compute(this->samples);
413  dMatrix *K = this->kernel->getKernelMatrixPointer();
414  int y;
415  iota(index.begin(), index.end(), 0);
416  if (this->alpha.empty()) {
417  this->alpha.assign(size, 0.0);
418  }
419  if(!initial){
420  this->solution = Solution();
421  }
422  this->timer.reset();
423  e = 1;
424  auto time_elapsed = this->timer.elapsed() - time;
425  while (time_elapsed <= 0) {
426  for (e = 0, i = 0; i < size; ++i) {
427  idx = index[i];
428  y = points[idx]->Y();
429 
430  //Calculating function
431  for (f = bias, r = 0; r < size; ++r)
432  f += this->alpha[r] * points[index[r]]->Y() * (*K)[idx][index[r]];
433  func[idx] = f;
434 
435  //Checking if the point is a mistake
436  if (y * f <= 0.0) {
437  norm = std::sqrt(norm * norm + tworate * points[idx]->Y() * (func[idx] - bias) + sqrate * (*K)[idx][idx]);
438  this->alpha[i] += this->rate;
439  bias += this->rate * y;
440  ++this->ctot, ++e;
441  } else if (this->steps > 0 && e > 1) break;
442  }
443  time_elapsed = this->timer.elapsed() - time;
444  ++this->steps;
445 
446  //stop criterion
447  if (e == 0) break;
448  if (this->steps > this->MAX_IT) break;
449  if (this->ctot > this->MAX_UP) break;
450  }
451  this->solution.bias = bias;
452  this->solution.norm = norm;
453  this->solution.alpha = this->alpha;
454  this->solution.margin = 0.0;
455  this->solution.w.resize(dim);
456 
457  for (i = 0; i < size; i++) {
458  for (j = 0; j < dim; j++) {
459  this->solution.w[j] += this->alpha[i] * points[i]->Y() * points[i]->X()[j];
460  }
461  }
462 
463  return (e == 0);
464  }
465 
466  template<typename T>
467  PerceptronFixedMarginDual<T>::PerceptronFixedMarginDual(const mltk::Data<T>& samples, KernelType kernel_type,
468  double kernel_param, double gamma, double rate,
469  Solution *initial_solution) {
470  this->samples = mltk::make_data<T>(samples);
471  //this->solution = *initial_solution;
472  this->rate = rate;
473  this->kernel = new Kernel<T>(kernel_type, kernel_param);
474  this->gamma = gamma;
475  if (initial_solution)
476  this->alpha = (*initial_solution).alpha;
477  }
478 
479  template<typename T>
481  size_t e, i, j, k, s, idx, r, size = this->samples->size(), dim = this->samples->dim();
482  double y, lambda, norm = this->solution.norm, time = this->start_time + this->max_time;
483  double bias = this->solution.bias;
484  const double sqrate = this->rate * this->rate;
485  const double tworate = 2 * this->rate;
486  std::vector<int> index = this->samples->getIndex();
487  std::vector<double> func = this->solution.func.X(), Kv;
488  this->kernel->compute(this->samples);
489  dMatrix *K = this->kernel->getKernelMatrixPointer();
490 
491  if(this->alpha.empty()){
492  this->alpha.resize(this->samples->size());
493  }
494 
495  if (func.empty()) { func.resize(size); }
496  e = 1, s = 0;
497  this->timer.reset();
498 
499  while (this->timer.elapsed() - time <= 0) {
500  for (e = 0, i = 0; i < size; ++i) {
501  idx = index[i];
502  y = (*this->samples)[idx]->Y();
503 
504  //Checking if the point is a mistake
505 
506  if (y * func[idx] - this->gamma * norm <= 0) {
507  lambda = (this->gamma) ? (1 - this->rate * this->gamma / norm) : 1;
508  norm *= lambda;
509 
510  for (r = 0; r < size; ++r) {
511  (*this->samples)[r]->Alpha() *= lambda;
512  func[r] = lambda * func[r] + this->rate * y * ((*K)[idx][r] + 1) + bias * (1 - lambda);
513  }
514 
515  norm = sqrt(norm * norm + tworate * (*this->samples)[idx]->Y() * lambda * (func[idx] - bias) +
516  sqrate * (*K)[idx][idx]);
517  (*this->samples)[idx]->Alpha() += this->rate;
518 
519  bias += this->rate * y;
520 
521  k = (i > s) ? ++s : e;
522  j = index[k];
523  index[k] = idx;
524  index[i] = j;
525  ++this->ctot;
526  ++e;
527  } else if (this->steps > 0 && e > 1 && i > s) break;
528  }
529 
530  ++this->steps; //Number of iterations update
531  //stop criterion
532  if (e == 0) break;
533  if (this->steps > this->MAX_IT) break;
534  if (this->ctot > this->MAX_UP) break;
535  }
536 
537  this->samples->setIndex(index);
538  this->solution.bias = bias;
539  this->solution.norm = norm;
540  this->solution.func = func;
541  this->solution.w.resize(dim);
542  this->solution.alpha.resize(size);
543  for (i = 0; i < dim; i++) {
544  for (j = 0; j < size; j++) {
545  this->solution.alpha[j] = (*this->samples)[j]->Alpha();
546  this->solution.w[i] +=
547  (*this->samples)[j]->Alpha() * (*this->samples)[j]->Y() * (*this->samples)[j]->X()[i];
548  }
549  }
550 
551  return (e == 0);
552  }
553  }
554 }
555 
556 #endif
PointPointer< T > point(int index) const
Returns a shared pointer to the point with the given index.
Definition: Data.hpp:1510
size_t size() const
Returns the size of the dataset.
Definition: Data.hpp:208
void setIndex(std::vector< int > index)
Set the index vector for the data.
Definition: Data.hpp:1754
void setPoint(int index, std::shared_ptr< Point< T > > p)
Sets the point at the given position in the data.
Definition: Data.hpp:1515
std::vector< int > getIndex() const
Returns the vector of indexes.
Definition: Data.hpp:1695
size_t dim() const
Returns the dimension of the dataset.
Definition: Data.hpp:213
void shuffle(const size_t &seed=42)
Shuffle the data with a given seed.
Definition: Data.hpp:1349
Rep const & X() const
Returns the attributes representation of the point (std::vector by default).
Definition: Point.hpp:139
double const & Y() const
Returns the class or value of the point.
Definition: Point.hpp:152
Definition: Solution.hpp:13
std::vector< double > alpha
Alpha Vector for Dual methods.
Definition: Solution.hpp:21
Definition: Perceptron.hpp:80
bool train() override
Function that execute the training phase of a Learner.
Definition: Perceptron.hpp:93
std::string getFormulationString() override
Returns a string that represents the formulation of the learner (Primal or Dual).
Definition: Perceptron.hpp:155
double evaluate(const Point< T > &p, bool raw_value) override
Returns the class of a feature point based on the trained Learner.
Definition: Perceptron.hpp:147
Definition: DualClassifier.hpp:16
Wrapper for the implementation of the Perceptron dual algorithm.
Definition: Perceptron.hpp:50
bool train() override
Function that execute the training phase of a Learner.
Definition: Perceptron.hpp:402
Wrapper for the implementation of the Perceptron dual with fixed margin algorithm.
Definition: Perceptron.hpp:69
bool train() override
Function that execute the training phase of a Learner.
Definition: Perceptron.hpp:480
Wrapper for the implementation of the Perceptron primal with fixed margin algorithm.
Definition: Perceptron.hpp:35
double evaluate(const Point< T > &p, bool raw_value=false) override
Returns the class of a feature point based on the trained Learner.
Definition: Perceptron.hpp:358
bool train() override
Function that execute the training phase of a Learner.
Definition: Perceptron.hpp:245
Wrapper for the implementation of the Perceptron primal algorithm.
Definition: Perceptron.hpp:21
double evaluate(const Point< T > &p, bool raw_value=false) override
Returns the class of a feature point based on the trained Learner.
Definition: Perceptron.hpp:229
bool train() override
Function that execute the training phase of a Learner.
Definition: Perceptron.hpp:180
Definition: PrimalClassifier.hpp:14
double evaluate(const Point< T > &p, bool raw_value=false) override
Returns the class of a feature point based on the trained Learner.
Definition: PrimalClassifier.hpp:42
A helper class to measure execution time for benchmarking purposes.
Definition: ThreadPool.hpp:503
UFJF-MLTK main namespace for core functionalities.
Definition: classifier/Classifier.hpp:11
T dot(const Point< T, R > &p, const Point< T, R > &p1)
Computes the dot product with a vector.
Definition: Point.hpp:528
T max(const Point< T, R > &p)
Returns the max value of the point.
Definition: Point.hpp:544