UFJF - Machine Learning Toolkit  0.51.8
Datasets.hpp
1 //
2 // Created by mateus on 20/05/2021.
3 //
4 
5 #pragma once
6 #ifndef M_PI
7 #define M_PI 3.14159265358979323846
8 #endif
9 
10 #include "ufjfmltk/core/Data.hpp"
11 
12 
14 namespace mltk::datasets{
15  using Centers = std::vector<mltk::Point<double>>;
16 
17  struct RegPair{
22  };
23 
24  struct BlobsPair{
28  Centers centers;
29  };
30 
42  mltk::Data<double> make_spirals(size_t n_samples=100, int n_classes=2, bool shuffle=true, double noise=1.0,
43  size_t n_loops=2, double margin = 0.5, size_t seed = 0);
44 
58  BlobsPair make_blobs(size_t n_samples=100, int n_centers=2, int n_dims=2, double cluster_std=1.0,
59  double center_min=-10.0, double center_max=10.0, bool shuffle=true,
60  bool has_classes=true, size_t seed = 0);
61 
73  BlobsPair make_blobs(const std::vector<size_t>& n_samples, const std::vector<mltk::Point<double>>& centers,
74  std::vector<double> clusters_std, int n_dims=2, bool shuffle=true,
75  bool has_classes=true, size_t seed = 0);
76 
89  RegPair make_regression(size_t n_samples=100, size_t n_dims=100, double bias=0.0, double noise=0.1, double stdev=0.01,
90  size_t n_informative=10, bool shuffle=true, size_t seed=0);
91 
92 }
Namespace for artificial dataset generation.
Definition: Datasets.hpp:14
BlobsPair make_blobs(size_t n_samples=100, int n_centers=2, int n_dims=2, double cluster_std=1.0, double center_min=-10.0, double center_max=10.0, bool shuffle=true, bool has_classes=true, size_t seed=0)
Generate isotropic Gaussian blobs for clustering or classification [source].
Definition: Datasets.cpp:46
mltk::Data< double > make_spirals(size_t n_samples=100, int n_classes=2, bool shuffle=true, double noise=1.0, size_t n_loops=2, double margin=0.5, size_t seed=0)
Generate a synthetic dataset composed of interlaced Archimedean spirals [source].
Definition: Datasets.cpp:8
RegPair make_regression(size_t n_samples=100, size_t n_dims=100, double bias=0.0, double noise=0.1, double stdev=0.01, size_t n_informative=10, bool shuffle=true, size_t seed=0)
Generate a random regression problem [source].
Definition: Datasets.cpp:110
Definition: Datasets.hpp:24
Centers centers
Centers used to generate the point clouds.
Definition: Datasets.hpp:28
mltk::Data< double > dataset
Blobs dataset.
Definition: Datasets.hpp:26
Definition: Datasets.hpp:17
mltk::Point< double > coef
True coefficients.
Definition: Datasets.hpp:21
mltk::Data< double > dataset
Regression dataset.
Definition: Datasets.hpp:19