![]() |
UFJF - Machine Learning Toolkit
0.51.8
|
Wrapper for the dataset data. More...
#include <Data.hpp>
Public Member Functions | |
| const std::string & | getType () const |
| Returns the type of the dataset. More... | |
| void | setType (const std::string &type) |
| Data (const Data< T > &other) | |
| Data (const std::string &dataset, bool atEnd) | |
| Data (const char *dataset, const char *datasetType, const char *pos_class="1", const char *neg_class="-1") | |
| Data constructor to load a dataset from a file. More... | |
| Data (size_t size, size_t dim, T val=T()) | |
| Constructor for instantiation with an initial size and dimension. More... | |
| Data (const char *dataset) | |
| Constructor for empty data. More... | |
| bool | isClassification () const |
| Inform if the dataset is used for classification. More... | |
| std::string | name () const |
| The name of the dataset is defined as the name of the file from which it was loaded. More... | |
| size_t | size () const |
| Returns the size of the dataset. More... | |
| size_t | dim () const |
| Returns the dimension of the dataset. More... | |
| std::vector< std::shared_ptr< Point< T > > > | points () |
| Returns the vector of shared pointers to the Points of the sample. More... | |
| std::vector< std::shared_ptr< Point< T > > > | points () const |
| Returns the vector of shared pointers to the Points of the sample. More... | |
| const std::vector< int > | classes () const |
| Returns a vector containing the numeric values of the classes. More... | |
| PointPointer< T > | point (int index) const |
| Returns a shared pointer to the point with the given index. More... | |
| Point< T > | getFeature (int index) const |
| Get the values of a feature from all points. More... | |
| Point< double > | getLabels () const |
| Point< double > | labels () const |
| std::vector< size_t > | classesDistribution () const |
| Returns a vector containing the frequency of the classes. Only valid for classification datasets. More... | |
| std::vector< std::string > | classesNames () const |
| Returns a vector containing the name of the classes. Only valid for classification datasets. More... | |
| std::vector< int > | getFeaturesNames () const |
| Returns the features names. More... | |
| mltk::Point< int > | featuresNames () const |
| std::vector< int > | getIndex () const |
| Returns the vector of indexes. More... | |
| double | getTime_mult () const |
| Return the time multiplier. More... | |
| void | setName (const std::string &name) |
| Set the name of the dataset. More... | |
| void | setClassesNames (const std::vector< std::string > class_names) |
| Set classes names. More... | |
| void | setClasses (const std::vector< int > &classes) |
| Set the classes to use in the dataset. More... | |
| void | setClassesAtEnd (bool atEnd) |
| Inform if the classes of the dataset to be loaded are at the beginning or end of the attributes. More... | |
| void | setPoint (int index, std::shared_ptr< Point< T > > p) |
| setPoint Set the point in a position of the data. More... | |
| void | setFeaturesNames (const std::vector< int > &fnames) |
| setFeaturesNames Set the name of the features of the data. More... | |
| void | setIndex (std::vector< int > index) |
| Set the index vector for the data. More... | |
| void | setDim (size_t dim) |
| setDim Set the dimension of the points. More... | |
| void | computeClassesDistribution () |
| Compute the frequency of each class in the dataset. More... | |
| void | shuffle (const size_t &seed=42) |
| Shuffle the data with a given seed. More... | |
| bool | load (const std::string &file) |
| Load a dataset from a file. More... | |
| bool | load (const std::string &file, bool _atEnd) |
| Load a dataset from a file. More... | |
| void | write (const std::string &fname, std::string ext) |
| write Write the data to a file with the given extension. More... | |
| bool | isEmpty () const |
| Returns if there's a dataset loaded. More... | |
| bool | isNormalized () const |
| Returns if the dataset is normalized. More... | |
| void | clear () |
| clear Clear the data. More... | |
| void | classesCopy (const Data< T > &_data, std::vector< int > &classes) |
| Makes a deep copy from another data object. More... | |
| mltk::Data< T > | copy () const |
| Returns a copy of itself. More... | |
| void | copy (const Data< T > &_data) |
| Makes a deep copy from another data object. More... | |
| void | copyZero (const Data< T > &other) |
| Returns a copy of the data with zero points. More... | |
| std::vector< Data< T > > | splitByClasses (bool keepIndex=false) |
| Split the dataset by its labels. Only valid for classification datasets. More... | |
| std::vector< Data< T > > | splitSample (const std::size_t &split_size, bool stratified=true, bool keepIndex=false, size_t seed=0) |
| Split the data by a given size. More... | |
| Data< T > | selectFeatures (std::vector< size_t > feats, int size=-1) |
| Returns a Data object with selected features. More... | |
| Data< T > | sampling (const size_t &samp_size, bool with_replacement=true, const int &seed=0) |
| Sample the dataset with the given size. More... | |
| void | apply (std::function< void(mltk::PointPointer< T > point)> f) |
| Apply a function to all points on the dataset. More... | |
| void | join (const Data< T > &data) |
| Merge one dataset with another. More... | |
| bool | insertPoint (const Data< T > &samples, int _index, bool keepIndex=false) |
| Insert a point to the data from another sample. More... | |
| bool | insertPoint (std::shared_ptr< Point< T > > p, bool keepIndex=false) |
| Insert a point to the end of points vector. More... | |
| bool | insertPoint (Point< T > p, bool keepIndex=false) |
| Insert a point to the end of points vector. More... | |
| std::vector< bool > | removePoints (std::vector< int > ids) |
| Remove several points from the sample. More... | |
| bool | removePoint (int pid) |
| Remove a point from the data. More... | |
| Data< T > | insertFeatures (std::vector< int > ins_feat, bool keepIndex=false) |
| insertFeatures Returns Data object with only features in array. More... | |
| Data< T > | removeFeatures (std::vector< int > feats, int fsize) const |
| Remove several features from the sample. More... | |
| bool | removeFeatures (std::vector< int > feats) |
| Remove features from the dataset. More... | |
| bool | updatePointValue (const size_t &idx, double value) |
| Updates a Point value. More... | |
| void | changeXVector (std::vector< int > index) |
| Change the x vector of a sample. More... | |
| void | normalize (double p=2) |
| normalize Normalize the dataset using a Lp-norm. More... | |
| void | resetIndex () |
| Reset the index vector. More... | |
| SampleIterator< T > | begin () |
| SampleIterator< T > | end () |
| ConstSampleIterator< T > | begin () const |
| ConstSampleIterator< T > | end () const |
| std::shared_ptr< Point< T > > | operator[] (size_t i) const |
| std::shared_ptr< Point< T > > & | operator[] (size_t i) |
| Point< T > | operator() (size_t i) const |
| Point< T > & | operator() (size_t i) |
| Data< T > & | operator= (const Data< T > &) |
| bool | operator== (const Data< T > &rhs) const |
| bool | operator!= (const Data< T > &rhs) const |
Friends | |
| template<typename U > | |
| std::ostream & | operator<< (std::ostream &output, const Data< U > &data) |
Wrapper for the dataset data.
| mltk::Data< T >::Data | ( | const std::string & | dataset, |
| bool | atEnd | ||
| ) |
| dataset | Dataset path. |
| atEnd | Indicates if labels are at end of the columns of the dataset. |
|
explicit |
Data constructor to load a dataset from a file.
| dataset | Path to the dataset to be loaded. |
| pos_class | String representing the positive class on the dataset. |
| neg_class | String representing the negative class on the dataset. |
| mltk::Data< T >::Data | ( | size_t | size, |
| size_t | dim, | ||
| T | val = T() |
||
| ) |
Constructor for instantiation with an initial size and dimension.
| size | The initial size of the data. |
| dim | The initial dimension of the data. |
| val | The default value for the initialization, if not given, it'll call the default constructor of T. |
|
explicit |
Constructor for empty data.
| pos_class | String representing the positive class on the dataset. |
| neg_class | String representing the negative class on the dataset. |
| void mltk::Data< T >::apply | ( | std::function< void(mltk::PointPointer< T > point)> | f | ) |
Apply a function to all points on the dataset.
| f | Function to be applied to the data, must receive a PointPointer. |
| void mltk::Data< T >::changeXVector | ( | std::vector< int > | index | ) |
Change the x vector of a sample.
| index | Indexes of the change to be made. |
| const std::vector< int > mltk::Data< T >::classes |
Returns a vector containing the numeric values of the classes.
| void mltk::Data< T >::classesCopy | ( | const Data< T > & | _data, |
| std::vector< int > & | classes | ||
| ) |
Makes a deep copy from another data object.
| std::vector< size_t > mltk::Data< T >::classesDistribution |
Returns a vector containing the frequency of the classes. Only valid for classification datasets.
| std::vector< std::string > mltk::Data< T >::classesNames |
Returns a vector containing the name of the classes. Only valid for classification datasets.
| void mltk::Data< T >::clear |
clear Clear the data.
| void mltk::Data< T >::computeClassesDistribution |
Compute the frequency of each class in the dataset.
| Data< T > mltk::Data< T >::copy |
Returns a copy of itself.
| void mltk::Data< T >::copy | ( | const Data< T > & | _data | ) |
Makes a deep copy from another data object.
| void mltk::Data< T >::copyZero | ( | const Data< T > & | other | ) |
Returns a copy of the data with zero points.
|
inline |
Returns the dimension of the dataset.
| Point< T > mltk::Data< T >::getFeature | ( | int | index | ) | const |
Get the values of a feature from all points.
| index | Index of the feature on the dataset. |
| std::vector< int > mltk::Data< T >::getFeaturesNames |
Returns the features names.
| std::vector< int > mltk::Data< T >::getIndex |
Returns the vector of indexes.
| double mltk::Data< T >::getTime_mult |
Return the time multiplier.
| const std::string & mltk::Data< T >::getType |
Returns the type of the dataset.
| mltk::Data< T > mltk::Data< T >::insertFeatures | ( | std::vector< int > | ins_feat, |
| bool | keepIndex = false |
||
| ) |
| bool mltk::Data< T >::insertPoint | ( | const Data< T > & | samples, |
| int | _index, | ||
| bool | keepIndex = false |
||
| ) |
Insert a point to the data from another sample.
| samples | Sample with the point to be added. |
| _index | Index of the point to be added. |
| bool mltk::Data< T >::insertPoint | ( | Point< T > | p, |
| bool | keepIndex = false |
||
| ) |
| bool mltk::Data< T >::insertPoint | ( | std::shared_ptr< Point< T > > | p, |
| bool | keepIndex = false |
||
| ) |
|
inline |
Inform if the dataset is used for classification.
| bool mltk::Data< T >::isEmpty |
Returns if there's a dataset loaded.
|
inline |
Returns if the dataset is normalized.
| void mltk::Data< T >::join | ( | const Data< T > & | data | ) |
Merge one dataset with another.
| data | Dataset to be joined. |
| bool mltk::Data< T >::load | ( | const std::string & | file | ) |
Load a dataset from a file.
| file | Path to dataset file. |
| bool mltk::Data< T >::load | ( | const std::string & | file, |
| bool | _atEnd | ||
| ) |
Load a dataset from a file.
| file | Path to dataset file. |
|
inline |
The name of the dataset is defined as the name of the file from which it was loaded.
| void mltk::Data< T >::normalize | ( | double | p = 2 | ) |
normalize Normalize the dataset using a Lp-norm.
| p | Norm to be utilized. |
| PointPointer< T > mltk::Data< T >::point | ( | int | index | ) | const |
Returns a shared pointer to the point with the given index.
| index | Position of a point in the points array. |
| std::vector< std::shared_ptr< Point< T > > > mltk::Data< T >::points |
Returns the vector of shared pointers to the Points of the sample.
| std::vector< std::shared_ptr< Point< T > > > mltk::Data< T >::points |
Returns the vector of shared pointers to the Points of the sample.
| bool mltk::Data< T >::removeFeatures | ( | std::vector< int > | feats | ) |
Remove features from the dataset.
| feats | Features to be removed. |
| Data< T > mltk::Data< T >::removeFeatures | ( | std::vector< int > | feats, |
| int | fsize | ||
| ) | const |
Remove several features from the sample.
| feats | Names of the features to be removed (must be sorted). |
| bool mltk::Data< T >::removePoint | ( | int | pid | ) |
Remove a point from the data.
| pid | Index of the point to be removed. |
| std::vector< bool > mltk::Data< T >::removePoints | ( | std::vector< int > | ids | ) |
Remove several points from the sample.
| ids | Ids of the points to be removed (must be sorted). |
| void mltk::Data< T >::resetIndex |
Reset the index vector.
| Data< T > mltk::Data< T >::sampling | ( | const size_t & | samp_size, |
| bool | with_replacement = true, |
||
| const int & | seed = 0 |
||
| ) |
Sample the dataset with the given size.
| samp_size | Sampling size. |
| with_replacement | Tells if sampling must be made with replacement. |
| seed | Random generator seed. |
| Data< T > mltk::Data< T >::selectFeatures | ( | std::vector< size_t > | feats, |
| int | size = -1 |
||
| ) |
| void mltk::Data< T >::setClasses | ( | const std::vector< int > & | classes | ) |
Set the classes to use in the dataset.
| classes | Vector of classes that will be in the dataset. |
|
inline |
Inform if the classes of the dataset to be loaded are at the beginning or end of the attributes.
| atEnd | Boolean informing if classes are at end. |
|
inline |
Set classes names.
| class_names | Strings representing the class names. |
| void mltk::Data< T >::setDim | ( | size_t | dim | ) |
setDim Set the dimension of the points.
| dim | Dimension to be set. |
| void mltk::Data< T >::setFeaturesNames | ( | const std::vector< int > & | fnames | ) |
setFeaturesNames Set the name of the features of the data.
| fnames | Name of the features. |
| void mltk::Data< T >::setIndex | ( | std::vector< int > | index | ) |
Set the index vector for the data.
| index | Index vector. |
|
inline |
Set the name of the dataset.
| name | Name of the dataset. |
| void mltk::Data< T >::setPoint | ( | int | index, |
| std::shared_ptr< Point< T > > | p | ||
| ) |
setPoint Set the point in a position of the data.
| index | Index of the point that will be set. |
| p | Point to be set. |
| void mltk::Data< T >::shuffle | ( | const size_t & | seed = 42 | ) |
Shuffle the data with a given seed.
| seed | Seed given for randomization. |
|
inline |
Returns the size of the dataset.
| std::vector< Data< T > > mltk::Data< T >::splitByClasses | ( | bool | keepIndex = false | ) |
Split the dataset by its labels. Only valid for classification datasets.
| std::vector< Data< T > > mltk::Data< T >::splitSample | ( | const std::size_t & | split_size, |
| bool | stratified = true, |
||
| bool | keepIndex = false, |
||
| size_t | seed = 0 |
||
| ) |
Split the data by a given size.
| split_size | Number of samples on each split. |
| stratified | If true, the split will be done in a stratified manner. |
| seed | Random generator seed. |
| bool mltk::Data< T >::updatePointValue | ( | const size_t & | idx, |
| double | value | ||
| ) |
Updates a Point value.
| idx | Index of the point to be updated. |
| value | New value of the point. |
| void mltk::Data< T >::write | ( | const std::string & | fname, |
| std::string | ext | ||
| ) |
write Write the data to a file with the given extension.
| fname | Name of the file. |
| ext | Extension of the file. |