![]()  | 
  
    UFJF - Machine Learning Toolkit
    0.51.8
    
   | 
 
Wrapper for the dataset data. More...
#include <Data.hpp>
Public Member Functions | |
| const std::string & | getType () const | 
| Returns the type of the dataset.  More... | |
| void | setType (const std::string &type) | 
| Data (const Data< T > &other) | |
| Data (const std::string &dataset, bool atEnd) | |
| Data (const char *dataset, const char *datasetType, const char *pos_class="1", const char *neg_class="-1") | |
| Data constructor to load a dataset from a file.  More... | |
| Data (size_t size, size_t dim, T val=T()) | |
| Constructor for instantiation with an initial size and dimension.  More... | |
| Data (const char *dataset) | |
| Constructor for empty data.  More... | |
| bool | isClassification () const | 
| Inform if the dataset is used for classification.  More... | |
| std::string | name () const | 
| The name of the dataset is defined as the name of the file from which it was loaded.  More... | |
| size_t | size () const | 
| Returns the size of the dataset.  More... | |
| size_t | dim () const | 
| Returns the dimension of the dataset.  More... | |
| std::vector< std::shared_ptr< Point< T > > > | points () | 
| Returns the vector of shared pointers to the Points of the sample.  More... | |
| std::vector< std::shared_ptr< Point< T > > > | points () const | 
| Returns the vector of shared pointers to the Points of the sample.  More... | |
| const std::vector< int > | classes () const | 
| Returns a vector containing the numeric values of the classes.  More... | |
| PointPointer< T > | point (int index) const | 
| Returns a shared pointer to the point with the given index.  More... | |
| Point< T > | getFeature (int index) const | 
| Get the values of a feature from all points.  More... | |
| Point< double > | getLabels () const | 
| Point< double > | labels () const | 
| std::vector< size_t > | classesDistribution () const | 
| Returns a vector containing the frequency of the classes. Only valid for classification datasets.  More... | |
| std::vector< std::string > | classesNames () const | 
| Returns a vector containing the name of the classes. Only valid for classification datasets.  More... | |
| std::vector< int > | getFeaturesNames () const | 
| Returns the features names.  More... | |
| mltk::Point< int > | featuresNames () const | 
| std::vector< int > | getIndex () const | 
| Returns the vector of indexes.  More... | |
| double | getTime_mult () const | 
| Return the time multiplier.  More... | |
| void | setName (const std::string &name) | 
| Set the name of the dataset.  More... | |
| void | setClassesNames (const std::vector< std::string > class_names) | 
| Set classes names.  More... | |
| void | setClasses (const std::vector< int > &classes) | 
| Set the classes to use in the dataset.  More... | |
| void | setClassesAtEnd (bool atEnd) | 
| Inform if the classes of the dataset to be loaded are at the beginning or end of the attributes.  More... | |
| void | setPoint (int index, std::shared_ptr< Point< T > > p) | 
| setPoint Set the point in a position of the data.  More... | |
| void | setFeaturesNames (const std::vector< int > &fnames) | 
| setFeaturesNames Set the name of the features of the data.  More... | |
| void | setIndex (std::vector< int > index) | 
| Set the index vector for the data.  More... | |
| void | setDim (size_t dim) | 
| setDim Set the dimension of the points.  More... | |
| void | computeClassesDistribution () | 
| Compute the frequency of each class in the dataset.  More... | |
| void | shuffle (const size_t &seed=42) | 
| Shuffle the data with a given seed.  More... | |
| bool | load (const std::string &file) | 
| Load a dataset from a file.  More... | |
| bool | load (const std::string &file, bool _atEnd) | 
| Load a dataset from a file.  More... | |
| void | write (const std::string &fname, std::string ext) | 
| write Write the data to a file with the given extension.  More... | |
| bool | isEmpty () const | 
| Returns if there's a dataset loaded.  More... | |
| bool | isNormalized () const | 
| Returns if the dataset is normalized.  More... | |
| void | clear () | 
| clear Clear the data.  More... | |
| void | classesCopy (const Data< T > &_data, std::vector< int > &classes) | 
| Makes a deep copy from another data object.  More... | |
| mltk::Data< T > | copy () const | 
| Returns a copy of itself.  More... | |
| void | copy (const Data< T > &_data) | 
| Makes a deep copy from another data object.  More... | |
| void | copyZero (const Data< T > &other) | 
| Returns a copy of the data with zero points.  More... | |
| std::vector< Data< T > > | splitByClasses (bool keepIndex=false) | 
| Split the dataset by its labels. Only valid for classification datasets.  More... | |
| std::vector< Data< T > > | splitSample (const std::size_t &split_size, bool stratified=true, bool keepIndex=false, size_t seed=0) | 
| Split the data by a given size.  More... | |
| Data< T > | selectFeatures (std::vector< size_t > feats, int size=-1) | 
| Returns a Data object with selected features.  More... | |
| Data< T > | sampling (const size_t &samp_size, bool with_replacement=true, const int &seed=0) | 
| Sample the dataset with the given size.  More... | |
| void | apply (std::function< void(mltk::PointPointer< T > point)> f) | 
| Apply a function to all points on the dataset.  More... | |
| void | join (const Data< T > &data) | 
| Merge one dataset with another.  More... | |
| bool | insertPoint (const Data< T > &samples, int _index, bool keepIndex=false) | 
| Insert a point to the data from another sample.  More... | |
| bool | insertPoint (std::shared_ptr< Point< T > > p, bool keepIndex=false) | 
| Insert a point to the end of points vector.  More... | |
| bool | insertPoint (Point< T > p, bool keepIndex=false) | 
| Insert a point to the end of points vector.  More... | |
| std::vector< bool > | removePoints (std::vector< int > ids) | 
| Remove several points from the sample.  More... | |
| bool | removePoint (int pid) | 
| Remove a point from the data.  More... | |
| Data< T > | insertFeatures (std::vector< int > ins_feat, bool keepIndex=false) | 
| insertFeatures Returns Data object with only features in array.  More... | |
| Data< T > | removeFeatures (std::vector< int > feats, int fsize) const | 
| Remove several features from the sample.  More... | |
| bool | removeFeatures (std::vector< int > feats) | 
| Remove features from the dataset.  More... | |
| bool | updatePointValue (const size_t &idx, double value) | 
| Updates a Point value.  More... | |
| void | changeXVector (std::vector< int > index) | 
| Change the x vector of a sample.  More... | |
| void | normalize (double p=2) | 
| normalize Normalize the dataset using an Lp-norm.  More... | |
| void | resetIndex () | 
| Reset the index vector.  More... | |
| SampleIterator< T > | begin () | 
| SampleIterator< T > | end () | 
| ConstSampleIterator< T > | begin () const | 
| ConstSampleIterator< T > | end () const | 
| std::shared_ptr< Point< T > > | operator[] (size_t i) const | 
| std::shared_ptr< Point< T > > & | operator[] (size_t i) | 
| Point< T > | operator() (size_t i) const | 
| Point< T > & | operator() (size_t i) | 
| Data< T > & | operator= (const Data< T > &) | 
| bool | operator== (const Data< T > &rhs) const | 
| bool | operator!= (const Data< T > &rhs) const | 
Friends | |
| template<typename U > | |
| std::ostream & | operator<< (std::ostream &output, const Data< U > &data) | 
Wrapper for the dataset data.
| mltk::Data< T >::Data | ( | const std::string & | dataset, | 
| bool | atEnd | ||
| ) | 
| dataset | Dataset path. | 
| atEnd | Indicates if labels are at end of the columns of the dataset. | 
      
  | 
  explicit | 
Data constructor to load a dataset from a file.
| dataset | Path to the dataset to be loaded. | 
| pos_class | String representing the positive class on the dataset. | 
| neg_class | String representing the negative class on the dataset. | 
| mltk::Data< T >::Data | ( | size_t | size, | 
| size_t | dim, | ||
| T | val = T()  | 
        ||
| ) | 
Constructor for instantiation with an initial size and dimension.
| size | The initial size of the data. | 
| dim | The initial dimension of the data. | 
| val | The default value for the initialization; if not given, the default constructor of T is used. | 
      
  | 
  explicit | 
Constructor for empty data.
| pos_class | String representing the positive class on the dataset. | 
| neg_class | String representing the negative class on the dataset. | 
| void mltk::Data< T >::apply | ( | std::function< void(mltk::PointPointer< T > point)> | f | ) | 
Apply a function to all points on the dataset.
| f | Function to be applied to the data, must receive a PointPointer. | 
| void mltk::Data< T >::changeXVector | ( | std::vector< int > | index | ) | 
Change the x vector of a sample.
| index | Indexes of the change to be made. | 
| const std::vector< int > mltk::Data< T >::classes | 
Returns a vector containing the numeric values of the classes.
| void mltk::Data< T >::classesCopy | ( | const Data< T > & | _data, | 
| std::vector< int > & | classes | ||
| ) | 
Makes a deep copy from another data object.
| std::vector< size_t > mltk::Data< T >::classesDistribution | 
Returns a vector containing the frequency of the classes. Only valid for classification datasets.
| std::vector< std::string > mltk::Data< T >::classesNames | 
Returns a vector containing the name of the classes. Only valid for classification datasets.
| void mltk::Data< T >::clear | 
clear Clear the data.
| void mltk::Data< T >::computeClassesDistribution | 
Compute the frequency of each class in the dataset.
| Data< T > mltk::Data< T >::copy | 
Returns a copy of itself.
| void mltk::Data< T >::copy | ( | const Data< T > & | _data | ) | 
Makes a deep copy from another data object.
| void mltk::Data< T >::copyZero | ( | const Data< T > & | other | ) | 
Returns a copy of the data with zero points.
      
  | 
  inline | 
Returns the dimension of the dataset.
| Point< T > mltk::Data< T >::getFeature | ( | int | index | ) | const | 
Get the values of a feature from all points.
| index | Index of the feature on the dataset. | 
| std::vector< int > mltk::Data< T >::getFeaturesNames | 
Returns the features names.
| std::vector< int > mltk::Data< T >::getIndex | 
Returns the vector of indexes.
| double mltk::Data< T >::getTime_mult | 
Return the time multiplier.
| const std::string & mltk::Data< T >::getType | 
Returns the type of the dataset.
| mltk::Data< T > mltk::Data< T >::insertFeatures | ( | std::vector< int > | ins_feat, | 
| bool | keepIndex = false  | 
        ||
| ) | 
| bool mltk::Data< T >::insertPoint | ( | const Data< T > & | samples, | 
| int | _index, | ||
| bool | keepIndex = false  | 
        ||
| ) | 
Insert a point to the data from another sample.
| samples | Sample with the point to be added. | 
| _index | Index of the point to be added. | 
| bool mltk::Data< T >::insertPoint | ( | Point< T > | p, | 
| bool | keepIndex = false  | 
        ||
| ) | 
| bool mltk::Data< T >::insertPoint | ( | std::shared_ptr< Point< T > > | p, | 
| bool | keepIndex = false  | 
        ||
| ) | 
      
  | 
  inline | 
Inform if the dataset is used for classification.
| bool mltk::Data< T >::isEmpty | 
Returns if there's a dataset loaded.
      
  | 
  inline | 
Returns if the dataset is normalized.
| void mltk::Data< T >::join | ( | const Data< T > & | data | ) | 
Merge one dataset with another.
| data | Dataset to be joined. | 
| bool mltk::Data< T >::load | ( | const std::string & | file | ) | 
Load a dataset from a file.
| file | Path to dataset file. | 
| bool mltk::Data< T >::load | ( | const std::string & | file, | 
| bool | _atEnd | ||
| ) | 
Load a dataset from a file.
| file | Path to dataset file. | 
      
  | 
  inline | 
The name of the dataset is defined as the name of the file from which it was loaded.
| void mltk::Data< T >::normalize | ( | double | p = 2 | ) | 
normalize Normalize the dataset using an Lp-norm.
| p | Norm to be utilized. | 
| PointPointer< T > mltk::Data< T >::point | ( | int | index | ) | const | 
Returns a shared pointer to the point with the given index.
| index | Position of a point in the points array. | 
| std::vector< std::shared_ptr< Point< T > > > mltk::Data< T >::points | 
Returns the vector of shared pointers to the Points of the sample.
| std::vector< std::shared_ptr< Point< T > > > mltk::Data< T >::points | 
Returns the vector of shared pointers to the Points of the sample.
| bool mltk::Data< T >::removeFeatures | ( | std::vector< int > | feats | ) | 
Remove features from the dataset.
| feats | Features to be removed. | 
| Data< T > mltk::Data< T >::removeFeatures | ( | std::vector< int > | feats, | 
| int | fsize | ||
| ) | const | 
Remove several features from the sample.
| feats | Names of the features to be removed (must be sorted). | 
| bool mltk::Data< T >::removePoint | ( | int | pid | ) | 
Remove a point from the data.
| pid | Index of the point to be removed. | 
| std::vector< bool > mltk::Data< T >::removePoints | ( | std::vector< int > | ids | ) | 
Remove several points from the sample.
| ids | Ids of the points to be removed (must be sorted). | 
| void mltk::Data< T >::resetIndex | 
Reset the index vector.
| Data< T > mltk::Data< T >::sampling | ( | const size_t & | samp_size, | 
| bool | with_replacement = true,  | 
        ||
| const int & | seed = 0  | 
        ||
| ) | 
Sample the dataset with the given size.
| samp_size | Sampling size. | 
| with_replacement | Tells if sampling must be made with replacement. | 
| seed | Random generator seed. | 
| Data< T > mltk::Data< T >::selectFeatures | ( | std::vector< size_t > | feats, | 
| int | size = -1  | 
        ||
| ) | 
| void mltk::Data< T >::setClasses | ( | const std::vector< int > & | classes | ) | 
Set the classes to use in the dataset.
| classes | Vector of classes that will be in the dataset. | 
      
  | 
  inline | 
Inform if the classes of the dataset to be loaded are at the beginning or end of the attributes.
| atEnd | Boolean informing if classes are at end. | 
      
  | 
  inline | 
Set classes names.
| class_names | Strings representing the class names. | 
| void mltk::Data< T >::setDim | ( | size_t | dim | ) | 
setDim Set the dimension of the points.
| dim | Dimension to be set. | 
| void mltk::Data< T >::setFeaturesNames | ( | const std::vector< int > & | fnames | ) | 
setFeaturesNames Set the name of the features of the data.
| fnames | Name of the features. | 
| void mltk::Data< T >::setIndex | ( | std::vector< int > | index | ) | 
Set the index vector for the data.
| index | Index vector. | 
      
  | 
  inline | 
Set the name of the dataset.
| name | Name of the dataset. | 
| void mltk::Data< T >::setPoint | ( | int | index, | 
| std::shared_ptr< Point< T > > | p | ||
| ) | 
setPoint Set the point in a position of the data.
| index | Index of the point that will be set. | 
| p | Point to be set. | 
| void mltk::Data< T >::shuffle | ( | const size_t & | seed = 42 | ) | 
Shuffle the data with a given seed.
| seed | Seed given for randomization. | 
      
  | 
  inline | 
Returns the size of the dataset.
| std::vector< Data< T > > mltk::Data< T >::splitByClasses | ( | bool | keepIndex = false | ) | 
Split the dataset by its labels. Only valid for classification datasets.
| std::vector< Data< T > > mltk::Data< T >::splitSample | ( | const std::size_t & | split_size, | 
| bool | stratified = true,  | 
        ||
| bool | keepIndex = false,  | 
        ||
| size_t | seed = 0  | 
        ||
| ) | 
Split the data by a given size.
| split_size | Number of samples on each split. | 
| stratified | If true, the split will be done in a stratified manner. | 
| seed | Random generator seed. | 
| bool mltk::Data< T >::updatePointValue | ( | const size_t & | idx, | 
| double | value | ||
| ) | 
Updates a Point value.
| idx | Index of the point to be updated. | 
| value | New value of the point. | 
| void mltk::Data< T >::write | ( | const std::string & | fname, | 
| std::string | ext | ||
| ) | 
write Write the data to a file with the given extension.
| fname | Name of the file. | 
| ext | Extension of the file. |