# Source code for olpy.datasets.dataset

import numpy as np 
import pandas as pd


class Dataset:
    """A helper class to load a training and a testing dataset.

    Both files are read with :func:`pandas.read_csv`. For each of them
    the column named ``label`` is separated from the remaining columns,
    and both parts are stored as NumPy arrays.

    Attributes:
        train_data (:obj:`numpy.ndarray`): The training data without
            the labels.
        train_target (:obj:`numpy.ndarray`): The output variable for the
            training data.
        test_data (:obj:`numpy.ndarray`): The testing data without
            the labels.
        test_target (:obj:`numpy.ndarray`): The output variable for the
            test data.

    Args:
        f_train (str): The path to the file containing the training
            dataset. It is passed unchanged to ``pandas.read_csv``.
        f_test (str): The path to the file containing the testing
            dataset. It is passed unchanged to ``pandas.read_csv``.
        label (str, optional): The column in which the target variable
            is located in the files. Defaults to ``'Label'``.

    Raises:
        FileNotFoundError: if the supplied files are inexistent (raised
            by ``pandas.read_csv``).
        KeyError: if the label provided does not match any column in
            one of the files.

    """

    def __init__(self, f_train, f_test, label='Label'):
        # Load both tables first so that a missing file is reported
        # before any column lookup is attempted.
        train_frame = pd.read_csv(f_train)
        test_frame = pd.read_csv(f_test)

        # Retrieve the relevant parts
        self.train_data, self.train_target = self._split_features_target(
            train_frame, label, 'training')
        self.test_data, self.test_target = self._split_features_target(
            test_frame, label, 'testing')

    @staticmethod
    def _split_features_target(frame, label, which):
        """Split ``frame`` into a feature array and a target array.

        Args:
            frame (:obj:`pandas.DataFrame`): The loaded table.
            label: The name of the target column.
            which (str): Name of the dataset, used only in the error
                message.

        Returns:
            tuple: ``(features, target)``, both obtained with
            ``to_numpy()``; ``features`` holds every column but
            ``label``.

        Raises:
            KeyError: if ``label`` is not a column of ``frame``.
        """
        try:
            target_column = frame[label]
        except KeyError as err:
            # Same exception type pandas raises, but say which file is
            # at fault instead of only echoing the column name.
            raise KeyError(
                'label column {!r} not found in the {} dataset'.format(
                    label, which)
            ) from err
        features = frame.drop(columns=[label]).to_numpy()
        return features, target_column.to_numpy()