ifiguero 2025-03-10 19:32:46 -03:00
parent 27cd228aea
commit c78c0c539b
1 changed file with 7 additions and 6 deletions

View File

@@ -5,7 +5,7 @@ import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams['text.usetex'] = True
from sklearn.model_selection import StratifiedGroupKFold, StratifiedShuffleSplit, GridSearchCV
from sklearn.model_selection import StratifiedGroupKFold, KBinsDiscretizer, StratifiedShuffleSplit, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
@@ -204,17 +204,18 @@ class eNoseTrainer:
node = os.uname()[1]
X_xboost, Y_xboost, G_xboost = self.loader.load_dataset_xboost()
target_scaler = MinMaxScaler()
Y_scaled = target_scaler.fit_transform(Y_xboost)
discretizer = KBinsDiscretizer(n_bins=50*Y_xboost.shape[1], encode='ordinal', strategy='uniform')
Y_discrete = discretizer.fit_transform(Y_xboost)
gss = StratifiedGroupKFold(n_splits=int(1/self.ratio), shuffle=True, random_state=get_seed())
dataset = 'Tabular'
os.makedirs('{}/{}/{}'.format(self.name, self.target, dataset), exist_ok=True)
for i, (train_index, test_index) in enumerate(gss.split(X_xboost, Y_xboost, G_xboost)):
X_train, X_test = X_xboost[train_index], Y_scaled[test_index]
y_train, y_test = Y_xboost[train_index], Y_scaled[test_index]
for i, (train_index, test_index) in enumerate(gss.split(X_xboost, Y_discrete, G_xboost)):
X_train, X_test = X_xboost[train_index], Y_xboost[test_index]
y_train, y_test = Y_xboost[train_index], Y_xboost[test_index]
for model in self.get_model_train():