ifiguero 2025-03-10 23:11:41 -03:00
parent 4df9e9595a
commit 69f6e976f0
1 changed file with 23 additions and 14 deletions

View File

@ -121,7 +121,7 @@ class eNoseTrainer:
return model
def train_and_score_model_keras(self, X_train, X_test, y_train, y_test, seed, label):
def train_and_score_model_keras(self, X_train, X_test, Y_train, Y_test, seed, label):
# set_random_seed(seed)
ntrials = 6
tuner = RandomSearch(
@ -140,7 +140,7 @@ class eNoseTrainer:
os.makedirs(search_dir, exist_ok=True)
search_callback = TensorBoard(log_dir=search_dir)
early_stopping_search = EarlyStopping(monitor='val_loss', patience=13, min_delta=0.005, start_from_epoch=7, restore_best_weights=True)
tuner.search(X_train, y_train, epochs=150, batch_size=10, validation_data=(X_test, y_test), callbacks=[early_stopping_search, search_callback])
tuner.search(X_train, Y_train, epochs=150, batch_size=10, validation_data=(X_test, Y_test), callbacks=[early_stopping_search, search_callback])
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
self.trained += 1
@ -172,23 +172,23 @@ class eNoseTrainer:
else:
return {}
# NOTE(review): this span is a commit-diff rendering with the +/- markers and
# indentation lost. Lines using y_train/y_test appear to be the removed
# (pre-commit) side and the Y_train/Y_test lines that follow them the added
# (post-commit) side; only one line of each pair exists in the real file.
#
# train_and_score_model: grid-searches the hyper-parameters returned by
# self.get_tunable_params(model), then scores the best estimator on the
# training and test data.
# Returns the tuple (tmse, mse, mae, rmse, optimized_model, model_params):
# training MSE, test MSE, test MAE, test RMSE, the best estimator found by the
# grid search, and its best parameter set.
def train_and_score_model(self, model, X_train, X_test, y_train, y_test):
def train_and_score_model(self, model, X_train, X_test, Y_train, Y_test):
# Parameter grid to explore for this particular model.
param_dist = self.get_tunable_params(model)
# Inner cross-validation: int(1/(2*ratio))+1 shuffled splits, each using
# self.ratio as the validation size; the seed comes from get_seed().
cv = StratifiedShuffleSplit(n_splits=int(1/(2*self.ratio))+1, test_size=self.ratio, random_state=get_seed())
# Candidates are ranked by negated mean squared error.
grid_search = GridSearchCV(estimator=model, param_grid=param_dist, scoring='neg_mean_squared_error', cv=cv, verbose=10, n_jobs=-1)
grid_search.fit(X_train, y_train)
grid_search.fit(X_train, Y_train)
optimized_model = grid_search.best_estimator_
model_params = grid_search.best_params_
# Training-set error of the selected estimator.
y_aux = optimized_model.predict(X_train)
tmse = mean_squared_error(y_train, y_aux)
tmse = mean_squared_error(Y_train, y_aux)
# Error metrics on the test data passed in by the caller.
y_pred = optimized_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(Y_test, y_pred)
mae = mean_absolute_error(Y_test, y_pred)
# RMSE is derived from the test MSE computed above.
rmse = np.sqrt(mse)
return tmse, mse, mae, rmse, optimized_model, model_params
@ -205,9 +205,14 @@ class eNoseTrainer:
node = os.uname()[1]
X_xboost, Y_xboost, G_xboost = self.loader.load_dataset_xboost()
self.logger.debug(f"X_xboost: {X_xboost.shape}")
self.logger.debug(f"Y_xboost: {Y_xboost.shape}")
self.logger.debug(f"G_xboost: {G_xboost.shape}")
discretizer = KBinsDiscretizer(n_bins=50*Y_xboost.shape[1], encode='ordinal', strategy='uniform')
Y_discrete = discretizer.fit_transform(Y_xboost)
discretizer.fit(Y_xboost)
Y_discrete = discretizer.transform(Y_xboost)
self.logger.debug(f"Y_discrete: {Y_discrete.shape}")
gss = StratifiedGroupKFold(n_splits=int(1/self.ratio), shuffle=True, random_state=get_seed())
@ -215,8 +220,12 @@ class eNoseTrainer:
os.makedirs('{}/{}/{}'.format(self.name, self.target, dataset), exist_ok=True)
for i, (train_index, test_index) in enumerate(gss.split(X_xboost, Y_discrete, G_xboost)):
X_train, X_test = X_xboost[train_index], Y_xboost[test_index]
y_train, y_test = Y_xboost[train_index], Y_xboost[test_index]
X_train, X_test = X_xboost[train_index], X_xboost[test_index]
Y_train, Y_test = Y_xboost[train_index], Y_xboost[test_index]
self.logger.debug(f"X_train: {X_train.shape}")
self.logger.debug(f"X_test: {X_test.shape}")
self.logger.debug(f"Y_train: {Y_train.shape}")
self.logger.debug(f"Y_test: {Y_test.shape}")
for model in self.get_model_train():
@ -229,7 +238,7 @@ class eNoseTrainer:
model_file = '{}/{}/{}/{}'.format(self.name, self.target, dataset, model_id )
tmse, mse, mae, rmse, optimized_model, model_params = self.train_and_score_model(model, X_train, X_test, y_train, y_test)
tmse, mse, mae, rmse, optimized_model, model_params = self.train_and_score_model(model, X_train, X_test, Y_train, Y_test)
ts = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
joblib.dump(optimized_model, model_file)
@ -239,8 +248,8 @@ class eNoseTrainer:
"Dataset": dataset,
"Samples": Y_xboost.shape[0],
"Target": self.target,
"Train Size": y_train.shape[0],
"Train Ratio": y_train.shape[0]/Y_xboost.shape[0],
"Train Size": Y_train.shape[0],
"Train Ratio": Y_train.shape[0]/Y_xboost.shape[0],
"Ratio": self.ratio,
"Model": model_id,
"Params": json.dumps(model_params),
@ -258,7 +267,7 @@ class eNoseTrainer:
# model_file = '{}/{}/DNN_{}'.format(self.name, label, seed )
# model_label = "{}".format(label)
#
# accuracy, specificity, recall, f1, roc_auc, optimized_model, parms = self.train_and_score_model_keras(X_train, X_test, y_train, y_test, seed, model_label)
# accuracy, specificity, recall, f1, roc_auc, optimized_model, parms = self.train_and_score_model_keras(X_train, X_test, Y_train, Y_test, seed, model_label)
# ts = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
#
# newrow = pd.DataFrame( [{"node": node,