fix
parent
4df9e9595a
commit
69f6e976f0
|
@ -121,7 +121,7 @@ class eNoseTrainer:
|
|||
|
||||
return model
|
||||
|
||||
def train_and_score_model_keras(self, X_train, X_test, y_train, y_test, seed, label):
|
||||
def train_and_score_model_keras(self, X_train, X_test, Y_train, Y_test, seed, label):
|
||||
# set_random_seed(seed)
|
||||
ntrials = 6
|
||||
tuner = RandomSearch(
|
||||
|
@ -140,7 +140,7 @@ class eNoseTrainer:
|
|||
os.makedirs(search_dir, exist_ok=True)
|
||||
search_callback = TensorBoard(log_dir=search_dir)
|
||||
early_stopping_search = EarlyStopping(monitor='val_loss', patience=13, min_delta=0.005, start_from_epoch=7, restore_best_weights=True)
|
||||
tuner.search(X_train, y_train, epochs=150, batch_size=10, validation_data=(X_test, y_test), callbacks=[early_stopping_search, search_callback])
|
||||
tuner.search(X_train, Y_train, epochs=150, batch_size=10, validation_data=(X_test, Y_test), callbacks=[early_stopping_search, search_callback])
|
||||
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
|
||||
|
||||
self.trained += 1
|
||||
|
@ -172,23 +172,23 @@ class eNoseTrainer:
|
|||
else:
|
||||
return {}
|
||||
|
||||
def train_and_score_model(self, model, X_train, X_test, y_train, y_test):
|
||||
def train_and_score_model(self, model, X_train, X_test, Y_train, Y_test):
|
||||
param_dist = self.get_tunable_params(model)
|
||||
|
||||
cv = StratifiedShuffleSplit(n_splits=int(1/(2*self.ratio))+1, test_size=self.ratio, random_state=get_seed())
|
||||
grid_search = GridSearchCV(estimator=model, param_grid=param_dist, scoring='neg_mean_squared_error', cv=cv, verbose=10, n_jobs=-1)
|
||||
|
||||
grid_search.fit(X_train, y_train)
|
||||
grid_search.fit(X_train, Y_train)
|
||||
|
||||
optimized_model = grid_search.best_estimator_
|
||||
model_params = grid_search.best_params_
|
||||
|
||||
y_aux = optimized_model.predict(X_train)
|
||||
tmse = mean_squared_error(y_train, y_aux)
|
||||
tmse = mean_squared_error(Y_train, y_aux)
|
||||
|
||||
y_pred = optimized_model.predict(X_test)
|
||||
mse = mean_squared_error(y_test, y_pred)
|
||||
mae = mean_absolute_error(y_test, y_pred)
|
||||
mse = mean_squared_error(Y_test, y_pred)
|
||||
mae = mean_absolute_error(Y_test, y_pred)
|
||||
rmse = np.sqrt(mse)
|
||||
|
||||
return tmse, mse, mae, rmse, optimized_model, model_params
|
||||
|
@ -205,9 +205,14 @@ class eNoseTrainer:
|
|||
|
||||
node = os.uname()[1]
|
||||
X_xboost, Y_xboost, G_xboost = self.loader.load_dataset_xboost()
|
||||
self.logger.debug(f"X_xboost: {X_xboost.shape}")
|
||||
self.logger.debug(f"Y_xboost: {Y_xboost.shape}")
|
||||
self.logger.debug(f"G_xboost: {G_xboost.shape}")
|
||||
|
||||
discretizer = KBinsDiscretizer(n_bins=50*Y_xboost.shape[1], encode='ordinal', strategy='uniform')
|
||||
Y_discrete = discretizer.fit_transform(Y_xboost)
|
||||
discretizer.fit(Y_xboost)
|
||||
Y_discrete = discretizer.transform(Y_xboost)
|
||||
self.logger.debug(f"Y_discrete: {Y_discrete.shape}")
|
||||
|
||||
gss = StratifiedGroupKFold(n_splits=int(1/self.ratio), shuffle=True, random_state=get_seed())
|
||||
|
||||
|
@ -215,8 +220,12 @@ class eNoseTrainer:
|
|||
os.makedirs('{}/{}/{}'.format(self.name, self.target, dataset), exist_ok=True)
|
||||
|
||||
for i, (train_index, test_index) in enumerate(gss.split(X_xboost, Y_discrete, G_xboost)):
|
||||
X_train, X_test = X_xboost[train_index], Y_xboost[test_index]
|
||||
y_train, y_test = Y_xboost[train_index], Y_xboost[test_index]
|
||||
X_train, X_test = X_xboost[train_index], X_xboost[test_index]
|
||||
Y_train, Y_test = Y_xboost[train_index], Y_xboost[test_index]
|
||||
self.logger.debug(f"X_train: {X_train.shape}")
|
||||
self.logger.debug(f"X_test: {X_test.shape}")
|
||||
self.logger.debug(f"Y_train: {Y_train.shape}")
|
||||
self.logger.debug(f"Y_test: {Y_test.shape}")
|
||||
|
||||
|
||||
for model in self.get_model_train():
|
||||
|
@ -229,7 +238,7 @@ class eNoseTrainer:
|
|||
|
||||
model_file = '{}/{}/{}/{}'.format(self.name, self.target, dataset, model_id )
|
||||
|
||||
tmse, mse, mae, rmse, optimized_model, model_params = self.train_and_score_model(model, X_train, X_test, y_train, y_test)
|
||||
tmse, mse, mae, rmse, optimized_model, model_params = self.train_and_score_model(model, X_train, X_test, Y_train, Y_test)
|
||||
|
||||
ts = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
|
||||
joblib.dump(optimized_model, model_file)
|
||||
|
@ -239,8 +248,8 @@ class eNoseTrainer:
|
|||
"Dataset": dataset,
|
||||
"Samples": Y_xboost.shape[0],
|
||||
"Target": self.target,
|
||||
"Train Size": y_train.shape[0],
|
||||
"Train Ratio": y_train.shape[0]/Y_xboost.shape[0],
|
||||
"Train Size": Y_train.shape[0],
|
||||
"Train Ratio": Y_train.shape[0]/Y_xboost.shape[0],
|
||||
"Ratio": self.ratio,
|
||||
"Model": model_id,
|
||||
"Params": json.dumps(model_params),
|
||||
|
@ -258,7 +267,7 @@ class eNoseTrainer:
|
|||
# model_file = '{}/{}/DNN_{}'.format(self.name, label, seed )
|
||||
# model_label = "{}".format(label)
|
||||
#
|
||||
# accuracy, specificity, recall, f1, roc_auc, optimized_model, parms = self.train_and_score_model_keras(X_train, X_test, y_train, y_test, seed, model_label)
|
||||
# accuracy, specificity, recall, f1, roc_auc, optimized_model, parms = self.train_and_score_model_keras(X_train, X_test, Y_train, Y_test, seed, model_label)
|
||||
# ts = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
|
||||
#
|
||||
# newrow = pd.DataFrame( [{"node": node,
|
||||
|
|
Loading…
Reference in New Issue