diff --git a/TrainerClass.py b/TrainerClass.py index 8518bce..e5dfc28 100644 --- a/TrainerClass.py +++ b/TrainerClass.py @@ -121,7 +121,7 @@ class eNoseTrainer: return model - def train_and_score_model_keras(self, X_train, X_test, y_train, y_test, seed, label): + def train_and_score_model_keras(self, X_train, X_test, Y_train, Y_test, seed, label): # set_random_seed(seed) ntrials = 6 tuner = RandomSearch( @@ -140,7 +140,7 @@ class eNoseTrainer: os.makedirs(search_dir, exist_ok=True) search_callback = TensorBoard(log_dir=search_dir) early_stopping_search = EarlyStopping(monitor='val_loss', patience=13, min_delta=0.005, start_from_epoch=7, restore_best_weights=True) - tuner.search(X_train, y_train, epochs=150, batch_size=10, validation_data=(X_test, y_test), callbacks=[early_stopping_search, search_callback]) + tuner.search(X_train, Y_train, epochs=150, batch_size=10, validation_data=(X_test, Y_test), callbacks=[early_stopping_search, search_callback]) best_hps = tuner.get_best_hyperparameters(num_trials=1)[0] self.trained += 1 @@ -172,23 +172,23 @@ class eNoseTrainer: else: return {} - def train_and_score_model(self, model, X_train, X_test, y_train, y_test): + def train_and_score_model(self, model, X_train, X_test, Y_train, Y_test): param_dist = self.get_tunable_params(model) cv = StratifiedShuffleSplit(n_splits=int(1/(2*self.ratio))+1, test_size=self.ratio, random_state=get_seed()) grid_search = GridSearchCV(estimator=model, param_grid=param_dist, scoring='neg_mean_squared_error', cv=cv, verbose=10, n_jobs=-1) - grid_search.fit(X_train, y_train) + grid_search.fit(X_train, Y_train) optimized_model = grid_search.best_estimator_ model_params = grid_search.best_params_ y_aux = optimized_model.predict(X_train) - tmse = mean_squared_error(y_train, y_aux) + tmse = mean_squared_error(Y_train, y_aux) y_pred = optimized_model.predict(X_test) - mse = mean_squared_error(y_test, y_pred) - mae = mean_absolute_error(y_test, y_pred) + mse = mean_squared_error(Y_test, y_pred) + mae = mean_absolute_error(Y_test, y_pred) rmse = np.sqrt(mse) return tmse, mse, mae, rmse, optimized_model, model_params @@ -205,9 +205,14 @@ class eNoseTrainer: node = os.uname()[1] X_xboost, Y_xboost, G_xboost = self.loader.load_dataset_xboost() + self.logger.debug(f"X_xboost: {X_xboost.shape}") + self.logger.debug(f"Y_xboost: {Y_xboost.shape}") + self.logger.debug(f"G_xboost: {G_xboost.shape}") discretizer = KBinsDiscretizer(n_bins=50*Y_xboost.shape[1], encode='ordinal', strategy='uniform') - Y_discrete = discretizer.fit_transform(Y_xboost) + discretizer.fit(Y_xboost) + Y_discrete = discretizer.transform(Y_xboost) + self.logger.debug(f"Y_discrete: {Y_discrete.shape}") gss = StratifiedGroupKFold(n_splits=int(1/self.ratio), shuffle=True, random_state=get_seed()) @@ -215,8 +220,12 @@ class eNoseTrainer: os.makedirs('{}/{}/{}'.format(self.name, self.target, dataset), exist_ok=True) for i, (train_index, test_index) in enumerate(gss.split(X_xboost, Y_discrete, G_xboost)): - X_train, X_test = X_xboost[train_index], Y_xboost[test_index] - y_train, y_test = Y_xboost[train_index], Y_xboost[test_index] + X_train, X_test = X_xboost[train_index], X_xboost[test_index] + Y_train, Y_test = Y_xboost[train_index], Y_xboost[test_index] + self.logger.debug(f"X_train: {X_train.shape}") + self.logger.debug(f"X_test: {X_test.shape}") + self.logger.debug(f"Y_train: {Y_train.shape}") + self.logger.debug(f"Y_test: {Y_test.shape}") for model in self.get_model_train(): @@ -229,7 +238,7 @@ class eNoseTrainer: model_file = '{}/{}/{}/{}'.format(self.name, self.target, dataset, model_id ) - tmse, mse, mae, rmse, optimized_model, model_params = self.train_and_score_model(model, X_train, X_test, y_train, y_test) + tmse, mse, mae, rmse, optimized_model, model_params = self.train_and_score_model(model, X_train, X_test, Y_train, Y_test) ts = datetime.now().strftime("%d/%m/%Y %H:%M:%S") joblib.dump(optimized_model, model_file) @@ -239,8 +248,8 @@ class eNoseTrainer: "Dataset": dataset, "Samples": Y_xboost.shape[0], "Target": self.target, - "Train Size": y_train.shape[0], - "Train Ratio": y_train.shape[0]/Y_xboost.shape[0], + "Train Size": Y_train.shape[0], + "Train Ratio": Y_train.shape[0]/Y_xboost.shape[0], "Ratio": self.ratio, "Model": model_id, "Params": json.dumps(model_params), @@ -258,7 +267,7 @@ class eNoseTrainer: # model_file = '{}/{}/DNN_{}'.format(self.name, label, seed ) # model_label = "{}".format(label) # - # accuracy, specificity, recall, f1, roc_auc, optimized_model, parms = self.train_and_score_model_keras(X_train, X_test, y_train, y_test, seed, model_label) + # accuracy, specificity, recall, f1, roc_auc, optimized_model, parms = self.train_and_score_model_keras(X_train, X_test, Y_train, Y_test, seed, model_label) # ts = datetime.now().strftime("%d/%m/%Y %H:%M:%S") # # newrow = pd.DataFrame( [{"node": node,