From 4b9cdf76adc055135d77837dcde813c8550d217f Mon Sep 17 00:00:00 2001 From: Israel Figueroa Date: Tue, 11 Mar 2025 02:14:25 -0300 Subject: [PATCH] mix --- TrainerClass.py | 46 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/TrainerClass.py b/TrainerClass.py index e5dfc28..1284bf0 100644 --- a/TrainerClass.py +++ b/TrainerClass.py @@ -194,7 +194,7 @@ class eNoseTrainer: return tmse, mse, mae, rmse, optimized_model, model_params def fit(self): - total_train_queue = int(1/self.ratio)*len(self.get_model_train()) + total_train_queue = 2*int(1/self.ratio)*len(self.get_model_train()) self.logger.info("{:=^60}".format(f'Begin Fit {total_train_queue} Models')) self.trained = 0 manager = enlighten.get_manager() @@ -216,10 +216,10 @@ class eNoseTrainer: gss = StratifiedGroupKFold(n_splits=int(1/self.ratio), shuffle=True, random_state=get_seed()) - dataset = 'Tabular' - os.makedirs('{}/{}/{}'.format(self.name, self.target, dataset), exist_ok=True) for i, (train_index, test_index) in enumerate(gss.split(X_xboost, Y_discrete, G_xboost)): + dataset = 'Tabular' + os.makedirs('{}/{}/{}'.format(self.name, self.target, dataset), exist_ok=True) X_train, X_test = X_xboost[train_index], X_xboost[test_index] Y_train, Y_test = Y_xboost[train_index], Y_xboost[test_index] self.logger.debug(f"X_train: {X_train.shape}") @@ -263,6 +263,46 @@ class eNoseTrainer: self.saveCheckPoint() + dataset = 'Tabular-s3' + os.makedirs('{}/{}/{}'.format(self.name, self.target, dataset), exist_ok=True) + X_xboost_no_noise = np.convolve(X_xboost, [0.2, 0.6, 0.2], mode='same') + X_train, X_test = X_xboost_no_noise[train_index], X_xboost_no_noise[test_index] + + for model in self.get_model_train(): + model_id = "{}_{}".format(type(model).__name__, i) + self.trained += 1 + + if self.row_exists(dataset, model_id): + self.bar.update() + continue + + model_file = '{}/{}/{}/{}'.format(self.name, self.target, dataset, model_id ) + + tmse, mse, mae, rmse, optimized_model, model_params = self.train_and_score_model(model, X_train, X_test, Y_train, Y_test) + + ts = datetime.now().strftime("%d/%m/%Y %H:%M:%S") + joblib.dump(optimized_model, model_file) + + newrow = pd.DataFrame( [{"node": node, + "ts": ts, + "Dataset": dataset, + "Samples": Y_xboost.shape[0], + "Target": self.target, + "Train Size": Y_train.shape[0], + "Train Ratio": Y_train.shape[0]/Y_xboost.shape[0], + "Ratio": self.ratio, + "Model": model_id, + "Params": json.dumps(model_params), + "Train mse": tmse, + "mse": mse, + "mae": mae, + "rmse": rmse + }] ) + self.ledger = pd.concat([self.ledger, newrow], ignore_index=True) + self.bar.update() + + self.saveCheckPoint() + # if self.dnn: # model_file = '{}/{}/DNN_{}'.format(self.name, label, seed ) # model_label = "{}".format(label)