ifiguero 2025-03-11 02:14:25 -03:00
parent 69f6e976f0
commit 4b9cdf76ad
1 changed file with 43 additions and 3 deletions

View File

@ -194,7 +194,7 @@ class eNoseTrainer:
return tmse, mse, mae, rmse, optimized_model, model_params
def fit(self):
total_train_queue = int(1/self.ratio)*len(self.get_model_train())
total_train_queue = 2*int(1/self.ratio)*len(self.get_model_train())
self.logger.info("{:=^60}".format(f'Begin Fit {total_train_queue} Models'))
self.trained = 0
manager = enlighten.get_manager()
@ -216,10 +216,10 @@ class eNoseTrainer:
gss = StratifiedGroupKFold(n_splits=int(1/self.ratio), shuffle=True, random_state=get_seed())
dataset = 'Tabular'
os.makedirs('{}/{}/{}'.format(self.name, self.target, dataset), exist_ok=True)
for i, (train_index, test_index) in enumerate(gss.split(X_xboost, Y_discrete, G_xboost)):
dataset = 'Tabular'
os.makedirs('{}/{}/{}'.format(self.name, self.target, dataset), exist_ok=True)
X_train, X_test = X_xboost[train_index], X_xboost[test_index]
Y_train, Y_test = Y_xboost[train_index], Y_xboost[test_index]
self.logger.debug(f"X_train: {X_train.shape}")
@ -263,6 +263,46 @@ class eNoseTrainer:
self.saveCheckPoint()
# Second training pass for the current fold, recorded under the 'Tabular-s3'
# dataset label: every model returned by get_model_train() is fitted on a copy
# of X_xboost convolved with the 3-tap kernel [0.2, 0.6, 0.2] (weights sum to
# 1), and one ledger row is appended per trained model.
dataset = 'Tabular-s3'
os.makedirs('{}/{}/{}'.format(self.name, self.target, dataset), exist_ok=True)
# NOTE(review): numpy documents np.convolve as taking one-dimensional inputs.
# X_xboost is row-indexed with train_index/test_index and is passed as X to
# gss.split(), so it is presumably 2-D (samples x features) -- if so, this call
# raises ValueError. TODO confirm the shape and which axis the smoothing is
# meant to run along.
# NOTE(review): the expression uses neither i nor the fold indices, so it
# produces the same array on every fold.
X_xboost_no_noise = np.convolve(X_xboost, [0.2, 0.6, 0.2], mode='same')
# Only the features are re-split; Y_train / Y_test are not reassigned here and
# keep the values set earlier for this fold.
X_train, X_test = X_xboost_no_noise[train_index], X_xboost_no_noise[test_index]
for model in self.get_model_train():
# Ledger key: "<model class name>_<fold index>".
model_id = "{}_{}".format(type(model).__name__, i)
# Incremented before the skip check below, so models that are skipped are
# counted as well.
self.trained += 1
# presumably row_exists() reports whether the ledger already holds a result
# for this (dataset, model_id) pair -- verify against its definition. When it
# does, only the progress bar advances and the model is not retrained.
if self.row_exists(dataset, model_id):
self.bar.update()
continue
# Output path for the serialized model; no file extension is appended.
model_file = '{}/{}/{}/{}'.format(self.name, self.target, dataset, model_id )
tmse, mse, mae, rmse, optimized_model, model_params = self.train_and_score_model(model, X_train, X_test, Y_train, Y_test)
# Timestamp is formatted day/month/year hour:minute:second.
ts = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
joblib.dump(optimized_model, model_file)
# One-row frame describing this run. `node` is not assigned in the visible
# lines -- it must come from earlier in the method or from module scope.
newrow = pd.DataFrame( [{"node": node,
"ts": ts,
"Dataset": dataset,
"Samples": Y_xboost.shape[0],
"Target": self.target,
"Train Size": Y_train.shape[0],
# Fraction of all samples that landed in this fold's training split.
"Train Ratio": Y_train.shape[0]/Y_xboost.shape[0],
"Ratio": self.ratio,
"Model": model_id,
"Params": json.dumps(model_params),
"Train mse": tmse,
"mse": mse,
"mae": mae,
"rmse": rmse
}] )
self.ledger = pd.concat([self.ledger, newrow], ignore_index=True)
self.bar.update()
# presumably persists the ledger so an interrupted run can resume -- confirm
# in saveCheckPoint().
self.saveCheckPoint()
# if self.dnn:
# model_file = '{}/{}/DNN_{}'.format(self.name, label, seed )
# model_label = "{}".format(label)