mix
parent
69f6e976f0
commit
4b9cdf76ad
|
@ -194,7 +194,7 @@ class eNoseTrainer:
|
|||
return tmse, mse, mae, rmse, optimized_model, model_params
|
||||
|
||||
def fit(self):
|
||||
total_train_queue = int(1/self.ratio)*len(self.get_model_train())
|
||||
total_train_queue = 2*int(1/self.ratio)*len(self.get_model_train())
|
||||
self.logger.info("{:=^60}".format(f'Begin Fit {total_train_queue} Models'))
|
||||
self.trained = 0
|
||||
manager = enlighten.get_manager()
|
||||
|
@ -216,10 +216,10 @@ class eNoseTrainer:
|
|||
|
||||
gss = StratifiedGroupKFold(n_splits=int(1/self.ratio), shuffle=True, random_state=get_seed())
|
||||
|
||||
dataset = 'Tabular'
|
||||
os.makedirs('{}/{}/{}'.format(self.name, self.target, dataset), exist_ok=True)
|
||||
|
||||
for i, (train_index, test_index) in enumerate(gss.split(X_xboost, Y_discrete, G_xboost)):
|
||||
dataset = 'Tabular'
|
||||
os.makedirs('{}/{}/{}'.format(self.name, self.target, dataset), exist_ok=True)
|
||||
X_train, X_test = X_xboost[train_index], X_xboost[test_index]
|
||||
Y_train, Y_test = Y_xboost[train_index], Y_xboost[test_index]
|
||||
self.logger.debug(f"X_train: {X_train.shape}")
|
||||
|
@ -263,6 +263,46 @@ class eNoseTrainer:
|
|||
|
||||
self.saveCheckPoint()
|
||||
|
||||
dataset = 'Tabular-s3'
|
||||
os.makedirs('{}/{}/{}'.format(self.name, self.target, dataset), exist_ok=True)
|
||||
X_xboost_no_noise = np.convolve(X_xboost, [0.2, 0.6, 0.2], mode='same')
|
||||
X_train, X_test = X_xboost_no_noise[train_index], X_xboost_no_noise[test_index]
|
||||
|
||||
for model in self.get_model_train():
|
||||
model_id = "{}_{}".format(type(model).__name__, i)
|
||||
self.trained += 1
|
||||
|
||||
if self.row_exists(dataset, model_id):
|
||||
self.bar.update()
|
||||
continue
|
||||
|
||||
model_file = '{}/{}/{}/{}'.format(self.name, self.target, dataset, model_id )
|
||||
|
||||
tmse, mse, mae, rmse, optimized_model, model_params = self.train_and_score_model(model, X_train, X_test, Y_train, Y_test)
|
||||
|
||||
ts = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
|
||||
joblib.dump(optimized_model, model_file)
|
||||
|
||||
newrow = pd.DataFrame( [{"node": node,
|
||||
"ts": ts,
|
||||
"Dataset": dataset,
|
||||
"Samples": Y_xboost.shape[0],
|
||||
"Target": self.target,
|
||||
"Train Size": Y_train.shape[0],
|
||||
"Train Ratio": Y_train.shape[0]/Y_xboost.shape[0],
|
||||
"Ratio": self.ratio,
|
||||
"Model": model_id,
|
||||
"Params": json.dumps(model_params),
|
||||
"Train mse": tmse,
|
||||
"mse": mse,
|
||||
"mae": mae,
|
||||
"rmse": rmse
|
||||
}] )
|
||||
self.ledger = pd.concat([self.ledger, newrow], ignore_index=True)
|
||||
self.bar.update()
|
||||
|
||||
self.saveCheckPoint()
|
||||
|
||||
# if self.dnn:
|
||||
# model_file = '{}/{}/DNN_{}'.format(self.name, label, seed )
|
||||
# model_label = "{}".format(label)
|
||||
|
|
Loading…
Reference in New Issue