import numpy as np
import pandas as pd
import tensorflow as tf

import matplotlib.cm as cm
import matplotlib.pyplot as plt
import matplotlib

matplotlib.rcParams['text.usetex'] = True

from sklearn.preprocessing import KBinsDiscretizer, MinMaxScaler
from sklearn.model_selection import StratifiedGroupKFold, StratifiedShuffleSplit, GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.ensemble import RandomForestClassifier

from xgboost import XGBRegressor

import keras
from keras import layers

import optuna

# from ray import tune
# import ray
# from keras.callbacks import TensorBoard
# from keras.models import Sequential
# from keras.callbacks import EarlyStopping
# from keras.layers import Dense, BatchNormalization, Dropout
# from kerastuner.tuners import RandomSearch, Hyperband, GridSearch

from datetime import datetime
import enlighten
import logging
import zipfile
import random
import joblib
import pickle
import time
import json
import re
import os
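
# The `loader` object handed to eNoseTrainer is expected to provide (as
# inferred from how it is used below): label_file, target, target_list,
# target_len, smooth, source_channels, data_channels, scaled_data,
# dataset['range'], stats(), reset(), load_dataset_xboost() and
# load_dataset_window(width).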


def get_seed():
    return random.randint(0, 2**32 - 1)


class eNoseTrainer:
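    """Train, tune, and benchmark regressors on eNose sensor data.

    Grid-searched XGBoost models (tabular features) and Optuna-tuned Conv1D
    Keras models (windowed sequences) are evaluated under grouped, stratified
    cross-validation. Results are appended to an Excel ledger
    (Simulaciones.xlsx), fitted models are checkpointed to disk, and
    per-measurement prediction plots can be generated afterwards.
    """
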
    def __init__(self, loader, test_size=0.2, debug=False):
        self.ledger = pd.DataFrame(columns=["node", "ts", "Dataset", "Samples", "Target", "Train Size", "Train Ratio",
                                            "Model", "Params", "Ratio", "Train mse", "mse", "mae", "rmse", "num_params"])
        self.loader = loader
        self.name = self.loader.label_file
        self.state = dict()

        os.makedirs(self.name, exist_ok=True)
        self.start = int(time.time())

        log_format = '%(asctime)s | %(levelname)-8s | %(name)-15s | %(message)s'
        date_format = '%Y-%m-%d %H:%M:%S'
        logging.basicConfig(format=log_format, datefmt=date_format)

        target_log = '{}/load-{}.log'.format(self.name, self.start)
        fh = logging.FileHandler(target_log)
        # Without an explicit formatter the file handler would fall back to
        # bare messages, so apply the same format as the console output.
        fh.setFormatter(logging.Formatter(log_format, datefmt=date_format))

        optuna.logging.enable_propagation()        # Propagate logs to the root logger.
        optuna.logging.disable_default_handler()   # Stop showing logs in sys.stderr.

        self.debug = debug

        self.logger = logging.getLogger("eNoseTrainer")
        if self.debug:
            self.logger.setLevel(logging.DEBUG)
            fh.setLevel(logging.DEBUG)
        else:
            self.logger.setLevel(logging.INFO)
            fh.setLevel(logging.INFO)
        self.logger.addHandler(fh)

        self.ratio = test_size

        self.loader.stats()
        self.loadCheckPoint()

    def loadCheckPoint(self):
        if not os.path.isfile('{}/Simulaciones.xlsx'.format(self.name)):
            self.saveCheckPoint()

        with pd.ExcelFile('{}/Simulaciones.xlsx'.format(self.name)) as xls:
            self.ledger = pd.read_excel(xls, sheet_name='Historial')
            self.trained = self.ledger.shape[0]

        # with open('{}/vars.pickle'.format(self.name), 'rb') as pfile:
        #     self.ratio, self.state = pickle.load(pfile)

    def saveCheckPoint(self):
        with pd.ExcelWriter('{}/Simulaciones.xlsx'.format(self.name), engine='xlsxwriter') as xls:
            self.ledger.to_excel(xls, sheet_name='Historial', index=False)

        # with open('{}/vars.pickle'.format(self.name), 'wb') as pfile:
        #     pickle.dump((self.ratio, self.state), pfile, protocol=pickle.HIGHEST_PROTOCOL)

        self.trained = self.ledger.shape[0]

    def wrap_and_save(self):
        self.logger.info("{:=^60}".format(' Saving Summary and Wrap the output in a ZipFile '))

        with zipfile.ZipFile('{}-{}.zip'.format(self.name, self.start), 'w', zipfile.ZIP_LZMA) as zipf:
            for root, dirs, files in os.walk(self.name):
                for file in files:
                    zipf.write(os.path.join(root, file))

    def row_exists(self, dataset, model):
        search_result = self.ledger[(self.ledger["Dataset"] == dataset) &
                                    (self.ledger["Target"] == self.loader.target) &
                                    (self.ledger["Model"] == model) &
                                    (self.ledger["Ratio"] == self.ratio)].shape[0] > 0
        self.logger.debug(f'Looking for {dataset}, {model}, {self.loader.target}, {self.ratio} => {search_result} {self.ledger.shape}')
        return search_result

    def get_model_train(self):
        return [
            XGBRegressor(objective='reg:squarederror'),
        ]

    def get_tunable_params(self, model):
        if isinstance(model, XGBRegressor):
            return {
                'tree_method': ["hist"],
                "n_estimators": [100, 128, 150],
                'max_depth': [6, 7, 8],
                'subsample': [0.5, 0.6, 0.7],
                'multi_strategy': ['one_output_per_tree', 'multi_output_tree']
            }
        elif isinstance(model, RandomForestClassifier):
            return {
                "n_estimators": [50, 100, 200],
                "max_depth": [5, 10, 15],
                "max_features": [2, 5, 10]
                # other tunables: max_leaf_nodes, max_samples, min_impurity_decrease,
                # min_samples_leaf, min_samples_split, min_weight_fraction_leaf, ...
            }
        else:
            return {}

    def search_best_conv1D_v1(self, X_train, X_test, Y_train, Y_test, epochs=50, num_trials=100):
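        """Tune a single-block Conv1D regressor with Optuna.

        Returns (best_model, study, best_batch_size); best_model is rebuilt
        from the best trial's parameters and is not yet trained.
        """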
        input_shape = X_train.shape[1:]
        output_dim = Y_train.shape[1]

        def build_model(trial):
            """Builds a Keras model using hyperparameters suggested by Optuna"""
            filters = trial.suggest_categorical('filters', [32, 64, 128])
            kernel_l1 = trial.suggest_categorical('kernel_size', [3, 5, 7])
            # kernel_l2 = trial.suggest_categorical('kernel_size', [3, 5, 7])
            pool_size = trial.suggest_int('pool_size', 2, min(3, input_shape[0] - 1))
            dense_units = trial.suggest_categorical('dense_units', [32, 64, 128])
            dropout = trial.suggest_float('dropout', 0.1, 0.3)
            lr = trial.suggest_float('lr', 1e-4, 5e-3, log=True)
            batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128])

            inputs = keras.Input(shape=input_shape)
            x = layers.Conv1D(filters=filters, kernel_size=kernel_l1, activation='relu', strides=kernel_l1 // 2, padding='causal')(inputs)
            x = layers.MaxPooling1D(pool_size=pool_size)(x)
            # x = layers.Conv1D(filters=filters * 2, kernel_size=kernel_l2, activation='relu', strides=kernel_l2//2, padding='causal')(x)
            # x = layers.MaxPooling1D(pool_size=pool_size)(x)
            x = layers.Flatten()(x)
            x = layers.Dense(dense_units, activation='relu')(x)
            x = layers.Dropout(dropout)(x)
            outputs = layers.Dense(output_dim)(x)

            model = keras.Model(inputs, outputs)
            model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr), loss='mse')
            return model, batch_size

        def objective(trial):
            """Objective function for Optuna hyperparameter optimization"""
            model, batch_size = build_model(trial)
            # early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

            model.fit(
                X_train, Y_train,
                validation_data=(X_test, Y_test),
                epochs=epochs,
                batch_size=batch_size,
                verbose=0,
                # callbacks=[early_stopping]
            )

            Y_pred = model.predict(X_test)
            mse = mean_squared_error(Y_test, Y_pred)
            # num_params = model.count_params()              # Get number of weights in the model
            # trial.set_user_attr("num_params", num_params)  # Store it in the trial object

            return mse

        # Run hyperparameter tuning
        study = optuna.create_study(direction='minimize')
        study.optimize(objective, n_trials=num_trials)

        # Get best hyperparameters
        best_params = study.best_params

        # Train final model with best parameters
        best_model, best_batch_size = build_model(optuna.trial.FixedTrial(best_params))

        return best_model, study, best_batch_size

    def train_and_score_model(self, model, X_train, X_test, Y_train, Y_test):
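        """Grid-search the model's tunable parameters and score the winner.

        Returns (train_mse, mse, mae, rmse, optimized_model, model_params).
        """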
        param_dist = self.get_tunable_params(model)

        cv = StratifiedShuffleSplit(n_splits=int(1/(2*self.ratio))+1, test_size=self.ratio, random_state=get_seed())
        grid_search = GridSearchCV(estimator=model, param_grid=param_dist, scoring='neg_mean_squared_error', cv=cv, verbose=2, n_jobs=-1)

        grid_search.fit(X_train, Y_train)

        optimized_model = grid_search.best_estimator_
        model_params = grid_search.best_params_

        y_aux = optimized_model.predict(X_train)
        tmse = mean_squared_error(Y_train, y_aux)

        y_pred = optimized_model.predict(X_test)
        mse = mean_squared_error(Y_test, y_pred)
        mae = mean_absolute_error(Y_test, y_pred)
        rmse = np.sqrt(mse)

        return tmse, mse, mae, rmse, optimized_model, model_params

    def gen_plots(self, dataset, model_id, target=None):
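        """Plot a trained model's predictions over each measurement's scaled
        sensor traces, with the scaled targets drawn as dashed lines.
        """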
        if isinstance(target, list):
            self.loader.target_list = target
        if isinstance(target, str):
            self.loader.target_list = [target]

        if dataset.endswith("-conv3"):
            self.loader.smooth = 'conv3'
        else:
            self.loader.smooth = None

        self.loader.reset()

        if dataset.startswith("Conv1D"):
            width = int(m.group(1)) if (m := re.search(r'w(\d+)', dataset)) else 1
            self.logger.debug(f'Conv1D: {dataset} of width {width}')
            y_padding = np.zeros((width-1, self.loader.target_len))
        elif dataset.startswith("Tabular"):
            self.logger.debug(f'Tabular: {dataset}')
            width = 1
        else:
            self.logger.error(f'Unknown dataset type {dataset}')
            return

        if not self.row_exists(dataset, model_id):
            self.logger.error(f'No recorded run found for {dataset}, {model_id}')
            return

        model_file = '{}/{}/{}/{}'.format(self.name, self.loader.target, dataset, model_id)

        if width > 1:
            if not os.path.isfile(f'{model_file}.keras'):
                self.logger.debug(f'{model_file}')
                self.logger.error('Model file not found')
                return
            trained_model = keras.models.load_model(f"{model_file}.keras")
            trained_model.load_weights(f"{model_file}.weights.h5")
        else:
            if not os.path.isfile(model_file):
                self.logger.debug(f'{model_file}')
                self.logger.error('Model file not found')
                return
            trained_model = joblib.load(model_file)

        pics_folder = '{}/{}/{}/plots'.format(self.name, self.loader.target, dataset)
        os.makedirs(pics_folder, exist_ok=True)

        df = self.loader.scaled_data

        Y_samples = np.zeros((len(df), len(self.loader.target_list)))
        for i, sample in enumerate(df):
            Y_samples[i] = np.array([[df[sample]['label'][key] for key in self.loader.target_list]])

        self.logger.debug(f"Y_samples.shape: {Y_samples.shape}")

        target_scaler = MinMaxScaler()
        Y_samples = target_scaler.fit_transform(Y_samples)

        cmapx = cm.get_cmap('winter', len(self.loader.source_channels))
        cmapy = cm.get_cmap('prism', Y_samples.shape[1])

        for measurement, (r, l) in self.loader.dataset['range'].items():
            # df[measurement]['data'].plot(figsize=(12, 6), title=f"{measurement} Prediction")
            plt.figure(figsize=(12, 6))

            if dataset.startswith('Conv1D'):
                label = dataset.split('-')
                model_label = '-'.join(label[:2])
            else:
                model_label = "XGBRegressor"

            if self.loader.smooth is not None:
                model_label += " + denoise"

            plt.title(f"[{model_label}] Sample {measurement}")

            plt.xlabel("Sensor Array Readings")

            plt.vlines(x=r, ymin=0, ymax=1, colors='blue', linestyle='dashed')
            plt.vlines(x=l, ymin=0, ymax=1, colors='blue', linestyle='dashed')

            Y_value = np.zeros((1, len(self.loader.target_list)))
            Y_value[0] = np.array([[df[measurement]['label'][key] for key in self.loader.target_list]])

            self.logger.debug(f"Y_value.shape: {Y_value.shape}")
            self.logger.debug(f"Y_value: {Y_value}")

            Y_scaled = target_scaler.transform(Y_value).reshape(1, -1)
            self.logger.debug(f"Y_scaled.shape: {Y_scaled.shape}")
            self.logger.debug(f"Y_scaled: {Y_scaled}")

            if width > 1:
                plt.vlines(x=r+width, ymin=0, ymax=1, colors='cyan', linestyle='dashed')

                X_data = self.loader.scaled_data[measurement]['data']
                total_samples = X_data.shape[0] - width + 1
                x_samples = np.zeros((total_samples, width, self.loader.data_channels))

                for i in range(total_samples):
                    x_samples[i] = X_data.iloc[i:i + width].values

                y_pred_w = trained_model.predict(x_samples)
                self.logger.debug(f"y_pred_w.shape: {y_pred_w.shape}")
                self.logger.debug(f"y_padding.shape: {y_padding.shape}")
                self.logger.debug(f"X_data.shape: {X_data.shape}")

                # Pad the first (width - 1) steps, which have no full window,
                # so predictions stay aligned with the original time axis
                y_pred = np.concatenate((y_padding, y_pred_w))
                self.logger.debug(f"y_pred.shape: {y_pred.shape}")
            else:
                y_pred = trained_model.predict(df[measurement]['data'].to_numpy())
                self.logger.debug(f"y_pred.shape: {y_pred.shape}")

            # self.logger.debug(f"y_pred: {Y_scaled}")

            if y_pred.ndim == 2:
                plt.ylabel("Target dashed / Pred solid")
                for i, channel_name in enumerate(df[measurement]['data'].columns):
                    plt.plot(df[measurement]['data'][channel_name], linestyle='dotted', color=cmapx(i), alpha=0.2)
                for i in range(y_pred.shape[1]):
                    self.logger.debug(f"Y_scaled[0][i]: {Y_scaled[0][i]}")
                    plt.axhline(y=Y_scaled[0][i], color=cmapy(i), linestyle='dashed')
                    plt.plot(y_pred[:, i], color=cmapy(i), linestyle='solid')
            else:
                plt.ylabel("Samples dotted / Target dashed / Pred solid")
                for i, channel_name in enumerate(df[measurement]['data'].columns):
                    plt.plot(df[measurement]['data'][channel_name], linestyle='dotted', color=cmapx(i))
                plt.plot(y_pred, color=cmapy(0), linestyle='solid')
                plt.axhline(y=Y_scaled[0][0], color=cmapy(0), linestyle='dashed')

            filename = os.path.join(pics_folder, f"{measurement}_{model_id}.png")
            plt.savefig(filename, format='png')
            self.logger.info(f"Saved plot as {filename}")

            plt.close()

    def fit(self):
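        """Train every model family on every dataset variant.

        Runs grid-searched XGBoost on tabular features and Optuna-tuned
        Conv1D networks on windowed sequences, each on raw and
        conv3-smoothed data, under grouped stratified CV, checkpointing the
        ledger after every fold.
        """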
        windows = [32, 64, 128]
        # windows = [128, 256, 384]
        total_train_queue = 2*int(1/self.ratio)*(len(self.get_model_train())+len(windows))
        self.logger.info("{:=^60}".format(f'Begin Fit {total_train_queue} Models'))
        self.trained = 0
        manager = enlighten.get_manager()
        self.bar = manager.counter(total=total_train_queue, count=self.trained, desc='Tuning', unit='Models',
                                   format='{desc}{desc_pad}{percentage:3.0f}%|{bar}| {count:{len_total}d}/{total:d} [{elapsed}<{eta}, {rate:.2f}{unit_pad}{unit}/s]'
                                   )

        # Discretize the continuous targets so grouped CV can stratify on them
        discretizer = KBinsDiscretizer(n_bins=200, encode='ordinal', strategy='uniform')
        gss = StratifiedGroupKFold(n_splits=int(1/self.ratio), shuffle=True, random_state=get_seed())

        node = os.uname()[1]
        self.loader.smooth = None
        self.loader.reset()

        X_xboost, Y_xboost, G_xboost = self.loader.load_dataset_xboost()
        # self.logger.debug(f"X_xboost: {X_xboost.shape}")
        self.logger.debug(f"Y_xboost: {Y_xboost.shape}")
        # self.logger.debug(f"G_xboost: {G_xboost.shape}")

        Y_discrete = discretizer.fit_transform(Y_xboost)
        if Y_discrete.ndim == 2:
            Y_discrete = np.sum(Y_discrete, axis=1)
        # self.logger.debug(f"Y_discrete: {Y_discrete.shape}")

        dataset = 'Tabular'
        for i, (train_index, test_index) in enumerate(gss.split(X_xboost, Y_discrete, G_xboost)):
            self.logger.info("{:=^60}".format(f'CV {i+1}/{int(1/self.ratio)} {dataset}'))
            os.makedirs('{}/{}/{}'.format(self.name, self.loader.target, dataset), exist_ok=True)
            X_train, X_test = X_xboost[train_index], X_xboost[test_index]
            Y_train, Y_test = Y_xboost[train_index], Y_xboost[test_index]
            # self.logger.debug(f"X_train: {X_train.shape}")
            # self.logger.debug(f"X_test: {X_test.shape}")
            self.logger.debug(f"Y_train: {Y_train.shape}")
            self.logger.debug(f"Y_test: {Y_test.shape}")

            for model in self.get_model_train():
                model_id = "{}_{}".format(type(model).__name__, i)
                self.trained += 1

                if self.row_exists(dataset, model_id):
                    self.bar.update()
                    continue

                model_file = '{}/{}/{}/{}'.format(self.name, self.loader.target, dataset, model_id)

                tmse, mse, mae, rmse, optimized_model, model_params = self.train_and_score_model(model, X_train, X_test, Y_train, Y_test)

                ts = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
                joblib.dump(optimized_model, model_file)

                newrow = pd.DataFrame([{"node": node,
                                        "ts": ts,
                                        "Dataset": dataset,
                                        "Samples": Y_xboost.shape[0],
                                        "Target": self.loader.target,
                                        "Train Size": Y_train.shape[0],
                                        "Train Ratio": Y_train.shape[0]/Y_xboost.shape[0],
                                        "Ratio": self.ratio,
                                        "Model": model_id,
                                        "Params": json.dumps(model_params),
                                        "Train mse": tmse,
                                        "mse": mse,
                                        "mae": mae,
                                        "rmse": rmse,
                                        # Rough size proxy: total node count across the dumped
                                        # trees; dumps are unavailable for multi_output_tree
                                        "num_params": None if model_params.get('multi_strategy') == 'multi_output_tree' else sum(t.count("\n") for t in optimized_model.get_booster().get_dump())
                                        }])
                self.ledger = pd.concat([self.ledger, newrow], ignore_index=True)
                self.bar.update()

            self.saveCheckPoint()

        self.loader.smooth = 'conv3'
        self.loader.reset()
        X_xboost, Y_xboost, G_xboost = self.loader.load_dataset_xboost()
        # self.logger.debug(f"X_xboost: {X_xboost.shape}")
        self.logger.debug(f"Y_xboost: {Y_xboost.shape}")
        # self.logger.debug(f"G_xboost: {G_xboost.shape}")

        Y_discrete = discretizer.fit_transform(Y_xboost)
        if Y_discrete.ndim == 2:
            Y_discrete = np.sum(Y_discrete, axis=1)

        dataset = 'Tabular-conv3'
        for i, (train_index, test_index) in enumerate(gss.split(X_xboost, Y_discrete, G_xboost)):
            self.logger.info("{:=^60}".format(f'CV {i+1}/{int(1/self.ratio)} {dataset}'))
            os.makedirs('{}/{}/{}'.format(self.name, self.loader.target, dataset), exist_ok=True)
            X_train, X_test = X_xboost[train_index], X_xboost[test_index]
            Y_train, Y_test = Y_xboost[train_index], Y_xboost[test_index]
            # self.logger.debug(f"X_train: {X_train.shape}")
            # self.logger.debug(f"X_test: {X_test.shape}")
            self.logger.debug(f"Y_train: {Y_train.shape}")
            self.logger.debug(f"Y_test: {Y_test.shape}")

            for model in self.get_model_train():
                model_id = "{}_{}".format(type(model).__name__, i)
                self.trained += 1

                if self.row_exists(dataset, model_id):
                    self.bar.update()
                    continue

                model_file = '{}/{}/{}/{}'.format(self.name, self.loader.target, dataset, model_id)

                tmse, mse, mae, rmse, optimized_model, model_params = self.train_and_score_model(model, X_train, X_test, Y_train, Y_test)

                ts = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
                joblib.dump(optimized_model, model_file)

                newrow = pd.DataFrame([{"node": node,
                                        "ts": ts,
                                        "Dataset": dataset,
                                        "Samples": Y_xboost.shape[0],
                                        "Target": self.loader.target,
                                        "Train Size": Y_train.shape[0],
                                        "Train Ratio": Y_train.shape[0]/Y_xboost.shape[0],
                                        "Ratio": self.ratio,
                                        "Model": model_id,
                                        "Params": json.dumps(model_params),
                                        "Train mse": tmse,
                                        "mse": mse,
                                        "mae": mae,
                                        "rmse": rmse,
                                        "num_params": None if model_params.get('multi_strategy') == 'multi_output_tree' else sum(t.count("\n") for t in optimized_model.get_booster().get_dump())
                                        }])
                self.ledger = pd.concat([self.ledger, newrow], ignore_index=True)
                self.bar.update()

            self.saveCheckPoint()

        sample_size = 50000
        epochs = 50
        self.loader.smooth = None
        self.loader.reset()
        for window in windows:
            X_conv1d, Y_conv1d, G_conv1d = self.loader.load_dataset_window(window)
            self.logger.debug(f"X_conv1d: {X_conv1d.shape}")
            self.logger.debug(f"Y_conv1d: {Y_conv1d.shape}")
            self.logger.debug(f"G_conv1d: {G_conv1d.shape}")

            Y_discrete = discretizer.fit_transform(Y_conv1d)
            if Y_discrete.ndim == 2:
                Y_discrete = np.sum(Y_discrete, axis=1)

            dataset = f'Conv1D-w{window}'
            for i, (train_index, test_index) in enumerate(gss.split(X_conv1d, Y_discrete, G_conv1d)):
                self.logger.info("{:=^60}".format(f'CV {i+1}/{int(1/self.ratio)} {dataset}'))

                os.makedirs('{}/{}/{}'.format(self.name, self.loader.target, dataset), exist_ok=True)
                X_train, X_test = X_conv1d[train_index], X_conv1d[test_index]
                Y_train, Y_test = Y_conv1d[train_index], Y_conv1d[test_index]
                G_train, G_test = G_conv1d[train_index], G_conv1d[test_index]
                # self.logger.debug(f"X_train: {X_train.shape}")
                # self.logger.debug(f"X_test: {X_test.shape}")

                model_id = "Conv1D_v1_{}".format(i)
                self.trained += 1

                if self.row_exists(dataset, model_id):
                    self.bar.update()
                    continue

                model_file = '{}/{}/{}/{}'.format(self.name, self.loader.target, dataset, model_id)

                # Subsample the fold (stratified by group) so the Optuna
                # search stays tractable on the windowed data
                X_train_sample, _, Y_train_sample, _ = train_test_split(X_train, Y_train, stratify=G_train, train_size=0.8*sample_size / len(X_train), random_state=get_seed())
                X_test_sample, _, Y_test_sample, _ = train_test_split(X_test, Y_test, stratify=G_test, train_size=0.2*sample_size / len(X_test), random_state=get_seed())

                self.logger.debug(f"Y_train_sample: {Y_train_sample.shape}")
                self.logger.debug(f"Y_test_sample: {Y_test_sample.shape}")

                best_model, study, best_batch_size = self.search_best_conv1D_v1(X_train_sample, X_test_sample, Y_train_sample, Y_test_sample, epochs=10, num_trials=10)

                # Save study results to an Excel file
                trials_data = []
                for trial in study.trials:
                    trial_info = trial.params.copy()
                    trial_info['mse'] = trial.value
                    # trial_info['num_params'] = trial.user_attrs.get("num_params", 0)
                    trials_data.append(trial_info)

                df = pd.DataFrame(trials_data)
                df.to_excel(f"{model_file}.search.xlsx", index=False)

                self.logger.info(f"Training Model {model_id} with {study.best_params}")

                early_stopping = keras.callbacks.EarlyStopping(monitor='loss', patience=5, restore_best_weights=True, min_delta=0.0003)
                best_model.fit(X_train, Y_train, epochs=epochs, batch_size=best_batch_size, verbose=1, callbacks=[early_stopping])

                best_model.save(f"{model_file}.keras")
                best_model.save_weights(f"{model_file}.weights.h5")

                Y_train_pred = best_model.predict(X_train)
                Y_test_pred = best_model.predict(X_test)

                mse_train = mean_squared_error(Y_train, Y_train_pred)
                mse_test = mean_squared_error(Y_test, Y_test_pred)
                mae_test = mean_absolute_error(Y_test, Y_test_pred)
                rmse_test = np.sqrt(mse_test)

                ts = datetime.now().strftime("%d/%m/%Y %H:%M:%S")

                newrow = pd.DataFrame([{"node": node,
                                        "ts": ts,
                                        "Dataset": dataset,
                                        "Samples": Y_conv1d.shape[0],
                                        "Target": self.loader.target,
                                        "Train Size": Y_train.shape[0],
                                        "Train Ratio": Y_train.shape[0]/Y_conv1d.shape[0],
                                        "Ratio": self.ratio,
                                        "Model": model_id,
                                        "Params": json.dumps(study.best_params),
                                        "Train mse": mse_train,
                                        "mse": mse_test,
                                        "mae": mae_test,
                                        "rmse": rmse_test,
                                        "num_params": best_model.count_params()
                                        }])
                self.ledger = pd.concat([self.ledger, newrow], ignore_index=True)
                self.bar.update()
                self.saveCheckPoint()

        self.loader.smooth = 'conv3'
        self.loader.reset()
        for window in windows:
            X_conv1d, Y_conv1d, G_conv1d = self.loader.load_dataset_window(window)
            self.logger.debug(f"X_conv1d: {X_conv1d.shape}")
            self.logger.debug(f"Y_conv1d: {Y_conv1d.shape}")
            self.logger.debug(f"G_conv1d: {G_conv1d.shape}")

            Y_discrete = discretizer.fit_transform(Y_conv1d)
            if Y_discrete.ndim == 2:
                Y_discrete = np.sum(Y_discrete, axis=1)

            dataset = f'Conv1D-w{window}-{self.loader.smooth}'
            for i, (train_index, test_index) in enumerate(gss.split(X_conv1d, Y_discrete, G_conv1d)):
                self.logger.info("{:=^60}".format(f'CV {i+1}/{int(1/self.ratio)} {dataset}'))

                os.makedirs('{}/{}/{}'.format(self.name, self.loader.target, dataset), exist_ok=True)
                X_train, X_test = X_conv1d[train_index], X_conv1d[test_index]
                Y_train, Y_test = Y_conv1d[train_index], Y_conv1d[test_index]
                G_train, G_test = G_conv1d[train_index], G_conv1d[test_index]
                # self.logger.debug(f"X_train: {X_train.shape}")
                # self.logger.debug(f"X_test: {X_test.shape}")
                self.logger.debug(f"Y_train: {Y_train.shape}")
                self.logger.debug(f"Y_test: {Y_test.shape}")

                model_id = "Conv1D_v1_{}".format(i)
                self.trained += 1

                if self.row_exists(dataset, model_id):
                    self.bar.update()
                    continue

                model_file = '{}/{}/{}/{}'.format(self.name, self.loader.target, dataset, model_id)

                X_train_sample, _, Y_train_sample, _ = train_test_split(X_train, Y_train, stratify=G_train, train_size=0.8*sample_size / len(X_train), random_state=get_seed())
                X_test_sample, _, Y_test_sample, _ = train_test_split(X_test, Y_test, stratify=G_test, train_size=0.2*sample_size / len(X_test), random_state=get_seed())

                self.logger.debug(f"Y_train_sample: {Y_train_sample.shape}")
                self.logger.debug(f"Y_test_sample: {Y_test_sample.shape}")

                best_model, study, best_batch_size = self.search_best_conv1D_v1(X_train_sample, X_test_sample, Y_train_sample, Y_test_sample, epochs=10, num_trials=10)

                # Save study results to an Excel file
                trials_data = []
                for trial in study.trials:
                    trial_info = trial.params.copy()
                    trial_info['mse'] = trial.value
                    # trial_info['num_params'] = trial.user_attrs.get("num_params", 0)
                    trials_data.append(trial_info)

                df = pd.DataFrame(trials_data)
                df.to_excel(f"{model_file}.search.xlsx", index=False)

                self.logger.info(f"Training Model {model_id} with {study.best_params}")

                early_stopping = keras.callbacks.EarlyStopping(monitor='loss', patience=5, restore_best_weights=True, min_delta=0.0003)
                best_model.fit(X_train, Y_train, epochs=epochs, batch_size=best_batch_size, verbose=1, callbacks=[early_stopping])

                best_model.save(f"{model_file}.keras")
                best_model.save_weights(f"{model_file}.weights.h5")

                Y_train_pred = best_model.predict(X_train)
                Y_test_pred = best_model.predict(X_test)

                mse_train = mean_squared_error(Y_train, Y_train_pred)
                mse_test = mean_squared_error(Y_test, Y_test_pred)
                mae_test = mean_absolute_error(Y_test, Y_test_pred)
                rmse_test = np.sqrt(mse_test)

                ts = datetime.now().strftime("%d/%m/%Y %H:%M:%S")

                newrow = pd.DataFrame([{"node": node,
                                        "ts": ts,
                                        "Dataset": dataset,
                                        "Samples": Y_conv1d.shape[0],
                                        "Target": self.loader.target,
                                        "Train Size": Y_train.shape[0],
                                        "Train Ratio": Y_train.shape[0]/Y_conv1d.shape[0],
                                        "Ratio": self.ratio,
                                        "Model": model_id,
                                        "Params": json.dumps(study.best_params),
                                        "Train mse": mse_train,
                                        "mse": mse_test,
                                        "mae": mae_test,
                                        "rmse": rmse_test,
                                        "num_params": best_model.count_params()
                                        }])
                self.ledger = pd.concat([self.ledger, newrow], ignore_index=True)
                self.bar.update()
                self.saveCheckPoint()

        self.bar.close()
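

# Minimal usage sketch, kept commented out: `eNoseLoader` is a hypothetical
# stand-in for whatever loader implements the interface described at the top
# of this module; the call order below mirrors how this class is meant to be
# driven.
#
# if __name__ == '__main__':
#     loader = eNoseLoader(label_file='eNose-dataset')   # hypothetical loader
#     trainer = eNoseTrainer(loader, test_size=0.2, debug=True)
#     trainer.fit()                                      # train/tune all model variants
#     trainer.gen_plots('Tabular', 'XGBRegressor_0')     # per-measurement prediction plots
#     trainer.wrap_and_save()                            # zip logs, models, and the ledger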