test
parent 4b9cdf76ad, commit 632b66e0ce
@@ -15,6 +15,7 @@ class GasSensorDataLoader:
         self.data_folder = os.path.splitext(label_file)[0]
         self.state_file = f"{self.label_file}.pkl"
         self.lower_limit = lower_limit
+        self.smooth = None
         self.data = None
         self.debug = debug
         self.threshold = threshold
@@ -24,6 +25,7 @@ class GasSensorDataLoader:

         self.samples = {}
         self.target_list = sorted(target_list)
+        self.target = '_'.join(self.target_list)
         self.target_len = len(self.target_list)
         self.source_channels = sorted(source_channels)
         self.force_overwrite = force_overwrite
@@ -53,6 +55,7 @@ class GasSensorDataLoader:
             if False:#not self.force_overwrite and not self._compare_state_with_main():
                 raise ValueError("State file differs from the main Excel file. Use 'force_overwrite=True' to overwrite.")
             else:
+                self.logger.info(f"Init for {len(self.target_list)} targets => {self.target_list}")
                 self.load_state()
         else:
             self.logger.info("State file not found. Loading dataset.")
@@ -69,6 +72,27 @@ class GasSensorDataLoader:
             self.logger.error(f"Error comparing state file: {e}")
             return False

+    def reset(self):
+        self.dataset = {}
+        self.dataset['threshold'] = self.threshold
+        self.dataset['range'] = {}
+
+        if isinstance(self.target_list, list):
+            self.target_list = sorted(self.target_list)
+
+        elif isinstance(self.target_list, str):
+            self.target_list = list(self.target_list)
+
+        self.target = '_'.join(self.target_list)
+        self.target_len = len(self.target_list)
+
+        self.logger.info(f"Reset requested. Init for {len(self.target_list)} targets => {self.target}")
+
+        delattr(self, "delta_data")
+        delattr(self, "scaled_data")
+        self.init_minmax()
+        self.stats()
+
     def load_dataset(self):
         self.logger.info("Loading dataset from Excel files.")
         labels = pd.read_excel(self.main_file)
@@ -114,6 +138,15 @@ class GasSensorDataLoader:
     def init_delta(self):
         self.logger.info("Initializing dataset delta values.")
         data_copy = {key: {'label': value['label'], 'sampleId': value['sampleId'], 'data': value['data'].copy()} for key, value in self.data.items()}
+        if self.smooth == 'conv3':
+            kernel = np.array([0.2, 0.6, 0.2])
+            for key in data_copy:
+                tempdf = pd.DataFrame()
+                for col in data_copy[key]['data'].columns:
+                    tempdf[col] = np.convolve(data_copy[key]['data'][col], kernel, mode='valid')
+                data_copy[key]['data'] = tempdf.copy()
+
+
         lower_limit = pd.concat([data_copy[key]['data'] for key in data_copy], axis=0).max() * self.lower_limit
         self.logger.debug("Lower limit {}.".format(lower_limit))
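Note: the new conv3 branch smooths every channel with a centred 3-tap weighted average before deltas are computed. A standalone sketch of its effect (toy series, not repo data):

    import numpy as np
    import pandas as pd

    kernel = np.array([0.2, 0.6, 0.2])             # weights sum to 1.0
    sig = pd.Series([1.0, 1.0, 5.0, 1.0, 1.0])     # a one-sample spike
    print(np.convolve(sig, kernel, mode='valid'))  # [1.8 3.4 1.8]
    # the spike is spread over its neighbours, and mode='valid' trims one
    # sample at each edge (length 5 -> 3), so every measurement gets shorter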
@@ -140,6 +173,7 @@ class GasSensorDataLoader:
         for key in data_instance:
             if channel_name in data_instance[key]['data'].columns:
                 plt.plot(data_instance[key]['data'][channel_name])
+
         plt.xlabel("Time")
         plt.ylabel("Sensor Reading")
         plt.title(f"{title} Sensor Channel: {channel_name}")
@@ -314,7 +348,8 @@ class GasSensorDataLoader:
             x_output = np.concatenate((x_output, x_sample))
             y_output = np.concatenate((y_output, y_sample))

-
+        target_scaler = MinMaxScaler()
+        y_output = target_scaler.fit_transform(y_output)
         self.dataset['xboost'] = (x_output, y_output, g_output)

         return self.dataset['xboost']
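Note: the two added lines rescale each target column to [0, 1] before the tuple is cached, so downstream regressors see comparable ranges across gases. A minimal sketch (hypothetical concentrations):

    import numpy as np
    from sklearn.preprocessing import MinMaxScaler

    y = np.array([[10., 200.], [20., 400.], [15., 300.]])  # two gases, toy ppm
    scaler = MinMaxScaler()
    y_scaled = scaler.fit_transform(y)           # each column mapped to [0, 1]
    y_orig = scaler.inverse_transform(y_scaled)  # recover original units

Since the scaler is local to this method, callers that need original units must refit one the same way, which is what the new gen_plots below does.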
@@ -426,7 +461,7 @@ class GasSensorDataLoader:
 # loader.plotRawdata(save=True)
 # loader.plotDeltadata(save=True)
 # loader.plotScaledBoundaries(save=True)
-# # loader.threshold = 0.90
+# # loader.threshold = 0.90, smooth=None
 # print(loader.load_dataset_window(128).shape)
 # loader.threshold = 0.85
 # print(loader.load_dataset_window(128).shape)
TrainerClass.py (173 changed lines)
@@ -1,6 +1,7 @@
 import numpy as np
 import pandas as pd
 import tensorflow as tf
+import matplotlib.cm as cm
 import matplotlib.pyplot as plt
 import matplotlib
 matplotlib.rcParams['text.usetex'] = True
@@ -36,12 +37,10 @@ def get_seed():
     return random.randint(0, 2**32 - 1)

 class eNoseTrainer:
-    def __init__(self, loader, splits=1, test_size=0.2, debug=False):
+    def __init__(self, loader, test_size=0.2, debug=False):
         self.ledger = pd.DataFrame(columns=["node", "ts", "Dataset", "Samples", "Target", "Train Size", "Train Ratio", "Model", "Params", "Ratio", "Train mse", "mse", "mae", "rmse"])
         self.loader = loader
-        self.splits = splits
         self.name = self.loader.label_file
-        self.target = '_'.join(self.loader.target_list)
         self.state = dict()

         os.makedirs(self.name, exist_ok=True)
@@ -78,15 +77,15 @@ class eNoseTrainer:
             self.ledger = pd.read_excel(xls, sheet_name='Historial')
             self.trained = self.ledger.shape[0]

-        with open('{}/vars.pickle'.format(self.name), 'rb') as pfile:
-            self.ratio, self.splits, self.state = pickle.load(pfile)
+        # with open('{}/vars.pickle'.format(self.name), 'rb') as pfile:
+        #     self.ratio, self.state = pickle.load(pfile)

     def saveCheckPoint(self):
         with pd.ExcelWriter('{}/Simulaciones.xlsx'.format(self.name), engine='xlsxwriter') as xls:
             self.ledger.to_excel(xls, sheet_name='Historial', index=False)

-        with open('{}/vars.pickle'.format(self.name), 'wb') as pfile:
-            pickle.dump((self.ratio, self.splits, self.state), pfile, protocol=pickle.HIGHEST_PROTOCOL)
+        # with open('{}/vars.pickle'.format(self.name), 'wb') as pfile:
+        #     pickle.dump((self.ratio, self.state), pfile, protocol=pickle.HIGHEST_PROTOCOL)

         self.trained = self.ledger.shape[0]
@@ -102,7 +101,9 @@ class eNoseTrainer:
                 zipf.write(os.path.join(root, file))

     def row_exists(self, dataset, model):
-        return self.ledger[(self.ledger["Dataset"] == dataset) & (self.ledger["Target"] == self.target) & (self.ledger["Model"] == model) & (self.ledger["Ratio"] == self.ratio)].shape[0] > 0
+        search_result = self.ledger[(self.ledger["Dataset"]==dataset) & (self.ledger["Target"]==self.loader.target) & (self.ledger["Model"]==model) & (self.ledger["Ratio"]==self.ratio)].shape[0] > 0
+        self.logger.debug(f'Looking for {dataset}, {model}, {self.loader.target}, {self.ratio} => {search_result} {self.ledger.shape}')
+        return search_result

     def model_A(self, hp):
@@ -155,13 +156,11 @@ class eNoseTrainer:
     def get_tunable_params(self, model):
         if isinstance(model, XGBRegressor):
             return {
-                "n_estimators": [800, 1000, 1200],
-                "learning_rate": np.logspace(-1.5, -0.5, 3),
-                'max_depth': [5, 7, 9],
-                'subsample': [0.5, 0.75, 1.0],
-                # 'colsample_bytree': [0.8, 0.9, 1.0],
-                # 'gamma': [0, 0.1, 0.2],
-                # 'min_child_weight': [1, 3, 5]
+                'tree_method': ["hist"],
+                "n_estimators": [100, 128, 150],
+                'max_depth': [6, 7, 8],
+                'subsample': [0.5, 0.6, 0.7],
+                'multi_strategy': ['one_output_per_tree', 'multi_output_tree']
             }
         elif isinstance(model, RandomForestClassifier):
             return {
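Note: the reworked grid pins tree_method to "hist", which XGBoost's multi_strategy option requires (available since XGBoost 2.0): 'multi_output_tree' grows trees that predict all gas targets jointly, while 'one_output_per_tree' keeps a separate ensemble per target. A runnable sketch with random stand-in data and values taken from the new grid:

    import numpy as np
    from xgboost import XGBRegressor

    X = np.random.rand(64, 10)   # 64 windows, 10 sensor channels
    Y = np.random.rand(64, 3)    # 3 scaled gas concentrations
    model = XGBRegressor(tree_method="hist", multi_strategy="multi_output_tree",
                         n_estimators=128, max_depth=7, subsample=0.6)
    model.fit(X, Y)
    print(model.predict(X).shape)  # (64, 3): one prediction per target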
@@ -193,6 +192,86 @@ class eNoseTrainer:

         return tmse, mse, mae, rmse, optimized_model, model_params

+    def gen_plots(self, dataset, model_id, target=None):
+        if isinstance(target, list):
+            self.loader.target_list=target
+        if isinstance(target, str):
+            self.loader.target_list= list(target)
+
+        if dataset.endswith("-conv3"):
+            self.loader.smooth = 'conv3'
+        else:
+            self.loader.smooth = None
+
+        self.loader.reset()
+        if not self.row_exists(dataset, model_id):
+            self.logger.error(f'No se encuentra la simulacion {dataset}, {model_id}')
+            return
+
+        model_file = '{}/{}/{}/{}'.format(self.name, self.loader.target, dataset, model_id )
+        if not os.path.isfile(model_file):
+            self.logger.error('No se encuentra el modelo')
+            return
+
+        trained_model = joblib.load(model_file)
+
+        pics_folder = '{}/{}/{}/plots'.format(self.name, self.loader.target, dataset)
+        os.makedirs(pics_folder, exist_ok=True)
+
+        df = self.loader.scaled_data
+
+        Y_samples = np.zeros((len(df), len(self.loader.target_list)))
+        for i, sample in enumerate(df):
+            Y_samples[i] = np.array([[df[sample]['label'][key] for key in self.loader.target_list]])
+
+        self.logger.debug(f"Y_samples.shape: {Y_samples.shape}")
+
+        target_scaler = MinMaxScaler()
+        Y_samples = target_scaler.fit_transform(Y_samples)
+
+        cmapx = cm.get_cmap('ocean', len(self.loader.source_channels))
+        cmapy = cm.get_cmap('prism', Y_samples.shape[1])
+
+        for measurament, (r, l) in self.loader.dataset['range'].items():
+            # df[measurament]['data'].plot(figsize=(12, 6), title=f"{measurament} Prediction")
+            plt.figure(figsize=(12, 6))
+            plt.xlabel("Time")
+            plt.ylabel("Sensor Readings")
+            plt.legend(bbox_to_anchor=(0.95, 0.5), loc="center left")
+
+            plt.vlines(x=r, ymin=0, ymax=1, colors='blue')
+            plt.vlines(x=l, ymin=0, ymax=1, colors='blue')
+
+            for i, channel_name in enumerate(df[measurament]['data'].columns):
+                plt.plot(df[measurament]['data'][channel_name], linestyle = 'dotted', color=cmapx(i))
+
+            Y_value = np.zeros((1, len(self.loader.target_list)))
+            Y_value[0] = np.array([[df[measurament]['label'][key] for key in self.loader.target_list]])
+
+            self.logger.debug(f"Y_value.shape: {Y_value.shape}")
+            self.logger.debug(f"Y_value: {Y_value}")
+
+            Y_scaled = target_scaler.transform(Y_value).reshape(1, -1)
+            self.logger.debug(f"Y_scaled.shape: {Y_scaled.shape}")
+            self.logger.debug(f"Y_scaled: {Y_scaled}")
+
+            for i, value in enumerate(Y_scaled):
+                plt.axhline(y=value, xmin=0, xmax=df[measurament]['data'].shape[0], color=cmapy(i), linestyle='dashed')
+
+            y_pred = trained_model.predict(df[measurament]['data'].to_numpy())
+
+            if y_pred.ndim == 2:
+                for i in range(y_pred.shape[0]):
+                    plt.plot(y_pred[:, i], color=cmapy(i), linestyle='solid')
+            else:
+                plt.plot(y_pred, color=cmapy(0), linestyle='solid')
+
+            filename = os.path.join(pics_folder, f"{measurament}_{model_id}.png")
+            plt.savefig(filename)
+            self.logger.info(f"Saved plot as {filename}")
+
+            plt.close()
+
     def fit(self):
         total_train_queue = 2*int(1/self.ratio)*len(self.get_model_train())
         self.logger.info("{:=^60}".format(f'Begin Fit {total_train_queue} Models'))
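Reviewer notes on the new gen_plots: matplotlib.cm.get_cmap was deprecated in matplotlib 3.7 and removed in 3.9, so on current matplotlib the two colormap lookups would need the replacement sketched below (counts are placeholders). Separately, `for i in range(y_pred.shape[0])` paired with `y_pred[:, i]` looks like it should range over y_pred.shape[1] (targets, matching the column being plotted), and `enumerate(Y_scaled)` iterates the rows of a (1, n) array rather than its n values; both are worth double-checking.

    # Assumed drop-in for the removed API, valid for matplotlib >= 3.6:
    import matplotlib
    cmapx = matplotlib.colormaps['ocean'].resampled(10)  # e.g. 10 sensor channels
    cmapy = matplotlib.colormaps['prism'].resampled(3)   # e.g. 3 targets
    print(cmapx(0))  # RGBA tuple, same call style as before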
@@ -202,28 +281,30 @@ class eNoseTrainer:
             format='{desc}{desc_pad}{percentage:3.0f}%|{bar}| {count:{len_total}d}/{total:d} [{elapsed}<{eta}, {rate:.2f}{unit_pad}{unit}/s]'
         )

-        node = os.uname()[1]
-        X_xboost, Y_xboost, G_xboost = self.loader.load_dataset_xboost()
-        self.logger.debug(f"X_xboost: {X_xboost.shape}")
-        self.logger.debug(f"Y_xboost: {Y_xboost.shape}")
-        self.logger.debug(f"G_xboost: {G_xboost.shape}")
-
-        discretizer = KBinsDiscretizer(n_bins=50*Y_xboost.shape[1], encode='ordinal', strategy='uniform')
-        discretizer.fit(Y_xboost)
-        Y_discrete = discretizer.transform(Y_xboost)
-        self.logger.debug(f"Y_discrete: {Y_discrete.shape}")
-
+        discretizer = KBinsDiscretizer(n_bins=200, encode='ordinal', strategy='uniform')
         gss = StratifiedGroupKFold(n_splits=int(1/self.ratio), shuffle=True, random_state=get_seed())

+        node = os.uname()[1]
+        self.loader.smooth = None
+        self.loader.reset()
+
+        X_xboost, Y_xboost, G_xboost = self.loader.load_dataset_xboost()
+        # self.logger.debug(f"X_xboost: {X_xboost.shape}")
+        self.logger.debug(f"Y_xboost: {Y_xboost.shape}")
+        # self.logger.debug(f"G_xboost: {G_xboost.shape}")
+
+        Y_discrete = discretizer.fit_transform(Y_xboost)
+        if Y_discrete.ndim == 2:
+            Y_discrete = np.sum(Y_discrete, axis=1)
+        # self.logger.debug(f"Y_discrete: {Y_discrete.shape}")
+
         for i, (train_index, test_index) in enumerate(gss.split(X_xboost, Y_discrete, G_xboost)):
             dataset = 'Tabular'
-            os.makedirs('{}/{}/{}'.format(self.name, self.target, dataset), exist_ok=True)
+            os.makedirs('{}/{}/{}'.format(self.name, self.loader.target, dataset), exist_ok=True)
             X_train, X_test = X_xboost[train_index], X_xboost[test_index]
             Y_train, Y_test = Y_xboost[train_index], Y_xboost[test_index]
-            self.logger.debug(f"X_train: {X_train.shape}")
-            self.logger.debug(f"X_test: {X_test.shape}")
+            # self.logger.debug(f"X_train: {X_train.shape}")
+            # self.logger.debug(f"X_test: {X_test.shape}")
             self.logger.debug(f"Y_train: {Y_train.shape}")
             self.logger.debug(f"Y_test: {Y_test.shape}")
|
||||||
self.bar.update()
|
self.bar.update()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
model_file = '{}/{}/{}/{}'.format(self.name, self.target, dataset, model_id )
|
model_file = '{}/{}/{}/{}'.format(self.name, self.loader.target, dataset, model_id )
|
||||||
|
|
||||||
tmse, mse, mae, rmse, optimized_model, model_params = self.train_and_score_model(model, X_train, X_test, Y_train, Y_test)
|
tmse, mse, mae, rmse, optimized_model, model_params = self.train_and_score_model(model, X_train, X_test, Y_train, Y_test)
|
||||||
|
|
||||||
|
@@ -247,7 +328,7 @@ class eNoseTrainer:
                     "ts": ts,
                     "Dataset": dataset,
                     "Samples": Y_xboost.shape[0],
-                    "Target": self.target,
+                    "Target": self.loader.target,
                     "Train Size": Y_train.shape[0],
                     "Train Ratio": Y_train.shape[0]/Y_xboost.shape[0],
                     "Ratio": self.ratio,
@@ -263,10 +344,26 @@ class eNoseTrainer:

         self.saveCheckPoint()

-        dataset = 'Tabular-s3'
-        os.makedirs('{}/{}/{}'.format(self.name, self.target, dataset), exist_ok=True)
-        X_xboost_no_noise = np.convolve(X_xboost, [0.2, 0.6, 0.2], mode='same')
-        X_train, X_test = X_xboost_no_noise[train_index], X_xboost_no_noise[test_index]
+        self.loader.smooth = 'conv3'
+        self.loader.reset()
+        X_xboost, Y_xboost, G_xboost = self.loader.load_dataset_xboost()
+        # self.logger.debug(f"X_xboost: {X_xboost.shape}")
+        self.logger.debug(f"Y_xboost: {Y_xboost.shape}")
+        # self.logger.debug(f"G_xboost: {G_xboost.shape}")
+
+        Y_discrete = discretizer.fit_transform(Y_xboost)
+        if Y_discrete.ndim == 2:
+            Y_discrete = np.sum(Y_discrete, axis=1)
+
+        for i, (train_index, test_index) in enumerate(gss.split(X_xboost, Y_discrete, G_xboost)):
+            dataset = 'Tabular-conv3'
+            os.makedirs('{}/{}/{}'.format(self.name, self.loader.target, dataset), exist_ok=True)
+            X_train, X_test = X_xboost[train_index], X_xboost[test_index]
+            Y_train, Y_test = Y_xboost[train_index], Y_xboost[test_index]
+            # self.logger.debug(f"X_train: {X_train.shape}")
+            # self.logger.debug(f"X_test: {X_test.shape}")
+            self.logger.debug(f"Y_train: {Y_train.shape}")
+            self.logger.debug(f"Y_test: {Y_test.shape}")

             for model in self.get_model_train():
                 model_id = "{}_{}".format(type(model).__name__, i)
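Note: the removed Tabular-s3 branch likely could not have worked as written, since np.convolve only accepts 1-D inputs and calling it on the 2-D X_xboost raises a ValueError; that would explain why the conv3 smoothing moved into the loader (per channel, in init_delta) and the smoothed variant became a second full split/train pass. Quick demonstration:

    import numpy as np
    X = np.ones((4, 3))
    try:
        np.convolve(X, [0.2, 0.6, 0.2], mode='same')
    except ValueError as e:
        print(e)  # "object too deep for desired array"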
@@ -276,7 +373,7 @@ class eNoseTrainer:
                     self.bar.update()
                     continue

-                model_file = '{}/{}/{}/{}'.format(self.name, self.target, dataset, model_id )
+                model_file = '{}/{}/{}/{}'.format(self.name, self.loader.target, dataset, model_id )

                 tmse, mse, mae, rmse, optimized_model, model_params = self.train_and_score_model(model, X_train, X_test, Y_train, Y_test)
@@ -287,7 +384,7 @@ class eNoseTrainer:
                     "ts": ts,
                     "Dataset": dataset,
                     "Samples": Y_xboost.shape[0],
-                    "Target": self.target,
+                    "Target": self.loader.target,
                     "Train Size": Y_train.shape[0],
                     "Train Ratio": Y_train.shape[0]/Y_xboost.shape[0],
                     "Ratio": self.ratio,
@@ -5,35 +5,37 @@ import warnings
 warnings.filterwarnings("ignore")

 source_channels=["MQ 8", "MQ 9", "MQ 135", "TGS 813", "TGS 821", "TGS 2600", "TGS 2602", "TGS 2611-0", "TGS 2612", "TGS 2620"]
-#target_variables=['C2H2', 'CH4', 'C3H6', 'CO', 'C2H6', 'C3H8', 'C2H4', 'H2', 'O2']
+target_variables=['C2H2', 'CH4', 'C3H6', 'CO', 'C2H6', 'C3H8', 'C2H4', 'H2', 'O2']

-target_variables=['C2H2']
-eNoseLoaderC2H2 = GasSensorDataLoader("enose_dataset", threshold=0.85, source_channels=source_channels, target_list=target_variables, debug=False)
-eNoseC2H2 = eNoseTrainer(eNoseLoaderC2H2, test_size=0.2, debug=True)
-eNoseC2H2.fit()
-
-target_variables=['CH4']
-eNoseLoaderCH4 = GasSensorDataLoader("enose_dataset", threshold=0.85, source_channels=source_channels, target_list=target_variables, debug=False)
-eNoseCH4 = eNoseTrainer(eNoseLoaderCH4, test_size=0.2, debug=True)
-eNoseCH4.fit()
-
-target_variables=['C3H6']
-eNoseLoaderC3H6 = GasSensorDataLoader("enose_dataset", threshold=0.85, source_channels=source_channels, target_list=target_variables, debug=False)
-eNoseC3H6 = eNoseTrainer(eNoseLoaderC3H6, test_size=0.2, debug=True)
-eNoseC3H6.fit()
-
-target_variables=['C2H6']
-eNoseLoaderC2H6 = GasSensorDataLoader("enose_dataset", threshold=0.85, source_channels=source_channels, target_list=target_variables, debug=False)
-eNoseC2H6 = eNoseTrainer(eNoseLoaderC2H6, test_size=0.2, debug=True)
-eNoseC2H6.fit()
-
-target_variables=['H2']
-eNoseLoaderH2 = GasSensorDataLoader("enose_dataset", threshold=0.85, source_channels=source_channels, target_list=target_variables, debug=False)
-eNoseH2 = eNoseTrainer(eNoseLoaderH2, test_size=0.2, debug=True)
-eNoseH2.fit()
-
-#eNose.wrap_and_save()
+eNoseLoader = GasSensorDataLoader("enose_dataset", threshold=0.85, source_channels=source_channels, target_list=target_variables, debug=False)
+eNose = eNoseTrainer(eNoseLoader, test_size=0.5)
+eNoseLoader.target_list=['C2H2',]
+eNose.fit()
+eNoseLoader.target_list=['CH4',]
+eNose.fit()
+eNoseLoader.target_list=['C3H6',]
+eNose.fit()
+eNoseLoader.target_list=['CO',]
+eNose.fit()
+eNoseLoader.target_list=['C2H6',]
+eNose.fit()
+eNoseLoader.target_list=['C3H8',]
+eNose.fit()
+eNoseLoader.target_list=['C2H2', 'CH4', 'C3H6', 'CO', 'C2H6',]
+eNose.fit()
+eNose.wrap_and_save()
+
+# eNoseLoader.target_list=['CH4']
+# eNose.fit()
+#
+# eNoseLoader.target_list=['C3H6']
+# eNose.fit()
+#
+# eNoseLoader.target_list=['C2H6']
+# eNose.fit()
+#
+# eNoseLoader.target_list=['H2']
+# eNose.fit()
+#
+# eNoseLoader.target_list=['C2H2', 'CH4', 'C3H6', 'C2H6', 'H2']
+# eNose.fit()
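Note: the new driver reuses a single loader/trainer pair and mutates target_list between runs; this works only because the rewritten fit() calls self.loader.reset(), which re-derives loader.target and the per-target output folders. The implied usage pattern, condensed (behaviour assumed from the diff):

    for targets in (['C2H2'], ['CH4'], ['C2H2', 'CH4', 'C3H6', 'CO', 'C2H6']):
        eNoseLoader.target_list = targets
        eNose.fit()           # fit() resets the loader before loading data
    eNose.wrap_and_save()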