Compare commits


No commits in common. "78527732033e7a4147a6880559583e2deeff9377" and "28b1123ae41e2bd7568a4454e5977ccf8cf776ec" have entirely different histories.

4 changed files with 61 additions and 238 deletions

View File

@@ -139,7 +139,6 @@ class GasSensorDataLoader:
         self.logger.info("Initializing dataset delta values.")
         data_copy = {key: {'label': value['label'], 'sampleId': value['sampleId'], 'data': value['data'].copy()} for key, value in self.data.items()}
         if self.smooth == 'conv3':
-            self.logger.info("Noise filter: Convolution [0.2, 0.6, 0.2] selected.")
             kernel = np.array([0.2, 0.6, 0.2])
             for key in data_copy:
                 tempdf = pd.DataFrame()
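For context, the 'conv3' smoothing that this branch keeps (minus the log line) convolves each sensor channel with a short low-pass kernel. A minimal sketch of the idea, assuming a plain DataFrame of channel columns; the hypothetical smooth_conv3 helper below is illustrative, not the loader's actual method, which builds a tempdf per sample:

    import numpy as np
    import pandas as pd

    def smooth_conv3(df):
        """Illustrative helper: smooth each channel with the [0.2, 0.6, 0.2] kernel."""
        kernel = np.array([0.2, 0.6, 0.2])
        out = pd.DataFrame(index=df.index)
        for col in df.columns:
            # mode='same' preserves the series length; the two edge samples
            # are slightly attenuated because the kernel hangs off the ends.
            out[col] = np.convolve(df[col].to_numpy(), kernel, mode='same')
        return out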

View File

@@ -13,12 +13,6 @@ from sklearn.preprocessing import MinMaxScaler
 from xgboost import XGBRegressor
-import keras
-from keras import layers
-from ray import tune
-from ray.tune.schedulers import ASHAScheduler
 # from ray import tune
 # import ray
 # from keras.callbacks import TensorBoard
@@ -98,6 +92,8 @@ class eNoseTrainer:
     def wrap_and_save(self):
         self.logger.info("{:=^60}".format(' Saving Summary and Wrap the output in a ZipFile '))
+        with pd.ExcelWriter('{}/Summary.xlsx'.format(self.name) , engine='xlsxwriter') as xls:
+            self.get_best_models().to_excel(xls, sheet_name='Results')
         with zipfile.ZipFile('{}-{}.zip'.format(self.name, self.start), 'w', zipfile.ZIP_DEFLATED) as zipf:
             for root, dirs, files in os.walk(self.name):
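The two added lines make wrap_and_save export the best results to an Excel summary before zipping the run directory. For reference, the pandas pattern in isolation (made-up frame contents; get_best_models() is the trainer's own accessor):

    import pandas as pd

    # Stand-in for self.get_best_models(); made-up ledger contents.
    best = pd.DataFrame({"Model": ["XGBRegressor_0"], "Dataset": ["Tabular"], "rmse": [0.12]})

    # Requires the xlsxwriter package (the setup script below installs it).
    with pd.ExcelWriter("Summary.xlsx", engine="xlsxwriter") as xls:
        best.to_excel(xls, sheet_name="Results")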
@@ -152,72 +148,6 @@ class eNoseTrainer:
             self.bar.update()
         return mse, mae, rmse, optimized_model, model_params

-    def train_and_score_conv1D_v1(self, X_train, X_test, y_train, y_test, epochs=30, num_samples=25):
-        def build_model(config, input_shape, output_dim):
-            model = keras.Sequential([
-                layers.Conv1D(filters=config['filters'], kernel_size=config['kernel_size'], activation='relu', input_shape=input_shape),
-                layers.MaxPooling1D(pool_size=config['pool_size']),
-                layers.Conv1D(filters=config['filters'] * 2, kernel_size=config['kernel_size'], activation='relu'),
-                layers.MaxPooling1D(pool_size=config['pool_size']),
-                layers.Flatten(),
-                layers.Dense(config['dense_units'], activation='relu'),
-                layers.Dropout(config['dropout']),
-                layers.Dense(output_dim)
-            ])
-            model.compile(optimizer=keras.optimizers.Adam(learning_rate=config['lr']), loss='mse')
-            return model
-
-        def train_model(config):
-            input_shape = X_train.shape[1:]
-            output_dim = Y_train.shape[1]
-            model = build_model(config, input_shape, output_dim)
-            early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
-            model.fit(
-                X_train, Y_train,
-                validation_data=(X_test, Y_test),
-                epochs=epochs,
-                batch_size=config['batch_size'],
-                verbose=0,
-                callbacks=[early_stopping]
-            )
-            Y_pred = model.predict(X_test)
-            mse = mean_squared_error(Y_test, Y_pred)
-            tune.report(mse=mse)
-
-        config_space = {
-            'filters': tune.choice([32, 64, 128]),
-            'kernel_size': tune.choice([3, 5]),
-            'pool_size': tune.choice([2, 3]),
-            'dense_units': tune.choice([32, 64, 128]),
-            'dropout': tune.choice([0.1, 0.2, 0.3]),
-            'lr': tune.choice([0.001, 0.0005, 0.0001]),
-            'batch_size': tune.choice([16, 32, 64])
-        }
-
-        scheduler = ASHAScheduler(metric='mse', mode='min', max_t=epochs, grace_period=5, reduction_factor=2)
-        analysis = tune.run(train_model, config=config_space, num_samples=num_samples, scheduler=scheduler)
-
-        best_config = analysis.get_best_config(metric='mse', mode='min')
-        best_model = build_model(best_config, X_train.shape[1:], Y_train.shape[1])
-        best_model.fit(X_train, Y_train, epochs=epochs, batch_size=best_config['batch_size'], verbose=0)
-
-        Y_train_pred = best_model.predict(X_train)
-        Y_test_pred = best_model.predict(X_test)
-
-        mse_train = mean_squared_error(Y_train, Y_train_pred)
-        mae_test = mean_absolute_error(y_test, Y_test_pred)
-        mse_test = mean_squared_error(Y_test, Y_test_pred)
-        rmse_test = np.sqrt(mse_test)
-
-        # # Calculate evaluation metrics
-        mse = mean_squared_error(y_test, y_pred)
-        rmse = np.sqrt(mse)
-
-        return mse_train, mae_test, mse_test, rmse_test, best_model, best_config
-
     def get_model_train(self):
         return [
             XGBRegressor(objective='reg:squarederror'),
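Two defects in the removed helper are worth recording: the signature takes y_train/y_test but the body reads Y_train/Y_test, which the method never defines, and the dead "Calculate evaluation metrics" block references a y_pred that is never assigned. A corrected minimal sketch of the same pattern, a Keras Conv1D tuned with Ray Tune's ASHAScheduler (hedged: illustrative search space, and the legacy tune.report(**metrics) API that matches the removed imports):

    import numpy as np
    import keras
    from keras import layers
    from sklearn.metrics import mean_squared_error
    from ray import tune
    from ray.tune.schedulers import ASHAScheduler

    def train_and_score_conv1d(X_train, X_test, y_train, y_test, epochs=30, num_samples=25):
        def build_model(config):
            model = keras.Sequential([
                layers.Input(shape=X_train.shape[1:]),
                layers.Conv1D(config['filters'], config['kernel_size'], activation='relu'),
                layers.MaxPooling1D(config['pool_size']),
                layers.Flatten(),
                layers.Dense(config['dense_units'], activation='relu'),
                layers.Dense(y_train.shape[1]),
            ])
            model.compile(optimizer=keras.optimizers.Adam(learning_rate=config['lr']), loss='mse')
            return model

        def trainable(config):
            # Closure over the training arrays, as the removed code did.
            model = build_model(config)
            model.fit(X_train, y_train, validation_data=(X_test, y_test),
                      epochs=epochs, batch_size=config['batch_size'], verbose=0)
            tune.report(mse=mean_squared_error(y_test, model.predict(X_test)))  # legacy Ray Tune API

        config_space = {
            'filters': tune.choice([32, 64]),
            'kernel_size': tune.choice([3, 5]),
            'pool_size': tune.choice([2, 3]),
            'dense_units': tune.choice([32, 64]),
            'lr': tune.choice([1e-3, 5e-4]),
            'batch_size': tune.choice([16, 32]),
        }
        scheduler = ASHAScheduler(metric='mse', mode='min', max_t=epochs,
                                  grace_period=5, reduction_factor=2)
        analysis = tune.run(trainable, config=config_space,
                            num_samples=num_samples, scheduler=scheduler)
        best_config = analysis.get_best_config(metric='mse', mode='min')

        # Refit the winning configuration and score it once on the held-out split.
        best_model = build_model(best_config)
        best_model.fit(X_train, y_train, epochs=epochs,
                       batch_size=best_config['batch_size'], verbose=0)
        mse = mean_squared_error(y_test, best_model.predict(X_test))
        return mse, float(np.sqrt(mse)), best_model, best_config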
@@ -299,18 +229,21 @@ class eNoseTrainer:
         target_scaler = MinMaxScaler()
         Y_samples = target_scaler.fit_transform(Y_samples)

-        cmapx = cm.get_cmap('winter', len(self.loader.source_channels))
+        cmapx = cm.get_cmap('ocean', len(self.loader.source_channels))
         cmapy = cm.get_cmap('prism', Y_samples.shape[1])

         for measurament, (r, l) in self.loader.dataset['range'].items():
             # df[measurament]['data'].plot(figsize=(12, 6), title=f"{measurament} Prediction")
             plt.figure(figsize=(12, 6))
-            plt.title(f"[{dataset}] {model_id}. Sample {measurament}")
-            plt.xlabel("Sensor Readings")
-            plt.vlines(x=r, ymin=0, ymax=1, colors='blue', linestyle='dashed')
-            plt.vlines(x=l, ymin=0, ymax=1, colors='blue', linestyle='dashed')
+            plt.xlabel("Time")
+            plt.ylabel("Sensor Readings")
+            plt.legend(bbox_to_anchor=(0.95, 0.5), loc="center left")
+            plt.vlines(x=r, ymin=0, ymax=1, colors='blue')
+            plt.vlines(x=l, ymin=0, ymax=1, colors='blue')
+            for i, channel_name in enumerate(df[measurament]['data'].columns):
+                plt.plot(df[measurament]['data'][channel_name], linestyle = 'dotted', color=cmapx(i))

             Y_value = np.zeros((1, len(self.loader.target_list)))
             Y_value[0] = np.array([[df[measurament]['label'][key] for key in self.loader.target_list]])
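A note on the colormap usage this hunk touches: cm.get_cmap(name, N) returns an N-entry resampled colormap, so cmapx(i) yields a distinct, stable color per sensor channel ('ocean' after this change) and cmapy(i) per target gas ('prism'). A minimal sketch of the pattern (illustrative values; note matplotlib.cm.get_cmap is deprecated in newer Matplotlib in favor of matplotlib.colormaps):

    import matplotlib.pyplot as plt
    from matplotlib import cm

    n_channels = 8                             # e.g. one color per gas sensor
    cmapx = cm.get_cmap('ocean', n_channels)   # N-entry discrete colormap
    for i in range(n_channels):
        # Called with an integer index 0..N-1, the colormap returns an RGBA tuple.
        plt.plot([0, 1], [i, i + 1], color=cmapx(i), linestyle='dotted')
    plt.show()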
@@ -322,34 +255,25 @@ class eNoseTrainer:
             self.logger.debug(f"Y_scaled.shape: {Y_scaled.shape}")
             self.logger.debug(f"Y_scaled: {Y_scaled}")

+            for i, value in enumerate(Y_scaled):
+                plt.axhline(y=value, xmin=0, xmax=df[measurament]['data'].shape[0], color=cmapy(i), linestyle='dashed')
+
             y_pred = trained_model.predict(df[measurament]['data'].to_numpy())
-            self.logger.debug(f"y_pred.shape: {y_pred.shape}")
-            # self.logger.debug(f"y_pred: {Y_scaled}")

             if y_pred.ndim == 2:
-                plt.ylabel("Target dashed / Pred solid")
-                for i, channel_name in enumerate(df[measurament]['data'].columns):
-                    plt.plot(df[measurament]['data'][channel_name], linestyle = 'dotted', color=cmapx(i), alpha=0.2)
-                for i in range(y_pred.shape[1]):
-                    self.logger.debug(f"Y_scaled[0][i]: {Y_scaled[0][i]}")
-                    plt.axhline(y=Y_scaled[0][i], xmin=0, xmax=df[measurament]['data'].shape[0], color=cmapy(i), linestyle='dashed')
+                for i in range(y_pred.shape[0]):
                     plt.plot(y_pred[:, i], color=cmapy(i), linestyle='solid')
             else:
-                plt.ylabel("Samples dotted / Target dashed / Pred solid")
-                for i, channel_name in enumerate(df[measurament]['data'].columns):
-                    plt.plot(df[measurament]['data'][channel_name], linestyle = 'dotted', color=cmapx(i))
                 plt.plot(y_pred, color=cmapy(0), linestyle='solid')
-                plt.axhline(y=Y_scaled, xmin=0, xmax=df[measurament]['data'].shape[0], color=cmapy(i), linestyle='dashed')

             filename = os.path.join(pics_folder, f"{measurament}_{model_id}.png")
-            plt.savefig(filename, format='png')
+            plt.savefig(filename)
             self.logger.info(f"Saved plot as {filename}")
             plt.close()

     def fit(self):
-        windows = [16, 32, 64, 128]
-        total_train_queue = 2*int(1/self.ratio)*(len(self.get_model_train())+1)
+        total_train_queue = 2*int(1/self.ratio)*len(self.get_model_train())
         self.logger.info("{:=^60}".format(f'Begin Fit {total_train_queue} Models'))
         self.trained = 0
         manager = enlighten.get_manager()
@@ -374,9 +298,8 @@ class eNoseTrainer:
             Y_discrete = np.sum(Y_discrete, axis=1)
         # self.logger.debug(f"Y_discrete: {Y_discrete.shape}")

-        dataset = 'Tabular'
         for i, (train_index, test_index) in enumerate(gss.split(X_xboost, Y_discrete, G_xboost)):
-            self.logger.info("{:=^60}".format(f'CV {i}/{int(1/self.ratio)} {dataset}'))
+            dataset = 'Tabular'
             os.makedirs('{}/{}/{}'.format(self.name, self.loader.target, dataset), exist_ok=True)
             X_train, X_test = X_xboost[train_index], X_xboost[test_index]
             Y_train, Y_test = Y_xboost[train_index], Y_xboost[test_index]
@@ -432,9 +355,8 @@ class eNoseTrainer:
         if Y_discrete.ndim == 2:
             Y_discrete = np.sum(Y_discrete, axis=1)

-        dataset = 'Tabular-conv3'
         for i, (train_index, test_index) in enumerate(gss.split(X_xboost, Y_discrete, G_xboost)):
-            self.logger.info("{:=^60}".format(f'CV {i}/{int(1/self.ratio)} {dataset}'))
+            dataset = 'Tabular-conv3'
             os.makedirs('{}/{}/{}'.format(self.name, self.loader.target, dataset), exist_ok=True)
             X_train, X_test = X_xboost[train_index], X_xboost[test_index]
             Y_train, Y_test = Y_xboost[train_index], Y_xboost[test_index]
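Both cross-validation loops rely on scikit-learn's GroupShuffleSplit so that rows from the same measurement never straddle the train/test boundary. A minimal sketch of the splitting pattern with made-up shapes (the gss construction is an assumption, inferred from the CV {i}/{int(1/self.ratio)} log format used elsewhere in this file):

    import numpy as np
    from sklearn.model_selection import GroupShuffleSplit

    ratio = 0.2                            # assumed test fraction (self.ratio in the trainer)
    X = np.random.rand(100, 8)             # 100 samples, 8 sensor channels (made-up)
    y = np.random.rand(100, 1)
    groups = np.repeat(np.arange(20), 5)   # 20 measurements, 5 rows each

    gss = GroupShuffleSplit(n_splits=int(1 / ratio), test_size=ratio, random_state=0)
    for i, (train_idx, test_idx) in enumerate(gss.split(X, y, groups)):
        # No group id ever appears on both sides of the split.
        assert not set(groups[train_idx]) & set(groups[test_idx])
        X_train, X_test = X[train_idx], X[test_idx]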
@@ -478,123 +400,22 @@ class eNoseTrainer:
             self.saveCheckPoint()

-        self.loader.smooth = None
-        self.loader.reset()
-        for window in windows:
-            X_conv1d, Y_conv1d, G_conv1d = self.loader.load_dataset_window(window)
-            self.logger.debug(f"X_conv1d: {X_conv1d.shape}")
-            self.logger.debug(f"Y_conv1d: {Y_conv1d.shape}")
-            self.logger.debug(f"G_conv1d: {G_conv1d.shape}")
-
-            Y_discrete = discretizer.fit_transform(Y_conv1d)
-            if Y_discrete.ndim == 2:
-                Y_discrete = np.sum(Y_discrete, axis=1)
-
-            dataset = f'Conv1d-base-w{window}'
-            for i, (train_index, test_index) in enumerate(gss.split(X_conv1d, Y_discrete, G_conv1d)):
-                self.logger.info("{:=^60}".format(f'CV {i}/{int(1/self.ratio)} {dataset}'))
-
-                os.makedirs('{}/{}/{}-w{}'.format(self.name, self.loader.target, dataset, window), exist_ok=True)
-                X_train, X_test = X_conv1d[train_index], X_conv1d[test_index]
-                Y_train, Y_test = Y_conv1d[train_index], Y_conv1d[test_index]
-                # self.logger.debug(f"X_train: {X_train.shape}")
-                # self.logger.debug(f"X_test: {X_test.shape}")
-                self.logger.debug(f"Y_train: {Y_train.shape}")
-                self.logger.debug(f"Y_test: {Y_test.shape}")
-
-                model_id = "Conv1d-base_{}".format(i)
-                self.trained += 1
-                if self.row_exists(dataset, model_id):
-                    self.bar.update()
-                    continue
-
-                model_file = '{}/{}/{}-w{}/{}'.format(self.name, self.loader.target, dataset, window, model_id )
-                tmse, mse, mae, rmse, optimized_model, model_params = self.train_and_score_conv1D_v1(X_train, X_test, Y_train, Y_test)
-                ts = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
-
-                optimized_model.save(model_file)
-                optimized_model.save_weights(f"{model_file}.weights.h5")
-
-                newrow = pd.DataFrame( [{"node": node,
-                                         "ts": ts,
-                                         "Dataset": dataset,
-                                         "Samples": Y_xboost.shape[0],
-                                         "Target": self.loader.target,
-                                         "Train Size": Y_train.shape[0],
-                                         "Train Ratio": Y_train.shape[0]/Y_xboost.shape[0],
-                                         "Ratio": self.ratio,
-                                         "Model": model_id,
-                                         "Params": json.dumps(model_params),
-                                         "Train mse": tmse,
-                                         "mse": mse,
-                                         "mae": mae,
-                                         "rmse": rmse
-                                         }] )
-                self.ledger = pd.concat([self.ledger, newrow], ignore_index=True)
-                self.bar.update()
-                self.saveCheckPoint()
-
-        self.loader.smooth = 'conv3'
-        self.loader.reset()
-        for window in windows:
-            X_conv1d, Y_conv1d, G_conv1d = self.loader.load_dataset_window(window)
-            self.logger.debug(f"X_conv1d: {X_conv1d.shape}")
-            self.logger.debug(f"Y_conv1d: {Y_conv1d.shape}")
-            self.logger.debug(f"G_conv1d: {G_conv1d.shape}")
-
-            Y_discrete = discretizer.fit_transform(Y_conv1d)
-            if Y_discrete.ndim == 2:
-                Y_discrete = np.sum(Y_discrete, axis=1)
-
-            dataset = f'Conv1d-base-w{window}-conv3'
-            for i, (train_index, test_index) in enumerate(gss.split(X_conv1d, Y_discrete, G_conv1d)):
-                self.logger.info("{:=^60}".format(f'CV {i}/{int(1/self.ratio)} {dataset}'))
-
-                os.makedirs('{}/{}/{}-w{}'.format(self.name, self.loader.target, dataset, window), exist_ok=True)
-                X_train, X_test = X_conv1d[train_index], X_conv1d[test_index]
-                Y_train, Y_test = Y_conv1d[train_index], Y_conv1d[test_index]
-                # self.logger.debug(f"X_train: {X_train.shape}")
-                # self.logger.debug(f"X_test: {X_test.shape}")
-                self.logger.debug(f"Y_train: {Y_train.shape}")
-                self.logger.debug(f"Y_test: {Y_test.shape}")
-
-                model_id = "Conv1d-base_{}".format(i)
-                self.trained += 1
-                if self.row_exists(dataset, model_id):
-                    self.bar.update()
-                    continue
-
-                model_file = '{}/{}/{}-w{}/{}'.format(self.name, self.loader.target, dataset, window, model_id )
-                tmse, mse, mae, rmse, optimized_model, model_params = self.train_and_score_conv1D_v1(X_train, X_test, Y_train, Y_test)
-                ts = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
-
-                optimized_model.save(model_file)
-                optimized_model.save_weights(f"{model_file}.weights.h5")
-
-                newrow = pd.DataFrame( [{"node": node,
-                                         "ts": ts,
-                                         "Dataset": dataset,
-                                         "Samples": Y_xboost.shape[0],
-                                         "Target": self.loader.target,
-                                         "Train Size": Y_train.shape[0],
-                                         "Train Ratio": Y_train.shape[0]/Y_xboost.shape[0],
-                                         "Ratio": self.ratio,
-                                         "Model": model_id,
-                                         "Params": json.dumps(model_params),
-                                         "Train mse": tmse,
-                                         "mse": mse,
-                                         "mae": mae,
-                                         "rmse": rmse
-                                         }] )
-                self.ledger = pd.concat([self.ledger, newrow], ignore_index=True)
-                self.bar.update()
-                self.saveCheckPoint()
+        # if self.dnn:
+        #     model_file = '{}/{}/DNN_{}'.format(self.name, label, seed )
+        #     model_label = "{}".format(label)
+        #
+        #     accuracy, specificity, recall, f1, roc_auc, optimized_model, parms = self.train_and_score_model_keras(X_train, X_test, Y_train, Y_test, seed, model_label)
+        #     ts = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
+        #
+        #     newrow = pd.DataFrame( [{"node": node,
+        #                              "ts": ts,
+        #                              "Dataset": model_label,
+        #                              "Model": 'DNN',
+        #                              "Params": parms,
+        #                              "Seed": seed,
+        #                              "F1": f1,
+        #                              "ROC_AUC": roc_auc
+        #                              }] )
+        #     self.ledger = pd.concat([self.ledger, newrow], ignore_index=True)

         self.bar.close()
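On the progress accounting this hunk closes out: total_train_queue drops the old +1 term, presumably because the Conv1D passes it accounted for are gone, and the bar it feeds is an enlighten counter that fit obtains from enlighten.get_manager() and advances once per trained or skipped model. A minimal sketch of that pattern (illustrative total; counter() is enlighten's documented constructor):

    import enlighten

    manager = enlighten.get_manager()
    bar = manager.counter(total=10, desc='Fit', unit='models')  # total = models x CV folds (illustrative)
    for _ in range(10):
        # ... train one model, or skip it if its ledger row already exists ...
        bar.update()
    bar.close()
    manager.stop()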

View File

@@ -13,7 +13,7 @@ fi
 env_name=$1

-conda create -n "$env_name" python scikit-learn==1.3.1 xgboost tensorflow conda-forge::ray-tune keras pandas numpy matplotlib openpyxl xlsxwriter conda-forge::enlighten
+conda create -n "$env_name" python scikit-learn==1.3.1 xgboost keras pandas numpy matplotlib tensorflow openpyxl xlsxwriter conda-forge::enlighten

 if [ $? -eq 0 ]; then
     echo "Packages installed successfully in environment '$env_name'"
 else

View File

@@ -8,29 +8,32 @@ source_channels=["MQ 8", "MQ 9", "MQ 135", "TGS 813", "TGS 821", "TGS 2600", "TG
 target_variables=['C2H2', 'CH4', 'C3H6', 'CO', 'C2H6', 'C3H8', 'C2H4', 'H2', 'O2']

 eNoseLoader = GasSensorDataLoader("enose_dataset", threshold=0.85, source_channels=source_channels, target_list=target_variables, debug=False)
-eNose = eNoseTrainer(eNoseLoader, test_size=0.2, debug=True)
+eNose = eNoseTrainer(eNoseLoader, test_size=0.2)

 eNoseLoader.target_list=['H2',]
 eNose.fit()
-# eNoseLoader.target_list=['C2H2',]
-# eNose.fit()
-# eNoseLoader.target_list=['CH4',]
-# eNose.fit()
-# eNoseLoader.target_list=['C2H4',]
-# eNose.fit()
-# eNoseLoader.target_list=['C2H6',]
-# eNose.fit()
-# eNoseLoader.target_list=['H2', 'C2H2', 'CH4', 'C2H4', 'C2H6',]
-# eNose.fit()
-# eNose.wrap_and_save()
+eNoseLoader.target_list=['C2H2',]
+eNose.fit()
+eNoseLoader.target_list=['CH4',]
+eNose.fit()
+eNoseLoader.target_list=['C2H4',]
+eNose.fit()
+eNoseLoader.target_list=['C2H6',]
+eNose.fit()
+eNoseLoader.target_list=['H2', 'C2H2', 'CH4', 'C2H4', 'C2H6',]
+eNose.fit()
+eNose.wrap_and_save()

-# eNoseLoader.target_list=['H2',]
-# eNose.gen_plots('Tabular-conv3','XGBRegressor_0')
-# eNoseLoader.target_list=['H2', 'C2H2', 'CH4', 'C2H4', 'C2H6',]
-# eNose.gen_plots('Tabular','XGBRegressor_1')
+# eNoseLoader.target_list=['CH4']
+# eNose.fit()
 #
-# eNoseLoader.target_list=['H2', 'C2H2', 'CH4', 'C2H4', 'C2H6',]
-# eNose.gen_plots('Tabular-conv3','XGBRegressor_0')
+# eNoseLoader.target_list=['C3H6']
+# eNose.fit()
 #
-# eNoseLoader.target_list=['H2',]
-# eNose.gen_plots('Tabular-conv3','XGBRegressor_0')
+# eNoseLoader.target_list=['C2H6']
+# eNose.fit()
+#
+# eNoseLoader.target_list=['H2']
+# eNose.fit()
+#
+# eNoseLoader.target_list=['C2H2', 'CH4', 'C3H6', 'C2H6', 'H2']
+# eNose.fit()