main
ifiguero 2025-03-16 21:26:01 -03:00
parent 48cecaa8cf
commit 5e87e9b8f2
2 changed files with 20 additions and 20 deletions

View File

@ -177,7 +177,7 @@ class eNoseTrainer:
else:
return {}
def train_and_score_conv1D_v1(self, X_train_orig, X_test_orig, Y_train_orig, Y_test_orig, epochs=30, num_samples=25):
def search_best_conv1D_v1(self, X_train_orig, X_test_orig, Y_train_orig, Y_test_orig, epochs=30, nsamples=0.1):
ray.init(ignore_reinit_error=True)
X_train_ref = ray.put(X_train_orig)
Y_train_ref = ray.put(Y_train_orig)
@ -187,9 +187,9 @@ class eNoseTrainer:
def build_model_conv1D(config, input_shape, output_dim):
model = keras.Sequential([
layers.Conv1D(filters=config['filters'], kernel_size=config['kernel_size'], activation='relu', input_shape=input_shape),
layers.Conv1D(filters=config['filters'], kernel_size=config['kernel_size'], strides=config['kernel_size']//2, activation='relu', input_shape=input_shape),
layers.MaxPooling1D(pool_size=config['pool_size']),
layers.Conv1D(filters=config['filters'] * 2, kernel_size=config['kernel_size'], activation='relu'),
layers.Conv1D(filters=config['filters'] * 2, kernel_size=config['kernel_size'], strides=config['kernel_size']//2, activation='relu'),
layers.MaxPooling1D(pool_size=config['pool_size']),
layers.Flatten(),
layers.Dense(config['dense_units'], activation='relu'),
@ -216,7 +216,7 @@ class eNoseTrainer:
validation_data=(X_testc1D, Y_testc1D),
epochs=config['epochs'],
batch_size=config['batch_size'],
verbose=1,
verbose=0,
callbacks=[early_stopping]
)
@ -226,7 +226,7 @@ class eNoseTrainer:
config_space = {
'filters': tune.choice([16, 32, 64]),
'kernel_size': tune.choice([3, 5]),
'kernel_size': tune.choice([3, 5, 7]),
'pool_size': tune.choice([2, 3]),
'dense_units': tune.choice([32, 64, 128]),
'dropout': tune.choice([0.1, 0.2, 0.3]),
@ -234,10 +234,11 @@ class eNoseTrainer:
'batch_size': tune.choice([16, 32, 64]),
'epochs': epochs
}
total_space = (3*3*2*3*3*3*3)
scheduler = ASHAScheduler(metric='mse', mode='min', max_t=epochs, grace_period=5, reduction_factor=2)
# analysis = tune.run(train_model, config=config_space, num_samples=num_samples, scheduler=scheduler)
analysis = tune.run( tune.with_parameters(train_model_conv1D), config=config_space, num_samples=num_samples, scheduler=scheduler, max_concurrent_trials=3 )
analysis = tune.run( tune.with_parameters(train_model_conv1D), config=config_space, num_samples=int(nsamples*total_space), scheduler=scheduler, max_concurrent_trials=8 )
best_config = analysis.get_best_config(metric='mse', mode='min')
best_model = build_model_conv1D(best_config, X_train_orig.shape[1:], Y_train_orig.shape[1])
@ -483,7 +484,7 @@ class eNoseTrainer:
self.saveCheckPoint()
sample_size = 50000
self.loader.smooth = None
self.loader.reset()
for window in windows:
@ -516,19 +517,19 @@ class eNoseTrainer:
model_file = '{}/{}/{}-w{}/{}'.format(self.name, self.loader.target, dataset, window, model_id )
sample_size = 25000
X_train_sample, _, Y_train_sample, _ = train_test_split(X_train, Y_train, stratify=G_train, train_size=sample_size / len(X_train), random_state=get_seed())
X_train_sample, _, Y_train_sample, _ = train_test_split(X_train, Y_train, stratify=G_train, train_size=0.8*sample_size / len(X_train), random_state=get_seed())
X_test_sample, _, Y_test_sample, _ = train_test_split(X_test, Y_test, stratify=G_test, train_size=0.2*sample_size / len(X_test), random_state=get_seed())
self.logger.debug(f"Y_train_sample: {Y_train_sample.shape}")
self.logger.debug(f"Y_test_sample: {Y_test_sample.shape}")
optimized_model, model_params = self.train_and_score_conv1D_v1(X_train_sample, X_test_sample, Y_train_sample, Y_test_sample)
optimized_model, model_params = self.search_best_conv1D_v1(X_train_sample, X_test_sample, Y_train_sample, Y_test_sample)
optimized_model.fit(X_train, Y_train, epochs=model_params['epochs'], batch_size=model_params['batch_size'], verbose=0)
self.logger.info(f"Training Model {model_id} with {model_params}")
optimized_model.fit(X_train, Y_train, epochs=model_params['epochs'], batch_size=model_params['batch_size'], verbose=1)
Y_train_pred = best_model.predict(X_train)
Y_test_pred = best_model.predict(X_test)
Y_train_pred = optimized_model.predict(X_train)
Y_test_pred = optimized_model.predict(X_test)
mse_train = mean_squared_error(Y_train, Y_train_pred)
mae_test = mean_absolute_error(Y_test, Y_test_pred)
@ -592,19 +593,18 @@ class eNoseTrainer:
self.bar.update()
continue
sample_size = 25000
X_train_sample, _, Y_train_sample, _ = train_test_split(X_train, Y_train, stratify=G_train, train_size=sample_size / len(X_train), random_state=get_seed())
X_train_sample, _, Y_train_sample, _ = train_test_split(X_train, Y_train, stratify=G_train, train_size=0.8*sample_size / len(X_train), random_state=get_seed())
X_test_sample, _, Y_test_sample, _ = train_test_split(X_test, Y_test, stratify=G_test, train_size=0.2*sample_size / len(X_test), random_state=get_seed())
self.logger.debug(f"Y_train_sample: {Y_train_sample.shape}")
self.logger.debug(f"Y_test_sample: {Y_test_sample.shape}")
optimized_model, model_params = self.train_and_score_conv1D_v1(X_train_sample, X_test_sample, Y_train_sample, Y_test_sample)
optimized_model, model_params = self.search_best_conv1D_v1(X_train_sample, X_test_sample, Y_train_sample, Y_test_sample)
optimized_model.fit(X_train, Y_train, epochs=model_params['epochs'], batch_size=model_params['batch_size'], verbose=0)
optimized_model.fit(X_train, Y_train, epochs=model_params['epochs'], batch_size=model_params['batch_size'], verbose=1)
Y_train_pred = best_model.predict(X_train)
Y_test_pred = best_model.predict(X_test)
Y_train_pred = optimized_model.predict(X_train)
Y_test_pred = optimized_model.predict(X_test)
mse_train = mean_squared_error(Y_train, Y_train_pred)
mae_test = mean_absolute_error(Y_test, Y_test_pred)

View File

@ -13,7 +13,7 @@ fi
env_name=$1
conda create -n "$env_name" python scikit-learn==1.3.1 xgboost tensorflow conda-forge::ray-tune keras pandas numpy matplotlib openpyxl xlsxwriter conda-forge::enlighten
conda create -n "$env_name" python scikit-learn==1.3.1 xgboost conda-forge::tensorflow-cpu conda-forge::ray-tune keras pandas numpy matplotlib openpyxl xlsxwriter conda-forge::enlighten
if [ $? -eq 0 ]; then
echo "Packages installed successfully in environment '$env_name'"
else