main
ifiguero 2025-03-16 21:26:01 -03:00
parent 48cecaa8cf
commit 5e87e9b8f2
2 changed files with 20 additions and 20 deletions

View File

@ -177,7 +177,7 @@ class eNoseTrainer:
else: else:
return {} return {}
def train_and_score_conv1D_v1(self, X_train_orig, X_test_orig, Y_train_orig, Y_test_orig, epochs=30, num_samples=25): def search_best_conv1D_v1(self, X_train_orig, X_test_orig, Y_train_orig, Y_test_orig, epochs=30, nsamples=0.1):
ray.init(ignore_reinit_error=True) ray.init(ignore_reinit_error=True)
X_train_ref = ray.put(X_train_orig) X_train_ref = ray.put(X_train_orig)
Y_train_ref = ray.put(Y_train_orig) Y_train_ref = ray.put(Y_train_orig)
@ -187,9 +187,9 @@ class eNoseTrainer:
def build_model_conv1D(config, input_shape, output_dim): def build_model_conv1D(config, input_shape, output_dim):
model = keras.Sequential([ model = keras.Sequential([
layers.Conv1D(filters=config['filters'], kernel_size=config['kernel_size'], activation='relu', input_shape=input_shape), layers.Conv1D(filters=config['filters'], kernel_size=config['kernel_size'], stride=config['kernel_size']//2, activation='relu', input_shape=input_shape),
layers.MaxPooling1D(pool_size=config['pool_size']), layers.MaxPooling1D(pool_size=config['pool_size']),
layers.Conv1D(filters=config['filters'] * 2, kernel_size=config['kernel_size'], activation='relu'), layers.Conv1D(filters=config['filters'] * 2, kernel_size=config['kernel_size'], stride=config['kernel_size']//2, activation='relu'),
layers.MaxPooling1D(pool_size=config['pool_size']), layers.MaxPooling1D(pool_size=config['pool_size']),
layers.Flatten(), layers.Flatten(),
layers.Dense(config['dense_units'], activation='relu'), layers.Dense(config['dense_units'], activation='relu'),
@ -216,7 +216,7 @@ class eNoseTrainer:
validation_data=(X_testc1D, Y_testc1D), validation_data=(X_testc1D, Y_testc1D),
epochs=config['epochs'], epochs=config['epochs'],
batch_size=config['batch_size'], batch_size=config['batch_size'],
verbose=1, verbose=0,
callbacks=[early_stopping] callbacks=[early_stopping]
) )
@ -226,7 +226,7 @@ class eNoseTrainer:
config_space = { config_space = {
'filters': tune.choice([16, 32, 64]), 'filters': tune.choice([16, 32, 64]),
'kernel_size': tune.choice([3, 5]), 'kernel_size': tune.choice([3, 5, 7]),
'pool_size': tune.choice([2, 3]), 'pool_size': tune.choice([2, 3]),
'dense_units': tune.choice([32, 64, 128]), 'dense_units': tune.choice([32, 64, 128]),
'dropout': tune.choice([0.1, 0.2, 0.3]), 'dropout': tune.choice([0.1, 0.2, 0.3]),
@ -234,10 +234,11 @@ class eNoseTrainer:
'batch_size': tune.choice([16, 32, 64]), 'batch_size': tune.choice([16, 32, 64]),
'epochs': epochs 'epochs': epochs
} }
total_space = (3*3*2*3*3*3*3)
scheduler = ASHAScheduler(metric='mse', mode='min', max_t=epochs, grace_period=5, reduction_factor=2) scheduler = ASHAScheduler(metric='mse', mode='min', max_t=epochs, grace_period=5, reduction_factor=2)
# analysis = tune.run(train_model, config=config_space, num_samples=num_samples, scheduler=scheduler) # analysis = tune.run(train_model, config=config_space, num_samples=num_samples, scheduler=scheduler)
analysis = tune.run( tune.with_parameters(train_model_conv1D), config=config_space, num_samples=num_samples, scheduler=scheduler, max_concurrent_trials=3 ) analysis = tune.run( tune.with_parameters(train_model_conv1D), config=config_space, num_samples=int(nsamples*total_space), scheduler=scheduler, max_concurrent_trials=8 )
best_config = analysis.get_best_config(metric='mse', mode='min') best_config = analysis.get_best_config(metric='mse', mode='min')
best_model = build_model_conv1D(best_config, X_train_orig.shape[1:], Y_train_orig.shape[1]) best_model = build_model_conv1D(best_config, X_train_orig.shape[1:], Y_train_orig.shape[1])
@ -483,7 +484,7 @@ class eNoseTrainer:
self.saveCheckPoint() self.saveCheckPoint()
sample_size = 50000
self.loader.smooth = None self.loader.smooth = None
self.loader.reset() self.loader.reset()
for window in windows: for window in windows:
@ -516,19 +517,19 @@ class eNoseTrainer:
model_file = '{}/{}/{}-w{}/{}'.format(self.name, self.loader.target, dataset, window, model_id ) model_file = '{}/{}/{}-w{}/{}'.format(self.name, self.loader.target, dataset, window, model_id )
sample_size = 25000 X_train_sample, _, Y_train_sample, _ = train_test_split(X_train, Y_train, stratify=G_train, train_size=0.8*sample_size / len(X_train), random_state=get_seed())
X_train_sample, _, Y_train_sample, _ = train_test_split(X_train, Y_train, stratify=G_train, train_size=sample_size / len(X_train), random_state=get_seed())
X_test_sample, _, Y_test_sample, _ = train_test_split(X_test, Y_test, stratify=G_test, train_size=0.2*sample_size / len(X_test), random_state=get_seed()) X_test_sample, _, Y_test_sample, _ = train_test_split(X_test, Y_test, stratify=G_test, train_size=0.2*sample_size / len(X_test), random_state=get_seed())
self.logger.debug(f"Y_train_sample: {Y_train_sample.shape}") self.logger.debug(f"Y_train_sample: {Y_train_sample.shape}")
self.logger.debug(f"Y_test_sample: {Y_test_sample.shape}") self.logger.debug(f"Y_test_sample: {Y_test_sample.shape}")
optimized_model, model_params = self.train_and_score_conv1D_v1(X_train_sample, X_test_sample, Y_train_sample, Y_test_sample) optimized_model, model_params = self.search_best_conv1D_v1(X_train_sample, X_test_sample, Y_train_sample, Y_test_sample)
optimized_model.fit(X_train, Y_train, epochs=model_params['epochs'], batch_size=model_params['batch_size'], verbose=0) self.logger.info(f"Training Model {model_id} with {model_params}"))
optimized_model.fit(X_train, Y_train, epochs=model_params['epochs'], batch_size=model_params['batch_size'], verbose=1)
Y_train_pred = best_model.predict(X_train) Y_train_pred = optimized_model.predict(X_train)
Y_test_pred = best_model.predict(X_test) Y_test_pred = optimized_model.predict(X_test)
mse_train = mean_squared_error(Y_train, Y_train_pred) mse_train = mean_squared_error(Y_train, Y_train_pred)
mae_test = mean_absolute_error(Y_test, Y_test_pred) mae_test = mean_absolute_error(Y_test, Y_test_pred)
@ -592,19 +593,18 @@ class eNoseTrainer:
self.bar.update() self.bar.update()
continue continue
sample_size = 25000 X_train_sample, _, Y_train_sample, _ = train_test_split(X_train, Y_train, stratify=G_train, train_size=0.8*sample_size / len(X_train), random_state=get_seed())
X_train_sample, _, Y_train_sample, _ = train_test_split(X_train, Y_train, stratify=G_train, train_size=sample_size / len(X_train), random_state=get_seed())
X_test_sample, _, Y_test_sample, _ = train_test_split(X_test, Y_test, stratify=G_test, train_size=0.2*sample_size / len(X_test), random_state=get_seed()) X_test_sample, _, Y_test_sample, _ = train_test_split(X_test, Y_test, stratify=G_test, train_size=0.2*sample_size / len(X_test), random_state=get_seed())
self.logger.debug(f"Y_train_sample: {Y_train_sample.shape}") self.logger.debug(f"Y_train_sample: {Y_train_sample.shape}")
self.logger.debug(f"Y_test_sample: {Y_test_sample.shape}") self.logger.debug(f"Y_test_sample: {Y_test_sample.shape}")
optimized_model, model_params = self.train_and_score_conv1D_v1(X_train_sample, X_test_sample, Y_train_sample, Y_test_sample) optimized_model, model_params = self.search_best_conv1D_v1(X_train_sample, X_test_sample, Y_train_sample, Y_test_sample)
optimized_model.fit(X_train, Y_train, epochs=model_params['epochs'], batch_size=model_params['batch_size'], verbose=0) optimized_model.fit(X_train, Y_train, epochs=model_params['epochs'], batch_size=model_params['batch_size'], verbose=1)
Y_train_pred = best_model.predict(X_train) Y_train_pred = optimized_model.predict(X_train)
Y_test_pred = best_model.predict(X_test) Y_test_pred = optimized_model.predict(X_test)
mse_train = mean_squared_error(Y_train, Y_train_pred) mse_train = mean_squared_error(Y_train, Y_train_pred)
mae_test = mean_absolute_error(Y_test, Y_test_pred) mae_test = mean_absolute_error(Y_test, Y_test_pred)

View File

@ -13,7 +13,7 @@ fi
env_name=$1 env_name=$1
conda create -n "$env_name" python scikit-learn==1.3.1 xgboost tensorflow conda-forge::ray-tune keras pandas numpy matplotlib openpyxl xlsxwriter conda-forge::enlighten conda create -n "$env_name" python scikit-learn==1.3.1 xgboost conda-forge::tensorflow-cpu conda-forge::ray-tune keras pandas numpy matplotlib openpyxl xlsxwriter conda-forge::enlighten
if [ $? -eq 0 ]; then if [ $? -eq 0 ]; then
echo "Packages installed successfully in environment '$env_name'" echo "Packages installed successfully in environment '$env_name'"
else else