From 5e87e9b8f2ed89aaa45d9e4ceae0b2b7f619e607 Mon Sep 17 00:00:00 2001 From: Israel Figueroa Date: Sun, 16 Mar 2025 21:26:01 -0300 Subject: [PATCH] test --- TrainerClass.py | 38 +++++++++++++++++++------------------- create_conda.sh | 2 +- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/TrainerClass.py b/TrainerClass.py index 8cb9c51..f0ccf1d 100644 --- a/TrainerClass.py +++ b/TrainerClass.py @@ -177,7 +177,7 @@ class eNoseTrainer: else: return {} - def train_and_score_conv1D_v1(self, X_train_orig, X_test_orig, Y_train_orig, Y_test_orig, epochs=30, num_samples=25): + def search_best_conv1D_v1(self, X_train_orig, X_test_orig, Y_train_orig, Y_test_orig, epochs=30, nsamples=0.1): ray.init(ignore_reinit_error=True) X_train_ref = ray.put(X_train_orig) Y_train_ref = ray.put(Y_train_orig) @@ -187,9 +187,9 @@ class eNoseTrainer: def build_model_conv1D(config, input_shape, output_dim): model = keras.Sequential([ - layers.Conv1D(filters=config['filters'], kernel_size=config['kernel_size'], activation='relu', input_shape=input_shape), + layers.Conv1D(filters=config['filters'], kernel_size=config['kernel_size'], strides=config['kernel_size']//2, activation='relu', input_shape=input_shape), layers.MaxPooling1D(pool_size=config['pool_size']), - layers.Conv1D(filters=config['filters'] * 2, kernel_size=config['kernel_size'], activation='relu'), + layers.Conv1D(filters=config['filters'] * 2, kernel_size=config['kernel_size'], strides=config['kernel_size']//2, activation='relu'), layers.MaxPooling1D(pool_size=config['pool_size']), layers.Flatten(), layers.Dense(config['dense_units'], activation='relu'), @@ -216,7 +216,7 @@ class eNoseTrainer: validation_data=(X_testc1D, Y_testc1D), epochs=config['epochs'], batch_size=config['batch_size'], - verbose=1, + verbose=0, callbacks=[early_stopping] ) @@ -226,7 +226,7 @@ class eNoseTrainer: config_space = { 'filters': tune.choice([16, 32, 64]), - 'kernel_size': tune.choice([3, 5]), + 'kernel_size': tune.choice([3, 5, 7]), 
'pool_size': tune.choice([2, 3]), 'dense_units': tune.choice([32, 64, 128]), 'dropout': tune.choice([0.1, 0.2, 0.3]), @@ -234,10 +234,11 @@ class eNoseTrainer: 'batch_size': tune.choice([16, 32, 64]), 'epochs': epochs } + total_space = (3*3*2*3*3*3*3) scheduler = ASHAScheduler(metric='mse', mode='min', max_t=epochs, grace_period=5, reduction_factor=2) # analysis = tune.run(train_model, config=config_space, num_samples=num_samples, scheduler=scheduler) - analysis = tune.run( tune.with_parameters(train_model_conv1D), config=config_space, num_samples=num_samples, scheduler=scheduler, max_concurrent_trials=3 ) + analysis = tune.run( tune.with_parameters(train_model_conv1D), config=config_space, num_samples=int(nsamples*total_space), scheduler=scheduler, max_concurrent_trials=8 ) best_config = analysis.get_best_config(metric='mse', mode='min') best_model = build_model_conv1D(best_config, X_train_orig.shape[1:], Y_train_orig.shape[1]) @@ -483,7 +484,7 @@ class eNoseTrainer: self.saveCheckPoint() - + sample_size = 50000 self.loader.smooth = None self.loader.reset() for window in windows: @@ -516,19 +517,19 @@ class eNoseTrainer: model_file = '{}/{}/{}-w{}/{}'.format(self.name, self.loader.target, dataset, window, model_id ) - sample_size = 25000 - X_train_sample, _, Y_train_sample, _ = train_test_split(X_train, Y_train, stratify=G_train, train_size=sample_size / len(X_train), random_state=get_seed()) + X_train_sample, _, Y_train_sample, _ = train_test_split(X_train, Y_train, stratify=G_train, train_size=0.8*sample_size / len(X_train), random_state=get_seed()) X_test_sample, _, Y_test_sample, _ = train_test_split(X_test, Y_test, stratify=G_test, train_size=0.2*sample_size / len(X_test), random_state=get_seed()) self.logger.debug(f"Y_train_sample: {Y_train_sample.shape}") self.logger.debug(f"Y_test_sample: {Y_test_sample.shape}") - optimized_model, model_params = self.train_and_score_conv1D_v1(X_train_sample, X_test_sample, Y_train_sample, Y_test_sample) + optimized_model, model_params = self.search_best_conv1D_v1(X_train_sample, X_test_sample, Y_train_sample, Y_test_sample)
 - optimized_model.fit(X_train, Y_train, epochs=model_params['epochs'], batch_size=model_params['batch_size'], verbose=0) + self.logger.info(f"Training Model {model_id} with {model_params}") + optimized_model.fit(X_train, Y_train, epochs=model_params['epochs'], batch_size=model_params['batch_size'], verbose=1) - Y_train_pred = best_model.predict(X_train) - Y_test_pred = best_model.predict(X_test) + Y_train_pred = optimized_model.predict(X_train)
 + Y_test_pred = optimized_model.predict(X_test) mse_train = mean_squared_error(Y_train, Y_train_pred) mae_test = mean_absolute_error(Y_test, Y_test_pred) @@ -592,19 +593,18 @@ class eNoseTrainer: self.bar.update() continue - sample_size = 25000 - X_train_sample, _, Y_train_sample, _ = train_test_split(X_train, Y_train, stratify=G_train, train_size=sample_size / len(X_train), random_state=get_seed()) + X_train_sample, _, Y_train_sample, _ = train_test_split(X_train, Y_train, stratify=G_train, train_size=0.8*sample_size / len(X_train), random_state=get_seed()) X_test_sample, _, Y_test_sample, _ = train_test_split(X_test, Y_test, stratify=G_test, train_size=0.2*sample_size / len(X_test), random_state=get_seed()) self.logger.debug(f"Y_train_sample: {Y_train_sample.shape}") self.logger.debug(f"Y_test_sample: {Y_test_sample.shape}") - optimized_model, model_params = self.train_and_score_conv1D_v1(X_train_sample, X_test_sample, Y_train_sample, Y_test_sample) + optimized_model, model_params = self.search_best_conv1D_v1(X_train_sample, X_test_sample, Y_train_sample, Y_test_sample) - optimized_model.fit(X_train, Y_train, epochs=model_params['epochs'], batch_size=model_params['batch_size'], verbose=0) + optimized_model.fit(X_train, Y_train, epochs=model_params['epochs'], batch_size=model_params['batch_size'], verbose=1) - Y_train_pred = best_model.predict(X_train) - Y_test_pred = best_model.predict(X_test) + Y_train_pred = optimized_model.predict(X_train) + Y_test_pred = optimized_model.predict(X_test) mse_train = mean_squared_error(Y_train, Y_train_pred) mae_test = mean_absolute_error(Y_test, Y_test_pred) diff --git a/create_conda.sh b/create_conda.sh index 57e176a..6865105 100755 --- a/create_conda.sh +++ b/create_conda.sh @@ -13,7 +13,7 @@ fi env_name=$1 -conda create -n "$env_name" python scikit-learn==1.3.1 xgboost tensorflow conda-forge::ray-tune keras pandas numpy matplotlib openpyxl xlsxwriter conda-forge::enlighten +conda create -n "$env_name" python scikit-learn==1.3.1 xgboost conda-forge::tensorflow-cpu conda-forge::ray-tune keras pandas numpy matplotlib openpyxl xlsxwriter conda-forge::enlighten if [ $? -eq 0 ]; then echo "Packages installed successfully in environment '$env_name'" else