import os

import numpy as np

try:
    # In the code visible here the package is only used by name, as the pandas
    # ``ExcelWriter`` engine inside ``load_data``.  Keeping the import optional
    # lets the analysis helpers be imported on machines without it; environments
    # where the import succeeds behave exactly as before.
    import xlsxwriter  # noqa: F401
except ImportError:
    xlsxwriter = None


def safe_float(x):
    """Convert a numeric string to ``float``, accepting ',' as decimal mark.

    Args:
        x: String such as ``"12,5"`` or ``"12.5"``.

    Returns:
        The parsed float, or ``np.nan`` when the text is not a valid number.
    """
    try:
        return float(x.replace(',', '.'))
    except ValueError:
        return np.nan


def _format_mean_std(values):
    """Format *values* as ``"mean ± std"`` with one decimal each."""
    return f"{values.mean():.1f} ± {values.std():.1f}"


def _format_median_iqr(values):
    """Format *values* as ``"median (Q1 - Q3)"`` with one decimal each."""
    lower, upper = values.quantile(0.25), values.quantile(0.75)
    return f"{values.median():.1f} ({lower:.1f} - {upper:.1f})"


def _significance_mark(pval):
    """Return ``"*"`` when *pval* is below 0.05, otherwise ``"NS"``."""
    return "*" if pval < 0.05 else "NS"


def analisis_univariado(dfi, target=None, continuas=None, discretas=None):
    """Compare every variable between the two groups defined by *target*.

    Continuous variables are tested for normality with Shapiro-Wilk on the
    whole column.  When ``p >= 0.05`` they are summarised as ``mean ± std`` and
    compared with Welch's t-test; otherwise as ``median (Q1 - Q3)`` and compared
    with Mann-Whitney U.  Discrete variables are cross-tabulated against the
    target and compared with Fisher's exact test when the variable has exactly
    two categories, or with the chi-square test otherwise; the reported
    percentage/count is the one of the table's second category column.

    Args:
        dfi: pandas DataFrame holding *target* and all listed variables.
        target: Name of the column that splits the data; it must hold exactly
            two distinct values.
        continuas: Names of the continuous columns (default: none).
        discretas: Names of the discrete columns (default: none).

    Returns:
        A pandas DataFrame with one row per variable and the columns
        ``Variable``, ``Unidad``, one summary column per group (the header
        carries the group size), ``Pvalue``, a significance marker and
        ``Total``.

    Raises:
        ValueError: If *target* is ``None`` or does not have exactly two
            unique values.
    """
    if target is None:
        raise ValueError("No target variable provided")

    import pandas as pd
    from scipy.stats import (
        chi2_contingency,
        fisher_exact,
        mannwhitneyu,
        shapiro,
        ttest_ind,
    )

    continuas = [] if continuas is None else continuas
    discretas = [] if discretas is None else discretas

    label_columns = dfi.drop(target, axis=1).columns

    # Split the data into the two groups given by the target values.
    groups = sorted(dfi[target].unique())
    if len(groups) != 2:
        raise ValueError("Target variable must have exactly two unique values")
    glabel = {1: 'MODY Pos', 0: 'MODY Neg'}
    group1, group2 = groups
    data_group1 = dfi[dfi[target] == group1][label_columns]
    data_group2 = dfi[dfi[target] == group2][label_columns]

    rows = []

    # Continuous variables.
    for var in continuas:
        group1_values = data_group1[var]
        group2_values = data_group2[var]

        # Normality is assessed on the full column, not per group.
        _, p = shapiro(dfi[var], nan_policy='raise')
        if p >= 0.05:
            _, pval = ttest_ind(group1_values, group2_values, equal_var=False)
            summarise = _format_mean_std
        else:
            _, pval = mannwhitneyu(group1_values, group2_values)
            summarise = _format_median_iqr

        rows.append([
            var,
            " ",
            summarise(group1_values),
            summarise(group2_values),
            f"{pval:.3f}",
            _significance_mark(pval),
            summarise(dfi[var]),
        ])

    # Discrete variables.
    for var in discretas:
        freq_table = dfi.groupby([target, var]).size().unstack(fill_value=0)
        percentages = freq_table.div(freq_table.sum(axis=1), axis=0) * 100

        # A single ``pval`` is used for both tests so the reported value always
        # belongs to the test that was actually run for this variable.
        if freq_table.shape[1] == 2:
            _, pval = fisher_exact(freq_table.values)
        else:
            pval = chi2_contingency(freq_table)[1]

        freq_matrix = freq_table.values
        percentages_matrix = percentages.values
        tot = freq_matrix[0][1] + freq_matrix[1][1]
        totf = 100 * tot / len(dfi[var])
        rows.append([
            var,
            " ",
            f"{percentages_matrix[0][1]:.1f} ({freq_matrix[0][1]}/{len(data_group1)})",
            f"{percentages_matrix[1][1]:.1f} ({freq_matrix[1][1]}/{len(data_group2)})",
            f"{pval:.3f}",
            _significance_mark(pval),
            f"{totf:.1f} ({tot}/{len(dfi[var])})",
        ])

    # Fall back to the raw group value when the target is not coded as 0/1.
    header1 = "{}\n n={}".format(glabel.get(group1, str(group1)), len(data_group1))
    header2 = "{}\n n={}".format(glabel.get(group2, str(group2)), len(data_group2))
    return pd.DataFrame(rows, columns=[
        "Variable", "Unidad", header1, header2, "Pvalue", " ", "Total",
    ])


def _parse_numeric_column(series, ndigits=None):
    """Return *series* as float64, parsing string cells with ``safe_float``.

    Non-string cells are passed through untouched.  When *ndigits* is given,
    parsed strings are additionally rounded to that many digits.
    """
    def convert(value):
        if not isinstance(value, str):
            return value
        number = safe_float(value)
        return number if ndigits is None else round(number, ndigits)

    return series.map(convert).astype(np.float64)


def _print_diagnosticos(values, header):
    """Print how many entries *values* has and its distinct diagnoses."""
    print(header.format(len(values)))
    print("Diagnosticos del grupo:")
    for diagnostico in set(values.tolist()):
        print("- '{}'".format(diagnostico))


def _print_value_counts(series):
    """Print every distinct value of *series* with its count (NaN included)."""
    for value, count in series.value_counts(dropna=False).items():
        print(f"Value: {value}, Count: {count}")


def load_data(Reload=False):
    """Return the MODY1, MODY2, MODY3 and MODY5 datasets.

    When ``MODY_data.xlsx`` exists in the working directory the four
    ``Dataset MODYn`` sheets are read from it.  Otherwise the datasets are
    derived from ``HC.xlsx``: unused columns are dropped, numeric columns are
    parsed, class flags are computed from the ``diagnostico`` and
    ``sospecha MODY`` columns, family history and sex are inferred from free
    text, progress reports are printed, and every intermediate table is
    written to ``MODY_data.xlsx`` for manual verification.

    Args:
        Reload: Currently ignored; accepted for backward compatibility.

    Returns:
        Tuple ``(dsm1, dsm2, dsm3, dsm5)`` of pandas DataFrames.

    Raises:
        FileNotFoundError: If neither ``MODY_data.xlsx`` nor ``HC.xlsx``
            exists.
    """
    import pandas as pd

    if os.path.isfile('MODY_data.xlsx'):
        sheets = ('Dataset MODY1', 'Dataset MODY2', 'Dataset MODY3', 'Dataset MODY5')
        with pd.ExcelFile("MODY_data.xlsx") as xls:
            return tuple(pd.read_excel(xls, sheet_name=sheet) for sheet in sheets)

    rule = "========================================================================================"
    print(rule)
    if not os.path.isfile('HC.xlsx'):
        # The original raised a bare string, which is itself a TypeError in
        # Python 3; raise a real exception carrying the same tag.
        raise FileNotFoundError(
            "NoDatasetToLoad: neither 'MODY_data.xlsx' nor 'HC.xlsx' was found")

    with pd.ExcelFile("HC.xlsx") as xls:
        raw_data = pd.read_excel(xls, header=0)

    # Remove the columns that are not of interest.
    drop_columns = ['HC', 'probando', 'procedencia', 'apellido', 'fecha ingreso',
                    'edad', 'pago', 'factura', 'monto', 'Pendiente', 'método',
                    'Referencias', 'Analisis', 'aclar_pagos', 'tratamiento',
                    'notas', 'nro de familia', 'resultado']
    raw_data.drop(columns=drop_columns, inplace=True)

    # Parse numbers typed as text; the last three columns are also rounded.
    for column, ndigits in (('IMC', None), ('A1c', None), ('edad diag', 0),
                            ('glu ayu', 0), ('glu 120', 0)):
        raw_data[column] = _parse_numeric_column(raw_data[column], ndigits)

    sospecha = raw_data['sospecha MODY']
    _print_diagnosticos(
        raw_data.loc[sospecha == '2', 'diagnostico'],
        "Total elementos en el dataset con sospecha MODY2:\t{}")
    print(rule)
    _print_diagnosticos(
        raw_data.loc[sospecha == '3', 'diagnostico'],
        "Total elementos en el dataset con sospecha MODY3:\t{}")
    print(rule)
    _print_diagnosticos(
        raw_data.loc[~sospecha.isin(['2', '3']), 'diagnostico'],
        "Total elementos en el dataset con sospechas diferentes a 2 o 3:\t{}")

    # Build the class flags from the confirmation of the suspicion.
    mody_types = ('1', '2', '3', '5')
    for column in ('MODY1_pos', 'MODY1_neg', 'MODY2_pos', 'MODY2_neg',
                   'MODY3_pos', 'MODY3_neg', 'MODY5_pos', 'MODY5_neg',
                   'SiEntiqueta', 'Normal'):
        raw_data[column] = False

    diagnostico = raw_data['diagnostico']

    def contains(text):
        return diagnostico.str.contains(text, case=False, na=False)

    no_confirmado = contains('No se confirma')
    for n in mody_types:
        raw_data.loc[contains('Diagnóstico MODY' + n), 'MODY' + n + '_pos'] = True
    for n in mody_types:
        raw_data.loc[(sospecha == n) & no_confirmado, 'MODY' + n + '_neg'] = True
    raw_data.loc[contains('Normal'), 'Normal'] = True
    for text in ('No se hace', 'Sin diagnóstico', 'Otros', 'No es MODY',
                 'Falta definir'):
        raw_data.loc[contains(text), 'SiEntiqueta'] = True
    # "Not confirmed" without a suspicion of type 1/2/3/5 cannot be labelled.
    raw_data.loc[(~sospecha.isin(list(mody_types))) & no_confirmado,
                 'SiEntiqueta'] = True
    raw_data.loc[diagnostico.isna(), 'SiEntiqueta'] = True

    print("================== Datos sin confirmar/descartar ningún MODY ===========================")
    tipos = ['MODY1_pos', 'MODY1_neg', 'MODY2_pos', 'MODY2_neg', 'MODY3_pos',
             'MODY3_neg', 'MODY5_pos', 'MODY5_neg', 'Normal', 'SiEntiqueta']
    # Print the records that do not belong to any category.
    for _, row in raw_data[~raw_data[tipos].any(axis=1)].iterrows():
        print("sujeto: {} \t| sospecha: {} \t| diagnostico: {:18} \t | historial: {} ".format(
            row['protocolo'], row['sospecha MODY'], row['diagnostico'],
            row['historial']))

    print("====================== Diagnosticos confirmados/descartados ==========================")
    for tipo in tipos:
        total = int(raw_data[tipo].sum())
        print("{:20} \t {} ({}%)".format(
            tipo, total, round((total / len(raw_data)) * 100, 2)))
    print("=========================== ==================== ==================================")

    label_vars = ['protocolo', 'nombre', 'edad diag', 'IMC', 'antecedentes fam',
                  'glu ayu', 'glu 120', 'A1c', 'MODY1_pos', 'MODY1_neg',
                  'MODY2_pos', 'MODY2_neg', 'MODY3_pos', 'MODY3_neg',
                  'MODY5_pos', 'MODY5_neg', 'Normal']
    # Explicit copy: the frame gets new columns below and must not be a view
    # of ``raw_data``.
    pre_labeled_data = raw_data.loc[~raw_data['SiEntiqueta'], label_vars].copy()

    # ## 2.2. Family history
    # The field is derived from the free-text comment about the family group.
    antecedentes = pre_labeled_data['antecedentes fam']
    antecedentes_lower = antecedentes.str.lower()
    pre_labeled_data['diabetes_familia'] = np.nan
    # -1 == no family history of diabetes.
    pre_labeled_data.loc[antecedentes_lower.str.startswith('no', na=False),
                         'diabetes_familia'] = -1.0
    pre_labeled_data.loc[antecedentes.str.contains('no dm', case=False, na=False),
                         'diabetes_familia'] = -1.0
    # 1 == family history of diabetes.  Prefixes cover siblings, parents,
    # both parents, children, aunts/uncles and grandparents; 'mare' is a
    # misspelling of "madre" found in the data.  Rows matching nothing stay
    # NaN (unknown, e.g. adopted).
    for prefix in ('si', 'her', 'pad', 'mad', 'amb', 'hij', 'multi', 'ti',
                   'abu', 'diab', 'mare'):
        pre_labeled_data.loc[antecedentes_lower.str.startswith(prefix, na=False),
                             'diabetes_familia'] = 1.0

    familia_missing = pre_labeled_data['diabetes_familia'].isna()
    print("==================================== Clasificados =============================================")
    _print_value_counts(pre_labeled_data.loc[~familia_missing, 'diabetes_familia'])
    print("==================================== No se pudo Clasificar =============================================")
    _print_value_counts(pre_labeled_data.loc[familia_missing, 'antecedentes fam'])

    # ## 2.3. Sex
    # The sex is inferred from the first names.
    pre_labeled_data['sexo'] = np.nan
    # 1 == female.
    nombres_f = ['andrea', 'agustina', 'antonella', 'angelica', 'alicia', 'alejandra', 'ariana', 'ayelen', 'ayleen',
                 'belen', 'bianca', 'camila', 'carolina', 'catalina', 'claudia', 'delfina', 'eliana', 'estefania',
                 'eva', 'karina', 'florencia', 'gabriela', 'georgina', 'geraldine', 'guillermina', 'jazmin',
                 'jessica', 'julieta', 'karen', 'laura', 'lidia', 'lucia', 'magali', 'mina', 'mabel', 'malena',
                 'malena', 'mariana', 'marina', 'martina', 'micaela', 'micalela', 'milagros', 'milena', 'miriam',
                 'morena', 'natalia', 'noemi', 'nayla', 'rocio', 'rosa', 'sandra', 'sara', 'sasha', 'silvia',
                 'silvana', 'sofia', 'solange', 'soledad', 'valentina', 'victoria', 'vanina', 'vanesa', 'virginia',
                 'yanina', 'zamira', 'abril', 'adriana', 'ailen', 'aixa', 'ambar', 'ana', 'ana esmerlada',
                 'ana iris', 'anahi', 'analia', 'aylen', 'barbara', 'brenda', 'brisa', 'candela',
                 'carmela (carmen)', 'chiara', 'elizabeth', 'ema', 'emilia', 'emma', 'eugenia', 'fiorella',
                 'flavia', 'franca', 'francesca', 'graciela', 'helena', 'isabela', 'isabella', 'jacinta', 'jesica',
                 'jorgelina', 'julia', 'lorena', 'lucila', 'lucía', 'magdalena', 'maricruz', 'mariel', 'mariela',
                 'marilina', 'marixa', 'martha', 'maría emilia', 'maría verónica', 'melany', 'mercedes', 'monica',
                 'nancy rosa alba', 'nerina', 'oriana', 'paola', 'patricia', 'paula', 'pilar', 'priscila',
                 'renata', 'romina', 'roxana', 'ruth', 'shirley', 'tamara', 'valeria']
    # Names that raised doubts are kept apart ('qian' is a female name of
    # Chinese origin).
    nombres_f += ['zahirah', 'antu', 'tali', 'ma laura', 'qian', 'maria']
    # -1 == male.
    nombres_h = ['agustin', 'alejandro', 'alvaro', 'augusto', 'benjamin', 'bruno', 'camilo', 'cristian', 'damian',
                 'dario', 'daniel', 'dante', 'david', 'diego', 'emiliano', 'elian', 'enzo', 'ezequiel', 'facundo',
                 'federico', 'felipe', 'fernando', 'felix', 'franco', 'german', 'gonzalo', 'gustavo', 'guillermo',
                 'ignacio', 'ian', 'joaquin', 'juan', 'julian', 'leandro', 'lorenzo', 'lucas', 'luka', 'marcelo',
                 'marcos', 'martin', 'martin', 'maximiliano', 'mateo', 'matias', 'pablo', 'nehemias', 'nicolas',
                 'ramiro', 'rogelio', 'rodrigo', 'santiago', 'santino', 'sebastian', 'thiago', 'tomas', 'alan',
                 'alfredo', 'antonio', 'axel', 'benicio', 'carlos', 'carlos gonzalo', 'claudio', 'dylan',
                 'eduardo', 'emanuel', 'ernesto', 'fabian', 'farid', 'fidel', 'francisco', 'gabriel facundo',
                 'gael', 'gerardo', 'gerónimo', 'hernan', 'ivan', 'javier', 'jorge', 'julio', 'mauricio',
                 'miguel angel', 'oscar', 'pedro', 'raul', 'rene', 'ricardo', 'roberto', 'sergio', 'teo', 'tiago',
                 'tobias', 'walter']
    nombres_h.append('agustín')

    # Longest prefix wins.  Applying every female prefix before any male one
    # labelled 'julian', 'emanuel' and 'emiliano' as female through the shorter
    # 'julia', 'ema' and 'emilia'.  The sort is stable, so on equal length the
    # female entry still takes precedence, as before.
    coded_names = [(name, 1.0) for name in nombres_f]
    coded_names += [(name, -1.0) for name in nombres_h]
    coded_names.sort(key=lambda item: len(item[0]), reverse=True)
    nombre_lower = pre_labeled_data['nombre'].str.lower()
    for prefix, code in coded_names:
        pending = pre_labeled_data['sexo'].isna()
        pre_labeled_data.loc[pending & nombre_lower.str.startswith(prefix, na=False),
                             'sexo'] = code

    sexo_missing = pre_labeled_data['sexo'].isna()
    print("==================================== Clasificados =============================================")
    _print_value_counts(pre_labeled_data.loc[~sexo_missing, 'sexo'])
    listnames = []
    print("==================================== No se pudo Clasificar =============================================")
    for value, count in pre_labeled_data.loc[sexo_missing, 'nombre'].value_counts(dropna=False).items():
        print(f"Value: {value}, Count: {count}")
        listnames.append(value)
    print(sorted([x for x in listnames if isinstance(x, str)]))

    # ## 2.1. Incomplete records
    # Report missing values in the variables of interest.
    variables = ['sexo', 'diabetes_familia', 'edad diag', 'IMC', 'glu ayu',
                 'glu 120', 'A1c']
    n_records = pre_labeled_data.shape[0]
    print(rule)
    print("Total registros en el dataset etiquetado:\t{}".format(n_records))
    print("Variables:\t{}".format(str(variables)))
    print("==================== Desglose por N de variables faltantes ==============================")
    missing_per_row = pre_labeled_data[variables].isnull().sum(axis=1)
    for num in range(len(variables) + 1):
        nrows = int((missing_per_row == num).sum())
        print("Le faltan {}/{} variables:\t{}\t({}%)".format(
            num, len(variables), nrows, round(nrows * 100 / n_records, 2)))
    print("============================ Desglose por variables =====000=============================")
    for var in variables:
        nrows = int(pre_labeled_data[var].isna().sum())
        print("Variable {} ausente en \t\t {} ({}%) registros ".format(
            var, nrows, round(nrows * 100 / n_records, 2)))

    # Numeric label per MODY type: 1 = confirmed, 0 = not confirmed,
    # NaN = the record carries no flag for that type.
    for n in mody_types:
        label = 'MODY' + n + '_label'
        pre_labeled_data[label] = np.nan
        pre_labeled_data.loc[pre_labeled_data['MODY' + n + '_pos'], label] = 1
        pre_labeled_data.loc[pre_labeled_data['MODY' + n + '_neg'], label] = 0

    # # 3. Initial data
    def labelled_subset(label):
        has_label = ~pre_labeled_data[label].isna()
        return pre_labeled_data[has_label][variables + [label]]

    dsm1_complete = labelled_subset('MODY1_label')
    dsm2_complete = labelled_subset('MODY2_label')
    dsm3_complete = labelled_subset('MODY3_label')
    dsm5_complete = labelled_subset('MODY5_label')
    dsnormal_complete = pre_labeled_data[pre_labeled_data['Normal']][variables]

    # # 4. Intermediate output for manual verification
    with pd.ExcelWriter("MODY_data.xlsx", engine='xlsxwriter') as xls:
        raw_data.to_excel(xls, sheet_name='HC Original', index=False)
        pre_labeled_data.to_excel(xls, sheet_name='Datos etiquetados', index=False)
        raw_data[raw_data['SiEntiqueta']].to_excel(
            xls, sheet_name='Datos excluídos', index=False)
        for n, dataset in (('1', dsm1_complete), ('2', dsm2_complete),
                           ('3', dsm3_complete), ('5', dsm5_complete)):
            dataset.to_excel(xls, sheet_name='Dataset MODY' + n, index=False)
            dataset.dropna().to_excel(
                xls, sheet_name='Dataset MODY' + n + ' sin ausentes', index=False)
        dsnormal_complete.to_excel(xls, sheet_name='Sin Diabetes', index=False)

    return dsm1_complete, dsm2_complete, dsm3_complete, dsm5_complete