main
ifiguero 2025-01-18 23:14:21 -03:00
parent e0acfa3623
commit daa15858db
1 changed file with 25 additions and 2 deletions

View File

@ -31,6 +31,7 @@ def analisis_univariado(dfi, target=None, continuas=[], discretas=[]):
data_group2 = dfi[dfi[target] == group2][label_columns]
results = []
resultsmody = []
# Análisis de variables continuas
for var in continuas:
@ -44,6 +45,7 @@ def analisis_univariado(dfi, target=None, continuas=[], discretas=[]):
if normal:
# Distribución normal: media, desviación estándar, y test t
meang, stdg = dfi[var].mean(), dfi[var].std()
mean1, std1 = group1_values.mean(), group1_values.std()
mean2, std2 = group2_values.mean(), group2_values.std()
t_stat, t_pval = ttest_ind(group1_values, group2_values, equal_var=False)
@ -53,8 +55,18 @@ def analisis_univariado(dfi, target=None, continuas=[], discretas=[]):
f"mean: {mean2:.2f}, stdev: {std2:.2f}",
f"t Student: p={t_pval:.3f} " + ("Dif Significativa" if t_pval < 0.05 else "Dif No-Significativa")
])
resultsmody.append([
var, " ", f"{mean1:.1f} ± {std1:.1f}", f"{mean2:.1f} ± {std2:.1f}",
f"{t_pval:.3f}", ("*" if t_pval < 0.05 else "NS"), f"{meang:.1f} ± {stdg:.1f}"
])
else:
# Distribución no normal: mediana, rango intercuartil, y test Mann-Whitney
mediang, qrg, qlg = group1_values.median(), group1_values.quantile(0.75), group1_values.quantile(0.25)
qr1, ql1 = group1_values.quantile(0.75), group1_values.quantile(0.25)
qr2, ql2 = group2_values.quantile(0.75), group2_values.quantile(0.25)
median1, iqr1 = group1_values.median(), group1_values.quantile(0.75) - group1_values.quantile(0.25)
median2, iqr2 = group2_values.median(), group2_values.quantile(0.75) - group2_values.quantile(0.25)
mw_stat, mw_pval = mannwhitneyu(group1_values, group2_values)
@ -65,6 +77,11 @@ def analisis_univariado(dfi, target=None, continuas=[], discretas=[]):
f"Mann-Whitney: p={mw_pval:.3f} " + ("Dif Significativa" if mw_pval < 0.05 else "Dif No-Significativa")
])
resultsmody.append([
var, " ", f"{median1:.1f} ({ql1:.1f} - {qr1:.1f})", f"{median1:.1f} ({ql2:.1f} - {qr2:.1f})",
f"{t_pval:.3f}", ("*" if t_pval < 0.05 else "NS"), f"{mediang:.1f} ({qlg:.1f} - {qrg:.1f})"
])
# Análisis de variables discretas
for var in discretas:
freq_table = dfi.groupby([target, var]).size().unstack(fill_value=0)
@ -86,10 +103,16 @@ def analisis_univariado(dfi, target=None, continuas=[], discretas=[]):
f"Porcentajes: {percentages.values.round(1)}",
test_result
])
tot = freq_table[0][1] + freq_table[1][1]
totf = 100 * tot / len(dfi[var])
resultsmody.append([
var, " ", f"{percentages[0][1]:.1f} ({freq_table[0][1]}/{len(group1)})", f"{percentages[1][1]:.1f} ({freq_table[1][1]}/{len(group2)})"
f"{t_pval:.3f}", ("*" if t_pval < 0.05 else "NS"), f"{totf:.1f} ({tot}/{len(dfi[var])})"
])
# Crear DataFrame con los resultados
results_df = pd.DataFrame(results, columns=[
"Variable", "Tipo", "Shapiro-Wilk", glabel[group1], glabel[group2], "Comparación"
results_df = pd.DataFrame(resultsmody, columns=[
"Variable", "Unidad", "{}\n={}".format(glabel[group1], len(group1)),"{}\n={}".format(glabel[group2], len(group2)), "Pvalue", " ", "Total"
])
return results_df