© Derechos Reservados - Esteban Arroyo 2023
Inicio
estebanlab@gmail.com
Página para Estadística Descriptiva Básica
Ingrese un archivo/fichero en formato Excel
packages = ["https://cdn.holoviz.org/panel/1.3.1/dist/wheels/bokeh-3.3.0-py3-none-any.whl", "https://cdn.holoviz.org/panel/1.3.1/dist/wheels/panel-1.3.1-py3-none-any.whl", "openpyxl", "scipy", "matplotlib"]
# Browser-side (PyScript / Pyodide) app for basic descriptive statistics.
#
# Flow: the user uploads an Excel file ("Cargar"); the data is previewed and
# two selectors are built: a radio list (one variable -> summary statistics,
# histogram, boxplot, scatter plot) and a checkbox list (two variables ->
# contingency table and bar charts).  "Limpiar" wipes every output container.
import io
import asyncio

import panel as pn
import js
from js import document
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import openpyxl

pn.extension()

# --- Upload widgets (rendered immediately) ---------------------------------
file_input = pn.widgets.FileInput(name='ingresa', width=300)
button_upload = pn.widgets.Button(name='Cargar', button_type='primary', width=100)
row = pn.Row(file_input, button_upload, height=100).servable(target='fileinput')

# --- Shared state, filled in by process_file --------------------------------
df = None              # DataFrame read from the uploaded file
checkbox_group = None  # CheckBoxGroup used to pick the crosstab variables

# --- DOM handles -------------------------------------------------------------
document.getElementById('table').style.display = 'none'
parent = document.querySelector('#navigation')
parent2 = document.querySelector('#navigation2')
parent3 = document.querySelector('#crosstab')
nom_var = document.getElementById('titulo_variable')
res = document.getElementById('resultado')

# --- Action buttons (rendered only after a file has been loaded) ------------
button_generate = pn.widgets.Button(name='Procesar', button_type='primary', width=100)
row = pn.Row(button_generate, height=100)
button_generate2 = pn.widgets.Button(name='Procesar', button_type='primary', width=100)
row2 = pn.Row(button_generate2, height=100)
limpia = pn.widgets.Button(name='Limpiar', button_type='primary', width=100)
row3 = pn.Row(limpia, height=100)

# Id of the wrapper <div> that holds the dynamically created radio buttons.
_RADIO_LIST_ID = 'lista_variables'


def _clear_elements(*element_ids):
    """Empty the inner HTML of every DOM element whose id is given."""
    for element_id in element_ids:
        document.getElementById(element_id).innerHTML = ""


def _set_title(element_id, text, container):
    """Write ``text`` into the title element, creating it inside ``container`` if absent."""
    title = document.getElementById(element_id)
    if title:
        title.innerHTML = text
        return
    title = document.createElement("div")
    title.setAttribute("id", element_id)
    title.innerHTML = text
    title.classList.add('h4')
    container.appendChild(title)


def _radio_container():
    """Return an empty wrapper <div> (inside ``parent``) for the variable radio buttons.

    The wrapper is reused and emptied on later uploads so the radio buttons of
    a previous file are not left behind next to the new ones.
    """
    container = document.getElementById(_RADIO_LIST_ID)
    if container:
        container.innerHTML = ""
        return container
    container = document.createElement("div")
    container.setAttribute("id", _RADIO_LIST_ID)
    parent.appendChild(container)
    return container


def process_file(*ags, **kws):
    """Load the uploaded Excel file and build the preview and variable selectors.

    Click handler of the "Cargar" button; positional/keyword arguments passed
    by Panel are ignored.  Does nothing when no file has been chosen.
    Updates the module-level ``df`` and ``checkbox_group``.
    """
    global df, checkbox_group

    if file_input.value is None:
        return

    df = pd.read_excel(io.BytesIO(file_input.value))

    pn.Column(
        "## Base de Datos",
        pn.pane.DataFrame(df, index=False, max_rows=10, show_dimensions=True),
    ).servable(target='table')

    document.getElementById("conte_nav1").classList.add('border')
    document.getElementById("conte_nav2").classList.add('border')
    document.getElementById('table').style.display = 'block'

    # Click handlers are registered once at module level; registering them here
    # would add one more watcher (and one more execution) per upload.
    row.servable(target='btn_generar')
    row2.servable(target='btn_generar2')
    row3.servable(target='btn_limpiar')

    columns_names = df.columns.values

    _set_title(
        'titulo1',
        "Elija Una variable contínua Para realizar la estadística:",
        parent,
    )
    _set_title(
        'titulo2',
        "Elija dos variables categoricas Para realizar una tabla de contingencia:",
        parent3,
    )

    checkbox_group = pn.widgets.CheckBoxGroup(
        name='Checkbox Group', options=list(columns_names), inline=False
    )
    pn.Row(checkbox_group).servable(target='crosstab')

    # One radio button + label per column.
    radios = _radio_container()
    for text in columns_names:
        item = document.createElement("input")
        item.type = "radio"
        item.value = text
        item.id = text
        item.name = "flexRadioDisabled"
        item.className = "form-check-input"

        label = document.createElement('label')
        label.htmlFor = text
        label.className = "form-check-label"
        label.appendChild(document.createTextNode(text))

        radios.appendChild(item)
        radios.appendChild(label)
        radios.appendChild(document.createElement('br'))

    parent2.innerHTML = "Resultados"


def on_btn_click(*ags, **kws):
    """Show summary statistics and plots for the variable picked in the radio list.

    Click handler of the first "Procesar" button.  Renders a statistics table
    into ``resultado``, an interactive histogram into ``grafico1`` and a
    boxplot plus scatter plot into ``grafico2``.  If no variable is selected,
    or the selected column is not numeric, a message is shown instead.
    """
    nombre = document.querySelector("[name='flexRadioDisabled']:checked")
    if not nombre:
        nom_var.innerHTML = "Seleccione una variable."
        return

    var_name = nombre.value
    variable = df.loc[:, var_name]
    if not pd.api.types.is_numeric_dtype(variable):
        nom_var.innerHTML = "La variable seleccionada no es numérica: " + var_name
        return

    nom_var.innerHTML = "Estadísticas para la variable: " + var_name
    columns_names = df.columns.values

    # Reduce the Series directly: every statistic is already a scalar, so no
    # positional indexing into a label-indexed result is needed.
    minimo = variable.min()
    maximo = variable.max()
    resultados = [
        ["Cuenta", variable.count()],
        ["Media", variable.mean()],
        ["Mediana", variable.median()],
        ["Desviación", variable.std()],
        ["Asimetría", variable.skew()],
        ["Curtosis", variable.kurt()],
        ["Mínimo", minimo],
        ["Máximo", maximo],
    ]
    estadisticas = pd.DataFrame(resultados, columns=['Estadistico', 'Valor'])
    pn.Column(pn.pane.DataFrame(estadisticas, index=False)).servable(target='resultado')

    int_range_slider = pn.widgets.IntRangeSlider(
        name='Bins', start=int(minimo), end=int(maximo)
    )
    plt.rcParams.update({'font.size': 5, 'figure.max_open_warning': 0})

    @pn.depends(int_range_slider)
    def update(value):
        """Redraw the histogram with unit-wide bins over the slider's range."""
        plt.rcParams.update({'font.size': 5, 'figure.max_open_warning': 0})
        fig, ax = plt.subplots(figsize=(2.5, 2.5), dpi=60, layout='constrained')
        # Include the upper end of the range as the last bin edge; otherwise
        # observations in (upper - 1, upper] are left out of the histogram.
        edges = list(range(int(value[0]), int(value[1]) + 1))
        if value == (0, 1) or len(edges) < 2:
            # Too few edges to form a bin: let matplotlib choose the bins.
            ax.hist(x=variable, color='#F2AB6D', rwidth=0.95)
        else:
            ax.hist(x=variable, color='#F2AB6D', bins=edges, rwidth=0.95)
        ax.set_xlabel(var_name)
        ax.set_ylabel("Frecuencia")
        ax.set_title("Histograma")
        return fig

    pn.Column(int_range_slider, update).servable(target='grafico1')
    plt.close('all')

    seleccion = df.select_dtypes(include=["object", "int64"])
    seleccion_names = seleccion.columns.values
    radio_group = pn.widgets.RadioBoxGroup(
        name='Radio Button Group',
        value=[],
        options=list(seleccion_names),
        inline=True,
        width=300,
    )

    @pn.depends(radio_group)
    def grafico_box(value):
        """Draw a boxplot of the variable, grouped by the chosen column if any."""
        medianprops = dict(color='black')
        meanprops = dict(
            marker='D', markerfacecolor='firebrick', markeredgecolor='firebrick'
        )
        if value == []:
            # Nothing picked yet: plain, ungrouped boxplot.
            ax1 = df.boxplot(
                column=var_name,
                figsize=(3, 3),
                fontsize=5,
                showmeans=True,
                patch_artist=True,
                medianprops=medianprops,
                meanprops=meanprops,
            )
        else:
            plt.rcParams.update({'font.size': 10})
            # NOTE(review): this figure is not handed to df.boxplot below, so
            # the grouped plot appears to be drawn on a figure pandas creates
            # itself -- confirm whether ax=ax1 was intended.
            fig1, ax1 = plt.subplots(figsize=(4, 4), dpi=60, layout='constrained')
            ax1 = df.boxplot(
                column=var_name,
                by=value,
                showmeans=True,
                patch_artist=True,
                medianprops=medianprops,
                meanprops=meanprops,
            )
        ax1.set_xlabel(var_name)
        ax1.set_ylabel('Valores Observados')
        return plt.gcf()

    value_x = pn.widgets.Select(
        name='x', options=list(columns_names), value=var_name, width=200
    )
    value_y = pn.widgets.Select(
        name='y', options=list(columns_names), value=var_name, width=200
    )

    @pn.depends(value_x, value_y)
    def create_scatter_plots(x, y):
        """Draw a scatter plot of column ``y`` against column ``x``."""
        plt.rcParams.update({'font.size': 5, 'figure.max_open_warning': 0})
        fig2, ax2 = plt.subplots(figsize=(2.5, 2.5), dpi=60, layout='constrained')
        ax2.scatter(x=df[x], y=df[y])
        ax2.set_title('Gráfico de Dispersión')
        ax2.set_xlabel(x)
        ax2.set_ylabel(y)
        return fig2

    pn.Column(
        radio_group, grafico_box, pn.Row(value_x, value_y), create_scatter_plots
    ).servable(target='grafico2')
    plt.close('all')


def _plot_bar(columna, nombre_columna, color, target):
    """Write a frequency bar chart of ``columna`` into the element ``target``."""
    # Categories and counts come from the same value_counts() result so each
    # bar is labelled with its own category (unique() uses a different order
    # and also reports missing values, which value_counts() leaves out).
    conteo = columna.value_counts()
    categorias = conteo.index.to_numpy()
    frecuencias = conteo.tolist()

    plt.figure(figsize=(4, 4), dpi=70, layout='constrained')
    plt.bar(categorias, frecuencias, color=color)
    plt.title('Grafico de Barra de ' + nombre_columna)
    plt.xlabel(nombre_columna)
    plt.ylabel('Frecuencia')
    for categoria, frecuencia in zip(categorias, frecuencias):
        plt.annotate(frecuencia, (categoria, frecuencia))
    # NOTE(review): `pyscript` is not imported in the visible code; this relies
    # on the runtime exposing it as a global -- confirm for the PyScript
    # release in use.
    pyscript.write(target, plt)
    plt.close('all')


def on_btn_crosstab(*ags, **kws):
    """Show the contingency table and bar charts for the first two ticked variables.

    Click handler of the second "Procesar" button.  Shows a message instead
    when fewer than two variables are ticked.
    """
    titulo = document.getElementById("titulo_cosstab")
    elegidas = checkbox_group.value
    if len(elegidas) < 2:
        titulo.innerHTML = "Seleccione dos variables."
        return

    nombre1, nombre2 = elegidas[0], elegidas[1]
    titulo.innerHTML = "Tabla de Contingencia de: " + nombre1 + " y " + nombre2

    col1 = df.loc[:, nombre1]
    col2 = df.loc[:, nombre2]
    crosstab = pd.crosstab(col1, col2, margins=True, margins_name="Total")
    pn.Column(pn.pane.DataFrame(crosstab), width=550).servable(target='tab_crosstab')

    plt.rcParams.update({'font.size': 10})
    _plot_bar(col1, nombre1, 'orange', 'tab_grafico1')
    _plot_bar(col2, nombre2, 'red', 'tab_grafico2')

    # Grouped bars: same table without the "Total" margins.
    crosstab2 = pd.crosstab(col1, col2)
    ax = crosstab2.plot.bar(rot=0, figsize=(5, 5))
    ax.set_title('Barras Agrupadas')
    ax.set_ylabel('Frecuencias')
    for p in ax.patches:
        ax.annotate(
            format(p.get_height(), '.0f'),
            (p.get_x() + p.get_width() / 2., p.get_height()),
            ha='center',
            va='center',
        )
    pyscript.write('tab_grafico3', plt)
    plt.close('all')


def on_btn_limpiar(*ags, **kws):
    """Clear every output container and reset the first <input> on the page.

    Click handler of the "Limpiar" button.
    """
    document.getElementsByTagName('INPUT')[0].value = ""
    parent2.innerHTML = ""
    _clear_elements(
        'table',
        'titulo_variable',
        'resultado',
        'grafico1',
        'grafico2',
        'navigation',
        'crosstab',
        'tab_crosstab',
        'tab_grafico1',
        'tab_grafico2',
        'tab_grafico3',
        'titulo_cosstab',
        'btn_generar',
        'btn_generar2',
        'btn_limpiar',
    )
    plt.close('all')


# Register each click handler exactly once.
button_upload.on_click(process_file)
button_generate.on_click(on_btn_click)
button_generate2.on_click(on_btn_crosstab)
limpia.on_click(on_btn_limpiar)