reab5555 committed on
Commit
b04e9fe
·
verified ·
1 Parent(s): d398f50

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -13
app.py CHANGED
@@ -12,9 +12,8 @@ import tempfile
12
  # Suppress specific runtime warnings
13
  warnings.filterwarnings("ignore", category=RuntimeWarning)
14
 
15
-
16
  # Function to check distribution type
17
- def check_distribution(target_column):
18
  data = target_column.dropna()
19
 
20
  # Distribution dictionaries
@@ -69,10 +68,11 @@ def check_distribution(target_column):
69
  # Create a single plot for all distributions
70
  plt.figure(figsize=(12, 8), dpi=400)
71
 
72
- # Plot the original data distribution as a histogram
73
- sns.histplot(data, kde=False, stat="density", bins=50, color=actual_data_color, label='Actual Data Distribution')
 
74
 
75
- # Overlay the actual data KDE line
76
  sns.kdeplot(data, color=actual_data_color, lw=2, label='Actual Data Distribution Line')
77
 
78
  # Overlay the top 3 best fit distributions
@@ -110,7 +110,8 @@ def check_distribution(target_column):
110
  p_value_text = "<0.001" if normal_p_value < 0.001 else f"{normal_p_value:.5f}"
111
 
112
  plt.figure(figsize=(12, 8), dpi=400)
113
- sns.histplot(data, kde=False, stat="density", bins=50, color=actual_data_color, label='Actual Data Distribution')
 
114
  sns.kdeplot(data, color=actual_data_color, lw=2, label='Actual Data Distribution Line')
115
  plt.plot(normal_best_fit_data, normal_pdf, color=normal_color, lw=2, label=f'Normal Fit (p-value={p_value_text})')
116
  plt.title("Comparison with Normal Distribution")
@@ -125,26 +126,24 @@ def check_distribution(target_column):
125
 
126
  return result_text, best_fit_plot, normal_comparison_plot
127
 
128
-
129
  # Function to load the CSV file and extract numeric column names
130
  def load_file(file):
131
  df = pd.read_csv(file.name)
132
  numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()
133
  return gr.update(choices=numeric_columns), df
134
 
135
-
136
  # Function to analyze the selected column
137
- def analyze_column(selected_column, df):
138
- result_text, best_fit_plot, normal_comparison_plot = check_distribution(df[selected_column])
139
  return result_text, best_fit_plot, normal_comparison_plot
140
 
141
-
142
  # Define the Gradio app layout
143
  with gr.Blocks() as demo:
144
  gr.Markdown("# Data Distribution Fit\n")
145
 
146
  file_input = gr.File(label="Upload CSV File")
147
  column_selector = gr.Dropdown(label="Select Target Column", choices=[])
 
148
  analyze_button = gr.Button("Fit")
149
  output_text = gr.Textbox(label="Results")
150
  best_fit_plot_output = gr.Image(label="Best Fit Distributions")
@@ -157,7 +156,7 @@ with gr.Blocks() as demo:
157
  file_input.upload(load_file, inputs=file_input, outputs=[column_selector, df_state])
158
 
159
  # Perform analysis on the selected column
160
- analyze_button.click(analyze_column, inputs=[column_selector, df_state],
161
  outputs=[output_text, best_fit_plot_output, normal_comparison_output])
162
 
163
- demo.launch()
 
12
  # Suppress specific runtime warnings
13
  warnings.filterwarnings("ignore", category=RuntimeWarning)
14
 
 
15
  # Function to check distribution type
16
+ def check_distribution(target_column, show_histogram):
17
  data = target_column.dropna()
18
 
19
  # Distribution dictionaries
 
68
  # Create a single plot for all distributions
69
  plt.figure(figsize=(12, 8), dpi=400)
70
 
71
+ # Plot the original data distribution as a histogram if show_histogram is True
72
+ if show_histogram:
73
+ sns.histplot(data, kde=False, stat="density", bins=50, color=actual_data_color, label='Actual Data Distribution')
74
 
75
+ # Always plot the KDE line
76
  sns.kdeplot(data, color=actual_data_color, lw=2, label='Actual Data Distribution Line')
77
 
78
  # Overlay the top 3 best fit distributions
 
110
  p_value_text = "<0.001" if normal_p_value < 0.001 else f"{normal_p_value:.5f}"
111
 
112
  plt.figure(figsize=(12, 8), dpi=400)
113
+ if show_histogram:
114
+ sns.histplot(data, kde=False, stat="density", bins=50, color=actual_data_color, label='Actual Data Distribution')
115
  sns.kdeplot(data, color=actual_data_color, lw=2, label='Actual Data Distribution Line')
116
  plt.plot(normal_best_fit_data, normal_pdf, color=normal_color, lw=2, label=f'Normal Fit (p-value={p_value_text})')
117
  plt.title("Comparison with Normal Distribution")
 
126
 
127
  return result_text, best_fit_plot, normal_comparison_plot
128
 
 
129
  # Function to load the CSV file and extract numeric column names
130
  def load_file(file):
131
  df = pd.read_csv(file.name)
132
  numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()
133
  return gr.update(choices=numeric_columns), df
134
 
 
135
  # Function to analyze the selected column
136
+ def analyze_column(selected_column, df, show_histogram):
137
+ result_text, best_fit_plot, normal_comparison_plot = check_distribution(df[selected_column], show_histogram)
138
  return result_text, best_fit_plot, normal_comparison_plot
139
 
 
140
  # Define the Gradio app layout
141
  with gr.Blocks() as demo:
142
  gr.Markdown("# Data Distribution Fit\n")
143
 
144
  file_input = gr.File(label="Upload CSV File")
145
  column_selector = gr.Dropdown(label="Select Target Column", choices=[])
146
+ show_histogram = gr.Checkbox(label="Show Histogram", value=True)
147
  analyze_button = gr.Button("Fit")
148
  output_text = gr.Textbox(label="Results")
149
  best_fit_plot_output = gr.Image(label="Best Fit Distributions")
 
156
  file_input.upload(load_file, inputs=file_input, outputs=[column_selector, df_state])
157
 
158
  # Perform analysis on the selected column
159
+ analyze_button.click(analyze_column, inputs=[column_selector, df_state, show_histogram],
160
  outputs=[output_text, best_fit_plot_output, normal_comparison_output])
161
 
162
+ demo.launch()