Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -70,7 +70,7 @@ def check_distribution(target_column):
|
|
| 70 |
plt.figure(figsize=(12, 8), dpi=400)
|
| 71 |
|
| 72 |
# Plot the original data distribution as a histogram
|
| 73 |
-
sns.histplot(data, kde=False, stat="density", bins=… [removed line truncated in the diff view]
|
| 74 |
|
| 75 |
# Overlay the actual data KDE line
|
| 76 |
sns.kdeplot(data, color=actual_data_color, lw=2, label='Actual Data Distribution Line')
|
|
@@ -79,7 +79,7 @@ def check_distribution(target_column):
|
|
| 79 |
for i, (name, p_value, params) in enumerate(top_3_results):
|
| 80 |
best_fit_data = np.linspace(min(data), max(data), 1000)
|
| 81 |
pdf = distributions[name].pdf(best_fit_data, *params)
|
| 82 |
-
p_value_text = "<0.… [removed line truncated in the diff view]
|
| 83 |
plt.plot(best_fit_data, pdf, color=distribution_colors[i], lw=2, label=f'{name} Fit (p-value={p_value_text})')
|
| 84 |
|
| 85 |
plt.title("Top 3 Best Fit Distributions Overlaid")
|
|
@@ -95,7 +95,7 @@ def check_distribution(target_column):
|
|
| 95 |
# Prepare result text with top 3 distributions and p-values
|
| 96 |
result_text = "Top 3 best matched distributions:\n"
|
| 97 |
for i, (name, p_value, _) in enumerate(top_3_results):
|
| 98 |
-
p_value_text = "<0.… [removed line truncated in the diff view]
|
| 99 |
result_text += f"Top {i + 1}: {name} with a p-value of {p_value_text}\n"
|
| 100 |
|
| 101 |
# Add disclaimer about p-value significance
|
|
@@ -107,10 +107,10 @@ def check_distribution(target_column):
|
|
| 107 |
normal_pdf = norm.pdf(normal_best_fit_data, mean, std)
|
| 108 |
ks_stat, normal_p_value = kstest(data, 'norm', args=(mean, std))
|
| 109 |
|
| 110 |
-
p_value_text = "<0.… [removed line truncated in the diff view]
|
| 111 |
|
| 112 |
plt.figure(figsize=(12, 8), dpi=400)
|
| 113 |
-
sns.histplot(data, kde=False, stat="density", bins=… [removed line truncated in the diff view]
|
| 114 |
sns.kdeplot(data, color=actual_data_color, lw=2, label='Actual Data Distribution Line')
|
| 115 |
plt.plot(normal_best_fit_data, normal_pdf, color=normal_color, lw=2, label=f'Normal Fit (p-value={p_value_text})')
|
| 116 |
plt.title("Comparison with Normal Distribution")
|
|
|
|
| 70 |
plt.figure(figsize=(12, 8), dpi=400)
|
| 71 |
|
| 72 |
# Plot the original data distribution as a histogram
|
| 73 |
+
sns.histplot(data, kde=False, stat="density", bins=50, color=actual_data_color, label='Actual Data Distribution')
|
| 74 |
|
| 75 |
# Overlay the actual data KDE line
|
| 76 |
sns.kdeplot(data, color=actual_data_color, lw=2, label='Actual Data Distribution Line')
|
|
|
|
| 79 |
for i, (name, p_value, params) in enumerate(top_3_results):
|
| 80 |
best_fit_data = np.linspace(min(data), max(data), 1000)
|
| 81 |
pdf = distributions[name].pdf(best_fit_data, *params)
|
| 82 |
+
p_value_text = "<0.001" if p_value < 0.001 else f"{p_value:.5f}"
|
| 83 |
plt.plot(best_fit_data, pdf, color=distribution_colors[i], lw=2, label=f'{name} Fit (p-value={p_value_text})')
|
| 84 |
|
| 85 |
plt.title("Top 3 Best Fit Distributions Overlaid")
|
|
|
|
| 95 |
# Prepare result text with top 3 distributions and p-values
|
| 96 |
result_text = "Top 3 best matched distributions:\n"
|
| 97 |
for i, (name, p_value, _) in enumerate(top_3_results):
|
| 98 |
+
p_value_text = "<0.001" if p_value < 0.001 else f"{p_value:.5f}"
|
| 99 |
result_text += f"Top {i + 1}: {name} with a p-value of {p_value_text}\n"
|
| 100 |
|
| 101 |
# Add disclaimer about p-value significance
|
|
|
|
| 107 |
normal_pdf = norm.pdf(normal_best_fit_data, mean, std)
|
| 108 |
ks_stat, normal_p_value = kstest(data, 'norm', args=(mean, std))
|
| 109 |
|
| 110 |
+
p_value_text = "<0.001" if normal_p_value < 0.001 else f"{normal_p_value:.5f}"
|
| 111 |
|
| 112 |
plt.figure(figsize=(12, 8), dpi=400)
|
| 113 |
+
sns.histplot(data, kde=False, stat="density", bins=50, color=actual_data_color, label='Actual Data Distribution')
|
| 114 |
sns.kdeplot(data, color=actual_data_color, lw=2, label='Actual Data Distribution Line')
|
| 115 |
plt.plot(normal_best_fit_data, normal_pdf, color=normal_color, lw=2, label=f'Normal Fit (p-value={p_value_text})')
|
| 116 |
plt.title("Comparison with Normal Distribution")
|