Spaces:

reab5555
/

EDA-Data-Analysis-Tool

Sleeping

App Files Community

reab5555 committed on Aug 24, 2024

Commit

083effd

verified ·

1 Parent(s): fb63a56

Update app.py

Browse files

Files changed (1) hide show

app.py +118 -96

app.py CHANGED Viewed

@@ -38,96 +38,133 @@ def create_plots(df, feature_columns, target_column):
             # Create scatter plot
             plt.figure(figsize=(12, 10))
-            if is_numeric_target:
-                scatter_plot = sns.pairplot(df[features], kind='scatter',
-                                            plot_kws={'alpha': 0.6}, corner=True)
-                norm = plt.Normalize(df[target_column].min(), df[target_column].max())
                 for ax in scatter_plot.axes.flatten():
-                    if ax.get_xlabel() != ax.get_ylabel():
-                        scatter = ax.collections[0]
-                        scatter.set_cmap('viridis')
-                        scatter.set_norm(norm)
-                        scatter.set_array(df[target_column])
-                        plt.colorbar(scatter, ax=ax, label=target_column)
-            else:
-                scatter_plot = sns.pairplot(df[features], hue=target_column, kind='scatter', corner=True)
-            scatter_plot.fig.suptitle(f'Scatter Plots - Group {group}', y=1.02, fontsize=16)
-            # Adjust label size and spacing
-            for ax in scatter_plot.axes.flatten():
-                ax.tick_params(labelsize=10)
-                ax.set_xlabel(ax.get_xlabel(), fontsize=12)
-                ax.set_ylabel(ax.get_ylabel(), fontsize=12)
-            plt.tight_layout()
-            buf = io.BytesIO()
-            plt.savefig(buf, format='png', dpi=300)
-            buf.seek(0)
-            plots.append(buf)
-            plt.close()
             # Create histogram plot
             plt.figure(figsize=(12, 10))
-            if is_numeric_target:
-                hist_plot = sns.pairplot(df[features], kind='hist',
-                                         plot_kws={'alpha': 0.6}, corner=True)
                 for ax in hist_plot.axes.flatten():
-                    if ax.get_xlabel() == ax.get_ylabel():
-                        ax.clear()
-                        sns.histplot(df[ax.get_xlabel()], ax=ax, kde=True)
-                    else:
-                        scatter = ax.collections[0]
-                        scatter.set_cmap('viridis')
-                        scatter.set_norm(norm)
-                        scatter.set_array(df[target_column])
-                        plt.colorbar(scatter, ax=ax, label=target_column)
-            else:
-                hist_plot = sns.pairplot(df[features], kind='hist', hue=target_column, corner=True)
-            hist_plot.fig.suptitle(f'Histogram Plots - Group {group}', y=1.02, fontsize=16)
-            # Adjust label size and spacing
-            for ax in hist_plot.axes.flatten():
-                ax.tick_params(labelsize=10)
-                ax.set_xlabel(ax.get_xlabel(), fontsize=12)
-                ax.set_ylabel(ax.get_ylabel(), fontsize=12)
-            plt.tight_layout()
-            buf = io.BytesIO()
-            plt.savefig(buf, format='png', dpi=300)
-            buf.seek(0)
-            plots.append(buf)
-            plt.close()
             # Create regression plot
             n_features = len(features) - 1  # Exclude target column
             fig, axes = plt.subplots(n_features, n_features, figsize=(16, 14))
             fig.suptitle(f'Regression Plots - Group {group}', y=1.02, fontsize=16)
-            for i, feature1 in enumerate(features[:-1]):
-                for j, feature2 in enumerate(features[:-1]):
-                    if n_features == 1:
-                        ax = axes
-                    else:
-                        ax = axes[i, j]
-                    if i != j:
-                        if is_numeric_target:
-                            scatter = ax.scatter(df[feature1], df[feature2], c=df[target_column],
-                                                 cmap='viridis', alpha=0.6)
-                            plt.colorbar(scatter, ax=ax, label=target_column)
                         else:
-                            sns.regplot(x=feature1, y=feature2, data=df, ax=ax,
-                                        scatter_kws={'alpha': 0.6}, line_kws={'color': 'red'})
-                    else:
-                        sns.histplot(df[feature1], ax=ax, kde=True)
-                    ax.set_xlabel(feature1, fontsize=10)
-                    ax.set_ylabel(feature2, fontsize=10)
-                    ax.tick_params(labelsize=8)
-                    ax.set_title(f'{feature1} vs {feature2}', fontsize=12)
             plt.tight_layout()
@@ -135,25 +172,10 @@ def create_plots(df, feature_columns, target_column):
             plt.savefig(buf, format='png', dpi=300)
             buf.seek(0)
             plots.append(buf)
             plt.close()
-        # Calculate Pearson correlation values
-        correlation_matrix = df[feature_columns + [target_column]].corr()
-        # Create a heatmap of Pearson correlation values
-        plt.figure(figsize=(12, 10))
-        heatmap = sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm', square=True, cbar_kws={'shrink': .8})
-        heatmap.set_title('Pearson Correlation Heatmap', fontsize=16)
-        plt.xticks(rotation=45, ha='right', fontsize=10)
-        plt.yticks(fontsize=10)
-        plt.tight_layout()
-        buf = io.BytesIO()
-        plt.savefig(buf, format='png', dpi=300)
-        buf.seek(0)
-        plots.append(buf)
-        plt.close()
     except Exception as e:
         print(f"Error in create_plots: {str(e)}")

             # Create scatter plot
             plt.figure(figsize=(12, 10))
+            try:
+                if is_numeric_target:
+                    scatter_plot = sns.pairplot(df[features], kind='scatter',
+                                                plot_kws={'alpha': 0.6}, corner=True)
+                    norm = plt.Normalize(df[target_column].min(), df[target_column].max())
+                    for ax in scatter_plot.axes.flatten():
+                        if ax.get_xlabel() != ax.get_ylabel() and ax.get_xlabel() is not None:
+                            if len(ax.collections) > 0:
+                                scatter = ax.collections[0]
+                                scatter.set_cmap('viridis')
+                                scatter.set_norm(norm)
+                                scatter.set_array(df[target_column])
+                                plt.colorbar(scatter, ax=ax, label=target_column)
+                else:
+                    scatter_plot = sns.pairplot(df[features], hue=target_column, kind='scatter', corner=True)
+                scatter_plot.fig.suptitle(f'Scatter Plots - Group {group}', y=1.02, fontsize=16)
+                # Adjust label size and spacing
                 for ax in scatter_plot.axes.flatten():
+                    ax.tick_params(labelsize=10)
+                    if ax.get_xlabel():
+                        ax.set_xlabel(ax.get_xlabel(), fontsize=12)
+                    if ax.get_ylabel():
+                        ax.set_ylabel(ax.get_ylabel(), fontsize=12)
+                plt.tight_layout()
+                buf = io.BytesIO()
+                plt.savefig(buf, format='png', dpi=300)
+                buf.seek(0)
+                plots.append(buf)
+            except Exception as e:
+                print(f"Error in scatter plot for group {group}: {str(e)}")
+            finally:
+                plt.close()
             # Create histogram plot
             plt.figure(figsize=(12, 10))
+            try:
+                if is_numeric_target:
+                    hist_plot = sns.pairplot(df[features], kind='hist',
+                                             plot_kws={'alpha': 0.6}, corner=True)
+                    for ax in hist_plot.axes.flatten():
+                        if ax.get_xlabel() == ax.get_ylabel() and ax.get_xlabel() is not None:
+                            ax.clear()
+                            sns.histplot(df[ax.get_xlabel()], ax=ax, kde=True)
+                        elif ax.get_xlabel() is not None and ax.get_ylabel() is not None:
+                            if len(ax.collections) > 0:
+                                scatter = ax.collections[0]
+                                scatter.set_cmap('viridis')
+                                scatter.set_norm(norm)
+                                scatter.set_array(df[target_column])
+                                plt.colorbar(scatter, ax=ax, label=target_column)
+                else:
+                    hist_plot = sns.pairplot(df[features], kind='hist', hue=target_column, corner=True)
+                hist_plot.fig.suptitle(f'Histogram Plots - Group {group}', y=1.02, fontsize=16)
+                # Adjust label size and spacing
                 for ax in hist_plot.axes.flatten():
+                    ax.tick_params(labelsize=10)
+                    if ax.get_xlabel():
+                        ax.set_xlabel(ax.get_xlabel(), fontsize=12)
+                    if ax.get_ylabel():
+                        ax.set_ylabel(ax.get_ylabel(), fontsize=12)
+                plt.tight_layout()
+                buf = io.BytesIO()
+                plt.savefig(buf, format='png', dpi=300)
+                buf.seek(0)
+                plots.append(buf)
+            except Exception as e:
+                print(f"Error in histogram plot for group {group}: {str(e)}")
+            finally:
+                plt.close()
             # Create regression plot
             n_features = len(features) - 1  # Exclude target column
             fig, axes = plt.subplots(n_features, n_features, figsize=(16, 14))
             fig.suptitle(f'Regression Plots - Group {group}', y=1.02, fontsize=16)
+            try:
+                for i, feature1 in enumerate(features[:-1]):
+                    for j, feature2 in enumerate(features[:-1]):
+                        if n_features == 1:
+                            ax = axes
+                        else:
+                            ax = axes[i, j]
+                        if i != j:
+                            if is_numeric_target:
+                                scatter = ax.scatter(df[feature1], df[feature2], c=df[target_column],
+                                                     cmap='viridis', alpha=0.6)
+                                plt.colorbar(scatter, ax=ax, label=target_column)
+                            else:
+                                sns.regplot(x=feature1, y=feature2, data=df, ax=ax,
+                                            scatter_kws={'alpha': 0.6}, line_kws={'color': 'red'})
                         else:
+                            sns.histplot(df[feature1], ax=ax, kde=True)
+                        ax.set_xlabel(feature1, fontsize=10)
+                        ax.set_ylabel(feature2, fontsize=10)
+                        ax.tick_params(labelsize=8)
+                        ax.set_title(f'{feature1} vs {feature2}', fontsize=12)
+                plt.tight_layout()
+                buf = io.BytesIO()
+                plt.savefig(buf, format='png', dpi=300)
+                buf.seek(0)
+                plots.append(buf)
+            except Exception as e:
+                print(f"Error in regression plot for group {group}: {str(e)}")
+            finally:
+                plt.close()
+        # Calculate Pearson correlation values
+        correlation_matrix = df[feature_columns + [target_column]].corr()
+        # Create a heatmap of Pearson correlation values
+        plt.figure(figsize=(12, 10))
+        try:
+            heatmap = sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm', square=True, cbar_kws={'shrink': .8})
+            heatmap.set_title('Pearson Correlation Heatmap', fontsize=16)
+            plt.xticks(rotation=45, ha='right', fontsize=10)
+            plt.yticks(fontsize=10)
             plt.tight_layout()
             plt.savefig(buf, format='png', dpi=300)
             buf.seek(0)
             plots.append(buf)
+        except Exception as e:
+            print(f"Error in correlation heatmap: {str(e)}")
+        finally:
             plt.close()
     except Exception as e:
         print(f"Error in create_plots: {str(e)}")