PROBE

Running

App Files Files Community

mgyigit committed on Nov 25, 2024

Commit

b71cdae

verified ·

1 Parent(s): 7e80034

Update src/saving_utils.py

Browse files

Files changed (1) hide show

src/saving_utils.py +26 -25

src/saving_utils.py CHANGED Viewed

@@ -1,59 +1,60 @@
-import pandas as pd
 import os
-import sys
-script_dir = os.path.dirname(os.path.abspath(__file__))
-sys.path.append('..')
-sys.path.append('.')
 def save_similarity_output(output_dict, method_name, leaderboard_path="/home/user/app/src/data/leaderboard_results.csv", similarity_path="/home/user/app/src/data/similarity_results.csv"):
     # Load or initialize the DataFrames
-    print(script_dir)
     if os.path.exists(leaderboard_path):
         leaderboard_df = pd.read_csv(leaderboard_path)
     else:
-        print("Leaderboard df has not found!")
         return -1
     if os.path.exists(similarity_path):
         similarity_df = pd.read_csv(similarity_path)
     else:
-        print("Similarity df has not found!")
         return -1
-    # Check if method exists in similarity results
     if method_name not in similarity_df['Method'].values:
-        similarity_df = pd.concat([similarity_df, pd.DataFrame({'Method': [method_name]})], ignore_index=True)
     # Initialize storage for averages
     averages = {}
-    # Iterate through the output_dict and calculate averages if all aspects (MF, CC, BP) are present
     for dataset in ['sparse', '200', '500']:
         correlation_values = []
         pvalue_values = []
-        # Check each aspect within the dataset (MF, BP, CC)
         for aspect in ['MF', 'BP', 'CC']:
             correlation_key = f"{dataset}_{aspect}_correlation"
             pvalue_key = f"{dataset}_{aspect}_pvalue"
-            # Process correlation if present
             if correlation_key in output_dict:
                 correlation = output_dict[correlation_key].item()
                 correlation_values.append(correlation)
-                similarity_df.loc[similarity_df['Method'] == method_name, f"{dataset}_{aspect}_correlation"] = correlation
-                leaderboard_df.loc[leaderboard_df['Method'] == method_name, f"sim_{dataset}_{aspect}_correlation"] = correlation
-            # Process pvalue if present
             if pvalue_key in output_dict:
                 pvalue = output_dict[pvalue_key].item()
                 pvalue_values.append(pvalue)
-                similarity_df.loc[similarity_df['Method'] == method_name, f"{dataset}_{aspect}_pvalue"] = pvalue
-                leaderboard_df.loc[leaderboard_df['Method'] == method_name, f"sim_{dataset}_{aspect}_pvalue"] = pvalue
-        # Calculate averages if all three aspects (MF, BP, CC) are present
         if len(correlation_values) == 3:
             averages[f"{dataset}_Ave_correlation"] = sum(correlation_values) / 3
             similarity_df.loc[similarity_df['Method'] == method_name, f"{dataset}_Ave_correlation"] = averages[f"{dataset}_Ave_correlation"]
@@ -65,8 +66,8 @@ def save_similarity_output(output_dict, method_name, leaderboard_path="/home/use
             leaderboard_df.loc[leaderboard_df['Method'] == method_name, f"sim_{dataset}_Ave_pvalue"] = averages[f"{dataset}_Ave_pvalue"]
     # Save the updated DataFrames back to CSV
-    leaderboard_df.to_csv(leaderboard_path, index=False)
     similarity_df.to_csv(similarity_path, index=False)
     return 0

 import os
+import pandas as pd
 def save_similarity_output(output_dict, method_name, leaderboard_path="/home/user/app/src/data/leaderboard_results.csv", similarity_path="/home/user/app/src/data/similarity_results.csv"):
     # Load or initialize the DataFrames
     if os.path.exists(leaderboard_path):
         leaderboard_df = pd.read_csv(leaderboard_path)
     else:
+        print("Leaderboard file not found!")
         return -1
     if os.path.exists(similarity_path):
         similarity_df = pd.read_csv(similarity_path)
     else:
+        print("Similarity file not found!")
         return -1
+    # Ensure the method exists in the similarity DataFrame
     if method_name not in similarity_df['Method'].values:
+        # Create a new row for the method with default values
+        new_row = {col: None for col in similarity_df.columns}
+        new_row['Method'] = method_name
+        similarity_df = pd.concat([similarity_df, pd.DataFrame([new_row])], ignore_index=True)
+    # Same for the leaderboard DataFrame
+    if method_name not in leaderboard_df['Method'].values:
+        new_row = {col: None for col in leaderboard_df.columns}
+        new_row['Method'] = method_name
+        leaderboard_df = pd.concat([leaderboard_df, pd.DataFrame([new_row])], ignore_index=True)
     # Initialize storage for averages
     averages = {}
+    # Iterate through the datasets and calculate averages
     for dataset in ['sparse', '200', '500']:
         correlation_values = []
         pvalue_values = []
         for aspect in ['MF', 'BP', 'CC']:
             correlation_key = f"{dataset}_{aspect}_correlation"
             pvalue_key = f"{dataset}_{aspect}_pvalue"
+            # Update correlation if present
             if correlation_key in output_dict:
                 correlation = output_dict[correlation_key].item()
                 correlation_values.append(correlation)
+                similarity_df.loc[similarity_df['Method'] == method_name, correlation_key] = correlation
+                leaderboard_df.loc[leaderboard_df['Method'] == method_name, f"sim_{correlation_key}"] = correlation
+            # Update p-value if present
             if pvalue_key in output_dict:
                 pvalue = output_dict[pvalue_key].item()
                 pvalue_values.append(pvalue)
+                similarity_df.loc[similarity_df['Method'] == method_name, pvalue_key] = pvalue
+                leaderboard_df.loc[leaderboard_df['Method'] == method_name, f"sim_{pvalue_key}"] = pvalue
+        # Calculate averages if all three aspects are present
         if len(correlation_values) == 3:
             averages[f"{dataset}_Ave_correlation"] = sum(correlation_values) / 3
             similarity_df.loc[similarity_df['Method'] == method_name, f"{dataset}_Ave_correlation"] = averages[f"{dataset}_Ave_correlation"]
             leaderboard_df.loc[leaderboard_df['Method'] == method_name, f"sim_{dataset}_Ave_pvalue"] = averages[f"{dataset}_Ave_pvalue"]
     # Save the updated DataFrames back to CSV
     similarity_df.to_csv(similarity_path, index=False)
+    leaderboard_df.to_csv(leaderboard_path, index=False)
     return 0