feat: implement custom JSON encoder for numpy types and ensure type consistency in inference results
Browse files
- app_mcp.py +18 -3
- utils/hf_logger.py +13 -5
app_mcp.py
CHANGED
|
@@ -38,6 +38,13 @@ os.environ['HF_HUB_CACHE'] = './models'
|
|
| 38 |
load_dotenv()
|
| 39 |
# print(os.getenv("HF_HUB_CACHE"))
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
# Ensure using GPU if available
|
| 42 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 43 |
|
|
@@ -221,8 +228,8 @@ def infer(image: Image.Image, model_id: str, confidence_threshold: float = 0.75)
|
|
| 221 |
result = entry.model(img)
|
| 222 |
scores = entry.postprocess(result, entry.class_names)
|
| 223 |
# Flatten output for Dataframe: include metadata and both class scores
|
| 224 |
-
ai_score = scores.get(entry.class_names[0], 0.0)
|
| 225 |
-
real_score = scores.get(entry.class_names[1], 0.0)
|
| 226 |
label = "AI" if ai_score >= confidence_threshold else ("REAL" if real_score >= confidence_threshold else "UNCERTAIN")
|
| 227 |
return {
|
| 228 |
"Model": entry.display_name,
|
|
@@ -462,8 +469,16 @@ def predict_image_with_json(img, confidence_threshold, augment_methods, rotate_d
|
|
| 462 |
|
| 463 |
logger.info(f"Cleaned forensic images types: {[type(img) for img in cleaned_forensics_images]}")
|
| 464 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 465 |
# Return raw model results as JSON string for debug_json component
|
| 466 |
-
json_results = json.dumps(results)
|
| 467 |
|
| 468 |
return img_pil, cleaned_forensics_images, table_rows, json_results, consensus_html
|
| 469 |
|
|
|
|
| 38 |
load_dotenv()
|
| 39 |
# print(os.getenv("HF_HUB_CACHE"))
|
| 40 |
|
| 41 |
+
# Custom JSON Encoder to handle numpy types
|
| 42 |
+
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts numpy values to native Python types.

    The standard encoder rejects numpy scalars (other than ``np.float64``,
    which subclasses ``float``) and numpy arrays. This encoder maps:

    * ``np.bool_``    -> ``bool``
    * ``np.integer``  -> ``int``
    * ``np.floating`` -> ``float`` (covers float16/float32/float64/...)
    * ``np.ndarray``  -> nested ``list`` via ``tolist()``

    Any other unsupported object is delegated to the base class, which
    raises ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serializable equivalent of *obj*.

        Raises:
            TypeError: if *obj* is not a supported numpy type (raised by
                ``json.JSONEncoder.default``).
        """
        # np.bool_ is not a subclass of np.integer, so it is handled
        # explicitly to keep it a JSON boolean.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            # Elements that are still numpy scalars after tolist() are
            # routed back through default() by the encoder.
            return obj.tolist()
        return super().default(obj)
|
| 47 |
+
|
| 48 |
# Ensure using GPU if available
|
| 49 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 50 |
|
|
|
|
| 228 |
result = entry.model(img)
|
| 229 |
scores = entry.postprocess(result, entry.class_names)
|
| 230 |
# Flatten output for Dataframe: include metadata and both class scores
|
| 231 |
+
ai_score = float(scores.get(entry.class_names[0], 0.0))
|
| 232 |
+
real_score = float(scores.get(entry.class_names[1], 0.0))
|
| 233 |
label = "AI" if ai_score >= confidence_threshold else ("REAL" if real_score >= confidence_threshold else "UNCERTAIN")
|
| 234 |
return {
|
| 235 |
"Model": entry.display_name,
|
|
|
|
| 469 |
|
| 470 |
logger.info(f"Cleaned forensic images types: {[type(img) for img in cleaned_forensics_images]}")
|
| 471 |
|
| 472 |
+
# Ensure numerical values in results are standard Python floats before JSON serialization
|
| 473 |
+
for i, res_dict in enumerate(results):
|
| 474 |
+
for key in ["AI Score", "Real Score"]:
|
| 475 |
+
value = res_dict.get(key)
|
| 476 |
+
if isinstance(value, np.float32):
|
| 477 |
+
res_dict[key] = float(value)
|
| 478 |
+
logger.info(f"Converted {key} for result {i} from numpy.float32 to float.")
|
| 479 |
+
|
| 480 |
# Return raw model results as JSON string for debug_json component
|
| 481 |
+
json_results = json.dumps(results, cls=NumpyEncoder)
|
| 482 |
|
| 483 |
return img_pil, cleaned_forensics_images, table_rows, json_results, consensus_html
|
| 484 |
|
utils/hf_logger.py
CHANGED
|
@@ -7,11 +7,19 @@ from PIL import Image
|
|
| 7 |
import logging
|
| 8 |
from datasets import Dataset, load_dataset, Features, Value, Sequence
|
| 9 |
import copy
|
|
|
|
| 10 |
|
| 11 |
logger = logging.getLogger(__name__)
|
| 12 |
|
| 13 |
HF_DATASET_NAME = "aiwithoutborders-xyz/degentic_rd0" # TODO: Replace with your actual HF username and dataset name
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
def _pil_to_base64(image: Image.Image) -> str:
|
| 16 |
"""Converts a PIL Image to a base64 string."""
|
| 17 |
# Explicitly check if the input is a PIL Image
|
|
@@ -91,12 +99,12 @@ def log_inference_data(
|
|
| 91 |
new_entry = {
|
| 92 |
"timestamp": datetime.datetime.now().isoformat(),
|
| 93 |
"image": original_image_b64,
|
| 94 |
-
"inference_request": json.dumps(inference_params),
|
| 95 |
-
"model_predictions": json.dumps(model_predictions),
|
| 96 |
-
"ensemble_output": json.dumps(ensemble_output),
|
| 97 |
"forensic_outputs": forensic_images_b64, # This is already a list of strings
|
| 98 |
-
"agent_monitoring_data": json.dumps(agent_monitoring_data),
|
| 99 |
-
"human_feedback": json.dumps(human_feedback if human_feedback is not None else {})
|
| 100 |
}
|
| 101 |
|
| 102 |
# Get current dataset features
|
|
|
|
| 7 |
import logging
|
| 8 |
from datasets import Dataset, load_dataset, Features, Value, Sequence
|
| 9 |
import copy
|
| 10 |
+
import numpy as np
|
| 11 |
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
|
| 14 |
HF_DATASET_NAME = "aiwithoutborders-xyz/degentic_rd0" # TODO: Replace with your actual HF username and dataset name
|
| 15 |
|
| 16 |
+
# Custom JSON encoder to handle numpy types (duplicate of the one in app_mcp.py; keep both copies in sync)
|
| 17 |
+
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts numpy values to native Python types.

    The standard encoder rejects numpy scalars (other than ``np.float64``,
    which subclasses ``float``) and numpy arrays. This encoder maps:

    * ``np.bool_``    -> ``bool``
    * ``np.integer``  -> ``int``
    * ``np.floating`` -> ``float`` (covers float16/float32/float64/...)
    * ``np.ndarray``  -> nested ``list`` via ``tolist()``

    Any other unsupported object is delegated to the base class, which
    raises ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serializable equivalent of *obj*.

        Raises:
            TypeError: if *obj* is not a supported numpy type (raised by
                ``json.JSONEncoder.default``).
        """
        # np.bool_ is not a subclass of np.integer, so it is handled
        # explicitly to keep it a JSON boolean.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            # Elements that are still numpy scalars after tolist() are
            # routed back through default() by the encoder.
            return obj.tolist()
        return super().default(obj)
|
| 22 |
+
|
| 23 |
def _pil_to_base64(image: Image.Image) -> str:
|
| 24 |
"""Converts a PIL Image to a base64 string."""
|
| 25 |
# Explicitly check if the input is a PIL Image
|
|
|
|
| 99 |
new_entry = {
|
| 100 |
"timestamp": datetime.datetime.now().isoformat(),
|
| 101 |
"image": original_image_b64,
|
| 102 |
+
"inference_request": json.dumps(inference_params, cls=NumpyEncoder),
|
| 103 |
+
"model_predictions": json.dumps(model_predictions, cls=NumpyEncoder),
|
| 104 |
+
"ensemble_output": json.dumps(ensemble_output, cls=NumpyEncoder),
|
| 105 |
"forensic_outputs": forensic_images_b64, # This is already a list of strings
|
| 106 |
+
"agent_monitoring_data": json.dumps(agent_monitoring_data, cls=NumpyEncoder),
|
| 107 |
+
"human_feedback": json.dumps(human_feedback if human_feedback is not None else {}, cls=NumpyEncoder)
|
| 108 |
}
|
| 109 |
|
| 110 |
# Get current dataset features
|