Spaces:
Runtime error
Runtime error
| from typing import Dict, List, Tuple, Optional | |
| import json | |
| import sys | |
| import glob | |
| from pathlib import Path | |
| from collections import defaultdict | |
def get_latest_log() -> str:
    """Return the path of the newest ``api_usage_*.json`` file in the working directory.

    "Newest" means the largest modification time reported by ``stat()``.

    Returns:
        str: Path (relative to the current directory) of the newest matching file

    Raises:
        SystemExit: With exit code 1 when no file matches the pattern
    """
    log_pattern = "api_usage_*.json"

    def modified_at(candidate: Path) -> float:
        # Modification timestamp used to rank the candidates.
        return candidate.stat().st_mtime

    candidates = [*Path(".").glob(log_pattern)]
    if candidates:
        newest = max(candidates, key=modified_at)
        return str(newest)

    # Nothing to analyze: report and stop the script.
    print(f"No files matching pattern '{log_pattern}' found in current directory")
    sys.exit(1)
def _entry_has_image(messages: List[Dict]) -> bool:
    """Return True as soon as any message contains an ``image_url`` content item."""
    for msg in messages:
        content = msg.get("content", [])
        # Only list-shaped content can carry image parts; plain strings are text only.
        if isinstance(content, list) and any(
            isinstance(item, dict) and item.get("type") == "image_url" for item in content
        ):
            return True
    return False


def _question_preview(question_data: Dict, limit: int = 100) -> str:
    """Return at most ``limit`` characters of the question, adding "..." only when cut."""
    question = question_data.get("question") or ""
    if len(question) > limit:
        return question[:limit] + "..."
    return question


def analyze_log_file(filename: str) -> Tuple[List[Dict], List[Dict], Dict[str, List[str]]]:
    """Analyze a log file for entries missing images and errors.

    The file is read line by line. Only lines whose stripped text starts with
    ``{`` are treated as JSON entries; everything else (blank lines,
    ``HTTP Request:`` lines, other text) is ignored. Entries lacking a truthy
    ``case_id`` or ``question_id`` are ignored as well.

    Args:
        filename: Path to the log file to analyze

    Returns:
        Tuple containing:
            - List of entries with no images (``case_id``, ``question_id`` and a
              ``question`` preview of at most 100 characters, suffixed with
              "..." only when the question was truncated)
            - List of entries whose ``status`` is "skipped" or "error"
            - Dict of processing errors by type ("json_decode" / "other"),
              each mapping to a list of "Line N: ..." messages

    Raises:
        SystemExit: If file cannot be found or read
    """
    no_images: List[Dict] = []
    skipped: List[Dict] = []
    errors: Dict[str, List[str]] = defaultdict(list)

    try:
        # Explicit encoding so parsing does not depend on the platform locale.
        with open(filename, "r", encoding="utf-8") as f:
            for line_num, line in enumerate(f, 1):
                stripped = line.strip()
                # Blank lines, "HTTP Request:" lines and any other non-JSON
                # text never start with "{", so one check skips them all.
                if not stripped.startswith("{"):
                    continue

                try:
                    entry = json.loads(stripped)
                except json.JSONDecodeError:
                    errors["json_decode"].append(f"Line {line_num}: Invalid JSON")
                    continue

                try:
                    case_id = entry.get("case_id")
                    question_id = entry.get("question_id")
                    # Skip if we can't identify the question
                    if not case_id or not question_id:
                        continue

                    # Explicit skip/error status takes precedence over the image check
                    status = entry.get("status")
                    if status in ("skipped", "error"):
                        skipped.append(
                            {
                                "case_id": case_id,
                                "question_id": question_id,
                                "reason": entry.get("reason"),
                                "status": status,
                            }
                        )
                        continue

                    # ``or`` fallbacks also cover keys that are present but null.
                    input_data = entry.get("input") or {}
                    if not _entry_has_image(input_data.get("messages") or []):
                        no_images.append(
                            {
                                "case_id": case_id,
                                "question_id": question_id,
                                "question": _question_preview(
                                    input_data.get("question_data") or {}
                                ),
                            }
                        )
                except Exception as e:
                    # A malformed entry must not abort the scan; record it and move on.
                    errors["other"].append(f"Line {line_num}: Error processing entry: {e}")
    except FileNotFoundError:
        print(f"Error: Could not find log file: {filename}")
        sys.exit(1)
    except Exception as e:
        print(f"Error reading file {filename}: {e}")
        sys.exit(1)

    return no_images, skipped, errors
def print_results(
    filename: str, no_images: List[Dict], skipped: List[Dict], errors: Dict[str, List[str]]
) -> None:
    """Print analysis results to standard output.

    Three sections are written: questions without images, skipped/error
    questions, and (only when at least one message exists) processing errors.
    The two totals are always printed, even when they are zero.

    Args:
        filename: Name of the analyzed log file
        no_images: List of entries with no images; each needs the keys
            ``case_id``, ``question_id`` and ``question``
        skipped: List of skipped/error entries; each needs ``case_id``,
            ``question_id`` and ``status``, and may carry ``reason``
        errors: Dict of processing errors by type
    """
    print(f"\nAnalyzing log file: {filename}")

    print("\n=== Questions with No Images ===")
    for entry in no_images:
        print(f"\nCase ID: {entry['case_id']}")
        print(f"Question ID: {entry['question_id']}")
        print(f"Question Preview: {entry['question']}")
    print(f"\nTotal questions without images: {len(no_images)}")

    print("\n=== Skipped/Error Questions ===")
    for entry in skipped:
        print(f"\nCase ID: {entry['case_id']}")
        print(f"Question ID: {entry['question_id']}")
        print(f"Status: {entry['status']}")
        # The key may be present with a None/empty value, so a plain
        # ``.get(key, default)`` would print "None" instead of the fallback.
        print(f"Reason: {entry.get('reason') or 'unknown'}")
    print(f"\nTotal skipped/error questions: {len(skipped)}")

    # Show the section only when some error type actually has messages.
    if any(errors.values()):
        print("\n=== Processing Errors ===")
        for error_type, messages in errors.items():
            if messages:
                print(f"\n{error_type}:")
                for msg in messages:
                    print(f" {msg}")
def main() -> None:
    """Run the log validation: pick a log file, analyze it, print the report."""
    # An explicit path on the command line wins; otherwise fall back to the
    # most recently modified log in the current directory.
    cli_args = sys.argv[1:]
    log_file = cli_args[0] if cli_args else get_latest_log()

    results = analyze_log_file(log_file)
    print_results(log_file, *results)


if __name__ == "__main__":
    main()