Spaces:
Runtime error
Runtime error
| import os | |
| import json | |
| from datetime import datetime | |
| from typing import List, Dict | |
| import requests | |
| from fastapi import FastAPI, HTTPException | |
| from fastapi.staticfiles import StaticFiles | |
| from fastapi.responses import FileResponse | |
| from pydantic import BaseModel | |
| import plotly.graph_objs as go | |
| from apscheduler.schedulers.asyncio import AsyncIOScheduler | |
| from huggingface_hub import AsyncInferenceClient | |
app = FastAPI()

# Configuration: model repos probed by the scheduled health check.
models = [
    "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "meta-llama/Meta-Llama-3.1-70B-Instruct",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "meta-llama/Meta-Llama-3-70B-Instruct",
    "meta-llama/Llama-Guard-3-8B",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-2-13b-chat-hf",
    "deepseek-ai/DeepSeek-Coder-V2-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
]

# JSON array of LogEntry dicts, shared by the checker and the HTTP handlers.
LOG_FILE = "/data/api_logs.json"

# Raises KeyError at import time if the token is unset — deliberate fail-fast.
client = AsyncInferenceClient(token=os.environ["HF_INFERENCE_API_TOKEN"])

# Ensure the log directory exists before touching the file: on a fresh
# container /data may be absent, and open(..., "w") would raise
# FileNotFoundError at import time otherwise.
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
if not os.path.exists(LOG_FILE):
    with open(LOG_FILE, "w") as f:
        json.dump([], f)
class LogEntry(BaseModel):
    """One health-check observation for a single model endpoint."""

    model: str  # repo id of the probed model
    success: bool  # True when the chat_completion call returned without raising
    timestamp: str  # ISO-8601 local time the probe finished
    failure_message: str  # str(exception) on failure; the literal 'success' otherwise
async def check_apis():
    """Probe every configured model once and append the results to LOG_FILE.

    Fixes over the previous version:
    - ``model=model`` is now passed to ``chat_completion``; before, every
      iteration hit the client's default endpoint instead of the model
      being checked.
    - The failure message is captured inside the ``except`` block.  Python 3
      deletes the ``as e`` name when the except block exits, so the old
      ``str(e)`` after the block raised NameError on the failure path.
    - ``f.truncate()`` after rewriting the JSON; ``seek(0)`` alone leaves
      stale trailing bytes when the new payload is shorter, corrupting
      the file.
    """
    results = []
    for model in models:
        failure_message = "success"  # preserved sentinel for successful probes
        try:
            await client.chat_completion(
                messages=[{"role": "user", "content": "What is the capital of France?"}],
                max_tokens=10,
                model=model,
            )
            success = True
        except Exception as exc:
            print(exc)
            success = False
            failure_message = str(exc)
        results.append(LogEntry(
            model=model,
            success=success,
            timestamp=datetime.now().isoformat(),
            failure_message=failure_message,
        ))

    # Read-modify-write of the whole log; single-process app, so no locking.
    with open(LOG_FILE, "r+") as f:
        logs = json.load(f)
        logs.extend(entry.dict() for entry in results)
        f.seek(0)
        json.dump(logs, f)
        f.truncate()
# NOTE(review): the original coroutine was never invoked anywhere, so the
# scheduler never ran.  Registering it as a FastAPI startup handler runs it
# once inside the running event loop, which AsyncIOScheduler requires.
@app.on_event("startup")
async def start_scheduler():
    """Start the background job that re-checks all model APIs every 10 minutes."""
    scheduler = AsyncIOScheduler()
    scheduler.add_job(check_apis, 'interval', minutes=10)
    scheduler.start()
# NOTE(review): the handler had no route decorator, so it was unreachable.
# "/" is the assumed path for the dashboard page — confirm against the
# frontend's expectations.
@app.get("/")
async def index():
    """Serve the single-page dashboard."""
    return FileResponse("static/index.html")
# NOTE(review): the handler had no route decorator, so it was unreachable.
# "/logs" is an assumed path — verify against the static frontend's fetch calls.
@app.get("/logs")
async def get_logs():
    """Return every logged probe result as a JSON array."""
    with open(LOG_FILE, "r") as f:
        return json.load(f)
# NOTE(review): the handler had no route decorator, so it was unreachable.
# "/chart-data" is an assumed path — verify against the static frontend.
@app.get("/chart-data")
async def get_chart_data():
    """Aggregate the log into per-model success/failure time series.

    Returns a dict keyed by model id; each value holds ``success`` and
    ``failure`` sub-dicts with parallel ``x`` (ISO timestamps) and ``y``
    (constant 1) lists, shaped for plotting event markers.
    """
    with open(LOG_FILE, "r") as f:
        logs = json.load(f)

    chart_data = {}
    for entry in logs:
        buckets = chart_data.setdefault(entry['model'], {
            'success': {'x': [], 'y': []},
            'failure': {'x': [], 'y': []},
        })
        status = 'success' if entry['success'] else 'failure'
        buckets[status]['x'].append(entry['timestamp'])
        buckets[status]['y'].append(1)
    return chart_data
# Mount the static files directory (dashboard HTML/JS assets).
app.mount("/static", StaticFiles(directory="static"), name="static")

# Local/dev entry point; in production the Space platform launches the ASGI
# app itself, so this guard only fires when the file is run directly.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)