Spaces:

abaryan
/

BioXP-0.5b-v2

Sleeping

App Files Files Community

Abaryan commited on Jun 1

Commit

dc3747b

verified ·

1 Parent(s): 30ca71a

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -255

app.py CHANGED Viewed

@@ -1,263 +1,53 @@
-from fastapi import FastAPI, HTTPException
-from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-import os
-from datasets import load_dataset
-import random
-from typing import Optional, List, Tuple, Union
-import gradio as gr
-from contextlib import asynccontextmanager
-# Global variables
-model = None
-tokenizer = None
-dataset = None
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    # Startup: Load the model
-    global model, tokenizer, dataset
-    try:
-        # Load your fine-tuned model and tokenizer
-        model_name = os.getenv("MODEL_NAME", "rgb2gbr/BioXP-0.5B-MedMCQA")
-        model = AutoModelForCausalLM.from_pretrained(model_name)
-        tokenizer = AutoTokenizer.from_pretrained(model_name)
-        # Load MedMCQA dataset
-        dataset = load_dataset("openlifescienceai/medmcqa")
-        # Move model to GPU if available
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        model = model.to(device)
-        model.eval()
-    except Exception as e:
-        print(f"Error loading model: {str(e)}")
-        raise e
-    yield  # This is where FastAPI serves the application
-    # Shutdown: Clean up resources if needed
-    if model is not None:
-        del model
-    if tokenizer is not None:
-        del tokenizer
-    if dataset is not None:
-        del dataset
-    torch.cuda.empty_cache()
-app = FastAPI(lifespan=lifespan)
-# Add CORS middleware for Gradio
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-# Define input models
-class QuestionRequest(BaseModel):
-    question: str
-    options: list[str]  # List of 4 options
-class DatasetQuestion(BaseModel):
-    question: str
-    opa: str
-    opb: str
-    opc: str
-    opd: str
-    cop: Optional[int] = None  # Correct option (0-3)
-    exp: Optional[str] = None  # Explanation if available
-def format_prompt(question: str, options: List[str]) -> str:
-    """Format the prompt for the model"""
-    prompt = f"Question: {question}\n\nOptions:\n"
-    for i, opt in enumerate(options):
-        prompt += f"{chr(65+i)}. {opt}\n"
-    prompt += "\nAnswer:"
-    return prompt
-def get_question(index: Optional[int] = None, random_question: bool = False, format: str = "api") -> Union[DatasetQuestion, Tuple[str, str, str, str, str]]:
-    """
-    Get a question from the dataset.
-    Args:
-        index: Optional question index
-        random_question: Whether to get a random question
-        format: 'api' for DatasetQuestion object, 'gradio' for tuple
-    """
-    if dataset is None:
-        raise Exception("Dataset not loaded")
-    if random_question:
-        index = random.randint(0, len(dataset['train']) - 1)
-    elif index is None:
-        raise ValueError("Either index or random_question must be provided")
-    question_data = dataset['train'][index]
-    if format == "gradio":
-        return (
-            question_data['question'],
-            question_data['opa'],
-            question_data['opb'],
-            question_data['opc'],
-            question_data['opd']
-        )
-    return DatasetQuestion(
-        question=question_data['question'],
-        opa=question_data['opa'],
-        opb=question_data['opb'],
-        opc=question_data['opc'],
-        opd=question_data['opd'],
-        cop=question_data['cop'] if 'cop' in question_data else None,
-        exp=question_data['exp'] if 'exp' in question_data else None
-    )
-def predict_gradio(question: str, option_a: str, option_b: str, option_c: str, option_d: str):
-    """Gradio interface prediction function"""
-    try:
-        options = [option_a, option_b, option_c, option_d]
-        # Format the prompt
-        prompt = format_prompt(question, options)
-        # Tokenize the input
-        inputs = tokenizer(
-            prompt,
-            return_tensors="pt",
-            padding=True,
-            truncation=True,
-            max_length=512
         )
-        device = next(model.parameters()).device
-        inputs = {k: v.to(device) for k, v in inputs.items()}
-        # Generate prediction
-        with torch.no_grad():
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=10,
-                num_return_sequences=1,
-                temperature=0.7,
-                do_sample=False,
-                pad_token_id=tokenizer.eos_token_id
-            )
-        # Decode the output
-        prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # Extract the answer from the prediction
-        answer = prediction.split("Answer:")[-1].strip()
-        # Format the output for Gradio
-        result = f"Model Output:\n{prediction}\n\n"
-        result += f"Extracted Answer: {answer}"
-        return result
-    except Exception as e:
-        return f"Error: {str(e)}"
 # Create Gradio interface
-with gr.Blocks(title="Medical MCQ Predictor") as demo:
-    gr.Markdown("# Medical MCQ Predictor")
-    gr.Markdown("Enter a medical question and its options, or get a random question from MedMCQA dataset.")
-    with gr.Row():
-        with gr.Column():
-            question = gr.Textbox(label="Question", lines=3)
-            option_a = gr.Textbox(label="Option A")
-            option_b = gr.Textbox(label="Option B")
-            option_c = gr.Textbox(label="Option C")
-            option_d = gr.Textbox(label="Option D")
-            with gr.Row():
-                predict_btn = gr.Button("Predict")
-                random_btn = gr.Button("Get Random Question")
-            output = gr.Textbox(label="Prediction", lines=5)
-    predict_btn.click(
-        fn=predict_gradio,
-        inputs=[question, option_a, option_b, option_c, option_d],
-        outputs=output
-    )
-    random_btn.click(
-        fn=lambda: get_question(random_question=True, format="gradio"),
-        inputs=[],
-        outputs=[question, option_a, option_b, option_c, option_d]
-    )
-# Mount Gradio app to FastAPI
-app = gr.mount_gradio_app(app, demo, path="/")
-@app.get("/dataset/question")
-async def get_dataset_question(index: Optional[int] = None, random_question: bool = False):
-    """Get a question from the MedMCQA dataset"""
-    try:
-        return get_question(index=index, random_question=random_question)
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
-@app.post("/predict")
-async def predict(request: QuestionRequest):
-    if len(request.options) != 4:
-        raise HTTPException(status_code=400, detail="Exactly 4 options are required")
-    try:
-        # Format the prompt
-        prompt = format_prompt(request.question, request.options)
-        # Tokenize the input
-        inputs = tokenizer(
-            prompt,
-            return_tensors="pt",
-            padding=True,
-            truncation=True,
-            max_length=512
-        )
-        device = next(model.parameters()).device
-        inputs = {k: v.to(device) for k, v in inputs.items()}
-        # Generate prediction
-        with torch.no_grad():
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=10,
-                num_return_sequences=1,
-                temperature=0.7,
-                do_sample=False,
-                pad_token_id=tokenizer.eos_token_id
-            )
-        # Decode the output
-        prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # Extract the answer from the prediction
-        answer = prediction.split("Answer:")[-1].strip()
-        response = {
-            "model_output": prediction,
-            "extracted_answer": answer,
-            "full_response": prediction
-        }
-        return response
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
-@app.get("/health")
-async def health_check():
-    return {
-        "status": "healthy",
-        "model_loaded": model is not None,
-        "dataset_loaded": dataset is not None
-    }

+import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
+# Load model and tokenizer
+model_name = "rgb2gbr/GRPO_BioMedmcqa_Qwen2.5-0.5B"
+model = AutoModelForCausalLM.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+# Move model to GPU if available
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = model.to(device)
+model.eval()
+def predict(question: str, option_a: str, option_b: str, option_c: str, option_d: str):
+    # Format the prompt
+    prompt = f"Question: {question}\n\nOptions:\nA. {option_a}\nB. {option_b}\nC. {option_c}\nD. {option_d}\n\nAnswer:"
+    # Tokenize and generate
+    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
+    inputs = {k: v.to(device) for k, v in inputs.items()}
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=10,
+            temperature=0.7,
+            do_sample=False,
+            pad_token_id=tokenizer.eos_token_id
         )
+    # Get prediction
+    prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return prediction
 # Create Gradio interface
+demo = gr.Interface(
+    fn=predict,
+    inputs=[
+        gr.Textbox(label="Question", lines=3),
+        gr.Textbox(label="Option A"),
+        gr.Textbox(label="Option B"),
+        gr.Textbox(label="Option C"),
+        gr.Textbox(label="Option D")
+    ],
+    outputs=gr.Textbox(label="Model's Answer", lines=5),
+    title="Medical MCQ Predictor",
+    description="Enter a medical question and its options to get the model's prediction."
+)
+# Launch the app
+if __name__ == "__main__":
+    demo.launch()