Spaces: Runtime error
```python
from fastapi import FastAPI
from llama_cpp import Llama

# Create an instance of the FastAPI class
app = FastAPI()

# Load the model once at startup rather than inside the request handler,
# so the weights are not re-downloaded on every call to the endpoint
llm = Llama.from_pretrained(
    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
    filename="*q8_0.gguf",
    verbose=False,
)

# Define a route for the root endpoint
@app.get("/")
async def read_root():
    output = llm(
        "Q: Name the planets in the solar system? A: ",  # Prompt
        max_tokens=32,      # Generate up to 32 tokens; set to None to generate up to the end of the context window
        stop=["Q:", "\n"],  # Stop generating just before the model would generate a new question
        echo=True,          # Echo the prompt back in the output
    )
    return {"message": output}
```
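For completeness, here is a minimal sketch of how the app could be served, assuming the code above is saved as `app.py` and that `fastapi`, `uvicorn`, and `llama-cpp-python` are listed in the Space's `requirements.txt` (the file name and port are assumptions, not from the original post):

```python
# Minimal sketch for serving the app, assuming it lives in app.py.
import uvicorn

if __name__ == "__main__":
    # Port 7860 is the port a Hugging Face Space exposes by default
    uvicorn.run("app:app", host="0.0.0.0", port=7860)
```

Once it is running, the route can be checked with a plain GET request to `http://localhost:7860/`; the first request may still be slow because the GGUF file is downloaded when the module is imported.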