File size: 953 Bytes
0314811
37527e9
0314811
97f6c69
 
 
 
 
37527e9
f726f33
2aebf81
daf036c
3d2a79c
daf036c
2aebf81
 
daf036c
 
97f6c69
 
bcd9770
 
 
 
97f6c69
0314811
 
 
97f6c69
 
 
 
 
 
0314811
84927e5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from fastapi import FastAPI, HTTPException
from transformers import AutoModelForCausalLM, AutoTokenizer
import os
import copy
import time
import llama_cpp
from llama_cpp import Llama
from huggingface_hub import hf_hub_download  

app = FastAPI()

# The GGUF weights can be fetched once with:
#   hf_hub_download(repo_id="TheBloke/Mistral-7B-v0.1-GGUF",
#                   filename="mistral-7b-v0.1.Q4_K_M.gguf")
# The default below points at the snapshot already present in the image's
# HF cache; override it with the MODEL_PATH environment variable.
model_path = os.environ.get(
    "MODEL_PATH",
    "/code/cache/hub/models--TheBloke--Mistral-7B-v0.1-GGUF/snapshots/d4ae605152c8de0d6570cf624c083fa57dd0d551/mistral-7b-v0.1.Q4_K_M.gguf",
)

# Load the model once at import time so every request reuses it.
# n_ctx is the context-window size in tokens.
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
)
print(f"Loaded GGUF model from: {model_path}")


@app.get("/")
async def generate_text():
    """Run a fixed demo completion against the loaded model.

    Returns the raw llama-cpp completion dict; any failure during
    inference is surfaced to the client as an HTTP 500.
    """
    prompt = "Q: Name the planets in the solar system? A: "
    try:
        completion = llm(
            prompt,
            max_tokens=32,
            stop=["Q:", "\n"],
            echo=True,
        )
    except Exception as exc:
        # Boundary handler: convert any inference error into a 500 response.
        raise HTTPException(status_code=500, detail=str(exc))
    return completion