Spaces:
Sleeping
Sleeping
| from threading import Thread | |
| import os | |
| from typing import Iterator | |
| import gradio as gr | |
| import torch | |
| from gradio.components import textbox | |
| from huggingface_hub import hf_hub_download | |
| from llama_cpp import Llama | |
| from transformers import AutoModelForCausalLM, TextIteratorStreamer, LlamaTokenizer | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
# Fetch the GGUF-quantized prompt-expander model from the Hugging Face Hub
# (cached after the first download) and load it with llama-cpp.
llm = Llama.from_pretrained(
    repo_id="igor-im/flux_prompt_expander",
    filename="unsloth.Q8_0.gguf",
    # n_gpu_layers=-1,  # uncomment to enable GPU acceleration
    # seed=1337,        # uncomment for a reproducible seed
    # n_ctx=2048,       # uncomment to enlarge the context window
)
def response(prompt):
    """Expand *prompt* with the loaded llama-cpp model.

    Args:
        prompt: User-supplied text to expand.

    Returns:
        The generated completion text. Because ``echo=True`` the original
        prompt is included at the start of the returned string.

    Raises:
        KeyError/IndexError: if the completion dict is not in the expected
        OpenAI-style ``{"choices": [{"text": ...}]}`` shape.
    """
    llm_response = llm(
        prompt,
        max_tokens=200,  # cap the completion; None would generate to end of context
        echo=True,       # echo the prompt back in the output
    )
    # Direct indexing (rather than chained .get()) fails loudly and clearly
    # if the response shape is ever unexpected.
    return llm_response["choices"][0]["text"]
# Minimal Gradio UI: a single text input wired through response() to a
# single text output.
interface = gr.Interface(fn=response, inputs='textbox', outputs='textbox')
interface.launch()