Spaces:
Sleeping
Sleeping
| import logging | |
| from typing import cast | |
| from threading import Lock | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline | |
| import torch | |
| from conversation import get_default_conv_template | |
| import gradio as gr | |
| from llama_cpp import Llama | |
| import json | |
| from huggingface_hub import hf_hub_download | |
# GGUF model file expected next to this script; uncomment the
# hf_hub_download line to fetch a model from the Hub instead.
model_path = "starling-lm-7b-alpha.Q6_K.gguf"
#mdlpath = hf_hub_download(repo_id="afrideva/MiniChat-3B-GGUF", filename=model_path)
lcpp_model = Llama(model_path=model_path)

# Shared accumulator for streamed output. (Fix: the original also had a
# module-level `global otxt` statement, which is a no-op at module scope.)
otxt = ""
def m3b_talk(text):
    """Stream a completion for *text* from the llama.cpp model.

    Yields the accumulated response after each streamed chunk so the
    Gradio Textbox updates incrementally.

    Args:
        text: the user's message.

    Yields:
        The response text accumulated so far (prompt echo stripped).
    """
    formattedQuery = "GPT4 User: " + text + "<|end_of_text|>GPT4 Assistant:"
    # NOTE(review): Starling's published template is
    # "GPT4 Correct User: ...<|end_of_turn|>GPT4 Correct Assistant:" —
    # left unchanged here; confirm against the model card before editing.
    #
    # Fix: use a local accumulator. The original appended to a module-level
    # global that was never reset, so every new request concatenated onto
    # all previous answers.
    acc = ""
    prompt_stripped = False
    # echo=True makes llama.cpp emit the prompt back first; strip it once it
    # has fully arrived, then start yielding the generated text.
    for chunk in lcpp_model(formattedQuery, stop=["GPT4 User:", "\n\n\n"], echo=True, stream=True):
        acc += chunk["choices"][0]["text"]
        if formattedQuery in acc and not prompt_stripped:
            # Fix: str.replace returns a new string; the original discarded
            # the result, so the prompt echo was never removed.
            acc = acc.replace(formattedQuery, "")
            prompt_stripped = True
        else:
            yield acc
def main():
    """Build and launch the Gradio chat UI for the loaded llama.cpp model."""
    logging.basicConfig(level=logging.INFO)
    with gr.Blocks() as demo:
        with gr.Row(variant="panel"):
            # Fix: the UI said "MiniChat-3B", but the model actually loaded
            # is starling-lm-7b-alpha (see model_path at module level; the
            # MiniChat download line is commented out).
            gr.Markdown("## Talk to Starling-LM-7B-alpha\n\nTalk to Starling-LM-7B-alpha.")
        with gr.Row(variant="panel"):
            with gr.Column(variant="panel"):
                m3b_talk_input = gr.Textbox(label="Message", placeholder="Type something here...")
            with gr.Column(variant="panel"):
                m3b_talk_output = gr.Textbox()
                m3b_talk_btn = gr.Button("Send")
        # Registered inside the Blocks context; m3b_talk is a generator, so
        # the queued app streams partial responses into the output box.
        m3b_talk_btn.click(m3b_talk, inputs=m3b_talk_input, outputs=m3b_talk_output, api_name="talk_m3b")
    # queue() is required for generator (streaming) event handlers.
    demo.queue().launch()


if __name__ == "__main__":
    main()