#from huggingface_hub import InferenceClient
import gradio as gr



#client = InferenceClient("""K00B404/BagOMistral_14X_Coders-ties-7B""")
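# (InferenceClient and `client` are only needed by the disabled streaming chat UI
#  at the bottom of this file.)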
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Model repo id; the bf16 sharded checkpoint needs `accelerate` installed
# (pip install accelerate).
model_id = "TinyPixel/Llama-2-7B-bf16-sharded"

# Load the tokenizer and model locally for a quick generation smoke test.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

text = "Hello, my name is"  # placeholder prompt for the smoke test
inputs = tokenizer(text, return_tensors="pt")

outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

# Launch a simple Gradio demo for the hosted model last, since launch()
# blocks the script.
gr.load(f"models/{model_id}").launch()


"""
def format_prompt(message, history):
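  # Build a Mistral-style prompt: wrap each past user turn in [INST] ... [/INST],
  # append the bot reply followed by </s>, then add the new message as the
  # final [INST] block.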
  prompt = "<s>"
  for user_prompt, bot_response in history:
    prompt += f"[INST] {user_prompt} [/INST]"
    prompt += f" {bot_response}</s> "
  prompt += f"[INST] {message} [/INST]"
  return prompt

def generate(prompt, history, temperature=0.2, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
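    # Stream a completion from the InferenceClient, yielding the partial text
    # so the Gradio chatbot renders tokens as they arrive.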
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    formatted_prompt = format_prompt(prompt, history)

    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""

    for response in stream:
        output += response.token.text
        yield output
    return output

    
mychatbot = gr.Chatbot(avatar_images=["./user.png", "./botm.png"], bubble_full_width=False, show_label=False, show_copy_button=True, likeable=True)

demo = gr.ChatInterface(fn=generate, 
                        chatbot=mychatbot,
                        title="K00B404's Merged Models Test Chat",
                        retry_btn=None,
                        undo_btn=None
                       )

demo.queue().launch(show_api=False)
"""