Update app.py
app.py CHANGED
@@ -10,13 +10,14 @@ from mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel
 
 device = "cuda"
 tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
-model = MambaLMHeadModel.from_pretrained("state-spaces/mamba-
+model = MambaLMHeadModel.from_pretrained("state-spaces/mamba-2.8b", device=device, dtype=torch.float16)
+genlen = 200
 
 def pred(text_in):
     tokens = tokenizer(text_in, return_tensors="pt")
     input_ids = tokens.input_ids.to(device=device)
     attn_mask = tokens.attention_mask.to(device=device)
-    max_length = input_ids.shape[1] +
+    max_length = input_ids.shape[1] + genlen
     fn = lambda: model.generate(
         input_ids=input_ids,
         max_length=max_length,
@@ -24,13 +25,13 @@ def pred(text_in):
         return_dict_in_generate=True,
         output_scores=True,
         enable_timing=False,
-        temperature=
+        temperature=0.5,
         top_k=1,
-        top_p=
+        top_p=0.9,
     )
     out = fn()
     text_out = tokenizer.batch_decode(out.sequences.tolist())
-    return text_out
+    return text_out[0]
 
 demo = gr.Interface(fn=pred, inputs="text", outputs="text")
 
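For reference, the resulting app.py after this commit would look roughly like the sketch below. Only lines 10-37 appear in the diff (and the removed "-" lines are cut off in the viewer), so the import block, the one generate() argument that falls between the two hunks, and the closing demo.launch() call are assumptions inferred from how the names are used, not part of the visible patch.

import gradio as gr                      # assumed: gr.Interface is used below
import torch                             # assumed: needed for torch.float16
from transformers import AutoTokenizer   # assumed: AutoTokenizer is used below
from mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel

device = "cuda"
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
model = MambaLMHeadModel.from_pretrained("state-spaces/mamba-2.8b", device=device, dtype=torch.float16)
genlen = 200

def pred(text_in):
    tokens = tokenizer(text_in, return_tensors="pt")
    input_ids = tokens.input_ids.to(device=device)
    attn_mask = tokens.attention_mask.to(device=device)  # computed but not passed to generate()
    max_length = input_ids.shape[1] + genlen
    fn = lambda: model.generate(
        input_ids=input_ids,
        max_length=max_length,
        # one argument on new line 24 sits between the two hunks and is not visible in the diff
        return_dict_in_generate=True,
        output_scores=True,
        enable_timing=False,
        temperature=0.5,
        top_k=1,
        top_p=0.9,
    )
    out = fn()
    text_out = tokenizer.batch_decode(out.sequences.tolist())
    return text_out[0]  # return a single string rather than a one-element list

demo = gr.Interface(fn=pred, inputs="text", outputs="text")
demo.launch()  # assumed: not shown in the diff; the usual way to start a Gradio Space

In short, the change loads the state-spaces/mamba-2.8b checkpoint in float16 on the GPU, introduces a fixed genlen = 200 generation budget for max_length, sets temperature=0.5 and top_p=0.9 in generate(), and returns text_out[0] so the Gradio output textbox receives a plain string instead of a one-element list.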