Spaces:

Pmal
/

numinatest

Runtime error

Pmal committed on Nov 29, 2024

Commit

df48fb9

verified ·

1 Parent(s): f0f445c

update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -22,7 +22,8 @@ def load_model():
         # "AI-MO/NuminaMath-72B-CoT",
         "AI-MO/NuminaMath-7B-CoT",
         device_map="auto",  # Automatically map to available GPU
-        offload_folder="offload"  # Offload unused parts to disk
     )
     return pipeline("text-generation", model=model, tokenizer=tokenizer)
@@ -32,7 +33,7 @@ model_pipeline = load_model()
 # Define the function to process inputs
 def solve_math_question(prompt):
     # Generate output using the model
-    outputs = model_pipeline(prompt, max_new_tokens=1024, do_sample=False)
     return outputs[0]["generated_text"]
 # Define the Gradio interface
@@ -45,7 +46,7 @@ with gr.Blocks() as app:
     with gr.Row():
         question = gr.Textbox(
             label="Your Math Question",
-            placeholder="E.g., For how many values of the constant k will the polynomial x^2 + kx + 36 have two distinct integer roots?",
         )
         output = gr.Textbox(label="Model Output")

         # "AI-MO/NuminaMath-72B-CoT",
         "AI-MO/NuminaMath-7B-CoT",
         device_map="auto",  # Automatically map to available GPU
+        # offload_folder="offload"  # Offload unused parts to disk
+        load_in_8bit=True  # Load model in 8-bit precision
     )
     return pipeline("text-generation", model=model, tokenizer=tokenizer)
 # Define the function to process inputs
 def solve_math_question(prompt):
     # Generate output using the model
+    outputs = model_pipeline(prompt, max_new_tokens=300, do_sample=False)
     return outputs[0]["generated_text"]
 # Define the Gradio interface
     with gr.Row():
         question = gr.Textbox(
             label="Your Math Question",
+            placeholder="what is 2+2?",
         )
         output = gr.Textbox(label="Model Output")