Spaces: Running on Zero

Update app.py

app.py CHANGED
|
@@ -1,3 +1,5 @@
+print('Starting...')
+
 import random
 import os
 import time
@@ -18,13 +20,19 @@ os.makedirs('./temp', exist_ok=True)
 print('\n\n\n')
 print('Loading model...')
 pipe = transformers.pipeline(
-
-    model=
-    # revision=
-    torch_dtype=
-    device=
+    'text-generation',
+    model='dx2102/llama-midi',
+    # revision='c303c108399aba837146e893375849b918f413b3',
+    torch_dtype='bfloat16',
+    device='cuda',
+)
+cpu_pipe = transformers.pipeline(
+    'text-generation',
+    model='dx2102/llama-midi',
+    # revision='c303c108399aba837146e893375849b918f413b3',
+    torch_dtype='float32',
+    device='cpu',
 )
-cpu_pipe = pipe.to("cpu")
 print('Done')
 
 example_prefix = '''pitch duration wait velocity instrument
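A note on the hunk above (the motivation is an inference, not stated in the commit): the old `cpu_pipe = pipe.to("cpu")` could never produce two independent copies, because `torch.nn.Module.to()` converts a module in place and returns the same object; building two separate pipelines keeps an independent bfloat16 CUDA copy and a float32 CPU copy. A minimal sketch of that in-place behavior, in plain PyTorch:

import torch

m = torch.nn.Linear(4, 4)
m2 = m.to(torch.float64)  # nn.Module.to() converts in place and returns self
print(m2 is m)            # True: there is only ever one underlying model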
@@ -78,7 +86,7 @@ def postprocess(txt, path):
             ))
             now += wait
         except Exception as e:
-            print(f'Postprocess: Ignored line:
+            print(f"Postprocess: Ignored line: '{line}' because of error:", e)
 
     print(f'Postprocess: Got {sum(len(track.notes) for track in tracks.values())} notes')
 
@@ -97,21 +105,21 @@
 
 
 with gr.Blocks() as demo:
-    chatbot_box = gr.Chatbot(type=
-    prefix_box = gr.TextArea(value=
+    chatbot_box = gr.Chatbot(type='messages', render_markdown=False, sanitize_html=False)
+    prefix_box = gr.TextArea(value='Twinkle Twinkle Little Star', label='Score title / text prefix')
     with gr.Row():
-        submit_btn = gr.Button(
-        continue_btn = gr.Button(
-        clear_btn = gr.Button(
+        submit_btn = gr.Button('Generate')
+        continue_btn = gr.Button('Continue')
+        clear_btn = gr.Button('Clear history')
     with gr.Row():
-        get_audio_btn = gr.Button(
-        get_midi_btn = gr.Button(
+        get_audio_btn = gr.Button('Convert to audio')
+        get_midi_btn = gr.Button('Convert to MIDI')
     audio_box = gr.Audio()
     midi_box = gr.File()
     piano_roll_box = gr.Image()
     server_box = gr.Dropdown(
-        choices=[
-        label=
+        choices=['Huggingface ZeroGPU', 'CPU'],
+        label='GPU Server',
     )
     gr.Markdown('''
 ZeroGPU comes with a time limit currently:
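For context on `type='messages'` above: in this mode Gradio's Chatbot history is a list of OpenAI-style role/content dicts rather than `[user, bot]` pairs, which is what drives all the `history[-1]['content']` rewrites in the hunks below. A minimal sketch with illustrative values:

history = [
    {'role': 'user', 'content': 'Twinkle Twinkle Little Star'},
    {'role': 'assistant', 'content': 'pitch duration wait velocity instrument\n'},
]
history[-1]['content'] += '62 230 0 90 0\n'  # streaming appends to the last message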
@@ -124,23 +132,23 @@ CPUs will be slower but there is no time limit.
     example_box = gr.Examples(
         [
             # [example_prefix],
-            [
-            [
-            [
-            [
-            # [
+            ['Twinkle Twinkle Little Star'], ['Twinkle Twinkle Little Star (Minor Key Version)'],
+            ['The Entertainer - Scott Joplin (Piano Solo)'], ['Clair de Lune – Debussy'], ['Nocturne | Frederic Chopin'],
+            ['Fugue I in C major, BWV 846'], ['Beethoven Symphony No. 7 (2nd movement) Piano solo'],
+            ['Guitar'],
+            # ['Composer: Chopin'], ['Composer: Bach'], ['Composer: Beethoven'], ['Composer: Debussy'],
         ],
         inputs=prefix_box,
         examples_per_page=9999,
     )
 
     def user_fn(user_message, history: list):
-        return
+        return '', history + [{'role': 'user', 'content': user_message}]
 
     def get_last(history: list):
         if len(history) == 0:
-            raise gr.Error('''No messages to read yet. Try the
-        return history[-1][
+            raise gr.Error('''No messages to read yet. Try the 'Generate' button first!''')
+        return history[-1]['content']
 
     def generate_fn(history, server):
         # continue from user input
@@ -151,14 +159,14 @@ CPUs will be slower but there is no time limit.
         # add '\n' to prevent model from continuing the title
         prefix += '\n'
 
-        history.append({
-        # history[-1][
+        history.append({'role': 'assistant', 'content': ''})
+        # history[-1]['content'] += 'Generating with the given prefix...\n'
        for history in model_fn(prefix, history, server):
             yield history
 
     def continue_fn(history, server):
         # continue from the last model output
-        prefix = history[-1][
+        prefix = history[-1]['content']
         for history in model_fn(prefix, history, server):
             yield history
 
@@ -166,14 +174,14 @@ CPUs will be slower but there is no time limit.
 
 
     def model_fn(prefix, history, server):
-        if server ==
+        if server == 'Huggingface ZeroGPU':
             generator = zerogpu_model_fn(prefix, history, pipe)
-        elif server ==
+        elif server == 'CPU':
             generator = cpu_model_fn(prefix, history, cpu_pipe)
-        # elif server ==
+        # elif server == 'RunPod':
         #     generator = runpod_model_fn(prefix, history)
         else:
-            raise gr.Error(f
+            raise gr.Error(f'Unknown server: {server}')
         for history in generator:
             yield history
 
@@ -203,7 +211,7 @@ CPUs will be slower but there is no time limit.
             text = queue.get()
             if text is None:
                 break
-            history[-1][
+            history[-1]['content'] += text
             yield history
 
     zerogpu_model_fn = spaces.GPU(cpu_model_fn)
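Two notes on this hunk. The `queue.get()` loop is the consumer half of a thread-based streaming pattern; a self-contained sketch of that pattern (the producer thread and the None sentinel convention are assumptions inferred from this consumer loop):

import threading
import queue

q = queue.Queue()

def producer():
    # stand-in for the generation thread pushing text chunks
    for chunk in ('62 230 0 90 0\n', '64 230 0 90 0\n'):
        q.put(chunk)
    q.put(None)  # sentinel: generation is finished

threading.Thread(target=producer).start()
while (text := q.get()) is not None:
    print(text, end='')

And `spaces.GPU(cpu_model_fn)` wraps the plain CPU function so that, on a ZeroGPU Space, each call runs with a GPU attached for its duration; the same wrapper is more often written as the `@spaces.GPU` decorator.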
@@ -215,12 +223,12 @@ CPUs will be slower but there is no time limit.
 
         # synchronized request
         response = requests.post(
-            f
-            headers={
-            json={
+            f'https://api.runpod.ai/v2/{runpod_endpoint}/runsync',
+            headers={'Authorization': f'Bearer {runpod_api_key}'},
+            json={'input': {'prompt': prefix}}
         ).json()['output'][0]['choices'][0]['tokens'][0]
         # yield just once
-        history[-1][
+        history[-1]['content'] += response
         yield history
 
 
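The chained indexing `['output'][0]['choices'][0]['tokens'][0]` implies a `runsync` response shaped roughly as below (a sketch matching RunPod's serverless vLLM worker output format; the field values are illustrative):

response_json = {
    'output': [
        {'choices': [
            {'tokens': ['62 230 0 90 0\n64 230 0 90 0\n']},
        ]},
    ],
}
text = response_json['output'][0]['choices'][0]['tokens'][0]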
@@ -266,7 +274,7 @@ CPUs will be slower but there is no time limit.
         import matplotlib.pyplot as plt
         plt.figure(figsize=(12, 4))
         now = 0
-        for line in history[-1][
+        for line in history[-1]['content'].split('\n\n')[-1].split('\n'):
             try:
                 pitch, duration, wait, velocity, instrument = [int(x) for x in line.split()]
             except Exception as e:
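For reference, each generated line is five space-separated integers in the `pitch duration wait velocity instrument` layout declared by `example_prefix`, so the piano-roll loop above recovers one note per line like this (stand-alone sketch, illustrative values):

line = '62 230 0 90 0'
pitch, duration, wait, velocity, instrument = (int(x) for x in line.split())
# the note starts at the running cursor `now`, lasts `duration` ticks,
# and the cursor then advances by `wait` ticks before the next note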