un-index committed on
Commit
6779ce8
·
1 Parent(s): be94533
Files changed (1) hide show
  1. app.py +36 -22
app.py CHANGED
@@ -42,9 +42,9 @@ title = "text generator based on GPT models"
42
 
43
  examples = [
44
  # another machine learning example
45
- [["For today's homework assignment, please describe the reasons for the US Civil War."], 0.8, 0.9, 50, "GPT-2"],
46
- [["In a shocking discovery, scientists have found a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English."], 0.8, 0.9, 50, "GPT-2"],
47
- [["The first step in the process of developing a new language is to invent a new word."], 0.8, 0.9, 50, "GPT-2"],
48
  ]
49
 
50
 
@@ -75,6 +75,16 @@ headers = {"Authorization": f"Bearer {os.environ['API_TOKEN']}"}
75
  # NOTE see build logs here: https://huggingface.co/spaces/un-index/textgen6b/logs/build
76
 
77
 
 
 
 
 
 
 
 
 
 
 
78
  def f(context, temperature, top_p, max_length, model_idx, SPACE_VERIFICATION_KEY):
79
  try:
80
 
@@ -87,17 +97,20 @@ def f(context, temperature, top_p, max_length, model_idx, SPACE_VERIFICATION_KEY
87
  if main_gpt_j_api_up:
88
  # for this api, a length of > 250 instantly errors, so use a while loop or something
89
  # that would fetch results in chunks of 250
90
- generated_text = ""
91
  while (max_length > 0):
92
- payload = {"inputs": context, "parameters": {"max_new_tokens": 250, "temperature": temperature, "top_p": top_p}}
93
  response = requests.request("POST", API_URL, data=json.dumps(payload), headers=headers)
94
- context = json.loads(response.content.decode("utf-8"))#[0]['generated_text']
 
 
95
  # handle inconsistent inference API
96
- if 'generated_text' in context[0]:
97
- context = context[0]['generated_text']
98
- else:
99
- context = context[0][0]['generated_text']
100
- generated_text += context
 
101
  max_length -= 250
102
 
103
  # payload = {"inputs": context, "parameters":{
@@ -105,7 +118,9 @@ def f(context, temperature, top_p, max_length, model_idx, SPACE_VERIFICATION_KEY
105
  # data = json.dumps(payload)
106
  # response = requests.request("POST", API_URL, data=data, headers=headers)
107
  # generated_text = json.loads(response.content.decode("utf-8"))[0]['generated_text']
108
- return generated_text
 
 
109
 
110
  # use secondary gpt-j-6B api, as the main one is down
111
  if not secondary_gpt_j_api_up:
@@ -127,7 +142,7 @@ def f(context, temperature, top_p, max_length, model_idx, SPACE_VERIFICATION_KEY
127
  response = requests.post(
128
  "http://api.vicgalle.net:5000/generate", params=payload).json()
129
  return response['text']
130
- else:
131
  # use GPT-2
132
  #
133
  try:
@@ -148,18 +163,16 @@ def f(context, temperature, top_p, max_length, model_idx, SPACE_VERIFICATION_KEY
148
  # TODO if yes, then make max_length infinite because it seems to be counted as max input length, not output
149
  # NOTE max_new_tokens does not seem to generate that many tokens
150
  # however in the source that's what's used
 
 
151
  generated_text = generator(context, max_length=896, max_new_tokens=max_length, top_p=top_p, temperature=temperature, num_return_sequences=1)
152
  except Exception as e:
153
  return "Exception while generating text: " + str(e)
154
  # [0][0]['generated_text']
155
 
156
- try:
157
- if 'generated_text' in generated_text[0]:
158
- return generated_text[0]['generated_text']
159
- else:
160
- return generated_text[0][0]['generated_text']
161
- except:
162
- return generated_text # was error due to timeout because of not enabling queue in gradio interface?
163
  # if it works right now, then that was the reason for the JSON parsing error
164
  # except:
165
  # generated_text = generator(context, max_length=max_length, top_p=top_p, temperature=temperature, num_return_sequences=1)[0]
@@ -169,7 +182,8 @@ def f(context, temperature, top_p, max_length, model_idx, SPACE_VERIFICATION_KEY
169
 
170
  # TODO use fallback gpt-2 inference api for this as well
171
  # TODO or just make it an option in the menu "GPT-2 inference"
172
-
 
173
 
174
  except Exception as e:
175
  return f"error with idx{model_idx}: "+str(e)
@@ -181,7 +195,7 @@ iface = gr.Interface(f, [
181
  top_p,
182
  gr.inputs.Slider(
183
  minimum=20, maximum=512, default=30, label="max length"),
184
- gr.inputs.Dropdown(["GPT-J-6B", "GPT-2"], type="index", label="model"),
185
  gr.inputs.Textbox(lines=1, placeholder="xxxxxxxx", label="space verification key")
186
 
187
 ], outputs="text", title=title, examples=examples, enable_queue = True) # deprecated within iface.launch: https://discuss.huggingface.co/t/is-there-a-timeout-max-runtime-for-spaces/12979/3?u=un-index
 
42
 
43
  examples = [
44
  # another machine learning example
45
+ [["For today's homework assignment, please describe the reasons for the US Civil War."], 0.8, 0.9, 50, "GPT2"],
46
+ [["In a shocking discovery, scientists have found a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English."], 0.8, 0.9, 50, "GPT2"],
47
+ [["The first step in the process of developing a new language is to invent a new word."], 0.8, 0.9, 50, "GPT2"],
48
  ]
49
 
50
 
 
75
  # NOTE see build logs here: https://huggingface.co/spaces/un-index/textgen6b/logs/build
76
 
77
 
78
+ def get_generated_text(generated_text):
79
+ try:
80
+ if 'generated_text' in generated_text[0]:
81
+ return generated_text[0]['generated_text']
82
+ else:
83
+ return generated_text[0][0]['generated_text']
84
+ except:
85
+ return generated_text
86
+
87
+
88
  def f(context, temperature, top_p, max_length, model_idx, SPACE_VERIFICATION_KEY):
89
  try:
90
 
 
97
  if main_gpt_j_api_up:
98
  # for this api, a length of > 250 instantly errors, so use a while loop or something
99
  # that would fetch results in chunks of 250
100
+ generated_total = context
101
  while (max_length > 0):
102
+ payload = {"inputs": generated_total, "parameters": {"max_new_tokens": 250, "temperature": temperature, "top_p": top_p}}
103
  response = requests.request("POST", API_URL, data=json.dumps(payload), headers=headers)
104
+ generated_text = json.loads(response.content.decode("utf-8"))#[0]['generated_text']
105
+
106
+ generated_text = get_generated_text(generated_text)
107
  # handle inconsistent inference API
108
+ # if 'generated_text' in context[0]:
109
+ # context = context[0]['generated_text']
110
+ # else:
111
+ # context = context[0][0]['generated_text']
112
+
113
+ generated_total += context
114
  max_length -= 250
115
 
116
  # payload = {"inputs": context, "parameters":{
 
118
  # data = json.dumps(payload)
119
  # response = requests.request("POST", API_URL, data=data, headers=headers)
120
  # generated_text = json.loads(response.content.decode("utf-8"))[0]['generated_text']
121
+ # remove first n characters of generated total where n = len(context)
122
+ generated_total = generated_total[len(context):]
123
+ return generated_total
124
 
125
  # use secondary gpt-j-6B api, as the main one is down
126
  if not secondary_gpt_j_api_up:
 
142
  response = requests.post(
143
  "http://api.vicgalle.net:5000/generate", params=payload).json()
144
  return response['text']
145
+ elif model_idx == 1:
146
  # use GPT-2
147
  #
148
  try:
 
163
  # TODO if yes, then make max_length infinite because it seems to be counted as max input length, not output
164
  # NOTE max_new_tokens does not seem to generate that many tokens
165
  # however in the source that's what's used
166
+ # NOTE I think max_new_tokens is working now and punctuation characters count too
167
+ # NOTE set max_length to max_length to allow input text of any size
168
  generated_text = generator(context, max_length=896, max_new_tokens=max_length, top_p=top_p, temperature=temperature, num_return_sequences=1)
169
  except Exception as e:
170
  return "Exception while generating text: " + str(e)
171
  # [0][0]['generated_text']
172
 
173
+ return get_generated_text(generated_text)
174
+
175
+ # was error due to timeout because of not enabling queue in gradio interface?
 
 
 
 
176
  # if it works right now, then that was the reason for the JSON parsing error
177
  # except:
178
  # generated_text = generator(context, max_length=max_length, top_p=top_p, temperature=temperature, num_return_sequences=1)[0]
 
182
 
183
  # TODO use fallback gpt-2 inference api for this as well
184
  # TODO or just make it an option in the menu "GPT-2 inference"
185
+ else:
186
+ url = "https://api-inference.huggingface.co/models/distilgpt2"
187
 
188
  except Exception as e:
189
  return f"error with idx{model_idx}: "+str(e)
 
195
  top_p,
196
  gr.inputs.Slider(
197
  minimum=20, maximum=512, default=30, label="max length"),
198
+ gr.inputs.Dropdown(["GPT-J-6B", "GPT2", "DistilGPT2"], type="index", label="model"),
199
  gr.inputs.Textbox(lines=1, placeholder="xxxxxxxx", label="space verification key")
200
 
201
 ], outputs="text", title=title, examples=examples, enable_queue = True) # deprecated within iface.launch: https://discuss.huggingface.co/t/is-there-a-timeout-max-runtime-for-spaces/12979/3?u=un-index