import json
import os
from random import randint

import gradio as gr
import requests
from transformers import pipeline, set_seed
# Earlier experiment, kept for reference: loading GPT-J-6B locally with
# AutoModelForCausalLM instead of calling the hosted inference APIs below.
# ex = None
# try:
#     from transformers import AutoModelForCausalLM, AutoTokenizer
#     tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
#     model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B")
# except Exception as e:
#     ex = e
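# Generation knobs shared by both models: temperature rescales the next-token
# distribution (higher = more random), and top_p keeps only the smallest set
# of tokens whose cumulative probability exceeds p (nucleus sampling).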
temperature = gr.inputs.Slider(
    minimum=0, maximum=1.5, default=0.8, label="temperature")
top_p = gr.inputs.Slider(
    minimum=0, maximum=1.0, default=0.9, label="top_p")
generator = pipeline('text-generation', model='gpt2')
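# NOTE: this downloads the GPT-2 weights at startup and runs generation
# locally in the Space, unlike GPT-J-6B, which is served via external APIs.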
title = "GPT-J-6B"
title = "text generator based on GPT models"
# TODO TODO TODO TODO support fine tuned models or models for text generation for different purposes
# Each example must supply a value for every input component, in order:
# context, temperature, top_p, max length, model, space verification key.
examples = [
    ["For today's homework assignment, please describe the reasons for the US Civil War.", 0.8, 0.9, 50, "GPT-2", ""],
    ["In a shocking discovery, scientists have found a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English.", 0.8, 0.9, 50, "GPT-2", ""],
    ["The first step in the process of developing a new language is to invent a new word.", 0.8, 0.9, 50, "GPT-2", ""],
]
# Check whether an endpoint is reachable: HEAD request with a 10-second timeout.
def is_up(url):
    try:
        requests.head(url, timeout=10)
        return True
    except Exception:
        return False
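# e.g. is_up("http://api.vicgalle.net:5000/generate") returns False on DNS
# failure, a refused connection, or a timeout; True for any HTTP response.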
API_URL = "https://api-inference.huggingface.co/models/EleutherAI/gpt-j-6B"
main_gpt_j_api_up = is_up(API_URL)
secondary_gpt_j_api_up = False
if not main_gpt_j_api_up:
    # main API is down; check whether the secondary API is available
    # (http, not https, to match the URL the generate request below actually hits)
    API_URL = "http://api.vicgalle.net:5000/generate"
    secondary_gpt_j_api_up = is_up(API_URL)
headers = {"Authorization": f"Bearer {os.environ['API_TOKEN']}"}
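# NOTE: API_TOKEN must be provided as a secret in the Space settings;
# os.environ['API_TOKEN'] fails fast with a KeyError if it is missing.

# A minimal helper sketch (an assumption, not wired into f below): both the
# inference API and the local pipeline sometimes return
# [{"generated_text": ...}] and sometimes [[{"generated_text": ...}]], so the
# repeated shape checks in f could be replaced by one normalizer like this.
def extract_generated_text(result):
    first = result[0]
    if isinstance(first, list):  # unwrap the occasional extra nesting level
        first = first[0]
    return first['generated_text']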
# NOTE see build logs here: https://huggingface.co/spaces/un-index/textgen6b/logs/build
def f(context, temperature, top_p, max_length, model_idx, SPACE_VERIFICATION_KEY):
try:
if os.environ['SPACE_VERIFICATION_KEY'] != SPACE_VERIFICATION_KEY:
return "invalid SPACE_VERIFICATION_KEY; see project secrets to view key"
        # model_idx comes from the Dropdown with type="index": 0 = GPT-J-6B, 1 = GPT-2
if model_idx == 0:
            if main_gpt_j_api_up:
                # this API errors on requests for more than 250 new tokens, so
                # fetch the output in chunks of up to 250 tokens, feeding each
                # chunk back in as the context for the next request
                generated_text = ""
                while max_length > 0:
                    payload = {"inputs": context, "parameters": {"max_new_tokens": min(max_length, 250), "temperature": temperature, "top_p": top_p}}
                    response = requests.post(API_URL, data=json.dumps(payload), headers=headers)
                    context = json.loads(response.content.decode("utf-8"))
                    # the inference API response shape is inconsistent: usually
                    # [{'generated_text': ...}], occasionally nested one level deeper
                    if 'generated_text' in context[0]:
                        context = context[0]['generated_text']
                    else:
                        context = context[0][0]['generated_text']
                    generated_text += context
                    max_length -= 250
return generated_text
            # main API is down; fall back to the secondary GPT-J-6B API
            if not secondary_gpt_j_api_up:
                return "ERR: both GPT-J-6B APIs are down, please try again later (a third fallback may be added in the future)"
            # secondary API docs:
            # http://api.vicgalle.net:5000/docs#/default/generate_generate_post
            # https://pythonrepo.com/repo/vicgalle-gpt-j-api-python-natural-language-processing
            payload = {
                "context": context,
                "token_max_length": max_length,
                "temperature": temperature,
                "top_p": top_p,
            }
            response = requests.post(
                "http://api.vicgalle.net:5000/generate", params=payload).json()
            return response['text']
        else:
            # model_idx == 1: use the local GPT-2 pipeline
            try:
                # reseed so repeated calls with the same prompt can differ
                set_seed(randint(1, 2**31))
            except Exception as e:
                return "Exception while setting seed: " + str(e)
            # num_return_sequences controls how many sequences come back; index
            # results with 'generated_text' (underscore, not 'generated-text')
            try:
                # max_new_tokens (where the installed transformers version
                # supports it) bounds the generated continuation; max_length=400
                # stays as a fallback cap on prompt + output combined
                generated_text = generator(context, max_length=400, max_new_tokens=max_length, top_p=top_p, temperature=temperature, num_return_sequences=1)
            except Exception as e:
                return "Exception while generating text: " + str(e)
            # like the inference API, the pipeline output is sometimes
            # [{'generated_text': ...}] and sometimes nested one level deeper
            try:
                if 'generated_text' in generated_text[0]:
                    return generated_text[0]['generated_text']
                else:
                    return generated_text[0][0]['generated_text']
            except Exception:
                return generated_text
            # generate() kwargs reference: https://github.com/huggingface/transformers/blob/27b3031de2fb8195dec9bc2093e3e70bdb1c4bff/src/transformers/generation_tf_utils.py#L348-L376
            # TODO: use a fallback GPT-2 inference API here as well, or expose
            # it as a separate "GPT-2 inference" option in the model dropdown
    except Exception as e:
        return f"error with model_idx {model_idx}:\n{e}"
iface = gr.Interface(f, [
"text",
temperature,
top_p,
gr.inputs.Slider(
minimum=20, maximum=512, default=30, label="max length"),
gr.inputs.Dropdown(["GPT-J-6B", "GPT-2"], type="index", label="model"),
gr.inputs.Textbox(lines=1, placeholder="xxxxxxxx", label="space verification key")
], outputs="text", title=title, examples=examples)
iface.launch() # enable_queue=True
# The alternative below also works; kept commented out while testing:
# import gradio as gr
# gr.Interface.load("huggingface/EleutherAI/gpt-j-6B",
#                   inputs=gr.inputs.Textbox(lines=10, label="Input Text"),
#                   title=title, examples=examples).launch()