import json
import os
from random import randint

import gradio as gr
import requests
from transformers import pipeline, set_seed

# Earlier attempt at loading GPT-J-6B locally, kept for reference:
#
# prompt = "In a shocking finding, scientists discovered a herd of unicorns living in a remote, " \
#          "previously unexplored valley, in the Andes Mountains. Even more surprising to the " \
#          "researchers was the fact that the unicorns spoke perfect English."
#
# ex = None
# try:
#     from transformers import AutoModelForCausalLM, AutoTokenizer
#     tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
#     # model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B")
# except Exception as e:
#     ex = e

temperature = gr.inputs.Slider(minimum=0, maximum=1.5, default=0.8, label="temperature")
top_p = gr.inputs.Slider(minimum=0, maximum=1.0, default=0.9, label="top_p")
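# temperature rescales the next-token distribution (lower = more deterministic);
# top_p restricts sampling to the smallest set of tokens whose cumulative
# probability exceeds top_p (nucleus sampling)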

# TODO: add a Gradio checkbox input

# local GPT-2 pipeline, used when the user picks "GPT-2" in the model dropdown
generator = pipeline('text-generation', model='gpt2')


title = "GPT-J-6B"

examples = [
    # each example supplies: context, temperature, top_p, max length, model
    ["For today's homework assignment, please describe the reasons for the US Civil War.", 0.8, 0.9, 50, "GPT-2"],
    ["In a shocking discovery, scientists have found a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English.", 0.8, 0.9, 50, "GPT-2"],
    ["The first step in the process of developing a new language is to invent a new word.", 0.8, 0.9, 50, "GPT-2"],
]
# NOTE: could use the inference API in /gptinference.py instead, but it's
# unclear whether it supports length > 250.


def is_up(url):
    """Return True if `url` answers a HEAD request within 10 seconds."""
    try:
        requests.head(url, timeout=10)
        return True
    except Exception:
        return False
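
# e.g. is_up("https://huggingface.co") -> True while the host answers HEAD
# requests; a timeout or connection error yields False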

API_URL = "https://api-inference.huggingface.co/models/EleutherAI/gpt-j-6B"
main_gpt_j_api_up = is_up(API_URL)
secondary_gpt_j_api_up = False
if not main_gpt_j_api_up:
    # check whether the secondary API is available (it serves plain HTTP on port 5000)
    API_URL = "http://api.vicgalle.net:5000/generate"
    secondary_gpt_j_api_up = is_up(API_URL)

# the Hugging Face Inference API token is read from the API_TOKEN environment variable
headers = {"Authorization": f"Bearer {os.environ['API_TOKEN']}"}


def f(context, temperature, top_p, max_length, model_idx):
    try:
        # model_idx comes from the Dropdown with type="index":
        # 0 = GPT-J-6B (remote APIs), 1 = GPT-2 (local pipeline)
        if model_idx == 0:
            if main_gpt_j_api_up:
                payload = {"inputs": context, "parameters":{
                    "max_new_tokens":max_length, "temperature":temperature, "top_p":top_p}}
                data = json.dumps(payload)
                response = requests.request("POST", API_URL, data=data, headers=headers)
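                # the Inference API returns a JSON list like [{"generated_text": "..."}]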
                generated_text = json.loads(response.content.decode("utf-8"))[0]['generated_text']
                return generated_text

            if not secondary_gpt_j_api_up:
                return "ERR: both GPT-J-6B APIs are down, please try again later (will use a third fallback in the future)"

            # use the fallback API; docs:
            # http://api.vicgalle.net:5000/docs#/default/generate_generate_post
            # https://pythonrepo.com/repo/vicgalle-gpt-j-api-python-natural-language-processing

            payload = {
                "context": context,
                "token_max_length": max_length,  # 512,
                "temperature": temperature,
                "top_p": top_p,
            }
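            # the fallback API's JSON response includes a "text" field with the completion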
            
            response = requests.post(
                "http://api.vicgalle.net:5000/generate", params=payload).json()
            return response['text']
        else:
            # use the local GPT-2 pipeline
            set_seed(randint(1, 2**31))
            # num_return_sequences controls how many completions come back
            return generator(context, max_length=max_length, top_p=top_p,
                             temperature=temperature, num_return_sequences=1)[0]['generated_text']
            # generation args documented in the source:
            # https://github.com/huggingface/transformers/blob/27b3031de2fb8195dec9bc2093e3e70bdb1c4bff/src/transformers/generation_tf_utils.py#L348-L376

    except Exception as e:
        return f"Error with model index {model_idx}:\n{e}"


iface = gr.Interface(f, [
    "text",
    temperature,
    top_p,
    gr.inputs.Slider(
        minimum=20, maximum=512, default=30, label="max length"),
    gr.inputs.Dropdown(["GPT-J-6B", "GPT-2"], type="index", label="model"),
], outputs="text", title=title, examples=examples)
iface.launch()  # enable_queue=True

# Alternative approach (works, kept here for reference): load the hosted model directly.
# import gradio as gr


# gr.Interface.load("huggingface/EleutherAI/gpt-j-6B",
#     inputs=gr.inputs.Textbox(lines=10, label="Input Text"),
#     title=title, examples=examples).launch()