un-index committed on
Commit 85f4499 · 1 Parent(s): 3deace5
Files changed (1)
  1. app.py +49 -41
app.py CHANGED
@@ -1,4 +1,5 @@
 
+import os
 from random import randint
 from transformers import pipeline, set_seed
 import requests
@@ -42,13 +43,13 @@ title = "text generator based on GPT models"
 
 examples = [
     # another machine learning example
-    [["For today's homework assignment, please describe the reasons for the US Civil War."], 0.8, 0.9, 50, "GPT2"],
+    [["For today's homework assignment, please describe the reasons for the US Civil War."],
+        0.8, 0.9, 50, "GPT2"],
     [["In a shocking discovery, scientists have found a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English."], 0.8, 0.9, 50, "GPT2"],
-    [["The first step in the process of developing a new language is to invent a new word."], 0.8, 0.9, 50, "GPT2"],
+    [["The first step in the process of developing a new language is to invent a new word."],
+        0.8, 0.9, 50, "GPT2"],
 ]
 
-
-
 
 # check if api.vicgalle.net:5000/generate is down with timeout of 10 seconds
 def is_up(url):
@@ -60,7 +61,6 @@ def is_up(url):
 
 # gpt_j_api_down = False
 
-import os
 
 API_URL = "https://api-inference.huggingface.co/models/EleutherAI/gpt-j-6B"
 main_gpt_j_api_up = is_up(API_URL)
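The body of `is_up` sits outside this hunk; the comment above it describes a 10-second availability probe against the fallback endpoint. A minimal sketch of such a check (an assumption, since the committed body isn't visible in this diff):

```python
import requests

def is_up(url):
    # Probe the endpoint and treat any network error or timeout as "down".
    try:
        requests.get(url, timeout=10)
        return True
    except requests.exceptions.RequestException:
        return False
```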
@@ -73,7 +73,7 @@ if not main_gpt_j_api_up:
 headers = {"Authorization": f"Bearer {os.environ['API_TOKEN']}"}
 
 # NOTE see build logs here: https://huggingface.co/spaces/un-index/textgen6b/logs/build
-
+
 
 def get_generated_text(generated_text):
     try:
@@ -83,13 +83,13 @@ def get_generated_text(generated_text):
         return generated_text[0][0]['generated_text']
     except:
         # recursively loop through generated_text till we get the text
-        # don't know if this will work
+        # don't know if this will work
         for gt in generated_text:
             if 'generated_text' in gt:
                 return gt['generated_text']
             else:
                 return get_generated_text(gt)
-        # return generated_text
+        # return generated_text
 
 
 def f(context, temperature, top_p, max_length, model_idx, SPACE_VERIFICATION_KEY):
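`get_generated_text` exists because the Inference API response shape is inconsistent: sometimes `[{"generated_text": ...}]`, sometimes nested one list deeper. A self-contained illustration of the shapes involved, using an `isinstance`-based variant of the same recursive walk (the sample payloads are made up):

```python
# Example payloads (invented) showing the two shapes the extractor must handle:
flat = [{"generated_text": "once upon a time"}]
nested = [[{"generated_text": "once upon a time"}]]

def extract(resp):
    # Walk nested lists until a dict carrying 'generated_text' turns up.
    if isinstance(resp, dict):
        return resp.get("generated_text")
    for item in resp:
        found = extract(item)
        if found is not None:
            return found
    return None

assert extract(flat) == extract(nested) == "once upon a time"
```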
@@ -106,21 +106,24 @@ def f(context, temperature, top_p, max_length, model_idx, SPACE_VERIFICATION_KEY
     # that would fetch results in chunks of 250
     # NOTE change so it uses previous generated input every time
     # _context = context
-    generated_text = ""#context #""
+    generated_text = ""  # context #""
     while (max_length > 0):
         # context becomes the previous generated context
         # NOTE I've set return_full_text to false, see how this plays out
-        payload = {"inputs": context, "parameters": {"max_new_tokens": max_length>250 and 250 or max_length, "temperature": temperature, "top_p": top_p}}
-        response = requests.request("POST", API_URL, data=json.dumps(payload), headers=headers)
-        context = json.loads(response.content.decode("utf-8"))#[0]['generated_text']
+        payload = {"inputs": context, "parameters": {"max_new_tokens": max_length >
+                                                     250 and 250 or max_length, "temperature": temperature, "top_p": top_p}}
+        response = requests.request(
+            "POST", API_URL, data=json.dumps(payload), headers=headers)
+        context = json.loads(response.content.decode(
+            "utf-8"))  # [0]['generated_text']
         # context = get_generated_text(generated_context)
-
+
         # handle inconsistent inference API
         # if 'generated_text' in context[0]:
         # context = context[0]['generated_text']
         # else:
         # context = context[0][0]['generated_text']
-
+
         context = get_generated_text(context)
 
         generated_text += context
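The loop above requests at most 250 new tokens per call and feeds each chunk back in as the next prompt until `max_length` is used up; `max_length > 250 and 250 or max_length` is the old and/or idiom for `min(max_length, 250)`. A cleaner sketch of the same chunking pattern, assuming the module-level `API_URL`, `headers`, and `get_generated_text` from this file:

```python
import json
import requests

def generate_chunked(context, max_length, temperature, top_p):
    # Same idea as the committed loop: request <= 250 new tokens per call,
    # then use the freshly generated text as the next call's prompt.
    generated_text = ""
    while max_length > 0:
        payload = {
            "inputs": context,
            "parameters": {
                "max_new_tokens": min(max_length, 250),  # clearer than and/or
                "temperature": temperature,
                "top_p": top_p,
            },
        }
        response = requests.post(API_URL, data=json.dumps(payload), headers=headers)
        context = get_generated_text(json.loads(response.content.decode("utf-8")))
        generated_text += context
        max_length -= 250
    return generated_text
```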
@@ -131,14 +134,14 @@ def f(context, temperature, top_p, max_length, model_idx, SPACE_VERIFICATION_KEY
     # data = json.dumps(payload)
     # response = requests.request("POST", API_URL, data=data, headers=headers)
     # generated_text = json.loads(response.content.decode("utf-8"))[0]['generated_text']
-    return generated_text#context #_context+generated_text
+    return generated_text  # context #_context+generated_text
 
     # use secondary gpt-j-6B api, as the main one is down
     if not secondary_gpt_j_api_up:
         return "ERR: both GPT-J-6B APIs are down, please try again later (will use a third fallback in the future)"
 
     # use fallback API
-    #
+    #
     # http://api.vicgalle.net:5000/docs#/default/generate_generate_post
     # https://pythonrepo.com/repo/vicgalle-gpt-j-api-python-natural-language-processing
 
@@ -149,7 +152,7 @@ def f(context, temperature, top_p, max_length, model_idx, SPACE_VERIFICATION_KEY
         "top_p": top_p,
         "max_time": 120.0
     }
-
+
     response = requests.post(
         "http://api.vicgalle.net:5000/generate", params=payload).json()
     return response['text']
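The fallback path POSTs the generation settings as query parameters to the public gpt-j-api instance linked in the comments. A hedged usage sketch; the payload keys above the visible hunk (`context`, `token_max_length`) are assumptions taken from that endpoint's public docs, not from this diff:

```python
import requests

FALLBACK_URL = "http://api.vicgalle.net:5000/generate"

def generate_fallback(context, temperature, top_p, max_length):
    payload = {
        "context": context,
        "token_max_length": max_length,  # assumed key, per the endpoint docs
        "temperature": temperature,
        "top_p": top_p,
        "max_time": 120.0,
    }
    # params= sends these as query-string arguments, matching how the
    # committed code calls the endpoint.
    response = requests.post(FALLBACK_URL, params=payload, timeout=130)
    return response.json()["text"]
```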
@@ -161,7 +164,7 @@ def f(context, temperature, top_p, max_length, model_idx, SPACE_VERIFICATION_KEY
     except Exception as e:
         return "Exception while setting seed: " + str(e)
     # return sequences specifies how many to return
-
+
     # for some reson indexing with 'generated-text' doesn't work
     # edit: maybe because I was using generated-text, not generated_text (note the underscore in the second)
     # try:
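The comments here record two gotchas: the result key is `generated_text` (underscore), not `generated-text`, and `set_seed` is wrapped in its own try/except. A minimal sketch of the local-pipeline path; the committed file builds `generator` outside this diff, so the `gpt2` model name is an assumption:

```python
from random import randint
from transformers import pipeline, set_seed

# The committed file constructs `generator` elsewhere; "gpt2" is assumed here.
generator = pipeline("text-generation", model="gpt2")
set_seed(randint(0, 2**32 - 1))

out = generator("Hello, world", max_new_tokens=20, num_return_sequences=1)
print(out[0]["generated_text"])  # underscore, not 'generated-text'
```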
@@ -170,49 +173,52 @@ def f(context, temperature, top_p, max_length, model_idx, SPACE_VERIFICATION_KEY
     # NOTE after exactly 60 seconds the fn function seems to error: https://discuss.huggingface.co/t/gradio-fn-function-errors-whenever-60-seconds-passed/13048
     # todo fix max_length below, maybe there is a max_new_tokens parameter
     # try max_length=len(context)+max_length or =len(context)+max_length or make max_length inf or unspecified
-    # note: added max_new_tokens parameter to see whether it actually works, if not remove,
+    # note: added max_new_tokens parameter to see whether it actually works, if not remove,
     # TODO if yes, then make max_length infinite because it seems to be counted as max input length, not output
     # NOTE max_new_tokens does not seem to generate that many tokens
     # however in the source that's what's used
     # NOTE I think max_new_tokens is working now and punctuation characters count too
     # NOTE set max_length to max_length to allow input text of any size
-        generated_text = generator(context, max_length=896, max_new_tokens=max_length, top_p=top_p, temperature=temperature, num_return_sequences=1)
+        generated_text = generator(context, max_length=896, max_new_tokens=max_length,
+                                   top_p=top_p, temperature=temperature, num_return_sequences=1)
     except Exception as e:
         return "Exception while generating text: " + str(e)
     # [0][0]['generated_text']
 
     return get_generated_text(generated_text)
-
-    # was error due to timeout because of not enabling queue in gradio interface?
-    # if it works right now, then that was the reason for the JSON parsing error
-    # except:
-    # generated_text = generator(context, max_length=max_length, top_p=top_p, temperature=temperature, num_return_sequences=1)[0]
-
+
+    # was error due to timeout because of not enabling queue in gradio interface?
+    # if it works right now, then that was the reason for the JSON parsing error
+    # except:
+    # generated_text = generator(context, max_length=max_length, top_p=top_p, temperature=temperature, num_return_sequences=1)[0]
+
     # return generated_text
     # args found in the source: https://github.com/huggingface/transformers/blob/27b3031de2fb8195dec9bc2093e3e70bdb1c4bff/src/transformers/generation_tf_utils.py#L348-L376
 
-    # TODO use fallback gpt-2 inference api for this as well
+    # TODO use fallback gpt-2 inference api for this as well
     # TODO or just make it an option in the menu "GPT-2 inference"
-    else:
+    else:
         API_URL = "https://api-inference.huggingface.co/models/distilgpt2"
-        generated_text=""
+        generated_text = ""
         while (max_length > 0):
-            # NOTE see original implementation above for gpt-J-6B
-            payload = {"inputs": context, "parameters": {"max_new_tokens": max_length>250 and 250 or max_length, "temperature": temperature, "top_p": top_p}}
-            response = requests.request("POST", API_URL, data=json.dumps(payload), headers=headers)
-            context = json.loads(response.content.decode("utf-8"))
-
-            context = get_generated_text(context)
+            # NOTE see original implementation above for gpt-J-6B
+            payload = {"inputs": context, "parameters": {"max_new_tokens": max_length >
+                                                         250 and 250 or max_length, "temperature": temperature, "top_p": top_p}}
+            response = requests.request(
+                "POST", API_URL, data=json.dumps(payload), headers=headers)
+            context = json.loads(response.content.decode("utf-8"))
 
-            generated_text += context
-            max_length -= 250
+            context = get_generated_text(context)
+
+            generated_text += context
+            max_length -= 250
 
     # payload = {"inputs": context, "parameters":{
     # "max_new_tokens":max_length, "temperature":temperature, "top_p":top_p}}
     # data = json.dumps(payload)
     # response = requests.request("POST", API_URL, data=data, headers=headers)
     # generated_text = json.loads(response.content.decode("utf-8"))[0]['generated_text']
-        return generated_text#context #_context+generated_text
+        return generated_text  # context #_context+generated_text
 
     except Exception as e:
         return f"error with idx{model_idx}: "+str(e)
@@ -224,10 +230,12 @@ iface = gr.Interface(f, [
     top_p,
     gr.inputs.Slider(
         minimum=20, maximum=512, default=30, label="max length"),
-    gr.inputs.Dropdown(["GPT-J-6B", "GPT2", "DistilGPT2"], type="index", label="model"),
-    gr.inputs.Textbox(lines=1, placeholder="xxxxxxxx", label="space verification key")
+    gr.inputs.Dropdown(["GPT-J-6B", "GPT2", "DistilGPT2"],
+                       type="index", label="model"),
+    gr.inputs.Textbox(lines=1, placeholder="xxxxxxxx",
+                      label="space verification key")
 
-], outputs="text", title=title, examples=examples, enable_queue = True) # deprecated iwthin iface.launch: https://discuss.huggingface.co/t/is-there-a-timeout-max-runtime-for-spaces/12979/3?u=un-index
+], outputs="text", title=title, examples=examples, enable_queue=True)  # deprecated iwthin iface.launch: https://discuss.huggingface.co/t/is-there-a-timeout-max-runtime-for-spaces/12979/3?u=un-index
 iface.launch()  # enable_queue=True
 
 # all below works but testing
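The trailing comment records that `enable_queue` was moved from `iface.launch()` into the `gr.Interface(...)` constructor (per the linked discussion) to dodge the 60-second request timeout mentioned earlier in the diff. A minimal sketch of the committed wiring, using the gradio 2.x-era `gr.inputs.*` API the diff shows; the first three inputs sit above this hunk, so their definitions here are assumptions:

```python
import gradio as gr

# Sketch of the committed interface wiring (gradio 2.x gr.inputs API).
iface = gr.Interface(
    f,  # the generation function from the diff
    [
        gr.inputs.Textbox(lines=7, label="context"),                   # assumed
        gr.inputs.Slider(0.0, 1.5, default=0.8, label="temperature"),  # assumed
        gr.inputs.Slider(0.0, 1.0, default=0.9, label="top_p"),        # assumed
        gr.inputs.Slider(minimum=20, maximum=512, default=30, label="max length"),
        gr.inputs.Dropdown(["GPT-J-6B", "GPT2", "DistilGPT2"],
                           type="index", label="model"),
        gr.inputs.Textbox(lines=1, placeholder="xxxxxxxx",
                          label="space verification key"),
    ],
    outputs="text",
    title=title,
    examples=examples,
    enable_queue=True,  # here rather than in launch(); see the linked thread
)
iface.launch()
```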
 