un-index committed on
Commit
cbabcb5
·
1 Parent(s): 85f4499
Files changed (1) hide show
  1. app.py +36 -44
app.py CHANGED
@@ -1,5 +1,4 @@
1
 
2
- import os
3
  from random import randint
4
  from transformers import pipeline, set_seed
5
  import requests
@@ -43,13 +42,13 @@ title = "text generator based on GPT models"
43
 
44
  examples = [
45
  # another machine learning example
46
- [["For today's homework assignment, please describe the reasons for the US Civil War."],
47
- 0.8, 0.9, 50, "GPT2"],
48
  [["In a shocking discovery, scientists have found a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English."], 0.8, 0.9, 50, "GPT2"],
49
- [["The first step in the process of developing a new language is to invent a new word."],
50
- 0.8, 0.9, 50, "GPT2"],
51
  ]
52
 
 
 
53
 
54
  # check if api.vicgalle.net:5000/generate is down with timeout of 10 seconds
55
  def is_up(url):
@@ -61,6 +60,7 @@ def is_up(url):
61
 
62
  # gpt_j_api_down = False
63
 
 
64
 
65
  API_URL = "https://api-inference.huggingface.co/models/EleutherAI/gpt-j-6B"
66
  main_gpt_j_api_up = is_up(API_URL)
@@ -73,7 +73,7 @@ if not main_gpt_j_api_up:
73
  headers = {"Authorization": f"Bearer {os.environ['API_TOKEN']}"}
74
 
75
  # NOTE see build logs here: https://huggingface.co/spaces/un-index/textgen6b/logs/build
76
-
77
 
78
  def get_generated_text(generated_text):
79
  try:
@@ -83,13 +83,13 @@ def get_generated_text(generated_text):
83
  return generated_text[0][0]['generated_text']
84
  except:
85
  # recursively loop through generated_text till we get the text
86
- # don't know if this will work
87
  for gt in generated_text:
88
  if 'generated_text' in gt:
89
  return gt['generated_text']
90
  else:
91
  return get_generated_text(gt)
92
- # return generated_text
93
 
94
 
95
  def f(context, temperature, top_p, max_length, model_idx, SPACE_VERIFICATION_KEY):
@@ -106,24 +106,21 @@ def f(context, temperature, top_p, max_length, model_idx, SPACE_VERIFICATION_KEY
106
  # that would fetch results in chunks of 250
107
  # NOTE change so it uses previous generated input every time
108
  # _context = context
109
- generated_text = "" # context #""
110
  while (max_length > 0):
111
  # context becomes the previous generated context
112
  # NOTE I've set return_full_text to false, see how this plays out
113
- payload = {"inputs": context, "parameters": {"max_new_tokens": max_length >
114
- 250 and 250 or max_length, "temperature": temperature, "top_p": top_p}}
115
- response = requests.request(
116
- "POST", API_URL, data=json.dumps(payload), headers=headers)
117
- context = json.loads(response.content.decode(
118
- "utf-8")) # [0]['generated_text']
119
  # context = get_generated_text(generated_context)
120
-
121
  # handle inconsistent inference API
122
  # if 'generated_text' in context[0]:
123
  # context = context[0]['generated_text']
124
  # else:
125
  # context = context[0][0]['generated_text']
126
-
127
  context = get_generated_text(context)
128
 
129
  generated_text += context
@@ -134,14 +131,14 @@ def f(context, temperature, top_p, max_length, model_idx, SPACE_VERIFICATION_KEY
134
  # data = json.dumps(payload)
135
  # response = requests.request("POST", API_URL, data=data, headers=headers)
136
  # generated_text = json.loads(response.content.decode("utf-8"))[0]['generated_text']
137
- return generated_text # context #_context+generated_text
138
 
139
  # use secondary gpt-j-6B api, as the main one is down
140
  if not secondary_gpt_j_api_up:
141
  return "ERR: both GPT-J-6B APIs are down, please try again later (will use a third fallback in the future)"
142
 
143
  # use fallback API
144
- #
145
  # http://api.vicgalle.net:5000/docs#/default/generate_generate_post
146
  # https://pythonrepo.com/repo/vicgalle-gpt-j-api-python-natural-language-processing
147
 
@@ -152,7 +149,7 @@ def f(context, temperature, top_p, max_length, model_idx, SPACE_VERIFICATION_KEY
152
  "top_p": top_p,
153
  "max_time": 120.0
154
  }
155
-
156
  response = requests.post(
157
  "http://api.vicgalle.net:5000/generate", params=payload).json()
158
  return response['text']
@@ -164,7 +161,7 @@ def f(context, temperature, top_p, max_length, model_idx, SPACE_VERIFICATION_KEY
164
  except Exception as e:
165
  return "Exception while setting seed: " + str(e)
166
  # return sequences specifies how many to return
167
-
168
  # for some reason indexing with 'generated-text' doesn't work
169
  # edit: maybe because I was using generated-text, not generated_text (note the underscore in the second)
170
  # try:
@@ -173,41 +170,38 @@ def f(context, temperature, top_p, max_length, model_idx, SPACE_VERIFICATION_KEY
173
  # NOTE after exactly 60 seconds the fn function seems to error: https://discuss.huggingface.co/t/gradio-fn-function-errors-whenever-60-seconds-passed/13048
174
  # todo fix max_length below, maybe there is a max_new_tokens parameter
175
  # try max_length=len(context)+max_length or =len(context)+max_length or make max_length inf or unspecified
176
- # note: added max_new_tokens parameter to see whether it actually works, if not remove,
177
  # TODO if yes, then make max_length infinite because it seems to be counted as max input length, not output
178
  # NOTE max_new_tokens does not seem to generate that many tokens
179
  # however in the source that's what's used
180
  # NOTE I think max_new_tokens is working now and punctuation characters count too
181
  # NOTE set max_length to max_length to allow input text of any size
182
- generated_text = generator(context, max_length=896, max_new_tokens=max_length,
183
- top_p=top_p, temperature=temperature, num_return_sequences=1)
184
  except Exception as e:
185
  return "Exception while generating text: " + str(e)
186
  # [0][0]['generated_text']
187
 
188
  return get_generated_text(generated_text)
189
-
190
- # was error due to timeout because of not enabling queue in gradio interface?
191
- # if it works right now, then that was the reason for the JSON parsing error
192
- # except:
193
- # generated_text = generator(context, max_length=max_length, top_p=top_p, temperature=temperature, num_return_sequences=1)[0]
194
-
195
  # return generated_text
196
  # args found in the source: https://github.com/huggingface/transformers/blob/27b3031de2fb8195dec9bc2093e3e70bdb1c4bff/src/transformers/generation_tf_utils.py#L348-L376
197
 
198
- # TODO use fallback gpt-2 inference api for this as well
199
  # TODO or just make it an option in the menu "GPT-2 inference"
200
- else:
201
  API_URL = "https://api-inference.huggingface.co/models/distilgpt2"
202
- generated_text = ""
203
  while (max_length > 0):
204
  # NOTE see original implementation above for gpt-J-6B
205
- payload = {"inputs": context, "parameters": {"max_new_tokens": max_length >
206
- 250 and 250 or max_length, "temperature": temperature, "top_p": top_p}}
207
- response = requests.request(
208
- "POST", API_URL, data=json.dumps(payload), headers=headers)
209
  context = json.loads(response.content.decode("utf-8"))
210
-
211
  context = get_generated_text(context)
212
 
213
  generated_text += context
@@ -218,7 +212,7 @@ def f(context, temperature, top_p, max_length, model_idx, SPACE_VERIFICATION_KEY
218
  # data = json.dumps(payload)
219
  # response = requests.request("POST", API_URL, data=data, headers=headers)
220
  # generated_text = json.loads(response.content.decode("utf-8"))[0]['generated_text']
221
- return generated_text # context #_context+generated_text
222
 
223
  except Exception as e:
224
  return f"error with idx{model_idx}: "+str(e)
@@ -230,12 +224,10 @@ iface = gr.Interface(f, [
230
  top_p,
231
  gr.inputs.Slider(
232
  minimum=20, maximum=512, default=30, label="max length"),
233
- gr.inputs.Dropdown(["GPT-J-6B", "GPT2", "DistilGPT2"],
234
- type="index", label="model"),
235
- gr.inputs.Textbox(lines=1, placeholder="xxxxxxxx",
236
- label="space verification key")
237
 
238
- ], outputs="text", title=title, examples=examples, enable_queue=True) # deprecated iwthin iface.launch: https://discuss.huggingface.co/t/is-there-a-timeout-max-runtime-for-spaces/12979/3?u=un-index
239
  iface.launch() # enable_queue=True
240
 
241
  # all below works but testing
 
1
 
 
2
  from random import randint
3
  from transformers import pipeline, set_seed
4
  import requests
 
42
 
43
  examples = [
44
  # another machine learning example
45
+ [["For today's homework assignment, please describe the reasons for the US Civil War."], 0.8, 0.9, 50, "GPT2"],
 
46
  [["In a shocking discovery, scientists have found a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English."], 0.8, 0.9, 50, "GPT2"],
47
+ [["The first step in the process of developing a new language is to invent a new word."], 0.8, 0.9, 50, "GPT2"],
 
48
  ]
49
 
50
+
51
+
52
 
53
  # check if api.vicgalle.net:5000/generate is down with timeout of 10 seconds
54
  def is_up(url):
 
60
 
61
  # gpt_j_api_down = False
62
 
63
+ import os
64
 
65
  API_URL = "https://api-inference.huggingface.co/models/EleutherAI/gpt-j-6B"
66
  main_gpt_j_api_up = is_up(API_URL)
 
73
  headers = {"Authorization": f"Bearer {os.environ['API_TOKEN']}"}
74
 
75
  # NOTE see build logs here: https://huggingface.co/spaces/un-index/textgen6b/logs/build
76
+
77
 
78
  def get_generated_text(generated_text):
79
  try:
 
83
  return generated_text[0][0]['generated_text']
84
  except:
85
  # recursively loop through generated_text till we get the text
86
+ # don't know if this will work
87
  for gt in generated_text:
88
  if 'generated_text' in gt:
89
  return gt['generated_text']
90
  else:
91
  return get_generated_text(gt)
92
+ # return generated_text
93
 
94
 
95
  def f(context, temperature, top_p, max_length, model_idx, SPACE_VERIFICATION_KEY):
 
106
  # that would fetch results in chunks of 250
107
  # NOTE change so it uses previous generated input every time
108
  # _context = context
109
+ generated_text = ""#context #""
110
  while (max_length > 0):
111
  # context becomes the previous generated context
112
  # NOTE I've set return_full_text to false, see how this plays out
113
+ payload = {"inputs": context, "parameters": {"max_new_tokens": max_length>250 and 250 or max_length, "temperature": temperature, "top_p": top_p}}
114
+ response = requests.request("POST", API_URL, data=json.dumps(payload), headers=headers)
115
+ context = json.loads(response.content.decode("utf-8"))#[0]['generated_text']
 
 
 
116
  # context = get_generated_text(generated_context)
117
+
118
  # handle inconsistent inference API
119
  # if 'generated_text' in context[0]:
120
  # context = context[0]['generated_text']
121
  # else:
122
  # context = context[0][0]['generated_text']
123
+
124
  context = get_generated_text(context)
125
 
126
  generated_text += context
 
131
  # data = json.dumps(payload)
132
  # response = requests.request("POST", API_URL, data=data, headers=headers)
133
  # generated_text = json.loads(response.content.decode("utf-8"))[0]['generated_text']
134
+ return generated_text#context #_context+generated_text
135
 
136
  # use secondary gpt-j-6B api, as the main one is down
137
  if not secondary_gpt_j_api_up:
138
  return "ERR: both GPT-J-6B APIs are down, please try again later (will use a third fallback in the future)"
139
 
140
  # use fallback API
141
+ #
142
  # http://api.vicgalle.net:5000/docs#/default/generate_generate_post
143
  # https://pythonrepo.com/repo/vicgalle-gpt-j-api-python-natural-language-processing
144
 
 
149
  "top_p": top_p,
150
  "max_time": 120.0
151
  }
152
+
153
  response = requests.post(
154
  "http://api.vicgalle.net:5000/generate", params=payload).json()
155
  return response['text']
 
161
  except Exception as e:
162
  return "Exception while setting seed: " + str(e)
163
  # return sequences specifies how many to return
164
+
165
  # for some reason indexing with 'generated-text' doesn't work
166
  # edit: maybe because I was using generated-text, not generated_text (note the underscore in the second)
167
  # try:
 
170
  # NOTE after exactly 60 seconds the fn function seems to error: https://discuss.huggingface.co/t/gradio-fn-function-errors-whenever-60-seconds-passed/13048
171
  # todo fix max_length below, maybe there is a max_new_tokens parameter
172
  # try max_length=len(context)+max_length or =len(context)+max_length or make max_length inf or unspecified
173
+ # note: added max_new_tokens parameter to see whether it actually works, if not remove,
174
  # TODO if yes, then make max_length infinite because it seems to be counted as max input length, not output
175
  # NOTE max_new_tokens does not seem to generate that many tokens
176
  # however in the source that's what's used
177
  # NOTE I think max_new_tokens is working now and punctuation characters count too
178
  # NOTE set max_length to max_length to allow input text of any size
179
+ generated_text = generator(context, max_length=896, max_new_tokens=max_length, top_p=top_p, temperature=temperature, num_return_sequences=1)
 
180
  except Exception as e:
181
  return "Exception while generating text: " + str(e)
182
  # [0][0]['generated_text']
183
 
184
  return get_generated_text(generated_text)
185
+
186
+ # was error due to timeout because of not enabling queue in gradio interface?
187
+ # if it works right now, then that was the reason for the JSON parsing error
188
+ # except:
189
+ # generated_text = generator(context, max_length=max_length, top_p=top_p, temperature=temperature, num_return_sequences=1)[0]
190
+
191
  # return generated_text
192
  # args found in the source: https://github.com/huggingface/transformers/blob/27b3031de2fb8195dec9bc2093e3e70bdb1c4bff/src/transformers/generation_tf_utils.py#L348-L376
193
 
194
+ # TODO use fallback gpt-2 inference api for this as well
195
  # TODO or just make it an option in the menu "GPT-2 inference"
196
+ else:
197
  API_URL = "https://api-inference.huggingface.co/models/distilgpt2"
198
+ generated_text=""
199
  while (max_length > 0):
200
  # NOTE see original implementation above for gpt-J-6B
201
+ payload = {"inputs": context, "parameters": {"max_new_tokens": 250, "temperature": temperature, "top_p": top_p}}
202
+ response = requests.request("POST", API_URL, data=json.dumps(payload), headers=headers)
 
 
203
  context = json.loads(response.content.decode("utf-8"))
204
+
205
  context = get_generated_text(context)
206
 
207
  generated_text += context
 
212
  # data = json.dumps(payload)
213
  # response = requests.request("POST", API_URL, data=data, headers=headers)
214
  # generated_text = json.loads(response.content.decode("utf-8"))[0]['generated_text']
215
+ return generated_text#context #_context+generated_text
216
 
217
  except Exception as e:
218
  return f"error with idx{model_idx}: "+str(e)
 
224
  top_p,
225
  gr.inputs.Slider(
226
  minimum=20, maximum=512, default=30, label="max length"),
227
+ gr.inputs.Dropdown(["GPT-J-6B", "GPT2", "DistilGPT2"], type="index", label="model"),
228
+ gr.inputs.Textbox(lines=1, placeholder="xxxxxxxx", label="space verification key")
 
 
229
 
230
+ ], outputs="text", title=title, examples=examples, enable_queue = True) # deprecated iwthin iface.launch: https://discuss.huggingface.co/t/is-there-a-timeout-max-runtime-for-spaces/12979/3?u=un-index
231
  iface.launch() # enable_queue=True
232
 
233
  # all below works but testing