Commit 5471e91
Parent(s): f3e41d6

text to video

Files changed:
- app.py +4 -2
- awesome_chat.py +9 -4
- config.gradio.yaml +1 -1
- models_server.py +24 -24
app.py
CHANGED

@@ -115,7 +115,8 @@ with gr.Blocks() as demo:
             openai_api_key = gr.Textbox(
                 show_label=False,
                 placeholder="Set your OpenAI API key here and press Enter",
-                lines=1
+                lines=1,
+                type="password"
             ).style(container=False)
         with gr.Column(scale=0.15, min_width=0):
             btn1 = gr.Button("Submit").style(full_height=True)
@@ -125,7 +126,8 @@ with gr.Blocks() as demo:
             hugging_face_token = gr.Textbox(
                 show_label=False,
                 placeholder="Set your Hugging Face Token here and press Enter",
-                lines=1
+                lines=1,
+                type="password"
             ).style(container=False)
         with gr.Column(scale=0.15, min_width=0):
             btn3 = gr.Button("Submit").style(full_height=True)
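
Both fields now mask their input: type="password" renders the Gradio Textbox like an HTML password input, so keys pasted into the public Space are hidden on screen. A minimal standalone sketch of the pattern (the check_key handler is hypothetical; the legacy Gradio 3.x .style() chaining used above is omitted):

    import gradio as gr

    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column(scale=0.85):
                # type="password" masks the typed value in the browser
                key_box = gr.Textbox(show_label=False, lines=1, type="password",
                                     placeholder="Set your OpenAI API key here and press Enter")
            with gr.Column(scale=0.15, min_width=0):
                btn = gr.Button("Submit")
        status = gr.Markdown()

        def check_key(key):
            # hypothetical handler: confirm receipt without echoing the secret
            return f"Received a key of length {len(key)}."

        btn.click(check_key, inputs=key_box, outputs=status)

    demo.launch()
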
awesome_chat.py
CHANGED

@@ -152,6 +152,8 @@ def send_request(data):

     response = requests.post(endpoint, json=data, headers=HEADER, proxies=PROXY)
     logger.debug(response.text.strip())
+    if "choices" not in response.json():
+        return response.json()
     if use_completion:
         return response.json()["choices"][0]["text"].strip()
     else:
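
The added guard keeps send_request from raising a KeyError when the OpenAI call fails: error responses (expired key, exhausted quota, rate limit) carry an "error" object and no "choices" field, and are now returned to the caller as-is. A sketch of the failure mode it covers (endpoint and payload are illustrative):

    import requests

    # An invalid key makes the API answer with {"error": {...}} and no "choices"
    r = requests.post(
        "https://api.openai.com/v1/completions",
        headers={"Authorization": "Bearer sk-invalid"},
        json={"model": "text-davinci-003", "prompt": "hi"},
    )
    body = r.json()
    if "choices" not in body:
        print(body)  # surfaced to the caller instead of crashing on body["choices"]
    else:
        print(body["choices"][0]["text"].strip())
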
@@ -576,14 +578,14 @@ def model_inference(model_id, data, hosted_on, task, huggingfacetoken=None):
         HUGGINGFACE_HEADERS = None
     if hosted_on == "unknown":
         r = status(model_id)
-        logger.debug("Local Server Status: " + str(r.json()))
-        if r.status_code == 200 and "loaded" in r.json() and r.json()["loaded"]:
+        logger.debug("Local Server Status: " + str(r))
+        if "loaded" in r and r["loaded"]:
             hosted_on = "local"
         else:
             huggingfaceStatusUrl = f"https://api-inference.huggingface.co/status/{model_id}"
             r = requests.get(huggingfaceStatusUrl, headers=HUGGINGFACE_HEADERS, proxies=PROXY)
             logger.debug("Huggingface Status: " + str(r.json()))
-            if r.status_code == 200 and "loaded" in r.json() and r.json()["loaded"]:
+            if "loaded" in r and r["loaded"]:
                 hosted_on = "huggingface"
     try:
         if hosted_on == "local":
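
Both branches now index r as a plain dict rather than a requests.Response, which suggests the local status() helper returns parsed JSON directly. The check itself is simple (the payload shape follows the Hugging Face Inference API status endpoint):

    def is_loaded(r: dict) -> bool:
        # status payloads look like {"loaded": true, ...}
        return "loaded" in r and r["loaded"]

    print(is_loaded({"loaded": True}))    # True
    print(is_loaded({"error": "down"}))   # False
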
@@ -603,7 +605,7 @@ def get_model_status(model_id, url, headers, queue = None):
         r = requests.get(url, headers=headers, proxies=PROXY)
     else:
         r = status(model_id)
-    if r.status_code == 200 and "loaded" in r.json() and r.json()["loaded"]:
+    if "loaded" in r and r["loaded"]:
         if queue:
             queue.put((model_id, True, endpoint_type))
         return True
@@ -836,6 +838,9 @@ def chat_huggingface(messages, openaikey = None, huggingfacetoken = None, return
     task_str = parse_task(context, input, openaikey).strip()
     logger.info(task_str)

+    if "error" in task_str:
+        return {"message": "You exceeded your current quota, please check your plan and billing details."}
+
     if task_str == "[]": # using LLM response for empty task
         record_case(success=False, **{"input": input, "task": [], "reason": "task parsing fail: empty", "op": "chitchat"})
         response = chitchat(messages, openaikey)
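
The early return turns a failed task-parsing call into a user-facing quota message instead of letting the malformed task string propagate into execution. Note the test is a plain substring match on the returned text, as sketched here:

    def guard(task_str):
        # plain substring match on the text returned by parse_task
        if "error" in task_str:
            return {"message": "You exceeded your current quota, please check your plan and billing details."}
        return None

    print(guard('{"error": {"code": "insufficient_quota"}}'))  # quota message
    print(guard('[{"task": "text-to-video", "id": 0}]'))       # None -> continue
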
config.gradio.yaml
CHANGED

@@ -8,7 +8,7 @@ log_file: logs/debug.log
 model: text-davinci-003 # text-davinci-003
 use_completion: true
 inference_mode: hybrid # local, huggingface or hybrid
-local_deployment:
+local_deployment: full # minimal, standard or full
 num_candidate_models: 5
 max_description_length: 100
 proxy:
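
local_deployment controls how many pipelines models_server.py loads locally; full enables everything, including the heavyweight text-to-video model this commit is named after. A sketch of how the setting gates loading, assuming the config is read with PyYAML (the two loader functions are hypothetical stand-ins for the tiers in models_server.py):

    import yaml

    def standard_pipes():  # hypothetical mid-tier loader
        return {"openai/whisper-base": "..."}

    def full_pipes():      # hypothetical heavy-tier loader, e.g. text-to-video
        return {"damo-vilab/text-to-video-ms-1.7b": "..."}

    config = yaml.safe_load(open("config.gradio.yaml"))
    local_deployment = config["local_deployment"]  # "minimal", "standard" or "full"

    pipes = {}
    if local_deployment in ["standard", "full"]:
        pipes.update(standard_pipes())
    if local_deployment in ["full"]:
        pipes.update(full_pipes())
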
models_server.py
CHANGED

@@ -78,9 +78,9 @@ def load_pipes(local_deployment):
     if local_deployment in ["full"]:
         other_pipes = {
             "nlpconnect/vit-gpt2-image-captioning":{
-                "model": VisionEncoderDecoderModel.from_pretrained(f"nlpconnect/vit-gpt2-image-captioning"),
-                "feature_extractor": ViTImageProcessor.from_pretrained(f"nlpconnect/vit-gpt2-image-captioning"),
-                "tokenizer": AutoTokenizer.from_pretrained(f"nlpconnect/vit-gpt2-image-captioning"),
+                "model": VisionEncoderDecoderModel.from_pretrained(f"{local_models}nlpconnect/vit-gpt2-image-captioning"),
+                "feature_extractor": ViTImageProcessor.from_pretrained(f"{local_models}nlpconnect/vit-gpt2-image-captioning"),
+                "tokenizer": AutoTokenizer.from_pretrained(f"{local_models}nlpconnect/vit-gpt2-image-captioning"),
                 "device": "cuda:0"
             },
             # "Salesforce/blip-image-captioning-large": {
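
Every edit in this file is the same one-line change: the repo id passed to from_pretrained / pipeline gains a {local_models} prefix. When that variable holds a directory of pre-downloaded checkpoints (e.g. "models/"), transformers and diffusers resolve a local path; when it is an empty string, the unmodified Hub id is used. A sketch (the value of local_models is an assumption; it is defined elsewhere in models_server.py):

    from transformers import pipeline

    local_models = ""  # assumed: or e.g. "models/" when weights live in ./models/<org>/<name>

    # With local_models == "models/", this resolves ./models/openai/whisper-base;
    # with "", it downloads openai/whisper-base from the Hugging Face Hub.
    asr = pipeline(task="automatic-speech-recognition", model=f"{local_models}openai/whisper-base")
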
@@ -89,7 +89,7 @@ def load_pipes(local_deployment):
             #     "device": "cuda:0"
             # },
             "damo-vilab/text-to-video-ms-1.7b": {
-                "model": DiffusionPipeline.from_pretrained(f"damo-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16, variant="fp16"),
+                "model": DiffusionPipeline.from_pretrained(f"{local_models}damo-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16, variant="fp16"),
                 "device": "cuda:0"
             },
             # "facebook/maskformer-swin-large-ade": {
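
This hunk is the commit's namesake: the damo-vilab text-to-video pipeline, loaded in fp16 to fit alongside the other models, now also honors the local path prefix. Typical diffusers usage of the pipeline looks like:

    import torch
    from diffusers import DiffusionPipeline

    pipe = DiffusionPipeline.from_pretrained(
        "damo-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16, variant="fp16"
    ).to("cuda:0")
    frames = pipe("a panda eating bamboo", num_inference_steps=25).frames  # video frames
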
@@ -112,11 +112,11 @@ def load_pipes(local_deployment):
                 "device": "cuda:0"
             },
             "espnet/kan-bayashi_ljspeech_vits": {
-                "model": Text2Speech.from_pretrained(f"espnet/kan-bayashi_ljspeech_vits"),
+                "model": Text2Speech.from_pretrained("espnet/kan-bayashi_ljspeech_vits"),
                 "device": "cuda:0"
             },
             "lambdalabs/sd-image-variations-diffusers": {
-                "model": DiffusionPipeline.from_pretrained(f"lambdalabs/sd-image-variations-diffusers"), #torch_dtype=torch.float16
+                "model": DiffusionPipeline.from_pretrained(f"{local_models}lambdalabs/sd-image-variations-diffusers"), #torch_dtype=torch.float16
                 "device": "cuda:0"
             },
             # "CompVis/stable-diffusion-v1-4": {
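
The espnet entry is the one model that keeps a bare Hub id rather than gaining the {local_models} prefix, presumably because Text2Speech resolves checkpoints through espnet_model_zoo rather than transformers' path logic. Standard usage of that API:

    from espnet2.bin.tts_inference import Text2Speech

    tts = Text2Speech.from_pretrained("espnet/kan-bayashi_ljspeech_vits")
    wav = tts("Hello world")["wav"]  # synthesized waveform tensor
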
@@ -128,7 +128,7 @@ def load_pipes(local_deployment):
             #     "device": "cuda:0"
             # },
             "runwayml/stable-diffusion-v1-5": {
-                "model": DiffusionPipeline.from_pretrained(f"runwayml/stable-diffusion-v1-5"),
+                "model": DiffusionPipeline.from_pretrained(f"{local_models}runwayml/stable-diffusion-v1-5"),
                 "device": "cuda:0"
             },
             # "microsoft/speecht5_tts":{
@@ -143,10 +143,10 @@ def load_pipes(local_deployment):
             #     "device": "cuda:0"
             # },
             "microsoft/speecht5_vc":{
-                "processor": SpeechT5Processor.from_pretrained(f"microsoft/speecht5_vc"),
-                "model": SpeechT5ForSpeechToSpeech.from_pretrained(f"microsoft/speecht5_vc"),
-                "vocoder": SpeechT5HifiGan.from_pretrained(f"microsoft/speecht5_hifigan"),
-                "embeddings_dataset": load_dataset(f"Matthijs/cmu-arctic-xvectors", split="validation"),
+                "processor": SpeechT5Processor.from_pretrained(f"{local_models}microsoft/speecht5_vc"),
+                "model": SpeechT5ForSpeechToSpeech.from_pretrained(f"{local_models}microsoft/speecht5_vc"),
+                "vocoder": SpeechT5HifiGan.from_pretrained(f"{local_models}microsoft/speecht5_hifigan"),
+                "embeddings_dataset": load_dataset(f"{local_models}Matthijs/cmu-arctic-xvectors", split="validation"),
                 "device": "cuda:0"
             },
             # "julien-c/wine-quality": {
@@ -158,13 +158,13 @@ def load_pipes(local_deployment):
             #     "device": "cuda:0"
             # },
             "facebook/maskformer-swin-base-coco": {
-                "feature_extractor": MaskFormerFeatureExtractor.from_pretrained(f"facebook/maskformer-swin-base-coco"),
-                "model": MaskFormerForInstanceSegmentation.from_pretrained(f"facebook/maskformer-swin-base-coco"),
+                "feature_extractor": MaskFormerFeatureExtractor.from_pretrained(f"{local_models}facebook/maskformer-swin-base-coco"),
+                "model": MaskFormerForInstanceSegmentation.from_pretrained(f"{local_models}facebook/maskformer-swin-base-coco"),
                 "device": "cuda:0"
             },
             "Intel/dpt-hybrid-midas": {
-                "model": DPTForDepthEstimation.from_pretrained(f"Intel/dpt-hybrid-midas", low_cpu_mem_usage=True),
-                "feature_extractor": DPTFeatureExtractor.from_pretrained(f"Intel/dpt-hybrid-midas"),
+                "model": DPTForDepthEstimation.from_pretrained(f"{local_models}Intel/dpt-hybrid-midas", low_cpu_mem_usage=True),
+                "feature_extractor": DPTFeatureExtractor.from_pretrained(f"{local_models}Intel/dpt-hybrid-midas"),
                 "device": "cuda:0"
             }
         }
@@ -176,15 +176,15 @@ def load_pipes(local_deployment):
            #     "device": "cuda:0"
            # },
            "openai/whisper-base": {
-                "model": pipeline(task="automatic-speech-recognition", model=f"openai/whisper-base"),
+                "model": pipeline(task="automatic-speech-recognition", model=f"{local_models}openai/whisper-base"),
                "device": "cuda:0"
            },
            "microsoft/speecht5_asr": {
-                "model": pipeline(task="automatic-speech-recognition", model=f"microsoft/speecht5_asr"),
+                "model": pipeline(task="automatic-speech-recognition", model=f"{local_models}microsoft/speecht5_asr"),
                "device": "cuda:0"
            },
            "Intel/dpt-large": {
-                "model": pipeline(task="depth-estimation", model=f"Intel/dpt-large"),
+                "model": pipeline(task="depth-estimation", model=f"{local_models}Intel/dpt-large"),
                "device": "cuda:0"
            },
            # "microsoft/beit-base-patch16-224-pt22k-ft22k": {
@@ -192,11 +192,11 @@ def load_pipes(local_deployment):
            #     "device": "cuda:0"
            # },
            "facebook/detr-resnet-50-panoptic": {
-                "model": pipeline(task="image-segmentation", model=f"facebook/detr-resnet-50-panoptic"),
+                "model": pipeline(task="image-segmentation", model=f"{local_models}facebook/detr-resnet-50-panoptic"),
                "device": "cuda:0"
            },
            "facebook/detr-resnet-101": {
-                "model": pipeline(task="object-detection", model=f"facebook/detr-resnet-101"),
+                "model": pipeline(task="object-detection", model=f"{local_models}facebook/detr-resnet-101"),
                "device": "cuda:0"
            },
            # "openai/clip-vit-large-patch14": {
@@ -204,7 +204,7 @@ def load_pipes(local_deployment):
            #     "device": "cuda:0"
            # },
            "google/owlvit-base-patch32": {
-                "model": pipeline(task="zero-shot-object-detection", model=f"google/owlvit-base-patch32"),
+                "model": pipeline(task="zero-shot-object-detection", model=f"{local_models}google/owlvit-base-patch32"),
                "device": "cuda:0"
            },
            # "microsoft/DialoGPT-medium": {
@@ -248,15 +248,15 @@ def load_pipes(local_deployment):
            #     "device": "cuda:0"
            # },
            "impira/layoutlm-document-qa": {
-                "model": pipeline(task="document-question-answering", model=f"impira/layoutlm-document-qa"),
+                "model": pipeline(task="document-question-answering", model=f"{local_models}impira/layoutlm-document-qa"),
                "device": "cuda:0"
            },
            "ydshieh/vit-gpt2-coco-en": {
-                "model": pipeline(task="image-to-text", model=f"ydshieh/vit-gpt2-coco-en"),
+                "model": pipeline(task="image-to-text", model=f"{local_models}ydshieh/vit-gpt2-coco-en"),
                "device": "cuda:0"
            },
            "dandelin/vilt-b32-finetuned-vqa": {
-                "model": pipeline(task="visual-question-answering", model=f"dandelin/vilt-b32-finetuned-vqa"),
+                "model": pipeline(task="visual-question-answering", model=f"{local_models}dandelin/vilt-b32-finetuned-vqa"),
                "device": "cuda:0"
            }
        }