caption using an auxiliary space if on spaces #3
by multimodalart - opened

When running on Spaces, the app now delegates image captioning to the auxiliary multimodalart/Florence-2-l4 Space via gradio_client instead of loading Florence-2 locally; the transformers-based path is kept for non-Spaces runs, and the flash-attn install step and python-dotenv dependency are dropped.

Files changed:
- app.py +29 -17
- requirements.txt +0 -3
- requirements_local.txt +0 -2
app.py CHANGED

@@ -4,18 +4,10 @@ from typing import Union
 from huggingface_hub import whoami
 is_spaces = True if os.environ.get("SPACE_ID") else False
 is_canonical = True if os.environ.get("SPACE_ID") == "autotrain-projects/train-flux-lora-ease" else False
-
-if is_spaces:
-    subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-    import spaces

 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 import sys

-from dotenv import load_dotenv
-
-load_dotenv()
-
 # Add the current working directory to the Python path
 sys.path.insert(0, os.getcwd())

@@ -28,9 +20,13 @@ import shutil
 import json
 import yaml
 from slugify import slugify
-from transformers import AutoProcessor, AutoModelForCausalLM

+if is_spaces:
+    from gradio_client import Client, handle_file
+    client = Client("multimodalart/Florence-2-l4")
+
 if not is_spaces:
+    from transformers import AutoProcessor, AutoModelForCausalLM
     sys.path.insert(0, "ai-toolkit")
     from toolkit.job import get_job
     gr.OAuthProfile = None
@@ -38,7 +34,6 @@ if not is_spaces:

 MAX_IMAGES = 150

-
 def load_captioning(uploaded_files, concept_sentence):
     uploaded_images = [file for file in uploaded_files if not file.endswith('.txt')]
     txt_files = [file for file in uploaded_files if file.endswith('.txt')]
@@ -71,7 +66,6 @@ def load_captioning(uploaded_files, concept_sentence):
         print(base_name)
         print(image_value)
         if base_name in txt_files_dict:
-            print("entrou")
             with open(txt_files_dict[base_name], 'r') as file:
                 corresponding_caption = file.read()

@@ -112,13 +106,13 @@ def create_dataset(*inputs):
     return destination_folder


-def run_captioning(images, concept_sentence, *captions):
+def run_captioning_local(images, concept_sentence, *captions):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    torch_dtype = torch.float16
    model = AutoModelForCausalLM.from_pretrained(
-        "
+        "multimodalart/Florence-2-large-no-flash-attn", torch_dtype=torch_dtype, trust_remote_code=True
     ).to(device)
-    processor = AutoProcessor.from_pretrained("
+    processor = AutoProcessor.from_pretrained("multimodalart/Florence-2-large-no-flash-attn", trust_remote_code=True)

     captions = list(captions)
     for i, image_path in enumerate(images):
@@ -147,8 +141,26 @@ def run_captioning(images, concept_sentence, *captions):
     del model
     del processor

-
-
+def run_captioning_spaces(images, concept_sentence, *captions):
+    captions = list(captions)
+    for i, image_path in enumerate(images):
+        print(captions[i])
+        if isinstance(image_path, str):  # If image is a file path
+            image = Image.open(image_path).convert("RGB")
+
+        answer = client.predict(
+            image=handle_file(image_path),
+            task_prompt="Detailed Caption",
+            text_input=None,
+            api_name="/process_image"
+        )[0].replace("'", '"')
+        parsed_answer = json.loads(answer)
+        caption_text = parsed_answer["<DETAILED_CAPTION>"].replace("The image shows ", "")
+        if concept_sentence:
+            caption_text = f"{caption_text} [trigger]"
+        captions[i] = caption_text
+
+        yield captions

 def recursive_update(d, u):
     for k, v in u.items():
@@ -548,7 +560,7 @@ with gr.Blocks(theme=theme, css=css) as demo:
         outputs=progress_area,
     )

-    do_captioning.click(fn=run_captioning, inputs=[images, concept_sentence] + caption_list, outputs=caption_list)
+    do_captioning.click(fn=run_captioning_spaces if is_spaces else run_captioning_local, inputs=[images, concept_sentence] + caption_list, outputs=caption_list)
     demo.load(fn=swap_visibilty, outputs=main_ui)

 if __name__ == "__main__":
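In isolation, the new Spaces code path is just a remote call to the Florence-2 Space. Below is a minimal standalone sketch of that call, assuming the multimodalart/Florence-2-l4 Space is up and exposes /process_image exactly as used in the diff; the caption_image helper and the photo.jpg file name are hypothetical:

import json
from gradio_client import Client, handle_file

client = Client("multimodalart/Florence-2-l4")

def caption_image(image_path: str, add_trigger: bool = False) -> str:  # hypothetical helper
    # The first output of /process_image is a Python-repr-style dict string,
    # so single quotes are swapped for double quotes before json.loads,
    # mirroring the PR code above.
    answer = client.predict(
        image=handle_file(image_path),
        task_prompt="Detailed Caption",
        text_input=None,
        api_name="/process_image",
    )[0].replace("'", '"')
    caption = json.loads(answer)["<DETAILED_CAPTION>"].replace("The image shows ", "")
    return f"{caption} [trigger]" if add_trigger else caption

print(caption_image("photo.jpg"))  # hypothetical local file

If the endpoint shape is in doubt, Client("multimodalart/Florence-2-l4").view_api() prints the Space's exposed endpoints and their signatures.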
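On the local path, the body of run_captioning_local between the processor setup and the del model cleanup is untouched by this PR, so the diff collapses it. A sketch of what that middle presumably looks like, assuming the standard Florence-2 generation pattern (the caption_one helper is hypothetical, and the app's actual prompt and decoding parameters are not visible in this diff):

from PIL import Image

def caption_one(image_path, model, processor, device, torch_dtype):  # hypothetical helper
    # Standard Florence-2 usage: build inputs, generate, then map the raw
    # output text back to a {task: caption} dict with post_process_generation.
    image = Image.open(image_path).convert("RGB")
    prompt = "<DETAILED_CAPTION>"
    inputs = processor(text=prompt, images=image, return_tensors="pt").to(device, torch_dtype)
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        num_beams=3,
    )
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed = processor.post_process_generation(
        generated_text, task=prompt, image_size=(image.width, image.height)
    )
    return parsed[prompt].replace("The image shows ", "")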
requirements.txt CHANGED

@@ -1,7 +1,4 @@
-transformers
-accelerate
 python-slugify
-python-dotenv
 einops
 timm
 autotrain-advanced
requirements_local.txt CHANGED

@@ -1,5 +1,3 @@
 gradio
 python-slugify
-python-dotenv
-flash-attn
 huggingface_hub