Spaces:

MusIre
/

Dissertation

Sleeping

App Files Files Community

MusIre committed on Jan 14

Commit

67bbe81

verified ·

1 Parent(s): bf1dc6a

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -6

app.py CHANGED Viewed

@@ -10,6 +10,7 @@ import torch.nn as nn
 from sklearn.metrics import classification_report
 from torch.optim.lr_scheduler import ReduceLROnPlateau
 import gradio as gr
 # Device setup
 device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
@@ -25,7 +26,7 @@ data_transforms = transforms.Compose([
 # Load datasets for enriched prompts
 dataset_desc = pd.read_csv("dataset_desc.csv", delimiter=';', usecols=['Artists', 'Style', 'Description'])
 dataset_desc.columns = dataset_desc.columns.str.lower()
-style_desc = pd.read_csv("style_desc.csv", delimiter=';')  # CSV containing style-specific descriptions
 style_desc.columns = style_desc.columns.str.lower()
 # Function to enrich prompts with custom data
@@ -91,8 +92,7 @@ model_name = "EleutherAI/gpt-neo-1.3B"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model_gptneo = AutoModelForCausalLM.from_pretrained(model_name).to(device)
-def generate_description(image_path):
-    image = Image.open(image_path).convert("RGB")
     image_resnet = data_transforms(image).unsqueeze(0).to(device)
     model_resnet.eval()
@@ -112,13 +112,18 @@ def generate_description(image_path):
         "Describe its distinctive features, considering both the artist's techniques and the artistic style."
     )
-    input_ids = tokenizer.encode(full_prompt, return_tensors="pt").to(device)
     output = model_gptneo.generate(
         input_ids=input_ids,
         max_length=300,
         temperature=0.7,
         top_p=0.9,
-        repetition_penalty=1.2
     )
     description_text = tokenizer.decode(output[0], skip_special_tokens=True)
@@ -127,12 +132,20 @@ def generate_description(image_path):
 # Gradio interface
 def gradio_interface(image):
     """Render the analysis of ``image`` as one block of text for Gradio.

     ``image`` is handed unchanged to ``generate_description``; the three
     values it returns (style, artist, description) are laid out as a
     single multi-line string.
     """
     style, artist, text = generate_description(image)
     report = (
         f"Predicted Style: {style}\n"
         f"Predicted Artist: {artist}\n\n"
         f"Description:\n{text}"
     )
     return report
 iface = gr.Interface(
     fn=gradio_interface,
-    inputs=gr.Image(type="filepath"),
     outputs="text",
     title="AI Artwork Analysis",
     description="Upload an image to predict its artistic style and creator, and generate a detailed description."

 from sklearn.metrics import classification_report
 from torch.optim.lr_scheduler import ReduceLROnPlateau
 import gradio as gr
+from io import BytesIO
 # Device setup
 device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
 # Load datasets for enriched prompts
 dataset_desc = pd.read_csv("dataset_desc.csv", delimiter=';', usecols=['Artists', 'Style', 'Description'])
 dataset_desc.columns = dataset_desc.columns.str.lower()
+style_desc = pd.read_csv("style_desc.csv", delimiter=';')
 style_desc.columns = style_desc.columns.str.lower()
 # Function to enrich prompts with custom data
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model_gptneo = AutoModelForCausalLM.from_pretrained(model_name).to(device)
+def generate_description(image):
     image_resnet = data_transforms(image).unsqueeze(0).to(device)
     model_resnet.eval()
         "Describe its distinctive features, considering both the artist's techniques and the artistic style."
     )
+    input_ids = tokenizer.encode(full_prompt, return_tensors="pt", padding=True).to(device)
+    attention_mask = input_ids != tokenizer.pad_token_id
     output = model_gptneo.generate(
         input_ids=input_ids,
+        attention_mask=attention_mask,
         max_length=300,
         temperature=0.7,
         top_p=0.9,
+        repetition_penalty=1.2,
+        do_sample=True,
+        pad_token_id=tokenizer.eos_token_id
     )
     description_text = tokenizer.decode(output[0], skip_special_tokens=True)
 # Gradio interface
 def gradio_interface(image):
     """Open the uploaded image, analyse it, and format the result as text.

     Args:
         image: The value Gradio passes for the image input, or ``None``
             when nothing was uploaded. It is given directly to
             ``Image.open``.

     Returns:
         A user-facing message when ``image`` is ``None``; otherwise a
         multi-line string with the predicted style, the predicted artist
         and the generated description.
     """
     if image is None:
         return "No image provided. Please upload an image."
     # The earlier isinstance(image, BytesIO) check ran this exact call on
     # both of its branches, so the branch is dropped; behaviour is unchanged.
     image = Image.open(image).convert("RGB")
     predicted_style, predicted_artist, description = generate_description(image)
     return f"Predicted Style: {predicted_style}\nPredicted Artist: {predicted_artist}\n\nDescription:\n{description}"
 iface = gr.Interface(
     fn=gradio_interface,
+    inputs=gr.Image(type="file"),
     outputs="text",
     title="AI Artwork Analysis",
     description="Upload an image to predict its artistic style and creator, and generate a detailed description."