Spaces:

AIML-TUDA
/

LlavaGuard

Running on Zero

App Files Community

LukasHug committed on Jun 13, 2024

Commit

62b8fab

verified ·

1 Parent(s): 98390cc

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -22

app.py CHANGED Viewed

@@ -44,8 +44,7 @@ priority = {
 def run_llava(prompt, pil_image, temperature, top_p, max_new_tokens):
     image_size = pil_image.size
     image_tensor = image_processor.preprocess(pil_image, return_tensors='pt')['pixel_values'].half().cuda()
-    # images_tensor = load_images(images, image_processor)
-    image_tensor = image_tensor.to(model.device, dtype=torch.float16)
     input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt")
     input_ids = input_ids.unsqueeze(0).cuda()
     with torch.inference_mode():
@@ -67,16 +66,16 @@ def run_llava(prompt, pil_image, temperature, top_p, max_new_tokens):
     return outputs[0].strip()
-def load_selected_model(model_path):
-    model_name = model_path.split("/")[-1]
-    global tokenizer, model, image_processor, context_len
-    with warnings.catch_warnings(record=True) as w:
-        warnings.simplefilter("always")
-        tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, None, model_name)
-    for warning in w:
-        if "vision" not in str(warning.message).lower():
-            print(warning.message)
-    model.config.tokenizer_model_max_length = 2048 * 2
 def get_conv_log_filename():
@@ -496,24 +495,25 @@ Set the environment variable `model` to change the model:
     print(f"args: {args}")
     concurrency_count = int(os.getenv("concurrency_count", 5))
     api_key = os.getenv("token")
-    if api_key:
-        cmd = f"huggingface-cli login --token {api_key} --add-to-git-credential"
-        os.system(cmd)
-    else:
-        if '/workspace' not in sys.path:
-            sys.path.append('/workspace')
-        from llavaguard.hf_utils import set_up_env_and_token
-        api_key = set_up_env_and_token(read=True, write=False)
     models = [
         'LukasHug/LlavaGuard-7B-hf',
         'LukasHug/LlavaGuard-13B-hf',
         'LukasHug/LlavaGuard-34B-hf', ]
     bits = int(os.getenv("bits", 16))
-    model = os.getenv("model", models[1])
     available_devices = os.getenv("CUDA_VISIBLE_DEVICES", "0")
     model_path, model_name = model, model.split("/")[0]
     print(f"Loading model {model_path}")
     tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, None, model_name, token=api_key)
@@ -535,4 +535,4 @@ Set the environment variable `model` to change the model:
         print(e)
         exit_status = 1
     finally:
-        sys.exit(exit_status)

 def run_llava(prompt, pil_image, temperature, top_p, max_new_tokens):
     image_size = pil_image.size
     image_tensor = image_processor.preprocess(pil_image, return_tensors='pt')['pixel_values'].half().cuda()
+    # image_tensor = image_tensor.to(model.device, dtype=torch.float16)
     input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt")
     input_ids = input_ids.unsqueeze(0).cuda()
     with torch.inference_mode():
     return outputs[0].strip()
+# def load_selected_model(model_path):
+#     model_name = model_path.split("/")[-1]
+#     global tokenizer, model, image_processor, context_len
+#     with warnings.catch_warnings(record=True) as w:
+#         warnings.simplefilter("always")
+#         tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, None, model_name)
+#     for warning in w:
+#         if "vision" not in str(warning.message).lower():
+#             print(warning.message)
+#     model.config.tokenizer_model_max_length = 2048 * 2
 def get_conv_log_filename():
     print(f"args: {args}")
     concurrency_count = int(os.getenv("concurrency_count", 5))
     api_key = os.getenv("token")
     models = [
         'LukasHug/LlavaGuard-7B-hf',
         'LukasHug/LlavaGuard-13B-hf',
         'LukasHug/LlavaGuard-34B-hf', ]
     bits = int(os.getenv("bits", 16))
+    model = os.getenv("model", models[0])
     available_devices = os.getenv("CUDA_VISIBLE_DEVICES", "0")
     model_path, model_name = model, model.split("/")[0]
+    if api_key:
+        cmd = f"huggingface-cli login --token {api_key} --add-to-git-credential"
+        os.system(cmd)
+    else:
+        if '/workspace' not in sys.path:
+            sys.path.append('/workspace')
+        from llavaguard.hf_utils import set_up_env_and_token
+        api_key = set_up_env_and_token(read=True, write=False)
+        model_path = '/common-repos/LlavaGuard/models/LlavaGuard-v1.1-7b-full/smid_and_crawled_v2_with_augmented_policies/json-v16/llava'
     print(f"Loading model {model_path}")
     tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, None, model_name, token=api_key)
         print(e)
         exit_status = 1
     finally:
+        sys.exit(exit_status)