Spaces:

Yiyuan
/

VSA

Runtime error

App Files Community

cnzzx committed on Oct 29, 2024

Commit

271c21d

1 Parent(s): c640227

update

Browse files

Files changed (1) hide show

models/vsa_model.py +3 -93

models/vsa_model.py CHANGED Viewed

@@ -298,12 +298,12 @@ class VisionSearchAssistant:
         self.use_correlate = True
     @spaces.GPU
-    def __call__(
         self,
         image: Union[str, Image.Image, np.ndarray],
         text: str,
-        ground_classes: Union[List[str], None] = None
-    ):
         self.searcher = WebSearcher(
             model_path = self.search_model
         )
@@ -318,96 +318,6 @@ class VisionSearchAssistant:
             load_8bit = self.vlm_load_8bit
         )
-        # Create and clear the temporary directory.
-        if not os.access('temp', os.F_OK):
-            os.makedirs('temp')
-        for file in os.listdir('temp'):
-            os.remove(os.path.join('temp', file))
-        with open('temp/text.txt', 'w', encoding='utf-8') as wf:
-            wf.write(text)
-        # Load Image
-        if isinstance(image, str):
-            in_image = Image.open(image)
-        elif isinstance(image, Image.Image):
-            in_image = image
-        elif isinstance(image, np.ndarray):
-            in_image = Image.fromarray(image.astype(np.uint8))
-        else:
-            raise Exception('Unsupported input image format.')
-        # Visual Grounding
-        bboxes, labels, out_image = self.grounder(in_image, classes = ground_classes)
-        det_images = []
-        for bid, bbox in enumerate(bboxes):
-            crop_box = (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))
-            det_image = in_image.crop(crop_box)
-            det_image.save('temp/debug_bbox_image_{}.jpg'.format(bid))
-            det_images.append(det_image)
-        if len(det_images) == 0:  # No object detected, use the full image.
-            det_images.append(in_image)
-            labels.append('image')
-        # Visual Captioning
-        captions = []
-        for det_image, label in zip(det_images, labels):
-            inp = get_caption_prompt(label, text)
-            caption = self.vlm(det_image, inp)
-            captions.append(caption)
-        for cid, caption in enumerate(captions):
-            with open('temp/caption_{}.txt'.format(cid), 'w', encoding='utf-8') as wf:
-                wf.write(caption)
-        # Visual Correlation
-        if len(captions) >= 2 and self.use_correlate:
-            queries = []
-            for mid, det_image in enumerate(det_images):
-                caption = captions[mid]
-                other_captions = []
-                for cid in range(len(captions)):
-                    if cid == mid:
-                        continue
-                    other_captions.append(captions[cid])
-                inp = get_correlate_prompt(caption, other_captions)
-                query = self.vlm(det_image, inp)
-                queries.append(query)
-        else:
-            queries = captions
-        for qid, query in enumerate(queries):
-            with open('temp/query_{}.txt'.format(qid), 'w', encoding='utf-8') as wf:
-                wf.write(query)
-        queries = [text + " " + query for query in queries]
-        # Web Searching
-        contexts = self.searcher(queries)
-        # QA
-        TOKEN_LIMIT = 3500
-        max_length_per_context = TOKEN_LIMIT // len(contexts)
-        for cid, context in enumerate(contexts):
-            contexts[cid] = (queries[cid] + context)[:max_length_per_context]
-        inp = get_qa_prompt(text, contexts)
-        answer = self.vlm(in_image, inp)
-        with open('temp/answer.txt', 'w', encoding='utf-8') as wf:
-            wf.write(answer)
-        print(answer)
-        return answer
-    def app_run(
-        self,
-        image: Union[str, Image.Image, np.ndarray],
-        text: str,
-        ground_classes: List[str] = COCO_CLASSES
-    ):
         # Create and clear the temporary directory.
         if not os.access('temp', os.F_OK):
             os.makedirs('temp')

         self.use_correlate = True
     @spaces.GPU
+    def app_run(
         self,
         image: Union[str, Image.Image, np.ndarray],
         text: str,
+        ground_classes: List[str] = COCO_CLASSES
+    ):
         self.searcher = WebSearcher(
             model_path = self.search_model
         )
             load_8bit = self.vlm_load_8bit
         )
         # Create and clear the temporary directory.
         if not os.access('temp', os.F_OK):
             os.makedirs('temp')