Spaces:

zai-org
/

CogView2

Runtime error

App Files Community

hysts committed on Jun 22, 2022

Commit

89b3a2c

1 Parent(s): 223c6f1

Modify to work in Spaces

Browse files

Files changed (5) hide show

README.md +1 -0
app.py +14 -22
model.py +72 -26
packages.txt +0 -1
requirements.txt +3 -6

README.md CHANGED Viewed

@@ -5,6 +5,7 @@ colorFrom: pink
 colorTo: red
 sdk: gradio
 sdk_version: 3.0.19
 app_file: app.py
 pinned: false
 ---

 colorTo: red
 sdk: gradio
 sdk_version: 3.0.19
+python_version: 3.9.13
 app_file: app.py
 pinned: false
 ---

app.py CHANGED Viewed

@@ -2,25 +2,16 @@
 from __future__ import annotations
-import argparse
 import gradio as gr
 from model import AppModel
-DESCRIPTION = '''# CogView2 (text2image)
-This is an unofficial demo for <a href="https://github.com/THUDM/CogView2">https://github.com/THUDM/CogView2</a>.
-[This Space](https://huggingface.co/spaces/chinhon/translation_eng2ch) is used for translation from English to Chinese.
 '''
-def parse_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--only-first-stage', action='store_true')
-    parser.add_argument('--share', action='store_true')
-    return parser.parse_args()
 def set_example_text(example: list) -> dict:
@@ -28,8 +19,9 @@ def set_example_text(example: list) -> dict:
 def main():
-    args = parse_args()
-    model = AppModel(args.only_first_stage)
     with gr.Blocks(css='style.css') as demo:
         gr.Markdown(DESCRIPTION)
@@ -59,8 +51,8 @@ def main():
                                      label='Seed')
                     only_first_stage = gr.Checkbox(
                         label='Only First Stage',
-                        value=args.only_first_stage,
-                        visible=not args.only_first_stage)
                     num_images = gr.Slider(1,
                                            16,
                                            step=1,
@@ -80,6 +72,9 @@ def main():
                         with gr.TabItem('Output (Gallery)'):
                             result_gallery = gr.Gallery(show_label=False)
         run_button.click(fn=model.run_with_translation,
                          inputs=[
                              text,
@@ -98,10 +93,7 @@ def main():
                        inputs=examples,
                        outputs=examples.components)
-    demo.launch(
-        enable_queue=True,
-        share=args.share,
-    )
 if __name__ == '__main__':

 from __future__ import annotations
 import gradio as gr
 from model import AppModel
+DESCRIPTION = '# <a href="https://github.com/THUDM/CogView2">CogView2</a> (text2image)'
+NOTES = '''
+- This app is adapted from <a href="https://github.com/hysts/CogView2_demo">https://github.com/hysts/CogView2_demo</a>. It would be recommended to use the repo if you want to run the app yourself.
+- [This Space](https://huggingface.co/spaces/chinhon/translation_eng2ch) is used for translation from English to Chinese.
 '''
+FOOTER = '<img id="visitor-badge" alt="visitor badge" src="https://visitor-badge.glitch.me/badge?page_id=THUDM.CogView2" />'
 def set_example_text(example: list) -> dict:
 def main():
+    only_first_stage = True
+    max_inference_batch_size = 4
+    model = AppModel(max_inference_batch_size, only_first_stage)
     with gr.Blocks(css='style.css') as demo:
         gr.Markdown(DESCRIPTION)
                                      label='Seed')
                     only_first_stage = gr.Checkbox(
                         label='Only First Stage',
+                        value=only_first_stage,
+                        visible=not only_first_stage)
                     num_images = gr.Slider(1,
                                            16,
                                            step=1,
                         with gr.TabItem('Output (Gallery)'):
                             result_gallery = gr.Gallery(show_label=False)
+        gr.Markdown(NOTES)
+        gr.Markdown(FOOTER)
         run_button.click(fn=model.run_with_translation,
                          inputs=[
                              text,
                        inputs=examples,
                        outputs=examples.components)
+    demo.launch(enable_queue=True)
 if __name__ == '__main__':

model.py CHANGED Viewed

@@ -1,19 +1,68 @@
-#This code is adapted from https://github.com/THUDM/CogView2/blob/4e55cce981eb94b9c8c1f19ba9f632fd3ee42ba8/cogview2_text2image.py
 from __future__ import annotations
 import argparse
 import functools
 import logging
 import pathlib
 import sys
 import time
 from typing import Any
 import gradio as gr
 import numpy as np
 import torch
-from icetk import IceTokenizer
 from SwissArmyTransformer import get_args
 from SwissArmyTransformer.arguments import set_random_seed
 from SwissArmyTransformer.generation.autoregressive_sampling import \
@@ -38,7 +87,8 @@ logger.setLevel(logging.DEBUG)
 logger.propagate = False
 logger.addHandler(stream_handler)
-ICETK_MODEL_DIR = app_dir / 'icetk_models'
 def get_masks_and_position_ids_coglm(
@@ -140,11 +190,12 @@ def get_default_args() -> argparse.Namespace:
 class Model:
-    def __init__(self, only_first_stage: bool = False):
         self.args = get_default_args()
         self.args.only_first_stage = only_first_stage
-        self.tokenizer = self.load_tokenizer()
         self.model, self.args = self.load_model()
         self.strategy = self.load_strategy()
@@ -157,19 +208,6 @@ class Model:
         self.max_batch_size = self.args.max_inference_batch_size
         self.only_first_stage = self.args.only_first_stage
-    def load_tokenizer(self) -> IceTokenizer:
-        logger.info('--- load_tokenizer ---')
-        start = time.perf_counter()
-        tokenizer = IceTokenizer(ICETK_MODEL_DIR.as_posix())
-        tokenizer.add_special_tokens(
-            ['<start_of_image>', '<start_of_english>', '<start_of_chinese>'])
-        elapsed = time.perf_counter() - start
-        logger.info(f'Elapsed: {elapsed}')
-        logger.info('--- done ---')
-        return tokenizer
     def load_model(self) -> tuple[InferenceModel, argparse.Namespace]:
         logger.info('--- load_model ---')
         start = time.perf_counter()
@@ -185,7 +223,7 @@ class Model:
         logger.info('--- load_strategy ---')
         start = time.perf_counter()
-        invalid_slices = [slice(self.tokenizer.num_image_tokens, None)]
         strategy = CoglmStrategy(invalid_slices,
                                  temperature=self.args.temp_all_gen,
                                  top_k=self.args.topk_gen,
@@ -213,6 +251,7 @@ class Model:
         logger.info('--- update_style ---')
         start = time.perf_counter()
         self.args = argparse.Namespace(**(vars(self.args) | get_recipe(style)))
         self.query_template = self.args.query_template
         logger.info(f'{self.query_template=}')
@@ -233,14 +272,21 @@ class Model:
     def run(self, text: str, style: str, seed: int, only_first_stage: bool,
             num: int) -> list[np.ndarray] | None:
         set_random_seed(seed)
         seq, txt_len = self.preprocess_text(text)
         if seq is None:
             return None
-        self.update_style(style)
         self.only_first_stage = only_first_stage
         tokens = self.generate_tokens(seq, txt_len, num)
         res = self.generate_images(seq, txt_len, tokens)
         return res
     @torch.inference_mode()
@@ -251,7 +297,7 @@ class Model:
         text = self.query_template.format(text)
         logger.info(f'{text=}')
-        seq = self.tokenizer.encode(text)
         logger.info(f'{len(seq)=}')
         if len(seq) > 110:
             logger.info('The input text is too long.')
@@ -319,7 +365,7 @@ class Model:
         if self.only_first_stage:
             for i in range(len(tokens)):
                 seq = tokens[i]
-                decoded_img = self.tokenizer.decode(image_ids=seq[-400:])
                 decoded_img = torch.nn.functional.interpolate(decoded_img,
                                                               size=(480, 480))
                 decoded_img = self.postprocess(decoded_img[0])
@@ -327,7 +373,7 @@ class Model:
         else:  # sr
             iter_tokens = self.srg.sr_base(tokens[:, -400:], seq[:txt_len])
             for seq in iter_tokens:
-                decoded_img = self.tokenizer.decode(image_ids=seq[-3600:])
                 decoded_img = torch.nn.functional.interpolate(decoded_img,
                                                               size=(480, 480))
                 decoded_img = self.postprocess(decoded_img[0])
@@ -340,8 +386,8 @@ class Model:
 class AppModel(Model):
-    def __init__(self, only_first_stage: bool):
-        super().__init__(only_first_stage)
         self.translator = gr.Interface.load(
             'spaces/chinhon/translation_eng2ch')

+# This code is adapted from https://github.com/THUDM/CogView2/blob/4e55cce981eb94b9c8c1f19ba9f632fd3ee42ba8/cogview2_text2image.py
 from __future__ import annotations
 import argparse
 import functools
 import logging
+import os
 import pathlib
+import subprocess
 import sys
 import time
+import zipfile
 from typing import Any
+if os.getenv('SYSTEM') == 'spaces':
+    subprocess.run('pip install icetk==0.0.3'.split())
+    subprocess.run('pip install SwissArmyTransformer==0.2.4'.split())
+    subprocess.run(
+        'pip install git+https://github.com/Sleepychord/Image-Local-Attention@43fee31'
+        .split())
+    subprocess.run('git clone https://github.com/NVIDIA/apex'.split())
+    subprocess.run('git checkout 1403c21'.split(), cwd='apex')
+    subprocess.run(
+        'pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./'
+        .split(),
+        cwd='apex')
+    subprocess.run('rm -rf apex'.split())
+    with open('patch') as f:
+        subprocess.run('patch -p1'.split(), cwd='CogView2', stdin=f)
+    from huggingface_hub import hf_hub_download
+    def download_and_extract_icetk_models() -> None:
+        icetk_model_dir = pathlib.Path('/home/user/.icetk_models')
+        icetk_model_dir.mkdir()
+        path = hf_hub_download('THUDM/icetk',
+                               'models.zip',
+                               use_auth_token=os.getenv('HF_TOKEN'))
+        with zipfile.ZipFile(path) as f:
+            f.extractall(path=icetk_model_dir.as_posix())
+    def download_and_extract_cogview2_models(name: str) -> None:
+        path = hf_hub_download('THUDM/CogView2',
+                               name,
+                               use_auth_token=os.getenv('HF_TOKEN'))
+        with zipfile.ZipFile(path) as f:
+            f.extractall()
+        os.remove(path)
+    download_and_extract_icetk_models()
+    names = [
+        'coglm.zip',
+        'cogview2-dsr.zip',
+        #'cogview2-itersr.zip',
+    ]
+    for name in names:
+        download_and_extract_cogview2_models(name)
+    os.environ['SAT_HOME'] = '/home/user/app/sharefs/cogview-new'
 import gradio as gr
 import numpy as np
 import torch
+from icetk import icetk as tokenizer
 from SwissArmyTransformer import get_args
 from SwissArmyTransformer.arguments import set_random_seed
 from SwissArmyTransformer.generation.autoregressive_sampling import \
 logger.propagate = False
 logger.addHandler(stream_handler)
+tokenizer.add_special_tokens(
+    ['<start_of_image>', '<start_of_english>', '<start_of_chinese>'])
 def get_masks_and_position_ids_coglm(
 class Model:
+    def __init__(self,
+                 max_inference_batch_size: int,
+                 only_first_stage: bool = False):
         self.args = get_default_args()
         self.args.only_first_stage = only_first_stage
+        self.args.max_inference_batch_size = max_inference_batch_size
         self.model, self.args = self.load_model()
         self.strategy = self.load_strategy()
         self.max_batch_size = self.args.max_inference_batch_size
         self.only_first_stage = self.args.only_first_stage
     def load_model(self) -> tuple[InferenceModel, argparse.Namespace]:
         logger.info('--- load_model ---')
         start = time.perf_counter()
         logger.info('--- load_strategy ---')
         start = time.perf_counter()
+        invalid_slices = [slice(tokenizer.num_image_tokens, None)]
         strategy = CoglmStrategy(invalid_slices,
                                  temperature=self.args.temp_all_gen,
                                  top_k=self.args.topk_gen,
         logger.info('--- update_style ---')
         start = time.perf_counter()
+        self.style = style
         self.args = argparse.Namespace(**(vars(self.args) | get_recipe(style)))
         self.query_template = self.args.query_template
         logger.info(f'{self.query_template=}')
     def run(self, text: str, style: str, seed: int, only_first_stage: bool,
             num: int) -> list[np.ndarray] | None:
+        logger.info('==================== run ====================')
+        start = time.perf_counter()
+        self.update_style(style)
         set_random_seed(seed)
         seq, txt_len = self.preprocess_text(text)
         if seq is None:
             return None
         self.only_first_stage = only_first_stage
         tokens = self.generate_tokens(seq, txt_len, num)
         res = self.generate_images(seq, txt_len, tokens)
+        elapsed = time.perf_counter() - start
+        logger.info(f'Elapsed: {elapsed}')
+        logger.info('==================== done ====================')
         return res
     @torch.inference_mode()
         text = self.query_template.format(text)
         logger.info(f'{text=}')
+        seq = tokenizer.encode(text)
         logger.info(f'{len(seq)=}')
         if len(seq) > 110:
             logger.info('The input text is too long.')
         if self.only_first_stage:
             for i in range(len(tokens)):
                 seq = tokens[i]
+                decoded_img = tokenizer.decode(image_ids=seq[-400:])
                 decoded_img = torch.nn.functional.interpolate(decoded_img,
                                                               size=(480, 480))
                 decoded_img = self.postprocess(decoded_img[0])
         else:  # sr
             iter_tokens = self.srg.sr_base(tokens[:, -400:], seq[:txt_len])
             for seq in iter_tokens:
+                decoded_img = tokenizer.decode(image_ids=seq[-3600:])
                 decoded_img = torch.nn.functional.interpolate(decoded_img,
                                                               size=(480, 480))
                 decoded_img = self.postprocess(decoded_img[0])
 class AppModel(Model):
+    def __init__(self, max_inference_batch_size: int, only_first_stage: bool):
+        super().__init__(max_inference_batch_size, only_first_stage)
         self.translator = gr.Interface.load(
             'spaces/chinhon/translation_eng2ch')

packages.txt DELETED Viewed

@@ -1 +0,0 @@
-p7zip-full

requirements.txt CHANGED Viewed

@@ -1,7 +1,4 @@
-git+https://github.com/Sleepychord/Image-Local-Attention@43fee31
-gradio==3.0.17
-icetk==0.0.3
 numpy==1.22.4
-SwissArmyTransformer==0.2.4
-torch==1.11.0
-torchvision==0.12.0

+--extra-index-url https://download.pytorch.org/whl/cu113
 numpy==1.22.4
+torch==1.11.0+cu113
+torchvision==0.12.0+cu113