Spaces:
Runtime error
Runtime error
Xueqing Wu
committed on
Commit
·
99e8fc6
1
Parent(s):
e20ef71
download files from hub
Browse files
- .gitignore +1 -1
- Dockerfile +0 -15
- app.sh +1 -0
- download_files_from_hub.py +6 -0
- pretrained_models/GLIP/configs/glip_Swin_L.yaml +120 -0
- requirements.txt +1 -1
.gitignore
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
__pycache__/
|
| 2 |
*.pyc
|
| 3 |
-
|
|
|
|
| 1 |
__pycache__/
|
| 2 |
*.pyc
|
| 3 |
+
.idea/
|
Dockerfile
CHANGED
|
@@ -31,27 +31,12 @@ RUN mkdir $HOME/.cache $HOME/.config \
|
|
| 31 |
&& rm ~/miniconda.sh \
|
| 32 |
&& conda clean -ya
|
| 33 |
|
| 34 |
-
# From here are my stuff
|
| 35 |
-
|
| 36 |
-
# Download models
|
| 37 |
-
RUN pip install --no-cache-dir gdown && \
|
| 38 |
-
mkdir -p ./pretrained_models/GLIP/checkpoints && \
|
| 39 |
-
mkdir -p ./pretrained_models/GLIP/configs && \
|
| 40 |
-
mkdir -p ./pretrained_models/xvlm && \
|
| 41 |
-
wget -nc -q -P ./pretrained_models/GLIP/checkpoints https://huggingface.co/GLIPModel/GLIP/resolve/main/glip_large_model.pth && \
|
| 42 |
-
wget -nc -q -P ./pretrained_models/GLIP/configs https://raw.githubusercontent.com/microsoft/GLIP/main/configs/pretrain/glip_Swin_L.yaml && \
|
| 43 |
-
gdown "https://drive.google.com/u/0/uc?id=1bv6_pZOsXW53EhlwU0ZgSk03uzFI61pN" -O ./pretrained_models/xvlm/retrieval_mscoco_checkpoint_9.pth
|
| 44 |
-
|
| 45 |
# Python packages
|
| 46 |
RUN --mount=target=requirements.txt,source=requirements.txt \
|
| 47 |
pip install --no-cache-dir torch torchvision && \
|
| 48 |
pip install --no-cache-dir git+https://github.com/openai/CLIP.git && \
|
| 49 |
pip install --no-cache-dir -r requirements.txt
|
| 50 |
|
| 51 |
-
RUN python -c "from transformers import AutoModel; _ = AutoModel.from_pretrained('codellama/CodeLlama-7b-Python-hf')"
|
| 52 |
-
RUN python -c "from transformers import AutoModel; _ = AutoModel.from_pretrained('VDebugger/VDebugger-critic-generalist-7B')"
|
| 53 |
-
RUN python -c "from transformers import AutoModel; _ = AutoModel.from_pretrained('VDebugger/VDebugger-refiner-generalist-7B')"
|
| 54 |
-
|
| 55 |
# Download GLIP dependencies, but unfortunately don't install yet...
|
| 56 |
RUN git clone https://github.com/sachit-menon/GLIP
|
| 57 |
|
|
|
|
| 31 |
&& rm ~/miniconda.sh \
|
| 32 |
&& conda clean -ya
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
# Python packages
|
| 35 |
RUN --mount=target=requirements.txt,source=requirements.txt \
|
| 36 |
pip install --no-cache-dir torch torchvision && \
|
| 37 |
pip install --no-cache-dir git+https://github.com/openai/CLIP.git && \
|
| 38 |
pip install --no-cache-dir -r requirements.txt
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
# Download GLIP dependencies, but unfortunately don't install yet...
|
| 41 |
RUN git clone https://github.com/sachit-menon/GLIP
|
| 42 |
|
app.sh
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
cd GLIP
|
| 2 |
python setup.py clean --all build develop --user
|
| 3 |
cd ../
|
|
|
|
| 1 |
+
python download_files_from_hub.py
|
| 2 |
cd GLIP
|
| 3 |
python setup.py clean --all build develop --user
|
| 4 |
cd ../
|
download_files_from_hub.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from huggingface_hub import hf_hub_download
|
| 2 |
+
|
| 3 |
+
hf_hub_download(repo_id="GLIPModel/GLIP", filename="glip_large_model.pth",
|
| 4 |
+
local_dir="./pretrained_models/GLIP/checkpoints")
|
| 5 |
+
hf_hub_download(repo_id="VDebugger/xvlm_retrieval_mscoco", filename="retrieval_mscoco_checkpoint_9.pth",
|
| 6 |
+
local_dir="./pretrained_models/xvlm/")
|
pretrained_models/GLIP/configs/glip_Swin_L.yaml
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MODEL:
|
| 2 |
+
META_ARCHITECTURE: "GeneralizedVLRCNN"
|
| 3 |
+
WEIGHT: "swin_large_patch4_window12_384_22k.pth"
|
| 4 |
+
RPN_ONLY: True
|
| 5 |
+
RPN_ARCHITECTURE: "VLDYHEAD"
|
| 6 |
+
|
| 7 |
+
BACKBONE:
|
| 8 |
+
CONV_BODY: "SWINT-FPN-RETINANET"
|
| 9 |
+
OUT_CHANNELS: 256
|
| 10 |
+
|
| 11 |
+
SWINT:
|
| 12 |
+
EMBED_DIM: 192
|
| 13 |
+
DEPTHS: (2, 2, 18, 2)
|
| 14 |
+
NUM_HEADS: (6, 12, 24, 48)
|
| 15 |
+
WINDOW_SIZE: 12
|
| 16 |
+
OUT_CHANNELS: (192, 384, 768, 1536)
|
| 17 |
+
DROP_PATH_RATE: 0.4
|
| 18 |
+
|
| 19 |
+
LANGUAGE_BACKBONE:
|
| 20 |
+
FREEZE: False
|
| 21 |
+
MODEL_TYPE: "bert-base-uncased" # "roberta-base", "clip"
|
| 22 |
+
MASK_SPECIAL: False
|
| 23 |
+
|
| 24 |
+
RPN:
|
| 25 |
+
USE_FPN: True
|
| 26 |
+
ANCHOR_SIZES: (64, 128, 256, 512, 1024)
|
| 27 |
+
ANCHOR_STRIDE: (8, 16, 32, 64, 128)
|
| 28 |
+
ASPECT_RATIOS: (1.0,)
|
| 29 |
+
SCALES_PER_OCTAVE: 1
|
| 30 |
+
|
| 31 |
+
DYHEAD:
|
| 32 |
+
CHANNELS: 256
|
| 33 |
+
NUM_CONVS: 8
|
| 34 |
+
USE_GN: True
|
| 35 |
+
USE_DYRELU: True
|
| 36 |
+
USE_DFCONV: True
|
| 37 |
+
USE_DYFUSE: True
|
| 38 |
+
TOPK: 9 # topk for selecting candidate positive samples from each level
|
| 39 |
+
SCORE_AGG: "MEAN"
|
| 40 |
+
LOG_SCALE: 0.0
|
| 41 |
+
|
| 42 |
+
USE_CHECKPOINT: True
|
| 43 |
+
FUSE_CONFIG:
|
| 44 |
+
USE_FUSED_FEATURES_DOT_PRODUCT: True
|
| 45 |
+
EARLY_FUSE_ON: True
|
| 46 |
+
TYPE: "MHA-B"
|
| 47 |
+
USE_CLASSIFICATION_LOSS: False
|
| 48 |
+
USE_TOKEN_LOSS: False
|
| 49 |
+
USE_CONTRASTIVE_ALIGN_LOSS: False
|
| 50 |
+
CONTRASTIVE_HIDDEN_DIM: 64
|
| 51 |
+
USE_DOT_PRODUCT_TOKEN_LOSS: True
|
| 52 |
+
USE_LAYER_SCALE: True
|
| 53 |
+
CLAMP_MIN_FOR_UNDERFLOW: True
|
| 54 |
+
CLAMP_MAX_FOR_OVERFLOW: True
|
| 55 |
+
CLAMP_BERTATTN_MIN_FOR_UNDERFLOW: True
|
| 56 |
+
CLAMP_BERTATTN_MAX_FOR_OVERFLOW: True
|
| 57 |
+
CLAMP_DOT_PRODUCT: True
|
| 58 |
+
|
| 59 |
+
DATASETS:
|
| 60 |
+
|
| 61 |
+
TRAIN: ("mixed_train_no_coco",) # Place holder dataset for now. To be updated in the next version
|
| 62 |
+
TEST: ("coco_2017_val", )
|
| 63 |
+
|
| 64 |
+
ONE_HOT: False
|
| 65 |
+
FLICKR_COPY: 8 # 0.15 * 8 = ~1.2M
|
| 66 |
+
MIXED_COPY: 4 # 0.6 * 4 = ~2.4M
|
| 67 |
+
OBJECT365_COPY: 2 # 1.4 * 2 = ~2.8M
|
| 68 |
+
VG_COPY: 3 # 0.4 * 3 = ~1.2M
|
| 69 |
+
IN_COPY: 2 # 0.67 * 2 = ~1.33M
|
| 70 |
+
OI_COPY: 1 # 2M * 1 = 2M
|
| 71 |
+
|
| 72 |
+
DISABLE_SHUFFLE: False
|
| 73 |
+
ADD_DET_PROMPT: False
|
| 74 |
+
RANDOM_SAMPLE_NEG: 85
|
| 75 |
+
CONTROL_PROB: (0.0, 0.0, 0.5, 0.0)
|
| 76 |
+
FURTHER_SCREEN: True
|
| 77 |
+
CAPTION_CONF: 0.5
|
| 78 |
+
CAPTION_NMS: -1.0
|
| 79 |
+
CAPTION_MIN_BOX: 1
|
| 80 |
+
|
| 81 |
+
SEPARATION_TOKENS: ". "
|
| 82 |
+
|
| 83 |
+
PACK_RANDOM_CAPTION_NUMBER: 20
|
| 84 |
+
NO_RANDOM_PACK_PROBABILITY: 0.4
|
| 85 |
+
RANDOM_PACK_PROB: 0.5
|
| 86 |
+
CAPTION_FORMAT_VERSION: "v2"
|
| 87 |
+
|
| 88 |
+
INPUT:
|
| 89 |
+
PIXEL_MEAN: [ 103.530, 116.280, 123.675 ]
|
| 90 |
+
PIXEL_STD: [ 57.375, 57.120, 58.395 ]
|
| 91 |
+
MIN_SIZE_TRAIN: 800
|
| 92 |
+
MAX_SIZE_TRAIN: 1333
|
| 93 |
+
MIN_SIZE_TEST: 800
|
| 94 |
+
MAX_SIZE_TEST: 1333
|
| 95 |
+
|
| 96 |
+
AUGMENT:
|
| 97 |
+
MULT_MIN_SIZE_TRAIN: (480,560,640,720,800)
|
| 98 |
+
|
| 99 |
+
DATALOADER:
|
| 100 |
+
SIZE_DIVISIBILITY: 32
|
| 101 |
+
|
| 102 |
+
SOLVER:
|
| 103 |
+
OPTIMIZER: ADAMW
|
| 104 |
+
BASE_LR: 0.0001
|
| 105 |
+
LANG_LR: 0.00001
|
| 106 |
+
WEIGHT_DECAY: 0.01
|
| 107 |
+
WEIGHT_DECAY_SCHEDULE: True
|
| 108 |
+
STEPS: (0.67, 0.89)
|
| 109 |
+
MAX_ITER: 1000000
|
| 110 |
+
IMS_PER_BATCH: 64
|
| 111 |
+
WARMUP_ITERS: 2000
|
| 112 |
+
WARMUP_FACTOR: 0.001
|
| 113 |
+
|
| 114 |
+
FIND_UNUSED_PARAMETERS: False
|
| 115 |
+
|
| 116 |
+
CLIP_GRADIENTS:
|
| 117 |
+
ENABLED: True
|
| 118 |
+
CLIP_TYPE: "full_model"
|
| 119 |
+
CLIP_VALUE: 1.0
|
| 120 |
+
NORM_TYPE: 2.0
|
requirements.txt
CHANGED
|
@@ -255,4 +255,4 @@ xxhash
|
|
| 255 |
yacs
|
| 256 |
yarl
|
| 257 |
gradio
|
| 258 |
-
|
|
|
|
| 255 |
yacs
|
| 256 |
yarl
|
| 257 |
gradio
|
| 258 |
+
huggingface_hub
|