foivospar committed
Commit: f40ac2c
Parent(s): c044b60

update requirements

Files changed:
- arc2face/models.py (+4 -3)
- requirements.txt (+2 -2)
arc2face/models.py
CHANGED

@@ -2,7 +2,8 @@ import torch
 from transformers import CLIPTextModel
 from typing import Any, Callable, Dict, Optional, Tuple, Union, List
 from transformers.modeling_outputs import BaseModelOutputWithPooling
-from transformers.models.clip.modeling_clip import _make_causal_mask, _expand_mask
+#from transformers.models.clip.modeling_clip import _make_causal_mask, _expand_mask
+from transformers.modeling_attn_mask_utils import _create_4d_causal_attention_mask, _prepare_4d_attention_mask # transformers 4.36.0


 class CLIPTextModelWrapper(CLIPTextModel):
@@ -41,11 +42,11 @@ class CLIPTextModelWrapper(CLIPTextModel):

         # CLIP's text model uses causal mask, prepare it here.
         # https://github.com/openai/CLIP/blob/cfcffb90e69f37bf2ff1e988237a0fbe41f33c04/clip/model.py#L324
-        causal_attention_mask = _make_causal_mask(input_shape, hidden_states.dtype, device=hidden_states.device)
+        causal_attention_mask = _create_4d_causal_attention_mask(input_shape, hidden_states.dtype, device=hidden_states.device)
         # expand attention_mask
         if attention_mask is not None:
             # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
-            attention_mask = _expand_mask(attention_mask, hidden_states.dtype)
+            attention_mask = _prepare_4d_attention_mask(attention_mask, hidden_states.dtype)

         encoder_outputs = self.text_model.encoder(
             inputs_embeds=hidden_states,
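For context: transformers 4.36 moved the private mask helpers this wrapper relied on, so `_make_causal_mask` and `_expand_mask` are no longer importable from `transformers.models.clip.modeling_clip`; their replacements, `_create_4d_causal_attention_mask` and `_prepare_4d_attention_mask` in `transformers.modeling_attn_mask_utils`, take the same arguments at these two call sites. A minimal sketch of a version-tolerant import shim (not part of this commit; the try/except fallback is an assumption) that would let the wrapper import cleanly on either side of the 4.36 boundary:

    # Hypothetical compatibility shim (not in this commit): try the post-4.36
    # location of the private mask helpers first, then fall back to the old
    # CLIP-module names, which take the same arguments at these call sites.
    try:
        # transformers >= 4.36.0
        from transformers.modeling_attn_mask_utils import (
            _create_4d_causal_attention_mask,
            _prepare_4d_attention_mask,
        )
    except ImportError:
        # transformers < 4.36.0: alias the old helpers to the new names
        from transformers.models.clip.modeling_clip import (
            _make_causal_mask as _create_4d_causal_attention_mask,
            _expand_mask as _prepare_4d_attention_mask,
        )

Pinning transformers==4.36.0, as the requirements change below does, makes such a shim unnecessary; it is only useful if the pin cannot be enforced.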
requirements.txt
CHANGED

@@ -1,8 +1,8 @@
 numpy
 torch
 torchvision
-diffusers==0.
-transformers==4.
+diffusers==0.29.2
+transformers==4.36.0
 peft
 accelerate
 insightface
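Because models.py now imports private helpers that exist only from transformers 4.36.0 onward, a mismatched install fails with a bare ImportError. A minimal sketch of a startup check that surfaces the pin explicitly (hypothetical; not part of this repo):

    # Hypothetical startup guard (not in this commit): fail fast, with a
    # pointed message, when the installed transformers predates the pin.
    from importlib.metadata import version
    from packaging.version import Version  # packaging is a transformers dependency

    if Version(version("transformers")) < Version("4.36.0"):
        raise RuntimeError(
            "transformers >= 4.36.0 is required: _create_4d_causal_attention_mask "
            "and _prepare_4d_attention_mask only exist in "
            "transformers.modeling_attn_mask_utils from 4.36.0 onward."
        )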