foivospar committed
Commit: f40ac2c
Parent(s): c044b60

update requirements

Files changed:
- arc2face/models.py (+4 -3)
- requirements.txt (+2 -2)
arc2face/models.py
CHANGED

@@ -2,7 +2,8 @@ import torch
 from transformers import CLIPTextModel
 from typing import Any, Callable, Dict, Optional, Tuple, Union, List
 from transformers.modeling_outputs import BaseModelOutputWithPooling
-from transformers.models.clip.modeling_clip import _make_causal_mask, _expand_mask
+#from transformers.models.clip.modeling_clip import _make_causal_mask, _expand_mask
+from transformers.modeling_attn_mask_utils import _create_4d_causal_attention_mask, _prepare_4d_attention_mask # transformers 4.36.0


 class CLIPTextModelWrapper(CLIPTextModel):
@@ -41,11 +42,11 @@ class CLIPTextModelWrapper(CLIPTextModel):

         # CLIP's text model uses causal mask, prepare it here.
         # https://github.com/openai/CLIP/blob/cfcffb90e69f37bf2ff1e988237a0fbe41f33c04/clip/model.py#L324
-        causal_attention_mask = _make_causal_mask(input_shape, hidden_states.dtype, device=hidden_states.device)
+        causal_attention_mask = _create_4d_causal_attention_mask(input_shape, hidden_states.dtype, device=hidden_states.device)
         # expand attention_mask
         if attention_mask is not None:
             # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
-            attention_mask = _expand_mask(attention_mask, hidden_states.dtype)
+            attention_mask = _prepare_4d_attention_mask(attention_mask, hidden_states.dtype)

         encoder_outputs = self.text_model.encoder(
             inputs_embeds=hidden_states,
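For context: transformers 4.36 moved the private mask helpers this wrapper relied on, so `_make_causal_mask` and `_expand_mask` are no longer importable from `transformers.models.clip.modeling_clip`; their replacements, `_create_4d_causal_attention_mask` and `_prepare_4d_attention_mask` in `transformers.modeling_attn_mask_utils`, take the same arguments at these two call sites. A minimal sketch of a version-tolerant import shim (not part of this commit; the try/except fallback is an assumption) that would let the wrapper import cleanly on either side of the 4.36 boundary:

    # Hypothetical compatibility shim (not in this commit): try the post-4.36
    # location of the private mask helpers first, then fall back to the old
    # CLIP-module names, which take the same arguments at these call sites.
    try:
        # transformers >= 4.36.0
        from transformers.modeling_attn_mask_utils import (
            _create_4d_causal_attention_mask,
            _prepare_4d_attention_mask,
        )
    except ImportError:
        # transformers < 4.36.0: alias the old helpers to the new names
        from transformers.models.clip.modeling_clip import (
            _make_causal_mask as _create_4d_causal_attention_mask,
            _expand_mask as _prepare_4d_attention_mask,
        )

Pinning transformers==4.36.0, as the requirements change below does, makes such a shim unnecessary; it is only useful if the pin cannot be enforced.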
requirements.txt
CHANGED

@@ -1,8 +1,8 @@
 numpy
 torch
 torchvision
-diffusers==0.
-transformers==4.
+diffusers==0.29.2
+transformers==4.36.0
 peft
 accelerate
 insightface
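Because models.py now imports private helpers that exist only from transformers 4.36.0 onward, a mismatched install fails with a bare ImportError. A minimal sketch of a startup check that surfaces the pin explicitly (hypothetical; not part of this repo):

    # Hypothetical startup guard (not in this commit): fail fast, with a
    # pointed message, when the installed transformers predates the pin.
    from importlib.metadata import version
    from packaging.version import Version  # packaging is a transformers dependency

    if Version(version("transformers")) < Version("4.36.0"):
        raise RuntimeError(
            "transformers >= 4.36.0 is required: _create_4d_causal_attention_mask "
            "and _prepare_4d_attention_mask only exist in "
            "transformers.modeling_attn_mask_utils from 4.36.0 onward."
        )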