bhardwaj08sarthak committed
Commit 2d6b358 · verified · 1 Parent(s): 355f5f7

Update level_classifier_tool_2.py

Files changed (1):
  1. level_classifier_tool_2.py  +28 -15
level_classifier_tool_2.py CHANGED
@@ -21,37 +21,50 @@ class HFEmbeddingBackend:
     MODEL: Any = field(init=False, repr=False)

     def __post_init__(self):
-        # 1) Try to disable Spaces ZeroGPU monkey-patch proactively
+        # Nuke Spaces ZeroGPU if present (still good to keep)
         os.environ.setdefault("SPACES_ZERO_DISABLED", "1")
         try:
-            # If Spaces was already imported somewhere, explicitly disable its patch.
-            from spaces import zero as _spaces_zero  # safe import; no-op if not installed
-            if hasattr(_spaces_zero, "disable"):
-                _spaces_zero.disable()
+            import sys, importlib
+            for modname in (
+                "spaces.zero", "spaces.zero.torch.patching", "spaces.zero.torch",
+                "spaces.zero.patch", "spaces.zero.patching"
+            ):
+                try:
+                    m = sys.modules.get(modname) or importlib.import_module(modname)
+                except Exception:
+                    continue
+                for attr in ("disable", "unpatch", "deactivate"):
+                    fn = getattr(m, attr, None)
+                    if callable(fn):
+                        try: fn()
+                        except Exception: pass
         except Exception:
             pass

-        # 2) Keep attention off Flash/MemEfficient (avoid vectorized mask paths)
+        # Prefer simple math attention kernels
         try:
             torch.backends.cuda.sdp_kernel(enable_math=True, enable_flash=False, enable_mem_efficient=False)
         except Exception:
             pass

-        # 3) Load tokenizer/model and force eager attention (non-vmap route)
+        # Make eager attention the default everywhere
+        os.environ.setdefault("TRANSFORMERS_ATTENTION_IMPLEMENTATION", "eager")
+
+        # Load tokenizer/model with eager attention
         self.TOK = AutoTokenizer.from_pretrained(self.model_name)
         self.MODEL = AutoModel.from_pretrained(self.model_name, attn_implementation="eager")

-        # (extra safety) disable any sliding/windowed attention that can trigger the vmap mask path
+        # Keep eager; DO NOT set sliding_window=None (Gemma-3 expects an int in mask math)
         try:
+            self.MODEL.config.attn_implementation = "eager"
+
+            # If you want to effectively disable sliding attention, make it 0 (int), not None
             if hasattr(self.MODEL.config, "sliding_window"):
-                self.MODEL.config.sliding_window = None
-            if hasattr(self.MODEL, "generation_config") and hasattr(self.MODEL.generation_config, "sliding_window"):
-                self.MODEL.generation_config.sliding_window = None
-        except Exception:
-            pass
+                self.MODEL.config.sliding_window = 0

-        try:
-            self.MODEL.config.attn_implementation = "eager"
+            gen_cfg = getattr(self.MODEL, "generation_config", None)
+            if gen_cfg is not None and hasattr(gen_cfg, "sliding_window"):
+                gen_cfg.sliding_window = 0
         except Exception:
             pass
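A note on the sliding_window change: the committed comment says Gemma-3's mask math expects an integer window. A minimal, illustrative sketch of that failure mode (not the actual transformers mask code, just the same kind of comparison) is:

import torch

# Illustrative only: a banded mask built from an integer window works,
# while a None window fails in the comparison.
positions = torch.arange(8)
offsets = (positions.unsqueeze(0) - positions.unsqueeze(1)).abs()

print(offsets < 4)          # int window -> boolean band mask, as expected
try:
    print(offsets < None)   # comparing a Tensor with None raises TypeError
except TypeError as exc:
    print("sliding_window=None would fail here:", exc)

And a minimal usage sketch of the backend after this patch, assuming HFEmbeddingBackend only needs a model_name and that mean pooling the last hidden state is an acceptable sentence embedding; neither detail is shown in this diff, and the model id below is a placeholder:

import torch

backend = HFEmbeddingBackend(model_name="google/gemma-3-1b-it")  # placeholder model id
texts = ["What is photosynthesis?", "Explain gradient descent."]

enc = backend.TOK(texts, padding=True, truncation=True, return_tensors="pt")
with torch.no_grad():
    out = backend.MODEL(**enc)

# Mean-pool token embeddings using the attention mask (a common pooling choice).
mask = enc["attention_mask"].unsqueeze(-1).float()
emb = (out.last_hidden_state * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
print(emb.shape)  # (2, hidden_size)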