Spaces:

bhardwaj08sarthak
/

STEM-Question-Generator

Sleeping

App Files Community

bhardwaj08sarthak committed on Sep 25

Commit

2d6b358

verified ·

1 Parent(s): 355f5f7

Update level_classifier_tool_2.py

Browse files

Files changed (1) hide show

level_classifier_tool_2.py +28 -15

level_classifier_tool_2.py CHANGED Viewed

@@ -21,37 +21,50 @@ class HFEmbeddingBackend:
     MODEL: Any = field(init=False, repr=False)
     def __post_init__(self):
-    # 1) Try to disable Spaces ZeroGPU monkey-patch proactively
         os.environ.setdefault("SPACES_ZERO_DISABLED", "1")
         try:
-            # If Spaces was already imported somewhere, explicitly disable its patch.
-            from spaces import zero as _spaces_zero  # safe import; no-op if not installed
-            if hasattr(_spaces_zero, "disable"):
-                _spaces_zero.disable()
         except Exception:
             pass
-        # 2) Keep attention off Flash/MemEfficient (avoid vectorized mask paths)
         try:
             torch.backends.cuda.sdp_kernel(enable_math=True, enable_flash=False, enable_mem_efficient=False)
         except Exception:
             pass
-        # 3) Load tokenizer/model and force eager attention (non-vmap route)
         self.TOK = AutoTokenizer.from_pretrained(self.model_name)
         self.MODEL = AutoModel.from_pretrained(self.model_name, attn_implementation="eager")
-        # (extra safety) disable any sliding/windowed attention that can trigger the vmap mask path
         try:
             if hasattr(self.MODEL.config, "sliding_window"):
-                self.MODEL.config.sliding_window = None
-            if hasattr(self.MODEL, "generation_config") and hasattr(self.MODEL.generation_config, "sliding_window"):
-                self.MODEL.generation_config.sliding_window = None
-        except Exception:
-            pass
-        try:
-            self.MODEL.config.attn_implementation = "eager"
         except Exception:
             pass

     MODEL: Any = field(init=False, repr=False)
     def __post_init__(self):
+        # Nuke Spaces ZeroGPU if present (still good to keep)
         os.environ.setdefault("SPACES_ZERO_DISABLED", "1")
         try:
+            import sys, importlib
+            for modname in (
+                "spaces.zero", "spaces.zero.torch.patching", "spaces.zero.torch",
+                "spaces.zero.patch", "spaces.zero.patching"
+            ):
+                try:
+                    m = sys.modules.get(modname) or importlib.import_module(modname)
+                except Exception:
+                    continue
+                for attr in ("disable", "unpatch", "deactivate"):
+                    fn = getattr(m, attr, None)
+                    if callable(fn):
+                        try: fn()
+                        except Exception: pass
         except Exception:
             pass
+        # Prefer simple math attention kernels
         try:
             torch.backends.cuda.sdp_kernel(enable_math=True, enable_flash=False, enable_mem_efficient=False)
         except Exception:
             pass
+        # Make eager attention the default everywhere
+        os.environ.setdefault("TRANSFORMERS_ATTENTION_IMPLEMENTATION", "eager")
+        # Load tokenizer/model with eager attention
         self.TOK = AutoTokenizer.from_pretrained(self.model_name)
         self.MODEL = AutoModel.from_pretrained(self.model_name, attn_implementation="eager")
+        # Keep eager; DO NOT set sliding_window=None (Gemma-3 expects an int in mask math)
         try:
+            self.MODEL.config.attn_implementation = "eager"
+            # If you want to effectively disable sliding attention, make it 0 (int), not None
             if hasattr(self.MODEL.config, "sliding_window"):
+                self.MODEL.config.sliding_window = 0
+            gen_cfg = getattr(self.MODEL, "generation_config", None)
+            if gen_cfg is not None and hasattr(gen_cfg, "sliding_window"):
+                gen_cfg.sliding_window = 0
         except Exception:
             pass