import random
from collections import deque

import gradio as gr


# -------------------------------
# LightBulbEnv environment
class LightBulbEnv:
    """Light-bulb logic puzzle.

    Every bulb ``B<i>`` starts OFF and carries a boolean expression over the
    lower-indexed bulbs. A bulb can only be toggled while its expression is
    true for the current state; ``B0`` is always toggleable. The puzzle is
    solved once every bulb is ON.

    Args:
        num_bulbs: Number of bulbs in the puzzle.
        seed: Seed for the private ``random.Random`` instance. ``None`` lets
            ``random.Random`` pick its own seed.
        max_attempts: How many random layouts ``reset`` may try while looking
            for one whose shortest solution is long enough.
        min_steps: Minimum length of the shortest solution for a layout to be
            accepted.
        expose_logic: Stored on the instance; not read by this class.
    """

    def __init__(self, num_bulbs=5, seed=None, max_attempts=100, min_steps=5, expose_logic=False):
        self.num_bulbs = num_bulbs
        self.rng = random.Random(seed)
        self.seed_value = seed
        self.max_attempts = max_attempts
        self.min_steps = min_steps
        self.expose_logic = expose_logic
        self.reset()

    def reset(self):
        """Turn all bulbs off, generate a fresh layout and return the observation.

        Layouts are generated until one has a shortest solution of at least
        ``min_steps`` toggles, or ``max_attempts`` is exhausted. If no layout
        qualifies, the solvable candidate with the longest shortest solution
        is used; if none was solvable, the last generated layout is kept.
        """
        self.bulbs = {f"B{i}": False for i in range(self.num_bulbs)}
        self.steps = 0

        fallback_logic = None
        fallback_len = -1
        # Always generate at least once so ``logic_expr`` exists.
        for attempt in range(max(1, self.max_attempts)):
            self._generate_dependencies()
            shortest = self._shortest_solution_length()
            if shortest is not None:
                if shortest >= self.min_steps:
                    break
                if shortest > fallback_len:
                    fallback_logic = dict(self.logic_expr)
                    fallback_len = shortest
            # Reseed deterministically for the next attempt. Skipped when the
            # seed is not an integer (e.g. the default ``None``), where the
            # addition would raise TypeError; the RNG state has advanced
            # anyway, so the next layout still differs.
            if isinstance(self.seed_value, int):
                self.rng.seed(self.seed_value + attempt + 1)
        else:
            # No layout met ``min_steps``: prefer a solvable one if we saw any.
            if fallback_logic is not None:
                self.logic_expr = fallback_logic
        return self._get_obs()

    def _generate_dependencies(self):
        """Build ``self.logic_expr``: one random boolean expression per bulb.

        ``B0`` gets the constant ``"True"``. Every other bulb depends on
        between 1 and ``min(8, i)`` distinct lower-indexed bulbs, each
        possibly negated, chained left-to-right with random ``and``/``or``.
        """
        self.logic_expr = {}
        names = list(self.bulbs)
        for i, target in enumerate(names):
            if i == 0:
                self.logic_expr[target] = "True"
                continue
            dep_count = self.rng.randint(1, min(8, i))
            dep_indices = self.rng.sample(range(i), dep_count)
            terms = []
            for idx in dep_indices:
                name = names[idx]
                terms.append(f"not {name}" if self.rng.random() < 0.5 else name)
            expr = terms[0]
            for term in terms[1:]:
                op = self.rng.choice(["and", "or"])
                expr = f"({expr} {op} {term})"
            self.logic_expr[target] = expr

    def _shortest_solution_length(self):
        """Return the fewest toggles needed to light every bulb, or ``None``.

        Breadth-first search over bulb states starting from all-OFF. BFS
        reaches each state first along a shortest path, so the depth at
        which the all-ON state is first produced is the true minimum.
        ``None`` means the all-ON state is unreachable.
        """
        names = list(self.bulbs)
        start = tuple(False for _ in names)
        if all(start):  # only true for an empty puzzle
            return 0
        seen = {start}
        queue = deque([(start, 0)])
        while queue:
            values, depth = queue.popleft()
            state = dict(zip(names, values))
            for pos, name in enumerate(names):
                if not self._eval_logic(name, state):
                    continue
                nxt = values[:pos] + (not values[pos],) + values[pos + 1:]
                if nxt in seen:
                    continue
                if all(nxt):
                    return depth + 1
                seen.add(nxt)
                queue.append((nxt, depth + 1))
        return None

    def _validate_min_steps(self):
        """Return True if the layout is solvable and needs at least ``min_steps`` toggles."""
        shortest = self._shortest_solution_length()
        return shortest is not None and shortest >= self.min_steps

    def _eval_logic(self, bulb, state=None):
        """Evaluate ``bulb``'s expression against ``state`` (default: live bulbs).

        Returns False if evaluating the expression raises.
        """
        if state is None:
            state = self.bulbs
        expr = self.logic_expr[bulb]
        local_vars = dict(state)
        try:
            # NOTE: ``eval`` is applied to expressions produced by
            # ``_generate_dependencies``; builtins are stripped and only the
            # bulb names are visible. Do not feed external strings through here.
            return bool(eval(expr, {"__builtins__": {}}, local_vars))
        except Exception:
            return False

    def step(self, action):
        """Try to toggle bulb number ``action``.

        Args:
            action: Bulb index. Coerced with ``int`` so that a float such as
                ``3.0`` (as a numeric UI input may deliver) addresses ``B3``.

        Returns:
            ``(observation, hint, done, info)`` where ``observation`` is the
            list of bulb states, ``hint`` a human-readable message, ``done``
            whether all bulbs are ON, and ``info`` an empty dict.

        Raises:
            KeyError: If ``action`` does not name an existing bulb.
        """
        try:
            index = int(action)
        except (TypeError, ValueError):
            raise KeyError(f"Unknown bulb action: {action!r}") from None
        bulb_name = f"B{index}"
        if bulb_name not in self.bulbs:
            raise KeyError(f"Unknown bulb: {bulb_name}")

        self.steps += 1
        if self._eval_logic(bulb_name):
            self.bulbs[bulb_name] = not self.bulbs[bulb_name]
            hint = f"Toggled {bulb_name} to {self.bulbs[bulb_name]}"
        else:
            hint = f"{bulb_name} remains inactive... remaining bulbs should be in specific mode."
        done = all(self.bulbs.values())
        return self._get_obs(), hint, done, {}

    def _get_obs(self):
        """Return the bulb states as a list of booleans ordered by index."""
        return [self.bulbs[f"B{i}"] for i in range(self.num_bulbs)]

    def return_obs(self):
        """Return the bulb states as a space-separated string of symbols."""
        return " ".join(["💡" if self.bulbs[f"B{i}"] else "○" for i in range(self.num_bulbs)])

    def get_logic_str(self):
        """Return one ``name: expression`` line per bulb."""
        return "\n".join([f"{k}: {v}" for k, v in self.logic_expr.items()])


# -------------------------------
# Gradio app
NUM_BULBS = 8

# NOTE: one module-level environment is used by every callback below, so all
# callers of this process act on the same puzzle instance.
env = None


def create_new_env():
    """Replace the global environment with a freshly seeded puzzle and return it."""
    global env
    env = LightBulbEnv(num_bulbs=NUM_BULBS, seed=random.randint(0, 99999), min_steps=10, expose_logic=False)
    return env


def reset_env(show_logic):
    """Callback for the reset button: returns (state text, hint, logic text)."""
    create_new_env()
    logic_text = env.get_logic_str() if show_logic else ""
    return env.return_obs(), "New environment created and reset.", logic_text


def press_button(i, show_logic):
    """Callback for a toggle button: returns (state text, hint, logic text).

    Creates the environment on first use if none exists yet.
    """
    global env
    if env is None:
        create_new_env()
    obs, hint, done, _ = env.step(i)
    if done:
        hint += " 🎉 All bulbs are ON! You win!"
    logic_text = env.get_logic_str() if show_logic else ""
    return env.return_obs(), hint, logic_text


# -------------------------------
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 💡 LightBulb Logic Puzzle (with Debug Mode)")
    gr.Markdown("Toggle bulbs by clicking the buttons below. Each bulb may depend on others!")

    # Initial display mirrors an all-OFF puzzle of NUM_BULBS bulbs.
    state_display = gr.Textbox(label="Current Bulb States", value=" ".join(["○"] * NUM_BULBS), interactive=False)
    hint_display = gr.Textbox(label="Hint", value="", interactive=False)
    logic_display = gr.Textbox(label="Logic (for debugging)", value="", interactive=False, lines=8)
    show_logic_checkbox = gr.Checkbox(label="Show Logic (for debugging)", value=False)

    with gr.Row():
        buttons = []
        for i in range(NUM_BULBS):
            btn = gr.Button(f"Toggle B{i}")
            buttons.append(btn)

    reset_btn = gr.Button("🔄 Reset Environment")

    # Each button passes its own index through a hidden Number component;
    # ``LightBulbEnv.step`` coerces the delivered value to int.
    for i, btn in enumerate(buttons):
        btn.click(
            press_button,
            inputs=[gr.Number(i, visible=False), show_logic_checkbox],
            outputs=[state_display, hint_display, logic_display],
        )

    reset_btn.click(
        reset_env,
        inputs=[show_logic_checkbox],
        outputs=[state_display, hint_display, logic_display],
    )

# -------------------------------
if __name__ == "__main__":
    create_new_env()
    demo.launch()