Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import numpy as np | |
| import os | |
| # Bibliothèques d'IA et de RL | |
| import gymnasium as gym | |
| from gymnasium import register, spaces | |
| from stable_baselines3 import PPO | |
| from huggingface_hub import hf_hub_download | |
# --- Hugging Face repository settings ---
REPO_ID = "Clemylia/MiRobot"
MODEL_FILE = "mirobot_final_model.zip"
ENV_SCRIPT_FILE = "MiRobotEnv.py"
ENV_ID = 'MiRobot-v0'

# --- Game constants ---
GRID_SIZE = 10  # the board is GRID_SIZE x GRID_SIZE cells
FAIM_PENALTY_THRESHOLD = 0.9  # hunger value at or above which the game is lost
INITIAL_LEVEL = 1

# --- Indices into the environment state vector ---
CMD_AVANCER, CMD_TOURNER, ETAT_FAIM, ETAT_SOMMEIL, ETAT_HUMEUR = range(5)

# Model action id -> label shown when reporting what the agent did
ACTION_MAP_MODEL = dict(
    enumerate(("S'Arrêter", "Avancer", "Tourner G", "Tourner D"))
)

# User command label -> action id the agent is expected to pick
ACTION_MAP_USER = {"AVANCER": 1, "TOURNER À GAUCHE": 2, "TOURNER À DROITE": 3}
# ----------------------------------------------------------------------
# 1. MODEL AND ENVIRONMENT SETUP
# ----------------------------------------------------------------------
# Fallback values: they stay in place when anything in the try-block fails,
# and the rest of the file tests `model is None` / `env is None` to detect it.
model = None
env = None
MiRobotEnv = None
initial_faim = 0.0
initial_humeur = 0.0

try:
    print("Téléchargement des fichiers MiRobot...")
    TEMP_DIR = "./mirobot_assets"
    os.makedirs(TEMP_DIR, exist_ok=True)

    # --- 1. Load the MiRobotEnv class from the downloaded script ---
    env_path = hf_hub_download(repo_id=REPO_ID, filename=ENV_SCRIPT_FILE, local_dir=TEMP_DIR)
    # Names made available to the script without it having to import them.
    env_globals = {'gym': gym, 'np': np, 'spaces': spaces}
    # Python source is UTF-8 by default; an explicit encoding keeps decoding
    # independent of the host locale.
    with open(env_path, 'r', encoding='utf-8') as f:
        env_source = f.read()
    # SECURITY NOTE(review): this executes code fetched from REPO_ID with full
    # process privileges. Only acceptable while that repository is trusted;
    # consider pinning a `revision` in hf_hub_download.
    exec(env_source, env_globals)
    MiRobotEnv = env_globals['MiRobotEnv']

    # --- 2. Register the custom environment with gymnasium ---
    register(
        id=ENV_ID,
        entry_point=MiRobotEnv,
    )

    # --- 3. Load the trained model and instantiate the environment ---
    model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILE, local_dir=TEMP_DIR)
    model = PPO.load(model_path)
    env = gym.make(ENV_ID)
    env.reset()

    # Initial values for the UI bars, read from the unwrapped environment.
    # Converted to plain floats so the UI never receives NumPy scalars.
    _initial_env_state = env.unwrapped.state
    initial_faim = float(_initial_env_state[ETAT_FAIM] * 100)
    initial_humeur = float(_initial_env_state[ETAT_HUMEUR])
    print("✅ MiRobot prêt pour l'interface Gradio.")
except Exception as e:
    # Deliberate best-effort boundary: the app still starts and shows an
    # error interface instead of crashing at import time.
    print(f"❌ ERREUR CRITIQUE lors du chargement de MiRobot: {e}")
# ----------------------------------------------------------------------
# 2. GAME LOGIC
# ----------------------------------------------------------------------
# Template for a fresh game: puppy on the centre cell, reward at the origin,
# no reward image chosen yet.
game_state_initial = dict(
    level=INITIAL_LEVEL,
    puppy_pos=[GRID_SIZE // 2, GRID_SIZE // 2],
    reward_pos=[0, 0],
    message='Bienvenue ! Téléchargez une récompense et commencez !',
    reward_asset_path=None,
)
| def _get_env_state(env_instance): | |
| """Accès sécurisé à l'état de l'environnement, même avec un wrapper.""" | |
| if env_instance is None: | |
| return None | |
| # Utilise .unwrapped pour accéder à l'instance de MiRobotEnv | |
| return env_instance.unwrapped.state | |
| def _reset_game(reward_path): | |
| """Réinitialise les positions et l'état interne du chiot.""" | |
| new_state = game_state_initial.copy() | |
| if env is not None: | |
| obs, info = env.reset() | |
| current_state = _get_env_state(env) | |
| faim_display = current_state[ETAT_FAIM] * 100 | |
| humeur_display = current_state[ETAT_HUMEUR] | |
| else: | |
| faim_display = 0.0 | |
| humeur_display = 0.0 | |
| new_state.update({ | |
| 'reward_asset_path': reward_path, | |
| 'message': f'Jeu réinitialisé. Niveau {INITIAL_LEVEL}. Placez la récompense !' | |
| }) | |
| return new_state, new_state['puppy_pos'][0], new_state['puppy_pos'][1], new_state['reward_pos'][0], new_state['reward_pos'][1], faim_display, humeur_display, new_state['message'] | |
def handle_user_command(current_state, command_text, reward_path):
    """Apply a user order: let the RL model choose an action and update the game.

    Args:
        current_state: Game-state dict (keys 'puppy_pos', 'reward_pos',
            'message', 'reward_asset_path', ...). It is mutated in place,
            except on defeat where a fresh state is returned.
        command_text: The order given by the user; matched case-insensitively
            against the keys of ACTION_MAP_USER.
        reward_path: Path of the reward image, stored in the state.

    Returns:
        Always a 7-tuple ``(state, command_text, puppy_x, puppy_y, faim,
        humeur, message)`` so it lines up with the seven outputs the click
        handler is wired to. ``faim`` is the hunger entry of the environment
        state multiplied by 100.
    """
    game_state = current_state
    if model is None or env is None:
        return game_state, command_text, 5, 5, 0, 0, '❌ Erreur: Le modèle MiRobot n\'a pas pu être chargé !'

    game_state['reward_asset_path'] = reward_path
    current_env_state = _get_env_state(env)

    # Defeat: hunger reached the threshold, so the game is reset.
    faim_actuelle = current_env_state[ETAT_FAIM]
    if faim_actuelle >= FAIM_PENALTY_THRESHOLD:
        defeat_message = f'💔 Défaite ! MiRobot a trop faim ({faim_actuelle:.0%}). Jeu réinitialisé.'
        # _reset_game returns eight values (it also yields the reward
        # position); returning it as-is would not match this handler's seven
        # outputs and would drop the defeat message.
        new_state, puppy_x, puppy_y, _, _, faim_display, humeur_display, _ = _reset_game(reward_path)
        # This handler cannot move the reward sliders, so keep the reward
        # where the state currently has it.
        new_state['reward_pos'] = list(game_state.get('reward_pos', new_state['reward_pos']))
        new_state['message'] = defeat_message
        return new_state, command_text, puppy_x, puppy_y, faim_display, humeur_display, defeat_message

    # `command_text` may be None if the input was cleared.
    command_upper = (command_text or "").upper()

    # Unknown order: clear both command flags and step the env with action 0.
    if command_upper not in ACTION_MAP_USER:
        game_state['message'] = f"🤔 MiRobot n'a pas compris l'ordre '{command_text}'. Sa faim augmente..."
        current_env_state[CMD_AVANCER] = 0.0
        current_env_state[CMD_TOURNER] = 0.0
        env.step(0)
        # Re-read after stepping, as the main path does, in case the
        # environment replaced its state object.
        current_env_state = _get_env_state(env)
        puppy_x, puppy_y = game_state['puppy_pos']
        return (
            game_state,
            command_text,
            puppy_x,
            puppy_y,
            float(current_env_state[ETAT_FAIM] * 100),
            float(current_env_state[ETAT_HUMEUR]),
            game_state['message'],
        )

    # Write the order into the state vector, then ask the model what it does.
    # NOTE(review): predict() is fed the env's internal state vector — this
    # assumes it has the same layout as the observation; confirm in MiRobotEnv.
    current_env_state[CMD_AVANCER] = 1.0 if command_upper == "AVANCER" else 0.0
    current_env_state[CMD_TOURNER] = 1.0 if command_upper.startswith("TOURNER") else 0.0
    predicted_action, _ = model.predict(current_env_state, deterministic=True)
    # Plain int so it can be used as a dict key and compared to action ids.
    mirobot_action_id = int(np.asarray(predicted_action).item())

    # NOTE(review): the terminated/truncated flags are ignored here, as in the
    # original code — confirm whether the env ever ends an episode itself.
    _, reward, _, _, _ = env.step(mirobot_action_id)

    puppy_x, puppy_y = game_state['puppy_pos']
    dx, dy = 0, 0
    if mirobot_action_id == ACTION_MAP_USER[command_upper]:
        game_state['message'] = f"👏 MiRobot a obéi à '{command_upper}'. Récompense RL: {reward:.2f}"
        if command_upper == "AVANCER":
            # One step toward the reward along the axis with the larger gap
            # (the y axis when the gaps are equal and non-zero).
            rx, ry = game_state['reward_pos']
            if abs(rx - puppy_x) > abs(ry - puppy_y):
                dx = 1 if rx > puppy_x else -1
            elif abs(ry - puppy_y) > 0:
                dy = 1 if ry > puppy_y else -1
    else:
        real_action_name = ACTION_MAP_MODEL[mirobot_action_id]
        game_state['message'] = f"😥 MiRobot a désobéi ! Il a fait '{real_action_name}' au lieu de '{command_upper}'. Récompense RL: {reward:.2f}"

    # Clamp to the board; plain ints keep the state JSON-serialisable.
    new_x = int(np.clip(puppy_x + dx, 0, GRID_SIZE - 1))
    new_y = int(np.clip(puppy_y + dy, 0, GRID_SIZE - 1))
    game_state['puppy_pos'] = [new_x, new_y]

    current_env_state = _get_env_state(env)
    faim_display = float(current_env_state[ETAT_FAIM] * 100)
    humeur_display = float(current_env_state[ETAT_HUMEUR])
    return game_state, command_text, new_x, new_y, faim_display, humeur_display, game_state['message']
def handle_bravo(current_state):
    """Handle the "Bravo" button.

    When the puppy stands on the reward cell, the level is incremented and the
    environment state is edited in place: hunger is lowered by 0.5 (clipped to
    [0, 1]) and mood raised by 0.5 (clipped to [-1, 1]). Otherwise only the
    message changes.

    Returns:
        ``(state, puppy_x, puppy_y, faim, humeur, message)`` where ``faim`` is
        the hunger entry multiplied by 100.
    """
    state = current_state
    env_vector = _get_env_state(env)
    if env is None:
        return state, state['puppy_pos'][0], state['puppy_pos'][1], 0, 0, '❌ Erreur: Modèle non chargé.'

    (puppy_x, puppy_y), (goal_x, goal_y) = state['puppy_pos'], state['reward_pos']

    if (puppy_x, puppy_y) != (goal_x, goal_y):
        state['message'] = '😕 MiRobot doit être sur la case de la récompense pour recevoir un "BRAVO" !'
    else:
        state['level'] = state['level'] + 1
        # Reward the agent directly in the environment's state vector.
        fed = np.clip(env_vector[ETAT_FAIM] - 0.5, 0.0, 1.0)
        env_vector[ETAT_FAIM] = fed
        cheered = np.clip(env_vector[ETAT_HUMEUR] + 0.5, -1.0, 1.0)
        env_vector[ETAT_HUMEUR] = cheered
        next_level = state["level"]
        state['message'] = f'🥳 BRAVO ! MiRobot a bien réussi ! Niveau suivant : {next_level}. Repositionnez la récompense pour continuer !'

    return (
        state,
        state['puppy_pos'][0],
        state['puppy_pos'][1],
        env_vector[ETAT_FAIM] * 100,
        env_vector[ETAT_HUMEUR],
        state['message'],
    )
| def _draw_grid(px, py, rx, ry, reward_path): | |
| """Dessine la grille de jeu avec le chiot et la récompense.""" | |
| if reward_path is None: | |
| return "<p style='text-align: center; color: red;'>Veuillez télécharger une image de récompense pour afficher la grille.</p>" | |
| grid_html = f"<div style='width: 400px; height: 400px; display: grid; grid-template-columns: repeat({GRID_SIZE}, 1fr); border: 2px solid #333; margin: auto;'>" | |
| puppy_icon = "<span style='font-size: 30px;'>🐶</span>" | |
| reward_src = f"file/{reward_path}" | |
| for y in range(GRID_SIZE): | |
| for x in range(GRID_SIZE): | |
| style = "border: 1px dotted #ccc; display: flex; align-items: center; justify-content: center; position: relative;" | |
| content = "" | |
| is_puppy = (x == px and y == py) | |
| is_reward = (x == rx and y == ry) | |
| if is_puppy: | |
| content = puppy_icon | |
| if is_reward: | |
| if is_puppy: | |
| style += "background-color: #d4edda;" | |
| else: | |
| content += f"<img src='{reward_src}' style='width: 80%; height: 80%; object-fit: contain;'/>" | |
| style += "background-color: #fff3cd;" | |
| grid_html += f"<div style='{style}'>{content}</div>" | |
| grid_html += "</div>" | |
| return grid_html | |
def update_reward_pos(current_state, reward_x, reward_y, reward_path):
    """Record a new reward position (and image path) in the game state.

    The state dict is updated in place and returned together with the
    confirmation message that was stored in it.
    """
    placement_note = f"Récompense placée à ({reward_x}, {reward_y}). Donnez un ordre !"
    current_state.update(
        reward_pos=[reward_x, reward_y],
        reward_asset_path=reward_path,
        message=placement_note,
    )
    return current_state, placement_note
# ----------------------------------------------------------------------
# 3. GRADIO INTERFACE
# ----------------------------------------------------------------------
def _refresh_status(game_state, faim, humeur):
    """Return the level heading plus the hunger and mood values for the status widgets."""
    return [f"### Niveau Actuel : {game_state['level']}", faim, humeur]


# Grid shown at start-up; with no reward image yet, _draw_grid returns the
# "please upload an image" notice.
initial_grid_html = _draw_grid(
    game_state_initial['puppy_pos'][0], game_state_initial['puppy_pos'][1],
    game_state_initial['reward_pos'][0], game_state_initial['reward_pos'][1],
    None,
)

if model is None:
    # Loading failed: expose a minimal interface that only reports the error.
    demo = gr.Interface(
        fn=lambda: "Le modèle MiRobot n'a pas pu être chargé. Vérifiez les logs ou le REPO_ID.",
        inputs=[],
        outputs=gr.HTML(label="État du Modèle"),
        title="❌ MiRobot Game - Erreur de Chargement",
    )
else:
    # NOTE(review): the widget nesting below is reconstructed from a source
    # whose indentation was lost — confirm the layout against the running app.
    with gr.Blocks(title="MiRobot - Le Jeu d'Obéissance 🐾") as demo:
        # Invisible JSON component used to carry the game-state dict.
        game_state_json = gr.JSON(value=game_state_initial, visible=False)

        gr.Markdown(
            f"""
# MiRobot - Le Jeu d'Obéissance 🐾
Bienvenue dans la simulation interactive du modèle IA **{REPO_ID}** !
**Objectif :** Guider MiRobot vers la récompense en donnant des ordres. Attention, sa **Faim** augmente à chaque pas !
"""
        )

        with gr.Row():
            with gr.Column(scale=2):
                # gr.Image with type="filepath" hands the handlers a file path.
                reward_image = gr.Image(label="1. Télécharger Image Récompense (Obligatoire)", type="filepath", height=150, width=150)
                with gr.Row():
                    reward_x = gr.Slider(minimum=0, maximum=GRID_SIZE - 1, step=1, value=0, label="2. Pos. Récompense X")
                    reward_y = gr.Slider(minimum=0, maximum=GRID_SIZE - 1, step=1, value=0, label="2. Pos. Récompense Y")
                grid_display = gr.HTML(label="Plateau de Jeu (10x10)", value=initial_grid_html)

            with gr.Column(scale=1):
                level_display = gr.Markdown(f"### Niveau Actuel : {INITIAL_LEVEL}")
                faim_bar = gr.Slider(minimum=0, maximum=100, value=initial_faim, label="Faim de MiRobot (%)", interactive=False)
                humeur_bar = gr.Slider(minimum=-1.0, maximum=1.0, value=initial_humeur, label="Humeur de MiRobot", interactive=False)
                command_input = gr.Dropdown(
                    choices=list(ACTION_MAP_USER.keys()),
                    label="3. Ordre du Maître",
                    value="AVANCER",
                )
                action_btn = gr.Button("4. Donner l'Ordre 🗣️", variant="primary")
                bravo_btn = gr.Button("5. Bravo ! (Récompense)", variant="secondary")
                reset_btn = gr.Button("Redémarrer le Jeu 🔄", variant="secondary")
                message_output = gr.Markdown(f"**Message :** {game_state_initial['message']}")

        # Hidden per-session values passed between chained event steps.
        puppy_pos_x = gr.State(game_state_initial['puppy_pos'][0])
        puppy_pos_y = gr.State(game_state_initial['puppy_pos'][1])
        faim_state = gr.State(initial_faim)
        humeur_state = gr.State(initial_humeur)

        # Input/output groups shared by several event chains.
        grid_inputs = [puppy_pos_x, puppy_pos_y, reward_x, reward_y, reward_image]
        status_inputs = [game_state_json, faim_state, humeur_state]
        status_outputs = [level_display, faim_bar, humeur_bar]

        # --- EVENTS ---
        # Moving either reward slider stores the new position, then redraws.
        for reward_slider in (reward_x, reward_y):
            reward_slider.change(
                fn=update_reward_pos,
                inputs=[game_state_json, reward_x, reward_y, reward_image],
                outputs=[game_state_json, message_output],
            ).then(
                fn=_draw_grid,
                inputs=grid_inputs,
                outputs=grid_display,
            )

        action_btn.click(
            fn=handle_user_command,
            inputs=[game_state_json, command_input, reward_image],
            outputs=[game_state_json, command_input, puppy_pos_x, puppy_pos_y, faim_state, humeur_state, message_output],
        ).then(
            fn=_refresh_status,
            inputs=status_inputs,
            outputs=status_outputs,
        ).then(
            fn=_draw_grid,
            inputs=grid_inputs,
            outputs=grid_display,
        )

        bravo_btn.click(
            fn=handle_bravo,
            inputs=[game_state_json],
            outputs=[game_state_json, puppy_pos_x, puppy_pos_y, faim_state, humeur_state, message_output],
        ).then(
            fn=_refresh_status,
            inputs=status_inputs,
            outputs=status_outputs,
        ).then(
            fn=_draw_grid,
            inputs=grid_inputs,
            outputs=grid_display,
        )

        # Reset refreshes the hunger/mood bars too, like the other chains;
        # updating only the level heading left the bars at pre-reset values.
        reset_btn.click(
            fn=_reset_game,
            inputs=[reward_image],
            outputs=[game_state_json, puppy_pos_x, puppy_pos_y, reward_x, reward_y, faim_state, humeur_state, message_output],
        ).then(
            fn=_refresh_status,
            inputs=status_inputs,
            outputs=status_outputs,
        ).then(
            fn=_draw_grid,
            inputs=grid_inputs,
            outputs=grid_display,
        )

        # Uploading or changing the reward image redraws the board.
        reward_image.change(
            fn=_draw_grid,
            inputs=grid_inputs,
            outputs=grid_display,
        )

demo.launch()