"""Gradio front-end for the MiRobot obedience game.

The script downloads the MiRobot environment script and the trained PPO
model from the Hugging Face Hub, registers the environment with gymnasium,
and exposes a small grid game in which the user gives orders to the agent.
"""
import copy
import html
import os

import gradio as gr
import gymnasium as gym
import numpy as np
from gymnasium import register, spaces
from huggingface_hub import hf_hub_download
from stable_baselines3 import PPO

# --- HUGGING FACE REPOSITORY SETTINGS ---
REPO_ID = "Clemylia/MiRobot"
MODEL_FILE = "mirobot_final_model.zip"
ENV_SCRIPT_FILE = "MiRobotEnv.py"
ENV_ID = 'MiRobot-v0'

# --- GAME CONSTANTS ---
GRID_SIZE = 10
FAIM_PENALTY_THRESHOLD = 0.9
INITIAL_LEVEL = 1

# --- STATE VECTOR INDICES ---
CMD_AVANCER = 0
CMD_TOURNER = 1
ETAT_FAIM = 2
ETAT_SOMMEIL = 3
ETAT_HUMEUR = 4

# Action maps: model action id -> label, and user order -> expected action id.
ACTION_MAP_MODEL = {0: "S'Arrêter", 1: "Avancer", 2: "Tourner G", 3: "Tourner D"}
ACTION_MAP_USER = {
    "AVANCER": 1,
    "TOURNER À GAUCHE": 2,
    "TOURNER À DROITE": 3,
}

# ----------------------------------------------------------------------
# 1. MODEL AND ENVIRONMENT SETUP
# ----------------------------------------------------------------------

model = None
env = None
MiRobotEnv = None
initial_faim = 0.0
initial_humeur = 0.0

# Top-level boundary: any failure here is reported and the UI falls back to
# an error page (model stays None), so the broad catch is deliberate.
try:
    print("Téléchargement des fichiers MiRobot...")
    TEMP_DIR = "./mirobot_assets"
    os.makedirs(TEMP_DIR, exist_ok=True)

    # --- 1. Load the MiRobotEnv class ---
    env_path = hf_hub_download(repo_id=REPO_ID, filename=ENV_SCRIPT_FILE, local_dir=TEMP_DIR)

    env_globals = {'gym': gym, 'np': np, 'spaces': spaces}
    # NOTE(security): this executes code downloaded from the Hub. It is only
    # safe as long as REPO_ID is trusted; consider pinning a revision.
    with open(env_path, 'r', encoding='utf-8') as f:
        exec(f.read(), env_globals)
    MiRobotEnv = env_globals['MiRobotEnv']

    # --- 2. Register the custom environment ---
    register(
        id=ENV_ID,
        entry_point=MiRobotEnv,
    )

    # --- 3. Load the model and create the environment ---
    model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILE, local_dir=TEMP_DIR)
    model = PPO.load(model_path)
    env = gym.make(ENV_ID)
    env.reset()

    # Initial read of the state; converted to plain floats for the sliders.
    initial_faim = float(env.unwrapped.state[ETAT_FAIM] * 100)
    initial_humeur = float(env.unwrapped.state[ETAT_HUMEUR])

    print("✅ MiRobot prêt pour l'interface Gradio.")

except Exception as e:
    print(f"❌ ERREUR CRITIQUE lors du chargement de MiRobot: {e}")

# ----------------------------------------------------------------------
# 2. GAME LOGIC
# ----------------------------------------------------------------------

# Template for a fresh game; always deep-copied, never handed out directly.
game_state_initial = {
    'level': INITIAL_LEVEL,
    'puppy_pos': [GRID_SIZE // 2, GRID_SIZE // 2],
    'reward_pos': [0, 0],
    'message': 'Bienvenue ! Téléchargez une récompense et commencez !',
    'reward_asset_path': None
}


def _get_env_state(env_instance):
    """Return the ``state`` attribute of the unwrapped environment.

    Returns ``None`` when ``env_instance`` is ``None``. ``.unwrapped`` is used
    so the access works even when gymnasium wrapped the environment.
    """
    if env_instance is None:
        return None
    return env_instance.unwrapped.state


def _display_values(env_state):
    """Return ``(hunger_percent, mood)`` as built-in floats for the UI."""
    return float(env_state[ETAT_FAIM] * 100), float(env_state[ETAT_HUMEUR])


def _reset_game(reward_path):
    """Reset the board and the environment.

    Args:
        reward_path: File path of the reward image (or ``None``), stored in
            the new game state.

    Returns:
        An 8-tuple ``(state, puppy_x, puppy_y, reward_x, reward_y,
        hunger_percent, mood, message)`` in the order expected by the reset
        button's outputs.
    """
    # Deep copy so the nested position lists of the template are never shared.
    new_state = copy.deepcopy(game_state_initial)

    if env is not None:
        env.reset()
        faim_display, humeur_display = _display_values(_get_env_state(env))
    else:
        faim_display = 0.0
        humeur_display = 0.0

    new_state.update({
        'reward_asset_path': reward_path,
        'message': f'Jeu réinitialisé. Niveau {INITIAL_LEVEL}. Placez la récompense !'
    })

    puppy_x, puppy_y = new_state['puppy_pos']
    reward_x, reward_y = new_state['reward_pos']
    return (new_state, puppy_x, puppy_y, reward_x, reward_y,
            faim_display, humeur_display, new_state['message'])


def handle_user_command(current_state, command_text, reward_path):
    """Apply a user order and let the RL model decide what MiRobot does.

    Args:
        current_state: Game state dict (see ``game_state_initial``).
        command_text: The order chosen by the user.
        reward_path: File path of the reward image, or ``None``.

    Returns:
        A 7-tuple ``(state, command_text, puppy_x, puppy_y, hunger_percent,
        mood, message)`` matching the outputs of the order button. Every
        branch returns this same shape.
    """
    game_state = current_state

    # 1. Bail out before touching the environment if loading failed.
    if model is None or env is None:
        return (game_state, command_text,
                game_state['puppy_pos'][0], game_state['puppy_pos'][1],
                0, 0, '❌ Erreur: Le modèle MiRobot n\'a pas pu être chargé !')

    current_env_state = _get_env_state(env)
    game_state['reward_asset_path'] = reward_path

    # 2. Hunger check (defeat).
    faim_actuelle = float(current_env_state[ETAT_FAIM])
    if faim_actuelle >= FAIM_PENALTY_THRESHOLD:
        defeat_message = f'💔 Défaite ! MiRobot a trop faim ({faim_actuelle:.0%}). Jeu réinitialisé.'
        # _reset_game returns 8 values in the reset-button order; repack them
        # into this handler's 7-value order and keep the defeat message.
        new_state, puppy_x, puppy_y, _, _, faim_display, humeur_display, _ = _reset_game(reward_path)
        # The reward sliders are not outputs of this event, so keep the stored
        # reward position in sync with what the sliders still show.
        new_state['reward_pos'] = list(game_state['reward_pos'])
        new_state['message'] = defeat_message
        return (new_state, command_text, puppy_x, puppy_y,
                faim_display, humeur_display, defeat_message)

    # 3. Unknown order: clear the command slots and step with action 0.
    command_upper = command_text.upper()
    if command_upper not in ACTION_MAP_USER:
        game_state['message'] = f"🤔 MiRobot n'a pas compris l'ordre '{command_text}'. Sa faim augmente..."
        current_env_state[CMD_AVANCER] = 0.0
        current_env_state[CMD_TOURNER] = 0.0
        env.step(0)

        # Re-read after the step in case the env replaced its state object.
        faim_display, humeur_display = _display_values(_get_env_state(env))
        return (game_state, command_text,
                game_state['puppy_pos'][0], game_state['puppy_pos'][1],
                faim_display, humeur_display, game_state['message'])

    # 4. Write the order into the state and let the model decide.
    command_action_name = command_upper
    current_env_state[CMD_AVANCER] = 1.0 if command_action_name == "AVANCER" else 0.0
    current_env_state[CMD_TOURNER] = 1.0 if command_action_name.startswith("TOURNER") else 0.0

    mirobot_action_id, _ = model.predict(current_env_state, deterministic=True)
    # predict() returns a NumPy array; a built-in int is needed for the
    # comparison and the ACTION_MAP_MODEL lookup below.
    mirobot_action_id = int(np.asarray(mirobot_action_id).item())

    # NOTE(review): terminated/truncated are ignored, so a finished episode
    # keeps being stepped — confirm this is intended for MiRobotEnv.
    new_obs, reward, terminated, truncated, info = env.step(mirobot_action_id)

    dx, dy = 0, 0
    if mirobot_action_id == ACTION_MAP_USER[command_action_name]:
        game_state['message'] = f"👏 MiRobot a obéi à '{command_action_name}'. Récompense RL: {reward:.2f}"

        if command_action_name == "AVANCER":
            # Move one cell toward the reward along the axis with the larger gap.
            rx, ry = game_state['reward_pos']
            px, py = game_state['puppy_pos']

            if abs(rx - px) > abs(ry - py):
                dx = 1 if rx > px else -1
            elif abs(ry - py) > 0:
                dy = 1 if ry > py else -1
    else:
        real_action_name = ACTION_MAP_MODEL[mirobot_action_id]
        game_state['message'] = f"😥 MiRobot a désobéi ! Il a fait '{real_action_name}' au lieu de '{command_action_name}'. Récompense RL: {reward:.2f}"

    # Position update, kept on the board and stored as built-in ints so the
    # game state stays JSON-serialisable.
    new_x = int(np.clip(game_state['puppy_pos'][0] + dx, 0, GRID_SIZE - 1))
    new_y = int(np.clip(game_state['puppy_pos'][1] + dy, 0, GRID_SIZE - 1))
    game_state['puppy_pos'] = [new_x, new_y]

    faim_display, humeur_display = _display_values(_get_env_state(env))

    return (game_state, command_text, new_x, new_y,
            faim_display, humeur_display, game_state['message'])


def handle_bravo(current_state):
    """Handle the "Bravo" button.

    If the puppy stands on the reward cell, the level is incremented, hunger
    is lowered by 0.5 (clipped to [0, 1]) and mood raised by 0.5 (clipped to
    [-1, 1]) directly in the environment state.

    Returns:
        A 6-tuple ``(state, puppy_x, puppy_y, hunger_percent, mood, message)``.
    """
    game_state = current_state

    if env is None:
        return (game_state, game_state['puppy_pos'][0], game_state['puppy_pos'][1],
                0, 0, '❌ Erreur: Modèle non chargé.')

    current_env_state = _get_env_state(env)
    px, py = game_state['puppy_pos']
    rx, ry = game_state['reward_pos']

    if px == rx and py == ry:
        game_state['level'] += 1

        # In-place modification of the environment state.
        current_env_state[ETAT_FAIM] = np.clip(current_env_state[ETAT_FAIM] - 0.5, 0.0, 1.0)
        current_env_state[ETAT_HUMEUR] = np.clip(current_env_state[ETAT_HUMEUR] + 0.5, -1.0, 1.0)

        game_state['message'] = f'🥳 BRAVO ! MiRobot a bien réussi ! Niveau suivant : {game_state["level"]}. Repositionnez la récompense pour continuer !'
    else:
        game_state['message'] = '😕 MiRobot doit être sur la case de la récompense pour recevoir un "BRAVO" !'

    faim_display, humeur_display = _display_values(current_env_state)

    return (game_state, game_state['puppy_pos'][0], game_state['puppy_pos'][1],
            faim_display, humeur_display, game_state['message'])


def _draw_grid(px, py, rx, ry, reward_path):
    """Render the board as an HTML string.

    Args:
        px, py: Puppy cell coordinates.
        rx, ry: Reward cell coordinates.
        reward_path: File path of the reward image; when ``None`` a
            placeholder asking for an upload is returned instead of the grid.

    NOTE(review): the HTML tags were missing from this copy of the file; the
    markup and container styles below are a reconstruction — confirm them
    against the deployed version.
    """
    if reward_path is None:
        return (
            "<div style='padding: 20px; text-align: center; border: 1px dashed #ccc;'>"
            "<p>Veuillez télécharger une image de récompense pour afficher la grille.</p>"
            "</div>"
        )

    puppy_icon = "🐶"
    # Escaped because the path is interpolated into an HTML attribute.
    # NOTE(review): the "file/" route prefix depends on the Gradio version.
    reward_src = html.escape(f"file/{reward_path}", quote=True)

    cells = []
    for y in range(GRID_SIZE):
        for x in range(GRID_SIZE):
            style = "border: 1px dotted #ccc; display: flex; align-items: center; justify-content: center; position: relative;"
            content = ""

            is_puppy = (x == px and y == py)
            is_reward = (x == rx and y == ry)

            if is_puppy:
                content = puppy_icon

            if is_reward:
                if is_puppy:
                    # Puppy reached the reward: highlight only, no image.
                    style += "background-color: #d4edda;"
                else:
                    content += (
                        f"<img src='{reward_src}' alt='Récompense' "
                        "style='max-width: 100%; max-height: 100%; object-fit: contain;'>"
                    )
                    style += "background-color: #fff3cd;"

            cells.append(f"<div style='{style}'>{content}</div>")

    grid_open = (
        f"<div style='display: grid; "
        f"grid-template-columns: repeat({GRID_SIZE}, 40px); "
        f"grid-template-rows: repeat({GRID_SIZE}, 40px); "
        f"width: fit-content; border: 1px solid #333;'>"
    )
    return grid_open + "".join(cells) + "</div>"


def update_reward_pos(current_state, reward_x, reward_y, reward_path):
    """Store the reward position and image path in the game state.

    Returns:
        ``(state, message)``.
    """
    game_state = current_state
    game_state['reward_pos'] = [reward_x, reward_y]
    game_state['reward_asset_path'] = reward_path
    game_state['message'] = f"Récompense placée à ({reward_x}, {reward_y}). Donnez un ordre !"
    return game_state, game_state['message']


# ----------------------------------------------------------------------
# 3. GRADIO INTERFACE
# ----------------------------------------------------------------------

initial_grid_html = _draw_grid(game_state_initial['puppy_pos'][0], game_state_initial['puppy_pos'][1],
                               game_state_initial['reward_pos'][0], game_state_initial['reward_pos'][1],
                               None)

if model is None:
    # Fallback page shown when the model could not be loaded.
    demo = gr.Interface(
        fn=lambda: "Le modèle MiRobot n'a pas pu être chargé. Vérifiez les logs ou le REPO_ID.",
        inputs=[],
        outputs=gr.HTML(label="État du Modèle"),
        title="❌ MiRobot Game - Erreur de Chargement",
    )
else:
    with gr.Blocks(title="MiRobot - Le Jeu d'Obéissance 🐾") as demo:

        game_state_json = gr.JSON(value=game_state_initial, visible=False)

        gr.Markdown(
            f"""
            # MiRobot - Le Jeu d'Obéissance 🐾
            Bienvenue dans la simulation interactive du modèle IA **{REPO_ID}** !
            **Objectif :** Guider MiRobot vers la récompense en donnant des ordres. Attention, sa **Faim** augmente à chaque pas !
            """
        )

        with gr.Row():
            with gr.Column(scale=2):
                # gr.Image with type="filepath" hands the handlers a file path.
                reward_image = gr.Image(label="1. Télécharger Image Récompense (Obligatoire)", type="filepath", height=150, width=150)

                with gr.Row():
                    reward_x = gr.Slider(minimum=0, maximum=GRID_SIZE - 1, step=1, value=0, label="2. Pos. Récompense X")
                    reward_y = gr.Slider(minimum=0, maximum=GRID_SIZE - 1, step=1, value=0, label="2. Pos. Récompense Y")

                grid_display = gr.HTML(label="Plateau de Jeu (10x10)", value=initial_grid_html)

            with gr.Column(scale=1):
                level_display = gr.Markdown(f"### Niveau Actuel : {INITIAL_LEVEL}")

                faim_bar = gr.Slider(minimum=0, maximum=100, value=initial_faim, label="Faim de MiRobot (%)", interactive=False)
                humeur_bar = gr.Slider(minimum=-1.0, maximum=1.0, value=initial_humeur, label="Humeur de MiRobot", interactive=False)

                command_input = gr.Dropdown(
                    choices=list(ACTION_MAP_USER.keys()),
                    label="3. Ordre du Maître",
                    value="AVANCER"
                )

                action_btn = gr.Button("4. Donner l'Ordre 🗣️", variant="primary")
                bravo_btn = gr.Button("5. Bravo ! (Récompense)", variant="secondary")
                reset_btn = gr.Button("Redémarrer le Jeu 🔄", variant="secondary")

                message_output = gr.Markdown(f"**Message :** {game_state_initial['message']}")

        # Hidden states
        puppy_pos_x = gr.State(game_state_initial['puppy_pos'][0])
        puppy_pos_y = gr.State(game_state_initial['puppy_pos'][1])
        faim_state = gr.State(initial_faim)
        humeur_state = gr.State(initial_humeur)

        # --- EVENTS ---

        reward_x.change(
            fn=update_reward_pos,
            inputs=[game_state_json, reward_x, reward_y, reward_image],
            outputs=[game_state_json, message_output]
        ).then(
            fn=_draw_grid,
            inputs=[puppy_pos_x, puppy_pos_y, reward_x, reward_y, reward_image],
            outputs=grid_display
        )

        reward_y.change(
            fn=update_reward_pos,
            inputs=[game_state_json, reward_x, reward_y, reward_image],
            outputs=[game_state_json, message_output]
        ).then(
            fn=_draw_grid,
            inputs=[puppy_pos_x, puppy_pos_y, reward_x, reward_y, reward_image],
            outputs=grid_display
        )

        action_btn.click(
            fn=handle_user_command,
            inputs=[game_state_json, command_input, reward_image],
            outputs=[game_state_json, command_input, puppy_pos_x, puppy_pos_y, faim_state, humeur_state, message_output]
        ).then(
            fn=lambda g, f, h: [f"### Niveau Actuel : {g['level']}", f, h],
            inputs=[game_state_json, faim_state, humeur_state],
            outputs=[level_display, faim_bar, humeur_bar]
        ).then(
            fn=_draw_grid,
            inputs=[puppy_pos_x, puppy_pos_y, reward_x, reward_y, reward_image],
            outputs=grid_display
        )

        bravo_btn.click(
            fn=handle_bravo,
            inputs=[game_state_json],
            outputs=[game_state_json, puppy_pos_x, puppy_pos_y, faim_state, humeur_state, message_output]
        ).then(
            fn=lambda g, f, h: [f"### Niveau Actuel : {g['level']}", f, h],
            inputs=[game_state_json, faim_state, humeur_state],
            outputs=[level_display, faim_bar, humeur_bar]
        ).then(
            fn=_draw_grid,
            inputs=[puppy_pos_x, puppy_pos_y, reward_x, reward_y, reward_image],
            outputs=grid_display
        )

        reset_btn.click(
            fn=_reset_game,
            inputs=[reward_image],
            outputs=[game_state_json, puppy_pos_x, puppy_pos_y, reward_x, reward_y, faim_state, humeur_state, message_output]
        ).then(
            fn=lambda g: f"### Niveau Actuel : {g['level']}",
            inputs=[game_state_json],
            outputs=level_display
        ).then(
            fn=_draw_grid,
            inputs=[puppy_pos_x, puppy_pos_y, reward_x, reward_y, reward_image],
            outputs=grid_display
        )

        reward_image.change(
            fn=_draw_grid,
            inputs=[puppy_pos_x, puppy_pos_y, reward_x, reward_y, reward_image],
            outputs=grid_display
        )

demo.launch()