Spaces:

Clemylia
/

Dog-game-guide

Sleeping

App Files Files Community

Clemylia committed on Oct 15

Commit

3965a9e

verified ·

1 Parent(s): 7b209a1

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -57

app.py CHANGED Viewed

@@ -35,7 +35,7 @@ ACTION_MAP_USER = {
 }
 # ----------------------------------------------------------------------
-# 1. PRÉPARATION DU MODÈLE ET DE L'ENVIRONNEMENT
 # ----------------------------------------------------------------------
 model = None
@@ -53,7 +53,6 @@ try:
     # --- 1. Chargement de la classe MiRobotEnv ---
     env_path = hf_hub_download(repo_id=REPO_ID, filename=ENV_SCRIPT_FILE, local_dir=TEMP_DIR)
-    # INJECTION DE DÉPENDANCES pour que MiRobotEnv.py fonctionne
     env_globals = {'gym': gym, 'np': np, 'spaces': spaces}
     with open(env_path, 'r') as f:
         exec(f.read(), env_globals)
@@ -74,17 +73,17 @@ try:
     model = PPO.load(model_path)
     env = gym.make(ENV_ID)
-    env.reset() # IMPORTANT : Assure la création de self.state
-    initial_faim = env.state[ETAT_FAIM] * 100
-    initial_humeur = env.state[ETAT_HUMEUR]
     print("✅ MiRobot prêt pour l'interface Gradio.")
 except Exception as e:
     print(f"❌ ERREUR CRITIQUE lors du chargement de MiRobot: {e}")
-    # initial_faim/humeur restent à 0.0
 # ----------------------------------------------------------------------
 # 2. LOGIQUE DU JEU
 # ----------------------------------------------------------------------
@@ -98,6 +97,13 @@ game_state_initial = {
     'reward_asset_path': None
 }
 def _reset_game(reward_path):
     """Réinitialise les positions et l'état interne du chiot."""
@@ -105,8 +111,9 @@ def _reset_game(reward_path):
     if env is not None:
         obs, info = env.reset()
-        faim_display = env.state[ETAT_FAIM] * 100
-        humeur_display = env.state[ETAT_HUMEUR]
     else:
         faim_display = 0.0
         humeur_display = 0.0
@@ -116,12 +123,12 @@ def _reset_game(reward_path):
         'message': f'Jeu réinitialisé. Niveau {INITIAL_LEVEL}. Placez la récompense !'
     })
-    # On retourne les composants séparément
     return new_state, new_state['puppy_pos'][0], new_state['puppy_pos'][1], new_state['reward_pos'][0], new_state['reward_pos'][1], faim_display, humeur_display, new_state['message']
 def handle_user_command(current_state, command_text, reward_path):
     """Gère une commande utilisateur et l'action du modèle RL."""
     game_state = current_state
     if model is None or env is None:
         return game_state, command_text, 5, 5, 0, 0, '❌ Erreur: Le modèle MiRobot n\'a pas pu être chargé !'
@@ -129,7 +136,7 @@ def handle_user_command(current_state, command_text, reward_path):
     game_state['reward_asset_path'] = reward_path
     # Vérification de la faim (défaite)
-    faim_actuelle = env.state[ETAT_FAIM]
     if faim_actuelle >= FAIM_PENALTY_THRESHOLD:
         game_state['message'] = f'💔 Défaite ! MiRobot a trop faim ({faim_actuelle:.0%}). Jeu réinitialisé.'
         return _reset_game(reward_path)
@@ -138,22 +145,23 @@ def handle_user_command(current_state, command_text, reward_path):
     if command_upper not in ACTION_MAP_USER:
         game_state['message'] = f"🤔 MiRobot n'a pas compris l'ordre '{command_text}'. Sa faim augmente..."
-        env.state[CMD_AVANCER] = 0.0
-        env.state[CMD_TOURNER] = 0.0
         env.step(0) # Action 0: S'arrêter
-        faim_display = env.state[ETAT_FAIM] * 100
-        humeur_display = env.state[ETAT_HUMEUR]
         return game_state, command_text, game_state['puppy_pos'][0], game_state['puppy_pos'][1], faim_display, humeur_display, game_state['message']
     # 4. Exécution de la décision du Modèle
     command_action_name = command_upper
-    env.state[CMD_AVANCER] = 1.0 if command_action_name == "AVANCER" else 0.0
-    env.state[CMD_TOURNER] = 1.0 if command_action_name.startswith("TOURNER") else 0.0
-    mirobot_action_id, _ = model.predict(env.state, deterministic=True)
     new_obs, reward, terminated, truncated, info = env.step(mirobot_action_id)
@@ -166,7 +174,6 @@ def handle_user_command(current_state, command_text, reward_path):
             rx, ry = game_state['reward_pos']
             px, py = game_state['puppy_pos']
-            # Déplacement d'une unité vers la récompense
             if abs(rx - px) > abs(ry - py):
                 dx = 1 if rx > px else -1
             elif abs(ry - py) > 0:
@@ -181,15 +188,17 @@ def handle_user_command(current_state, command_text, reward_path):
     new_y = np.clip(game_state['puppy_pos'][1] + dy, 0, GRID_SIZE - 1)
     game_state['puppy_pos'] = [new_x, new_y]
-    faim_display = env.state[ETAT_FAIM] * 100
-    humeur_display = env.state[ETAT_HUMEUR]
     return game_state, command_text, new_x, new_y, faim_display, humeur_display, game_state['message']
 def handle_bravo(current_state):
     """Gère l'événement de récompense."""
     game_state = current_state
     if env is None:
         return game_state, game_state['puppy_pos'][0], game_state['puppy_pos'][1], 0, 0, '❌ Erreur: Modèle non chargé.'
@@ -199,23 +208,21 @@ def handle_bravo(current_state):
     if px == rx and py == ry:
         game_state['level'] += 1
-        env.state[ETAT_FAIM] = np.clip(env.state[ETAT_FAIM] - 0.5, 0.0, 1.0)
-        env.state[ETAT_HUMEUR] = np.clip(env.state[ETAT_HUMEUR] + 0.5, -1.0, 1.0)
         game_state['message'] = f'🥳 BRAVO ! MiRobot a bien réussi ! Niveau suivant : {game_state["level"]}. Repositionnez la récompense pour continuer !'
     else:
         game_state['message'] = '😕 MiRobot doit être sur la case de la récompense pour recevoir un "BRAVO" !'
-    faim_display = env.state[ETAT_FAIM] * 100
-    humeur_display = env.state[ETAT_HUMEUR]
     return game_state, game_state['puppy_pos'][0], game_state['puppy_pos'][1], faim_display, humeur_display, game_state['message']
 def _draw_grid(px, py, rx, ry, reward_path):
-    """
-    Dessine la grille de jeu avec le chiot et la récompense.
-    REMARQUE : Prend maintenant les coordonnées séparément.
-    """
     if reward_path is None:
         return "<p style='text-align: center; color: red;'>Veuillez télécharger une image de récompense pour afficher la grille.</p>"
@@ -240,6 +247,7 @@ def _draw_grid(px, py, rx, ry, reward_path):
                 if is_puppy:
                     style += "background-color: #d4edda;"
                 else:
                     content += f"<img src='{reward_src}' style='width: 80%; height: 80%; object-fit: contain;'/>"
                     style += "background-color: #fff3cd;"
@@ -259,10 +267,9 @@ def update_reward_pos(current_state, reward_x, reward_y, reward_path):
 # ----------------------------------------------------------------------
-# 3. INTERFACE GRADIO
 # ----------------------------------------------------------------------
-# Assurer une valeur initiale pour l'affichage de la grille
 initial_grid_html = _draw_grid(game_state_initial['puppy_pos'][0], game_state_initial['puppy_pos'][1],
                                game_state_initial['reward_pos'][0], game_state_initial['reward_pos'][1],
                                None)
@@ -291,7 +298,8 @@ else:
         with gr.Row():
             with gr.Column(scale=2):
-                reward_file = gr.File(label="1. Télécharger Image Récompense (Obligatoire)", type="filepath")
                 with gr.Row():
                     reward_x = gr.Slider(minimum=0, maximum=GRID_SIZE - 1, step=1, value=0, label="2. Pos. Récompense X")
@@ -302,7 +310,6 @@ else:
             with gr.Column(scale=1):
                 level_display = gr.Markdown(f"### Niveau Actuel : {INITIAL_LEVEL}")
-                # Utilisation des valeurs initiales sûres
                 faim_bar = gr.Slider(minimum=0, maximum=100, value=initial_faim, label="Faim de MiRobot (%)", interactive=False)
                 humeur_bar = gr.Slider(minimum=-1.0, maximum=1.0, value=initial_humeur, label="Humeur de MiRobot", interactive=False)
@@ -324,48 +331,42 @@ else:
         faim_state = gr.State(initial_faim)
         humeur_state = gr.State(initial_humeur)
-        # --- ÉVÉNEMENTS ---
-        # 1. Mise à jour de la position de la récompense
         reward_x.change(
             fn=update_reward_pos,
-            inputs=[game_state_json, reward_x, reward_y, reward_file],
             outputs=[game_state_json, message_output]
         ).then(
-             # CORRECTION MAJEURE: Passer puppy_pos_x et puppy_pos_y séparément
             fn=_draw_grid,
-            inputs=[puppy_pos_x, puppy_pos_y, reward_x, reward_y, reward_file],
             outputs=grid_display
         )
         reward_y.change(
             fn=update_reward_pos,
-            inputs=[game_state_json, reward_x, reward_y, reward_file],
             outputs=[game_state_json, message_output]
         ).then(
-            # CORRECTION MAJEURE: Passer puppy_pos_x et puppy_pos_y séparément
             fn=_draw_grid,
-            inputs=[puppy_pos_x, puppy_pos_y, reward_x, reward_y, reward_file],
             outputs=grid_display
         )
-        # 2. Gestion de l'Action (Bouton "Donner l'Ordre")
         action_btn.click(
             fn=handle_user_command,
-            inputs=[game_state_json, command_input, reward_file],
             outputs=[game_state_json, command_input, puppy_pos_x, puppy_pos_y, faim_state, humeur_state, message_output]
         ).then(
             fn=lambda g, f, h: [f"### Niveau Actuel : {g['level']}", f, h],
             inputs=[game_state_json, faim_state, humeur_state],
             outputs=[level_display, faim_bar, humeur_bar]
         ).then(
-            # Mise à jour de la grille après le mouvement du chiot
             fn=_draw_grid,
-            inputs=[puppy_pos_x, puppy_pos_y, reward_x, reward_y, reward_file],
             outputs=grid_display
         )
-        # 3. Gestion du "Bravo"
         bravo_btn.click(
             fn=handle_bravo,
             inputs=[game_state_json],
@@ -375,33 +376,28 @@ else:
             inputs=[game_state_json, faim_state, humeur_state],
             outputs=[level_display, faim_bar, humeur_bar]
         ).then(
-            # Mise à jour de la grille (même si la position ne change pas, les couleurs peuvent)
             fn=_draw_grid,
-            inputs=[puppy_pos_x, puppy_pos_y, reward_x, reward_y, reward_file],
             outputs=grid_display
         )
-        # 4. Réinitialisation du Jeu
         reset_btn.click(
             fn=_reset_game,
-            inputs=[reward_file],
             outputs=[game_state_json, puppy_pos_x, puppy_pos_y, reward_x, reward_y, faim_state, humeur_state, message_output]
         ).then(
             fn=lambda g: f"### Niveau Actuel : {g['level']}",
             inputs=[game_state_json],
             outputs=level_display
         ).then(
-             # Mise à jour de la grille après réinitialisation
             fn=_draw_grid,
-            inputs=[puppy_pos_x, puppy_pos_y, reward_x, reward_y, reward_file],
             outputs=grid_display
         )
-        # Mise à jour de la grille lorsque le fichier de récompense change
-        reward_file.change(
-             # CORRECTION MAJEURE: on passe les états de position (x, y) au lieu de les reconstruire
             fn=_draw_grid,
-            inputs=[puppy_pos_x, puppy_pos_y, reward_x, reward_y, reward_file],
             outputs=grid_display
         )

 }
 # ----------------------------------------------------------------------
+# 1. PRÉPARATION DU MODÈLE ET DE L'ENVIRONNEMENT (CORRECTION UNWRAPPED)
 # ----------------------------------------------------------------------
 model = None
     # --- 1. Chargement de la classe MiRobotEnv ---
     env_path = hf_hub_download(repo_id=REPO_ID, filename=ENV_SCRIPT_FILE, local_dir=TEMP_DIR)
     env_globals = {'gym': gym, 'np': np, 'spaces': spaces}
     with open(env_path, 'r') as f:
         exec(f.read(), env_globals)
     model = PPO.load(model_path)
     env = gym.make(ENV_ID)
+    env.reset()
+    # **CORRECTION**: Accéder à l'état via env.unwrapped pour éviter l'erreur OrderEnforcing
+    initial_faim = env.unwrapped.state[ETAT_FAIM] * 100
+    initial_humeur = env.unwrapped.state[ETAT_HUMEUR]
     print("✅ MiRobot prêt pour l'interface Gradio.")
 except Exception as e:
     print(f"❌ ERREUR CRITIQUE lors du chargement de MiRobot: {e}")
 # ----------------------------------------------------------------------
 # 2. LOGIQUE DU JEU
 # ----------------------------------------------------------------------
     'reward_asset_path': None
 }
+def _get_env_state(env_instance):
+    """Safely access the environment state, even through a wrapper.
+
+    Returns None when ``env_instance`` is None; otherwise returns the
+    ``state`` attribute of ``env_instance.unwrapped``. The returned object
+    is handed back as-is (no copy is made here).
+    """
+    if env_instance is None:
+        return None
+    # Return the state of the underlying (unwrapped) environment
+    return env_instance.unwrapped.state
 def _reset_game(reward_path):
     """Réinitialise les positions et l'état interne du chiot."""
     if env is not None:
         obs, info = env.reset()
+        current_state = _get_env_state(env)
+        faim_display = current_state[ETAT_FAIM] * 100
+        humeur_display = current_state[ETAT_HUMEUR]
     else:
         faim_display = 0.0
         humeur_display = 0.0
         'message': f'Jeu réinitialisé. Niveau {INITIAL_LEVEL}. Placez la récompense !'
     })
     return new_state, new_state['puppy_pos'][0], new_state['puppy_pos'][1], new_state['reward_pos'][0], new_state['reward_pos'][1], faim_display, humeur_display, new_state['message']
 def handle_user_command(current_state, command_text, reward_path):
     """Gère une commande utilisateur et l'action du modèle RL."""
     game_state = current_state
+    current_env_state = _get_env_state(env)
     if model is None or env is None:
         return game_state, command_text, 5, 5, 0, 0, '❌ Erreur: Le modèle MiRobot n\'a pas pu être chargé !'
     game_state['reward_asset_path'] = reward_path
     # Vérification de la faim (défaite)
+    faim_actuelle = current_env_state[ETAT_FAIM]
     if faim_actuelle >= FAIM_PENALTY_THRESHOLD:
         game_state['message'] = f'💔 Défaite ! MiRobot a trop faim ({faim_actuelle:.0%}). Jeu réinitialisé.'
         return _reset_game(reward_path)
     if command_upper not in ACTION_MAP_USER:
         game_state['message'] = f"🤔 MiRobot n'a pas compris l'ordre '{command_text}'. Sa faim augmente..."
+        current_env_state[CMD_AVANCER] = 0.0
+        current_env_state[CMD_TOURNER] = 0.0
         env.step(0) # Action 0: S'arrêter
+        faim_display = current_env_state[ETAT_FAIM] * 100
+        humeur_display = current_env_state[ETAT_HUMEUR]
         return game_state, command_text, game_state['puppy_pos'][0], game_state['puppy_pos'][1], faim_display, humeur_display, game_state['message']
     # 4. Exécution de la décision du Modèle
     command_action_name = command_upper
+    current_env_state[CMD_AVANCER] = 1.0 if command_action_name == "AVANCER" else 0.0
+    current_env_state[CMD_TOURNER] = 1.0 if command_action_name.startswith("TOURNER") else 0.0
+    mirobot_action_id, _ = model.predict(current_env_state, deterministic=True)
     new_obs, reward, terminated, truncated, info = env.step(mirobot_action_id)
             rx, ry = game_state['reward_pos']
             px, py = game_state['puppy_pos']
             if abs(rx - px) > abs(ry - py):
                 dx = 1 if rx > px else -1
             elif abs(ry - py) > 0:
     new_y = np.clip(game_state['puppy_pos'][1] + dy, 0, GRID_SIZE - 1)
     game_state['puppy_pos'] = [new_x, new_y]
+    current_env_state = _get_env_state(env) # Relecture après le step
+    faim_display = current_env_state[ETAT_FAIM] * 100
+    humeur_display = current_env_state[ETAT_HUMEUR]
     return game_state, command_text, new_x, new_y, faim_display, humeur_display, game_state['message']
 def handle_bravo(current_state):
     """Gère l'événement de récompense."""
     game_state = current_state
+    current_env_state = _get_env_state(env)
     if env is None:
         return game_state, game_state['puppy_pos'][0], game_state['puppy_pos'][1], 0, 0, '❌ Erreur: Modèle non chargé.'
     if px == rx and py == ry:
         game_state['level'] += 1
+        # Modification de l'état via current_env_state
+        current_env_state[ETAT_FAIM] = np.clip(current_env_state[ETAT_FAIM] - 0.5, 0.0, 1.0)
+        current_env_state[ETAT_HUMEUR] = np.clip(current_env_state[ETAT_HUMEUR] + 0.5, -1.0, 1.0)
         game_state['message'] = f'🥳 BRAVO ! MiRobot a bien réussi ! Niveau suivant : {game_state["level"]}. Repositionnez la récompense pour continuer !'
     else:
         game_state['message'] = '😕 MiRobot doit être sur la case de la récompense pour recevoir un "BRAVO" !'
+    faim_display = current_env_state[ETAT_FAIM] * 100
+    humeur_display = current_env_state[ETAT_HUMEUR]
     return game_state, game_state['puppy_pos'][0], game_state['puppy_pos'][1], faim_display, humeur_display, game_state['message']
 def _draw_grid(px, py, rx, ry, reward_path):
+    """Dessine la grille de jeu avec le chiot et la récompense."""
     if reward_path is None:
         return "<p style='text-align: center; color: red;'>Veuillez télécharger une image de récompense pour afficher la grille.</p>"
                 if is_puppy:
                     style += "background-color: #d4edda;"
                 else:
+                    # Affichage de l'image
                     content += f"<img src='{reward_src}' style='width: 80%; height: 80%; object-fit: contain;'/>"
                     style += "background-color: #fff3cd;"
 # ----------------------------------------------------------------------
+# 3. INTERFACE GRADIO (Correction gr.Image)
 # ----------------------------------------------------------------------
 initial_grid_html = _draw_grid(game_state_initial['puppy_pos'][0], game_state_initial['puppy_pos'][1],
                                game_state_initial['reward_pos'][0], game_state_initial['reward_pos'][1],
                                None)
         with gr.Row():
             with gr.Column(scale=2):
+                # **CORRECTION**: Utilisation de gr.Image pour une meilleure gestion des uploads
+                reward_image = gr.Image(label="1. Télécharger Image Récompense (Obligatoire)", type="filepath", height=150, width=150)
                 with gr.Row():
                     reward_x = gr.Slider(minimum=0, maximum=GRID_SIZE - 1, step=1, value=0, label="2. Pos. Récompense X")
             with gr.Column(scale=1):
                 level_display = gr.Markdown(f"### Niveau Actuel : {INITIAL_LEVEL}")
                 faim_bar = gr.Slider(minimum=0, maximum=100, value=initial_faim, label="Faim de MiRobot (%)", interactive=False)
                 humeur_bar = gr.Slider(minimum=-1.0, maximum=1.0, value=initial_humeur, label="Humeur de MiRobot", interactive=False)
         faim_state = gr.State(initial_faim)
         humeur_state = gr.State(initial_humeur)
+        # --- ÉVÉNEMENTS (Mise à jour des inputs pour 'reward_image') ---
         reward_x.change(
             fn=update_reward_pos,
+            inputs=[game_state_json, reward_x, reward_y, reward_image],
             outputs=[game_state_json, message_output]
         ).then(
             fn=_draw_grid,
+            inputs=[puppy_pos_x, puppy_pos_y, reward_x, reward_y, reward_image],
             outputs=grid_display
         )
         reward_y.change(
             fn=update_reward_pos,
+            inputs=[game_state_json, reward_x, reward_y, reward_image],
             outputs=[game_state_json, message_output]
         ).then(
             fn=_draw_grid,
+            inputs=[puppy_pos_x, puppy_pos_y, reward_x, reward_y, reward_image],
             outputs=grid_display
         )
         action_btn.click(
             fn=handle_user_command,
+            inputs=[game_state_json, command_input, reward_image],
             outputs=[game_state_json, command_input, puppy_pos_x, puppy_pos_y, faim_state, humeur_state, message_output]
         ).then(
             fn=lambda g, f, h: [f"### Niveau Actuel : {g['level']}", f, h],
             inputs=[game_state_json, faim_state, humeur_state],
             outputs=[level_display, faim_bar, humeur_bar]
         ).then(
             fn=_draw_grid,
+            inputs=[puppy_pos_x, puppy_pos_y, reward_x, reward_y, reward_image],
             outputs=grid_display
         )
         bravo_btn.click(
             fn=handle_bravo,
             inputs=[game_state_json],
             inputs=[game_state_json, faim_state, humeur_state],
             outputs=[level_display, faim_bar, humeur_bar]
         ).then(
             fn=_draw_grid,
+            inputs=[puppy_pos_x, puppy_pos_y, reward_x, reward_y, reward_image],
             outputs=grid_display
         )
         reset_btn.click(
             fn=_reset_game,
+            inputs=[reward_image],
             outputs=[game_state_json, puppy_pos_x, puppy_pos_y, reward_x, reward_y, faim_state, humeur_state, message_output]
         ).then(
             fn=lambda g: f"### Niveau Actuel : {g['level']}",
             inputs=[game_state_json],
             outputs=level_display
         ).then(
             fn=_draw_grid,
+            inputs=[puppy_pos_x, puppy_pos_y, reward_x, reward_y, reward_image],
             outputs=grid_display
         )
+        reward_image.change(
             fn=_draw_grid,
+            inputs=[puppy_pos_x, puppy_pos_y, reward_x, reward_y, reward_image],
             outputs=grid_display
         )