Spaces:

Clemylia
/

Dog-game-guide

Sleeping

App Files Community

Clemylia committed on Oct 15

Commit

44a2276

verified ·

1 Parent(s): 3965a9e

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -16

app.py CHANGED Viewed

@@ -35,7 +35,7 @@ ACTION_MAP_USER = {
 }
 # ----------------------------------------------------------------------
-# 1. PRÉPARATION DU MODÈLE ET DE L'ENVIRONNEMENT (CORRECTION UNWRAPPED)
 # ----------------------------------------------------------------------
 model = None
@@ -58,8 +58,6 @@ try:
         exec(f.read(), env_globals)
     MiRobotEnv = env_globals['MiRobotEnv']
-    print("Classe MiRobotEnv définie avec succès.")
     # --- 2. Enregistrement de l'environnement Custom ---
     register(
@@ -75,7 +73,7 @@ try:
     env = gym.make(ENV_ID)
     env.reset()
-    # **CORRECTION**: Accéder à l'état via env.unwrapped pour éviter l'erreur OrderEnforcing
     initial_faim = env.unwrapped.state[ETAT_FAIM] * 100
     initial_humeur = env.unwrapped.state[ETAT_HUMEUR]
@@ -88,7 +86,6 @@ except Exception as e:
 # 2. LOGIQUE DU JEU
 # ----------------------------------------------------------------------
-# L'état initial du jeu
 game_state_initial = {
     'level': INITIAL_LEVEL,
     'puppy_pos': [GRID_SIZE // 2, GRID_SIZE // 2],
@@ -101,7 +98,7 @@ def _get_env_state(env_instance):
     """Accès sécurisé à l'état de l'environnement, même avec un wrapper."""
     if env_instance is None:
         return None
-    # Retourne l'état de l'environnement sous-jacent (unwrapped)
     return env_instance.unwrapped.state
 def _reset_game(reward_path):
@@ -135,7 +132,7 @@ def handle_user_command(current_state, command_text, reward_path):
     game_state['reward_asset_path'] = reward_path
-    # Vérification de la faim (défaite)
     faim_actuelle = current_env_state[ETAT_FAIM]
     if faim_actuelle >= FAIM_PENALTY_THRESHOLD:
         game_state['message'] = f'💔 Défaite ! MiRobot a trop faim ({faim_actuelle:.0%}). Jeu réinitialisé.'
@@ -148,7 +145,7 @@ def handle_user_command(current_state, command_text, reward_path):
         current_env_state[CMD_AVANCER] = 0.0
         current_env_state[CMD_TOURNER] = 0.0
-        env.step(0) # Action 0: S'arrêter
         faim_display = current_env_state[ETAT_FAIM] * 100
         humeur_display = current_env_state[ETAT_HUMEUR]
@@ -162,6 +159,10 @@ def handle_user_command(current_state, command_text, reward_path):
     current_env_state[CMD_TOURNER] = 1.0 if command_action_name.startswith("TOURNER") else 0.0
     mirobot_action_id, _ = model.predict(current_env_state, deterministic=True)
     new_obs, reward, terminated, truncated, info = env.step(mirobot_action_id)
@@ -180,7 +181,8 @@ def handle_user_command(current_state, command_text, reward_path):
                 dy = 1 if ry > py else -1
     else:
-        real_action_name = ACTION_MAP_MODEL[mirobot_action_id]
         game_state['message'] = f"😥 MiRobot a désobéi ! Il a fait '{real_action_name}' au lieu de '{command_action_name}'. Récompense RL: {reward:.2f}"
     # Mise à jour de la position
@@ -188,7 +190,7 @@ def handle_user_command(current_state, command_text, reward_path):
     new_y = np.clip(game_state['puppy_pos'][1] + dy, 0, GRID_SIZE - 1)
     game_state['puppy_pos'] = [new_x, new_y]
-    current_env_state = _get_env_state(env) # Relecture après le step
     faim_display = current_env_state[ETAT_FAIM] * 100
     humeur_display = current_env_state[ETAT_HUMEUR]
     return game_state, command_text, new_x, new_y, faim_display, humeur_display, game_state['message']
@@ -208,7 +210,7 @@ def handle_bravo(current_state):
     if px == rx and py == ry:
         game_state['level'] += 1
-        # Modification de l'état via current_env_state
         current_env_state[ETAT_FAIM] = np.clip(current_env_state[ETAT_FAIM] - 0.5, 0.0, 1.0)
         current_env_state[ETAT_HUMEUR] = np.clip(current_env_state[ETAT_HUMEUR] + 0.5, -1.0, 1.0)
@@ -247,7 +249,6 @@ def _draw_grid(px, py, rx, ry, reward_path):
                 if is_puppy:
                     style += "background-color: #d4edda;"
                 else:
-                    # Affichage de l'image
                     content += f"<img src='{reward_src}' style='width: 80%; height: 80%; object-fit: contain;'/>"
                     style += "background-color: #fff3cd;"
@@ -267,7 +268,7 @@ def update_reward_pos(current_state, reward_x, reward_y, reward_path):
 # ----------------------------------------------------------------------
-# 3. INTERFACE GRADIO (Correction gr.Image)
 # ----------------------------------------------------------------------
 initial_grid_html = _draw_grid(game_state_initial['puppy_pos'][0], game_state_initial['puppy_pos'][1],
@@ -290,7 +291,7 @@ else:
         gr.Markdown(
             f"""
             # MiRobot - Le Jeu d'Obéissance 🐾
-            Bienvenue dans la simulation interactive de votre modèle RL **{REPO_ID}** !
             **Objectif :** Guider MiRobot vers la récompense en donnant des ordres. Attention, sa **Faim** augmente à chaque pas !
             """
         )
@@ -298,7 +299,7 @@ else:
         with gr.Row():
             with gr.Column(scale=2):
-                # **CORRECTION**: Utilisation de gr.Image pour une meilleure gestion des uploads
                 reward_image = gr.Image(label="1. Télécharger Image Récompense (Obligatoire)", type="filepath", height=150, width=150)
                 with gr.Row():
@@ -331,7 +332,7 @@ else:
         faim_state = gr.State(initial_faim)
         humeur_state = gr.State(initial_humeur)
-        # --- ÉVÉNEMENTS (Mise à jour des inputs pour 'reward_image') ---
         reward_x.change(
             fn=update_reward_pos,

 }
 # ----------------------------------------------------------------------
+# 1. PRÉPARATION DU MODÈLE ET DE L'ENVIRONNEMENT
 # ----------------------------------------------------------------------
 model = None
         exec(f.read(), env_globals)
     MiRobotEnv = env_globals['MiRobotEnv']
     # --- 2. Enregistrement de l'environnement Custom ---
     register(
     env = gym.make(ENV_ID)
     env.reset()
+    # Lecture initiale sécurisée de l'état
     initial_faim = env.unwrapped.state[ETAT_FAIM] * 100
     initial_humeur = env.unwrapped.state[ETAT_HUMEUR]
 # 2. LOGIQUE DU JEU
 # ----------------------------------------------------------------------
 game_state_initial = {
     'level': INITIAL_LEVEL,
     'puppy_pos': [GRID_SIZE // 2, GRID_SIZE // 2],
     """Accès sécurisé à l'état de l'environnement, même avec un wrapper."""
     if env_instance is None:
         return None
+    # Utilise .unwrapped pour accéder à l'instance de MiRobotEnv
     return env_instance.unwrapped.state
 def _reset_game(reward_path):
     game_state['reward_asset_path'] = reward_path
+    # 2. Vérification de la faim (défaite)
     faim_actuelle = current_env_state[ETAT_FAIM]
     if faim_actuelle >= FAIM_PENALTY_THRESHOLD:
         game_state['message'] = f'💔 Défaite ! MiRobot a trop faim ({faim_actuelle:.0%}). Jeu réinitialisé.'
         current_env_state[CMD_AVANCER] = 0.0
         current_env_state[CMD_TOURNER] = 0.0
+        env.step(0)
         faim_display = current_env_state[ETAT_FAIM] * 100
         humeur_display = current_env_state[ETAT_HUMEUR]
     current_env_state[CMD_TOURNER] = 1.0 if command_action_name.startswith("TOURNER") else 0.0
     mirobot_action_id, _ = model.predict(current_env_state, deterministic=True)
+    # **CORRECTION CRUCIALE**: Convertir le tableau NumPy en entier
+    mirobot_action_id = mirobot_action_id.item()
     new_obs, reward, terminated, truncated, info = env.step(mirobot_action_id)
                 dy = 1 if ry > py else -1
     else:
+        # Cette ligne est maintenant sûre car mirobot_action_id est un entier
+        real_action_name = ACTION_MAP_MODEL[mirobot_action_id]
         game_state['message'] = f"😥 MiRobot a désobéi ! Il a fait '{real_action_name}' au lieu de '{command_action_name}'. Récompense RL: {reward:.2f}"
     # Mise à jour de la position
     new_y = np.clip(game_state['puppy_pos'][1] + dy, 0, GRID_SIZE - 1)
     game_state['puppy_pos'] = [new_x, new_y]
+    current_env_state = _get_env_state(env)
     faim_display = current_env_state[ETAT_FAIM] * 100
     humeur_display = current_env_state[ETAT_HUMEUR]
     return game_state, command_text, new_x, new_y, faim_display, humeur_display, game_state['message']
     if px == rx and py == ry:
         game_state['level'] += 1
+        # Modification de l'état
         current_env_state[ETAT_FAIM] = np.clip(current_env_state[ETAT_FAIM] - 0.5, 0.0, 1.0)
         current_env_state[ETAT_HUMEUR] = np.clip(current_env_state[ETAT_HUMEUR] + 0.5, -1.0, 1.0)
                 if is_puppy:
                     style += "background-color: #d4edda;"
                 else:
                     content += f"<img src='{reward_src}' style='width: 80%; height: 80%; object-fit: contain;'/>"
                     style += "background-color: #fff3cd;"
 # ----------------------------------------------------------------------
+# 3. INTERFACE GRADIO
 # ----------------------------------------------------------------------
 initial_grid_html = _draw_grid(game_state_initial['puppy_pos'][0], game_state_initial['puppy_pos'][1],
         gr.Markdown(
             f"""
             # MiRobot - Le Jeu d'Obéissance 🐾
+            Bienvenue dans la simulation interactive du modèle IA **{REPO_ID}** !
             **Objectif :** Guider MiRobot vers la récompense en donnant des ordres. Attention, sa **Faim** augmente à chaque pas !
             """
         )
         with gr.Row():
             with gr.Column(scale=2):
+                # Utilisation de gr.Image pour une meilleure gestion des uploads
                 reward_image = gr.Image(label="1. Télécharger Image Récompense (Obligatoire)", type="filepath", height=150, width=150)
                 with gr.Row():
         faim_state = gr.State(initial_faim)
         humeur_state = gr.State(initial_humeur)
+        # --- ÉVÉNEMENTS ---
         reward_x.change(
             fn=update_reward_pos,