# Dog-game-guide / app.py
# Hugging Face Space by Clemylia - commit 44a2276 ("Update app.py"), 15.8 kB
import gradio as gr
import numpy as np
import os
# Bibliothèques d'IA et de RL
import gymnasium as gym
from gymnasium import register, spaces
from stable_baselines3 import PPO
from huggingface_hub import hf_hub_download
# --- HUGGING FACE REPOSITORY SETTINGS ---
# Repository and file names passed to hf_hub_download, plus the id under
# which the custom environment is registered with gymnasium.
REPO_ID = "Clemylia/MiRobot"
MODEL_FILE = "mirobot_final_model.zip"
ENV_SCRIPT_FILE = "MiRobotEnv.py"
ENV_ID = 'MiRobot-v0'

# --- GAME CONSTANTS ---
GRID_SIZE = 10                # the board is GRID_SIZE x GRID_SIZE cells
FAIM_PENALTY_THRESHOLD = 0.9  # hunger level at or above which the game is lost
INITIAL_LEVEL = 1

# --- STATE INDICES ---
# Positions of each field inside the environment's state vector.
(
    CMD_AVANCER,
    CMD_TOURNER,
    ETAT_FAIM,
    ETAT_SOMMEIL,
    ETAT_HUMEUR,
) = range(5)

# Action maps: model action id -> display name, and user order -> the model
# action id that counts as obeying that order.
ACTION_MAP_MODEL = {
    0: "S'Arrêter",
    1: "Avancer",
    2: "Tourner G",
    3: "Tourner D",
}
ACTION_MAP_USER = {"AVANCER": 1, "TOURNER À GAUCHE": 2, "TOURNER À DROITE": 3}
# ----------------------------------------------------------------------
# 1. MODEL AND ENVIRONMENT SETUP
# ----------------------------------------------------------------------
# Module-level handles used by the game logic and the UI. They keep these
# defaults whenever loading fails, which makes the UI fall back to its
# error page (it checks `model is None`).
model = None
env = None
MiRobotEnv = None
initial_faim = 0.0
initial_humeur = 0.0
try:
    print("Téléchargement des fichiers MiRobot...")
    TEMP_DIR = "./mirobot_assets"
    os.makedirs(TEMP_DIR, exist_ok=True)

    # --- 1. Load the MiRobotEnv class from the script hosted on the Hub ---
    env_path = hf_hub_download(repo_id=REPO_ID, filename=ENV_SCRIPT_FILE, local_dir=TEMP_DIR)
    env_globals = {'gym': gym, 'np': np, 'spaces': spaces}
    # SECURITY NOTE: this executes Python code downloaded from REPO_ID with
    # the full privileges of this process. Only point REPO_ID at a
    # repository you trust (ideally pin a revision).
    # Explicit encoding so the script is read the same way whatever the
    # host locale is.
    with open(env_path, 'r', encoding='utf-8') as f:
        exec(f.read(), env_globals)
    MiRobotEnv = env_globals['MiRobotEnv']

    # --- 2. Register the custom environment ---
    register(
        id=ENV_ID,
        entry_point=MiRobotEnv,
    )

    # --- 3. Load the model and create the environment ---
    model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILE, local_dir=TEMP_DIR)
    model = PPO.load(model_path)
    env = gym.make(ENV_ID)
    env.reset()

    # Initial gauge values, read from the underlying env instance
    # (`unwrapped`) and converted to plain floats for the UI widgets.
    initial_faim = float(env.unwrapped.state[ETAT_FAIM] * 100)
    initial_humeur = float(env.unwrapped.state[ETAT_HUMEUR])
    print("✅ MiRobot prêt pour l'interface Gradio.")
except Exception as e:
    # Top-level boundary: report the problem and leave every handle in its
    # "not loaded" state. Without this reset, a failure after PPO.load would
    # leave `model` set with `env` still None, and the full game UI would be
    # built on top of a broken environment.
    print(f"❌ ERREUR CRITIQUE lors du chargement de MiRobot: {e}")
    model = None
    env = None
    initial_faim = 0.0
    initial_humeur = 0.0
# ----------------------------------------------------------------------
# 2. GAME LOGIC
# ----------------------------------------------------------------------
# Template for the game state: the puppy starts in the middle of the grid,
# the reward in the top-left cell, and no reward image is selected yet.
game_state_initial = dict(
    level=INITIAL_LEVEL,
    puppy_pos=[GRID_SIZE // 2] * 2,
    reward_pos=[0] * 2,
    message='Bienvenue ! Téléchargez une récompense et commencez !',
    reward_asset_path=None,
)
def _get_env_state(env_instance):
    """Return the state of the underlying environment, or None without an env.

    The state is read through ``.unwrapped`` so that it is found even when
    ``env_instance`` is a wrapper around the actual MiRobotEnv instance.
    """
    return None if env_instance is None else env_instance.unwrapped.state
def _reset_game(reward_path):
    """Reset the board positions and the puppy's internal (environment) state.

    Args:
        reward_path: path of the reward image to keep in the new game state
            (may be None when no image is selected).

    Returns:
        An 8-tuple ``(state, puppy_x, puppy_y, reward_x, reward_y, faim,
        humeur, message)`` in the order expected by the reset button outputs.
        ``faim`` is the hunger as a percentage; both gauges are 0.0 when the
        environment is not loaded.
    """
    import copy  # local import: only this function needs it

    # Deep copy: the template holds nested lists (positions); a shallow copy
    # would share them with `game_state_initial`, so any in-place change to a
    # game's positions would silently corrupt the template.
    new_state = copy.deepcopy(game_state_initial)
    new_state.update({
        'reward_asset_path': reward_path,
        'message': f'Jeu réinitialisé. Niveau {INITIAL_LEVEL}. Placez la récompense !'
    })

    faim_display = 0.0
    humeur_display = 0.0
    if env is not None:
        env.reset()
        current_state = _get_env_state(env)
        # Plain floats rather than NumPy scalars, so the values can be handed
        # to UI components / serialised without surprises.
        faim_display = float(current_state[ETAT_FAIM] * 100)
        humeur_display = float(current_state[ETAT_HUMEUR])

    return (
        new_state,
        new_state['puppy_pos'][0],
        new_state['puppy_pos'][1],
        new_state['reward_pos'][0],
        new_state['reward_pos'][1],
        faim_display,
        humeur_display,
        new_state['message'],
    )
def _read_gauges(env_state):
    """Return (hunger in percent, mood) from the env state as plain floats."""
    return float(env_state[ETAT_FAIM] * 100), float(env_state[ETAT_HUMEUR])


def handle_user_command(current_state, command_text, reward_path):
    """Process one order from the player and let the RL model react to it.

    The order is written into the command slots of the environment state, the
    model picks an action, and the environment is stepped with it. The puppy
    only moves on the board when the model obeys an "AVANCER" order; it then
    advances one cell towards the reward along the axis with the larger
    distance.

    Args:
        current_state: game state dict (``level``, ``puppy_pos``,
            ``reward_pos``, ``message``, ``reward_asset_path``).
        command_text: the order chosen by the player (a key of
            ``ACTION_MAP_USER``, case-insensitive); None or an unknown order
            is treated as "not understood".
        reward_path: path of the reward image, stored in the game state.

    Returns:
        A 7-tuple ``(state, command_text, puppy_x, puppy_y, faim, humeur,
        message)`` matching the outputs wired to the action button. This
        shape is returned on every path, including defeat.
    """
    game_state = current_state
    puppy_x, puppy_y = game_state['puppy_pos']

    if model is None or env is None:
        # Keep the puppy where it is instead of jumping to a hard-coded cell.
        return (
            game_state, command_text, puppy_x, puppy_y, 0, 0,
            '❌ Erreur: Le modèle MiRobot n\'a pas pu être chargé !',
        )

    game_state['reward_asset_path'] = reward_path
    current_env_state = _get_env_state(env)

    # Hunger check (defeat).
    faim_actuelle = current_env_state[ETAT_FAIM]
    if faim_actuelle >= FAIM_PENALTY_THRESHOLD:
        defeat_message = f'💔 Défaite ! MiRobot a trop faim ({faim_actuelle:.0%}). Jeu réinitialisé.'
        # _reset_game returns 8 values in the reset-button order; remap them
        # to this handler's 7 outputs rather than returning them as-is.
        new_state, new_x, new_y, _, _, faim_display, humeur_display, _ = _reset_game(reward_path)
        # This handler cannot move the reward sliders, so keep the reward
        # where the player put it to stay in sync with what the grid shows.
        new_state['reward_pos'] = game_state['reward_pos']
        new_state['message'] = defeat_message
        return new_state, command_text, new_x, new_y, faim_display, humeur_display, defeat_message

    command_upper = (command_text or "").upper()
    if command_upper not in ACTION_MAP_USER:
        game_state['message'] = f"🤔 MiRobot n'a pas compris l'ordre '{command_text}'. Sa faim augmente..."
        current_env_state[CMD_AVANCER] = 0.0
        current_env_state[CMD_TOURNER] = 0.0
        env.step(0)
        # Re-read the state after stepping (as the main path does), in case
        # the environment replaced its state object during the step.
        faim_display, humeur_display = _read_gauges(_get_env_state(env))
        return (
            game_state, command_text, puppy_x, puppy_y,
            faim_display, humeur_display, game_state['message'],
        )

    # Publish the order in the state, then let the model decide.
    command_action_name = command_upper
    current_env_state[CMD_AVANCER] = 1.0 if command_action_name == "AVANCER" else 0.0
    current_env_state[CMD_TOURNER] = 1.0 if command_action_name.startswith("TOURNER") else 0.0
    predicted_action, _ = model.predict(current_env_state, deterministic=True)
    # The prediction is converted to a plain int so it can be compared with
    # the action ids and used as a dictionary key.
    mirobot_action_id = int(np.asarray(predicted_action).item())
    _, reward, _, _, _ = env.step(mirobot_action_id)

    dx, dy = 0, 0
    if mirobot_action_id == ACTION_MAP_USER[command_action_name]:
        game_state['message'] = f"👏 MiRobot a obéi à '{command_action_name}'. Récompense RL: {reward:.2f}"
        if command_action_name == "AVANCER":
            rx, ry = game_state['reward_pos']
            if abs(rx - puppy_x) > abs(ry - puppy_y):
                dx = 1 if rx > puppy_x else -1
            elif abs(ry - puppy_y) > 0:
                dy = 1 if ry > puppy_y else -1
    else:
        real_action_name = ACTION_MAP_MODEL.get(mirobot_action_id, str(mirobot_action_id))
        game_state['message'] = f"😥 MiRobot a désobéi ! Il a fait '{real_action_name}' au lieu de '{command_action_name}'. Récompense RL: {reward:.2f}"

    # Position update, clamped to the board. Plain ints (not NumPy scalars)
    # because the positions are stored in the JSON game state.
    new_x = int(np.clip(puppy_x + dx, 0, GRID_SIZE - 1))
    new_y = int(np.clip(puppy_y + dy, 0, GRID_SIZE - 1))
    game_state['puppy_pos'] = [new_x, new_y]

    faim_display, humeur_display = _read_gauges(_get_env_state(env))
    return game_state, command_text, new_x, new_y, faim_display, humeur_display, game_state['message']
def handle_bravo(current_state):
    """Handle the "Bravo" button: praise the puppy if it stands on the reward.

    When the puppy and the reward share a cell, the level goes up by one,
    hunger is lowered by 0.5 (kept within [0, 1]) and mood is raised by 0.5
    (kept within [-1, 1]) directly in the environment state. Otherwise only
    the message changes.

    Returns:
        ``(state, puppy_x, puppy_y, faim, humeur, message)`` where ``faim`` is
        the hunger as a percentage.
    """
    # Guard clause: nothing can be rewarded without an environment.
    if env is None:
        position = current_state['puppy_pos']
        return current_state, position[0], position[1], 0, 0, '❌ Erreur: Modèle non chargé.'

    env_state = _get_env_state(env)
    puppy_x, puppy_y = current_state['puppy_pos']
    treat_x, treat_y = current_state['reward_pos']

    if puppy_x != treat_x or puppy_y != treat_y:
        current_state['message'] = '😕 MiRobot doit être sur la case de la récompense pour recevoir un "BRAVO" !'
    else:
        current_state['level'] += 1
        # Feed and cheer up the puppy by editing the env state in place.
        less_hungry = np.clip(env_state[ETAT_FAIM] - 0.5, 0.0, 1.0)
        env_state[ETAT_FAIM] = less_hungry
        happier = np.clip(env_state[ETAT_HUMEUR] + 0.5, -1.0, 1.0)
        env_state[ETAT_HUMEUR] = happier
        current_state['message'] = f'🥳 BRAVO ! MiRobot a bien réussi ! Niveau suivant : {current_state["level"]}. Repositionnez la récompense pour continuer !'

    hunger_percent = env_state[ETAT_FAIM] * 100
    mood = env_state[ETAT_HUMEUR]
    return current_state, puppy_x, puppy_y, hunger_percent, mood, current_state['message']
def _draw_grid(px, py, rx, ry, reward_path):
    """Render the board as an HTML grid showing the puppy and the reward.

    Args:
        px, py: cell of the puppy.
        rx, ry: cell of the reward.
        reward_path: path of the reward image; when None, a prompt asking
            for an image is returned instead of the grid.

    Returns:
        An HTML string.
    """
    if reward_path is None:
        return "<p style='text-align: center; color: red;'>Veuillez télécharger une image de récompense pour afficher la grille.</p>"

    cell_style = "border: 1px dotted #ccc; display: flex; align-items: center; justify-content: center; position: relative;"
    dog_html = "<span style='font-size: 30px;'>🐶</span>"
    treat_html = f"<img src='file/{reward_path}' style='width: 80%; height: 80%; object-fit: contain;'/>"

    def render_cell(col, row):
        has_dog = (col == px and row == py)
        has_treat = (col == rx and row == ry)
        if has_treat and has_dog:
            # Puppy reached the reward: green cell, the puppy hides the image.
            tint, inner = "background-color: #d4edda;", dog_html
        elif has_treat:
            tint, inner = "background-color: #fff3cd;", treat_html
        else:
            tint, inner = "", (dog_html if has_dog else "")
        return f"<div style='{cell_style}{tint}'>{inner}</div>"

    pieces = [
        f"<div style='width: 400px; height: 400px; display: grid; grid-template-columns: repeat({GRID_SIZE}, 1fr); border: 2px solid #333; margin: auto;'>"
    ]
    # Row-major order: the CSS grid fills its columns left to right.
    pieces.extend(
        render_cell(col, row)
        for row in range(GRID_SIZE)
        for col in range(GRID_SIZE)
    )
    pieces.append("</div>")
    return "".join(pieces)
def update_reward_pos(current_state, reward_x, reward_y, reward_path):
    """Store the reward position and image path in the game state.

    The state dict is modified in place and also returned, together with the
    confirmation message that was written into it.
    """
    notice = f"Récompense placée à ({reward_x}, {reward_y}). Donnez un ordre !"
    changes = {
        'reward_pos': [reward_x, reward_y],
        'reward_asset_path': reward_path,
        'message': notice,
    }
    for key, value in changes.items():
        current_state[key] = value
    return current_state, notice
# ----------------------------------------------------------------------
# 3. GRADIO INTERFACE
# ----------------------------------------------------------------------
def _status_outputs(game_state, faim, humeur):
    """Return the values for the level label and the hunger / mood bars."""
    return [f"### Niveau Actuel : {game_state['level']}", faim, humeur]


# Grid shown before any reward image is selected (no image -> prompt text).
initial_grid_html = _draw_grid(
    game_state_initial['puppy_pos'][0], game_state_initial['puppy_pos'][1],
    game_state_initial['reward_pos'][0], game_state_initial['reward_pos'][1],
    None,
)

if model is None:
    # Loading failed: expose a minimal page that only reports the error.
    demo = gr.Interface(
        fn=lambda: "Le modèle MiRobot n'a pas pu être chargé. Vérifiez les logs ou le REPO_ID.",
        inputs=[],
        outputs=gr.HTML(label="État du Modèle"),
        title="❌ MiRobot Game - Erreur de Chargement",
    )
else:
    # NOTE(review): the layout nesting below was reconstructed from a
    # flattened listing of this file - confirm it against the intended design.
    with gr.Blocks(title="MiRobot - Le Jeu d'Obéissance 🐾") as demo:
        # Hidden JSON component carrying the game state between events.
        game_state_json = gr.JSON(value=game_state_initial, visible=False)
        gr.Markdown(
            f"""
# MiRobot - Le Jeu d'Obéissance 🐾
Bienvenue dans la simulation interactive du modèle IA **{REPO_ID}** !
**Objectif :** Guider MiRobot vers la récompense en donnant des ordres. Attention, sa **Faim** augmente à chaque pas !
"""
        )
        with gr.Row():
            with gr.Column(scale=2):
                # type="filepath": handlers receive the path of the uploaded image.
                reward_image = gr.Image(label="1. Télécharger Image Récompense (Obligatoire)", type="filepath", height=150, width=150)
                with gr.Row():
                    reward_x = gr.Slider(minimum=0, maximum=GRID_SIZE - 1, step=1, value=0, label="2. Pos. Récompense X")
                    reward_y = gr.Slider(minimum=0, maximum=GRID_SIZE - 1, step=1, value=0, label="2. Pos. Récompense Y")
                grid_display = gr.HTML(label="Plateau de Jeu (10x10)", value=initial_grid_html)
            with gr.Column(scale=1):
                level_display = gr.Markdown(f"### Niveau Actuel : {INITIAL_LEVEL}")
                faim_bar = gr.Slider(minimum=0, maximum=100, value=initial_faim, label="Faim de MiRobot (%)", interactive=False)
                humeur_bar = gr.Slider(minimum=-1.0, maximum=1.0, value=initial_humeur, label="Humeur de MiRobot", interactive=False)
                command_input = gr.Dropdown(
                    choices=list(ACTION_MAP_USER.keys()),
                    label="3. Ordre du Maître",
                    value="AVANCER"
                )
                action_btn = gr.Button("4. Donner l'Ordre 🗣️", variant="primary")
                bravo_btn = gr.Button("5. Bravo ! (Récompense)", variant="secondary")
                reset_btn = gr.Button("Redémarrer le Jeu 🔄", variant="secondary")
                message_output = gr.Markdown(f"**Message :** {game_state_initial['message']}")

        # Hidden states
        puppy_pos_x = gr.State(game_state_initial['puppy_pos'][0])
        puppy_pos_y = gr.State(game_state_initial['puppy_pos'][1])
        faim_state = gr.State(initial_faim)
        humeur_state = gr.State(initial_humeur)

        # Input / output groups shared by several event chains.
        grid_inputs = [puppy_pos_x, puppy_pos_y, reward_x, reward_y, reward_image]
        status_inputs = [game_state_json, faim_state, humeur_state]
        status_widgets = [level_display, faim_bar, humeur_bar]

        # --- EVENTS ---
        # Moving either reward slider stores the new position, then redraws.
        for reward_slider in (reward_x, reward_y):
            reward_slider.change(
                fn=update_reward_pos,
                inputs=[game_state_json, reward_x, reward_y, reward_image],
                outputs=[game_state_json, message_output]
            ).then(
                fn=_draw_grid,
                inputs=grid_inputs,
                outputs=grid_display
            )

        action_btn.click(
            fn=handle_user_command,
            inputs=[game_state_json, command_input, reward_image],
            outputs=[game_state_json, command_input, puppy_pos_x, puppy_pos_y, faim_state, humeur_state, message_output]
        ).then(
            fn=_status_outputs,
            inputs=status_inputs,
            outputs=status_widgets
        ).then(
            fn=_draw_grid,
            inputs=grid_inputs,
            outputs=grid_display
        )

        bravo_btn.click(
            fn=handle_bravo,
            inputs=[game_state_json],
            outputs=[game_state_json, puppy_pos_x, puppy_pos_y, faim_state, humeur_state, message_output]
        ).then(
            fn=_status_outputs,
            inputs=status_inputs,
            outputs=status_widgets
        ).then(
            fn=_draw_grid,
            inputs=grid_inputs,
            outputs=grid_display
        )

        reset_btn.click(
            fn=_reset_game,
            inputs=[reward_image],
            outputs=[game_state_json, puppy_pos_x, puppy_pos_y, reward_x, reward_y, faim_state, humeur_state, message_output]
        ).then(
            # Refresh the visible hunger / mood bars as well as the level:
            # the reset only writes the hidden states, so without this step
            # the bars would keep showing the values of the previous game.
            fn=_status_outputs,
            inputs=status_inputs,
            outputs=status_widgets
        ).then(
            fn=_draw_grid,
            inputs=grid_inputs,
            outputs=grid_display
        )

        # A new (or cleared) reward image only requires a redraw.
        reward_image.change(
            fn=_draw_grid,
            inputs=grid_inputs,
            outputs=grid_display
        )

demo.launch()