/* package whatever; // don't place package name! */

import java.util.*;
import java.lang.*;
import java.io.*;

/* Name of the class has to be "Main" only if the class is public. */
class Ideone
{
    public static void main (String[] args) throws java.lang.Exception
    {
        // your code goes here
    }
}
Success #stdin #stdout 0.1s 52576KB
stdin
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# ====== Agent definition ======
class Agent:
    def __init__(self, n_joints=2):
        self.n_joints = n_joints
        self.angles = np.zeros(n_joints)   # angle of each joint
        self.velocities = np.zeros(n_joints)
        self.position = 0.0  # horizontal position of the body

    def step(self, torques, dt=0.1):
        # simplified physics: one explicit Euler step (torque treated as acceleration)
        self.velocities += torques * dt
        self.angles += self.velocities * dt
        self.position += np.sum(np.sin(self.angles)) * dt  # crude approximation of forward displacement
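
# --- Illustrative check (not part of the original script): a single Euler step of the
# --- simplified physics above. Velocities change by torque*dt, angles by the updated
# --- velocity*dt, and position by sum(sin(angles))*dt.
_demo = Agent(n_joints=2)
_demo.step(np.array([0.5, 0.2]))
print("demo step -> angles:", _demo.angles,
      "velocities:", _demo.velocities,
      "position:", _demo.position)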

# ====== Simple neural-network policy ======
class Policy(nn.Module):
    def __init__(self, n_inputs, n_outputs):
        super().__init__()
        self.fc = nn.Sequential(
            nn.Linear(n_inputs, 64),
            nn.Tanh(),
            nn.Linear(64, n_outputs),
            nn.Tanh()
        )
    def forward(self, x):
        return self.fc(x)
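
# --- Illustrative check (not part of the original script): the policy maps a 4-dim state
# --- (2 angles + 2 velocities) to 2 torques, each squashed into (-1, 1) by the final Tanh.
_probe = Policy(n_inputs=4, n_outputs=2)
print("policy output shape:", tuple(_probe(torch.zeros(4)).shape))  # expected: (2,)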

# ====== Multi-agent simulation ======
n_agents = 5
agents = [Agent() for _ in range(n_agents)]
policy = Policy(n_inputs=4, n_outputs=2)  # input: 2 joint angles + 2 velocities; output: 2 torques
optimizer = optim.Adam(policy.parameters(), lr=0.01)

for episode in range(1000):
    total_reward = 0
    for agent in agents:
        state = torch.tensor(np.concatenate([agent.angles, agent.velocities]), dtype=torch.float32)
        action = policy(state)
        agent.step(action.detach().numpy())
        # the farther forward, the better; note that agents are never reset, so the
        # position (and hence the reward) keeps accumulating across episodes
        reward = agent.position
        total_reward += reward

    # Simple "backprop" (illustrative only): this scalar is a fresh leaf tensor, detached
    # from the policy's computation graph, so the gradient step below does not actually
    # update the policy weights (see the REINFORCE-style sketch after the loop).
    loss = -torch.tensor(total_reward / n_agents, requires_grad=True)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if episode % 50 == 0:
        print(f"Episode {episode}, mean distance: {total_reward/n_agents:.2f}")
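
# ====== REINFORCE-style variant (a sketch, not part of the original script) ======
# Because the loss above never reaches the policy parameters, the network does not learn.
# A minimal sketch of a gradient-carrying alternative, assuming a stochastic (Gaussian)
# policy with a fixed standard deviation `sigma` and a fixed episode length
# `steps_per_episode` (both assumptions, not taken from the original): sample torques
# around the policy output and weight their log-probability by the episode return.
from torch.distributions import Normal

def train_reinforce(n_episodes=1000, sigma=0.1, steps_per_episode=20):
    policy = Policy(n_inputs=4, n_outputs=2)
    optimizer = optim.Adam(policy.parameters(), lr=0.01)
    for episode in range(n_episodes):
        agents = [Agent() for _ in range(n_agents)]  # fresh agents each episode
        log_probs, returns = [], []
        for agent in agents:
            ep_log_prob = torch.zeros(())
            for _ in range(steps_per_episode):
                state = torch.tensor(np.concatenate([agent.angles, agent.velocities]),
                                     dtype=torch.float32)
                dist = Normal(policy(state), sigma)
                action = dist.sample()
                ep_log_prob = ep_log_prob + dist.log_prob(action).sum()
                agent.step(action.numpy())
            log_probs.append(ep_log_prob)
            returns.append(agent.position)
        returns_t = torch.tensor(returns, dtype=torch.float32)
        baseline = returns_t.mean()
        # gradients flow through log_prob only; the numpy simulator stays non-differentiable
        loss = -(torch.stack(log_probs) * (returns_t - baseline)).mean()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if episode % 50 == 0:
            print(f"[REINFORCE sketch] episode {episode}, mean distance: {baseline.item():.2f}")

# train_reinforce()  # uncomment to run the gradient-carrying variant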
stdout
Standard output is empty (the paste ran the stock Java template above, whose main() does nothing; the Python script was supplied via stdin and never executed).