package evironment.antGame;

import core.*;

import java.awt.*;

public class AntWorld {
    /**
     * The grid (map) of the world, holding the current content of every cell.
     */
    private Grid grid;

    /**
     * Internal (backend) representation of the ant.
     * The AntWorld essentially acts as the game host of the original AntGame.
     */
    private MyAnt myAnt;

    /**
     * The client agent. In the original AntGame the host would send JADE
     * messages with the current observation to each client on every tick.
     * In this reinforcement learning environment, the agent is part of the
     * backend to make this environment an MDP. By the convention of OpenAI
     * Gym, the environment should return all vital information (nextState,
     * reward, done) from the .step() method. But the AntGame itself only
     * yields an observation for each ant on each tick. These observations
     * are not Markovian, hence a "middleware" has to compute the unique
     * Markov states from the received observations -> the (client) ant!
     * The AntAgent has an internal strategy for generating Markov states
     * from observations, for example through an internal clone of the grid
     * (its "brain"). A history, as mentioned in various lectures, would be
     * possible as well.
     */
    private AntAgent antAgent;

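    // Illustration (hedged sketch, not part of this class): one way the
    // AntAgent can build Markov states, as described above, is to merge each
    // incoming observation into a persistent clone of the grid (its "brain")
    // and wrap that clone as the state. The accessors getPos()/getCell(), the
    // Cell type, and the State constructor below are assumed names for
    // illustration, not the repository's actual AntAgent API:
    //
    //     private Cell[][] brain;   // the agent's internal grid clone
    //
    //     public State feedObservation(AntObservation observation) {
    //         Point pos = observation.getPos();            // assumption
    //         brain[pos.x][pos.y] = observation.getCell(); // assumption
    //         return new State(brain);                     // assumption
    //     }
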
    public AntWorld(int width, int height, double foodDensity){
        grid = new Grid(width, height, foodDensity);
        antAgent = new AntAgent(width, height);
    }

    public AntWorld(){
        this(Constants.DEFAULT_GRID_WIDTH, Constants.DEFAULT_GRID_HEIGHT, Constants.DEFAULT_FOOD_DENSITY);
    }

    private static class MyAnt{
        int x, y;
        boolean hasFood;
        boolean spawned;
    }

    public StepResult step(DiscreteAction<AntAction> action){
        AntObservation observation;
        State newState;
        if(!myAnt.spawned){
            // First call after reset(): place the ant on the grid's start
            // cell and mark it as spawned so this branch only runs once.
            Point spawn = grid.getStartPoint();
            myAnt.x = spawn.x;
            myAnt.y = spawn.y;
            myAnt.spawned = true;
            observation = new AntObservation(grid.getCell(spawn));
            newState = antAgent.feedObservation(observation);
            return new StepResult(newState, 0.0, false, "Just spawned on the map");
        }
        // Movement and pick-up/drop effects are not implemented yet; every
        // case is still a stub.
        switch (action.getValue()) {
            case MOVE_UP:
                break;
            case MOVE_RIGHT:
                break;
            case MOVE_DOWN:
                break;
            case MOVE_LEFT:
                break;
            case PICK_UP:
                break;
            case DROP_DOWN:
                break;
            default:
                throw new RuntimeException(String.format("Action <%s> is not a valid action!", action.toString()));
        }
        // Observe the cell the ant currently occupies, so observation is
        // always initialized; the agent turns this (non-Markov) observation
        // into a Markov state.
        observation = new AntObservation(grid.getCell(new Point(myAnt.x, myAnt.y)));
        newState = antAgent.feedObservation(observation);
        return new StepResult(newState, 0.0, false, "");
    }

    public void reset() {
        RNG.reseed();
        grid.initRandomWorld();
        myAnt = new MyAnt(); // fresh ant; spawned stays false until the first step()
    }

    public Point getSpawningPoint(){
        return grid.getStartPoint();
    }
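
    /**
     * Usage sketch (illustration only, not part of the original game code):
     * reset the world and take a few random steps. This assumes AntAction is
     * an enum (as the switch in step() suggests) and that DiscreteAction can
     * be constructed from a single action value; adjust if the actual core
     * API differs.
     */
    public static void main(String[] args) {
        AntWorld world = new AntWorld();
        world.reset();
        java.util.Random random = new java.util.Random();
        for (int tick = 0; tick < 5; tick++) {
            // pick a random action from the enum's values
            AntAction[] actions = AntAction.values();
            AntAction choice = actions[random.nextInt(actions.length)];
            StepResult result = world.step(new DiscreteAction<>(choice));
            System.out.println("tick " + tick + ": " + result);
        }
    }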
}