package evironment.antGame;

import core.*;
import java.awt.*;
public class AntWorld {

    /**
     * The grid of the world, holding the cells with food, obstacles, and the start point.
     */
    private Grid grid;
    /**
     * Internal (backend) representation of the ant.
     * The AntWorld essentially acts as the game host of the original AntGame.
     */
    private MyAnt myAnt;
    /**
     * The client agent. In the original AntGame, the host would send JADE messages
     * with the current observation to each client on every tick.
     * In this reinforcement learning environment, the agent is part of the
     * backend in order to make the environment an MDP. Following the convention
     * of OpenAI Gym, the environment should return all vital information from
     * the .step() method (nextState, reward, done). But the AntGame itself only
     * returns an observation for each ant on each tick. These observations are
     * not Markov, hence a "middleware" has to compute unique Markov states
     * from the incoming observations: the (client) ant!
     * The AntAgent has an internal strategy to generate Markov states from
     * observations, for example through an internal grid clone (its "brain").
     * A history, as mentioned in various lectures, would be possible as well.
     */
    private AntAgent antAgent;
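
    // Illustrative sketch of the "middleware" idea described above. This is NOT
    // the real AntAgent (which lives elsewhere in this package); the method and
    // field names below are assumptions for illustration only:
    //
    //     public State feedObservation(AntObservation observation){
    //         // merge the observed cell into the internal grid clone ("brain")
    //         knownGrid.setCell(observation.getPos(), observation.getCell());
    //         // the accumulated knowledge plus the ant's status forms a Markov state
    //         return new AntState(knownGrid, observation.getPos(), observation.hasFood());
    //     }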
    private int tick;
    private int maxEpisodeTicks;

    public AntWorld(int width, int height, double foodDensity){
        grid = new Grid(width, height, foodDensity);
        antAgent = new AntAgent(width, height);
        tick = 0;
        maxEpisodeTicks = 1000;
    }

    public AntWorld(){
        this(Constants.DEFAULT_GRID_WIDTH, Constants.DEFAULT_GRID_HEIGHT, Constants.DEFAULT_FOOD_DENSITY);
    }
    private static class MyAnt{
        Point pos;
        boolean hasFood;
        boolean spawned;
    }
    public StepResult step(DiscreteAction<AntAction> action){
        AntObservation observation;
        State newState;
        double reward = 0;
        String info = "";
        boolean done = false;

        if(!myAnt.spawned){
            myAnt.spawned = true;
            myAnt.pos = grid.getStartPoint();

            observation = new AntObservation(grid.getCell(myAnt.pos), myAnt.pos, myAnt.hasFood);
            newState = antAgent.feedObservation(observation);
            reward = 0.0;
            return new StepResult(newState, reward, false, "Just spawned on the map");
        }

        Cell currentCell = grid.getCell(myAnt.pos);
        Point potentialNextPos = new Point(myAnt.pos.x, myAnt.pos.y);
        boolean stayOnCell = true;
        // flag to trigger a check whether all food has been collected;
        // only set when food was dropped on the starting position
        boolean checkCompletion = false;
        switch (action.getValue()) {
            case MOVE_UP:
                potentialNextPos.y -= 1;
                stayOnCell = false;
                break;
            case MOVE_RIGHT:
                potentialNextPos.x += 1;
                stayOnCell = false;
                break;
            case MOVE_DOWN:
                potentialNextPos.y += 1;
                stayOnCell = false;
                break;
            case MOVE_LEFT:
                potentialNextPos.x -= 1;
                stayOnCell = false;
                break;
            case PICK_UP:
                if(myAnt.hasFood){
                    // Ant tries to pick up food but can only hold one piece
                    reward = Reward.FOOD_PICK_UP_FAIL_HAS_FOOD_ALREADY;
                }else if(currentCell.getFood() == 0){
                    // Ant tries to pick up food on a cell that has no food on it
                    reward = Reward.FOOD_PICK_UP_FAIL_NO_FOOD;
                }else if(currentCell.getFood() > 0){
                    // Ant successfully picks up food
                    currentCell.setFood(currentCell.getFood() - 1);
                    myAnt.hasFood = true;
                    // assumes Reward defines a FOOD_PICK_UP_SUCCESS constant;
                    // rewarding FOOD_DROP_DOWN_SUCCESS here would be a copy-paste slip
                    reward = Reward.FOOD_PICK_UP_SUCCESS;
                }
                break;
            case DROP_DOWN:
                if(!myAnt.hasFood){
                    // Ant has no food to drop
                    reward = Reward.FOOD_DROP_DOWN_FAIL_NO_FOOD;
                }else{
                    // Drop food onto the ground
                    currentCell.setFood(currentCell.getFood() + 1);
                    myAnt.hasFood = false;

                    // negative reward if the agent drops food on any field
                    // other than the starting point
                    if(currentCell.getType() != CellType.START){
                        reward = Reward.FOOD_DROP_DOWN_FAIL_NOT_START;
                    }else{
                        reward = Reward.FOOD_DROP_DOWN_SUCCESS;
                        checkCompletion = true;
                    }
                }
                break;
            default:
                throw new IllegalArgumentException(String.format("Action <%s> is not a valid action!", action));
        }
        // a movement action was selected
        if(!stayOnCell){
            if(!isInGrid(potentialNextPos)){
                stayOnCell = true;
                reward = Reward.RAN_INTO_WALL;
            }else if(hitObstacle(potentialNextPos)){
                stayOnCell = true;
                reward = Reward.RAN_INTO_OBSTACLE;
            }
        }

        // valid movement
        if(!stayOnCell){
            myAnt.pos = potentialNextPos;
            if(antAgent.getCell(myAnt.pos).getType() == CellType.UNKNOWN){
                // the ant will move to a cell that was previously unknown
                reward = Reward.UNKNOWN_FIELD_EXPLORED;
            }else{
                reward = 0;
            }
        }
        // get the observation after the action has been applied
        observation = new AntObservation(grid.getCell(myAnt.pos), myAnt.pos, myAnt.hasFood);

        // let the ant agent process the observation to create a valid Markov state
        newState = antAgent.feedObservation(observation);

        if(checkCompletion){
            done = grid.isAllFoodCollected();
        }

        if(++tick == maxEpisodeTicks){
            done = true;
        }
        return new StepResult(newState, reward, done, info);
    }
    private boolean isInGrid(Point pos){
        // grid cells are indexed from 0 (inclusive) to width/height (exclusive)
        return pos.x >= 0 && pos.x < grid.getWidth() && pos.y >= 0 && pos.y < grid.getHeight();
    }

    private boolean hitObstacle(Point pos){
        return grid.getCell(pos).getType() == CellType.OBSTACLE;
    }
    public void reset() {
        RNG.reseed();
        grid.initRandomWorld();
        // fresh episode: new ant, fresh agent memory, and a reset tick counter,
        // so the timeout check (++tick == maxEpisodeTicks) works across episodes
        antAgent = new AntAgent(grid.getWidth(), grid.getHeight());
        myAnt = new MyAnt();
        tick = 0;
    }

    public void setMaxEpisodeLength(int maxTicks){
        this.maxEpisodeTicks = maxTicks;
    }

    public Point getSpawningPoint(){
        return grid.getStartPoint();
    }
}
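
/*
 * Minimal usage sketch of the Gym-style loop described in the AntAgent javadoc
 * above, kept as a package-private demo class. It assumes that DiscreteAction
 * offers a single-argument constructor taking the enum value and that
 * StepResult exposes the done flag via isDone(); adjust to the actual core.*
 * API if these assumptions do not hold.
 */
class AntWorldDemo {
    public static void main(String[] args) {
        AntWorld world = new AntWorld();
        world.setMaxEpisodeLength(100);
        world.reset();   // must be called before step(): it spawns the ant

        boolean done = false;
        while(!done){
            // a real agent would choose actions based on the returned state;
            // here the ant just keeps moving right until the episode ends
            StepResult result = world.step(new DiscreteAction<>(AntAction.MOVE_RIGHT));
            done = result.isDone();   // assumed accessor
        }
    }
}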