package evironment.antGame;

import core.Environment;
import core.State;
import core.StepResultEnvironment;
import core.gui.Visualizable;
import evironment.antGame.gui.AntWorldComponent;

import javax.swing.*;
import java.awt.*;
/**
 * Episodic AntWorld. An episode ends once all collected food has been
 * dropped back onto the start cell or the maximum number of ticks is reached.
 */
public class AntWorld implements Environment<AntAction>, Visualizable {
    /**
     * The grid of the world, holding the start point as well as all food
     * and obstacle cells.
     */
    protected Grid grid;

    /**
     * Internal (backend) representation of the ant.
     * The AntWorld essentially acts as the game host of the original AntGame.
     */
    protected Ant myAnt;

    /**
     * The client agent. In the original AntGame the host would send JADE messages
     * containing the current observation to each client on every tick.
     * In this reinforcement learning environment, the agent is part of the
     * "backend" to make this environment an MDP. By the convention of OpenAI Gym,
     * the environment should return all vital information from the .step() method
     * (nextState, reward, done). The AntGame itself, however, only returns an
     * observation for each ant on each tick. These observations are not Markov,
     * hence a "middleware" has to compute unique Markov states from the received
     * observations -> the (client) ant!
     * The AntAgent has an internal strategy to generate Markov states from
     * observations, for example through an internal grid clone (its "brain").
     * A history, as mentioned in various lectures, would be possible as well.
     */
    protected AntAgent antAgent;

    protected int tick;
    private int maxEpisodeTicks;
    public AntWorld(int width, int height) {
        grid = new Grid(width, height);
        antAgent = new AntAgent(width, height);
        myAnt = new Ant();
        maxEpisodeTicks = 1000;
        reset();
    }

    public AntWorld() {
        this(Constants.DEFAULT_GRID_WIDTH, Constants.DEFAULT_GRID_HEIGHT);
    }
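    /**
     * Computes the consequences of the given action: the reward, a potential
     * next position and whether the episode-completion check has to be fired.
     * Moving out of the grid or into an obstacle is penalized and keeps the
     * ant on its current cell; the ant's actual position is only updated
     * later in step().
     */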
    protected StepCalculation processStep(AntAction action) {
        StepCalculation sc = new StepCalculation();
        sc.reward = Reward.DEFAULT_REWARD;
        sc.info = "";
        sc.done = false;

        Cell currentCell = grid.getCell(myAnt.getPos());
        sc.potentialNextPos = new Point(myAnt.getPos().x, myAnt.getPos().y);
        sc.stayOnCell = true;
        // flag to enable the check whether all food has been collected;
        // only fired if food was dropped on the starting position
        sc.checkCompletion = false;

        switch(action) {
            case MOVE_UP:
                sc.potentialNextPos.y -= 1;
                sc.stayOnCell = false;
                break;
            case MOVE_RIGHT:
                sc.potentialNextPos.x += 1;
                sc.stayOnCell = false;
                break;
            case MOVE_DOWN:
                sc.potentialNextPos.y += 1;
                sc.stayOnCell = false;
                break;
            case MOVE_LEFT:
                sc.potentialNextPos.x -= 1;
                sc.stayOnCell = false;
                break;
            case PICK_UP:
                if(myAnt.hasFood()) {
                    // Ant tries to pick up food but can only hold one piece
                    sc.reward = Reward.FOOD_PICK_UP_FAIL_HAS_FOOD_ALREADY;
                } else if(currentCell.getFood() == 0) {
                    // Ant tries to pick up food on a cell that has no food on it
                    sc.reward = Reward.FOOD_PICK_UP_FAIL_NO_FOOD;
                } else if(currentCell.getFood() > 0) {
                    // Ant successfully picks up food
                    currentCell.setFood(currentCell.getFood() - 1);
                    myAnt.setHasFood(true);
                    sc.reward = Reward.FOOD_PICK_UP_SUCCESS;
                }
                break;
            case DROP_DOWN:
                if(!myAnt.hasFood()) {
                    // Ant has no food to drop
                    sc.reward = Reward.FOOD_DROP_DOWN_FAIL_NO_FOOD;
                } else {
                    myAnt.setHasFood(false);
                    // negative reward if the agent drops food on any field
                    // other than the starting point
                    if(currentCell.getType() != CellType.START) {
                        sc.reward = Reward.FOOD_DROP_DOWN_FAIL_NOT_START;
                        // drop the food onto the ground
                        currentCell.setFood(currentCell.getFood() + 1);
                    } else {
                        sc.reward = Reward.FOOD_DROP_DOWN_SUCCESS;
                        myAnt.setPoints(myAnt.getPoints() + 1);
                        sc.checkCompletion = true;
                    }
                }
                break;
            default:
                throw new RuntimeException(String.format("Action <%s> is not a valid action!", action.toString()));
        }

        // a movement action was selected
        if(!sc.stayOnCell) {
            if(!isInGrid(sc.potentialNextPos)) {
                sc.stayOnCell = true;
                sc.reward = Reward.RAN_INTO_WALL;
            } else if(hitObstacle(sc.potentialNextPos)) {
                sc.stayOnCell = true;
                sc.reward = Reward.RAN_INTO_OBSTACLE;
            }
        }

        return sc;
    }
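    /**
     * Advances the environment by one tick: applies the step calculation,
     * moves the ant if the movement was valid, checks for episode completion
     * and returns the Gym-style result (nextState, reward, done, info).
     */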
    @Override
    public StepResultEnvironment step(AntAction action) {
        StepCalculation sc = processStep(action);

        // valid movement
        if(!sc.stayOnCell) {
            myAnt.getPos().setLocation(sc.potentialNextPos);
            if(antAgent.getCell(myAnt.getPos()).getType() == CellType.UNKNOWN) {
                // the ant will move to a cell that was previously unknown
                // TODO: not optimal when going straight for food
                // sc.reward = Reward.UNKNOWN_FIELD_EXPLORED;
            }
        }

        if(sc.checkCompletion) {
            sc.done = grid.isAllFoodCollected();
        }

        if(++tick == maxEpisodeTicks) {
            sc.done = true;
        }

        return new StepResultEnvironment(generateReturnState(), sc.reward, sc.done, sc.info);
    }
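    /**
     * Builds the observation of the ant's current situation and lets the
     * agent turn it into a Markov state.
     */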
    protected State generateReturnState() {
        // get the observation after the action has been processed
        AntObservation observation = new AntObservation(grid.getCell(myAnt.getPos()), myAnt.getPos(), myAnt.hasFood());

        // let the ant agent process the observation to create a valid Markov state
        return antAgent.feedObservation(observation);
    }
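    // Helper predicates for movement validation: a position must lie inside
    // the grid bounds and must not be occupied by an obstacle.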
    protected boolean isInGrid(Point pos) {
        return pos.x >= 0 && pos.x < grid.getWidth() && pos.y >= 0 && pos.y < grid.getHeight();
    }

    protected boolean hitObstacle(Point pos) {
        return grid.getCell(pos).getType() == CellType.OBSTACLE;
    }
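    /**
     * Mutable value holder for the intermediate results of a single
     * {@link #processStep(AntAction)} call.
     */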
    protected class StepCalculation {
        double reward;
        String info;
        boolean done;
        Point potentialNextPos = new Point(myAnt.getPos().x, myAnt.getPos().y);
        boolean stayOnCell = true;
        // flag to enable the check whether all food has been collected;
        // only fired if food was dropped on the starting position
        boolean checkCompletion = false;
    }
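    /**
     * Resets the world, the ant and the agent's knowledge to the initial
     * configuration and returns the first Markov state of the new episode.
     */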
    public State reset() {
        grid.resetWorld();
        antAgent.initUnknownWorld();
        tick = 0;

        myAnt.getPos().setLocation(grid.getStartPoint());
        myAnt.setPoints(0);
        myAnt.setHasFood(false);

        AntObservation observation = new AntObservation(grid.getCell(myAnt.getPos()), myAnt.getPos(), myAnt.hasFood());
        return antAgent.feedObservation(observation);
    }
    public void setMaxEpisodeLength(int maxTicks) {
        this.maxEpisodeTicks = maxTicks;
    }

    public Point getSpawningPoint() {
        return grid.getStartPoint();
    }

    public Cell[][] getCellArray() {
        return grid.getGrid();
    }

    public int getTick() {
        return tick;
    }

    public Ant getAnt() {
        return myAnt;
    }
    @Override
    public JComponent visualize() {
        return new AntWorldComponent(this, this.antAgent);
    }
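
    // Minimal usage sketch of the environment loop. This is an illustration,
    // not part of the original API surface: it assumes StepResultEnvironment
    // exposes an isDone() getter for the done flag passed to its constructor,
    // and it picks actions uniformly at random instead of using a learning agent.
    public static void main(String[] args) {
        AntWorld world = new AntWorld();
        java.util.Random rng = new java.util.Random();

        State state = world.reset();
        boolean done = false;
        while(!done) {
            // a real agent would choose the action based on the current state
            AntAction action = AntAction.values()[rng.nextInt(AntAction.values().length)];
            StepResultEnvironment result = world.step(action);
            done = result.isDone(); // assumed getter, see note above
        }
        System.out.printf("Episode finished after %d ticks with %d points%n",
                world.getTick(), world.getAnt().getPoints());
    }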
}