refo/src/main/java/evironment/antGame/AntWorld.java

package evironment.antGame;

import core.Environment;
import core.State;
import core.StepResultEnvironment;
import core.gui.Visualizable;
import evironment.antGame.gui.AntWorldComponent;

import javax.swing.*;
import java.awt.*;

/**
 * Episodic AntWorld: a grid world in which a single ant must collect all
 * food and carry it back to its starting cell, one piece at a time.
 */
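/*
 * A minimal usage sketch of the episode loop (illustrative only; it assumes
 * StepResultEnvironment exposes an isDone() accessor, which is not shown in
 * this file):
 *
 *   AntWorld world = new AntWorld();
 *   State state = world.reset();
 *   StepResultEnvironment result;
 *   do {
 *       result = world.step(AntAction.MOVE_UP); // replace with a policy's action
 *   } while (!result.isDone());
 */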
public class AntWorld implements Environment<AntAction>, Visualizable {
    /**
     * The backend grid holding the true state of the world.
     */
    protected Grid grid;
    /**
     * Internal (backend) representation of the ant.
     * The AntWorld essentially acts as the game host of the original AntGame.
     */
    protected Ant myAnt;
    /**
     * The client agent. In the original AntGame the host would send JADE messages
     * containing the current observation to each client on every tick.
     * In this reinforcement learning environment, the agent is part of the
     * backend, which makes the environment an MDP. By convention (e.g. OpenAI
     * Gym), the environment should return all vital information from the .step()
     * method (nextState, reward, done). The AntGame itself, however, only returns
     * an observation for each ant on each tick. These observations are not
     * Markov, so a "middleware" has to compute unique Markov states from the
     * received observations -> the (client) ant!
     * The AntAgent has an internal strategy to generate Markov states from
     * observations, for example through an internal grid clone (its "brain").
     * A history, as mentioned in various lectures, would be possible as well.
     */
    protected AntAgent antAgent;
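    // Illustrative sketch of the middleware idea above (hypothetical names,
    // not the actual AntAgent implementation): fold each observation into a
    // persistent internal grid clone and derive the Markov state from it.
    //
    //   public State feedObservation(AntObservation obs) {
    //       brain[obs.getPos().x][obs.getPos().y] = obs.getCell(); // remember what was seen
    //       return new AntState(brain, obs.getPos(), obs.hasFood());
    //   }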
    protected int tick;
    private int maxEpisodeTicks;
    public AntWorld(int width, int height) {
        grid = new Grid(width, height);
        antAgent = new AntAgent(width, height);
        myAnt = new Ant();
        maxEpisodeTicks = 1000;
        reset();
    }

    public AntWorld() {
        this(Constants.DEFAULT_GRID_WIDTH, Constants.DEFAULT_GRID_HEIGHT);
    }
    protected StepCalculation processStep(AntAction action) {
        StepCalculation sc = new StepCalculation();
        sc.reward = Reward.DEFAULT_REWARD;
        sc.info = "";
        sc.done = false;

        Cell currentCell = grid.getCell(myAnt.getPos());
        sc.potentialNextPos = new Point(myAnt.getPos().x, myAnt.getPos().y);
        sc.stayOnCell = true;
        // Flag to trigger a completion check; it is only set when food was
        // dropped on the starting cell.
        sc.checkCompletion = false;

        switch(action) {
            case MOVE_UP:
                sc.potentialNextPos.y -= 1;
                sc.stayOnCell = false;
                break;
            case MOVE_RIGHT:
                sc.potentialNextPos.x += 1;
                sc.stayOnCell = false;
                break;
            case MOVE_DOWN:
                sc.potentialNextPos.y += 1;
                sc.stayOnCell = false;
                break;
            case MOVE_LEFT:
                sc.potentialNextPos.x -= 1;
                sc.stayOnCell = false;
                break;
            case PICK_UP:
                if(myAnt.hasFood()) {
                    // The ant tries to pick up food but can only hold one piece
                    sc.reward = Reward.FOOD_PICK_UP_FAIL_HAS_FOOD_ALREADY;
                } else if(currentCell.getFood() == 0) {
                    // The ant tries to pick up food on a cell that has none
                    sc.reward = Reward.FOOD_PICK_UP_FAIL_NO_FOOD;
                } else if(currentCell.getFood() > 0) {
                    // The ant successfully picks up one piece of food
                    currentCell.setFood(currentCell.getFood() - 1);
                    myAnt.setHasFood(true);
                    sc.reward = Reward.FOOD_PICK_UP_SUCCESS;
                }
                break;
            case DROP_DOWN:
                if(!myAnt.hasFood()) {
                    // The ant has no food to drop
                    sc.reward = Reward.FOOD_DROP_DOWN_FAIL_NO_FOOD;
                } else {
                    myAnt.setHasFood(false);
                    // Negative reward if the agent drops food on any cell
                    // other than the starting point
                    if(currentCell.getType() != CellType.START) {
                        sc.reward = Reward.FOOD_DROP_DOWN_FAIL_NOT_START;
                        // Drop the food onto the ground
                        currentCell.setFood(currentCell.getFood() + 1);
                    } else {
                        sc.reward = Reward.FOOD_DROP_DOWN_SUCCESS;
                        myAnt.setPoints(myAnt.getPoints() + 1);
                        sc.checkCompletion = true;
                    }
                }
                break;
            default:
                throw new RuntimeException(String.format("Action <%s> is not a valid action!", action));
        }

        // A movement action was selected
        if(!sc.stayOnCell) {
            if(!isInGrid(sc.potentialNextPos)) {
                sc.stayOnCell = true;
                sc.reward = Reward.RAN_INTO_WALL;
            } else if(hitObstacle(sc.potentialNextPos)) {
                sc.stayOnCell = true;
                sc.reward = Reward.RAN_INTO_OBSTACLE;
            }
        }
        return sc;
    }
    @Override
    public StepResultEnvironment step(AntAction action) {
        StepCalculation sc = processStep(action);

        // Valid movement
        if(!sc.stayOnCell) {
            myAnt.getPos().setLocation(sc.potentialNextPos);
            if(antAgent.getCell(myAnt.getPos()).getType() == CellType.UNKNOWN) {
                // The ant moves onto a cell that was previously unknown
                // TODO: not optimal for going straight for food
                // sc.reward = Reward.UNKNOWN_FIELD_EXPLORED;
            }
        }

        if(sc.checkCompletion) {
            sc.done = grid.isAllFoodCollected();
        }
        if(++tick == maxEpisodeTicks) {
            sc.done = true;
        }
        return new StepResultEnvironment(generateReturnState(), sc.reward, sc.done, sc.info);
    }
    protected State generateReturnState() {
        // Get the observation after the action was applied
        AntObservation observation = new AntObservation(grid.getCell(myAnt.getPos()), myAnt.getPos(), myAnt.hasFood());
        // Let the ant agent process the observation to create a valid Markov state
        return antAgent.feedObservation(observation);
    }
    protected boolean isInGrid(Point pos) {
        return pos.x >= 0 && pos.x < grid.getWidth() && pos.y >= 0 && pos.y < grid.getHeight();
    }

    protected boolean hitObstacle(Point pos) {
        return grid.getCell(pos).getType() == CellType.OBSTACLE;
    }
    /**
     * Plain data holder for the intermediate results of a single step.
     */
    protected class StepCalculation {
        double reward;
        String info;
        boolean done;
        Point potentialNextPos = new Point(myAnt.getPos().x, myAnt.getPos().y);
        boolean stayOnCell = true;
        // Flag to trigger a completion check; it is only set when food was
        // dropped on the starting cell.
        boolean checkCompletion = false;
    }
    public State reset() {
        grid.resetWorld();
        antAgent.initUnknownWorld();
        tick = 0;
        myAnt.getPos().setLocation(grid.getStartPoint());
        myAnt.setPoints(0);
        myAnt.setHasFood(false);

        AntObservation observation = new AntObservation(grid.getCell(myAnt.getPos()), myAnt.getPos(), myAnt.hasFood());
        return antAgent.feedObservation(observation);
    }

    public void setMaxEpisodeLength(int maxTicks) {
        this.maxEpisodeTicks = maxTicks;
    }
    public Point getSpawningPoint() {
        return grid.getStartPoint();
    }

    public Cell[][] getCellArray() {
        return grid.getGrid();
    }

    public int getTick() {
        return tick;
    }

    public Ant getAnt() {
        return myAnt;
    }

    @Override
    public JComponent visualize() {
        return new AntWorldComponent(this, this.antAgent);
    }
}