add default structure for AntAgent
parent 581cf6b28b
commit ec67ce60c9
@@ -7,6 +7,10 @@ public class DiscreteAction<A extends Enum> implements Action{
         this.action = action;
     }
 
+    public A getValue(){
+        return action;
+    }
+
     @Override
     public int getIndex(){
         return action.ordinal();
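Note: getValue() is the accessor that AntWorld.step() switches on further down. A minimal usage sketch, assuming the DiscreteAction constructor implied by the this.action = action context line and the AntAction enum referenced in the AntWorld hunk:

    // Sketch only: wrap a concrete enum constant in the generic action type.
    DiscreteAction<AntAction> action = new DiscreteAction<>(AntAction.PICK_UP);
    AntAction value = action.getValue();  // the wrapped enum constant
    int index = action.getIndex();        // its ordinal, e.g. usable as a table index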
@@ -8,7 +8,7 @@ import lombok.Setter;
 @Setter
 @AllArgsConstructor
 public class StepResult {
-    private Observation observation;
+    private State observation;
     private double reward;
     private boolean done;
     private String info;
@@ -0,0 +1,31 @@
+package evironment.antGame;
+
+
+import java.awt.*;
+
+public class AntAgent {
+    // the brain
+    private Cell[][] knownWorld;
+    private Point pos;
+
+    public AntAgent(int width, int height){
+        knownWorld = new Cell[width][height];
+        initUnknownWorld();
+    }
+
+    public AntState feedObservation(AntObservation observation){
+
+    }
+
+    private void initUnknownWorld(){
+        for(int x = 0; x < knownWorld.length; ++x){
+            for(int y = 0; y < knownWorld[x].length; ++y){
+                knownWorld[x][y] = new Cell(new Point(x,y), CellType.UNKNOWN);
+            }
+        }
+    }
+
+    public Point getPos(){
+        return pos;
+    }
+}
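Note: feedObservation() is committed with an empty body, so this file does not compile yet. One minimal sketch of what the method might do, assuming the lombok-generated getCell() on AntObservation and getPos() on Cell from the hunks below; the trailing AntState construction is a hypothetical placeholder, not part of this commit:

    public AntState feedObservation(AntObservation observation){
        Cell observed = observation.getCell();   // lombok @Getter on AntObservation
        Point p = observed.getPos();             // lombok @Getter on Cell
        knownWorld[p.x][p.y] = observed;         // replace the UNKNOWN cell in the brain
        pos = p;                                 // remember the ant's position
        return new AntState();                   // TODO: derive the state from knownWorld
    }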
@@ -1,6 +1,15 @@
 package evironment.antGame;
 
 import core.Observation;
+import lombok.AllArgsConstructor;
+import lombok.Getter;
+import lombok.Setter;
+
+import java.awt.*;
 
+@AllArgsConstructor
+@Getter
+@Setter
 public class AntObservation implements Observation {
+    private Cell cell;
 }
@@ -4,4 +4,14 @@ import core.State;
 
 // somewhat the "brain" of the agent, current known setting of the environment
 public class AntState implements State {
+    private Grid knownGrid;
+
+    public AntState(int width, int height){
+        knownGrid = new Grid(width, height);
+
+    }
+
+    public AntState(){
+        this(Constants.DEFAULT_GRID_WIDTH, Constants.DEFAULT_GRID_HEIGHT);
+    }
 }
@@ -1,28 +1,85 @@
 package evironment.antGame;
 
-import core.DiscreteAction;
-import core.Observation;
-import core.RNG;
-import core.StepResult;
+import core.*;
 
 import java.awt.*;
 
 public class AntWorld {
+    /**
+     *
+     */
     private Grid grid;
+    /**
+     * Internal (backend) representation of the ant.
+     * The AntWorld essentially acts like the game host of the original AntGame.
+     */
+    private MyAnt myAnt;
+    /**
+     * The client agent. In the original AntGame the host would send jade messages
+     * with the current observation to each client on every tick.
+     * In this reinforcement learning environment, the agent is part of the
+     * "backend" to make this environment an MDP. The environment should (by the
+     * openGym convention) return all vital information from the .step() method
+     * (nextState, reward, done). But the antGame itself only returns an observation
+     * for each ant on each tick. These observations are not Markov, hence a
+     * "middleware" has to compute the unique Markov states based upon the received
+     * observations -> the (client) ant!
+     * The AntAgent has an internal strategy to generate Markov states from
+     * observations, for example through an internal grid clone (brain). A history,
+     * as mentioned in various lectures, would be possible as well.
+     */
+    private AntAgent antAgent;
 
     public AntWorld(int width, int height, double foodDensity){
         grid = new Grid(width, height, foodDensity);
+        antAgent = new AntAgent(width, height);
     }
 
     public AntWorld(){
-        this(30, 30, 0.1);
+        this(Constants.DEFAULT_GRID_WIDTH, Constants.DEFAULT_GRID_HEIGHT, Constants.DEFAULT_FOOD_DENSITY);
     }
 
+    private static class MyAnt{
+        int x,y;
+        boolean hasFood;
+        boolean spawned;
+    }
+
     public StepResult step(DiscreteAction<AntAction> action){
-        Observation observation = new AntObservation();
-        return new StepResult(observation, 0.0, false, "");
+        AntObservation observation;
+        State newState;
+        if(!myAnt.spawned){
+            observation = new AntObservation(grid.getCell(grid.getStartPoint()));
+            newState = antAgent.feedObservation(observation);
+            return new StepResult(newState, 0.0, false, "Just spawned on the map");
+        }
+        switch (action.getValue()) {
+            case MOVE_UP:
+                break;
+            case MOVE_RIGHT:
+                break;
+            case MOVE_DOWN:
+                break;
+            case MOVE_LEFT:
+                break;
+            case PICK_UP:
+                break;
+            case DROP_DOWN:
+                break;
+            default:
+                throw new RuntimeException(String.format("Action <%s> is not a valid action!", action.toString()));
+        }
+        newState = antAgent.feedObservation(observation);
+        return new StepResult(newState, 0.0, false, "");
     }
 
     public void reset() {
         RNG.reseed();
-        grid.initCells();
+        grid.initRandomWorld();
+        myAnt = new MyAnt();
     }
 
     public Point getSpawningPoint(){
         return grid.getStartPoint();
     }
 }
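Per the javadoc above, step() follows the openGym convention of returning (nextState, reward, done, info) on every tick, with the first call spawning the ant on the start cell. A hedged sketch of the resulting call pattern; the driver code is illustrative and not part of this commit:

    AntWorld world = new AntWorld();   // Constants defaults: 30x30 grid, 0.1 food density
    world.reset();                     // reseed the RNG and build a fresh random world
    StepResult spawn = world.step(new DiscreteAction<>(AntAction.MOVE_UP));  // spawn tick
    StepResult result = world.step(new DiscreteAction<>(AntAction.MOVE_UP)); // first real move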
@@ -1,23 +1,27 @@
 package evironment.antGame;
 
+import lombok.Getter;
+import lombok.Setter;
+
+import java.awt.*;
+
 public class Cell {
+    @Getter
     private CellType type;
+    @Getter
+    @Setter
     private int food;
+    @Getter
+    private Point pos;
 
-    public Cell(CellType cellType, int foodAmount){
+    public Cell(Point pos, CellType cellType, int foodAmount){
+        this.pos = pos;
         type = cellType;
         food = foodAmount;
     }
 
-    public Cell(CellType cellType){
-        this(cellType, 0);
+    public Cell(Point pos, CellType cellType){
+        this(pos, cellType, 0);
     }
 
     public void setFoodCount(int amount){
         food = amount;
     }
 
     public int getFoodCount(){
         return food;
     }
 }
@@ -5,4 +5,6 @@ public enum CellType {
     FREE,
     OBSTACLE,
     FOOD,
+    UNKNOWN,
+    POSSIBLE_FOOD,
 }
@@ -0,0 +1,7 @@
+package evironment.antGame;
+
+public class Constants {
+    public static final int DEFAULT_GRID_WIDTH = 30;
+    public static final int DEFAULT_GRID_HEIGHT = 30;
+    public static final double DEFAULT_FOOD_DENSITY = 0.1;
+}
@@ -19,18 +19,22 @@ public class Grid {
         grid = new Cell[width][height];
     }
 
-    public void initCells(){
+    public Grid(int width, int height){
+        this(width, height, 0);
+    }
+
+    public void initRandomWorld(){
         for(int x = 0; x < width; ++x){
             for(int y = 0; y < height; ++y){
                 if( RNG.getRandom().nextDouble() < foodDensity){
-                    grid[x][y] = new Cell(CellType.FOOD, 1);
+                    grid[x][y] = new Cell(new Point(x,y), CellType.FOOD, 1);
                 }else{
-                    grid[x][y] = new Cell(CellType.FREE);
+                    grid[x][y] = new Cell(new Point(x,y), CellType.FREE);
                 }
             }
         }
         start = new Point(RNG.getRandom().nextInt(width), RNG.getRandom().nextInt(height));
-        grid[start.x][start.y] = new Cell(CellType.START);
+        grid[start.x][start.y] = new Cell(new Point(start.x, start.y), CellType.START);
     }
 
     public Point getStartPoint(){
@@ -41,6 +45,12 @@ public class Grid {
         return grid;
     }
 
+    public Cell getCell(Point pos){
+        return grid[pos.x][pos.y];
+    }
+    public Cell getCell(int x, int y){
+        return grid[x][y];
+    }
     public int getWidth(){
         return width;
     }