add default structure for AntAgent
This commit is contained in:
parent
581cf6b28b
commit
ec67ce60c9
|
@ -7,6 +7,10 @@ public class DiscreteAction<A extends Enum> implements Action{
|
||||||
this.action = action;
|
this.action = action;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public A getValue(){
|
||||||
|
return action;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getIndex(){
|
public int getIndex(){
|
||||||
return action.ordinal();
|
return action.ordinal();
|
||||||
|
|
|
@ -8,7 +8,7 @@ import lombok.Setter;
|
||||||
@Setter
|
@Setter
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
public class StepResult {
|
public class StepResult {
|
||||||
private Observation observation;
|
private State observation;
|
||||||
private double reward;
|
private double reward;
|
||||||
private boolean done;
|
private boolean done;
|
||||||
private String info;
|
private String info;
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
package evironment.antGame;
|
||||||
|
|
||||||
|
|
||||||
|
import java.awt.*;
|
||||||
|
|
||||||
|
public class AntAgent {
|
||||||
|
// the brain
|
||||||
|
private Cell[][] knownWorld;
|
||||||
|
private Point pos;
|
||||||
|
|
||||||
|
public AntAgent(int width, int height){
|
||||||
|
knownWorld = new Cell[width][height];
|
||||||
|
initUnknownWorld();
|
||||||
|
}
|
||||||
|
|
||||||
|
public AntState feedObservation(AntObservation observation){
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initUnknownWorld(){
|
||||||
|
for(int x = 0; x < knownWorld.length; ++x){
|
||||||
|
for(int y = 0; y < knownWorld[x].length; ++y){
|
||||||
|
knownWorld[x][y] = new Cell(new Point(x,y), CellType.UNKNOWN);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Point getPos(){
|
||||||
|
return pos;
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,6 +1,15 @@
|
||||||
package evironment.antGame;
|
package evironment.antGame;
|
||||||
|
|
||||||
import core.Observation;
|
import core.Observation;
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.Setter;
|
||||||
|
|
||||||
|
import java.awt.*;
|
||||||
|
|
||||||
|
@AllArgsConstructor
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
public class AntObservation implements Observation {
|
public class AntObservation implements Observation {
|
||||||
|
private Cell cell;
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,4 +4,14 @@ import core.State;
|
||||||
|
|
||||||
// somewhat the "brain" of the agent, current known setting of the environment
|
// somewhat the "brain" of the agent, current known setting of the environment
|
||||||
public class AntState implements State {
|
public class AntState implements State {
|
||||||
|
private Grid knownGrid;
|
||||||
|
|
||||||
|
public AntState(int width, int height){
|
||||||
|
knownGrid = new Grid(width, height);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public AntState(){
|
||||||
|
this(Constants.DEFAULT_GRID_WIDTH, Constants.DEFAULT_GRID_HEIGHT);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,28 +1,85 @@
|
||||||
package evironment.antGame;
|
package evironment.antGame;
|
||||||
|
|
||||||
import core.DiscreteAction;
|
import core.*;
|
||||||
import core.Observation;
|
|
||||||
import core.RNG;
|
import java.awt.*;
|
||||||
import core.StepResult;
|
|
||||||
|
|
||||||
public class AntWorld {
|
public class AntWorld {
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
private Grid grid;
|
private Grid grid;
|
||||||
|
/**
|
||||||
|
* Internal (backend) representation of the ant.
|
||||||
|
* The AntWorld essentially acts like the game host of the original AntGame.
|
||||||
|
*/
|
||||||
|
private MyAnt myAnt;
|
||||||
|
/**
|
||||||
|
* The client agent. In the original AntGame the host would send jade messages
|
||||||
|
* containing the current observation to each client on every tick.
|
||||||
|
* In this reinforcement learning environment, the agent is part of
|
||||||
|
* the "backend" so that the environment is an MDP. By OpenAI Gym convention, the
|
||||||
|
* environment should return all vital information from the .step() method (nextState, reward, done).
|
||||||
|
* But the AntGame itself only returns an observation for each ant on each tick. These
|
||||||
|
* observations are not Markov, hence a "middleware" has to compute the unique Markov states
|
||||||
|
* based upon the received observations -> the (client) ant!
|
||||||
|
* The AntAgent has an internal strategy to generate Markov states from observations,
|
||||||
|
* through an internal grid clone (brain), for example. A history, as mentioned in
|
||||||
|
* various lectures, could be possible as well.
|
||||||
|
*/
|
||||||
|
private AntAgent antAgent;
|
||||||
|
|
||||||
public AntWorld(int width, int height, double foodDensity){
|
public AntWorld(int width, int height, double foodDensity){
|
||||||
grid = new Grid(width, height, foodDensity);
|
grid = new Grid(width, height, foodDensity);
|
||||||
|
antAgent = new AntAgent(width, height);
|
||||||
}
|
}
|
||||||
|
|
||||||
public AntWorld(){
|
public AntWorld(){
|
||||||
this(30, 30, 0.1);
|
this(Constants.DEFAULT_GRID_WIDTH, Constants.DEFAULT_GRID_HEIGHT, Constants.DEFAULT_FOOD_DENSITY);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class MyAnt{
|
||||||
|
int x,y;
|
||||||
|
boolean hasFood;
|
||||||
|
boolean spawned;
|
||||||
}
|
}
|
||||||
|
|
||||||
public StepResult step(DiscreteAction<AntAction> action){
|
public StepResult step(DiscreteAction<AntAction> action){
|
||||||
Observation observation = new AntObservation();
|
AntObservation observation;
|
||||||
return new StepResult(observation, 0.0, false, "");
|
State newState;
|
||||||
|
if(!myAnt.spawned){
|
||||||
|
observation = new AntObservation(grid.getCell(grid.getStartPoint()));
|
||||||
|
newState = antAgent.feedObservation(observation);
|
||||||
|
return new StepResult(newState, 0.0, false, "Just spawned on the map");
|
||||||
|
}
|
||||||
|
switch (action.getValue()) {
|
||||||
|
case MOVE_UP:
|
||||||
|
break;
|
||||||
|
case MOVE_RIGHT:
|
||||||
|
break;
|
||||||
|
case MOVE_DOWN:
|
||||||
|
break;
|
||||||
|
case MOVE_LEFT:
|
||||||
|
break;
|
||||||
|
case PICK_UP:
|
||||||
|
break;
|
||||||
|
case DROP_DOWN:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new RuntimeException(String.format("Action <%s> is not a valid action!", action.toString()));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
newState = antAgent.feedObservation(observation);
|
||||||
|
return new StepResult(newState, 0.0, false, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
public void reset(){
|
public void reset() {
|
||||||
RNG.reseed();
|
RNG.reseed();
|
||||||
grid.initCells();
|
grid.initRandomWorld();
|
||||||
|
myAnt = new MyAnt();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Point getSpawningPoint(){
|
||||||
|
return grid.getStartPoint();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,23 +1,27 @@
|
||||||
package evironment.antGame;
|
package evironment.antGame;
|
||||||
|
|
||||||
public class Cell {
|
import lombok.Getter;
|
||||||
private CellType type;
|
import lombok.Setter;
|
||||||
private int food;
|
|
||||||
|
|
||||||
public Cell(CellType cellType, int foodAmount){
|
import java.awt.*;
|
||||||
|
|
||||||
|
public class Cell {
|
||||||
|
@Getter
|
||||||
|
private CellType type;
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
private int food;
|
||||||
|
@Getter
|
||||||
|
private Point pos;
|
||||||
|
|
||||||
|
public Cell(Point pos, CellType cellType, int foodAmount){
|
||||||
|
this.pos = pos;
|
||||||
type = cellType;
|
type = cellType;
|
||||||
food = foodAmount;
|
food = foodAmount;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Cell(CellType cellType){
|
public Cell( Point pos, CellType cellType){
|
||||||
this(cellType, 0);
|
this(pos, cellType, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setFoodCount(int amount){
|
|
||||||
food = amount;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getFoodCount(){
|
|
||||||
return food;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,4 +5,6 @@ public enum CellType {
|
||||||
FREE,
|
FREE,
|
||||||
OBSTACLE,
|
OBSTACLE,
|
||||||
FOOD,
|
FOOD,
|
||||||
|
UNKNOWN,
|
||||||
|
POSSIBLE_FOOD,
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,7 @@
|
||||||
|
package evironment.antGame;
|
||||||
|
|
||||||
|
public class Constants {
|
||||||
|
public static final int DEFAULT_GRID_WIDTH = 30;
|
||||||
|
public static final int DEFAULT_GRID_HEIGHT = 30;
|
||||||
|
public static final double DEFAULT_FOOD_DENSITY = 0.1;
|
||||||
|
}
|
|
@ -19,18 +19,22 @@ public class Grid {
|
||||||
grid = new Cell[width][height];
|
grid = new Cell[width][height];
|
||||||
}
|
}
|
||||||
|
|
||||||
public void initCells(){
|
public Grid(int width, int height){
|
||||||
|
this(width, height, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void initRandomWorld(){
|
||||||
for(int x = 0; x < width; ++x){
|
for(int x = 0; x < width; ++x){
|
||||||
for(int y = 0; y < height; ++y){
|
for(int y = 0; y < height; ++y){
|
||||||
if( RNG.getRandom().nextDouble() < foodDensity){
|
if( RNG.getRandom().nextDouble() < foodDensity){
|
||||||
grid[x][y] = new Cell(CellType.FOOD, 1);
|
grid[x][y] = new Cell(new Point(x,y), CellType.FOOD, 1);
|
||||||
}else{
|
}else{
|
||||||
grid[x][y] = new Cell(CellType.FREE);
|
grid[x][y] = new Cell(new Point(x,y), CellType.FREE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
start = new Point(RNG.getRandom().nextInt(width), RNG.getRandom().nextInt(height));
|
start = new Point(RNG.getRandom().nextInt(width), RNG.getRandom().nextInt(height));
|
||||||
grid[start.x][start.y] = new Cell(CellType.START);
|
grid[start.x][start.y] = new Cell(new Point(start.x, start.y), CellType.START);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Point getStartPoint(){
|
public Point getStartPoint(){
|
||||||
|
@ -41,6 +45,12 @@ public class Grid {
|
||||||
return grid;
|
return grid;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Cell getCell(Point pos){
|
||||||
|
return grid[pos.x][pos.y];
|
||||||
|
}
|
||||||
|
public Cell getCell(int x, int y){
|
||||||
|
return grid[x][y];
|
||||||
|
}
|
||||||
public int getWidth(){
|
public int getWidth(){
|
||||||
return width;
|
return width;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue