diff --git a/src/main/java/core/DiscreteAction.java b/src/main/java/core/DiscreteAction.java
index 6413ce8..bb90c9d 100644
--- a/src/main/java/core/DiscreteAction.java
+++ b/src/main/java/core/DiscreteAction.java
@@ -7,6 +7,10 @@ public class DiscreteAction implements Action{
         this.action = action;
     }
 
+    public A getValue(){
+        return action;
+    }
+
     @Override
     public int getIndex(){
         return action.ordinal();
diff --git a/src/main/java/core/StepResult.java b/src/main/java/core/StepResult.java
index f515ee6..89c3bf9 100644
--- a/src/main/java/core/StepResult.java
+++ b/src/main/java/core/StepResult.java
@@ -8,7 +8,7 @@ import lombok.Setter;
 @Setter
 @AllArgsConstructor
 public class StepResult {
-    private Observation observation;
+    private State observation;
     private double reward;
     private boolean done;
     private String info;
diff --git a/src/main/java/evironment/antGame/AntAgent.java b/src/main/java/evironment/antGame/AntAgent.java
new file mode 100644
index 0000000..8cf920f
--- /dev/null
+++ b/src/main/java/evironment/antGame/AntAgent.java
@@ -0,0 +1,46 @@
+package evironment.antGame;
+
+import java.awt.Point;
+
+/**
+ * Client-side view of the ant: remembers every cell it has observed so far.
+ */
+public class AntAgent {
+    // the brain
+    private final Cell[][] knownWorld;
+    private Point pos;
+
+    public AntAgent(int width, int height){
+        knownWorld = new Cell[width][height];
+        initUnknownWorld();
+    }
+
+    /**
+     * Stores the observed cell in the known world and moves the remembered
+     * position of the ant onto it.
+     *
+     * @param observation the cell the ant currently perceives
+     * @return a state sized like the known world; it does not contain the observed cells yet
+     */
+    public AntState feedObservation(AntObservation observation){
+        Cell seen = observation.getCell();
+        pos = new Point(seen.getPos());
+        // keep a copy so later changes of the real world do not leak into the brain
+        knownWorld[pos.x][pos.y] = new Cell(new Point(pos), seen.getType(), seen.getFood());
+        // TODO: AntState cannot take over the known cells yet, only the dimensions are passed on
+        int height = knownWorld.length == 0 ? 0 : knownWorld[0].length;
+        return new AntState(knownWorld.length, height);
+    }
+
+    private void initUnknownWorld(){
+        for(int x = 0; x < knownWorld.length; ++x){
+            for(int y = 0; y < knownWorld[x].length; ++y){
+                knownWorld[x][y] = new Cell(new Point(x,y), CellType.UNKNOWN);
+            }
+        }
+    }
+
+    public Point getPos(){
+        return pos;
+    }
+}
diff --git a/src/main/java/evironment/antGame/AntObservation.java b/src/main/java/evironment/antGame/AntObservation.java
index fa4413e..57ddea0 100644
--- a/src/main/java/evironment/antGame/AntObservation.java
+++ b/src/main/java/evironment/antGame/AntObservation.java
@@ -1,6 +1,13 @@
 package evironment.antGame;
 
 import core.Observation;
+import lombok.AllArgsConstructor;
+import lombok.Getter;
+import lombok.Setter;
 
+@AllArgsConstructor
+@Getter
+@Setter
 public class AntObservation implements Observation {
+    private Cell cell;
 }
diff --git a/src/main/java/evironment/antGame/AntState.java b/src/main/java/evironment/antGame/AntState.java
index a9f2181..a499c84 100644
--- a/src/main/java/evironment/antGame/AntState.java
+++ b/src/main/java/evironment/antGame/AntState.java
@@ -4,4 +4,13 @@ import core.State;
 
 // somewhat the "brain" of the agent, current known setting of the environment
 public class AntState implements State {
+    private final Grid knownGrid;
+
+    public AntState(int width, int height){
+        knownGrid = new Grid(width, height);
+    }
+
+    public AntState(){
+        this(Constants.DEFAULT_GRID_WIDTH, Constants.DEFAULT_GRID_HEIGHT);
+    }
 }
diff --git a/src/main/java/evironment/antGame/AntWorld.java b/src/main/java/evironment/antGame/AntWorld.java
index a6cfda4..d156751 100644
--- a/src/main/java/evironment/antGame/AntWorld.java
+++ b/src/main/java/evironment/antGame/AntWorld.java
@@ -1,28 +1,96 @@
 package evironment.antGame;
 
-import core.DiscreteAction;
-import core.Observation;
-import core.RNG;
-import core.StepResult;
+import core.*;
+
+import java.awt.Point;
 
 public class AntWorld {
+    /**
+     * The world hosted by this environment; it is (re)generated on every {@link #reset()}.
+     */
     private Grid grid;
+    /**
+     * Internal (backend) representation of the ant.
+     * The AntWorld essentially acts like the game host of the original AntGame.
+     */
+    private MyAnt myAnt;
+    /**
+     * The client agent. In the original AntGame the host would send jade messages
+     * with the current observation to each client on every tick.
+     * In this reinforcement learning environment, the agent is part of the
+     * "backend" to make this environment an MDP. The environment should (convention of
+     * OpenAI Gym) return all vital information from the .step() method (nextState, reward, done).
+     * But the AntGame itself only returns an observation for each ant on each tick. These
+     * observations are not Markov, hence a "middleware" has to compute the unique Markov states
+     * based upon the received observations -> the (client) ant!
+     * The AntAgent has an internal strategy to generate Markov states from observations,
+     * through an internal grid clone (brain), for example. A history as mentioned in
+     * various lectures could be possible as well.
+     */
+    private AntAgent antAgent;
 
     public AntWorld(int width, int height, double foodDensity){
         grid = new Grid(width, height, foodDensity);
+        antAgent = new AntAgent(width, height);
     }
 
     public AntWorld(){
-        this(30, 30, 0.1);
+        this(Constants.DEFAULT_GRID_WIDTH, Constants.DEFAULT_GRID_HEIGHT, Constants.DEFAULT_FOOD_DENSITY);
+    }
+
+    private static class MyAnt{
+        int x,y;
+        boolean hasFood;
+        boolean spawned;
     }
 
     public StepResult step(DiscreteAction action){
-        Observation observation = new AntObservation();
-        return new StepResult(observation, 0.0, false, "");
+        if(myAnt == null){
+            throw new IllegalStateException("reset() has to be called before the first step()");
+        }
+        if(!myAnt.spawned){
+            // first tick of an episode: put the ant on the start cell, the action is ignored
+            Point spawn = grid.getStartPoint();
+            myAnt.x = spawn.x;
+            myAnt.y = spawn.y;
+            myAnt.spawned = true;
+            return new StepResult(observeCurrentCell(), 0.0, false, "Just spawned on the map");
+        }
+        // TODO: none of the actions changes the ant or the world yet
+        switch (action.getValue()) {
+            case MOVE_UP:
+                break;
+            case MOVE_RIGHT:
+                break;
+            case MOVE_DOWN:
+                break;
+            case MOVE_LEFT:
+                break;
+            case PICK_UP:
+                break;
+            case DROP_DOWN:
+                break;
+            default:
+                throw new RuntimeException(String.format("Action <%s> is not a valid action!", action.toString()));
+        }
+        return new StepResult(observeCurrentCell(), 0.0, false, "");
+    }
+
+    /**
+     * Lets the agent observe the cell the ant stands on and returns the state it derives from it.
+     */
+    private State observeCurrentCell(){
+        AntObservation observation = new AntObservation(grid.getCell(myAnt.x, myAnt.y));
+        return antAgent.feedObservation(observation);
     }
 
-    public void reset(){
+    public void reset() {
         RNG.reseed();
-        grid.initCells();
+        grid.initRandomWorld();
+        myAnt = new MyAnt();
+    }
+
+    public Point getSpawningPoint(){
+        return grid.getStartPoint();
     }
 }
diff --git a/src/main/java/evironment/antGame/Cell.java b/src/main/java/evironment/antGame/Cell.java
index 836d1ee..ab726fc 100644
--- a/src/main/java/evironment/antGame/Cell.java
+++ b/src/main/java/evironment/antGame/Cell.java
@@ -1,23 +1,27 @@
 package evironment.antGame;
 
-public class Cell {
-    private CellType type;
-    private int food;
+import lombok.Getter;
+import lombok.Setter;
 
-    public Cell(CellType cellType, int foodAmount){
+import java.awt.Point;
+
+public class Cell {
+    @Getter
+    private CellType type;
+    @Getter
+    @Setter
+    private int food;
+    @Getter
+    private Point pos;
+
+    public Cell(Point pos, CellType cellType, int foodAmount){
+        this.pos = pos;
         type = cellType;
         food = foodAmount;
     }
 
-    public Cell(CellType cellType){
-        this(cellType, 0);
+    public Cell(Point pos, CellType cellType){
+        this(pos, cellType, 0);
     }
 
-    public void setFoodCount(int amount){
-        food = amount;
-    }
-
-    public int getFoodCount(){
-        return food;
-    }
 }
diff --git a/src/main/java/evironment/antGame/CellType.java b/src/main/java/evironment/antGame/CellType.java
index 1cb1ad3..30a7e46 100644
--- a/src/main/java/evironment/antGame/CellType.java
+++ b/src/main/java/evironment/antGame/CellType.java
@@ -5,4 +5,6 @@ public enum CellType {
     FREE,
     OBSTACLE,
     FOOD,
+    UNKNOWN,
+    POSSIBLE_FOOD,
 }
diff --git a/src/main/java/evironment/antGame/Constants.java b/src/main/java/evironment/antGame/Constants.java
new file mode 100644
index 0000000..416b647
--- /dev/null
+++ b/src/main/java/evironment/antGame/Constants.java
@@ -0,0 +1,11 @@
+package evironment.antGame;
+
+public final class Constants {
+    public static final int DEFAULT_GRID_WIDTH = 30;
+    public static final int DEFAULT_GRID_HEIGHT = 30;
+    public static final double DEFAULT_FOOD_DENSITY = 0.1;
+
+    private Constants(){
+        // holder for constants only, never instantiated
+    }
+}
diff --git a/src/main/java/evironment/antGame/Grid.java b/src/main/java/evironment/antGame/Grid.java
index 409a4e4..ad5f45d 100644
--- a/src/main/java/evironment/antGame/Grid.java
+++ b/src/main/java/evironment/antGame/Grid.java
@@ -19,18 +19,22 @@ public class Grid {
         grid = new Cell[width][height];
     }
 
-    public void initCells(){
+    public Grid(int width, int height){
+        this(width, height, 0);
+    }
+
+    public void initRandomWorld(){
         for(int x = 0; x < width; ++x){
             for(int y = 0; y < height; ++y){
                 if( RNG.getRandom().nextDouble() < foodDensity){
-                    grid[x][y] = new Cell(CellType.FOOD, 1);
+                    grid[x][y] = new Cell(new Point(x,y), CellType.FOOD, 1);
                 }else{
-                    grid[x][y] = new Cell(CellType.FREE);
+                    grid[x][y] = new Cell(new Point(x,y), CellType.FREE);
                 }
             }
         }
         start = new Point(RNG.getRandom().nextInt(width), RNG.getRandom().nextInt(height));
-        grid[start.x][start.y] = new Cell(CellType.START);
+        grid[start.x][start.y] = new Cell(new Point(start.x, start.y), CellType.START);
     }
 
     public Point getStartPoint(){
@@ -41,6 +45,14 @@ public class Grid {
         return grid;
     }
 
+    public Cell getCell(Point pos){
+        return getCell(pos.x, pos.y);
+    }
+
+    public Cell getCell(int x, int y){
+        return grid[x][y];
+    }
+
     public int getWidth(){
         return width;
     }