From ec67ce60c9ed935f256483f668c6dea6c8c8f082 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20L=C3=B6wenstrom?= <jan.loewenstrom@web.de>
Date: Sun, 8 Dec 2019 13:15:20 +0100
Subject: [PATCH] add default structure for AntAgent

---
 src/main/java/core/DiscreteAction.java        |  4 +
 src/main/java/core/StepResult.java            |  2 +-
 .../java/evironment/antGame/AntAgent.java     | 31 ++++++++
 .../evironment/antGame/AntObservation.java    |  9 +++
 .../java/evironment/antGame/AntState.java     | 10 +++
 .../java/evironment/antGame/AntWorld.java     | 75 ++++++++++++++++---
 src/main/java/evironment/antGame/Cell.java    | 30 ++++----
 .../java/evironment/antGame/CellType.java     |  2 +
 .../java/evironment/antGame/Constants.java    |  7 ++
 src/main/java/evironment/antGame/Grid.java    | 18 ++++-
 10 files changed, 161 insertions(+), 27 deletions(-)
 create mode 100644 src/main/java/evironment/antGame/AntAgent.java
 create mode 100644 src/main/java/evironment/antGame/Constants.java

diff --git a/src/main/java/core/DiscreteAction.java b/src/main/java/core/DiscreteAction.java
index 6413ce8..bb90c9d 100644
--- a/src/main/java/core/DiscreteAction.java
+++ b/src/main/java/core/DiscreteAction.java
@@ -7,6 +7,10 @@ public class DiscreteAction<A extends Enum> implements Action{
         this.action = action;
     }
 
+    public A getValue(){
+        return action;
+    }
+
     @Override
     public int getIndex(){
        return action.ordinal();
diff --git a/src/main/java/core/StepResult.java b/src/main/java/core/StepResult.java
index f515ee6..89c3bf9 100644
--- a/src/main/java/core/StepResult.java
+++ b/src/main/java/core/StepResult.java
@@ -8,7 +8,7 @@ import lombok.Setter;
 @Setter
 @AllArgsConstructor
 public class StepResult {
-    private Observation observation;
+    private State observation;
     private double reward;
     private boolean done;
     private String info;
diff --git a/src/main/java/evironment/antGame/AntAgent.java b/src/main/java/evironment/antGame/AntAgent.java
new file mode 100644
index 0000000..8cf920f
--- /dev/null
+++ b/src/main/java/evironment/antGame/AntAgent.java
@@ -0,0 +1,31 @@
+package evironment.antGame;
+
+
+import java.awt.*;
+
+public class AntAgent {
+    // the brain
+    private Cell[][] knownWorld;
+    private Point pos;
+
+    public AntAgent(int width, int height){
+        knownWorld = new Cell[width][height];
+        initUnknownWorld();
+    }
+    // Stub: fails loudly until implemented; an empty body cannot compile (missing return).
+    public AntState feedObservation(AntObservation observation){
+        throw new UnsupportedOperationException("feedObservation is not implemented yet");
+    }
+
+    private void initUnknownWorld(){
+        for(int x = 0; x < knownWorld.length; ++x){
+            for(int y = 0; y < knownWorld[x].length; ++y){
+                knownWorld[x][y] = new Cell(new Point(x,y), CellType.UNKNOWN);
+            }
+        }
+    }
+
+    public Point getPos(){
+        return pos;
+    }
+}
diff --git a/src/main/java/evironment/antGame/AntObservation.java b/src/main/java/evironment/antGame/AntObservation.java
index fa4413e..57ddea0 100644
--- a/src/main/java/evironment/antGame/AntObservation.java
+++ b/src/main/java/evironment/antGame/AntObservation.java
@@ -1,6 +1,15 @@
 package evironment.antGame;
 
 import core.Observation;
+import lombok.AllArgsConstructor;
+import lombok.Getter;
+import lombok.Setter;
 
+import java.awt.*;
+
+@AllArgsConstructor
+@Getter
+@Setter
 public class AntObservation implements Observation {
+   private Cell cell;
 }
diff --git a/src/main/java/evironment/antGame/AntState.java b/src/main/java/evironment/antGame/AntState.java
index a9f2181..a499c84 100644
--- a/src/main/java/evironment/antGame/AntState.java
+++ b/src/main/java/evironment/antGame/AntState.java
@@ -4,4 +4,14 @@ import core.State;
 
 // somewhat the "brain" of the agent, current known setting of the environment
 public class AntState implements State {
+    private Grid knownGrid;
+
+    public AntState(int width, int height){
+        knownGrid = new Grid(width, height);
+
+    }
+
+    public AntState(){
+        this(Constants.DEFAULT_GRID_WIDTH, Constants.DEFAULT_GRID_HEIGHT);
+    }
 }
diff --git a/src/main/java/evironment/antGame/AntWorld.java b/src/main/java/evironment/antGame/AntWorld.java
index a6cfda4..d156751 100644
--- a/src/main/java/evironment/antGame/AntWorld.java
+++ b/src/main/java/evironment/antGame/AntWorld.java
@@ -1,28 +1,85 @@
 package evironment.antGame;
 
-import core.DiscreteAction;
-import core.Observation;
-import core.RNG;
-import core.StepResult;
+import core.*;
+
+import java.awt.*;
 
 public class AntWorld {
+    /**
+     * The environment's own grid; reset() repopulates it via initRandomWorld().
+     */
     private Grid grid;
+    /**
+     * Internal (backend) representation of the ant.
+     * The AntWorld essentially acts like the game host of the original AntGame.
+     */
+    private MyAnt myAnt;
+    /**
+     * The client agent. In the original AntGame the host would send JADE messages
+     * with the current observation to each client on every tick.
+     * In this reinforcement learning environment, the agent is part of the
+     * "backend" to make this environment an MDP. The environment should (by the OpenAI Gym
+     * convention) return all vital information from the .step() method (nextState, reward, done).
+     * But the AntGame itself only returns an observation for each ant on each tick. These
+     * observations are not Markov, hence a "middleware" has to compute the unique Markov states
+     * from the observations it receives -> the (client) ant!
+     * The AntAgent has an internal strategy to generate Markov states from observations,
+     * through an internal copy of the grid (its "brain"), for example. A history, as mentioned in
+     * various lectures, would be possible as well.
+     */
+    private AntAgent antAgent;
 
     public AntWorld(int width, int height, double foodDensity){
         grid = new Grid(width, height, foodDensity);
+        antAgent = new AntAgent(width, height);
     }
 
     public AntWorld(){
-        this(30, 30, 0.1);
+        this(Constants.DEFAULT_GRID_WIDTH, Constants.DEFAULT_GRID_HEIGHT, Constants.DEFAULT_FOOD_DENSITY);
+    }
+
+    private static class MyAnt{
+        int x,y;
+        boolean hasFood;
+        boolean spawned;
     }
 
     public StepResult step(DiscreteAction<AntAction> action){
-        Observation observation = new AntObservation();
-        return new StepResult(observation, 0.0, false, "");
+        // Precondition: reset() has been called, since that is where myAnt is created.
+        // No action moves the ant yet, so every step observes the start cell.
+        AntObservation observation = new AntObservation(grid.getCell(grid.getStartPoint()));
+        if(!myAnt.spawned){
+            State spawnState = antAgent.feedObservation(observation);
+            myAnt.spawned = true; // without this every call took the "just spawned" branch
+            return new StepResult(spawnState, 0.0, false, "Just spawned on the map");
+        }
+        switch (action.getValue()) {
+            case MOVE_UP:
+                break;
+            case MOVE_RIGHT:
+                break;
+            case MOVE_DOWN:
+                break;
+            case MOVE_LEFT:
+                break;
+            case PICK_UP:
+                break;
+            case DROP_DOWN:
+                break;
+            default:
+                throw new RuntimeException(String.format("Action <%s> is not a valid action!", action.toString()));
+        }
+        State newState = antAgent.feedObservation(observation);
+        return new StepResult(newState, 0.0, false, "");
     }
 
-    public void reset(){
+    public void reset() {
         RNG.reseed();
-        grid.initCells();
+        grid.initRandomWorld();
+        myAnt = new MyAnt();
+    }
+
+    public Point getSpawningPoint(){
+        return grid.getStartPoint();
     }
 }
diff --git a/src/main/java/evironment/antGame/Cell.java b/src/main/java/evironment/antGame/Cell.java
index 836d1ee..ab726fc 100644
--- a/src/main/java/evironment/antGame/Cell.java
+++ b/src/main/java/evironment/antGame/Cell.java
@@ -1,23 +1,27 @@
 package evironment.antGame;
 
-public class Cell {
-    private CellType type;
-    private int food;
+import lombok.Getter;
+import lombok.Setter;
 
-    public Cell(CellType cellType, int foodAmount){
+import java.awt.*;
+
+public class Cell {
+    @Getter
+    private CellType type;
+    @Getter
+    @Setter
+    private int food;
+    @Getter
+    private Point pos;
+
+    public Cell(Point pos, CellType cellType, int foodAmount){
+        this.pos = pos;
         type = cellType;
         food = foodAmount;
     }
 
-    public Cell(CellType cellType){
-       this(cellType, 0);
+    public Cell( Point pos, CellType cellType){
+       this(pos, cellType, 0);
     }
 
-    public void setFoodCount(int amount){
-        food = amount;
-    }
-
-    public int getFoodCount(){
-        return food;
-    }
 }
diff --git a/src/main/java/evironment/antGame/CellType.java b/src/main/java/evironment/antGame/CellType.java
index 1cb1ad3..30a7e46 100644
--- a/src/main/java/evironment/antGame/CellType.java
+++ b/src/main/java/evironment/antGame/CellType.java
@@ -5,4 +5,6 @@ public enum CellType {
     FREE,
     OBSTACLE,
     FOOD,
+    UNKNOWN,
+    POSSIBLE_FOOD,
 }
diff --git a/src/main/java/evironment/antGame/Constants.java b/src/main/java/evironment/antGame/Constants.java
new file mode 100644
index 0000000..416b647
--- /dev/null
+++ b/src/main/java/evironment/antGame/Constants.java
@@ -0,0 +1,7 @@
+package evironment.antGame;
+
+public class Constants {
+    public static final int DEFAULT_GRID_WIDTH = 30;
+    public static final int DEFAULT_GRID_HEIGHT = 30;
+    public static final double DEFAULT_FOOD_DENSITY = 0.1;
+}
diff --git a/src/main/java/evironment/antGame/Grid.java b/src/main/java/evironment/antGame/Grid.java
index 409a4e4..ad5f45d 100644
--- a/src/main/java/evironment/antGame/Grid.java
+++ b/src/main/java/evironment/antGame/Grid.java
@@ -19,18 +19,22 @@ public class Grid {
         grid = new Cell[width][height];
     }
 
-    public void initCells(){
+    public Grid(int width, int height){
+        this(width, height, 0);
+    }
+
+    public void initRandomWorld(){
         for(int x = 0; x < width; ++x){
             for(int y = 0; y < height; ++y){
                 if( RNG.getRandom().nextDouble() < foodDensity){
-                    grid[x][y] = new Cell(CellType.FOOD, 1);
+                    grid[x][y] = new Cell(new Point(x,y), CellType.FOOD, 1);
                 }else{
-                    grid[x][y] = new Cell(CellType.FREE);
+                    grid[x][y] = new Cell(new Point(x,y), CellType.FREE);
                 }
             }
         }
         start = new Point(RNG.getRandom().nextInt(width), RNG.getRandom().nextInt(height));
-        grid[start.x][start.y] = new Cell(CellType.START);
+        grid[start.x][start.y] = new Cell(new Point(start.x, start.y), CellType.START);
     }
 
     public Point getStartPoint(){
@@ -41,6 +45,12 @@ public class Grid {
         return grid;
     }
 
+    public Cell getCell(Point pos){
+        return grid[pos.x][pos.y];
+    }
+    public Cell getCell(int x, int y){
+        return grid[x][y];
+    }
     public int getWidth(){
         return width;
     }