refo/src/main/java/evironment/antGame/AntWorld.java

package evironment.antGame;
import core.*;
import java.awt.*;
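/**
* Gym-style reinforcement learning environment that wraps the original AntGame.
* A minimal sketch of an episode loop is shown below; the DiscreteAction
* constructor taking an AntAction value and the StepResult.isDone() accessor
* are assumptions for illustration, not confirmed by this file:
* <pre>{@code
* AntWorld world = new AntWorld();
* world.setMaxEpisodeLength(500);
* world.reset();
* java.util.Random rnd = new java.util.Random();
* StepResult result;
* do {
*     // illustrative only: act uniformly at random instead of using a learned policy
*     AntAction a = AntAction.values()[rnd.nextInt(AntAction.values().length)];
*     result = world.step(new DiscreteAction<>(a));
* } while (!result.isDone());
* }</pre>
*/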
public class AntWorld {
/**
* The true (backend) grid of the world, holding the cells with their food,
* obstacles and the start point. The ant only ever observes it partially.
*/
private Grid grid;
/**
* Internal (backend) representation of the ant.
* The AntWorld essentially acts as the game host of the original AntGame.
*/
private MyAnt myAnt;
/**
* The client agent. In the original AntGame the host would send JADE messages
* containing the current observation to each client on every tick.
* In this reinforcement learning environment, the agent is part of the
* "backend" so that the environment forms an MDP. By the OpenAI Gym convention,
* the environment should return all vital information from the .step() method
* (nextState, reward, done). The AntGame itself, however, only yields an
* observation for each ant on each tick. These observations are not Markovian,
* so a "middleware" has to compute unique Markov states from the received
* observations; that middleware is the (client) ant.
* The AntAgent has an internal strategy to generate Markov states from
* observations, for example through an internal grid clone (its "brain").
* A history, as mentioned in various lectures, would be possible as well.
*/
private AntAgent antAgent;
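/** Number of ticks elapsed in the current episode. */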
private int tick;
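/** Maximum number of ticks before the episode is forcibly ended. */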
private int maxEpisodeTicks;
public AntWorld(int width, int height, double foodDensity){
grid = new Grid(width, height, foodDensity);
antAgent = new AntAgent(width, height);
tick = 0;
maxEpisodeTicks = 1000;
}
public AntWorld(){
this(Constants.DEFAULT_GRID_WIDTH, Constants.DEFAULT_GRID_HEIGHT, Constants.DEFAULT_FOOD_DENSITY);
}
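/**
* Backend state of the single ant: its position, whether it currently carries
* a piece of food, and whether it has already been spawned on the grid.
*/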
private static class MyAnt{
Point pos;
boolean hasFood;
boolean spawned;
}
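/**
* Advances the world by one tick, applying the given action to the ant.
* The returned StepResult carries the next Markov state (computed by the
* AntAgent from the new observation), the reward, the done flag and an info
* string. {@link #reset()} must be called once before the first step,
* otherwise the ant does not exist yet.
*/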
public StepResult step(DiscreteAction<AntAction> action){
AntObservation observation;
State newState;
double reward = 0;
String info = "";
boolean done = false;
if(!myAnt.spawned){
myAnt.spawned = true;
myAnt.pos = grid.getStartPoint();
observation = new AntObservation(grid.getCell(myAnt.pos), myAnt.pos, myAnt.hasFood);
newState = antAgent.feedObservation(observation);
reward = 0.0;
return new StepResult(newState, reward, false, "Just spawned on the map");
}
Cell currentCell = grid.getCell(myAnt.pos);
Point potentialNextPos = new Point(myAnt.pos.x, myAnt.pos.y);
boolean stayOnCell = true;
// flag to trigger a check whether all food has been collected;
// only set when food was dropped on the starting position
boolean checkCompletion = false;
switch (action.getValue()) {
case MOVE_UP:
potentialNextPos.y -= 1;
stayOnCell = false;
break;
case MOVE_RIGHT:
potentialNextPos.x += 1;
stayOnCell = false;
break;
case MOVE_DOWN:
potentialNextPos.y += 1;
stayOnCell = false;
break;
case MOVE_LEFT:
potentialNextPos.x -= 1;
stayOnCell = false;
break;
case PICK_UP:
if(myAnt.hasFood){
// Ant tries to pick up food but can only hold one piece
reward = Reward.FOOD_PICK_UP_FAIL_HAS_FOOD_ALREADY;
}else if(currentCell.getFood() == 0){
// Ant tries to pick up food on cell that has no food on it
reward = Reward.FOOD_PICK_UP_FAIL_NO_FOOD;
}else if(currentCell.getFood() > 0){
// Ant successfully picks up food
currentCell.setFood(currentCell.getFood() - 1);
myAnt.hasFood = true;
// reward for a successful pick-up; FOOD_PICK_UP_SUCCESS is assumed to exist in Reward
reward = Reward.FOOD_PICK_UP_SUCCESS;
}
break;
case DROP_DOWN:
if(!myAnt.hasFood){
// Ant had no food to drop
reward = Reward.FOOD_DROP_DOWN_FAIL_NO_FOOD;
}else{
// Drop food onto the ground
currentCell.setFood(currentCell.getFood() + 1);
myAnt.hasFood = false;
// negative reward if the agent drops food on any other field
// than the starting point
if(currentCell.getType() != CellType.START){
reward = Reward.FOOD_DROP_DOWN_FAIL_NOT_START;
}else{
reward = Reward.FOOD_DROP_DOWN_SUCCESS;
checkCompletion = true;
}
}
break;
default:
throw new RuntimeException(String.format("Action <%s> is not a valid action!", action.toString()));
}
// movement action was selected
if(!stayOnCell){
if(!isInGrid(potentialNextPos)){
stayOnCell = true;
reward = Reward.RAN_INTO_WALL;
}else if(hitObstacle(potentialNextPos)){
stayOnCell = true;
reward = Reward.RAN_INTO_OBSTACLE;
}
}
// valid movement
if(!stayOnCell){
myAnt.pos = potentialNextPos;
if(antAgent.getCell(myAnt.pos).getType() == CellType.UNKNOWN){
// the ant will move to a cell that was previously unknown
reward = Reward.UNKNOWN_FIELD_EXPLORED;
}else{
reward = 0;
}
}
// get observation after action was computed
observation = new AntObservation(grid.getCell(myAnt.pos), myAnt.pos, myAnt.hasFood);
// let the ant agent process the observation to create a valid markov state
newState = antAgent.feedObservation(observation);
if(checkCompletion){
done = grid.isAllFoodCollected();
}
if(++tick == maxEpisodeTicks){
done = true;
}
return new StepResult(newState, reward, done, info);
}
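/** Checks whether the given position lies inside the grid bounds. */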
private boolean isInGrid(Point pos){
// zero-based coordinates: x = 0 and y = 0 are still inside the grid
return pos.x >= 0 && pos.x < grid.getWidth() && pos.y >= 0 && pos.y < grid.getHeight();
}
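/** Checks whether the cell at the given position is an obstacle. */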
private boolean hitObstacle(Point pos){
return grid.getCell(pos).getType() == CellType.OBSTACLE;
}
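/**
* Resets the environment for a new episode: reseeds the RNG, regenerates
* the random world and creates a fresh, not yet spawned ant.
*/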
public void reset() {
RNG.reseed();
grid.initRandomWorld();
myAnt = new MyAnt();
// reset the episode tick counter so the time limit applies to the new episode
tick = 0;
}
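/** Sets the maximum number of ticks an episode may last. */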
public void setMaxEpisodeLength(int maxTicks){
this.maxEpisodeTicks = maxTicks;
}
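/** Returns the grid's starting (spawn) position. */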
public Point getSpawningPoint(){
return grid.getStartPoint();
}
}