package evironment.antGame;

import core.*;

import java.awt.*;

public class AntWorld {
    /**
     * The backend grid, the ground truth of the world the host simulates.
     */
    private Grid grid;

    /**
     * Internal (backend) representation of the ant.
     * The AntWorld essentially acts as the game host of the original AntGame.
     */
    private MyAnt myAnt;

    /**
     * The client agent. In the original AntGame the host would send JADE messages
     * containing the current observation to each client on every tick.
     * In this reinforcement learning environment, the agent is part of the
     * "backend" to make this environment an MDP. Following the OpenAI Gym
     * convention, the environment should return all vital information from the
     * .step() method (nextState, reward, done). But the antGame itself only
     * produces an observation for each ant on each tick. These observations are
     * not Markov, hence a "middleware" has to compute unique Markov states from
     * the received observations -> the (client) ant!
     * The AntAgent has an internal strategy to generate Markov states from
     * observations, for example through an internal grid clone (brain). A
     * history, as mentioned in various lectures, would be another option.
     */
    private AntAgent antAgent;

    private int tick;
    private int maxEpisodeTicks;

    public AntWorld(int width, int height, double foodDensity) {
        grid = new Grid(width, height, foodDensity);
        antAgent = new AntAgent(width, height);
        tick = 0;
        maxEpisodeTicks = 1000;
    }

    public AntWorld() {
        this(Constants.DEFAULT_GRID_WIDTH, Constants.DEFAULT_GRID_HEIGHT, Constants.DEFAULT_FOOD_DENSITY);
    }

    private static class MyAnt {
        Point pos;
        boolean hasFood;
        boolean spawned;
    }
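    /**
     * Applies one action and advances the world by one tick, following the
     * OpenAI Gym convention of returning (nextState, reward, done) bundled in
     * a StepResult. Note that the very first call after reset() only spawns
     * the ant on the start cell, whatever action is passed.
     *
     * A minimal usage sketch (anyAction stands for an arbitrary
     * DiscreteAction; the snippet is illustrative, not taken from the
     * original codebase):
     * <pre>{@code
     * AntWorld world = new AntWorld();
     * world.reset();
     * StepResult spawn = world.step(anyAction); // info: "Just spawned on the map"
     * }</pre>
     */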
    public StepResult step(DiscreteAction action) {
        AntObservation observation;
        State newState;
        double reward = 0;
        String info = "";
        boolean done = false;

        if (!myAnt.spawned) {
            myAnt.spawned = true;
            myAnt.pos = grid.getStartPoint();
            observation = new AntObservation(grid.getCell(myAnt.pos), myAnt.pos, myAnt.hasFood);
            newState = antAgent.feedObservation(observation);
            reward = 0.0;
            return new StepResult(newState, reward, false, "Just spawned on the map");
        }

        Cell currentCell = grid.getCell(myAnt.pos);
        Point potentialNextPos = new Point(myAnt.pos.x, myAnt.pos.y);
        boolean stayOnCell = true;
        // flag to trigger the check whether all food has been collected;
        // only set if food was dropped on the starting position
        boolean checkCompletion = false;

        switch (action.getValue()) {
            case MOVE_UP:
                potentialNextPos.y -= 1;
                stayOnCell = false;
                break;
            case MOVE_RIGHT:
                potentialNextPos.x += 1;
                stayOnCell = false;
                break;
            case MOVE_DOWN:
                potentialNextPos.y += 1;
                stayOnCell = false;
                break;
            case MOVE_LEFT:
                potentialNextPos.x -= 1;
                stayOnCell = false;
                break;
            case PICK_UP:
                if (myAnt.hasFood) {
                    // Ant tries to pick up food but can only hold one piece
                    reward = Reward.FOOD_PICK_UP_FAIL_HAS_FOOD_ALREADY;
                } else if (currentCell.getFood() == 0) {
                    // Ant tries to pick up food on a cell that has no food on it
                    reward = Reward.FOOD_PICK_UP_FAIL_NO_FOOD;
                } else if (currentCell.getFood() > 0) {
                    // Ant successfully picks up food; FOOD_PICK_UP_SUCCESS is
                    // assumed to exist alongside the other Reward constants
                    currentCell.setFood(currentCell.getFood() - 1);
                    myAnt.hasFood = true;
                    reward = Reward.FOOD_PICK_UP_SUCCESS;
                }
                break;
            case DROP_DOWN:
                if (!myAnt.hasFood) {
                    // Ant has no food to drop
                    reward = Reward.FOOD_DROP_DOWN_FAIL_NO_FOOD;
                } else {
                    // Drop food onto the ground
                    currentCell.setFood(currentCell.getFood() + 1);
                    myAnt.hasFood = false;

                    // negative reward if the agent drops food on any field
                    // other than the starting point
                    if (currentCell.getType() != CellType.START) {
                        reward = Reward.FOOD_DROP_DOWN_FAIL_NOT_START;
                    } else {
                        reward = Reward.FOOD_DROP_DOWN_SUCCESS;
                        checkCompletion = true;
                    }
                }
                break;
            default:
                throw new RuntimeException(String.format("Action <%s> is not a valid action!", action.toString()));
        }

        // a movement action was selected
        if (!stayOnCell) {
            if (!isInGrid(potentialNextPos)) {
                stayOnCell = true;
                reward = Reward.RAN_INTO_WALL;
            } else if (hitObstacle(potentialNextPos)) {
                stayOnCell = true;
                reward = Reward.RAN_INTO_OBSTACLE;
            }
        }

        // valid movement
        if (!stayOnCell) {
            myAnt.pos = potentialNextPos;

            if (antAgent.getCell(myAnt.pos).getType() == CellType.UNKNOWN) {
                // the ant moves to a cell that was previously unknown
                reward = Reward.UNKNOWN_FIELD_EXPLORED;
            } else {
                reward = 0;
            }
        }

        // get the observation after the action has been applied
        observation = new AntObservation(grid.getCell(myAnt.pos), myAnt.pos, myAnt.hasFood);

        // let the ant agent process the observation to create a valid Markov state
        newState = antAgent.feedObservation(observation);

        if (checkCompletion) {
            done = grid.isAllFoodCollected();
        }
        if (++tick >= maxEpisodeTicks) {
            done = true;
        }
        return new StepResult(newState, reward, done, info);
    }

    private boolean isInGrid(Point pos) {
        // cell indices are zero-based, so x == 0 and y == 0 are still inside the grid
        return pos.x >= 0 && pos.x < grid.getWidth() && pos.y >= 0 && pos.y < grid.getHeight();
    }

    private boolean hitObstacle(Point pos) {
        return grid.getCell(pos).getType() == CellType.OBSTACLE;
    }

    public void reset() {
        RNG.reseed();
        grid.initRandomWorld();
        myAnt = new MyAnt();
        // start each episode with a fresh tick counter; otherwise the
        // maxEpisodeTicks cutoff only fires in the first episode
        tick = 0;
    }

    public void setMaxEpisodeLength(int maxTicks) {
        this.maxEpisodeTicks = maxTicks;
    }

    public Point getSpawningPoint() {
        return grid.getStartPoint();
    }
}
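/*
 * Minimal sketch of an episode loop driving AntWorld from the outside, in the
 * spirit of the OpenAI Gym convention described above. Illustrative only: it
 * assumes that StepResult exposes an isDone() accessor and that some policy
 * supplies DiscreteAction instances; both are assumptions about neighbouring
 * classes, not guaranteed by this file.
 */
class AntWorldEpisodeSketch {
    static void runEpisode(AntWorld world, java.util.function.Supplier<DiscreteAction> policy) {
        world.reset();
        boolean done = false;
        while (!done) {
            DiscreteAction action = policy.get(); // e.g. epsilon-greedy over the action enum
            StepResult result = world.step(action);
            done = result.isDone();               // assumed accessor on StepResult
        }
    }
}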