package evironment.antGame;

import core.*;
import java.awt.*;

public class AntWorld {
    /**
     * The grid of cells that makes up the world (backend state).
     */
    private Grid grid;

    /**
     * Internal (backend) representation of the ant.
     * The AntWorld essentially acts as the game host of the original AntGame.
     */
    private MyAnt myAnt;

    /**
     * The client agent. In the original AntGame, the host would send JADE
     * messages containing the current observation to each client on every tick.
     * In this reinforcement learning environment, the agent is part of the
     * backend in order to make the environment an MDP. By convention (OpenAI Gym),
     * the environment should return all vital information from the step() method
     * (nextState, reward, done). But the AntGame itself only yields an observation
     * for each ant on each tick. These observations are not Markov, hence a
     * "middleware" has to compute unique Markov states from the received
     * observations -> the (client) ant!
     * The AntAgent has an internal strategy to generate Markov states from
     * observations, for example through an internal grid clone (its "brain").
     * A history, as mentioned in various lectures, would be possible as well.
     */
    private AntAgent antAgent;
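
    // Illustrative sketch only, not the actual AntAgent implementation: the
    // agent could fold every observation into a persistent grid clone and
    // derive a Markov state from it, roughly along these lines:
    //
    //   State feedObservation(AntObservation obs) {
    //       brainGrid.update(obs);                  // hypothetical internal grid clone
    //       return new AntState(brainGrid, antPos); // hypothetical state type
    //   }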

    public AntWorld(int width, int height, double foodDensity){
        grid = new Grid(width, height, foodDensity);
        antAgent = new AntAgent(width, height);
        // Note: myAnt is only created in reset(), so reset() has to be called
        // before the first step() (Gym convention).
    }

    public AntWorld(){
        this(Constants.DEFAULT_GRID_WIDTH, Constants.DEFAULT_GRID_HEIGHT, Constants.DEFAULT_FOOD_DENSITY);
    }

    /**
     * Plain data holder for the backend ant state.
     */
    private static class MyAnt{
        int x, y;
        boolean hasFood;
        boolean spawned;
    }

    public StepResult step(DiscreteAction<AntAction> action){
        AntObservation observation;
        State newState;
        if(!myAnt.spawned){
            // Place the ant on the start cell before the first real action.
            Point start = grid.getStartPoint();
            myAnt.x = start.x;
            myAnt.y = start.y;
            myAnt.spawned = true;
            observation = new AntObservation(grid.getCell(start));
            newState = antAgent.feedObservation(observation);
            return new StepResult(newState, 0.0, false, "Just spawned on the map");
        }
        // Movement assumes screen coordinates (y grows downwards); bounds and
        // wall checks are still TODO.
        switch (action.getValue()) {
            case MOVE_UP:
                myAnt.y -= 1;
                break;
            case MOVE_RIGHT:
                myAnt.x += 1;
                break;
            case MOVE_DOWN:
                myAnt.y += 1;
                break;
            case MOVE_LEFT:
                myAnt.x -= 1;
                break;
            case PICK_UP:
                // TODO: pick up food from the current cell.
                break;
            case DROP_DOWN:
                // TODO: drop carried food onto the current cell.
                break;
            default:
                throw new RuntimeException(String.format("Action <%s> is not a valid action!", action.toString()));
        }
        // Observe the cell the ant ends up on; reward computation is still TODO.
        observation = new AntObservation(grid.getCell(new Point(myAnt.x, myAnt.y)));
        newState = antAgent.feedObservation(observation);
        return new StepResult(newState, 0.0, false, "");
    }

    /**
     * Starts a fresh episode: reseeds the RNG, rebuilds a random world and
     * creates a new (not yet placed) ant.
     */
    public void reset() {
        RNG.reseed();
        grid.initRandomWorld();
        myAnt = new MyAnt();
    }

    public Point getSpawningPoint(){
        return grid.getStartPoint();
    }
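
    // Minimal usage sketch following the reset()/step() convention described
    // above. Assumes DiscreteAction has a single-argument constructor wrapping
    // an AntAction; real action selection would come from a learning agent.
    public static void main(String[] args) {
        AntWorld world = new AntWorld();
        world.reset();
        StepResult result = world.step(new DiscreteAction<>(AntAction.MOVE_UP));
        System.out.println(result);
    }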
}