diff --git a/.idea/misc.xml b/.idea/misc.xml index 384e5d8..a19b0c6 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -11,7 +11,7 @@ - + \ No newline at end of file diff --git a/AdvancedZ3B1MonteCarlo.png b/AdvancedZ3B1MonteCarlo.png deleted file mode 100644 index 6d2eb8b..0000000 Binary files a/AdvancedZ3B1MonteCarlo.png and /dev/null differ diff --git a/AntAnalysis05Disc.png b/AntAnalysis05Disc.png deleted file mode 100644 index 41d8204..0000000 Binary files a/AntAnalysis05Disc.png and /dev/null differ diff --git a/AntAnalysis099Disc.png b/AntAnalysis099Disc.png deleted file mode 100644 index 1b42886..0000000 Binary files a/AntAnalysis099Disc.png and /dev/null differ diff --git a/AntAnalysis09Disc.png b/AntAnalysis09Disc.png deleted file mode 100644 index a5b57f4..0000000 Binary files a/AntAnalysis09Disc.png and /dev/null differ diff --git a/src/main/java/core/algo/Learning.java b/src/main/java/core/algo/Learning.java index 1bb5207..20a9ec1 100644 --- a/src/main/java/core/algo/Learning.java +++ b/src/main/java/core/algo/Learning.java @@ -16,8 +16,6 @@ import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.concurrent.CopyOnWriteArrayList; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; /** * @@ -99,7 +97,7 @@ public abstract class Learning{ public void save(ObjectOutputStream oos) throws IOException { oos.writeObject(rewardHistory); - oos.writeObject(stateActionTable); + // oos.writeObject(stateActionTable); } public void load(ObjectInputStream ois) throws IOException, ClassNotFoundException { diff --git a/src/main/java/core/algo/td/QLearningOffPolicyTDControl.java b/src/main/java/core/algo/td/QLearningOffPolicyTDControl.java index 89e0bc8..7375891 100644 --- a/src/main/java/core/algo/td/QLearningOffPolicyTDControl.java +++ b/src/main/java/core/algo/td/QLearningOffPolicyTDControl.java @@ -7,7 +7,6 @@ import core.policy.GreedyPolicy; import core.policy.Policy; import evironment.antGame.Reward; import example.ContinuousAnt; -import example.DinoSampling; import java.io.File; import java.io.IOException; @@ -77,7 +76,7 @@ public class QLearningOffPolicyTDControl extends EpisodicLearnin foodCollected++; foodTimestampsTotal += timestampTilFood; if(foodCollected % 1000 == 0){ - System.out.println(foodTimestampsTotal/1000f); + System.out.println(foodTimestampsTotal / 1000f + " " + timestampCurrentEpisode); File file = new File(ContinuousAnt.FILE_NAME); try { Files.writeString(Path.of(file.getPath()), foodTimestampsTotal/1000f +",", StandardOpenOption.APPEND); diff --git a/src/main/java/evironment/antGame/AntWorld.java b/src/main/java/evironment/antGame/AntWorld.java index 9f35bfc..0a71a31 100644 --- a/src/main/java/evironment/antGame/AntWorld.java +++ b/src/main/java/evironment/antGame/AntWorld.java @@ -54,7 +54,7 @@ public class AntWorld implements Environment, Visualizable { protected StepCalculation processStep(AntAction action) { StepCalculation sc = new StepCalculation(); - sc.reward = -1; + sc.reward = Reward.DEFAULT_REWARD; sc.info = ""; sc.done = false; Cell currentCell = grid.getCell(myAnt.getPos()); diff --git a/src/main/java/evironment/antGame/Reward.java b/src/main/java/evironment/antGame/Reward.java index 3a910b8..c2e008d 100644 --- a/src/main/java/evironment/antGame/Reward.java +++ b/src/main/java/evironment/antGame/Reward.java @@ -1,6 +1,7 @@ package evironment.antGame; public class Reward { + public static final double DEFAULT_REWARD = -1; public static final double FOOD_PICK_UP_SUCCESS = 0; public static final double FOOD_PICK_UP_FAIL_NO_FOOD = -1; public static final double FOOD_PICK_UP_FAIL_HAS_FOOD_ALREADY = -1; diff --git a/src/main/java/evironment/jumpingDino/DinoWorld.java b/src/main/java/evironment/jumpingDino/DinoWorld.java index 06bb60f..5b60cd7 100644 --- a/src/main/java/evironment/jumpingDino/DinoWorld.java +++ b/src/main/java/evironment/jumpingDino/DinoWorld.java @@ -44,7 +44,7 @@ public class DinoWorld implements Environment, Visualizable { @Override public StepResultEnvironment step(DinoAction action) { boolean done = false; - int reward = 0; + int reward = 1; if(action == DinoAction.JUMP){ dino.jump(); @@ -68,7 +68,7 @@ public class DinoWorld implements Environment, Visualizable { spawnNewObstacle(); } if(ranIntoObstacle()) { - reward = -1; + reward = 0; done = true; } diff --git a/src/main/java/example/ContinuousAnt.java b/src/main/java/example/ContinuousAnt.java index 08308fa..e6d89dd 100644 --- a/src/main/java/example/ContinuousAnt.java +++ b/src/main/java/example/ContinuousAnt.java @@ -3,10 +3,8 @@ package example; import core.RNG; import core.algo.Method; import core.controller.RLController; -import core.controller.RLControllerGUI; import evironment.antGame.AntAction; import evironment.antGame.AntWorldContinuous; -import evironment.antGame.AntWorldContinuousOriginalState; import java.io.File; import java.io.IOException; @@ -31,7 +29,7 @@ public class ContinuousAnt { rl.setNrOfEpisodes(1); //0.99 0.9 0.5 //0.99 0.95 0.9 0.7 0.5 0.3 0.1 - rl.setDiscountFactor(0.05f); + rl.setDiscountFactor(0.1f); // 0.1, 0.3, 0.5, 0.7 0.9 rl.setLearningRate(0.9f); rl.setEpsilon(0.2f); diff --git a/src/main/java/example/DinoSampling.java b/src/main/java/example/DinoSampling.java index 12fcdbc..c4bbde7 100644 --- a/src/main/java/example/DinoSampling.java +++ b/src/main/java/example/DinoSampling.java @@ -6,7 +6,6 @@ import core.controller.RLController; import core.controller.RLControllerGUI; import evironment.jumpingDino.DinoAction; import evironment.jumpingDino.DinoWorld; -import evironment.jumpingDino.DinoWorldAdvanced; import java.io.File; import java.io.IOException; @@ -34,13 +33,13 @@ public class DinoSampling { System.out.println("seed: " + i * 13); RNG.setSeed(i * 13); - RLController rl = new RLController<>( - new DinoWorldAdvanced(), - Method.Q_LEARNING_OFF_POLICY_CONTROL, + RLController rl = new RLControllerGUI<>( + new DinoWorld(), + Method.MC_CONTROL_FIRST_VISIT, DinoAction.values()); - rl.setDelay(0); - rl.setDiscountFactor(0.99f); - rl.setEpsilon(f); + rl.setDelay(300); + rl.setDiscountFactor(1f); + rl.setEpsilon(0.5f); rl.setLearningRate(0.9f); rl.setNrOfEpisodes(400000); rl.start();