diff --git a/src/main/java/core/RNG.java b/src/main/java/core/RNG.java index 22e9eeb..e506167 100644 --- a/src/main/java/core/RNG.java +++ b/src/main/java/core/RNG.java @@ -20,9 +20,8 @@ public class RNG { private static int envSeed = 13; static { rng = new Random(); - rng.setSeed(seed); rngEnv = new Random(); - rngEnv.setSeed(13); + setSeed(seed, true); } public static Random getRandom() { @@ -32,9 +31,16 @@ public class RNG { return rngEnv; } - public static void setSeed(int seed){ + public static void setSeed(int seed, boolean setEnvRandom) { RNG.seed = seed; rng.setSeed(seed); - rngEnv.setSeed(13); + if(setEnvRandom) { + rngEnv.setSeed(13); + } } + + public static void setSeed(int seed) { + setSeed(seed, true); + } + } diff --git a/src/main/java/core/algo/EpisodicLearning.java b/src/main/java/core/algo/EpisodicLearning.java index 6e57943..885b6d0 100644 --- a/src/main/java/core/algo/EpisodicLearning.java +++ b/src/main/java/core/algo/EpisodicLearning.java @@ -5,17 +5,12 @@ import core.Environment; import core.LearningConfig; import core.StepResult; import core.listener.LearningListener; -import example.DinoSampling; import lombok.Getter; import lombok.Setter; -import java.io.File; import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardOpenOption; import java.util.ArrayList; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; @@ -80,18 +75,6 @@ public abstract class EpisodicLearning extends Learning imple private void dispatchEpisodeStart(){ ++currentEpisode; - /* - 2f 0.02 => 100 - 1.5f 0.02 => 75 - 1.4f 0.02 => fail - 1.5f 0.1 => 16 ! - */ -// if(this.policy instanceof EpsilonGreedyPolicy){ -// float ep = 2f/(float)currentEpisode; -// if(ep < 0.02) ep = 0; -// ((EpsilonGreedyPolicy) this.policy).setEpsilon(ep); -// System.out.println(ep); -// } episodesToLearn.decrementAndGet(); for(LearningListener l: learningListeners){ l.onEpisodeStart(); @@ -103,17 +86,6 @@ public abstract class EpisodicLearning extends Learning imple super.dispatchStepEnd(); timestamp++; timestampCurrentEpisode++; - // TODO: more sophisticated way to check convergence - if(false){ - // t - File file = new File(DinoSampling.FILE_NAME); - try { - Files.writeString(Path.of(file.getPath()), currentEpisode/2 + ",", StandardOpenOption.APPEND); - } catch (IOException e) { - e.printStackTrace(); - } - // System.out.println("converged after: " + currentEpisode/2 + " episode!"); - } } @Override diff --git a/src/main/java/evironment/blackjack/cards/CardDeck.java b/src/main/java/evironment/blackjack/cards/CardDeck.java index 4878b62..4db62c4 100644 --- a/src/main/java/evironment/blackjack/cards/CardDeck.java +++ b/src/main/java/evironment/blackjack/cards/CardDeck.java @@ -29,6 +29,6 @@ public class CardDeck { nextInt(int bound) returns random int value from (inclusive) 0 and EXCLUSIVE! bound */ - return cards.get(RNG.getRandom().nextInt(cards.size())); + return cards.get(RNG.getRandomEnv().nextInt(cards.size())); } } diff --git a/src/main/java/example/ContinuousAnt.java b/src/main/java/example/ContinuousAnt.java index e9d7ace..5c1df45 100644 --- a/src/main/java/example/ContinuousAnt.java +++ b/src/main/java/example/ContinuousAnt.java @@ -20,7 +20,7 @@ public class ContinuousAnt { } catch (IOException e) { e.printStackTrace(); } - RNG.setSeed(13); + RNG.setSeed(13, true); RLController rl = new RLControllerGUI<>( new AntWorldContinuous(8, 8), Method.Q_LEARNING_OFF_POLICY_CONTROL, @@ -33,7 +33,5 @@ public class ContinuousAnt { rl.setLearningRate(0.9f); rl.setEpsilon(0.2f); rl.start(); - - } } diff --git a/src/main/java/example/DinoSampling.java b/src/main/java/example/DinoSampling.java deleted file mode 100644 index 6c748ac..0000000 --- a/src/main/java/example/DinoSampling.java +++ /dev/null @@ -1,55 +0,0 @@ -package example; - -import core.RNG; -import core.algo.Method; -import core.controller.RLController; -import core.controller.RLControllerGUI; -import evironment.jumpingDino.DinoAction; -import evironment.jumpingDino.DinoWorld; - -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardOpenOption; - -public class DinoSampling { - public static final String FILE_NAME = "converge.txt"; - - public static void main(String[] args) { - File file = new File(FILE_NAME); - try { - file.createNewFile(); - } catch (IOException e) { - e.printStackTrace(); - } - for(float f = 0.05f; f <= 1.003; f += 0.05f) { - try { - Files.writeString(Path.of(file.getPath()), f + ",", StandardOpenOption.APPEND); - } catch (IOException e) { - e.printStackTrace(); - } - for(int i = 1; i <= 100; i++) { - int seed = i * 13; - System.out.println("seed: " + seed); - RNG.setSeed(seed); - - RLController rl = new RLControllerGUI<>( - new DinoWorld(), - Method.MC_CONTROL_FIRST_VISIT, - DinoAction.values()); - rl.setDelay(300); - rl.setDiscountFactor(1f); - rl.setEpsilon(0.5f); - rl.setLearningRate(0.9f); - rl.setNrOfEpisodes(400000); - rl.start(); - } - try { - Files.writeString(Path.of(file.getPath()), "\n", StandardOpenOption.APPEND); - } catch (IOException e) { - e.printStackTrace(); - } - } - } -}