Merge remote-tracking branch 'origin/antWorldRewardAnalysis' into antWorldRewardAnalysis
# Conflicts:
#	OptimalityDifferentDiscountFactors.R
#	src/main/java/core/algo/td/QLearningOffPolicyTDControl.java
#	src/main/java/example/ContinuousAnt.java
This commit is contained in:
commit
4402d70467
|
@@ -32,7 +32,6 @@ public class QLearningOffPolicyTDControl<A extends Enum> extends EpisodicLearnin
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void nextEpisode() {
|
protected void nextEpisode() {
|
||||||
|
|
||||||
State state = environment.reset();
|
State state = environment.reset();
|
||||||
try {
|
try {
|
||||||
Thread.sleep(delay);
|
Thread.sleep(delay);
|
||||||
|
@@ -99,13 +98,13 @@ public class QLearningOffPolicyTDControl<A extends Enum> extends EpisodicLearnin
|
||||||
System.out.println("final 0 expl");
|
System.out.println("final 0 expl");
|
||||||
((EpsilonGreedyPolicy<A>) this.policy).setEpsilon(0.00f);
|
((EpsilonGreedyPolicy<A>) this.policy).setEpsilon(0.00f);
|
||||||
}
|
}
|
||||||
if(foodCollected == 30000) {
|
if(foodCollected == 15000){
|
||||||
try {
|
try {
|
||||||
Files.writeString(Path.of(file.getPath()), "\n", StandardOpenOption.APPEND);
|
Files.writeString(Path.of(file.getPath()), "\n", StandardOpenOption.APPEND);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
// return;
|
return;
|
||||||
}
|
}
|
||||||
iterations++;
|
iterations++;
|
||||||
timestampTilFood = 0;
|
timestampTilFood = 0;
|
||||||
|
|
|
@@ -11,10 +11,10 @@ import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
public class ContinuousAnt {
|
public class ContinuousAnt {
|
||||||
public static final String FILE_NAME = "optDiscTimestampsNew.txt";
|
public static final String FILE_NAME = "converge22.txt";
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
int k = 4 + 4 + 4 + 6 + 6 + 6 + 8 + 10 + 12 + 14 + 14 + 16 + 16 + 16 + 18 + 18 + 18 + 20 + 20 + 20 + 22 + 22 + 22 + 24 + 24 + 24 + 24 + 26 + 26 + 26 + 26 + 26 + 28 + 28 + 28 + 28 + 28 + 30 + 30 + 30 + 30 + 32 + 32 + 32 + 34 + 34 + 34 + 36 + 36 + 38 + 40 + 42;
|
int i = 4+4+4+6+6+6+8+10+12+14+14+16+16+16+18+18+18+20+20+20+22+22+22+24+24+24+24+26+26+26+26+26+28+28+28+28+28+30+30+30+30+32+32+32+34+34+34+36+36+38+40+42;
|
||||||
System.out.println(k / 52f);
|
System.out.println(i/52f);
|
||||||
File file = new File(FILE_NAME);
|
File file = new File(FILE_NAME);
|
||||||
try {
|
try {
|
||||||
file.createNewFile();
|
file.createNewFile();
|
||||||
|
|
|
@@ -1,30 +0,0 @@
|
||||||
import core.RNG;
|
|
||||||
import core.algo.Method;
|
|
||||||
import core.controller.RLController;
|
|
||||||
import core.controller.RLControllerGUI;
|
|
||||||
import evironment.jumpingDino.DinoAction;
|
|
||||||
import evironment.jumpingDino.DinoWorld;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
public class MCFirstVisit {
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test if the action sequence is deterministic
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void deterministicActionSequence(){
|
|
||||||
RNG.setSeed(55);
|
|
||||||
|
|
||||||
RLController<DinoAction> rl = new RLControllerGUI<>(
|
|
||||||
new DinoWorld(false, false),
|
|
||||||
Method.MC_CONTROL_FIRST_VISIT,
|
|
||||||
DinoAction.values());
|
|
||||||
|
|
||||||
rl.setDelay(10);
|
|
||||||
rl.setDiscountFactor(1f);
|
|
||||||
rl.setEpsilon(0.1f);
|
|
||||||
rl.setLearningRate(0.8f);
|
|
||||||
rl.setNrOfEpisodes(4000000);
|
|
||||||
rl.start();
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Reference in New Issue