Merge remote-tracking branch 'origin/antWorldRewardAnalysis' into antWorldRewardAnalysis

# Conflicts:
#	OptimalityDifferentDiscountFactors.R
#	src/main/java/core/algo/td/QLearningOffPolicyTDControl.java
#	src/main/java/example/ContinuousAnt.java
This commit is contained in:
Jan Löwenstrom 2020-04-05 12:03:23 +02:00
commit 4402d70467
3 changed files with 5 additions and 36 deletions

View File

@ -32,7 +32,6 @@ public class QLearningOffPolicyTDControl<A extends Enum> extends EpisodicLearnin
@Override
protected void nextEpisode() {
State state = environment.reset();
try {
Thread.sleep(delay);
@ -99,13 +98,13 @@ public class QLearningOffPolicyTDControl<A extends Enum> extends EpisodicLearnin
System.out.println("final 0 expl");
((EpsilonGreedyPolicy<A>) this.policy).setEpsilon(0.00f);
}
if(foodCollected == 30000) {
if(foodCollected == 15000){
try {
Files.writeString(Path.of(file.getPath()), "\n", StandardOpenOption.APPEND);
} catch (IOException e) {
e.printStackTrace();
}
// return;
return;
}
iterations++;
timestampTilFood = 0;

View File

@ -11,10 +11,10 @@ import java.io.File;
import java.io.IOException;
public class ContinuousAnt {
public static final String FILE_NAME = "optDiscTimestampsNew.txt";
public static final String FILE_NAME = "converge22.txt";
public static void main(String[] args) {
int k = 4 + 4 + 4 + 6 + 6 + 6 + 8 + 10 + 12 + 14 + 14 + 16 + 16 + 16 + 18 + 18 + 18 + 20 + 20 + 20 + 22 + 22 + 22 + 24 + 24 + 24 + 24 + 26 + 26 + 26 + 26 + 26 + 28 + 28 + 28 + 28 + 28 + 30 + 30 + 30 + 30 + 32 + 32 + 32 + 34 + 34 + 34 + 36 + 36 + 38 + 40 + 42;
System.out.println(k / 52f);
int i = 4+4+4+6+6+6+8+10+12+14+14+16+16+16+18+18+18+20+20+20+22+22+22+24+24+24+24+26+26+26+26+26+28+28+28+28+28+30+30+30+30+32+32+32+34+34+34+36+36+38+40+42;
System.out.println(i/52f);
File file = new File(FILE_NAME);
try {
file.createNewFile();

View File

@ -1,30 +0,0 @@
import core.RNG;
import core.algo.Method;
import core.controller.RLController;
import core.controller.RLControllerGUI;
import evironment.jumpingDino.DinoAction;
import evironment.jumpingDino.DinoWorld;
import org.junit.Test;
public class MCFirstVisit {

    /**
     * Seeds the shared RNG with a fixed value and then runs a GUI-backed controller
     * using the first-visit Monte Carlo control method on a {@link DinoWorld}.
     *
     * <p>The intent stated by the original author is to check that the action sequence
     * is deterministic. NOTE(review): this method contains no assertions; it only
     * configures the controller and starts it, so determinism has to be judged from
     * the run itself - confirm whether an automated check was intended.
     */
    @Test
    public void deterministicActionSequence() {
        // Fixed seed so that repeated runs draw the same random numbers.
        RNG.setSeed(55);

        DinoWorld world = new DinoWorld(false, false);
        RLController<DinoAction> controller =
                new RLControllerGUI<>(world, Method.MC_CONTROL_FIRST_VISIT, DinoAction.values());

        // Learning configuration; applied in the same order as before.
        controller.setDelay(10);
        controller.setDiscountFactor(1f);
        controller.setEpsilon(0.1f);
        controller.setLearningRate(0.8f);
        controller.setNrOfEpisodes(4000000);

        controller.start();
    }
}