Merge remote-tracking branch 'origin/antWorldRewardAnalysis' into antWorldRewardAnalysis
# Conflicts:
#	OptimalityDifferentDiscountFactors.R
#	src/main/java/core/algo/td/QLearningOffPolicyTDControl.java
#	src/main/java/example/ContinuousAnt.java
This commit is contained in:
commit
4402d70467
|
@ -32,7 +32,6 @@ public class QLearningOffPolicyTDControl<A extends Enum> extends EpisodicLearnin
|
|||
|
||||
@Override
|
||||
protected void nextEpisode() {
|
||||
|
||||
State state = environment.reset();
|
||||
try {
|
||||
Thread.sleep(delay);
|
||||
|
@ -99,13 +98,13 @@ public class QLearningOffPolicyTDControl<A extends Enum> extends EpisodicLearnin
|
|||
System.out.println("final 0 expl");
|
||||
((EpsilonGreedyPolicy<A>) this.policy).setEpsilon(0.00f);
|
||||
}
|
||||
if(foodCollected == 30000) {
|
||||
if(foodCollected == 15000){
|
||||
try {
|
||||
Files.writeString(Path.of(file.getPath()), "\n", StandardOpenOption.APPEND);
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
// return;
|
||||
return;
|
||||
}
|
||||
iterations++;
|
||||
timestampTilFood = 0;
|
||||
|
|
|
@ -11,10 +11,10 @@ import java.io.File;
|
|||
import java.io.IOException;
|
||||
|
||||
public class ContinuousAnt {
|
||||
public static final String FILE_NAME = "optDiscTimestampsNew.txt";
|
||||
public static final String FILE_NAME = "converge22.txt";
|
||||
public static void main(String[] args) {
|
||||
int k = 4 + 4 + 4 + 6 + 6 + 6 + 8 + 10 + 12 + 14 + 14 + 16 + 16 + 16 + 18 + 18 + 18 + 20 + 20 + 20 + 22 + 22 + 22 + 24 + 24 + 24 + 24 + 26 + 26 + 26 + 26 + 26 + 28 + 28 + 28 + 28 + 28 + 30 + 30 + 30 + 30 + 32 + 32 + 32 + 34 + 34 + 34 + 36 + 36 + 38 + 40 + 42;
|
||||
System.out.println(k / 52f);
|
||||
int i = 4+4+4+6+6+6+8+10+12+14+14+16+16+16+18+18+18+20+20+20+22+22+22+24+24+24+24+26+26+26+26+26+28+28+28+28+28+30+30+30+30+32+32+32+34+34+34+36+36+38+40+42;
|
||||
System.out.println(i/52f);
|
||||
File file = new File(FILE_NAME);
|
||||
try {
|
||||
file.createNewFile();
|
||||
|
|
|
@ -1,30 +0,0 @@
|
|||
import core.RNG;
|
||||
import core.algo.Method;
|
||||
import core.controller.RLController;
|
||||
import core.controller.RLControllerGUI;
|
||||
import evironment.jumpingDino.DinoAction;
|
||||
import evironment.jumpingDino.DinoWorld;
|
||||
import org.junit.Test;
|
||||
|
||||
public class MCFirstVisit {
|
||||
|
||||
/**
|
||||
* Test if the action sequence is deterministic
|
||||
*/
|
||||
@Test
|
||||
public void deterministicActionSequence(){
|
||||
RNG.setSeed(55);
|
||||
|
||||
RLController<DinoAction> rl = new RLControllerGUI<>(
|
||||
new DinoWorld(false, false),
|
||||
Method.MC_CONTROL_FIRST_VISIT,
|
||||
DinoAction.values());
|
||||
|
||||
rl.setDelay(10);
|
||||
rl.setDiscountFactor(1f);
|
||||
rl.setEpsilon(0.1f);
|
||||
rl.setLearningRate(0.8f);
|
||||
rl.setNrOfEpisodes(4000000);
|
||||
rl.start();
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue