diff --git a/OptimalityAdancedDiffDiscounts.txt b/OptimalityAdancedDiffDiscounts.txt
deleted file mode 100644
index 6f3b51d..0000000
--- a/OptimalityAdancedDiffDiscounts.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-// Discount factor per row 0.99 0.95 0.9 0.7 0.5 0.3 0.1 0.05; col: 1000 foodCollected
-2227.291,293.974,167.684,117.521,96.234,76.003,42.731,85.039,34.988,51.885,50.998,28.669,41.673,52.083,30.215
-537.895,59.768,41.445,27.915,25.43,24.127,22.922,23.301,23.769,22.993,22.633,22.866,22.95,23.823,22.323
-286.021,37.013,29.013,24.655,23.493,23.244,22.643,23.159,23.582,22.924,22.508,22.844,22.951,23.686,22.229
-120.419,32.193,26.495,23.832,23.36,23.116,22.624,23.132,23.568,22.886,22.48,22.842,22.932,23.674,22.222
-93.303,31.302,26.482,23.775,23.36,23.116,22.624,23.132,23.568,22.886,22.48,22.842,22.932,23.674,22.222
-82.588,31.458,26.56,23.885,23.36,23.116,22.624,23.132,23.568,22.886,22.48,22.842,22.932,23.674,22.222
-87.161,34.038,28.755,25.818,25.476,24.604,24.102,25.556,25.074,25.03,24.418,24.88,24.432,25.524,23.53
-107.639,54.045,50.417,44.948,44.374,42.504,41.216,43.762,44.542,42.036,43.182,43.158,41.802,48.35,39.682
diff --git a/OptimalityDifferentDiscountFactorTotalTS.R b/OptimalityDifferentDiscountFactorTotalTS.R
new file mode 100644
index 0000000..7af1295
--- /dev/null
+++ b/OptimalityDifferentDiscountFactorTotalTS.R
@@ -0,0 +1,33 @@
+# Plots the total number of timestamps needed to collect x*1000 food items,
+# one line per discount factor, together with the straight reference line
+# y = 23 * (collected food) labelled "opt. Verhalten".
+#
+# Input: a comma-separated file chosen interactively. The first line is
+# skipped (header comment); every following line becomes one series.
+# NOTE(review): the colour labels below assume the rows are ordered
+# 0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 0.99 - confirm against the input file.
+
+# Libraries
+library(ggplot2)
+library(matrixStats)
+ta <- as.matrix(read.table(file.choose(), sep=",", header = FALSE, skip = 1))
+# Transpose so that each input row (one discount factor) becomes a column.
+ta <- t(ta)
+dim(ta)
+head(ta)
+# Eight series are plotted below; fail with a clear message instead of a
+# "subscript out of bounds" error when the chosen file has fewer rows.
+stopifnot(ncol(ta) >= 8)
+# One column per discount factor; x is the 1-based measurement index
+# (multiplied by 1000 in the plot to give the amount of collected food).
+data <- data.frame(
+  y=ta[,1],
+  y2=ta[,2],
+  y3=ta[,3],
+  y4=ta[,4],
+  y5=ta[,5],
+  y6=ta[,6],
+  y7=ta[,7],
+  y8=ta[,8],
+  x=seq_len(nrow(ta))
+)
+ggplot(data, aes(x*1000)) +
+  labs( x ="Gesammeltes Futter", y = "Zeitstempel insgesamt", color = "Diskontierungsfaktor") +
+  #geom_hline(yintercept=23, linetype="dashed")+
+  # annotate() draws the label once; geom_text(aes(<constants>)) would draw
+  # one overlapping copy per data row.
+  annotate("text", x = 20000, y = 40000, label = "opt. Verhalten", vjust = -1) +
+  # Reference line: 23 timestamps per collected food item.
+  geom_line(aes(y = x*1000*23), size=1)+
+  geom_line(aes(y = y, colour = "0.05"), size=1)+
+  geom_line(aes(y = y2, colour = "0.1"), size=1) +
+  geom_line(aes(y = y3, colour = "0.3"), size=1) +
+  geom_line(aes(y = y4, colour = "0.5"), size=1) +
+  geom_line(aes(y = y5, colour = "0.7"), size=1)+
+  geom_line(aes(y = y6, colour = "0.9"), size=1) +
+  geom_line(aes(y = y7, colour = "0.95"), size=1) +
+  geom_line(aes(y = y8, colour = "0.99"), size=1) +
+  theme_bw(base_size = 24)
diff --git a/OptimalityDifferentDiscountFactors.R b/OptimalityDifferentDiscountFactors.R
index 7f55e68..6a1f58a 100644
--- a/OptimalityDifferentDiscountFactors.R
+++ b/OptimalityDifferentDiscountFactors.R
@@ -5,33 +5,30 @@ ta <- as.matrix(read.table(file.choose(), sep=",", header = FALSE, skip = 1))
ta <- t(ta)
dim(ta)
head(ta)
-
+print(ta[1:20,1])
# Create dummy data
data <- data.frame(
- y=ta[,1],
- y2=ta[,2],
- y3=ta[,3],
- y4=ta[,4],
- y5=ta[,5],
- y6=ta[,6],
- y7=ta[,7],
- y8=ta[,8],
- x=seq(1, length(ta[,1]))
+ y=ta[1:20,1],
+ y2=ta[1:20,2],
+ y3=ta[1:20,3],
+ y4=ta[1:20,4],
+ y5=ta[1:20,5],
+ y6=ta[1:20,6],
+ y7=ta[1:20,7],
+ y8=ta[1:20,8],
+ x=seq(1, length(ta[1:20,1]))
)
ggplot(data, aes(x*1000)) +
labs( x ="Gesamtanzahl gesammeltes Futter", y = "Ø Zeitstempel pro Futter (log)", color = "Diskontierungsfaktor") +
scale_y_log10()+
geom_hline(yintercept=23, linetype="dashed")+
geom_text(aes(0,23,label = "opt", vjust = -1)) +
- geom_line(aes(y = y, colour = "0.99"), size=1)+
- geom_line(aes(y = y2, colour = "0.95"), size=1) +
- geom_line(aes(y = y3, colour = "0.9"), size=1) +
- geom_line(aes(y = y4, colour = "0.7"), size=1) +
- geom_line(aes(y = y5, colour = "0.5"), size=1)+
- geom_line(aes(y = y6, colour = "0.3"), size=1) +
- geom_line(aes(y = y7, colour = "0.1"), size=1) +
- geom_line(aes(y = y8, colour = "0.05"), size=1) +
+ geom_line(aes(y = y, colour = "0.05"), size=1)+
+ geom_line(aes(y = y2, colour = "0.1"), size=1) +
+ geom_line(aes(y = y3, colour = "0.3"), size=1) +
+ geom_line(aes(y = y4, colour = "0.5"), size=1) +
+ geom_line(aes(y = y5, colour = "0.7"), size=1)+
+ geom_line(aes(y = y6, colour = "0.9"), size=1) +
+ geom_line(aes(y = y7, colour = "0.95"), size=1) +
+ geom_line(aes(y = y8, colour = "0.99"), size=1) +
theme_bw(base_size = 24)
-
-
-
\ No newline at end of file
diff --git a/optDisc.png b/optDisc.png
new file mode 100644
index 0000000..8def1d3
Binary files /dev/null and b/optDisc.png differ
diff --git a/optDiscNew.txt b/optDiscNew.txt
new file mode 100644
index 0000000..40f9006
--- /dev/null
+++ b/optDiscNew.txt
@@ -0,0 +1,9 @@
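+# average timestamps per food item for each block of 1000 collected food (one column per block); one row per discount factor: 0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 0.99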
+103.857,48.501,49.283,44.149,44.51,42.402,45.102,41.896,43.104,40.76,41.0,39.134,46.756,44.282,45.834,44.942,41.078,43.66,42.812,41.672,44.022,41.196,42.106,41.384,42.174,42.736,45.792,43.482,48.132,44.14
+86.807,32.319,29.317,26.51,25.664,24.348,24.994,23.972,24.46,24.252,24.128,23.758,25.184,24.872,25.286,24.356,24.322,24.262,24.26,24.446,24.888,24.302,24.866,24.236,24.548,24.866,25.894,24.872,26.212,24.632
+81.071,30.951,26.07,24.804,23.196,22.67,23.374,22.388,22.666,22.776,22.432,22.294,23.318,23.018,23.44,22.97,22.502,22.776,22.692,22.382,23.178,22.932,23.098,22.726,22.584,23.062,23.548,22.824,23.672,22.73
+92.437,29.962,26.044,24.465,23.196,22.67,23.374,22.388,22.666,22.776,22.432,22.294,23.318,23.018,23.44,22.97,22.502,22.776,22.692,22.382,23.178,22.932,23.098,22.726,22.584,23.062,23.548,22.824,23.672,22.73
+120.382,29.993,26.154,24.518,23.196,22.67,23.374,22.388,22.666,22.776,22.432,22.294,23.318,23.018,23.44,22.97,22.502,22.776,22.692,22.382,23.178,22.932,23.098,22.726,22.584,23.062,23.548,22.824,23.672,22.73
+281.912,37.184,28.825,26.807,23.42,23.018,23.428,22.431,22.718,22.802,22.444,22.352,23.414,23.031,23.448,22.97,22.508,22.782,22.698,22.391,23.184,22.936,23.102,22.736,22.584,23.062,23.55,22.835,23.674,22.738
+536.915,54.205,42.919,33.225,26.309,23.071,23.713,22.6,22.675,22.81,22.478,22.313,23.675,23.25,23.601,23.064,22.599,22.866,23.312,22.591,23.195,23.55,23.354,22.735,22.604,23.066,23.549,22.854,23.698,22.76
+2205.811,288.101,186.619,128.568,108.486,53.722,67.856,58.127,53.659,39.438,44.677,28.751,54.808,33.351,26.386,25.356,24.812,25.128,25.056,24.712,25.626,25.316,25.552,25.112,24.874,25.446,26.05,25.17,26.16,25.058
diff --git a/optDiscTimestampsNew.txt b/optDiscTimestampsNew.txt
new file mode 100644
index 0000000..7b3ec46
--- /dev/null
+++ b/optDiscTimestampsNew.txt
@@ -0,0 +1,9 @@
+# total timestamps needed to collect x*1000 food items (column x = 1, 2, ...); one row per discount factor: 0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 0.99
+103856,152357,201640,245789,290299,332701,377803,419699,462803,503563,544563,583697,630453,674735,720569,765511,806589,850249,893061,934733,978755,1019951,1062057,1103441,1145615,1188351,1234143,1277625,1325757,1369897
+86806,119125,148442,174952,200616,224964,249958,273930,298390,322642,346770,370528,395712,420584,445870,470226,494548,518810,543070,567516,592404,616706,641572,665808,690356,715222,741116,765988,792200,816832
+81070,112021,138091,162895,186091,208761,232135,254523,277189,299965,322397,344691,368009,391027,414467,437437,459939,482715,505407,527789,550967,573899,596997,619723,642307,665369,688917,711741,735413,758143
+92436,122398,148442,172907,196103,218773,242147,264535,287201,309977,332409,354703,378021,401039,424479,447449,469951,492727,515419,537801,560979,583911,607009,629735,652319,675381,698929,721753,745425,768155
+120381,150374,176528,201046,224242,246912,270286,292674,315340,338116,360548,382842,406160,429178,452618,475588,498090,520866,543558,565940,589118,612050,635148,657874,680458,703520,727068,749892,773564,796294
+281911,319095,347920,374727,398147,421165,444593,467024,489742,512544,534988,557340,580754,603785,627233,650203,672711,695493,718191,740582,763766,786702,809804,832540,855124,878186,901736,924571,948245,970983
+536914,591119,634038,667263,693572,716643,740356,762956,785631,808441,830919,853232,876907,900157,923758,946822,969421,992287,1015599,1038190,1061385,1084935,1108289,1131024,1153628,1176694,1200243,1223097,1246795,1269555
+2205810,2493911,2680530,2809098,2917584,2971306,3039162,3097289,3150948,3190386,3235063,3263814,3318622,3351973,3378359,3403715,3428527,3453655,3478711,3503423,3529049,3554365,3579917,3605029,3629903,3655349,3681399,3706569,3732729,3757787
diff --git a/optDiscTotalTS.png b/optDiscTotalTS.png
new file mode 100644
index 0000000..8a825ee
Binary files /dev/null and b/optDiscTotalTS.png differ
diff --git a/optimalityDiffDiscA.png b/optimalityDiffDiscA.png
deleted file mode 100644
index 13a3e33..0000000
Binary files a/optimalityDiffDiscA.png and /dev/null differ
diff --git a/optimalityDiffDiscount.png b/optimalityDiffDiscount.png
deleted file mode 100644
index 4893fab..0000000
Binary files a/optimalityDiffDiscount.png and /dev/null differ
diff --git a/src/main/java/core/RNG.java b/src/main/java/core/RNG.java
index 959ae59..22e9eeb 100644
--- a/src/main/java/core/RNG.java
+++ b/src/main/java/core/RNG.java
@@ -17,11 +17,12 @@ public class RNG {
private static Random rng;
private static Random rngEnv;
private static int seed = 123;
+    /** Seed for {@code rngEnv}; kept separate from {@code seed}. */
+    private static int envSeed = 13;
static {
rng = new Random();
rng.setSeed(seed);
rngEnv = new Random();
- rngEnv.setSeed(seed);
+        // Seed the environment generator from its own field instead of repeating the literal 13.
+        rngEnv.setSeed(envSeed);
}
public static Random getRandom() {
@@ -34,7 +35,6 @@ public class RNG {
+    /**
+     * Stores {@code seed} and re-seeds the general-purpose generator with it.
+     * The environment generator is reset to {@code envSeed}, not to {@code seed}.
+     *
+     * @param seed new seed for {@code rng}
+     */
public static void setSeed(int seed){
RNG.seed = seed;
rng.setSeed(seed);
- rngEnv = new Random();
- rngEnv.setSeed(seed);
+        // Use the envSeed field instead of repeating the literal 13 here.
+        rngEnv.setSeed(envSeed);
}
}
diff --git a/src/main/java/core/algo/td/QLearningOffPolicyTDControl.java b/src/main/java/core/algo/td/QLearningOffPolicyTDControl.java
index 7375891..79ba81f 100644
--- a/src/main/java/core/algo/td/QLearningOffPolicyTDControl.java
+++ b/src/main/java/core/algo/td/QLearningOffPolicyTDControl.java
@@ -32,6 +32,7 @@ public class QLearningOffPolicyTDControl extends EpisodicLearnin
@Override
protected void nextEpisode() {
+
State state = environment.reset();
try {
Thread.sleep(delay);
@@ -72,19 +73,19 @@ public class QLearningOffPolicyTDControl extends EpisodicLearnin
}*/
- if(reward == Reward.FOOD_DROP_DOWN_SUCCESS){
+ if(reward == Reward.FOOD_DROP_DOWN_SUCCESS) {
foodCollected++;
foodTimestampsTotal += timestampTilFood;
- if(foodCollected % 1000 == 0){
- System.out.println(foodTimestampsTotal / 1000f + " " + timestampCurrentEpisode);
- File file = new File(ContinuousAnt.FILE_NAME);
+ //System.out.println(foodCollected + " " + timestampCurrentEpisode);
+ File file = new File(ContinuousAnt.FILE_NAME);
+ if(foodCollected % 1000 == 0) {
try {
- Files.writeString(Path.of(file.getPath()), foodTimestampsTotal/1000f +",", StandardOpenOption.APPEND);
+ Files.writeString(Path.of(file.getPath()), timestampCurrentEpisode + ",", StandardOpenOption.APPEND);
} catch (IOException e) {
e.printStackTrace();
}
- foodTimestampsTotal = 0;
}
+ foodTimestampsTotal = 0;
if(foodCollected == 1000){
((EpsilonGreedyPolicy) this.policy).setEpsilon(0.15f);
}
@@ -98,8 +99,7 @@ public class QLearningOffPolicyTDControl extends EpisodicLearnin
System.out.println("final 0 expl");
((EpsilonGreedyPolicy) this.policy).setEpsilon(0.00f);
}
- if(foodCollected == 15000){
- File file = new File(ContinuousAnt.FILE_NAME);
+ if(foodCollected == 30000) {
try {
Files.writeString(Path.of(file.getPath()), "\n", StandardOpenOption.APPEND);
} catch (IOException e) {
diff --git a/src/main/java/example/ContinuousAnt.java b/src/main/java/example/ContinuousAnt.java
index e6d89dd..0a75ec2 100644
--- a/src/main/java/example/ContinuousAnt.java
+++ b/src/main/java/example/ContinuousAnt.java
@@ -8,31 +8,47 @@ import evironment.antGame.AntWorldContinuous;
import java.io.File;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+/**
+ * Runs Q-learning (off-policy control) on an 8x8 {@code AntWorldContinuous}
+ * once for each discount factor in a fixed list, re-seeding {@link RNG}
+ * before every run.
+ */
public class ContinuousAnt {
- public static final String FILE_NAME = "converge22.txt";
+    /** Result file: created by {@code main} if missing, appended to by QLearningOffPolicyTDControl. */
+    public static final String FILE_NAME = "optDiscTimestampsNew.txt";
public static void main(String[] args) {
- int i = 4+4+4+6+6+6+8+10+12+14+14+16+16+16+18+18+18+20+20+20+22+22+22+24+24+24+24+26+26+26+26+26+28+28+28+28+28+30+30+30+30+32+32+32+34+34+34+36+36+38+40+42;
- System.out.println(i/52f);
+        // Prints the mean of 52 hard-coded values.
+        // NOTE(review): not used by the runs below as far as this file shows - confirm it is still needed.
+        int k = 4 + 4 + 4 + 6 + 6 + 6 + 8 + 10 + 12 + 14 + 14 + 16 + 16 + 16 + 18 + 18 + 18 + 20 + 20 + 20 + 22 + 22 + 22 + 24 + 24 + 24 + 24 + 26 + 26 + 26 + 26 + 26 + 28 + 28 + 28 + 28 + 28 + 30 + 30 + 30 + 30 + 32 + 32 + 32 + 34 + 34 + 34 + 36 + 36 + 38 + 40 + 42;
+        System.out.println(k / 52f);
File file = new File(FILE_NAME);
try {
file.createNewFile();
} catch (IOException e) {
e.printStackTrace();
}
- RNG.setSeed(56);
- RLController rl = new RLController<>(
- new AntWorldContinuous(8, 8),
- Method.Q_LEARNING_OFF_POLICY_CONTROL,
- AntAction.values());
- rl.setDelay(0);
- rl.setNrOfEpisodes(1);
- //0.99 0.9 0.5
- //0.99 0.95 0.9 0.7 0.5 0.3 0.1
- rl.setDiscountFactor(0.1f);
- // 0.1, 0.3, 0.5, 0.7 0.9
- rl.setLearningRate(0.9f);
- rl.setEpsilon(0.2f);
- rl.start();
+        // Discount factors to compare; one run each, in this order.
+        // A typed immutable list replaces the raw, anonymously subclassed ArrayList,
+        // which would not allow iterating with a float loop variable.
+        List<Float> discValues = List.of(0.05f, 0.1f, 0.3f, 0.5f, 0.7f, 0.9f, 0.95f, 0.99f);
+
+        for (float disc : discValues) {
+            // Re-seed before every run so each run starts from the same seed.
+            RNG.setSeed(13);
+            RLController rl = new RLController<>(
+                    new AntWorldContinuous(8, 8),
+                    Method.Q_LEARNING_OFF_POLICY_CONTROL,
+                    AntAction.values());
+            rl.setDelay(0);
+            rl.setNrOfEpisodes(1);
+            rl.setDiscountFactor(disc);
+            rl.setLearningRate(0.9f);
+            rl.setEpsilon(0.2f);
+            rl.start();
+        }
+
}
}