diff --git a/OptimalityAdancedDiffDiscounts.txt b/OptimalityAdancedDiffDiscounts.txt deleted file mode 100644 index 6f3b51d..0000000 --- a/OptimalityAdancedDiffDiscounts.txt +++ /dev/null @@ -1,9 +0,0 @@ -// Discount factor per row 0.99 0.95 0.9 0.7 0.5 0.3 0.1 0.05; col: 1000 foodCollected -2227.291,293.974,167.684,117.521,96.234,76.003,42.731,85.039,34.988,51.885,50.998,28.669,41.673,52.083,30.215 -537.895,59.768,41.445,27.915,25.43,24.127,22.922,23.301,23.769,22.993,22.633,22.866,22.95,23.823,22.323 -286.021,37.013,29.013,24.655,23.493,23.244,22.643,23.159,23.582,22.924,22.508,22.844,22.951,23.686,22.229 -120.419,32.193,26.495,23.832,23.36,23.116,22.624,23.132,23.568,22.886,22.48,22.842,22.932,23.674,22.222 -93.303,31.302,26.482,23.775,23.36,23.116,22.624,23.132,23.568,22.886,22.48,22.842,22.932,23.674,22.222 -82.588,31.458,26.56,23.885,23.36,23.116,22.624,23.132,23.568,22.886,22.48,22.842,22.932,23.674,22.222 -87.161,34.038,28.755,25.818,25.476,24.604,24.102,25.556,25.074,25.03,24.418,24.88,24.432,25.524,23.53 -107.639,54.045,50.417,44.948,44.374,42.504,41.216,43.762,44.542,42.036,43.182,43.158,41.802,48.35,39.682 diff --git a/OptimalityDifferentDiscountFactorTotalTS.R b/OptimalityDifferentDiscountFactorTotalTS.R new file mode 100644 index 0000000..7af1295 --- /dev/null +++ b/OptimalityDifferentDiscountFactorTotalTS.R @@ -0,0 +1,33 @@ +# Libraries +library(ggplot2) +library(matrixStats) +ta <- as.matrix(read.table(file.choose(), sep=",", header = FALSE, skip = 1)) +ta <- t(ta) +dim(ta) +head(ta) +# Create dummy data +data <- data.frame( + y=ta[,1], + y2=ta[,2], + y3=ta[,3], + y4=ta[,4], + y5=ta[,5], + y6=ta[,6], + y7=ta[,7], + y8=ta[,8], + x=seq(1, length(ta[,1])) +) +ggplot(data, aes(x*1000)) + + labs( x ="Gesammeltes Futter", y = "Zeitstempel insgesamt", color = "Diskontierungsfaktor") + + #geom_hline(yintercept=23, linetype="dashed")+ + geom_text(aes(20000,40000,label = "opt. Verhalten", vjust = -1)) + + geom_line(aes(y = x*1000*23), size=1)+ + geom_line(aes(y = y, colour = "0.05"), size=1)+ + geom_line(aes(y = y2, colour = "0.1"), size=1) + + geom_line(aes(y = y3, colour = "0.3"), size=1) + + geom_line(aes(y = y4, colour = "0.5"), size=1) + + geom_line(aes(y = y5, colour = "0.7"), size=1)+ + geom_line(aes(y = y6, colour = "0.9"), size=1) + + geom_line(aes(y = y7, colour = "0.95"), size=1) + + geom_line(aes(y = y8, colour = "0.99"), size=1) + + theme_bw(base_size = 24) diff --git a/OptimalityDifferentDiscountFactors.R b/OptimalityDifferentDiscountFactors.R index 7f55e68..9ced426 100644 --- a/OptimalityDifferentDiscountFactors.R +++ b/OptimalityDifferentDiscountFactors.R @@ -1,37 +1,34 @@ # Libraries library(ggplot2) library(matrixStats) -ta <- as.matrix(read.table(file.choose(), sep=",", header = FALSE, skip = 1)) -ta <- t(ta) +#ta <- as.matrix(read.table(file.choose(), sep=",", header = FALSE, skip = 1)) +#ta <- t(ta) dim(ta) head(ta) - +print(ta[1:20,1]) # Create dummy data data <- data.frame( - y=ta[,1], - y2=ta[,2], - y3=ta[,3], - y4=ta[,4], - y5=ta[,5], - y6=ta[,6], - y7=ta[,7], - y8=ta[,8], - x=seq(1, length(ta[,1])) + y=ta[1:20,1], + y2=ta[1:20,2], + y3=ta[1:20,3], + y4=ta[1:20,4], + y5=ta[1:20,5], + y6=ta[1:20,6], + y7=ta[1:20,7], + y8=ta[1:20,8], + x=seq(1, length(ta[1:20,1])) ) ggplot(data, aes(x*1000)) + labs( x ="Gesamtanzahl gesammeltes Futter", y = "Ø Zeitstempel pro Futter (log)", color = "Diskontierungsfaktor") + scale_y_log10()+ geom_hline(yintercept=23, linetype="dashed")+ geom_text(aes(0,23,label = "opt", vjust = -1)) + - geom_line(aes(y = y, colour = "0.99"), size=1)+ - geom_line(aes(y = y2, colour = "0.95"), size=1) + - geom_line(aes(y = y3, colour = "0.9"), size=1) + - geom_line(aes(y = y4, colour = "0.7"), size=1) + - geom_line(aes(y = y5, colour = "0.5"), size=1)+ - geom_line(aes(y = y6, colour = "0.3"), size=1) + - geom_line(aes(y = y7, colour = "0.1"), size=1) + - geom_line(aes(y = y8, colour = "0.05"), size=1) + + geom_line(aes(y = y, colour = "0.05"), size=1)+ + geom_line(aes(y = y2, colour = "0.1"), size=1) + + geom_line(aes(y = y3, colour = "0.3"), size=1) + + geom_line(aes(y = y4, colour = "0.5"), size=1) + + geom_line(aes(y = y5, colour = "0.7"), size=1)+ + geom_line(aes(y = y6, colour = "0.9"), size=1) + + geom_line(aes(y = y7, colour = "0.95"), size=1) + + geom_line(aes(y = y8, colour = "0.99"), size=1) + theme_bw(base_size = 24) - - - \ No newline at end of file diff --git a/optDiscNew.txt b/optDiscNew.txt new file mode 100644 index 0000000..40f9006 --- /dev/null +++ b/optDiscNew.txt @@ -0,0 +1,9 @@ +# 0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 0.99 +103.857,48.501,49.283,44.149,44.51,42.402,45.102,41.896,43.104,40.76,41.0,39.134,46.756,44.282,45.834,44.942,41.078,43.66,42.812,41.672,44.022,41.196,42.106,41.384,42.174,42.736,45.792,43.482,48.132,44.14 +86.807,32.319,29.317,26.51,25.664,24.348,24.994,23.972,24.46,24.252,24.128,23.758,25.184,24.872,25.286,24.356,24.322,24.262,24.26,24.446,24.888,24.302,24.866,24.236,24.548,24.866,25.894,24.872,26.212,24.632 +81.071,30.951,26.07,24.804,23.196,22.67,23.374,22.388,22.666,22.776,22.432,22.294,23.318,23.018,23.44,22.97,22.502,22.776,22.692,22.382,23.178,22.932,23.098,22.726,22.584,23.062,23.548,22.824,23.672,22.73 +92.437,29.962,26.044,24.465,23.196,22.67,23.374,22.388,22.666,22.776,22.432,22.294,23.318,23.018,23.44,22.97,22.502,22.776,22.692,22.382,23.178,22.932,23.098,22.726,22.584,23.062,23.548,22.824,23.672,22.73 +120.382,29.993,26.154,24.518,23.196,22.67,23.374,22.388,22.666,22.776,22.432,22.294,23.318,23.018,23.44,22.97,22.502,22.776,22.692,22.382,23.178,22.932,23.098,22.726,22.584,23.062,23.548,22.824,23.672,22.73 +281.912,37.184,28.825,26.807,23.42,23.018,23.428,22.431,22.718,22.802,22.444,22.352,23.414,23.031,23.448,22.97,22.508,22.782,22.698,22.391,23.184,22.936,23.102,22.736,22.584,23.062,23.55,22.835,23.674,22.738 +536.915,54.205,42.919,33.225,26.309,23.071,23.713,22.6,22.675,22.81,22.478,22.313,23.675,23.25,23.601,23.064,22.599,22.866,23.312,22.591,23.195,23.55,23.354,22.735,22.604,23.066,23.549,22.854,23.698,22.76 +2205.811,288.101,186.619,128.568,108.486,53.722,67.856,58.127,53.659,39.438,44.677,28.751,54.808,33.351,26.386,25.356,24.812,25.128,25.056,24.712,25.626,25.316,25.552,25.112,24.874,25.446,26.05,25.17,26.16,25.058 diff --git a/optDiscTimestampsNew.txt b/optDiscTimestampsNew.txt new file mode 100644 index 0000000..7b3ec46 --- /dev/null +++ b/optDiscTimestampsNew.txt @@ -0,0 +1,9 @@ +# timestamp needed for x*1000 collected food; 0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 0.99 +103856,152357,201640,245789,290299,332701,377803,419699,462803,503563,544563,583697,630453,674735,720569,765511,806589,850249,893061,934733,978755,1019951,1062057,1103441,1145615,1188351,1234143,1277625,1325757,1369897 +86806,119125,148442,174952,200616,224964,249958,273930,298390,322642,346770,370528,395712,420584,445870,470226,494548,518810,543070,567516,592404,616706,641572,665808,690356,715222,741116,765988,792200,816832 +81070,112021,138091,162895,186091,208761,232135,254523,277189,299965,322397,344691,368009,391027,414467,437437,459939,482715,505407,527789,550967,573899,596997,619723,642307,665369,688917,711741,735413,758143 +92436,122398,148442,172907,196103,218773,242147,264535,287201,309977,332409,354703,378021,401039,424479,447449,469951,492727,515419,537801,560979,583911,607009,629735,652319,675381,698929,721753,745425,768155 +120381,150374,176528,201046,224242,246912,270286,292674,315340,338116,360548,382842,406160,429178,452618,475588,498090,520866,543558,565940,589118,612050,635148,657874,680458,703520,727068,749892,773564,796294 +281911,319095,347920,374727,398147,421165,444593,467024,489742,512544,534988,557340,580754,603785,627233,650203,672711,695493,718191,740582,763766,786702,809804,832540,855124,878186,901736,924571,948245,970983 +536914,591119,634038,667263,693572,716643,740356,762956,785631,808441,830919,853232,876907,900157,923758,946822,969421,992287,1015599,1038190,1061385,1084935,1108289,1131024,1153628,1176694,1200243,1223097,1246795,1269555 +2205810,2493911,2680530,2809098,2917584,2971306,3039162,3097289,3150948,3190386,3235063,3263814,3318622,3351973,3378359,3403715,3428527,3453655,3478711,3503423,3529049,3554365,3579917,3605029,3629903,3655349,3681399,3706569,3732729,3757787 diff --git a/optDiscTotalTS.png b/optDiscTotalTS.png new file mode 100644 index 0000000..8a825ee Binary files /dev/null and b/optDiscTotalTS.png differ diff --git a/optimalityDiffDiscA.png b/optimalityDiffDiscA.png deleted file mode 100644 index 13a3e33..0000000 Binary files a/optimalityDiffDiscA.png and /dev/null differ diff --git a/optimalityDiffDiscount.png b/optimalityDiffDiscount.png deleted file mode 100644 index 4893fab..0000000 Binary files a/optimalityDiffDiscount.png and /dev/null differ diff --git a/src/main/java/core/RNG.java b/src/main/java/core/RNG.java index 959ae59..22e9eeb 100644 --- a/src/main/java/core/RNG.java +++ b/src/main/java/core/RNG.java @@ -17,11 +17,12 @@ public class RNG { private static Random rng; private static Random rngEnv; private static int seed = 123; + private static int envSeed = 13; static { rng = new Random(); rng.setSeed(seed); rngEnv = new Random(); - rngEnv.setSeed(seed); + rngEnv.setSeed(13); } public static Random getRandom() { @@ -34,7 +35,6 @@ public class RNG { public static void setSeed(int seed){ RNG.seed = seed; rng.setSeed(seed); - rngEnv = new Random(); - rngEnv.setSeed(seed); + rngEnv.setSeed(13); } } diff --git a/src/main/java/core/algo/td/QLearningOffPolicyTDControl.java b/src/main/java/core/algo/td/QLearningOffPolicyTDControl.java index 7375891..79ba81f 100644 --- a/src/main/java/core/algo/td/QLearningOffPolicyTDControl.java +++ b/src/main/java/core/algo/td/QLearningOffPolicyTDControl.java @@ -32,6 +32,7 @@ public class QLearningOffPolicyTDControl extends EpisodicLearnin @Override protected void nextEpisode() { + State state = environment.reset(); try { Thread.sleep(delay); @@ -72,19 +73,19 @@ public class QLearningOffPolicyTDControl extends EpisodicLearnin }*/ - if(reward == Reward.FOOD_DROP_DOWN_SUCCESS){ + if(reward == Reward.FOOD_DROP_DOWN_SUCCESS) { foodCollected++; foodTimestampsTotal += timestampTilFood; - if(foodCollected % 1000 == 0){ - System.out.println(foodTimestampsTotal / 1000f + " " + timestampCurrentEpisode); - File file = new File(ContinuousAnt.FILE_NAME); + //System.out.println(foodCollected + " " + timestampCurrentEpisode); + File file = new File(ContinuousAnt.FILE_NAME); + if(foodCollected % 1000 == 0) { try { - Files.writeString(Path.of(file.getPath()), foodTimestampsTotal/1000f +",", StandardOpenOption.APPEND); + Files.writeString(Path.of(file.getPath()), timestampCurrentEpisode + ",", StandardOpenOption.APPEND); } catch (IOException e) { e.printStackTrace(); } - foodTimestampsTotal = 0; } + foodTimestampsTotal = 0; if(foodCollected == 1000){ ((EpsilonGreedyPolicy) this.policy).setEpsilon(0.15f); } @@ -98,8 +99,7 @@ public class QLearningOffPolicyTDControl extends EpisodicLearnin System.out.println("final 0 expl"); ((EpsilonGreedyPolicy) this.policy).setEpsilon(0.00f); } - if(foodCollected == 15000){ - File file = new File(ContinuousAnt.FILE_NAME); + if(foodCollected == 30000) { try { Files.writeString(Path.of(file.getPath()), "\n", StandardOpenOption.APPEND); } catch (IOException e) { diff --git a/src/main/java/example/ContinuousAnt.java b/src/main/java/example/ContinuousAnt.java index e6d89dd..0a75ec2 100644 --- a/src/main/java/example/ContinuousAnt.java +++ b/src/main/java/example/ContinuousAnt.java @@ -8,31 +8,47 @@ import evironment.antGame.AntWorldContinuous; import java.io.File; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; public class ContinuousAnt { - public static final String FILE_NAME = "converge22.txt"; + public static final String FILE_NAME = "optDiscTimestampsNew.txt"; public static void main(String[] args) { - int i = 4+4+4+6+6+6+8+10+12+14+14+16+16+16+18+18+18+20+20+20+22+22+22+24+24+24+24+26+26+26+26+26+28+28+28+28+28+30+30+30+30+32+32+32+34+34+34+36+36+38+40+42; - System.out.println(i/52f); + int k = 4 + 4 + 4 + 6 + 6 + 6 + 8 + 10 + 12 + 14 + 14 + 16 + 16 + 16 + 18 + 18 + 18 + 20 + 20 + 20 + 22 + 22 + 22 + 24 + 24 + 24 + 24 + 26 + 26 + 26 + 26 + 26 + 28 + 28 + 28 + 28 + 28 + 30 + 30 + 30 + 30 + 32 + 32 + 32 + 34 + 34 + 34 + 36 + 36 + 38 + 40 + 42; + System.out.println(k / 52f); File file = new File(FILE_NAME); try { file.createNewFile(); } catch (IOException e) { e.printStackTrace(); } - RNG.setSeed(56); - RLController rl = new RLController<>( - new AntWorldContinuous(8, 8), - Method.Q_LEARNING_OFF_POLICY_CONTROL, - AntAction.values()); - rl.setDelay(0); - rl.setNrOfEpisodes(1); - //0.99 0.9 0.5 - //0.99 0.95 0.9 0.7 0.5 0.3 0.1 - rl.setDiscountFactor(0.1f); - // 0.1, 0.3, 0.5, 0.7 0.9 - rl.setLearningRate(0.9f); - rl.setEpsilon(0.2f); - rl.start(); + List discValues = new ArrayList<>() { + }; + discValues.add(0.05f); + discValues.add(0.1f); + discValues.add(0.3f); + discValues.add(0.5f); + discValues.add(0.7f); + discValues.add(0.9f); + discValues.add(0.95f); + discValues.add(0.99f); + + for(float disc : discValues) { + RNG.setSeed(13); + RLController rl = new RLController<>( + new AntWorldContinuous(8, 8), + Method.Q_LEARNING_OFF_POLICY_CONTROL, + AntAction.values()); + rl.setDelay(0); + rl.setNrOfEpisodes(1); + //0.99 0.9 0.5 + //0.99 0.95 0.9 0.7 0.5 0.3 0.1 + rl.setDiscountFactor(disc); + // 0.1, 0.3, 0.5, 0.7 0.9 + rl.setLearningRate(0.9f); + rl.setEpsilon(0.2f); + rl.start(); + } + } }