diff --git a/AdvancedZ3B1MonteCarlo.png b/AdvancedZ3B1MonteCarlo.png new file mode 100644 index 0000000..6d2eb8b Binary files /dev/null and b/AdvancedZ3B1MonteCarlo.png differ diff --git a/AntAnalysis05Disc.png b/AntAnalysis05Disc.png new file mode 100644 index 0000000..41d8204 Binary files /dev/null and b/AntAnalysis05Disc.png differ diff --git a/AntAnalysis099Disc.png b/AntAnalysis099Disc.png new file mode 100644 index 0000000..1b42886 Binary files /dev/null and b/AntAnalysis099Disc.png differ diff --git a/AntAnalysis09Disc.png b/AntAnalysis09Disc.png new file mode 100644 index 0000000..a5b57f4 Binary files /dev/null and b/AntAnalysis09Disc.png differ diff --git a/NeededTimestampsFromEverywhere.png b/NeededTimestampsFromEverywhere.png new file mode 100644 index 0000000..9d5d89e Binary files /dev/null and b/NeededTimestampsFromEverywhere.png differ diff --git a/SimpleZ1B2QLearning.png b/SimpleZ1B2QLearning.png new file mode 100644 index 0000000..f64036e Binary files /dev/null and b/SimpleZ1B2QLearning.png differ diff --git a/SimpleZ1B2SARSA.png b/SimpleZ1B2SARSA.png new file mode 100644 index 0000000..8932118 Binary files /dev/null and b/SimpleZ1B2SARSA.png differ diff --git a/SimpleZ2B1MonteCarlo.png b/SimpleZ2B1MonteCarlo.png new file mode 100644 index 0000000..a888931 Binary files /dev/null and b/SimpleZ2B1MonteCarlo.png differ diff --git a/SimpleZ2B1QLearning.png b/SimpleZ2B1QLearning.png new file mode 100644 index 0000000..24830b5 Binary files /dev/null and b/SimpleZ2B1QLearning.png differ diff --git a/SimpleZ2B1QLearningMaxValueAsDefault.png b/SimpleZ2B1QLearningMaxValueAsDefault.png new file mode 100644 index 0000000..b0fc2d3 Binary files /dev/null and b/SimpleZ2B1QLearningMaxValueAsDefault.png differ diff --git a/SimpleZ2B2QLearning.png b/SimpleZ2B2QLearning.png new file mode 100644 index 0000000..ca533e1 Binary files /dev/null and b/SimpleZ2B2QLearning.png differ diff --git a/SimpleZ2B2SARSA.png b/SimpleZ2B2SARSA.png new file mode 100644 index 0000000..6efe083 Binary files /dev/null and b/SimpleZ2B2SARSA.png differ diff --git a/SimpleZ2B4QLearning.png b/SimpleZ2B4QLearning.png new file mode 100644 index 0000000..8090ffe Binary files /dev/null and b/SimpleZ2B4QLearning.png differ diff --git a/convResAdvScale.png b/convResAdvScale.png deleted file mode 100644 index 7cce6c5..0000000 Binary files a/convResAdvScale.png and /dev/null differ diff --git a/convResultAdv.png b/convResultAdv.png deleted file mode 100644 index 0fe903a..0000000 Binary files a/convResultAdv.png and /dev/null differ diff --git a/convResultSimple.png b/convResultSimple.png deleted file mode 100644 index dc1e2e4..0000000 Binary files a/convResultSimple.png and /dev/null differ diff --git a/convergenceAdvanced.txt b/convergenceAdvanced.txt index 827d701..5ae1340 100644 --- a/convergenceAdvanced.txt +++ b/convergenceAdvanced.txt @@ -17,5 +17,5 @@ 0.8,19870,3288,13724,4492,8159,5058,16764,5648,9462,19071,3914,1242,8262,26004,4036,9421,4914,2535,5362,7298,9587,37133,1837,35325,15272,14922,14138,7115,17236,5123,12157,37380,6086,37390,1672,15573,14241,2049,2602,6802,22362,7936,7544,5330,13155,16016,4544,1489,3780,6326,7794,31553,2808,1493,7788,12646,30464,22312,1681,12084,4163,2197,7950,22478,5106,26771,4382,10615,2586,12214,4799,6297,7589,4585,30365,32302,15734,5480,8626,7387,11932,4245,21532,1710,12737,7132,4740,14578,10680,8266,17300,4213,3264,35920,38026,10272,3984,2279,9739,33900 0.85,5493,10568,19366,5705,15430,8183,5721,13314,36667,33059,3753,40243,23888,25085,21843,6856,2803,9434,4794,29944,10730,39271,4484,23990,6350,16180,8099,4298,11220,4624,5946,24895,8464,4416,6619,2800,4081,12459,1981,12488,6380,9597,10328,1901,24563,13059,3639,12988,2604,4440,22666,1775,4078,5175,1144,3759,11119,1856,34970,10831,2229,5333,17121,9698,14919,2353,3963,8189,36145,13920,5301,16516,2446,46848,3985,,20640,151501,17556,1882,44216,39795,1638,57957,62050,3130,3693,5563,9780,3327,22969,39357,13749,37555,60070,9249,35426,4405,8340,18973 0.9,27355,24592,18962,2318,17604,35725,14327,38167,25602,50236,4999,9023,5562,7541,11799,25139,8724,12642,28509,57095,2147,5909,5414,12572,10018,68830,45393,18962,51656,25601,3444,45667,16813,57110,16492,3991,7315,17775,69277,34769,29824,11087,26371,3479,2540,9597,32593,13169,8588,2794,40136,56004,65307,24864,35523,19491,2673,5363,4799,5852,28566,42427,44011,40146,3757,1115,49574,5798,24249,2576,118943,6169,65584,7057,49505,116138,52083,1809,127776,3214,25689,103442,15260,62754,12390,3233,35309,68989,6615,30593,2503,29359,98237,11900,3240,64969,84134,25361,7384,13141 -0.95,24269,14543,6828,3800,41079,47279,27177,17286,9802,7114,3756,85275,14507,34993,15139,15184,90742,27554,23713,6453,15157,7045,8048,47550,84540,93729,68601,6274,4713,30578,5024,94239,7315,8193,46871,96466,3695,70915,62947,32258,66228,2114,5084,12686,62905,19158,20940,36270,9037,34034,15016,15530,46276,11063,8586,15635,7196,70708,50836,22464,13463,86986,43541,2001,40565,28534,44700,5625,6552,16140,2450,8492,3304,22904,20951,100472,131147,131728,43674,514,79827,181148,31431,4761,1515,2075,138139,137795,71014170145,60000,42790,179835,18982,48085,28398,56788,126115,5442,118289,9386 +0.95,24269,14543,6828,3800,41079,47279,27177,17286,9802,7114,3756,85275,14507,34993,15139,15184,90742,27554,23713,6453,15157,7045,8048,47550,84540,93729,68601,6274,4713,30578,5024,94239,7315,8193,46871,96466,3695,70915,62947,32258,66228,2114,5084,12686,62905,19158,20940,36270,9037,34034,15016,15530,46276,11063,8586,15635,7196,70708,50836,22464,13463,86986,43541,2001,40565,28534,44700,5625,6552,16140,2450,8492,3304,22904,20951,100472,131147,131728,43674,514,79827,181148,31431,4761,1515,2075,138139,137795,71014,170145,60000,42790,179835,18982,48085,28398,56788,126115,5442,118289,9386 1.0,11364,6363,8012,109822,19730,8425,21388,7864,18427,34072,3126,52381,35105,86487,73913,88033,76264,105864,30103,9522,31049,3180,4838,4078,133687,39236,59239,22968,21540,98395,109063,4050,5612,4990,9933,83766,140114,116077,135653,130826,130070,92207,14994,87801,1577,70868,133816,79790,1587,23322,22071,13903,3584,9721,,38605,52375,67392,10075,97733,46173,29647,2558,28151,162569,4054,10537,30871,45538,97835,45132,35042,70203,3862,100614,84525,140691,81880,80914,35187,11596,51448,2945,56551,39236,84707,64324,100588,78645,12929,32701,63306,163991,2864,34802,72929,198161,71332,98627,137754 diff --git a/epsilonValues.R b/epsilonValues.R index ff0bd53..3c8291c 100644 --- a/epsilonValues.R +++ b/epsilonValues.R @@ -12,14 +12,51 @@ data <- data.frame( y2=ta[,2], y3=ta[,3], y4=ta[,4], - x=seq(1, length(ta)) + y5=ta[,5], + y6=ta[,6], + y7=ta[,7], + y8=ta[,8], + y9=ta[,9], + y10=ta[,10], + y11=ta[,11], + y12=ta[,12], + y13=ta[,13], + y14=ta[,14], + y15=ta[,15], + x=seq(1, length(ta[,1])) ) -ggplot(data, aes(x)) + - geom_line(aes(y = y, colour = "var0")) + - geom_line(aes(y = y2, colour = "var1")) + - geom_line(aes(y = y3, colour = "var2")) + - geom_line(aes(y = y4, colour = "var3")) + - scale_x_log10( breaks=c(1,5,10,15,20,50,100,200), limits=c(1,200) ) +ggplot(data, aes(x*1000)) + + labs(title="Discount factor = 0.99", + x ="Timestamp", y = "Avg. reward per timestamp", color = "Learning rate") + + ylim(-1.5,0.6) + + geom_line(aes(y = y, colour = "0.1"), size=1)+ + geom_line(aes(y = y2, colour = "0.3"), size=1) + + geom_line(aes(y = y3, colour = "0.5"), size=1) + + geom_line(aes(y = y4, colour = "0.7"), size=1) + + geom_line(aes(y = y5, colour = "0.9"), size=1) + +ggplot(data, aes(x*1000)) + + labs(title="Discount factor = 0.9", + x ="Timestamp", y = "Avg. reward per timestamp", color = "Learning rate") + + ylim(-1.5,0.6) + + geom_line(aes(y = y6, colour = "0.1"), size=1) + + geom_line(aes(y = y7, colour = "0.3"), size=1) + + geom_line(aes(y = y8, colour = "0.5"), size=1) + + geom_line(aes(y = y9, colour = "0.7"), size=1) + + geom_line(aes(y = y10, colour = "0.9"), size=1) + +ggplot(data, aes(x*1000) ) + + labs(title="Discount factor = 0.5", + x ="Timestamp", y = "Avg. reward per timestamp", color = "Learning rate") + + ylim(-1.5,0.6) + + geom_line(aes(y = y11, colour = "0.1"), size=1) + + geom_line(aes(y = y12, colour = "0.3"), size=1) + + geom_line(aes(y = y13, colour = "0.5"), size=1) + + geom_line(aes(y = y14, colour = "0.7"), size=1) + + geom_line(aes(y = y15, colour = "0.9"), size=1) + + # scale_x_log10(limits=c(1,150) ) + # scale_y_log10( breaks=c(1,50,500,2500,25000), limits=c(1,25000) ) plot(ta, x=x*1000, log="x", type="o") diff --git a/optimalityDiffDiscount.png b/optimalityDiffDiscount.png new file mode 100644 index 0000000..4893fab Binary files /dev/null and b/optimalityDiffDiscount.png differ diff --git a/src/main/java/core/algo/td/QLearningOffPolicyTDControl.java b/src/main/java/core/algo/td/QLearningOffPolicyTDControl.java index 4adfef3..89e0bc8 100644 --- a/src/main/java/core/algo/td/QLearningOffPolicyTDControl.java +++ b/src/main/java/core/algo/td/QLearningOffPolicyTDControl.java @@ -48,6 +48,8 @@ public class QLearningOffPolicyTDControl extends EpisodicLearnin int timestampTilFood = 0; int rewardsPer1000 = 0; int foodCollected = 0; + int iterations = 0; + int foodTimestampsTotal= 0; while(envResult == null || !envResult.isDone()) { actionValues = stateActionTable.getActionValues(state); A action = policy.chooseAction(actionValues); @@ -57,11 +59,10 @@ public class QLearningOffPolicyTDControl extends EpisodicLearnin double reward = envResult.getReward(); State nextState = envResult.getState(); sumOfRewards += reward; - rewardsPer1000+=reward; timestampTilFood++; - if(foodCollected == 10000){ + /* if(iterations == 100){ File file = new File(ContinuousAnt.FILE_NAME); try { Files.writeString(Path.of(file.getPath()), "\n", StandardOpenOption.APPEND); @@ -69,17 +70,46 @@ public class QLearningOffPolicyTDControl extends EpisodicLearnin e.printStackTrace(); } return; - } + }*/ + + if(reward == Reward.FOOD_DROP_DOWN_SUCCESS){ foodCollected++; - File file = new File(ContinuousAnt.FILE_NAME); - try { - Files.writeString(Path.of(file.getPath()), timestampTilFood + ",", StandardOpenOption.APPEND); - } catch (IOException e) { - e.printStackTrace(); + foodTimestampsTotal += timestampTilFood; + if(foodCollected % 1000 == 0){ + System.out.println(foodTimestampsTotal/1000f); + File file = new File(ContinuousAnt.FILE_NAME); + try { + Files.writeString(Path.of(file.getPath()), foodTimestampsTotal/1000f +",", StandardOpenOption.APPEND); + } catch (IOException e) { + e.printStackTrace(); + } + foodTimestampsTotal = 0; } + if(foodCollected == 1000){ + ((EpsilonGreedyPolicy) this.policy).setEpsilon(0.15f); + } + if(foodCollected == 2000){ + ((EpsilonGreedyPolicy) this.policy).setEpsilon(0.10f); + } + if(foodCollected == 3000){ + ((EpsilonGreedyPolicy) this.policy).setEpsilon(0.05f); + } + if(foodCollected == 4000){ + System.out.println("final 0 expl"); + ((EpsilonGreedyPolicy) this.policy).setEpsilon(0.00f); + } + if(foodCollected == 15000){ + File file = new File(ContinuousAnt.FILE_NAME); + try { + Files.writeString(Path.of(file.getPath()), "\n", StandardOpenOption.APPEND); + } catch (IOException e) { + e.printStackTrace(); + } + return; + } + iterations++; timestampTilFood = 0; - rewardsPer1000 = 0; } // Q Update diff --git a/src/main/java/evironment/antGame/Reward.java b/src/main/java/evironment/antGame/Reward.java index 05e7a1f..3a910b8 100644 --- a/src/main/java/evironment/antGame/Reward.java +++ b/src/main/java/evironment/antGame/Reward.java @@ -1,13 +1,13 @@ package evironment.antGame; public class Reward { - public static final double FOOD_PICK_UP_SUCCESS = 1; + public static final double FOOD_PICK_UP_SUCCESS = 0; public static final double FOOD_PICK_UP_FAIL_NO_FOOD = -1; public static final double FOOD_PICK_UP_FAIL_HAS_FOOD_ALREADY = -1; public static final double FOOD_DROP_DOWN_FAIL_NO_FOOD = -1; public static final double FOOD_DROP_DOWN_FAIL_NOT_START = -1; - public static final double FOOD_DROP_DOWN_SUCCESS = 40; + public static final double FOOD_DROP_DOWN_SUCCESS = 1; public static final double UNKNOWN_FIELD_EXPLORED = 0; diff --git a/src/main/java/example/ContinuousAnt.java b/src/main/java/example/ContinuousAnt.java index 9005956..08308fa 100644 --- a/src/main/java/example/ContinuousAnt.java +++ b/src/main/java/example/ContinuousAnt.java @@ -12,8 +12,10 @@ import java.io.File; import java.io.IOException; public class ContinuousAnt { - public static final String FILE_NAME = "converge05.txt"; + public static final String FILE_NAME = "converge22.txt"; public static void main(String[] args) { + int i = 4+4+4+6+6+6+8+10+12+14+14+16+16+16+18+18+18+20+20+20+22+22+22+24+24+24+24+26+26+26+26+26+28+28+28+28+28+30+30+30+30+32+32+32+34+34+34+36+36+38+40+42; + System.out.println(i/52f); File file = new File(FILE_NAME); try { file.createNewFile(); @@ -27,9 +29,12 @@ public class ContinuousAnt { AntAction.values()); rl.setDelay(0); rl.setNrOfEpisodes(1); - rl.setDiscountFactor(0.7f); - rl.setLearningRate(0.2f); - rl.setEpsilon(0.5f); + //0.99 0.9 0.5 + //0.99 0.95 0.9 0.7 0.5 0.3 0.1 + rl.setDiscountFactor(0.05f); + // 0.1, 0.3, 0.5, 0.7 0.9 + rl.setLearningRate(0.9f); + rl.setEpsilon(0.2f); rl.start(); } } diff --git a/src/main/java/example/JumpingDino.java b/src/main/java/example/JumpingDino.java index b738fbb..0a03b16 100644 --- a/src/main/java/example/JumpingDino.java +++ b/src/main/java/example/JumpingDino.java @@ -17,9 +17,9 @@ public class JumpingDino { DinoAction.values()); rl.setDelay(0); - rl.setDiscountFactor(1f); + rl.setDiscountFactor(9f); rl.setEpsilon(0.05f); - rl.setLearningRate(1f); + rl.setLearningRate(0.8f); rl.setNrOfEpisodes(100000); rl.start(); }