diff --git a/SimpleZ1B2QLearning.png b/SimpleZ1B2QLearning.png deleted file mode 100644 index f64036e..0000000 Binary files a/SimpleZ1B2QLearning.png and /dev/null differ diff --git a/SimpleZ1B2SARSA.png b/SimpleZ1B2SARSA.png deleted file mode 100644 index 8932118..0000000 Binary files a/SimpleZ1B2SARSA.png and /dev/null differ diff --git a/SimpleZ1B2SarsaA.png b/SimpleZ1B2SarsaA.png deleted file mode 100644 index 2ac3f0f..0000000 Binary files a/SimpleZ1B2SarsaA.png and /dev/null differ diff --git a/SimpleZ2B1MonteCarlo.png b/SimpleZ2B1MonteCarlo.png deleted file mode 100644 index a888931..0000000 Binary files a/SimpleZ2B1MonteCarlo.png and /dev/null differ diff --git a/SimpleZ2B1MonteCarloA.png b/SimpleZ2B1MonteCarloA.png deleted file mode 100644 index e2d8257..0000000 Binary files a/SimpleZ2B1MonteCarloA.png and /dev/null differ diff --git a/SimpleZ2B1QLearning.png b/SimpleZ2B1QLearning.png deleted file mode 100644 index 24830b5..0000000 Binary files a/SimpleZ2B1QLearning.png and /dev/null differ diff --git a/SimpleZ2B1QLearningMaxValueAsDefault.png b/SimpleZ2B1QLearningMaxValueAsDefault.png deleted file mode 100644 index b0fc2d3..0000000 Binary files a/SimpleZ2B1QLearningMaxValueAsDefault.png and /dev/null differ diff --git a/SimpleZ2B2QLearning.png b/SimpleZ2B2QLearning.png deleted file mode 100644 index ca533e1..0000000 Binary files a/SimpleZ2B2QLearning.png and /dev/null differ diff --git a/SimpleZ2B2SARSA.png b/SimpleZ2B2SARSA.png deleted file mode 100644 index 6efe083..0000000 Binary files a/SimpleZ2B2SARSA.png and /dev/null differ diff --git a/SimpleZ2B4QLearning.png b/SimpleZ2B4QLearning.png deleted file mode 100644 index 8090ffe..0000000 Binary files a/SimpleZ2B4QLearning.png and /dev/null differ diff --git a/epsilonValues.R b/epsilonValues.R deleted file mode 100644 index 3c8291c..0000000 --- a/epsilonValues.R +++ /dev/null @@ -1,84 +0,0 @@ -# Libraries -library(ggplot2) -library(matrixStats) -ta <- as.matrix(read.table(file.choose(), sep=",", header = FALSE)) -ta <- t(ta) -dim(ta) -head(ta) - -# Create dummy data -data <- data.frame( - y=ta[,1], - y2=ta[,2], - y3=ta[,3], - y4=ta[,4], - y5=ta[,5], - y6=ta[,6], - y7=ta[,7], - y8=ta[,8], - y9=ta[,9], - y10=ta[,10], - y11=ta[,11], - y12=ta[,12], - y13=ta[,13], - y14=ta[,14], - y15=ta[,15], - x=seq(1, length(ta[,1])) -) -ggplot(data, aes(x*1000)) + - labs(title="Discount factor = 0.99", - x ="Timestamp", y = "Avg. reward per timestamp", color = "Learning rate") + - ylim(-1.5,0.6) + - geom_line(aes(y = y, colour = "0.1"), size=1)+ - geom_line(aes(y = y2, colour = "0.3"), size=1) + - geom_line(aes(y = y3, colour = "0.5"), size=1) + - geom_line(aes(y = y4, colour = "0.7"), size=1) + - geom_line(aes(y = y5, colour = "0.9"), size=1) - -ggplot(data, aes(x*1000)) + - labs(title="Discount factor = 0.9", - x ="Timestamp", y = "Avg. reward per timestamp", color = "Learning rate") + - ylim(-1.5,0.6) + - geom_line(aes(y = y6, colour = "0.1"), size=1) + - geom_line(aes(y = y7, colour = "0.3"), size=1) + - geom_line(aes(y = y8, colour = "0.5"), size=1) + - geom_line(aes(y = y9, colour = "0.7"), size=1) + - geom_line(aes(y = y10, colour = "0.9"), size=1) - -ggplot(data, aes(x*1000) ) + - labs(title="Discount factor = 0.5", - x ="Timestamp", y = "Avg. reward per timestamp", color = "Learning rate") + - ylim(-1.5,0.6) + - geom_line(aes(y = y11, colour = "0.1"), size=1) + - geom_line(aes(y = y12, colour = "0.3"), size=1) + - geom_line(aes(y = y13, colour = "0.5"), size=1) + - geom_line(aes(y = y14, colour = "0.7"), size=1) + - geom_line(aes(y = y15, colour = "0.9"), size=1) - - # scale_x_log10(limits=c(1,150) ) - # scale_y_log10( breaks=c(1,50,500,2500,25000), limits=c(1,25000) ) - -plot(ta, x=x*1000, log="x", type="o") - -convergence <- read.csv(file.choose(), header=FALSE, row.names=1) - -sds <- rowSds(sapply(convergence[,-1], `length<-`, max(lengths(convergence[,-1]))), na.rm=TRUE) -men <- rowMeans(sapply(convergence[,-1], `length<-`, max(lengths(convergence[,-1]))), na.rm=TRUE) -print(sds) - -# create dummy data -data <- data.frame( - names=rownames(convergence), - means=men, - sds=sds -) - -ggplot(data) + - geom_bar(aes(x=names, y=means), stat="identity", fill="skyblue", alpha=0.7) + - geom_errorbar( aes(x=names, ymin=means-sds, ymax=means+sds), width=0.4, colour="orange", alpha=0.9, size=1.3) + - geom_text(aes(label=as.integer(means), x =names, y=means), position=position_dodge(width=0.9), vjust=-0.25) + - xlab("Epsilon") + ylab("avg. amount of episodes until convergence") - - -ba <- barplot(names=rownames(convergence), height=men, ylim=c(0, max(men)*1.2), ylab = "avg. episodes until convergence", xlab = "epsilon value") -text(x = ba, y = men, label = as.integer(men), pos = 3, cex = 0.8, col = "red") diff --git a/EpsilonAnalysis.R b/rScripts/avgEpisodesUntilConvergence.R similarity index 92% rename from EpsilonAnalysis.R rename to rScripts/avgEpisodesUntilConvergence.R index 2e260ab..2c75632 100644 --- a/EpsilonAnalysis.R +++ b/rScripts/avgEpisodesUntilConvergence.R @@ -7,7 +7,7 @@ sds <- rowSds(sapply(convergence[,-1], `length<-`, max(lengths(convergence[,-1]) men <- rowMeans(sapply(convergence[,-1], `length<-`, max(lengths(convergence[,-1]))), na.rm=TRUE) print(sds) -# create dummy data +# create data frame data <- data.frame( names=rownames(convergence), means=men, @@ -20,6 +20,6 @@ convergence.m <- melt(convergence, id.vars = "groups") ggplot(data) + geom_bar(aes(x=names, y=means, fill=means), stat="identity", colour="black", alpha=0.8) + geom_errorbar( aes(x=names, ymin=means, ymax=means+sds), width=0.4, colour="black", alpha=0.8, size=0.6) + - ylim(0,104000) + + # ylim(0,104000) + xlab("Epsilon") + ylab("Ø Episoden bis Konvergenz") + theme_bw(base_size = 24) \ No newline at end of file diff --git a/DiscAndLRonQLearningAdvanced.R b/rScripts/avgRewardPerTimestampDiffLRandDiscQ.R similarity index 97% rename from DiscAndLRonQLearningAdvanced.R rename to rScripts/avgRewardPerTimestampDiffLRandDiscQ.R index f840762..6ee30c3 100644 --- a/DiscAndLRonQLearningAdvanced.R +++ b/rScripts/avgRewardPerTimestampDiffLRandDiscQ.R @@ -6,7 +6,7 @@ ta <- t(ta) dim(ta) head(ta) -# Create dummy data +# Create data frame data <- data.frame( y=ta[,1], y2=ta[,2], diff --git a/OptimalityDifferentDiscountFactors.R b/rScripts/avgTimestampsPerFood.R similarity index 94% rename from OptimalityDifferentDiscountFactors.R rename to rScripts/avgTimestampsPerFood.R index 6a1f58a..4d09818 100644 --- a/OptimalityDifferentDiscountFactors.R +++ b/rScripts/avgTimestampsPerFood.R @@ -5,8 +5,8 @@ ta <- as.matrix(read.table(file.choose(), sep=",", header = FALSE, skip = 1)) ta <- t(ta) dim(ta) head(ta) -print(ta[1:20,1]) -# Create dummy data + +# Create data frame data <- data.frame( y=ta[1:20,1], y2=ta[1:20,2], @@ -20,7 +20,7 @@ data <- data.frame( ) ggplot(data, aes(x*1000)) + labs( x ="Gesamtanzahl gesammeltes Futter", y = "Ø Zeitstempel pro Futter (log)", color = "Diskontierungsfaktor") + - scale_y_log10()+ + #scale_y_log10()+ geom_hline(yintercept=23, linetype="dashed")+ geom_text(aes(0,23,label = "opt", vjust = -1)) + geom_line(aes(y = y, colour = "0.05"), size=1)+ diff --git a/OptimalityDifferentDiscountFactorTotalTS.R b/rScripts/totalTimestampsWhileCollecting.R similarity index 97% rename from OptimalityDifferentDiscountFactorTotalTS.R rename to rScripts/totalTimestampsWhileCollecting.R index 7af1295..1d34138 100644 --- a/OptimalityDifferentDiscountFactorTotalTS.R +++ b/rScripts/totalTimestampsWhileCollecting.R @@ -5,7 +5,8 @@ ta <- as.matrix(read.table(file.choose(), sep=",", header = FALSE, skip = 1)) ta <- t(ta) dim(ta) head(ta) -# Create dummy data + +# Create data frame data <- data.frame( y=ta[,1], y2=ta[,2], diff --git a/antAnalysisAvgReward.txt b/rawResults/antAnalysisAvgReward.txt similarity index 100% rename from antAnalysisAvgReward.txt rename to rawResults/antAnalysisAvgReward.txt diff --git a/optDiscNew.txt b/rawResults/avgTimestampsPerFoodDiffDiscounts.txt similarity index 91% rename from optDiscNew.txt rename to rawResults/avgTimestampsPerFoodDiffDiscounts.txt index 40f9006..7b7ecd1 100644 --- a/optDiscNew.txt +++ b/rawResults/avgTimestampsPerFoodDiffDiscounts.txt @@ -1,4 +1,4 @@ -# 0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 0.99 +# Average Timestamps needed to collect one food piece, mean over 1000 collected food; Discount factor for rows: 0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 0.99 103.857,48.501,49.283,44.149,44.51,42.402,45.102,41.896,43.104,40.76,41.0,39.134,46.756,44.282,45.834,44.942,41.078,43.66,42.812,41.672,44.022,41.196,42.106,41.384,42.174,42.736,45.792,43.482,48.132,44.14 86.807,32.319,29.317,26.51,25.664,24.348,24.994,23.972,24.46,24.252,24.128,23.758,25.184,24.872,25.286,24.356,24.322,24.262,24.26,24.446,24.888,24.302,24.866,24.236,24.548,24.866,25.894,24.872,26.212,24.632 81.071,30.951,26.07,24.804,23.196,22.67,23.374,22.388,22.666,22.776,22.432,22.294,23.318,23.018,23.44,22.97,22.502,22.776,22.692,22.382,23.178,22.932,23.098,22.726,22.584,23.062,23.548,22.824,23.672,22.73 diff --git a/convergencSimpleQLearning099disc09LR.txt b/rawResults/convergencSimpleQLearning099disc09LR.txt similarity index 100% rename from convergencSimpleQLearning099disc09LR.txt rename to rawResults/convergencSimpleQLearning099disc09LR.txt diff --git a/convergenceAdvanced.txt b/rawResults/convergenceAdvancedMC.txt similarity index 99% rename from convergenceAdvanced.txt rename to rawResults/convergenceAdvancedMC.txt index 9099183..4ecea2c 100644 --- a/convergenceAdvanced.txt +++ b/rawResults/convergenceAdvancedMC.txt @@ -1,4 +1,4 @@ -# 4 speeds, 4 distance. Distance, inJump and incomming speed; Monte Carlo +# 4 speeds, 4 distance. Distance, inJump and incoming speed; Monte Carlo 0.05,8565,3803,4010,30744,5419,1733,30335,4070,7502,7880,12776,2242,10489,4384,23578,14718,17801,7770,4097,5905,4027,5969,8492,1994,1542,18577,2346,13550,2279,5072,39861,15109,4419,10970,6150,2359,3093,7131,5932,18531,6887,22726,4925,3320,6832,13704,10372,25447,22945,28991,4526,12461,1831,20964,21963,2749,7112,3260,33941,14907,9168,7133,6590,21075,5302,2546,10164,3101,3204,7850,40776,38750,32898,14204,4431,12038,13430,13976,7385,7647,9634,12230,6635,9532,4181,12857,10473,2609,3827,4800,4316,5114,4153,6282,4737,5078,7358,11660,4750,16256 0.1,1550,4416,3473,2821,2101,3635,5168,9641,7074,5623,2571,2253,2595,4347,24575,4239,4552,6372,6638,2686,33218,1976,7638,2479,3555,4496,2321,11712,1934,3668,9398,1312,4422,3217,9472,7053,5539,2140,2928,1576,12121,5023,2458,5947,2374,10197,1701,6186,7013,3061,3108,1856,4052,2327,2287,2026,1606,1567,2414,3756,2648,3409,4195,4803,11940,2171,1574,2656,3221,2044,2250,1528,1348,7785,3141,6622,25696,4520,2118,2489,7125,4182,1495,2475,5037,3839,4218,1929,2302,5339,1827,5444,3710,47322,2555,2373,2108,1550,4416,3473 0.15,2328,3189,5422,1943,2057,1630,1405,2079,8071,22741,4056,1704,2727,2339,10381,1246,1831,6658,2760,9416,4036,1243,3716,1199,8678,5480,4038,42601,1133,3576,3852,4014,1487,5849,4275,3766,2240,3917,2164,3695,1954,1869,1854,1751,2785,2334,2821,5942,1310,1680,7867,1987,2345,1785,3898,1598,4382,2881,4874,2345,2483,3962,8058,10204,6444,2885,8494,5135,7150,1041,8020,24960,1720,45430,3546,3751,2077,2269,6051,5372,4247,10739,7114,1169,1757,1162,2699,1765,1367,3226,2290,3695,1756,2452,7385,2599,1426,7600,15285,4712 diff --git a/convergenceAdvancedMCnegRew.txt b/rawResults/convergenceAdvancedMCnegRew.txt similarity index 100% rename from convergenceAdvancedMCnegRew.txt rename to rawResults/convergenceAdvancedMCnegRew.txt diff --git a/convergenceSimple.txt b/rawResults/convergenceSimpleMC.txt similarity index 100% rename from convergenceSimple.txt rename to rawResults/convergenceSimpleMC.txt diff --git a/convergenceSimpleNegRewInJumpQLearning099disc09LR.txt b/rawResults/convergenceSimpleNegRewInJumpQLearning099disc09LR.txt similarity index 98% rename from convergenceSimpleNegRewInJumpQLearning099disc09LR.txt rename to rawResults/convergenceSimpleNegRewInJumpQLearning099disc09LR.txt index e8c8644..aca0ba4 100644 --- a/convergenceSimpleNegRewInJumpQLearning099disc09LR.txt +++ b/rawResults/convergenceSimpleNegRewInJumpQLearning099disc09LR.txt @@ -1,4 +1,4 @@ -# Simple, Dist and inJump, QLearning 0.99 disc 0.9 Learning rade, -1 reward when in jump and jumping +# Simple, Dist and inJump, QLearning 0.99 disc 0.9 Learning rate, -1 reward when in jump and jumping 0.05,6,6,10,8,8,7,6,10,8,6,8,7,8,6,10,10,9,9,12,7,7,8,9,11,8,12,7,8,8,8,9,8,10,7,10,8,9,9,7,7,8,7,7,5,11,8,8,7,8,7,8,8,7,7,9,8,6,5,10,10,7,10,8,7,7,7,7,11,8,9,10,8,10,8,7,6,5,8,7,8,8,8,3,8,9,9,4,8,6,11,7,8,8,8,9,8,12,7,10,9 0.1,6,6,8,10,8,7,6,7,9,6,8,10,8,7,10,22,7,11,9,9,8,14,8,29,8,10,7,7,9,7,10,8,11,7,9,9,10,7,7,14,7,7,10,9,5,6,10,8,9,10,8,6,10,9,10,8,6,18,9,32,6,10,8,7,8,7,7,5,8,9,8,8,8,8,6,6,5,9,7,6,8,8,6,12,9,9,4,8,6,11,8,8,9,7,8,7,10,8,12,37 0.15,7,6,5,9,7,7,6,6,8,15,7,7,13,8,8,16,7,16,12,12,8,21,7,11,7,26,4,7,10,7,10,8,7,7,12,10,6,10,6,6,6,15,20,9,5,8,8,7,10,10,7,48,10,8,12,7,7,6,8,11,6,10,9,7,8,35,8,5,10,5,14,14,10,8,12,8,7,7,7,8,8,7,7,20,9,13,4,10,6,26,12,8,8,10,7,8,11,63,13,16 diff --git a/convergenceSimpleNoJumpInfo.txt b/rawResults/convergenceSimpleNoJumpInfoMCfirstVisit.txt similarity index 96% rename from convergenceSimpleNoJumpInfo.txt rename to rawResults/convergenceSimpleNoJumpInfoMCfirstVisit.txt index c4e74ce..cf26515 100644 --- a/convergenceSimpleNoJumpInfo.txt +++ b/rawResults/convergenceSimpleNoJumpInfoMCfirstVisit.txt @@ -1,4 +1,4 @@ -#same speed and distance. ONLY DISTANCE TO OBSTACLE. 100 trials, rest did not convergec; MONTE CARLO +#same speed and distance. ONLY DISTANCE TO OBSTACLE. 100 trials, rest did not converged!; MONTE CARLO 0.05,8,64,62,15,84,5,72,183 0.1,15,15,6,18,46,28,307 0.15,177,20,12,9,21,4,6,65,173 diff --git a/convergenceSimpleNoJumpingQLearning099disc09LR.txt b/rawResults/convergenceSimpleNoJumpingQLearning099disc09LR.txt similarity index 100% rename from convergenceSimpleNoJumpingQLearning099disc09LR.txt rename to rawResults/convergenceSimpleNoJumpingQLearning099disc09LR.txt diff --git a/convergenceSimpleNoJumpingSARSA099disc09LR.txt b/rawResults/convergenceSimpleNoJumpingSARSA099disc09LR.txt similarity index 100% rename from convergenceSimpleNoJumpingSARSA099disc09LR.txt rename to rawResults/convergenceSimpleNoJumpingSARSA099disc09LR.txt diff --git a/convergenceSimpleQ099Disc09LRSameRewardsAsMC.txt b/rawResults/convergenceSimpleQ099Disc09LRSameRewardsAsMC.txt similarity index 100% rename from convergenceSimpleQ099Disc09LRSameRewardsAsMC.txt rename to rawResults/convergenceSimpleQ099Disc09LRSameRewardsAsMC.txt diff --git a/convergenceSimpleQ099Disc09LRSameRewardsAsMCMinus1DefaultValue.txt b/rawResults/convergenceSimpleQ099Disc09LRSameRewardsAsMCMinus1DefaultValue.txt similarity index 100% rename from convergenceSimpleQ099Disc09LRSameRewardsAsMCMinus1DefaultValue.txt rename to rawResults/convergenceSimpleQ099Disc09LRSameRewardsAsMCMinus1DefaultValue.txt diff --git a/convergenceSimpleSARSA099disc09LR.txt b/rawResults/convergenceSimpleSARSA099disc09LR.txt similarity index 100% rename from convergenceSimpleSARSA099disc09LR.txt rename to rawResults/convergenceSimpleSARSA099disc09LR.txt diff --git a/optDiscTimestampsNew.txt b/rawResults/totalTimestampsNeededForFoodDiffDiscounts.txt similarity index 93% rename from optDiscTimestampsNew.txt rename to rawResults/totalTimestampsNeededForFoodDiffDiscounts.txt index 7b3ec46..dda8f33 100644 --- a/optDiscTimestampsNew.txt +++ b/rawResults/totalTimestampsNeededForFoodDiffDiscounts.txt @@ -1,4 +1,4 @@ -# timestamp needed for x*1000 collected food; 0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 0.99 +# timestamp needed for x*1000 collected food; Discount factor for rows: 0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 0.99 103856,152357,201640,245789,290299,332701,377803,419699,462803,503563,544563,583697,630453,674735,720569,765511,806589,850249,893061,934733,978755,1019951,1062057,1103441,1145615,1188351,1234143,1277625,1325757,1369897 86806,119125,148442,174952,200616,224964,249958,273930,298390,322642,346770,370528,395712,420584,445870,470226,494548,518810,543070,567516,592404,616706,641572,665808,690356,715222,741116,765988,792200,816832 81070,112021,138091,162895,186091,208761,232135,254523,277189,299965,322397,344691,368009,391027,414467,437437,459939,482715,505407,527789,550967,573899,596997,619723,642307,665369,688917,711741,735413,758143 diff --git a/AdvancedZ3B1MonteCarloA.png b/resultDiagrams/AdvancedZ3B1MonteCarlo.png similarity index 100% rename from AdvancedZ3B1MonteCarloA.png rename to resultDiagrams/AdvancedZ3B1MonteCarlo.png diff --git a/AdvancedZ3B3MonteA.png b/resultDiagrams/AdvancedZ3B3Monte.png similarity index 100% rename from AdvancedZ3B3MonteA.png rename to resultDiagrams/AdvancedZ3B3Monte.png diff --git a/SimpleZ1B2QLearningA.png b/resultDiagrams/SimpleZ1B2QLearningDisc099LR09.png similarity index 100% rename from SimpleZ1B2QLearningA.png rename to resultDiagrams/SimpleZ1B2QLearningDisc099LR09.png diff --git a/SimpleZ2B1QLearningA.png b/resultDiagrams/SimpleZ2B1QLearningDisc099LR09.png similarity index 100% rename from SimpleZ2B1QLearningA.png rename to resultDiagrams/SimpleZ2B1QLearningDisc099LR09.png diff --git a/SimpleZ2B1QLearningMaxA.png b/resultDiagrams/SimpleZ2B1QLearningMaxInitValuesDisc099LR09.png similarity index 100% rename from SimpleZ2B1QLearningMaxA.png rename to resultDiagrams/SimpleZ2B1QLearningMaxInitValuesDisc099LR09.png diff --git a/SimpleZ2B2QLearningA.png b/resultDiagrams/SimpleZ2B2QLearningDisc099LR09.png similarity index 100% rename from SimpleZ2B2QLearningA.png rename to resultDiagrams/SimpleZ2B2QLearningDisc099LR09.png diff --git a/SimpleZ2B2SarsaA.png b/resultDiagrams/SimpleZ2B2SarsaDisc099LR09.png similarity index 100% rename from SimpleZ2B2SarsaA.png rename to resultDiagrams/SimpleZ2B2SarsaDisc099LR09.png diff --git a/SimpleZ2B4QLearningA.png b/resultDiagrams/SimpleZ2B4QLearningDisc099LR09.png similarity index 100% rename from SimpleZ2B4QLearningA.png rename to resultDiagrams/SimpleZ2B4QLearningDisc099LR09.png diff --git a/antGameAnalysis05DiscA.png b/resultDiagrams/antGameAnalysis05Disc.png similarity index 100% rename from antGameAnalysis05DiscA.png rename to resultDiagrams/antGameAnalysis05Disc.png diff --git a/antGameAnalysis099DiscA.png b/resultDiagrams/antGameAnalysis099Disc.png similarity index 100% rename from antGameAnalysis099DiscA.png rename to resultDiagrams/antGameAnalysis099Disc.png diff --git a/antGameAnalysis09DiscA.png b/resultDiagrams/antGameAnalysis09Disc.png similarity index 100% rename from antGameAnalysis09DiscA.png rename to resultDiagrams/antGameAnalysis09Disc.png diff --git a/optDisc.png b/resultDiagrams/avgTimestampsPerFoodOptimality.png similarity index 100% rename from optDisc.png rename to resultDiagrams/avgTimestampsPerFoodOptimality.png diff --git a/NeededTimestampsFromEverywhere.png b/resultDiagrams/mininumNeededTimestampsAntGame.png similarity index 100% rename from NeededTimestampsFromEverywhere.png rename to resultDiagrams/mininumNeededTimestampsAntGame.png diff --git a/optDiscTotalTS.png b/resultDiagrams/totalTimestampsContinuousCollecting.png similarity index 100% rename from optDiscTotalTS.png rename to resultDiagrams/totalTimestampsContinuousCollecting.png diff --git a/src/main/java/core/algo/td/QLearningOffPolicyTDControl.java b/src/main/java/core/algo/td/QLearningOffPolicyTDControl.java index 79ba81f..470b60f 100644 --- a/src/main/java/core/algo/td/QLearningOffPolicyTDControl.java +++ b/src/main/java/core/algo/td/QLearningOffPolicyTDControl.java @@ -76,16 +76,16 @@ public class QLearningOffPolicyTDControl extends EpisodicLearnin if(reward == Reward.FOOD_DROP_DOWN_SUCCESS) { foodCollected++; foodTimestampsTotal += timestampTilFood; - //System.out.println(foodCollected + " " + timestampCurrentEpisode); File file = new File(ContinuousAnt.FILE_NAME); if(foodCollected % 1000 == 0) { + System.out.println(foodTimestampsTotal / 1000f + " " + timestampCurrentEpisode); try { - Files.writeString(Path.of(file.getPath()), timestampCurrentEpisode + ",", StandardOpenOption.APPEND); + Files.writeString(Path.of(file.getPath()), foodTimestampsTotal / 1000f + ",", StandardOpenOption.APPEND); } catch (IOException e) { e.printStackTrace(); } + foodTimestampsTotal = 0; } - foodTimestampsTotal = 0; if(foodCollected == 1000){ ((EpsilonGreedyPolicy) this.policy).setEpsilon(0.15f); } @@ -105,7 +105,7 @@ public class QLearningOffPolicyTDControl extends EpisodicLearnin } catch (IOException e) { e.printStackTrace(); } - return; + // return; } iterations++; timestampTilFood = 0; diff --git a/src/main/java/example/ContinuousAnt.java b/src/main/java/example/ContinuousAnt.java index 0a75ec2..1e7592e 100644 --- a/src/main/java/example/ContinuousAnt.java +++ b/src/main/java/example/ContinuousAnt.java @@ -3,13 +3,12 @@ package example; import core.RNG; import core.algo.Method; import core.controller.RLController; +import core.controller.RLControllerGUI; import evironment.antGame.AntAction; import evironment.antGame.AntWorldContinuous; import java.io.File; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; public class ContinuousAnt { public static final String FILE_NAME = "optDiscTimestampsNew.txt"; @@ -22,33 +21,21 @@ public class ContinuousAnt { } catch (IOException e) { e.printStackTrace(); } - List discValues = new ArrayList<>() { - }; - discValues.add(0.05f); - discValues.add(0.1f); - discValues.add(0.3f); - discValues.add(0.5f); - discValues.add(0.7f); - discValues.add(0.9f); - discValues.add(0.95f); - discValues.add(0.99f); - - for(float disc : discValues) { RNG.setSeed(13); - RLController rl = new RLController<>( + RLController rl = new RLControllerGUI<>( new AntWorldContinuous(8, 8), Method.Q_LEARNING_OFF_POLICY_CONTROL, AntAction.values()); - rl.setDelay(0); + rl.setDelay(20); rl.setNrOfEpisodes(1); //0.99 0.9 0.5 //0.99 0.95 0.9 0.7 0.5 0.3 0.1 - rl.setDiscountFactor(disc); + rl.setDiscountFactor(0.05f); // 0.1, 0.3, 0.5, 0.7 0.9 rl.setLearningRate(0.9f); rl.setEpsilon(0.2f); rl.start(); - } + } }