add multiple folders to organize results

2020-04-05 12:00:16 +02:00 · 2020-04-05 12:00:16 +02:00 · b9be640284
parent a08b8160a3
commit b9be640284
45 changed files with 22 additions and 118 deletions
--- a/SimpleZ1B2QLearning.png
+++ b/SimpleZ1B2QLearning.png
--- a/SimpleZ1B2SARSA.png
+++ b/SimpleZ1B2SARSA.png
--- a/SimpleZ1B2SarsaA.png
+++ b/SimpleZ1B2SarsaA.png
--- a/SimpleZ2B1MonteCarlo.png
+++ b/SimpleZ2B1MonteCarlo.png
--- a/SimpleZ2B1MonteCarloA.png
+++ b/SimpleZ2B1MonteCarloA.png
--- a/SimpleZ2B1QLearning.png
+++ b/SimpleZ2B1QLearning.png
--- a/SimpleZ2B1QLearningMaxValueAsDefault.png
+++ b/SimpleZ2B1QLearningMaxValueAsDefault.png
--- a/SimpleZ2B2QLearning.png
+++ b/SimpleZ2B2QLearning.png
--- a/SimpleZ2B2SARSA.png
+++ b/SimpleZ2B2SARSA.png
--- a/SimpleZ2B4QLearning.png
+++ b/SimpleZ2B4QLearning.png
--- a/epsilonValues.R
+++ b/epsilonValues.R
@ -1,84 +0,0 @@
-# Libraries
-library(ggplot2)
-library(matrixStats)
-ta  <- as.matrix(read.table(file.choose(), sep=",", header = FALSE))
-ta <- t(ta)
-dim(ta)
-head(ta)
-
-# Create dummy data
-data <- data.frame(
-  y=ta[,1],
-  y2=ta[,2],
-  y3=ta[,3],
-  y4=ta[,4],
-  y5=ta[,5],
-  y6=ta[,6],
-  y7=ta[,7],
-  y8=ta[,8],
-  y9=ta[,9],
-  y10=ta[,10],
-  y11=ta[,11],
-  y12=ta[,12],
-  y13=ta[,13],
-  y14=ta[,14],
-  y15=ta[,15],
-  x=seq(1, length(ta[,1]))
-)
-ggplot(data, aes(x*1000)) +
-  labs(title="Discount factor = 0.99",
-       x ="Timestamp", y = "Avg. reward per timestamp", color = "Learning rate") +
-  ylim(-1.5,0.6) +
-  geom_line(aes(y = y, colour = "0.1"), size=1)+
-  geom_line(aes(y = y2, colour = "0.3"), size=1) +
-  geom_line(aes(y = y3, colour = "0.5"), size=1) +
-  geom_line(aes(y = y4, colour = "0.7"), size=1) +
-  geom_line(aes(y = y5, colour = "0.9"), size=1)
-  
-ggplot(data, aes(x*1000)) +
-  labs(title="Discount factor = 0.9",
-       x ="Timestamp", y = "Avg. reward per timestamp", color = "Learning rate") +
-  ylim(-1.5,0.6) +
-  geom_line(aes(y = y6, colour = "0.1"), size=1) +
-  geom_line(aes(y = y7, colour = "0.3"), size=1) +
-  geom_line(aes(y = y8, colour = "0.5"), size=1) +
-  geom_line(aes(y = y9, colour = "0.7"), size=1) +
-  geom_line(aes(y = y10, colour = "0.9"), size=1)
-
-ggplot(data, aes(x*1000) ) +
-  labs(title="Discount factor = 0.5",
-       x ="Timestamp", y = "Avg. reward per timestamp", color = "Learning rate") +
-  ylim(-1.5,0.6) +
-  geom_line(aes(y = y11, colour = "0.1"), size=1) +
-  geom_line(aes(y = y12, colour = "0.3"), size=1) +
-  geom_line(aes(y = y13, colour = "0.5"), size=1) +
-  geom_line(aes(y = y14, colour = "0.7"), size=1) +
-  geom_line(aes(y = y15, colour = "0.9"), size=1) 
-  
- # scale_x_log10(limits=c(1,150) ) 
- # scale_y_log10( breaks=c(1,50,500,2500,25000), limits=c(1,25000) )
-
-plot(ta, x=x*1000, log="x", type="o")
-
-convergence <- read.csv(file.choose(), header=FALSE, row.names=1)
-
-sds <- rowSds(sapply(convergence[,-1], `length<-`, max(lengths(convergence[,-1]))), na.rm=TRUE)
-men <- rowMeans(sapply(convergence[,-1], `length<-`, max(lengths(convergence[,-1]))), na.rm=TRUE)
-print(sds)
-
-# create dummy data
-data <- data.frame(
-  names=rownames(convergence),
-  means=men,
-  sds=sds
-)
-
-ggplot(data) +
-  geom_bar(aes(x=names, y=means), stat="identity", fill="skyblue", alpha=0.7) +
-  geom_errorbar( aes(x=names, ymin=means-sds, ymax=means+sds), width=0.4, colour="orange", alpha=0.9, size=1.3) +
-  geom_text(aes(label=as.integer(means), x =names, y=means), position=position_dodge(width=0.9), vjust=-0.25) +
-  xlab("Epsilon") + ylab("avg. amount of episodes until convergence") 
-
-
-ba <- barplot(names=rownames(convergence), height=men, ylim=c(0, max(men)*1.2), ylab = "avg. episodes until convergence", xlab = "epsilon value")
-text(x = ba, y = men, label = as.integer(men), pos = 3, cex = 0.8, col = "red")
--- a/rScripts/avgEpisodesUntilConvergence.R
+++ b/rScripts/avgEpisodesUntilConvergence.R
@ -7,7 +7,7 @@ sds <- rowSds(sapply(convergence[,-1], `length<-`, max(lengths(convergence[,-1])
 men <- rowMeans(sapply(convergence[,-1], `length<-`, max(lengths(convergence[,-1]))), na.rm=TRUE)
 print(sds)

-# create dummy data
+# create data frame
 data <- data.frame(
  names=rownames(convergence),
  means=men,
@ -20,6 +20,6 @@ convergence.m <- melt(convergence, id.vars = "groups")
 ggplot(data) +
  geom_bar(aes(x=names, y=means, fill=means), stat="identity", colour="black", alpha=0.8) +
  geom_errorbar( aes(x=names, ymin=means, ymax=means+sds), width=0.4, colour="black", alpha=0.8, size=0.6) +
-  ylim(0,104000) +
+ # ylim(0,104000) +
  xlab("Epsilon") + ylab("Ø Episoden bis Konvergenz") +
  theme_bw(base_size = 24)
--- a/rScripts/avgRewardPerTimestampDiffLRandDiscQ.R
+++ b/rScripts/avgRewardPerTimestampDiffLRandDiscQ.R
@ -6,7 +6,7 @@ ta <- t(ta)
 dim(ta)
 head(ta)

-# Create dummy data
+# Create data frame
 data <- data.frame(
  y=ta[,1],
  y2=ta[,2],
--- a/OptimalityDifferentDiscountFactors.R
+++ b/OptimalityDifferentDiscountFactors.R
@ -5,8 +5,8 @@ ta  <- as.matrix(read.table(file.choose(), sep=",", header = FALSE, skip = 1))
 ta <- t(ta)
 dim(ta)
 head(ta)
-print(ta[1:20,1])
-# Create dummy data
+
+# Create data frame
 data <- data.frame(
  y=ta[1:20,1],
  y2=ta[1:20,2],
@ -20,7 +20,7 @@ data <- data.frame(
 )
 ggplot(data, aes(x*1000)) +
  labs( x ="Gesamtanzahl gesammeltes Futter", y = "Ø Zeitstempel pro Futter (log)", color = "Diskontierungsfaktor") +
-  scale_y_log10()+
+  #scale_y_log10()+
  geom_hline(yintercept=23, linetype="dashed")+
  geom_text(aes(0,23,label = "opt", vjust = -1)) +
  geom_line(aes(y = y, colour = "0.05"), size=1)+
--- a/OptimalityDifferentDiscountFactorTotalTS.R
+++ b/OptimalityDifferentDiscountFactorTotalTS.R
@ -5,7 +5,8 @@ ta  <- as.matrix(read.table(file.choose(), sep=",", header = FALSE, skip = 1))
 ta <- t(ta)
 dim(ta)
 head(ta)
-# Create dummy data
+
+# Create data frame
 data <- data.frame(
  y=ta[,1],
  y2=ta[,2],
--- a/rawResults/antAnalysisAvgReward.txt
+++ b/rawResults/antAnalysisAvgReward.txt
--- a/rawResults/avgTimestampsPerFoodDiffDiscounts.txt
+++ b/rawResults/avgTimestampsPerFoodDiffDiscounts.txt
@ -1,4 +1,4 @@
-# 0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 0.99
+# Average timestamps needed to collect one piece of food, mean over 1000 collected food pieces; Discount factor for rows: 0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 0.99
 103.857,48.501,49.283,44.149,44.51,42.402,45.102,41.896,43.104,40.76,41.0,39.134,46.756,44.282,45.834,44.942,41.078,43.66,42.812,41.672,44.022,41.196,42.106,41.384,42.174,42.736,45.792,43.482,48.132,44.14
 86.807,32.319,29.317,26.51,25.664,24.348,24.994,23.972,24.46,24.252,24.128,23.758,25.184,24.872,25.286,24.356,24.322,24.262,24.26,24.446,24.888,24.302,24.866,24.236,24.548,24.866,25.894,24.872,26.212,24.632
 81.071,30.951,26.07,24.804,23.196,22.67,23.374,22.388,22.666,22.776,22.432,22.294,23.318,23.018,23.44,22.97,22.502,22.776,22.692,22.382,23.178,22.932,23.098,22.726,22.584,23.062,23.548,22.824,23.672,22.73
--- a/rawResults/convergencSimpleQLearning099disc09LR.txt
+++ b/rawResults/convergencSimpleQLearning099disc09LR.txt
--- a/rawResults/convergenceAdvancedMC.txt
+++ b/rawResults/convergenceAdvancedMC.txt
@ -1,4 +1,4 @@
-# 4 speeds, 4 distance. Distance, inJump and incomming speed; Monte Carlo
+# 4 speeds, 4 distance. Distance, inJump and incoming speed; Monte Carlo
 0.05,8565,3803,4010,30744,5419,1733,30335,4070,7502,7880,12776,2242,10489,4384,23578,14718,17801,7770,4097,5905,4027,5969,8492,1994,1542,18577,2346,13550,2279,5072,39861,15109,4419,10970,6150,2359,3093,7131,5932,18531,6887,22726,4925,3320,6832,13704,10372,25447,22945,28991,4526,12461,1831,20964,21963,2749,7112,3260,33941,14907,9168,7133,6590,21075,5302,2546,10164,3101,3204,7850,40776,38750,32898,14204,4431,12038,13430,13976,7385,7647,9634,12230,6635,9532,4181,12857,10473,2609,3827,4800,4316,5114,4153,6282,4737,5078,7358,11660,4750,16256
 0.1,1550,4416,3473,2821,2101,3635,5168,9641,7074,5623,2571,2253,2595,4347,24575,4239,4552,6372,6638,2686,33218,1976,7638,2479,3555,4496,2321,11712,1934,3668,9398,1312,4422,3217,9472,7053,5539,2140,2928,1576,12121,5023,2458,5947,2374,10197,1701,6186,7013,3061,3108,1856,4052,2327,2287,2026,1606,1567,2414,3756,2648,3409,4195,4803,11940,2171,1574,2656,3221,2044,2250,1528,1348,7785,3141,6622,25696,4520,2118,2489,7125,4182,1495,2475,5037,3839,4218,1929,2302,5339,1827,5444,3710,47322,2555,2373,2108,1550,4416,3473
 0.15,2328,3189,5422,1943,2057,1630,1405,2079,8071,22741,4056,1704,2727,2339,10381,1246,1831,6658,2760,9416,4036,1243,3716,1199,8678,5480,4038,42601,1133,3576,3852,4014,1487,5849,4275,3766,2240,3917,2164,3695,1954,1869,1854,1751,2785,2334,2821,5942,1310,1680,7867,1987,2345,1785,3898,1598,4382,2881,4874,2345,2483,3962,8058,10204,6444,2885,8494,5135,7150,1041,8020,24960,1720,45430,3546,3751,2077,2269,6051,5372,4247,10739,7114,1169,1757,1162,2699,1765,1367,3226,2290,3695,1756,2452,7385,2599,1426,7600,15285,4712
--- a/rawResults/convergenceAdvancedMCnegRew.txt
+++ b/rawResults/convergenceAdvancedMCnegRew.txt
--- a/rawResults/convergenceSimpleMC.txt
+++ b/rawResults/convergenceSimpleMC.txt
--- a/rawResults/convergenceSimpleNegRewInJumpQLearning099disc09LR.txt
+++ b/rawResults/convergenceSimpleNegRewInJumpQLearning099disc09LR.txt
@ -1,4 +1,4 @@
-# Simple, Dist and inJump, QLearning 0.99 disc 0.9 Learning rade, -1 reward when in jump and jumping
+# Simple, Dist and inJump, QLearning 0.99 disc 0.9 Learning rate, -1 reward when in jump and jumping
 0.05,6,6,10,8,8,7,6,10,8,6,8,7,8,6,10,10,9,9,12,7,7,8,9,11,8,12,7,8,8,8,9,8,10,7,10,8,9,9,7,7,8,7,7,5,11,8,8,7,8,7,8,8,7,7,9,8,6,5,10,10,7,10,8,7,7,7,7,11,8,9,10,8,10,8,7,6,5,8,7,8,8,8,3,8,9,9,4,8,6,11,7,8,8,8,9,8,12,7,10,9
 0.1,6,6,8,10,8,7,6,7,9,6,8,10,8,7,10,22,7,11,9,9,8,14,8,29,8,10,7,7,9,7,10,8,11,7,9,9,10,7,7,14,7,7,10,9,5,6,10,8,9,10,8,6,10,9,10,8,6,18,9,32,6,10,8,7,8,7,7,5,8,9,8,8,8,8,6,6,5,9,7,6,8,8,6,12,9,9,4,8,6,11,8,8,9,7,8,7,10,8,12,37
 0.15,7,6,5,9,7,7,6,6,8,15,7,7,13,8,8,16,7,16,12,12,8,21,7,11,7,26,4,7,10,7,10,8,7,7,12,10,6,10,6,6,6,15,20,9,5,8,8,7,10,10,7,48,10,8,12,7,7,6,8,11,6,10,9,7,8,35,8,5,10,5,14,14,10,8,12,8,7,7,7,8,8,7,7,20,9,13,4,10,6,26,12,8,8,10,7,8,11,63,13,16
--- a/rawResults/convergenceSimpleNoJumpInfoMCfirstVisit.txt
+++ b/rawResults/convergenceSimpleNoJumpInfoMCfirstVisit.txt
@ -1,4 +1,4 @@
-#same speed and distance. ONLY DISTANCE TO OBSTACLE. 100 trials, rest did not convergec; MONTE CARLO
+#same speed and distance. ONLY DISTANCE TO OBSTACLE. 100 trials, rest did not converge!; MONTE CARLO
 0.05,8,64,62,15,84,5,72,183
 0.1,15,15,6,18,46,28,307
 0.15,177,20,12,9,21,4,6,65,173
--- a/rawResults/convergenceSimpleNoJumpingQLearning099disc09LR.txt
+++ b/rawResults/convergenceSimpleNoJumpingQLearning099disc09LR.txt
--- a/rawResults/convergenceSimpleNoJumpingSARSA099disc09LR.txt
+++ b/rawResults/convergenceSimpleNoJumpingSARSA099disc09LR.txt
--- a/rawResults/convergenceSimpleQ099Disc09LRSameRewardsAsMC.txt
+++ b/rawResults/convergenceSimpleQ099Disc09LRSameRewardsAsMC.txt
--- a/rawResults/convergenceSimpleQ099Disc09LRSameRewardsAsMCMinus1DefaultValue.txt
+++ b/rawResults/convergenceSimpleQ099Disc09LRSameRewardsAsMCMinus1DefaultValue.txt
--- a/rawResults/convergenceSimpleSARSA099disc09LR.txt
+++ b/rawResults/convergenceSimpleSARSA099disc09LR.txt
--- a/rawResults/totalTimestampsNeededForFoodDiffDiscounts.txt
+++ b/rawResults/totalTimestampsNeededForFoodDiffDiscounts.txt
@ -1,4 +1,4 @@
-# timestamp needed for x*1000 collected food; 0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 0.99
+# timestamps needed for x*1000 collected food; Discount factor for rows: 0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 0.99
 103856,152357,201640,245789,290299,332701,377803,419699,462803,503563,544563,583697,630453,674735,720569,765511,806589,850249,893061,934733,978755,1019951,1062057,1103441,1145615,1188351,1234143,1277625,1325757,1369897
 86806,119125,148442,174952,200616,224964,249958,273930,298390,322642,346770,370528,395712,420584,445870,470226,494548,518810,543070,567516,592404,616706,641572,665808,690356,715222,741116,765988,792200,816832
 81070,112021,138091,162895,186091,208761,232135,254523,277189,299965,322397,344691,368009,391027,414467,437437,459939,482715,505407,527789,550967,573899,596997,619723,642307,665369,688917,711741,735413,758143
--- a/resultDiagrams/AdvancedZ3B1MonteCarlo.png
+++ b/resultDiagrams/AdvancedZ3B1MonteCarlo.png
--- a/resultDiagrams/AdvancedZ3B3Monte.png
+++ b/resultDiagrams/AdvancedZ3B3Monte.png
--- a/resultDiagrams/SimpleZ1B2QLearningDisc099LR09.png
+++ b/resultDiagrams/SimpleZ1B2QLearningDisc099LR09.png
--- a/resultDiagrams/SimpleZ2B1QLearningDisc099LR09.png
+++ b/resultDiagrams/SimpleZ2B1QLearningDisc099LR09.png
--- a/resultDiagrams/SimpleZ2B1QLearningMaxInitValuesDisc099LR09.png
+++ b/resultDiagrams/SimpleZ2B1QLearningMaxInitValuesDisc099LR09.png
--- a/resultDiagrams/SimpleZ2B2QLearningDisc099LR09.png
+++ b/resultDiagrams/SimpleZ2B2QLearningDisc099LR09.png
--- a/resultDiagrams/SimpleZ2B2SarsaDisc099LR09.png
+++ b/resultDiagrams/SimpleZ2B2SarsaDisc099LR09.png
--- a/resultDiagrams/SimpleZ2B4QLearningDisc099LR09.png
+++ b/resultDiagrams/SimpleZ2B4QLearningDisc099LR09.png
--- a/resultDiagrams/antGameAnalysis05Disc.png
+++ b/resultDiagrams/antGameAnalysis05Disc.png
--- a/resultDiagrams/antGameAnalysis099Disc.png
+++ b/resultDiagrams/antGameAnalysis099Disc.png
--- a/resultDiagrams/antGameAnalysis09Disc.png
+++ b/resultDiagrams/antGameAnalysis09Disc.png
--- a/resultDiagrams/avgTimestampsPerFoodOptimality.png
+++ b/resultDiagrams/avgTimestampsPerFoodOptimality.png
--- a/resultDiagrams/mininumNeededTimestampsAntGame.png
+++ b/resultDiagrams/mininumNeededTimestampsAntGame.png
--- a/resultDiagrams/totalTimestampsContinuousCollecting.png
+++ b/resultDiagrams/totalTimestampsContinuousCollecting.png
--- a/src/main/java/core/algo/td/QLearningOffPolicyTDControl.java
+++ b/src/main/java/core/algo/td/QLearningOffPolicyTDControl.java
@ -76,16 +76,16 @@ public class QLearningOffPolicyTDControl<A extends Enum> extends EpisodicLearnin
            if(reward == Reward.FOOD_DROP_DOWN_SUCCESS) {
                foodCollected++;
                foodTimestampsTotal += timestampTilFood;
-                //System.out.println(foodCollected + " " + timestampCurrentEpisode);
                File file = new File(ContinuousAnt.FILE_NAME);
                if(foodCollected % 1000 == 0) {
+                    System.out.println(foodTimestampsTotal / 1000f + " " + timestampCurrentEpisode);
                    try {
-                        Files.writeString(Path.of(file.getPath()), timestampCurrentEpisode + ",", StandardOpenOption.APPEND);
+                        Files.writeString(Path.of(file.getPath()), foodTimestampsTotal / 1000f + ",", StandardOpenOption.APPEND);
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
+                    foodTimestampsTotal = 0;
                }
-                foodTimestampsTotal = 0;
                if(foodCollected == 1000){
                    ((EpsilonGreedyPolicy<A>) this.policy).setEpsilon(0.15f);
                }
@ -105,7 +105,7 @@ public class QLearningOffPolicyTDControl<A extends Enum> extends EpisodicLearnin
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
-                    return;
+                    // return;
                }
                iterations++;
                timestampTilFood = 0;
--- a/src/main/java/example/ContinuousAnt.java
+++ b/src/main/java/example/ContinuousAnt.java
@ -3,13 +3,12 @@ package example;
 import core.RNG;
 import core.algo.Method;
 import core.controller.RLController;
+import core.controller.RLControllerGUI;
 import evironment.antGame.AntAction;
 import evironment.antGame.AntWorldContinuous;

 import java.io.File;
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;

 public class ContinuousAnt {
    public static final String FILE_NAME = "optDiscTimestampsNew.txt";
@ -22,33 +21,21 @@ public class ContinuousAnt {
        } catch (IOException e) {
            e.printStackTrace();
        }
-        List<Float> discValues = new ArrayList<>() {
-        };
-        discValues.add(0.05f);
-        discValues.add(0.1f);
-        discValues.add(0.3f);
-        discValues.add(0.5f);
-        discValues.add(0.7f);
-        discValues.add(0.9f);
-        discValues.add(0.95f);
-        discValues.add(0.99f);
-
-        for(float disc : discValues) {
            RNG.setSeed(13);
-            RLController<AntAction> rl = new RLController<>(
+        RLController<AntAction> rl = new RLControllerGUI<>(
                    new AntWorldContinuous(8, 8),
                    Method.Q_LEARNING_OFF_POLICY_CONTROL,
                    AntAction.values());
-            rl.setDelay(0);
+        rl.setDelay(20);
            rl.setNrOfEpisodes(1);
            //0.99 0.9 0.5
            //0.99 0.95 0.9 0.7 0.5 0.3 0.1
-            rl.setDiscountFactor(disc);
+        rl.setDiscountFactor(0.05f);
            // 0.1, 0.3, 0.5, 0.7 0.9
            rl.setLearningRate(0.9f);
            rl.setEpsilon(0.2f);
            rl.start();
-        }
+

    }
 }