add specific environment RNG
This commit is contained in:
parent
42dfebb048
commit
e8f4fa06b6
Binary file not shown.
Before Width: | Height: | Size: 78 KiB |
Binary file not shown.
Before Width: | Height: | Size: 7.9 KiB |
Binary file not shown.
Before Width: | Height: | Size: 8.6 KiB |
Binary file not shown.
Before Width: | Height: | Size: 7.5 KiB |
|
@ -1,21 +0,0 @@
|
||||||
#Every visit MC; just distance
|
|
||||||
0.05,36,29147,7945,707,12,595,68,4913,8034,300,804,1731,715,64899,423,703,2685,173,468,8220,4182,80071,1418,17769,216,3859,44746,218,6794,2,4182,35,258,9015,2581,62,39,17439,7346,1000,56974,21,71655,20301,4834,18144,18393,613,5,39,609,586,6,28871,1021,18051,457,13629,59217,10278,173,24407,1373,48513,31749,190,25159,130,609,609,55,924,77,4379,72708,202,2096,25965,14394,1230,2833,1791,25512,230,16227,16090,3781,76,537,229,307,2651,2,4212,3,38602,159722,703,45887,31
|
|
||||||
0.1,742,1269,9139,6764,782,25068,539,284,678,9327,2166,3469,3057,2971,1335,64551,22900,35519,62,29,15732,906,16,17,960,190,32183,2,40863,3324,37,6610,97,1578,12006,1784,3388,54,576,64,404,1638,95916,108,281,26288,1825,28,89,2359,49218,853,17821,10514,283,44669,256,6,12708,13085,3466,56628,8513,870,15,53906,20,1533,2164,1458,538,226,99,163,16,31398,76,2075,1474,36605,434,120,3744,91,2885,112,3285,69283,21176,3029,10113,2691,213,729,76694,9,1304,2,30950,74
|
|
||||||
0.15,1085,42379,2065,4847,124,27681,2257,1617,4597,1033,39095,254,120,1343,2007,1907,2992,15207,95,7835,495,16224,362,97541,141,8692,32124,1302,1768,10853,480,2,2099,25,1889,66,67,11874,290,77899,7253,31,1190,72,95,141,7732,32,26,16,5,181,361,12321,22,1631,7180,18,40,310,95444,7146,102,5210,6,19593,434,3581,3120,35,14,980,19,3816,677,1494,337,68,2764,53854,147,28,732,23755,270,4818,15,3046,1557,117,13049,323,26709,2036,431,85,113,179178,13782,705
|
|
||||||
0.2,11,119,8993,10,949,1495,20497,1325,5672,39392,35828,233,56,2707,456,12,3630,112,257,936,108,618,2353,52,2563,1632,10094,12336,24,1390,11,1130,4381,78,14658,1228,514,4706,215,15898,100,639,110,723,340,21,25,836,4,83,922,90321,22,747,11599,88,13,11,2101,43,642,1272,1742,169,614,832,681,41884,17,3159,392,9,178,2183,3271,10,378,69,35741,293,63,2733,3825,745,9,3151,2934,588,214,30150,1138,1304,98,566,1497,4474,24,944,1593,563
|
|
||||||
0.25,21,222,230,10720,108,2978,2377,1398,3310,10274,27275,1674,22,720,293,52,863,16756,76,520,28985,247,195,5507,97,5916,96,11,1346,77,157,8,14,501,40,1502,32,58,561,8,327,74,91,20,5444,914,4278,30,27,399,2186,4,75,1149,2386,22,3415,3918,56,438,8,160,451,18150,520,329,33850,16,625,128,463,73,147,33074,2545,198,1210,637,1143,180,824,171,293,16,9592,699,8,929,664,1637,2524,18383,256,31,540,233,265,998,9667,685
|
|
||||||
0.3,15,2847,8254,2332,2921,62,61322,1967,2375,4488,1142,1502,83,5177,519,128,7727,58,25,552,365,119,141,11859,7,9742,8,754,415,1534,641,8,498,12364,38,1062,75,336,3247,8,540,683,13,396,9242,194,24,935,62,83,611,4,5,5047,3134,243,1647,3294,96,257,9,1777,38112,6059,118,1702,4305,48,25907,113,715,8233,797,1721,147,39342,1111,26,11991,516,3292,24,126,1012,3290,305,5371,145,11939,65779,9057,3241,76,6924,2896,266,4380,902,634,577
|
|
||||||
0.35,68,47,70,1307,4155,1031,15740,175,829,920,2649,599,214,494,544,5026,1647,2007,48,182,294,670,156,613,20,1161,250,1710,4153,270,78,81,17,81,4816,5955,87,428,189,8,235,225,3151,7884,205,1322,25,31,787,15,3728,4,5,1314,3193,454,1175,167,176,7596,49,74,3110,389,385,2940,317,17,3846,94,99,202,88,22,1957,749,2767,57,334,326,27205,1928,14,285,3293,43,6288,43,643,36225,58,2835,1126,8335,1101,1874,2916,391,2455,13320
|
|
||||||
0.40,430,2482,4292,2558,7285,165,295,845,3480,43,2391,3,269,448,886,7400,6280,704,61,178,2706,4046,1089,139,947,855,46,415,1668,6922,235,22,5447,19,401,706,67,5379,73,527,178,3307,1254,996,276,4191,760,554,1140,15987,175,4,22,7793,4102,129,583,1056,228,27992,615,51,32,259,1508,867,2,1314,170,1135,390,3982,69,1228,1556,324,288,4165,504,504,173,313,85,2101,390,62,26544,468,793,1533,63,1838,167,3753,4042,402,6142,21336,841,72251
|
|
||||||
0.45,684,893,1075,3710,10131,9449,999,337,136,3812,80,3,3113,132,11540,277,639,315,74,55,2061,53,23,3426,513,4798,60,37,7000,76680,2757,22,1241,87,1207,4540,49,4373,48,10391,561,1698,214,507,382,251,245,998,142,1680,290,4,21,390,13326,209,916,2873,108,38,417,16,13,503,247,4716,8,23,4030,1862,104,681,114,40,638,647,3693,22959,90,249,85,485,1448,346,1575,524,429,3365,489,1124,177,3188,188,1880,110,263,108,34,408,25180
|
|
||||||
0.50,42,16830,1941,220,3775,815,2031,498,3436,2897,1061,3,2619,192,1034,672,14834,214,125,3387,1480,345,32,4599,682,350,45,572,863,45168,66,186,231,4381,596,1034,723,3776,10695,110,3889,975,809,86,579,16,40,228,180,259,54,4,1241,649,22145,372,671,1055,2444,1949,61,229,17,2392,266,1586,1820,24,2182,3007,124,270,233,46,1526,295,1276,996,352,1225,17,341,271,512,3197,474,3692,8,888,581,1091,71,4663,87,4268,1127,3638,1529,661,418
|
|
||||||
0.55,3815,777,987,185,27,1230,1594,2210,312,303,15,2,2256,3099,7131,134,3533,158,5467,1954,3945,160,281,1095,216,676,60,6476,429,267,1781,966,1156,2818,3082,16615,233,3614,95,1157,2991,1286,78,1461,783,16,40,1439,5110,22,322,4,878,5,111,1464,249,6612,631,10027,145,1804,1304,2465,992,196,32,13,204,1639,255,1151,409,6002,331,585,1023,2579,1625,730,16,249,929,265,782,327,3763,142,24,4778,130,27170,653,30,64,2757,7624,10265,242,782
|
|
||||||
0.60,72,219,1014,1553,13880,289,3735,1411,4136,112,3301,2,4284,1165,7631,675,1641,3996,728,1337,127,33,2489,3893,429,171,5,3243,128,5598,1049,1360,831,862,178,463,113,86,3557,1211,40,1309,185,34750,75,15,28,6889,743,49,15,4,582,3927,61,83,841,1639,4,984,195,2585,50,2949,2506,7500,33,14,901,2253,267,1783,254,2056,110,225,4108,3027,255,1567,24,905,91,3740,500,2585,2344,2102,160,468,258,3133,3619,5072,2158,2844,2325,343,18,2851
|
|
||||||
0.65,259,481,1669,5179,15,2300,180,118,200,5180,238,2,15,1424,6185,1967,149,291,9749,666,1616,680,3145,96,650,5056,1247,1082,310,374,1614,937,563,322,1963,161,22,38,78,821,206,1006,390,319,1613,1992,1230,535,96,320,17,4,3067,6450,552,991,17929,3373,28,204,47,143,73,632,2433,2131,78,14,412,3296,782,4784,1067,629,2132,4082,9844,6593,2112,412,401,19872,1336,368,2487,319,814,27,170,1362,100,1678,127,1800,6572,5779,693,1565,2088,798
|
|
||||||
0.70,1007,185,1973,2285,15,28,406,1468,382,1237,11692,2,55,1204,172,1042,3029,583,47,1430,2597,3431,2986,3363,4073,355,21980,1160,916,6140,893,535,13,418,578,535,3,47,3353,9003,816,3294,393,2578,2874,4252,1128,786,381,207,10,4,6397,895,2210,4982,763,54,11,1537,2,712,26,8748,123,3861,65,958,811,62,116,429,409,394,1029,1611,7386,298,1518,1322,3053,1359,5281,385,3349,4487,1939,5949,533,1506,3234,1492,2744,4,2985,7713,1492,3836,7254,342
|
|
||||||
0.75,6523,317,3968,233,15,80,2863,4066,16,3262,883,704,117,5443,2409,59,3848,3450,65,87,3764,1652,62,1646,2864,512,1226,1816,3587,378,4243,1837,10,5908,16119,4734,5,328,2639,897,353,6208,4281,1671,1520,610,451,399,3650,2542,10,3,23,66,245,2607,58,470,4,1989,2,60,3256,3603,4903,568,743,251,14701,2671,4777,698,176,26,1302,787,3275,2738,1335,2634,8791,5806,2222,13223,311,4218,8479,5535,8898,189,2425,1919,2482,4,1536,3469,2201,345,390,5228
|
|
||||||
0.80,83,2976,2164,17000,16090,4483,13563,987,14,505,282,3317,2053,387,12252,247,3519,4557,378,71,756,910,1921,2874,1862,1829,731,54,234,715,682,10015,530,633,3911,4709,265,116,104,9814,1312,601,61,19856,1102,595,10881,3728,3633,3388,9,3,811,1999,4075,2106,1888,2582,4,12564,154,383,15,2438,436,824,947,6212,3273,73,772,49,2818,11875,4322,4457,3292,3180,170,55,4707,100,5989,1835,4546,5414,808,65,7,811,1783,19664,219,4,248,1953,996,32118,7,257
|
|
||||||
0.85,2075,497,11265,1960,3624,34,366,957,14,2817,4413,18269,5069,2370,2272,12272,330,2063,4130,6284,3628,9,1938,613,15316,3474,5446,100,3894,2899,67,892,4003,348,135,1151,5953,6105,6149,5830,2078,1831,4313,626,269,877,3822,19,1106,139,9,3,18364,1119,8355,22487,3265,1415,5,13005,781,27358,728,1977,616,6789,341,7050,2368,24,1281,1913,15870,1985,619,7965,5509,538,620,2012,2022,10890,20395,1500,12,6141,8327,107,7,8407,109,45,100,4,874,632,2101,396,7,5154
|
|
||||||
0.90,902,3427,5470,42978,9193,2969,5514,2288,91,2048,1085,982,16,4210,3702,103,14119,5531,1076,2111,1360,16,10217,4417,544,607,1414,5350,1027,9041,3697,12502,43926,105,2941,31782,995,15661,773,338,153,19508,11985,33683,3006,4043,2942,2541,3817,2281,8561,6771,2305,37689,9198,19,899,13884,1818,12144,319,416,250,11815,44066,4825,783,4261,286,162,13125,138,3183,627,609,1736,1554,989,7559,3504,6641,3354,47,972,11,14096,278,15664,7,15571,389,286,1367,4,2434,25,297,3388,7,2743
|
|
||||||
0.95,493,6960,769,1563,660,4208,778,8917,98,72,21014,100,6872,7998,26519,154,4073,178,11475,3714,184,9,7250,2416,1161,38061,85,3569,25036,7121,30470,300,270,8458,668,5242,610,21,2330,15540,3238,2429,8334,103,3945,6469,166,581,27569,8,3,7695,4772,212,4868,15107,11469,132,2466,1252,4425,21010,6871,2827,155,4470,59064,5478,504,38380,5933,18,4313,4131,26218,5916,1479,4514,14507,64879,4715,2885,11,7370,2070,4241,7,11518,6675,3896,6503,26757,38,15158,1246,63,7,86,18039,5131
|
|
||||||
1.00,5753,31823,1131,5008,814,2498,3289,4148,1189,3345,455,16054,1727,4275,8701,177,354,21971,2838,2195,9,7887,682,34,1657,23710,296,29250,5562,79,7168,176,369,9651,140,19561,4518,6518,3711,20169,4594,3547,809,14814,220,774,16,2280,3,318,1921,6150,254,8587,7848,23995,2323,7108,2348,58820,3945,62059,984,10839,10909,28613,8046,20249,12086,4325,9060,176,86,47797,1107,119,12405,8481,12,4521,683,92,419,1714,584,596,102,47669,23214,665,98,8,486,2610,4602,98,3954,11155,3,14700
|
|
|
@ -15,18 +15,26 @@ import java.util.Random;
|
||||||
*/
|
*/
|
||||||
public class RNG {
|
public class RNG {
|
||||||
private static Random rng;
|
private static Random rng;
|
||||||
|
private static Random rngEnv;
|
||||||
private static int seed = 123;
|
private static int seed = 123;
|
||||||
static {
|
static {
|
||||||
rng = new Random();
|
rng = new Random();
|
||||||
rng.setSeed(seed);
|
setSeed(seed, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Random getRandom() {
|
public static Random getRandom() {
|
||||||
return rng;
|
return rng;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void setSeed(int seed){
|
public static Random getEnvRandom() {
|
||||||
|
return rngEnv;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void setSeed(int seed, boolean setEnvSeed) {
|
||||||
RNG.seed = seed;
|
RNG.seed = seed;
|
||||||
rng.setSeed(seed);
|
rng.setSeed(seed);
|
||||||
|
if(setEnvSeed) {
|
||||||
|
rngEnv.setSeed(seed);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -80,18 +80,6 @@ public abstract class EpisodicLearning<A extends Enum> extends Learning<A> imple
|
||||||
|
|
||||||
private void dispatchEpisodeStart(){
|
private void dispatchEpisodeStart(){
|
||||||
++currentEpisode;
|
++currentEpisode;
|
||||||
/*
|
|
||||||
2f 0.02 => 100
|
|
||||||
1.5f 0.02 => 75
|
|
||||||
1.4f 0.02 => fail
|
|
||||||
1.5f 0.1 => 16 !
|
|
||||||
*/
|
|
||||||
// if(this.policy instanceof EpsilonGreedyPolicy){
|
|
||||||
// float ep = 2f/(float)currentEpisode;
|
|
||||||
// if(ep < 0.02) ep = 0;
|
|
||||||
// ((EpsilonGreedyPolicy<A>) this.policy).setEpsilon(ep);
|
|
||||||
// System.out.println(ep);
|
|
||||||
// }
|
|
||||||
episodesToLearn.decrementAndGet();
|
episodesToLearn.decrementAndGet();
|
||||||
for(LearningListener l: learningListeners){
|
for(LearningListener l: learningListeners){
|
||||||
l.onEpisodeStart();
|
l.onEpisodeStart();
|
||||||
|
|
|
@ -19,7 +19,7 @@ import java.util.*;
|
||||||
* Change to Every-Visit by setting flag "useEveryVisit" in the constructor to true.
|
* Change to Every-Visit by setting flag "useEveryVisit" in the constructor to true.
|
||||||
* @param <A>
|
* @param <A>
|
||||||
*/
|
*/
|
||||||
public class MonteCarloControlFirstVisitEGreedy<A extends Enum> extends EpisodicLearning<A> {
|
public class MonteCarloControlEGreedy<A extends Enum> extends EpisodicLearning<A> {
|
||||||
|
|
||||||
private Map<Pair<State, A>, Double> returnSum;
|
private Map<Pair<State, A>, Double> returnSum;
|
||||||
private Map<Pair<State, A>, Integer> returnCount;
|
private Map<Pair<State, A>, Integer> returnCount;
|
||||||
|
@ -31,7 +31,7 @@ public class MonteCarloControlFirstVisitEGreedy<A extends Enum> extends Episodic
|
||||||
private Policy<A> greedyPolicy = new GreedyPolicy<>();
|
private Policy<A> greedyPolicy = new GreedyPolicy<>();
|
||||||
|
|
||||||
|
|
||||||
public MonteCarloControlFirstVisitEGreedy(Environment<A> environment, DiscreteActionSpace<A> actionSpace, float discountFactor, float epsilon, int delay, boolean useEveryVisit) {
|
public MonteCarloControlEGreedy(Environment<A> environment, DiscreteActionSpace<A> actionSpace, float discountFactor, float epsilon, int delay, boolean useEveryVisit) {
|
||||||
super(environment, actionSpace, discountFactor, delay);
|
super(environment, actionSpace, discountFactor, delay);
|
||||||
isEveryVisit = useEveryVisit;
|
isEveryVisit = useEveryVisit;
|
||||||
// t
|
// t
|
||||||
|
@ -42,11 +42,11 @@ public class MonteCarloControlFirstVisitEGreedy<A extends Enum> extends Episodic
|
||||||
returnCount = new HashMap<>();
|
returnCount = new HashMap<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
public MonteCarloControlFirstVisitEGreedy(Environment<A> environment, DiscreteActionSpace<A> actionSpace, float discountFactor, float epsilon, int delay) {
|
public MonteCarloControlEGreedy(Environment<A> environment, DiscreteActionSpace<A> actionSpace, float discountFactor, float epsilon, int delay) {
|
||||||
this(environment, actionSpace, discountFactor, epsilon, delay, false);
|
this(environment, actionSpace, discountFactor, epsilon, delay, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
public MonteCarloControlFirstVisitEGreedy(Environment<A> environment, DiscreteActionSpace<A> actionSpace, int delay) {
|
public MonteCarloControlEGreedy(Environment<A> environment, DiscreteActionSpace<A> actionSpace, int delay) {
|
||||||
this(environment, actionSpace, LearningConfig.DEFAULT_DISCOUNT_FACTOR, LearningConfig.DEFAULT_EPSILON, delay);
|
this(environment, actionSpace, LearningConfig.DEFAULT_DISCOUNT_FACTOR, LearningConfig.DEFAULT_EPSILON, delay);
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,9 +40,9 @@ public class QLearningOffPolicyTDControl<A extends Enum> extends EpisodicLearnin
|
||||||
while(envResult == null || !envResult.isDone()) {
|
while(envResult == null || !envResult.isDone()) {
|
||||||
actionValues = stateActionTable.getActionValues(state);
|
actionValues = stateActionTable.getActionValues(state);
|
||||||
A action;
|
A action;
|
||||||
if(currentEpisode % 2 == 0){
|
if(currentEpisode % 2 == 0) {
|
||||||
action = greedyPolicy.chooseAction(actionValues);
|
action = greedyPolicy.chooseAction(actionValues);
|
||||||
}else{
|
} else {
|
||||||
action = policy.chooseAction(actionValues);
|
action = policy.chooseAction(actionValues);
|
||||||
}
|
}
|
||||||
if(converged) return;
|
if(converged) return;
|
||||||
|
@ -51,7 +51,7 @@ public class QLearningOffPolicyTDControl<A extends Enum> extends EpisodicLearnin
|
||||||
double reward = envResult.getReward();
|
double reward = envResult.getReward();
|
||||||
State nextState = envResult.getState();
|
State nextState = envResult.getState();
|
||||||
sumOfRewards += reward;
|
sumOfRewards += reward;
|
||||||
if(currentEpisode % 2 == 0){
|
if(currentEpisode % 2 == 0) {
|
||||||
state = nextState;
|
state = nextState;
|
||||||
dispatchStepEnd();
|
dispatchStepEnd();
|
||||||
continue;
|
continue;
|
||||||
|
|
|
@ -7,7 +7,7 @@ import core.ListDiscreteActionSpace;
|
||||||
import core.algo.EpisodicLearning;
|
import core.algo.EpisodicLearning;
|
||||||
import core.algo.Learning;
|
import core.algo.Learning;
|
||||||
import core.algo.Method;
|
import core.algo.Method;
|
||||||
import core.algo.mc.MonteCarloControlFirstVisitEGreedy;
|
import core.algo.mc.MonteCarloControlEGreedy;
|
||||||
import core.algo.td.QLearningOffPolicyTDControl;
|
import core.algo.td.QLearningOffPolicyTDControl;
|
||||||
import core.algo.td.SARSA;
|
import core.algo.td.SARSA;
|
||||||
import core.listener.LearningListener;
|
import core.listener.LearningListener;
|
||||||
|
@ -49,10 +49,10 @@ public class RLController<A extends Enum> implements LearningListener {
|
||||||
public void start() {
|
public void start() {
|
||||||
switch(method) {
|
switch(method) {
|
||||||
case MC_CONTROL_FIRST_VISIT:
|
case MC_CONTROL_FIRST_VISIT:
|
||||||
learning = new MonteCarloControlFirstVisitEGreedy<>(environment, discreteActionSpace, discountFactor, epsilon, delay);
|
learning = new MonteCarloControlEGreedy<>(environment, discreteActionSpace, discountFactor, epsilon, delay);
|
||||||
break;
|
break;
|
||||||
case MC_CONTROL_EVERY_VISIT:
|
case MC_CONTROL_EVERY_VISIT:
|
||||||
learning = new MonteCarloControlFirstVisitEGreedy<>(environment, discreteActionSpace, discountFactor, epsilon, delay, true);
|
learning = new MonteCarloControlEGreedy<>(environment, discreteActionSpace, discountFactor, epsilon, delay, true);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SARSA_ON_POLICY_CONTROL:
|
case SARSA_ON_POLICY_CONTROL:
|
||||||
|
|
|
@ -29,7 +29,7 @@ public class Grid {
|
||||||
initialGrid[x][y] = new Cell(new Point(x, y), CellType.FREE);
|
initialGrid[x][y] = new Cell(new Point(x, y), CellType.FREE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
start = new Point(RNG.getRandom().nextInt(width), RNG.getRandom().nextInt(height));
|
start = new Point(RNG.getEnvRandom().nextInt(width), RNG.getEnvRandom().nextInt(height));
|
||||||
initialGrid[start.x][start.y] = new Cell(new Point(start.x, start.y), CellType.START);
|
initialGrid[start.x][start.y] = new Cell(new Point(start.x, start.y), CellType.START);
|
||||||
spawnNewFood(initialGrid);
|
spawnNewFood(initialGrid);
|
||||||
spawnObstacles();
|
spawnObstacles();
|
||||||
|
@ -58,8 +58,8 @@ public class Grid {
|
||||||
Point potFood = new Point(0, 0);
|
Point potFood = new Point(0, 0);
|
||||||
CellType potFieldType;
|
CellType potFieldType;
|
||||||
while(!foodSpawned) {
|
while(!foodSpawned) {
|
||||||
potFood.x = RNG.getRandom().nextInt(width);
|
potFood.x = RNG.getEnvRandom().nextInt(width);
|
||||||
potFood.y = RNG.getRandom().nextInt(height);
|
potFood.y = RNG.getEnvRandom().nextInt(height);
|
||||||
potFieldType = grid[potFood.x][potFood.y].getType();
|
potFieldType = grid[potFood.x][potFood.y].getType();
|
||||||
if(potFieldType != CellType.START && grid[potFood.x][potFood.y].getFood() == 0 && potFieldType != CellType.OBSTACLE) {
|
if(potFieldType != CellType.START && grid[potFood.x][potFood.y].getFood() == 0 && potFieldType != CellType.OBSTACLE) {
|
||||||
grid[potFood.x][potFood.y].setFood(1);
|
grid[potFood.x][potFood.y].setFood(1);
|
||||||
|
|
|
@ -29,6 +29,6 @@ public class CardDeck {
|
||||||
nextInt(int bound) returns random int value from (inclusive) 0
|
nextInt(int bound) returns random int value from (inclusive) 0
|
||||||
and EXCLUSIVE! bound
|
and EXCLUSIVE! bound
|
||||||
*/
|
*/
|
||||||
return cards.get(RNG.getRandom().nextInt(cards.size()));
|
return cards.get(RNG.getEnvRandom().nextInt(cards.size()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,7 +31,7 @@ public class DinoWorldAdvanced extends DinoWorld{
|
||||||
protected void spawnNewObstacle() {
|
protected void spawnNewObstacle() {
|
||||||
int dx;
|
int dx;
|
||||||
int xSpawn;
|
int xSpawn;
|
||||||
double ran = RNG.getRandom().nextDouble();
|
double ran = RNG.getEnvRandom().nextDouble();
|
||||||
if(ran < 0.25){
|
if(ran < 0.25){
|
||||||
dx = -(int) (0.35 * Config.OBSTACLE_SPEED);
|
dx = -(int) (0.35 * Config.OBSTACLE_SPEED);
|
||||||
}else if(ran < 0.5){
|
}else if(ran < 0.5){
|
||||||
|
@ -41,7 +41,7 @@ public class DinoWorldAdvanced extends DinoWorld{
|
||||||
} else{
|
} else{
|
||||||
dx = -(int) (3.5 * Config.OBSTACLE_SPEED);
|
dx = -(int) (3.5 * Config.OBSTACLE_SPEED);
|
||||||
}
|
}
|
||||||
double ran2 = RNG.getRandom().nextDouble();
|
double ran2 = RNG.getEnvRandom().nextDouble();
|
||||||
if(ran2 < 0.25) {
|
if(ran2 < 0.25) {
|
||||||
// randomly spawning more right outside of the screen
|
// randomly spawning more right outside of the screen
|
||||||
xSpawn = Config.FRAME_WIDTH + Config.FRAME_WIDTH + Config.OBSTACLE_SIZE;
|
xSpawn = Config.FRAME_WIDTH + Config.FRAME_WIDTH + Config.OBSTACLE_SIZE;
|
||||||
|
|
|
@ -9,7 +9,7 @@ import evironment.blackjack.PlayerAction;
|
||||||
|
|
||||||
public class BlackJack {
|
public class BlackJack {
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
RNG.setSeed(55);
|
RNG.setSeed(55, true);
|
||||||
|
|
||||||
RLController<PlayerAction> rl = new RLControllerGUI<>(
|
RLController<PlayerAction> rl = new RLControllerGUI<>(
|
||||||
new BlackJackTable(),
|
new BlackJackTable(),
|
||||||
|
|
|
@ -13,7 +13,6 @@ import java.nio.file.Path;
|
||||||
import java.nio.file.StandardOpenOption;
|
import java.nio.file.StandardOpenOption;
|
||||||
|
|
||||||
public class DinoSampling {
|
public class DinoSampling {
|
||||||
public static final float f =0.05f;
|
|
||||||
public static final String FILE_NAME = "advancedEveryVisit.txt";
|
public static final String FILE_NAME = "advancedEveryVisit.txt";
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
File file = new File(FILE_NAME);
|
File file = new File(FILE_NAME);
|
||||||
|
@ -30,7 +29,7 @@ public class DinoSampling {
|
||||||
}
|
}
|
||||||
for(int i = 1; i <= 100; i++) {
|
for(int i = 1; i <= 100; i++) {
|
||||||
System.out.println("seed: " + i * 13);
|
System.out.println("seed: " + i * 13);
|
||||||
RNG.setSeed(i * 13);
|
RNG.setSeed(i * 13, true);
|
||||||
|
|
||||||
RLController<DinoAction> rl = new RLController<>(
|
RLController<DinoAction> rl = new RLController<>(
|
||||||
new DinoWorldAdvanced(),
|
new DinoWorldAdvanced(),
|
||||||
|
|
|
@ -4,12 +4,11 @@ import core.RNG;
|
||||||
import core.algo.Method;
|
import core.algo.Method;
|
||||||
import core.controller.RLController;
|
import core.controller.RLController;
|
||||||
import evironment.jumpingDino.DinoAction;
|
import evironment.jumpingDino.DinoAction;
|
||||||
import evironment.jumpingDino.DinoWorld;
|
|
||||||
import evironment.jumpingDino.DinoWorldAdvanced;
|
import evironment.jumpingDino.DinoWorldAdvanced;
|
||||||
|
|
||||||
public class JumpingDino {
|
public class JumpingDino {
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
RNG.setSeed(29);
|
RNG.setSeed(29, true);
|
||||||
|
|
||||||
RLController<DinoAction> rl = new RLController<>(
|
RLController<DinoAction> rl = new RLController<>(
|
||||||
new DinoWorldAdvanced(),
|
new DinoWorldAdvanced(),
|
||||||
|
|
|
@ -1,15 +0,0 @@
|
||||||
Method:
|
|
||||||
Epsilon = k / currentEpisode
|
|
||||||
set to 0 if Epsilon < b
|
|
||||||
|
|
||||||
k = 1.5
|
|
||||||
b = 0.1 => conv. 16
|
|
||||||
|
|
||||||
k = 1.5
|
|
||||||
b = 0.02 => 75
|
|
||||||
|
|
||||||
k = 1.4
|
|
||||||
b = 0.02 => fail
|
|
||||||
|
|
||||||
k = 2.0
|
|
||||||
b = 0.02 => conv. 100
|
|
|
@ -9,7 +9,7 @@ import evironment.antGame.AntWorld;
|
||||||
|
|
||||||
public class RunningAnt {
|
public class RunningAnt {
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
RNG.setSeed(56);
|
RNG.setSeed(56, true);
|
||||||
|
|
||||||
RLController<AntAction> rl = new RLControllerGUI<>(
|
RLController<AntAction> rl = new RLControllerGUI<>(
|
||||||
new AntWorld(8, 8),
|
new AntWorld(8, 8),
|
||||||
|
@ -20,7 +20,6 @@ public class RunningAnt {
|
||||||
rl.setNrOfEpisodes(10000);
|
rl.setNrOfEpisodes(10000);
|
||||||
rl.setDiscountFactor(0.9f);
|
rl.setDiscountFactor(0.9f);
|
||||||
rl.setEpsilon(0.15f);
|
rl.setEpsilon(0.15f);
|
||||||
|
|
||||||
rl.start();
|
rl.start();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,52 +0,0 @@
|
||||||
package example;
|
|
||||||
|
|
||||||
public class Test {
|
|
||||||
interface Drawable{
|
|
||||||
void draw();
|
|
||||||
}
|
|
||||||
interface State{
|
|
||||||
int getInt();
|
|
||||||
}
|
|
||||||
|
|
||||||
static class A implements Drawable, State{
|
|
||||||
private int k;
|
|
||||||
public A(int a){
|
|
||||||
k = a;
|
|
||||||
}
|
|
||||||
@Override
|
|
||||||
public void draw() {
|
|
||||||
System.out.println("draw " + k);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int getInt() {
|
|
||||||
System.out.println("getInt" + k);
|
|
||||||
return k;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static class B implements State{
|
|
||||||
@Override
|
|
||||||
public int getInt() {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void main(String[] args) {
|
|
||||||
State state = new A(24);
|
|
||||||
State state2 = new B();
|
|
||||||
state.getInt();
|
|
||||||
|
|
||||||
System.out.println(state2 instanceof Drawable);
|
|
||||||
drawState(state2);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void drawState(State s){
|
|
||||||
if(s instanceof Drawable){
|
|
||||||
Drawable d = (Drawable) s;
|
|
||||||
d.draw();
|
|
||||||
}else{
|
|
||||||
System.out.println("invalid");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,30 +0,0 @@
|
||||||
import core.RNG;
|
|
||||||
import core.algo.Method;
|
|
||||||
import core.controller.RLController;
|
|
||||||
import core.controller.RLControllerGUI;
|
|
||||||
import evironment.jumpingDino.DinoAction;
|
|
||||||
import evironment.jumpingDino.DinoWorld;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
public class MCFirstVisit {
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test if the action sequence is deterministic
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void deterministicActionSequence(){
|
|
||||||
RNG.setSeed(55);
|
|
||||||
|
|
||||||
RLController<DinoAction> rl = new RLControllerGUI<>(
|
|
||||||
new DinoWorld(false, false),
|
|
||||||
Method.MC_CONTROL_FIRST_VISIT,
|
|
||||||
DinoAction.values());
|
|
||||||
|
|
||||||
rl.setDelay(10);
|
|
||||||
rl.setDiscountFactor(1f);
|
|
||||||
rl.setEpsilon(0.1f);
|
|
||||||
rl.setLearningRate(0.8f);
|
|
||||||
rl.setNrOfEpisodes(4000000);
|
|
||||||
rl.start();
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Reference in New Issue