add JavaFX Gradle plugin, switch to Java 11, and add System.out logging for debugging
- The current implementation will not converge to the correct behaviour. See the comment in the MonteCarlo class for details.
This commit is contained in:
parent 55d8bbf5dc
commit 584d6a1246
.idea/misc.xml
@@ -1,7 +1,14 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
   <component name="ExternalStorageConfigurationManager" enabled="true" />
-  <component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" project-jdk-name="1.8" project-jdk-type="JavaSDK">
+  <component name="MavenProjectsManager">
+    <option name="originalFiles">
+      <list>
+        <option value="$PROJECT_DIR$/pom.xml" />
+      </list>
+    </option>
+  </component>
+  <component name="ProjectRootManager" version="2" languageLevel="JDK_11" default="false" project-jdk-name="11" project-jdk-type="JavaSDK">
     <output url="file://$PROJECT_DIR$/out" />
   </component>
 </project>
.idea/modules.xml (deleted)
@@ -1,8 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectModuleManager">
-    <modules>
-      <module fileurl="file://$PROJECT_DIR$/.idea/refo.iml" filepath="$PROJECT_DIR$/.idea/refo.iml" />
-    </modules>
-  </component>
-</project>
.idea/refo.iml
@@ -1,18 +1,2 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<module external.linked.project.id="refo" external.linked.project.path="$MODULE_DIR$" external.root.project.path="$MODULE_DIR$" external.system.id="GRADLE" external.system.module.group="net.lwenstrom.jan" external.system.module.version="1.0-SNAPSHOT" version="4">
-  <component name="NewModuleRootManager">
-    <output url="file://$MODULE_DIR$/build/classes/java/main" />
-    <output-test url="file://$MODULE_DIR$/build/classes/java/test" />
-    <exclude-output />
-    <content url="file://$MODULE_DIR$">
-      <sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
-      <excludeFolder url="file://$MODULE_DIR$/.gradle" />
-      <excludeFolder url="file://$MODULE_DIR$/build" />
-    </content>
-    <orderEntry type="inheritedJdk" />
-    <orderEntry type="sourceFolder" forTests="false" />
-    <orderEntry type="library" scope="PROVIDED" name="Gradle: org.projectlombok:lombok:1.18.10" level="project" />
-    <orderEntry type="library" scope="TEST" name="Gradle: junit:junit:4.12" level="project" />
-    <orderEntry type="library" scope="TEST" name="Gradle: org.hamcrest:hamcrest-core:1.3" level="project" />
-  </component>
-</module>
+<module external.linked.project.id="refo" external.linked.project.path="$MODULE_DIR$" external.root.project.path="$MODULE_DIR$" external.system.id="GRADLE" external.system.module.group="net.lwenstrom.jan" external.system.module.version="1.0-SNAPSHOT" version="4" />
build.gradle
@@ -1,11 +1,12 @@
 plugins {
     id 'java'
+    id 'org.openjfx.javafxplugin' version '0.0.8'
 }
 
 group 'net.lwenstrom.jan'
 version '1.0-SNAPSHOT'
 
-sourceCompatibility = 1.8
+sourceCompatibility = 11
 
 repositories {
     mavenCentral()
@@ -16,3 +17,7 @@ dependencies {
     compileOnly 'org.projectlombok:lombok:1.18.10'
     annotationProcessor 'org.projectlombok:lombok:1.18.10'
 }
+
+javafx {
+    modules = [ 'javafx.controls', 'javafx.fxml' ]
+}
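A note on why a JavaFX dependency is needed at all: as of Java 11, JavaFX is no longer bundled with the JDK, and the learner below uses javafx.util.Pair (from the javafx.base module, pulled in transitively by javafx.controls) as a HashMap key. Pair works as a map key because it defines value-based equals/hashCode. A minimal sketch with illustrative names, assuming javafx.base is on the classpath:

import javafx.util.Pair;
import java.util.HashMap;
import java.util.Map;

public class PairKeyDemo {
    public static void main(String[] args) {
        // Pair implements equals/hashCode over (key, value), so two equal
        // state/action pairs map to the same entry.
        Map<Pair<String, String>, Double> returnSum = new HashMap<>();
        returnSum.put(new Pair<>("start", "MOVE_LEFT"), -10.0);
        returnSum.merge(new Pair<>("start", "MOVE_LEFT"), -9.0, Double::sum);
        System.out.println(returnSum.size()); // 1 -- the same logical pair
    }
}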
MonteCarloOnPolicyEGreedy.java
@@ -1,12 +1,29 @@
-package core.algo.MC;
+package core.algo.mc;
 
 import core.*;
 import core.algo.Learning;
 import core.policy.EpsilonGreedyPolicy;
 import javafx.util.Pair;
 
 import java.util.*;
-
+/**
+ * TODO: Major problem:
+ * StateActionPairs are only unique accounting for their position in the episode.
+ * For example:
+ *
+ * startingState -> MOVE_LEFT : very first state-action in the episode, i = 1.
+ * Imagine the agent does not collect the food and drop it at the start; the agent will receive
+ * -1 for every timestep, hence (startingState -> MOVE_LEFT) will get a value of -10.
+ *
+ * BUT imagine moving left from the starting position has no impact on the state because
+ * the agent ran into a wall. The known world stays the same.
+ * Taking an action after that will have the exact same state but a different action,
+ * making the value of this stateActionPair -9, because the state-action pair took place on the
+ * second timestep and summing up all remaining rewards gives -9...
+ *
+ * How can this problem be countered?
+ * @param <A>
+ */
 public class MonteCarloOnPolicyEGreedy<A extends Enum> extends Learning<A> {
 
     public MonteCarloOnPolicyEGreedy(Environment<A> environment, DiscreteActionSpace<A> actionSpace) {
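To make the TODO concrete: the first-visit return for a state-action pair is the sum of rewards from its first occurrence to the end of the episode, so two pairs that share the same observed state but sit at neighbouring timesteps receive returns that differ only by their position, not by the quality of the action. A self-contained sketch with illustrative values, not repository code:

import java.util.List;

public class ReturnAliasingDemo {
    // Sum of rewards from the first occurrence of an index onward,
    // mirroring the first-visit return used in the learner.
    static double returnFrom(List<Double> rewards, int firstOccurrenceIndex) {
        return rewards.subList(firstOccurrenceIndex, rewards.size())
                      .stream().mapToDouble(Double::doubleValue).sum();
    }

    public static void main(String[] args) {
        // Ten steps of -1: the agent wandered and dropped the food at the start.
        List<Double> rewards = List.of(-1.0, -1.0, -1.0, -1.0, -1.0,
                                       -1.0, -1.0, -1.0, -1.0, -1.0);
        // (state, MOVE_LEFT) is first seen at t=0 and (state, MOVE_UP) at t=1,
        // even though the observed state never changed (the agent hit a wall).
        System.out.println(returnFrom(rewards, 0)); // -10.0
        System.out.println(returnFrom(rewards, 1)); // -9.0 -- differs only by position
    }
}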
@@ -22,15 +39,17 @@ public class MonteCarloOnPolicyEGreedy<A extends Enum> extends Learning<A> {
         Map<Pair<State, A>, Double> returnSum = new HashMap<>();
         Map<Pair<State, A>, Integer> returnCount = new HashMap<>();
 
+        State startingState = environment.reset();
         for(int i = 0; i < nrOfEpisodes; ++i) {
-
             List<StepResult<A>> episode = new ArrayList<>();
             State state = environment.reset();
-            for(int j=0; j < 100; ++j){
+            double rewardSum = 0;
+            for(int j=0; j < 10; ++j){
                 Map<A, Double> actionValues = stateActionTable.getActionValues(state);
                 A chosenAction = policy.chooseAction(actionValues);
                 StepResultEnvironment envResult = environment.step(chosenAction);
                 State nextState = envResult.getState();
+                rewardSum += envResult.getReward();
                 episode.add(new StepResult<>(state, chosenAction, envResult.getReward()));
 
                 if(envResult.isDone()) break;
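For context on policy.chooseAction(actionValues): the class imports EpsilonGreedyPolicy, whose source is not part of this commit. A generic sketch of epsilon-greedy selection, under the assumption that it follows the textbook scheme:

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Random;

// Illustrative epsilon-greedy selection, not the repository's EpsilonGreedyPolicy:
// with probability epsilon pick a random action, otherwise pick a highest-valued one.
public class EpsilonGreedySketch {
    static <A> A choose(Map<A, Double> actionValues, double epsilon, Random rng) {
        List<A> actions = new ArrayList<>(actionValues.keySet());
        if (rng.nextDouble() < epsilon) {
            return actions.get(rng.nextInt(actions.size())); // explore
        }
        A best = actions.get(0);
        for (A a : actions) {
            if (actionValues.get(a) > actionValues.get(best)) best = a; // exploit
        }
        return best;
    }
}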
@@ -38,23 +57,25 @@ public class MonteCarloOnPolicyEGreedy<A extends Enum> extends Learning<A> {
                 state = nextState;
 
                 try {
-                    Thread.sleep(10);
+                    Thread.sleep(1);
                 } catch (InterruptedException e) {
                     e.printStackTrace();
                 }
             }
 
+            System.out.printf("Episode %d \t Reward: %f \n", i, rewardSum);
             Set<Pair<State, A>> stateActionPairs = new HashSet<>();
 
             for(StepResult<A> sr: episode){
                 stateActionPairs.add(new Pair<>(sr.getState(), sr.getAction()));
             }
+            System.out.println("stateActionPairs " + stateActionPairs.size());
             for(Pair<State, A> stateActionPair: stateActionPairs){
                 int firstOccurenceIndex = 0;
                 // find first occurrence of the state-action pair
                 for(StepResult<A> sr: episode){
                     if(stateActionPair.getKey().equals(sr.getState()) && stateActionPair.getValue().equals(sr.getAction())){
+                        ;
                         break;
                     }
                     firstOccurenceIndex++;
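The hunk is cut off before the value update. Based on the returnSum and returnCount maps declared earlier, the continuation presumably averages first-visit returns; the sketch below is an assumption, and getReward() on StepResult plus setValue(...) on stateActionTable are hypothetical names, not confirmed by this diff:

// Hypothetical continuation (not shown in the commit): average first-visit returns.
double G = 0;
for (int k = firstOccurenceIndex; k < episode.size(); ++k) {
    G += episode.get(k).getReward(); // getReward() assumed by analogy with StepResultEnvironment
}
returnSum.merge(stateActionPair, G, Double::sum);
returnCount.merge(stateActionPair, 1, Integer::sum);
// setValue(...) is a hypothetical setter mirroring getActionValues(...)
stateActionTable.setValue(stateActionPair.getKey(), stateActionPair.getValue(),
        returnSum.get(stateActionPair) / returnCount.get(stateActionPair));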
GreedyPolicy.java
@@ -5,6 +5,7 @@ import core.RNG;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
+import java.util.Random;
 
 public class GreedyPolicy<A extends Enum> implements Policy<A> {
 
@@ -17,7 +18,7 @@ public class GreedyPolicy<A extends Enum> implements Policy<A> {
         List<A> equalHigh = new ArrayList<>();
 
         for(Map.Entry<A, Double> actionValue : actionValues.entrySet()){
-            System.out.println(actionValue.getKey()+ " " + actionValue.getValue() );
+            // System.out.println(actionValue.getKey() + " " + actionValue.getValue());
             if(highestValueAction == null || highestValueAction < actionValue.getValue()){
                 highestValueAction = actionValue.getValue();
                 equalHigh.clear();
@@ -27,6 +28,6 @@ public class GreedyPolicy<A extends Enum> implements Policy<A> {
             }
         }
 
-        return equalHigh.get(RNG.getRandom().nextInt(equalHigh.size()));
+        return equalHigh.get(new Random().nextInt(equalHigh.size()));
     }
 }
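One side effect of the change above: RNG.getRandom() presumably returns a shared, centrally seeded generator, while new Random() is seeded from the clock on every call, so greedy tie-breaking is no longer reproducible across runs. A small sketch of the difference, with an illustrative fixed seed:

import java.util.Random;

public class SeedingDemo {
    public static void main(String[] args) {
        // A shared, seeded generator reproduces the same tie-breaks every run...
        Random shared = new Random(42); // hypothetical fixed seed
        System.out.println(shared.nextInt(4)); // identical on every execution
        // ...whereas a fresh Random() is seeded from the clock/entropy, so
        // tie-breaking among equally-valued actions differs from run to run.
        System.out.println(new Random().nextInt(4));
    }
}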
AntWorld.java
@@ -2,7 +2,7 @@ package evironment.antGame;
 
 import core.*;
 import core.algo.Learning;
-import core.algo.MC.MonteCarloOnPolicyEGreedy;
+import core.algo.mc.MonteCarloOnPolicyEGreedy;
 import evironment.antGame.gui.MainFrame;
 
 
@@ -113,6 +113,7 @@ public class AntWorld implements Environment<AntAction>{
             // than the starting point
             if(currentCell.getType() != CellType.START){
                 reward = Reward.FOOD_DROP_DOWN_FAIL_NOT_START;
+                done = true;
             }else{
                 reward = Reward.FOOD_DROP_DOWN_SUCCESS;
                 myAnt.setPoints(myAnt.getPoints() + 1);
@@ -156,10 +157,14 @@ public class AntWorld implements Environment<AntAction>{
             done = grid.isAllFoodCollected();
         }
 
+        if(!done){
+            reward = -1;
+        }
         if(++tick == maxEpisodeTicks){
             done = true;
         }
 
+
         StepResultEnvironment result = new StepResultEnvironment(newState, reward, done, info);
         getGui().update(action, result);
         return result;
@@ -211,6 +216,6 @@ public class AntWorld implements Environment<AntAction>{
             new AntWorld(3, 3, 0.1),
             new ListDiscreteActionSpace<>(AntAction.values())
         );
-        monteCarlo.learn(100,5);
+        monteCarlo.learn(20000,5);
     }
 }
Reward.java
@@ -7,7 +7,7 @@ public class Reward {
 
     public static final double FOOD_DROP_DOWN_FAIL_NO_FOOD = 0;
     public static final double FOOD_DROP_DOWN_FAIL_NOT_START = 0;
-    public static final double FOOD_DROP_DOWN_SUCCESS = 1000;
+    public static final double FOOD_DROP_DOWN_SUCCESS = 1;
 
     public static final double UNKNOWN_FIELD_EXPLORED = 0;
 
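With the per-step penalty of -1 added in AntWorld.step and the drop-off bonus scaled down to 1, rewards are now on comparable magnitudes: the undiscounted return of an episode that delivers the food on its final step is 1 - (steps - 1), since the successful terminal step is not overwritten by the -1 penalty. A small sketch of that arithmetic, illustrative rather than repository code:

public class ReturnScaleDemo {
    // Undiscounted return for an episode that delivers the food on its last step:
    // every non-terminal step costs -1, the successful drop pays +1.
    static double episodeReturn(int steps) {
        return -(steps - 1) + 1.0;
    }

    public static void main(String[] args) {
        System.out.println(episodeReturn(6)); // -4.0
        System.out.println(episodeReturn(2)); //  0.0 -- shortest useful episode
    }
}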