Add the JavaFX Gradle plugin, switch to Java 11, and add System.out debug output for error detection
- The current implementation will not converge to the correct behaviour. See the comment in the MonteCarlo class for more details.
This commit is contained in:
		
							parent
							
								
									55d8bbf5dc
								
							
						
					
					
						commit
						584d6a1246
					
				|  | @ -1,7 +1,14 @@ | |||
| <?xml version="1.0" encoding="UTF-8"?> | ||||
| <project version="4"> | ||||
|   <component name="ExternalStorageConfigurationManager" enabled="true" /> | ||||
|   <component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" project-jdk-name="1.8" project-jdk-type="JavaSDK"> | ||||
|   <component name="MavenProjectsManager"> | ||||
|     <option name="originalFiles"> | ||||
|       <list> | ||||
|         <option value="$PROJECT_DIR$/pom.xml" /> | ||||
|       </list> | ||||
|     </option> | ||||
|   </component> | ||||
|   <component name="ProjectRootManager" version="2" languageLevel="JDK_11" default="false" project-jdk-name="11" project-jdk-type="JavaSDK"> | ||||
|     <output url="file://$PROJECT_DIR$/out" /> | ||||
|   </component> | ||||
| </project> | ||||
|  | @ -1,8 +0,0 @@ | |||
| <?xml version="1.0" encoding="UTF-8"?> | ||||
| <project version="4"> | ||||
|   <component name="ProjectModuleManager"> | ||||
|     <modules> | ||||
|       <module fileurl="file://$PROJECT_DIR$/.idea/refo.iml" filepath="$PROJECT_DIR$/.idea/refo.iml" /> | ||||
|     </modules> | ||||
|   </component> | ||||
| </project> | ||||
|  | @ -1,18 +1,2 @@ | |||
| <?xml version="1.0" encoding="UTF-8"?> | ||||
| <module external.linked.project.id="refo" external.linked.project.path="$MODULE_DIR$" external.root.project.path="$MODULE_DIR$" external.system.id="GRADLE" external.system.module.group="net.lwenstrom.jan" external.system.module.version="1.0-SNAPSHOT" version="4"> | ||||
|   <component name="NewModuleRootManager"> | ||||
|     <output url="file://$MODULE_DIR$/build/classes/java/main" /> | ||||
|     <output-test url="file://$MODULE_DIR$/build/classes/java/test" /> | ||||
|     <exclude-output /> | ||||
|     <content url="file://$MODULE_DIR$"> | ||||
|       <sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" /> | ||||
|       <excludeFolder url="file://$MODULE_DIR$/.gradle" /> | ||||
|       <excludeFolder url="file://$MODULE_DIR$/build" /> | ||||
|     </content> | ||||
|     <orderEntry type="inheritedJdk" /> | ||||
|     <orderEntry type="sourceFolder" forTests="false" /> | ||||
|     <orderEntry type="library" scope="PROVIDED" name="Gradle: org.projectlombok:lombok:1.18.10" level="project" /> | ||||
|     <orderEntry type="library" scope="TEST" name="Gradle: junit:junit:4.12" level="project" /> | ||||
|     <orderEntry type="library" scope="TEST" name="Gradle: org.hamcrest:hamcrest-core:1.3" level="project" /> | ||||
|   </component> | ||||
| </module> | ||||
| <module external.linked.project.id="refo" external.linked.project.path="$MODULE_DIR$" external.root.project.path="$MODULE_DIR$" external.system.id="GRADLE" external.system.module.group="net.lwenstrom.jan" external.system.module.version="1.0-SNAPSHOT" version="4" /> | ||||
|  | @ -1,11 +1,12 @@ | |||
| plugins { | ||||
|     id 'java' | ||||
|     id 'org.openjfx.javafxplugin' version '0.0.8' | ||||
| } | ||||
| 
 | ||||
| group 'net.lwenstrom.jan' | ||||
| version '1.0-SNAPSHOT' | ||||
| 
 | ||||
| sourceCompatibility = 1.8 | ||||
| sourceCompatibility = 11 | ||||
| 
 | ||||
| repositories { | ||||
|     mavenCentral() | ||||
|  | @ -16,3 +17,7 @@ dependencies { | |||
|     compileOnly 'org.projectlombok:lombok:1.18.10' | ||||
|     annotationProcessor 'org.projectlombok:lombok:1.18.10' | ||||
| } | ||||
| 
 | ||||
| javafx { | ||||
|     modules = [ 'javafx.controls', 'javafx.fxml' ] | ||||
| } | ||||
|  |  | |||
|  | @ -1,12 +1,29 @@ | |||
| package core.algo.MC; | ||||
| package core.algo.mc; | ||||
| 
 | ||||
| import core.*; | ||||
| import core.algo.Learning; | ||||
| import core.policy.EpsilonGreedyPolicy; | ||||
| import javafx.util.Pair; | ||||
| 
 | ||||
| import java.util.*; | ||||
| 
 | ||||
| /** | ||||
|  * TODO: Major problem: | ||||
|  * State-action pairs are only unique when their position in the episode is taken into account. | ||||
|  * For example: | ||||
|  * | ||||
|  * startingState -> MOVE_LEFT : the very first state-action pair in the episode (i = 1). | ||||
|  * Imagine the agent does not collect the food and drop it at the start; the agent will receive | ||||
|  * -1 for every time step, hence (startingState -> MOVE_LEFT) will get a value of -10. | ||||
|  * | ||||
|  * BUT imagine that moving left from the starting position has no impact on the state because | ||||
|  * the agent ran into a wall. The known world stays the same. | ||||
|  * Taking an action after that happens in the exact same state but with a different action, | ||||
|  * making the value of this state-action pair -9: it took place on the second | ||||
|  * time step, so the sum of all remaining rewards is -9... | ||||
|  * | ||||
|  * How to counter this problem? | ||||
|  * @param <A> | ||||
|  */ | ||||
| public class MonteCarloOnPolicyEGreedy<A extends Enum> extends Learning<A> { | ||||
| 
 | ||||
|     public MonteCarloOnPolicyEGreedy(Environment<A> environment, DiscreteActionSpace<A> actionSpace) { | ||||
|  | @ -22,15 +39,17 @@ public class MonteCarloOnPolicyEGreedy<A extends Enum> extends Learning<A> { | |||
|         Map<Pair<State, A>, Double> returnSum = new HashMap<>(); | ||||
|         Map<Pair<State, A>, Integer> returnCount = new HashMap<>(); | ||||
| 
 | ||||
|         State startingState = environment.reset(); | ||||
|         for(int i = 0; i < nrOfEpisodes; ++i) { | ||||
| 
 | ||||
|             List<StepResult<A>> episode = new ArrayList<>(); | ||||
|             State state = environment.reset(); | ||||
|             for(int j=0; j < 100; ++j){ | ||||
|             double rewardSum = 0; | ||||
|             for(int j=0; j < 10; ++j){ | ||||
|                 Map<A, Double> actionValues = stateActionTable.getActionValues(state); | ||||
|                 A chosenAction = policy.chooseAction(actionValues); | ||||
|                 StepResultEnvironment envResult = environment.step(chosenAction); | ||||
|                 State nextState = envResult.getState(); | ||||
|                 rewardSum +=  envResult.getReward(); | ||||
|                 episode.add(new StepResult<>(state, chosenAction, envResult.getReward())); | ||||
| 
 | ||||
|                 if(envResult.isDone()) break; | ||||
|  | @ -38,23 +57,25 @@ public class MonteCarloOnPolicyEGreedy<A extends Enum> extends Learning<A> { | |||
|                 state = nextState; | ||||
| 
 | ||||
|                 try { | ||||
|                     Thread.sleep(10); | ||||
|                     Thread.sleep(1); | ||||
|                 } catch (InterruptedException e) { | ||||
|                     e.printStackTrace(); | ||||
|                 } | ||||
|             } | ||||
| 
 | ||||
|             System.out.printf("Episode %d \t Reward: %f \n", i, rewardSum); | ||||
|             Set<Pair<State, A>> stateActionPairs = new HashSet<>(); | ||||
| 
 | ||||
|             for(StepResult<A> sr: episode){ | ||||
|                 stateActionPairs.add(new Pair<>(sr.getState(), sr.getAction())); | ||||
|             } | ||||
| 
 | ||||
|             System.out.println("stateActionPairs " + stateActionPairs.size()); | ||||
|             for(Pair<State, A> stateActionPair: stateActionPairs){ | ||||
|                 int firstOccurenceIndex = 0; | ||||
|                 // find the first occurrence of the state-action pair | ||||
|                 for(StepResult<A> sr: episode){ | ||||
|                     if(stateActionPair.getKey().equals(sr.getState()) && stateActionPair.getValue().equals(sr.getAction())){ | ||||
| ; | ||||
|                         break; | ||||
|                     } | ||||
|                     firstOccurenceIndex++; | ||||
|  |  | |||
|  | @ -5,6 +5,7 @@ import core.RNG; | |||
| import java.util.ArrayList; | ||||
| import java.util.List; | ||||
| import java.util.Map; | ||||
| import java.util.Random; | ||||
| 
 | ||||
| public class GreedyPolicy<A extends Enum> implements Policy<A> { | ||||
| 
 | ||||
|  | @ -17,7 +18,7 @@ public class GreedyPolicy<A extends Enum> implements Policy<A> { | |||
|         List<A> equalHigh = new ArrayList<>(); | ||||
| 
 | ||||
|         for(Map.Entry<A, Double> actionValue : actionValues.entrySet()){ | ||||
|             System.out.println(actionValue.getKey()+ " " + actionValue.getValue() ); | ||||
|            // System.out.println(actionValue.getKey() + " " + actionValue.getValue()); | ||||
|             if(highestValueAction == null || highestValueAction < actionValue.getValue()){ | ||||
|                 highestValueAction = actionValue.getValue(); | ||||
|                 equalHigh.clear(); | ||||
|  | @ -27,6 +28,6 @@ public class GreedyPolicy<A extends Enum> implements Policy<A> { | |||
|             } | ||||
|         } | ||||
| 
 | ||||
|         return equalHigh.get(RNG.getRandom().nextInt(equalHigh.size())); | ||||
|         return equalHigh.get(new Random().nextInt(equalHigh.size())); | ||||
|     } | ||||
| } | ||||
|  |  | |||
|  | @ -2,7 +2,7 @@ package evironment.antGame; | |||
| 
 | ||||
| import core.*; | ||||
| import core.algo.Learning; | ||||
| import core.algo.MC.MonteCarloOnPolicyEGreedy; | ||||
| import core.algo.mc.MonteCarloOnPolicyEGreedy; | ||||
| import evironment.antGame.gui.MainFrame; | ||||
| 
 | ||||
| 
 | ||||
|  | @ -113,6 +113,7 @@ public class AntWorld implements Environment<AntAction>{ | |||
|                     // than the starting point | ||||
|                     if(currentCell.getType() != CellType.START){ | ||||
|                         reward = Reward.FOOD_DROP_DOWN_FAIL_NOT_START; | ||||
|                         done = true; | ||||
|                     }else{ | ||||
|                         reward = Reward.FOOD_DROP_DOWN_SUCCESS; | ||||
|                         myAnt.setPoints(myAnt.getPoints() + 1); | ||||
|  | @ -156,10 +157,14 @@ public class AntWorld implements Environment<AntAction>{ | |||
|             done = grid.isAllFoodCollected(); | ||||
|         } | ||||
| 
 | ||||
|         if(!done){ | ||||
|             reward = -1; | ||||
|         } | ||||
|         if(++tick == maxEpisodeTicks){ | ||||
|             done = true; | ||||
|         } | ||||
| 
 | ||||
| 
 | ||||
|         StepResultEnvironment result = new StepResultEnvironment(newState, reward, done, info); | ||||
|         getGui().update(action, result); | ||||
|         return result; | ||||
|  | @ -211,6 +216,6 @@ public class AntWorld implements Environment<AntAction>{ | |||
|                 new AntWorld(3, 3, 0.1), | ||||
|                 new ListDiscreteActionSpace<>(AntAction.values()) | ||||
|         ); | ||||
|         monteCarlo.learn(100,5); | ||||
|         monteCarlo.learn(20000,5); | ||||
|     } | ||||
| } | ||||
|  |  | |||
|  | @ -7,7 +7,7 @@ public class Reward { | |||
| 
 | ||||
|     public static final double FOOD_DROP_DOWN_FAIL_NO_FOOD = 0; | ||||
|     public static final double FOOD_DROP_DOWN_FAIL_NOT_START = 0; | ||||
|     public static final double FOOD_DROP_DOWN_SUCCESS = 1000; | ||||
|     public static final double FOOD_DROP_DOWN_SUCCESS = 1; | ||||
| 
 | ||||
|     public static final double UNKNOWN_FIELD_EXPLORED = 0; | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue