diff --git a/.idea/misc.xml b/.idea/misc.xml
index bc8d0a3..a59d74b 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,7 +1,14 @@
diff --git a/.idea/modules.xml b/.idea/modules.xml
deleted file mode 100644
index 4acc2aa..0000000
--- a/.idea/modules.xml
+++ /dev/null
@@ -1,8 +0,0 @@
diff --git a/.idea/refo.iml b/.idea/refo.iml
index 9ec6aa6..e422b6d 100644
--- a/.idea/refo.iml
+++ b/.idea/refo.iml
@@ -1,18 +1,2 @@
diff --git a/build.gradle b/build.gradle
index d223ea5..a28b89e 100644
--- a/build.gradle
+++ b/build.gradle
@@ -1,11 +1,12 @@
plugins {
id 'java'
+ id 'org.openjfx.javafxplugin' version '0.0.8'
}
group 'net.lwenstrom.jan'
version '1.0-SNAPSHOT'
-sourceCompatibility = 1.8
+sourceCompatibility = 11
repositories {
mavenCentral()
@@ -16,3 +17,7 @@ dependencies {
compileOnly 'org.projectlombok:lombok:1.18.10'
annotationProcessor 'org.projectlombok:lombok:1.18.10'
}
+
+javafx {
+ modules = [ 'javafx.controls', 'javafx.fxml' ]
+}
diff --git a/src/main/java/core/algo/MC/MonteCarloOnPolicyEGreedy.java b/src/main/java/core/algo/MC/MonteCarloOnPolicyEGreedy.java
index 55c70ed..54450e8 100644
--- a/src/main/java/core/algo/MC/MonteCarloOnPolicyEGreedy.java
+++ b/src/main/java/core/algo/MC/MonteCarloOnPolicyEGreedy.java
@@ -1,12 +1,29 @@
-package core.algo.MC;
+package core.algo.mc;
import core.*;
import core.algo.Learning;
import core.policy.EpsilonGreedyPolicy;
import javafx.util.Pair;
-
import java.util.*;
+/**
+ * TODO: Major problem:
+ * StateActionPairs are only unique with respect to their position in the episode.
+ * For example:
+ *
+ * startingState -> MOVE_LEFT : the very first state-action pair of the episode (i = 1).
+ * Imagine the agent never collects the food and ends up back at the start: it receives
+ * -1 for every timestep, so (startingState -> MOVE_LEFT) gets a value of -10.
+ *
+ * BUT imagine that moving left from the starting position has no impact on the state because
+ * the agent ran into a wall. The known world stays the same.
+ * The next action is then taken from the exact same state, just with a different action,
+ * and that stateActionPair gets a value of -9 merely because it occurred on the second
+ * timestep and the sum of all remaining rewards from there is -9...
+ *
+ * How to counter this problem?
+ * @param <A> the action type of the environment
+ */
public class MonteCarloOnPolicyEGreedy<A> extends Learning<A> {
public MonteCarloOnPolicyEGreedy(Environment<A> environment, DiscreteActionSpace<A> actionSpace) {
@@ -22,15 +39,17 @@ public class MonteCarloOnPolicyEGreedy<A> extends Learning<A> {
Map<Pair<State, A>, Double> returnSum = new HashMap<>();
Map<Pair<State, A>, Integer> returnCount = new HashMap<>();
+ State startingState = environment.reset();
for(int i = 0; i < nrOfEpisodes; ++i) {
-
List<StepResult<A>> episode = new ArrayList<>();
State state = environment.reset();
- for(int j=0; j < 100; ++j){
+ double rewardSum = 0;
+ for(int j=0; j < 10; ++j){
Map<A, Double> actionValues = stateActionTable.getActionValues(state);
A chosenAction = policy.chooseAction(actionValues);
StepResultEnvironment envResult = environment.step(chosenAction);
State nextState = envResult.getState();
+ rewardSum += envResult.getReward();
episode.add(new StepResult<>(state, chosenAction, envResult.getReward()));
if(envResult.isDone()) break;
@@ -38,23 +57,25 @@ public class MonteCarloOnPolicyEGreedy<A> extends Learning<A> {
state = nextState;
try {
- Thread.sleep(10);
+ Thread.sleep(1);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
+ System.out.printf("Episode %d \t Reward: %f \n", i, rewardSum);
Set<Pair<State, A>> stateActionPairs = new HashSet<>();
for(StepResult<A> sr: episode){
stateActionPairs.add(new Pair<>(sr.getState(), sr.getAction()));
}
-
+ System.out.println("stateActionPairs " + stateActionPairs.size());
for(Pair<State, A> stateActionPair: stateActionPairs){
int firstOccurenceIndex = 0;
// find the first occurrence of the state-action pair
for(StepResult<A> sr: episode){
if(stateActionPair.getKey().equals(sr.getState()) && stateActionPair.getValue().equals(sr.getAction())){
+;
break;
}
firstOccurenceIndex++;
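
The hunk above cuts off right after the first-occurrence search. For
orientation, this index typically feeds the standard first-visit Monte Carlo
update; the sketch below assumes undiscounted returns (matching the
-1-per-step arithmetic in the TODO) and that StepResult exposes getReward()
alongside the getState()/getAction() accessors used above:

    import core.*;           // State, StepResult, as imported in the file above
    import javafx.util.Pair;
    import java.util.List;
    import java.util.Map;

    class FirstVisitSketch {
        // First-visit MC update for one state-action pair of one episode.
        static <A> double firstVisitUpdate(Pair<State, A> pair, int firstIndex,
                                           List<StepResult<A>> episode,
                                           Map<Pair<State, A>, Double> returnSum,
                                           Map<Pair<State, A>, Integer> returnCount) {
            // g: return summed from the pair's first occurrence to the episode end.
            double g = 0;
            for (int k = firstIndex; k < episode.size(); ++k) {
                g += episode.get(k).getReward(); // assumes a getReward() accessor
            }
            returnSum.merge(pair, g, Double::sum);
            returnCount.merge(pair, 1, Integer::sum);
            // The running average of observed returns becomes the new Q(s, a).
            return returnSum.get(pair) / returnCount.get(pair);
        }
    }

The caller would write the returned average back into stateActionTable for the
pair's state and action.
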
diff --git a/src/main/java/core/policy/GreedyPolicy.java b/src/main/java/core/policy/GreedyPolicy.java
index 15b06f7..a727db3 100644
--- a/src/main/java/core/policy/GreedyPolicy.java
+++ b/src/main/java/core/policy/GreedyPolicy.java
@@ -5,6 +5,7 @@ import core.RNG;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
+import java.util.Random;
public class GreedyPolicy<A> implements Policy<A> {
@@ -17,7 +18,7 @@ public class GreedyPolicy<A> implements Policy<A> {
List<A> equalHigh = new ArrayList<>();
for(Map.Entry<A, Double> actionValue : actionValues.entrySet()){
- System.out.println(actionValue.getKey()+ " " + actionValue.getValue() );
+ // System.out.println(actionValue.getKey() + " " + actionValue.getValue());
if(highestValueAction == null || highestValueAction < actionValue.getValue()){
highestValueAction = actionValue.getValue();
equalHigh.clear();
@@ -27,6 +28,6 @@ public class GreedyPolicy implements Policy {
}
}
- return equalHigh.get(RNG.getRandom().nextInt(equalHigh.size()));
+ return equalHigh.get(new Random().nextInt(equalHigh.size()));
}
}
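
A side effect of this change: new Random() allocates a fresh, time-seeded
generator on every call, so tie-breaking between equally valued actions is no
longer reproducible across runs, which appears to be what the core.RNG wrapper
provided. A minimal sketch of reproducible tie-breaking with a single shared
instance (the seed value is an arbitrary assumption):

    import java.util.List;
    import java.util.Random;

    class TieBreakSketch {
        // One generator per policy, seeded once, instead of one per decision.
        private final Random random = new Random(42);

        <A> A breakTie(List<A> equalHigh) {
            return equalHigh.get(random.nextInt(equalHigh.size()));
        }
    }
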
diff --git a/src/main/java/evironment/antGame/AntWorld.java b/src/main/java/evironment/antGame/AntWorld.java
index ff48eb4..1d656e6 100644
--- a/src/main/java/evironment/antGame/AntWorld.java
+++ b/src/main/java/evironment/antGame/AntWorld.java
@@ -2,7 +2,7 @@ package evironment.antGame;
import core.*;
import core.algo.Learning;
-import core.algo.MC.MonteCarloOnPolicyEGreedy;
+import core.algo.mc.MonteCarloOnPolicyEGreedy;
import evironment.antGame.gui.MainFrame;
@@ -113,6 +113,7 @@ public class AntWorld implements Environment<AntAction>{
// than the starting point
if(currentCell.getType() != CellType.START){
reward = Reward.FOOD_DROP_DOWN_FAIL_NOT_START;
+ done = true;
}else{
reward = Reward.FOOD_DROP_DOWN_SUCCESS;
myAnt.setPoints(myAnt.getPoints() + 1);
@@ -156,10 +157,14 @@ public class AntWorld implements Environment<AntAction>{
done = grid.isAllFoodCollected();
}
+ if(!done){
+ reward = -1;
+ }
if(++tick == maxEpisodeTicks){
done = true;
}
+
StepResultEnvironment result = new StepResultEnvironment(newState, reward, done, info);
getGui().update(action, result);
return result;
@@ -211,6 +216,6 @@ public class AntWorld implements Environment<AntAction>{
new AntWorld(3, 3, 0.1),
new ListDiscreteActionSpace<>(AntAction.values())
);
- monteCarlo.learn(100,5);
+ monteCarlo.learn(20000,5);
}
}
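
Note the ordering in step(): the unconditional reward = -1 runs after the
action-specific branches, so on every non-terminal step it overwrites whatever
Reward constant was assigned earlier. Only a step that ends the episode through
the environment logic (all food collected, failed drop-off) can deliver an
action-specific reward; a step that merely hits maxEpisodeTicks keeps the -1,
because the timeout check runs after the overwrite. A minimal sketch of the
effective per-step reward this produces (class and method names are
illustrative only):

    class EffectiveRewardSketch {
        // Whatever step() assigned is flattened to -1 unless the episode ends.
        static double effectiveReward(double assignedByAction, boolean done) {
            return done ? assignedByAction : -1.0;
        }

        public static void main(String[] args) {
            System.out.println(effectiveReward(0.0, false)); // exploring: -1.0
            System.out.println(effectiveReward(1.0, true));  // successful drop-off: 1.0
        }
    }
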
diff --git a/src/main/java/evironment/antGame/Reward.java b/src/main/java/evironment/antGame/Reward.java
index 855c6ff..9a6926f 100644
--- a/src/main/java/evironment/antGame/Reward.java
+++ b/src/main/java/evironment/antGame/Reward.java
@@ -7,7 +7,7 @@ public class Reward {
public static final double FOOD_DROP_DOWN_FAIL_NO_FOOD = 0;
public static final double FOOD_DROP_DOWN_FAIL_NOT_START = 0;
- public static final double FOOD_DROP_DOWN_SUCCESS = 1000;
+ public static final double FOOD_DROP_DOWN_SUCCESS = 1;
public static final double UNKNOWN_FIELD_EXPLORED = 0;
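
Scaling the success reward from 1000 down to 1 works together with the new
-1-per-step penalty above: against a +1000 terminal bonus, episode length
barely moves the return, so the averaged action values give the agent little
pressure to finish quickly; at +1, speed dominates the return. A quick check
of the undiscounted return bonus - (steps - 1) for a run solved in 5 versus
50 steps (helper names are illustrative only):

    class RewardScaleSketch {
        // Return of an episode that succeeds after n steps, given a terminal bonus.
        static double episodeReturn(int n, double bonus) {
            return bonus - (n - 1);
        }

        public static void main(String[] args) {
            // Old scale: 996.0 vs 951.0, under a 5% difference.
            System.out.println(episodeReturn(5, 1000));
            System.out.println(episodeReturn(50, 1000));
            // New scale: -3.0 vs -48.0, path length dominates.
            System.out.println(episodeReturn(5, 1));
            System.out.println(episodeReturn(50, 1));
        }
    }
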