diff --git a/src/main/java/core/Action.java b/src/main/java/core/Action.java
deleted file mode 100644
index f1630e1..0000000
--- a/src/main/java/core/Action.java
+++ /dev/null
@@ -1,4 +0,0 @@
-package core;
-
-public interface Action {
-}
diff --git a/src/main/java/core/DeterministicStateActionTable.java b/src/main/java/core/DeterministicStateActionTable.java
index e04c520..fbc44ca 100644
--- a/src/main/java/core/DeterministicStateActionTable.java
+++ b/src/main/java/core/DeterministicStateActionTable.java
@@ -17,13 +17,17 @@ public class DeterministicStateActionTable implements StateActio
this.discreteActionSpace = discreteActionSpace;
}
- /*
- If the state is not present in the table at the time of
- calling this method the DEFAULT_VALUE gets returned BUT
- no the missing state is not inserted into the table!
-
- Inserting of missing states is ONLY done in "setValue()"
- method.
+ /**
+ * If the state is not present in the table at the time of
+ * calling this method, the DEFAULT_VALUE gets returned BUT
+ * note: the missing state is NOT inserted into the table!
+ *
+ * Inserting of missing states is ONLY done in "setValue()"
+ * method.
+ *
+ * @param state given state
+ * @param action given action
+ * @return estimated value of the state-action pair
*/
@Override
public double getValue(State state, A action) {
@@ -34,11 +38,15 @@ public class DeterministicStateActionTable implements StateActio
return DEFAULT_VALUE;
}
- /*
- Update the value of an action for a specific state.
- If the state is not present in the table yet,
- it will get stored in combination with every action
- from the action space initialized with the default value.
+ /**
+ * Update the value of an action for a specific state.
+ * If the state is not present in the table yet,
+ * it will get stored in combination with every action
+ * from the action space initialized with the default value.
+ *
+ * @param state given state
+ * @param action given action
+ * @param value new estimate of the state-action pair
*/
@Override
public void setValue(State state, A action, double value) {
@@ -52,6 +60,10 @@ public class DeterministicStateActionTable implements StateActio
actionValues.put(action, value);
}
+ /**
+ * @param state given state
+ * @return all available actions in the given state and their corresponding estimated values
+ */
@Override
public Map getActionValues(State state) {
if(table.get(state) == null){
@@ -60,6 +72,9 @@ public class DeterministicStateActionTable implements StateActio
return table.get(state);
}
+ /**
+ * @return Map with initial values for every available action
+ */
private Map createDefaultActionValues(){
final Map defaultActionValues = new LinkedHashMap<>();
for(A action: discreteActionSpace){
diff --git a/src/main/java/core/DiscreteActionSpace.java b/src/main/java/core/DiscreteActionSpace.java
index 91683f9..316d55c 100644
--- a/src/main/java/core/DiscreteActionSpace.java
+++ b/src/main/java/core/DiscreteActionSpace.java
@@ -1,5 +1,14 @@
package core;
+/**
+ * Collection of all available actions for a specific
+ * environment. It defines the actions the agent is able
+ * to choose from at every timestamp T.
+ * Extending from Iterable for easy
+ * "immutable" iteration of the action space.
+ *
+ * @param <A> actions as defined in a base class
+ */
public interface DiscreteActionSpace extends Iterable {
int getNumberOfActions();
void addAction(A a);
diff --git a/src/main/java/core/Environment.java b/src/main/java/core/Environment.java
index e3fc91f..bd87d93 100644
--- a/src/main/java/core/Environment.java
+++ b/src/main/java/core/Environment.java
@@ -1,5 +1,17 @@
package core;
+/**
+ * Interface of the environment as defined in the RL framework.
+ * An agent is able to interact with its environment, submitting
+ * an action (.step()) every timestamp T and receiving a reward and a new
+ * observation.
+ * StepResultEnvironment is the datatype to combine all the
+ * receiving information.
+ * After each episode the environment is reset to its original state,
+ * returning the starting state.
+ *
+ * @param <A> action type which defines the available actions for this environment
+ */
public interface Environment {
StepResultEnvironment step(A action);
State reset();
diff --git a/src/main/java/core/ListDiscreteActionSpace.java b/src/main/java/core/ListDiscreteActionSpace.java
index b456751..5c73a65 100644
--- a/src/main/java/core/ListDiscreteActionSpace.java
+++ b/src/main/java/core/ListDiscreteActionSpace.java
@@ -3,6 +3,13 @@ package core;
import java.io.Serializable;
import java.util.*;
+/**
+ * Implementation of a discrete action space.
+ * "Discrete" because actions are exclusively defined as Enum,
+ * realized as generic "A".
+ *
+ * @param <A> Enum class of actions in the specific environment
+ */
public class ListDiscreteActionSpace implements DiscreteActionSpace, Serializable{
private static final long serialVersionUID = 1L;
private List actions;
diff --git a/src/main/java/core/Observation.java b/src/main/java/core/Observation.java
deleted file mode 100644
index 3dede06..0000000
--- a/src/main/java/core/Observation.java
+++ /dev/null
@@ -1,4 +0,0 @@
-package core;
-
-public interface Observation {
-}
diff --git a/src/main/java/core/RNG.java b/src/main/java/core/RNG.java
index daf03b4..1bc01e3 100644
--- a/src/main/java/core/RNG.java
+++ b/src/main/java/core/RNG.java
@@ -3,6 +3,15 @@ package core;
import java.security.SecureRandom;
import java.util.Random;
+/**
+ * To ensure deterministic behaviour of repeating program executions,
+ * this class is used for all random number generation methods.
+ * Do not use Math.random()!
+ * It is not necessary to set a seed explicitly, because a default one
+ * ("123") is defined. Nonetheless, a set-method is exposed which should
+ * ONLY be called at the very beginning of the program. (Do not reseed
+ * during execution.)
+ */
public class RNG {
private static SecureRandom rng;
private static int seed = 123;
diff --git a/src/main/java/core/SaveState.java b/src/main/java/core/SaveState.java
index 8f0b976..66f7b31 100644
--- a/src/main/java/core/SaveState.java
+++ b/src/main/java/core/SaveState.java
@@ -5,6 +5,15 @@ import lombok.Getter;
import java.io.Serializable;
+/**
+ * Small datatype to combine needed information to save and recover
+ * the learning progress. Essentially, only the Q-Table needs to be saved
+ * for all tabular methods because they all try to estimate
+ * the action values until convergence.
+ * For episodic methods, the number of episodes so far is also saved.
+ *
+ * @param <A> enum class of actions for a specific environment
+ */
@AllArgsConstructor
@Getter
public class SaveState implements Serializable {
diff --git a/src/main/java/core/State.java b/src/main/java/core/State.java
index 5acb4f0..0b8087e 100644
--- a/src/main/java/core/State.java
+++ b/src/main/java/core/State.java
@@ -1,5 +1,8 @@
package core;
+/**
+ * State of the environment; implementations must provide toString() and hashCode().
+ */
public interface State {
String toString();
int hashCode();
diff --git a/src/main/java/core/StateActionTable.java b/src/main/java/core/StateActionTable.java
index 3f863ba..306f7d1 100644
--- a/src/main/java/core/StateActionTable.java
+++ b/src/main/java/core/StateActionTable.java
@@ -2,6 +2,12 @@ package core;
import java.util.Map;
+/**
+ * Q-Table which saves all seen states, all available actions for each state
+ * and their value (state-action values/ action values).
+ *
+ * @param <A> the action type for a specific environment
+ */
public interface StateActionTable {
double DEFAULT_VALUE = 0.0;
diff --git a/src/main/java/core/StepResult.java b/src/main/java/core/StepResult.java
index 7de2756..5410772 100644
--- a/src/main/java/core/StepResult.java
+++ b/src/main/java/core/StepResult.java
@@ -3,6 +3,14 @@ package core;
import lombok.AllArgsConstructor;
import lombok.Getter;
+/**
+ * Almost the same as the datatype StepResultEnvironment
+ * but includes the last action taken as well.
+ * The environment does not return the last action taken in its result
+ * StepResultEnvironment, but it is needed for the prediction problem.
+ *
+ * @param <A> Enum class of the last action taken
+ */
@AllArgsConstructor
@Getter
public class StepResult {
diff --git a/src/main/java/core/StepResultEnvironment.java b/src/main/java/core/StepResultEnvironment.java
index b1d1c06..b420a97 100644
--- a/src/main/java/core/StepResultEnvironment.java
+++ b/src/main/java/core/StepResultEnvironment.java
@@ -4,6 +4,13 @@ import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.Setter;
+/**
+ * After each timestamp the environment returns a reward
+ * for the previous action (still the same timestamp t), the resulting
+ * observation/state (environment is in charge to process the observation
+ * and build a markov state) and the information whether or not the episode
+ * has ended.
+ */
@Getter
@Setter
@AllArgsConstructor
diff --git a/src/main/java/core/Util.java b/src/main/java/core/Util.java
index 3c1324f..8d37854 100644
--- a/src/main/java/core/Util.java
+++ b/src/main/java/core/Util.java
@@ -1,6 +1,15 @@
package core;
+
public class Util {
+ /**
+ * Checks whether or not the given string input is a number.
+ * Used for example to check the input field to trigger additional
+ * episodes.
+ *
+ * @param strNum string input from user
+ * @return true if numeric and parsable, false if not
+ */
public static boolean isNumeric(String strNum) {
if (strNum == null) {
return false;
diff --git a/src/main/java/core/algo/Learning.java b/src/main/java/core/algo/Learning.java
index 9583696..8c589d2 100644
--- a/src/main/java/core/algo/Learning.java
+++ b/src/main/java/core/algo/Learning.java
@@ -14,6 +14,10 @@ import java.util.List;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArrayList;
+/**
+ *
+ * @param <A> discrete action type for a specific environment
+ */
@Getter
public abstract class Learning{
protected Policy policy;
diff --git a/src/main/java/core/algo/Method.java b/src/main/java/core/algo/Method.java
index b2da8cb..3ac50cc 100644
--- a/src/main/java/core/algo/Method.java
+++ b/src/main/java/core/algo/Method.java
@@ -1,5 +1,9 @@
package core.algo;
+/**
+ * Instead of reflections this enum is used to determine
+ * which RL-algorithm should be used.
+ */
public enum Method {
MC_ONPOLICY_EGREEDY, TD_ONPOLICY
}
diff --git a/src/main/java/core/gui/LearningView.java b/src/main/java/core/gui/LearningView.java
index 6a4ceaa..92e3d1f 100644
--- a/src/main/java/core/gui/LearningView.java
+++ b/src/main/java/core/gui/LearningView.java
@@ -2,6 +2,9 @@ package core.gui;
import java.util.List;
+/**
+ * Switched out Views have to implement this interface.
+ */
public interface LearningView {
void repaintEnvironment();
void updateLearningInfoPanel();
diff --git a/src/main/java/core/gui/View.java b/src/main/java/core/gui/View.java
index 5ad77db..af63a8f 100644
--- a/src/main/java/core/gui/View.java
+++ b/src/main/java/core/gui/View.java
@@ -61,7 +61,7 @@ public class View implements LearningView{
fileMenu.add(new JMenuItem(new AbstractAction("Save") {
@Override
public void actionPerformed(ActionEvent e) {
- String fileName = JOptionPane.showInputDialog("Enter file name", "save");
+ String fileName = JOptionPane.showInputDialog("Enter file name", "path/to/file");
if(fileName != null){
viewListener.onSaveState(fileName);
}
diff --git a/src/main/java/core/gui/Visualizable.java b/src/main/java/core/gui/Visualizable.java
index e144a40..73e055d 100644
--- a/src/main/java/core/gui/Visualizable.java
+++ b/src/main/java/core/gui/Visualizable.java
@@ -2,6 +2,13 @@ package core.gui;
import javax.swing.*;
+/**
+ * Classes that implement this interface are able to create a visual component
+ * that can be utilized and displayed by the view. It is optional for an environment
+ * to defined a GUI (View is checking it via "instance of").
+ * Furthermore a state implement this interface, so it can be displayed from the
+ * state-action-table.
+ */
public interface Visualizable {
JComponent visualize();
}
diff --git a/src/main/java/core/listener/LearningListener.java b/src/main/java/core/listener/LearningListener.java
index 2891d16..d04d4cb 100644
--- a/src/main/java/core/listener/LearningListener.java
+++ b/src/main/java/core/listener/LearningListener.java
@@ -2,6 +2,9 @@ package core.listener;
import java.util.List;
+/**
+ * Methods that get triggered to inform about the current learning process.
+ */
public interface LearningListener{
void onLearningStart();
void onLearningEnd();
diff --git a/src/main/java/core/listener/ViewListener.java b/src/main/java/core/listener/ViewListener.java
index abd0004..7651abe 100644
--- a/src/main/java/core/listener/ViewListener.java
+++ b/src/main/java/core/listener/ViewListener.java
@@ -1,5 +1,10 @@
package core.listener;
+/**
+ * Interface the controller is implementing and gets passed to
+ * the View. (Preventing the controller from adding all
+ * ActionListeners to view elements)
+ */
public interface ViewListener {
void onEpsilonChange(float epsilon);
void onDelayChange(int delay);
diff --git a/src/main/java/core/policy/EpsilonPolicy.java b/src/main/java/core/policy/EpsilonPolicy.java
index 76bff45..7c34006 100644
--- a/src/main/java/core/policy/EpsilonPolicy.java
+++ b/src/main/java/core/policy/EpsilonPolicy.java
@@ -1,5 +1,11 @@
package core.policy;
+/**
+ * Chooses the action with the highest value with probability: 1-Ɛ + Ɛ/|A|
+ * With probability Ɛ, a random action is taken (highest-value option included).
+ *
+ * @param <A> Enum class of available actions in the specific environment
+ */
public interface EpsilonPolicy extends Policy {
float getEpsilon();
void setEpsilon(float epsilon);
diff --git a/src/main/java/core/policy/GreedyPolicy.java b/src/main/java/core/policy/GreedyPolicy.java
index 30d901f..210f766 100644
--- a/src/main/java/core/policy/GreedyPolicy.java
+++ b/src/main/java/core/policy/GreedyPolicy.java
@@ -6,6 +6,12 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Map;
+/**
+ * Always chooses the action with the highest value
+ * with ties broken arbitrarily.
+ *
+ * @param <A> Enum class of available actions in the specific environment
+ */
public class GreedyPolicy implements Policy {
@Override
diff --git a/src/main/java/core/policy/Policy.java b/src/main/java/core/policy/Policy.java
index fcb9d04..85b9676 100644
--- a/src/main/java/core/policy/Policy.java
+++ b/src/main/java/core/policy/Policy.java
@@ -2,6 +2,12 @@ package core.policy;
import java.util.Map;
+/**
+ * Strategy to choose a specific action available in the agent's current
+ * state.
+ *
+ * @param <A> Enum class of available actions in the specific environment
+ */
public interface Policy {
A chooseAction(Map actionValues);
}
diff --git a/src/main/java/core/policy/RandomPolicy.java b/src/main/java/core/policy/RandomPolicy.java
index ea99dd5..e989fc3 100644
--- a/src/main/java/core/policy/RandomPolicy.java
+++ b/src/main/java/core/policy/RandomPolicy.java
@@ -4,6 +4,11 @@ import core.RNG;
import java.util.Map;
+/**
+ * Chooses an action arbitrarily.
+ *
+ * @param <A> Enum class of available actions in the specific environment
+ */
public class RandomPolicy implements Policy{
@Override
public A chooseAction(Map actionValues) {
diff --git a/src/main/java/evironment/antGame/AntObservation.java b/src/main/java/evironment/antGame/AntObservation.java
index d497214..53915bc 100644
--- a/src/main/java/evironment/antGame/AntObservation.java
+++ b/src/main/java/evironment/antGame/AntObservation.java
@@ -1,6 +1,5 @@
package evironment.antGame;
-import core.Observation;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Getter;
@@ -12,7 +11,7 @@ import java.awt.*;
@AllArgsConstructor
@Getter
@Setter
-public class AntObservation implements Observation {
+public class AntObservation {
private Cell cell;
private Point pos;