diff --git a/src/main/java/core/Action.java b/src/main/java/core/Action.java deleted file mode 100644 index f1630e1..0000000 --- a/src/main/java/core/Action.java +++ /dev/null @@ -1,4 +0,0 @@ -package core; - -public interface Action { -} diff --git a/src/main/java/core/DeterministicStateActionTable.java b/src/main/java/core/DeterministicStateActionTable.java index e04c520..fbc44ca 100644 --- a/src/main/java/core/DeterministicStateActionTable.java +++ b/src/main/java/core/DeterministicStateActionTable.java @@ -17,13 +17,17 @@ public class DeterministicStateActionTable implements StateActio this.discreteActionSpace = discreteActionSpace; } - /* - If the state is not present in the table at the time of - calling this method the DEFAULT_VALUE gets returned BUT - no the missing state is not inserted into the table! - - Inserting of missing states is ONLY done in "setValue()" - method. + /** + * If the state is not present in the table at the time of + * calling this method the DEFAULT_VALUE gets returned BUT + * the missing state is not inserted into the table! + * + * Inserting of missing states is ONLY done in "setValue()" + * method. + * + * @param state given state + * @param action given action + * @return estimated value of state-action pair */ @Override public double getValue(State state, A action) { @@ -34,11 +38,15 @@ public class DeterministicStateActionTable implements StateActio return DEFAULT_VALUE; } - /* - Update the value of an action for a specific state. - If the state is not present in the table yet, - it will get stored in combination with every action - from the action space initialized with the default value. + /** + * Update the value of an action for a specific state. + * If the state is not present in the table yet, + * it will get stored in combination with every action + * from the action space initialized with the default value. 
+ * + * @param state given state + * @param action given action + * @param value new estimate of the state-action pair */ @Override public void setValue(State state, A action, double value) { @@ -52,6 +60,10 @@ public class DeterministicStateActionTable implements StateActio actionValues.put(action, value); } + /** + * @param state given state + * @return all available actions in the given state and their corresponding estimated values + */ @Override public Map getActionValues(State state) { if(table.get(state) == null){ @@ -60,6 +72,9 @@ public class DeterministicStateActionTable implements StateActio return table.get(state); } + /** + * @return Map with initial values for every available action + */ private Map createDefaultActionValues(){ final Map defaultActionValues = new LinkedHashMap<>(); for(A action: discreteActionSpace){ diff --git a/src/main/java/core/DiscreteActionSpace.java b/src/main/java/core/DiscreteActionSpace.java index 91683f9..316d55c 100644 --- a/src/main/java/core/DiscreteActionSpace.java +++ b/src/main/java/core/DiscreteActionSpace.java @@ -1,5 +1,14 @@ package core; +/** + * Collection of all available actions for a specific + * environment. It defines the actions the agent is able + * to choose from at every time step t. + * Extends Iterable for easy + * "immutable" iteration of the action space. + * + * @param Actions as defined in an based class + */ public interface DiscreteActionSpace extends Iterable { int getNumberOfActions(); void addAction(A a); diff --git a/src/main/java/core/Environment.java b/src/main/java/core/Environment.java index e3fc91f..bd87d93 100644 --- a/src/main/java/core/Environment.java +++ b/src/main/java/core/Environment.java @@ -1,5 +1,17 @@ package core; +/** + * Interface of the environment as defined in the RL framework. + * An agent is able to interact with its environment, submitting + * an action (.step()) every time step t and receiving a reward and a new + * observation. 
+ * StepResultEnvironment is the datatype to combine all the + * received information. + * After each episode the environment is reset to its original state, + * returning the starting state. + * + * @param related which defines the available action for this environment + */ public interface Environment { StepResultEnvironment step(A action); State reset(); diff --git a/src/main/java/core/ListDiscreteActionSpace.java b/src/main/java/core/ListDiscreteActionSpace.java index b456751..5c73a65 100644 --- a/src/main/java/core/ListDiscreteActionSpace.java +++ b/src/main/java/core/ListDiscreteActionSpace.java @@ -3,6 +3,13 @@ package core; import java.io.Serializable; import java.util.*; +/** + * Implementation of a discrete action space. + * "Discrete" because actions are exclusively defined as Enum, + * realized as generic "A". + * + * @param Enum class of actions in the specific environment + */ public class ListDiscreteActionSpace implements DiscreteActionSpace, Serializable{ private static final long serialVersionUID = 1L; private List actions; diff --git a/src/main/java/core/Observation.java b/src/main/java/core/Observation.java deleted file mode 100644 index 3dede06..0000000 --- a/src/main/java/core/Observation.java +++ /dev/null @@ -1,4 +0,0 @@ -package core; - -public interface Observation { -} diff --git a/src/main/java/core/RNG.java b/src/main/java/core/RNG.java index daf03b4..1bc01e3 100644 --- a/src/main/java/core/RNG.java +++ b/src/main/java/core/RNG.java @@ -3,6 +3,15 @@ package core; import java.security.SecureRandom; import java.util.Random; +/** + * To ensure deterministic behaviour of repeated program executions, + * this class is used for all random number generation methods. + * Do not use Math.random()! + * It is not necessary to set a seed explicitly, because a default one + * "123" is defined. Nonetheless a set-method is exposed which should + * ONLY be called at the very beginning of the program. 
(Do not reseed during + * execution) + */ public class RNG { private static SecureRandom rng; private static int seed = 123; diff --git a/src/main/java/core/SaveState.java b/src/main/java/core/SaveState.java index 8f0b976..66f7b31 100644 --- a/src/main/java/core/SaveState.java +++ b/src/main/java/core/SaveState.java @@ -5,6 +5,15 @@ import lombok.Getter; import java.io.Serializable; +/** + * Small datatype to combine needed information to save and recover + * the learning progress. Essentially, only the Q-Table needs to be saved + * for all tabular methods because they all try to estimate + * the action values until convergence. + * For episodic methods, the number of episodes so far is also saved. + * + * @param enum class of action for a specific environment + */ @AllArgsConstructor @Getter public class SaveState implements Serializable { diff --git a/src/main/java/core/State.java b/src/main/java/core/State.java index 5acb4f0..0b8087e 100644 --- a/src/main/java/core/State.java +++ b/src/main/java/core/State.java @@ -1,5 +1,8 @@ package core; +/** + * + */ public interface State { String toString(); int hashCode(); diff --git a/src/main/java/core/StateActionTable.java b/src/main/java/core/StateActionTable.java index 3f863ba..306f7d1 100644 --- a/src/main/java/core/StateActionTable.java +++ b/src/main/java/core/StateActionTable.java @@ -2,6 +2,12 @@ package core; import java.util.Map; +/** + * Q-Table which saves all seen states, all available actions for each state + * and their values (state-action values / action values). 
+ * + * @param + */ public interface StateActionTable { double DEFAULT_VALUE = 0.0; diff --git a/src/main/java/core/StepResult.java b/src/main/java/core/StepResult.java index 7de2756..5410772 100644 --- a/src/main/java/core/StepResult.java +++ b/src/main/java/core/StepResult.java @@ -3,6 +3,14 @@ package core; import lombok.AllArgsConstructor; import lombok.Getter; +/** + * Almost the same as the datatype StepResultEnvironment + * but includes the last action taken as well. + * The environment does not return the last action taken in its result + * StepResultEnvironment, but it is needed for the prediction problem. + * + * @param Enum class of last action taken + */ @AllArgsConstructor @Getter public class StepResult { diff --git a/src/main/java/core/StepResultEnvironment.java b/src/main/java/core/StepResultEnvironment.java index b1d1c06..b420a97 100644 --- a/src/main/java/core/StepResultEnvironment.java +++ b/src/main/java/core/StepResultEnvironment.java @@ -4,6 +4,13 @@ import lombok.AllArgsConstructor; import lombok.Getter; import lombok.Setter; +/** + * After each time step the environment returns a reward + * for the previous action (still the same time step t), the resulting + * observation/state (environment is in charge of processing the observation + * and building a Markov state) and the information whether or not the episode + * has ended. + */ @Getter @Setter @AllArgsConstructor diff --git a/src/main/java/core/Util.java b/src/main/java/core/Util.java index 3c1324f..8d37854 100644 --- a/src/main/java/core/Util.java +++ b/src/main/java/core/Util.java @@ -1,6 +1,15 @@ package core; + public class Util { + /** + * Checks whether the given string input is a number. + * Used for example to check the input field to trigger additional + * episodes. 
+ * + * @param strNum string input from user + * @return true if numeric and parsable, false if not + */ public static boolean isNumeric(String strNum) { if (strNum == null) { return false; diff --git a/src/main/java/core/algo/Learning.java b/src/main/java/core/algo/Learning.java index 9583696..8c589d2 100644 --- a/src/main/java/core/algo/Learning.java +++ b/src/main/java/core/algo/Learning.java @@ -14,6 +14,10 @@ import java.util.List; import java.util.Set; import java.util.concurrent.CopyOnWriteArrayList; +/** + * + * @param discrete action type for a specific environment + */ @Getter public abstract class Learning{ protected Policy policy; diff --git a/src/main/java/core/algo/Method.java b/src/main/java/core/algo/Method.java index b2da8cb..3ac50cc 100644 --- a/src/main/java/core/algo/Method.java +++ b/src/main/java/core/algo/Method.java @@ -1,5 +1,9 @@ package core.algo; +/** + * Instead of reflections this enum is used to determine + * which RL-algorithm should be used. + */ public enum Method { MC_ONPOLICY_EGREEDY, TD_ONPOLICY } diff --git a/src/main/java/core/gui/LearningView.java b/src/main/java/core/gui/LearningView.java index 6a4ceaa..92e3d1f 100644 --- a/src/main/java/core/gui/LearningView.java +++ b/src/main/java/core/gui/LearningView.java @@ -2,6 +2,9 @@ package core.gui; import java.util.List; +/** + * Switched out Views have to implement this interface. 
+ */ public interface LearningView { void repaintEnvironment(); void updateLearningInfoPanel(); diff --git a/src/main/java/core/gui/View.java b/src/main/java/core/gui/View.java index 5ad77db..af63a8f 100644 --- a/src/main/java/core/gui/View.java +++ b/src/main/java/core/gui/View.java @@ -61,7 +61,7 @@ public class View implements LearningView{ fileMenu.add(new JMenuItem(new AbstractAction("Save") { @Override public void actionPerformed(ActionEvent e) { - String fileName = JOptionPane.showInputDialog("Enter file name", "save"); + String fileName = JOptionPane.showInputDialog("Enter file name", "path/to/file"); if(fileName != null){ viewListener.onSaveState(fileName); } diff --git a/src/main/java/core/gui/Visualizable.java b/src/main/java/core/gui/Visualizable.java index e144a40..73e055d 100644 --- a/src/main/java/core/gui/Visualizable.java +++ b/src/main/java/core/gui/Visualizable.java @@ -2,6 +2,13 @@ package core.gui; import javax.swing.*; +/** + * Classes that implement this interface are able to create a visual component + * that can be utilized and displayed by the view. It is optional for an environment + * to define a GUI (the View checks for it via "instanceof"). + * Furthermore, a state can implement this interface, so it can be displayed from the + * state-action-table. + */ public interface Visualizable { JComponent visualize(); } diff --git a/src/main/java/core/listener/LearningListener.java b/src/main/java/core/listener/LearningListener.java index 2891d16..d04d4cb 100644 --- a/src/main/java/core/listener/LearningListener.java +++ b/src/main/java/core/listener/LearningListener.java @@ -2,6 +2,9 @@ package core.listener; import java.util.List; +/** + * Methods that get triggered to inform about the current learning process. 
+ */ public interface LearningListener{ void onLearningStart(); void onLearningEnd(); diff --git a/src/main/java/core/listener/ViewListener.java b/src/main/java/core/listener/ViewListener.java index abd0004..7651abe 100644 --- a/src/main/java/core/listener/ViewListener.java +++ b/src/main/java/core/listener/ViewListener.java @@ -1,5 +1,10 @@ package core.listener; +/** + * Interface that the controller implements and that gets passed to + * the View. (Preventing the controller from adding all + * ActionListeners to view elements) + */ public interface ViewListener { void onEpsilonChange(float epsilon); void onDelayChange(int delay); diff --git a/src/main/java/core/policy/EpsilonPolicy.java b/src/main/java/core/policy/EpsilonPolicy.java index 76bff45..7c34006 100644 --- a/src/main/java/core/policy/EpsilonPolicy.java +++ b/src/main/java/core/policy/EpsilonPolicy.java @@ -1,5 +1,11 @@ package core.policy; +/** + * Chooses the action with the highest value with probability: 1-Ɛ + Ɛ/|A| + * With probability Ɛ, a random action is taken (the highest-valued action included). + * + * @param Enum class of available actions in specific environment + */ public interface EpsilonPolicy extends Policy { float getEpsilon(); void setEpsilon(float epsilon); diff --git a/src/main/java/core/policy/GreedyPolicy.java b/src/main/java/core/policy/GreedyPolicy.java index 30d901f..210f766 100644 --- a/src/main/java/core/policy/GreedyPolicy.java +++ b/src/main/java/core/policy/GreedyPolicy.java @@ -6,6 +6,12 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +/** + * Always chooses the action with the highest value + * with ties broken arbitrarily. 
+ * + * @param Enum class of available action in specific environment + */ public class GreedyPolicy implements Policy { @Override diff --git a/src/main/java/core/policy/Policy.java b/src/main/java/core/policy/Policy.java index fcb9d04..85b9676 100644 --- a/src/main/java/core/policy/Policy.java +++ b/src/main/java/core/policy/Policy.java @@ -2,6 +2,12 @@ package core.policy; import java.util.Map; +/** + * Strategy to choose a specific action available in the agent's current + * state. + * + * @param Enum class of available action in specific environment + */ public interface Policy { A chooseAction(Map actionValues); } diff --git a/src/main/java/core/policy/RandomPolicy.java b/src/main/java/core/policy/RandomPolicy.java index ea99dd5..e989fc3 100644 --- a/src/main/java/core/policy/RandomPolicy.java +++ b/src/main/java/core/policy/RandomPolicy.java @@ -4,6 +4,11 @@ import core.RNG; import java.util.Map; +/** + * Chooses an action arbitrarily. + * + * @param Enum class of available action in specific environment + */ public class RandomPolicy implements Policy{ @Override public A chooseAction(Map actionValues) { diff --git a/src/main/java/evironment/antGame/AntObservation.java b/src/main/java/evironment/antGame/AntObservation.java index d497214..53915bc 100644 --- a/src/main/java/evironment/antGame/AntObservation.java +++ b/src/main/java/evironment/antGame/AntObservation.java @@ -1,6 +1,5 @@ package evironment.antGame; -import core.Observation; import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Getter; @@ -12,7 +11,7 @@ import java.awt.*; @AllArgsConstructor @Getter @Setter -public class AntObservation implements Observation { +public class AntObservation { private Cell cell; private Point pos;