add javadoc

2019-12-27 00:50:59 +01:00 · 2019-12-27 00:50:59 +01:00 · 64355e0b93
parent b2c3854b3a
commit 64355e0b93
25 changed files with 157 additions and 23 deletions
--- a/src/main/java/core/Action.java
+++ b/src/main/java/core/Action.java
@ -1,4 +0,0 @@
-package core;
-
-public interface Action {
-}
--- a/src/main/java/core/DeterministicStateActionTable.java
+++ b/src/main/java/core/DeterministicStateActionTable.java
@ -17,13 +17,17 @@ public class DeterministicStateActionTable<A extends Enum> implements StateActio
        this.discreteActionSpace = discreteActionSpace;
    }

-    /*
-       If the state is not present in the table at the time of
-       calling this method the DEFAULT_VALUE gets returned BUT
-       no the missing state is not inserted into the table!
-
-       Inserting of missing states is ONLY done in "setValue()"
-       method.
+    /**
+     * If the state is not present in the table at the time of
+     * calling this method the DEFAULT_VALUE gets returned BUT
+     * the missing state is not inserted into the table!
+     *
+     * Inserting of missing states is ONLY done in "setValue()"
+     * method.
+     *
+     * @param state given state
+     * @param action given action
+     * @return estimated value of the state-action pair
     */
    @Override
    public double getValue(State state, A action) {
@ -34,11 +38,15 @@ public class DeterministicStateActionTable<A extends Enum> implements StateActio
        return DEFAULT_VALUE;
    }

-    /*
-       Update the value of an action for a specific state.
-       If the state is not present in the table yet,
-       it will get stored in combination with every action
-       from the action space initialized with the default value.
+    /**
+     * Update the value of an action for a specific state.
+     * If the state is not present in the table yet,
+     * it will get stored in combination with every action
+     * from the action space initialized with the default value.
+     *
+     * @param state given state
+     * @param action given action
+     * @param value new estimate of the state-action pair
     */
    @Override
    public void setValue(State state, A action, double value) {
@ -52,6 +60,10 @@ public class DeterministicStateActionTable<A extends Enum> implements StateActio
        actionValues.put(action, value);
    }

+    /**
+     * @param state given state
+     * @return all available actions in the given state and their corresponding estimated values
+     */
    @Override
    public Map<A, Double> getActionValues(State state) {
        if(table.get(state) == null){
@ -60,6 +72,9 @@ public class DeterministicStateActionTable<A extends Enum> implements StateActio
        return table.get(state);
    }

+    /**
+     * @return Map with initial values for every available action
+     */
    private Map<A, Double> createDefaultActionValues(){
        final Map<A, Double> defaultActionValues = new LinkedHashMap<>();
        for(A action: discreteActionSpace){
--- a/src/main/java/core/DiscreteActionSpace.java
+++ b/src/main/java/core/DiscreteActionSpace.java
@ -1,5 +1,14 @@
 package core;

+/**
+ * Collection of all available actions for a specific
+ * environment. It defines the actions the agent is able
+ * to choose from at every time step t.
+ * Extends {@link Iterable} for easy
+ * "immutable" iteration of the action space.
+ *
+ * @param <A> Actions as defined in an {@link Enum}-based class
+ */
 public interface DiscreteActionSpace<A extends Enum> extends Iterable<A> {
    int getNumberOfActions();
    void addAction(A a);
--- a/src/main/java/core/Environment.java
+++ b/src/main/java/core/Environment.java
@ -1,5 +1,17 @@
 package core;

+/**
+ * Interface of the environment as defined in the RL framework.
+ * An agent is able to interact with its environment, submitting
+ * an action (.step()) at every time step t and receiving a reward and a new
+ * observation.
+ * {@link StepResultEnvironment} is the data type that combines all the
+ * received information.
+ * After each episode the environment is reset to its original state,
+ * returning the starting state.
+ *
+ * @param <A> related {@link Enum} which defines the available actions for this environment
+ */
 public interface Environment<A extends Enum> {
    StepResultEnvironment step(A action);
    State reset();
--- a/src/main/java/core/ListDiscreteActionSpace.java
+++ b/src/main/java/core/ListDiscreteActionSpace.java
@ -3,6 +3,13 @@ package core;
 import java.io.Serializable;
 import java.util.*;

+/**
+ * Implementation of a discrete action space.
+ * "Discrete" because actions are exclusively defined as Enum,
+ * realized as generic "A".
+ *
+ * @param <A> Enum class of actions in the specific environment
+ */
 public class ListDiscreteActionSpace<A extends Enum> implements DiscreteActionSpace<A>, Serializable{
    private static final long serialVersionUID = 1L;
    private List<A> actions;
--- a/src/main/java/core/Observation.java
+++ b/src/main/java/core/Observation.java
@ -1,4 +0,0 @@
-package core;
-
-public interface Observation {
-}
--- a/src/main/java/core/RNG.java
+++ b/src/main/java/core/RNG.java
@ -3,6 +3,15 @@ package core;
 import java.security.SecureRandom;
 import java.util.Random;

+/**
+ * To ensure deterministic behaviour across repeated program executions,
+ * this class is used for all random number generation.
+ * Do not use Math.random()!
+ * It is not necessary to set a seed explicitly, because a default one
+ * (123) is defined. Nonetheless, a set-method is exposed which should
+ * ONLY be called at the very beginning of the program. (Do not reseed during
+ * execution.)
+ */
 public class RNG {
    private static SecureRandom rng;
    private static int seed = 123;
--- a/src/main/java/core/SaveState.java
+++ b/src/main/java/core/SaveState.java
@ -5,6 +5,15 @@ import lombok.Getter;

 import java.io.Serializable;

+/**
+ * Small datatype to combine needed information to save and recover
+ * the learning progress. Essentially, only the Q-Table needs to be saved
+ * for all tabular methods because they all try to estimate
+ * the action values until convergence.
+ * For episodic methods, the number of episodes so far is also saved.
+ *
+ * @param <A> enum class of action for a specific environment
+ */
@AllArgsConstructor
@Getter
 public class SaveState<A extends Enum> implements Serializable {
--- a/src/main/java/core/State.java
+++ b/src/main/java/core/State.java
@ -1,5 +1,8 @@
 package core;

+/**
+ *
+ */
 public interface State {
    String toString();
    int hashCode();
--- a/src/main/java/core/StateActionTable.java
+++ b/src/main/java/core/StateActionTable.java
@ -2,6 +2,12 @@ package core;

 import java.util.Map;

+/**
+ * Q-Table which saves all seen states, all available actions for each state
+ * and their value (state-action values/ action values).
+ *
+ * @param <A> Enum class of available actions in the specific environment
+ */
 public interface StateActionTable<A extends Enum> {
    double DEFAULT_VALUE = 0.0;

--- a/src/main/java/core/StepResult.java
+++ b/src/main/java/core/StepResult.java
@ -3,6 +3,14 @@ package core;
 import lombok.AllArgsConstructor;
 import lombok.Getter;

+/**
+ * Almost the same as the data type {@link StepResultEnvironment},
+ * but includes the last action taken as well.
+ * The environment does not return the last action taken in its result
+ * {@link StepResultEnvironment}, but it is needed for the prediction problem.
+ *
+ * @param <A> Enum class of last action taken
+ */
@AllArgsConstructor
@Getter
 public class StepResult<A extends Enum> {
--- a/src/main/java/core/StepResultEnvironment.java
+++ b/src/main/java/core/StepResultEnvironment.java
@ -4,6 +4,13 @@ import lombok.AllArgsConstructor;
 import lombok.Getter;
 import lombok.Setter;

+/**
+ * After each time step the environment returns a reward
+ * for the previous action (still the same time step t), the resulting
+ * observation/state (the environment is in charge of processing the observation
+ * and building a Markov state) and the information whether or not the episode
+ * has ended.
+ */
@Getter
@Setter
@AllArgsConstructor
--- a/src/main/java/core/Util.java
+++ b/src/main/java/core/Util.java
@ -1,6 +1,15 @@
 package core;

+
 public class Util {
+    /**
+     * Checks whether or not the given string input is a number or not.
+     * Used for example to check the input field to trigger additional
+     * episodes.
+     *
+     * @param strNum string input from user
+     * @return true if numeric and parsable, false if not
+     */
    public static boolean isNumeric(String strNum) {
        if (strNum == null) {
            return false;
--- a/src/main/java/core/algo/Learning.java
+++ b/src/main/java/core/algo/Learning.java
@ -14,6 +14,10 @@ import java.util.List;
 import java.util.Set;
 import java.util.concurrent.CopyOnWriteArrayList;

+/**
+ *
+ * @param <A> discrete action type for a specific environment
+ */
@Getter
 public abstract class Learning<A extends Enum>{
    protected Policy<A> policy;
--- a/src/main/java/core/algo/Method.java
+++ b/src/main/java/core/algo/Method.java
@ -1,5 +1,9 @@
 package core.algo;

+/**
+ * Instead of reflection, this enum is used to determine
+ * which RL algorithm should be used.
+ */
 public enum Method {
    MC_ONPOLICY_EGREEDY, TD_ONPOLICY
 }
--- a/src/main/java/core/gui/LearningView.java
+++ b/src/main/java/core/gui/LearningView.java
@ -2,6 +2,9 @@ package core.gui;

 import java.util.List;

+/**
+ * Switched out Views have to implement this interface.
+ */
 public interface LearningView {
    void repaintEnvironment();
    void updateLearningInfoPanel();
--- a/src/main/java/core/gui/View.java
+++ b/src/main/java/core/gui/View.java
@ -61,7 +61,7 @@ public class View<A extends Enum> implements LearningView{
        fileMenu.add(new JMenuItem(new AbstractAction("Save") {
            @Override
            public void actionPerformed(ActionEvent e) {
-                String fileName = JOptionPane.showInputDialog("Enter file name", "save");
+                String fileName = JOptionPane.showInputDialog("Enter file name", "path/to/file");
                if(fileName != null){
                    viewListener.onSaveState(fileName);
                }
--- a/src/main/java/core/gui/Visualizable.java
+++ b/src/main/java/core/gui/Visualizable.java
@ -2,6 +2,13 @@ package core.gui;

 import javax.swing.*;

+/**
+ * Classes that implement this interface are able to create a visual component
+ * that can be utilized and displayed by the view. It is optional for an environment
+ * to define a GUI (the View checks for it via {@code instanceof}).
+ * Furthermore, a state implements this interface, so it can be displayed from the
+ * state-action table.
+ */
 public interface Visualizable {
    JComponent visualize();
 }
--- a/src/main/java/core/listener/LearningListener.java
+++ b/src/main/java/core/listener/LearningListener.java
@ -2,6 +2,9 @@ package core.listener;

 import java.util.List;

+/**
+ * Methods that get triggered to inform about the current learning process.
+ */
 public interface LearningListener{
    void onLearningStart();
    void onLearningEnd();
--- a/src/main/java/core/listener/ViewListener.java
+++ b/src/main/java/core/listener/ViewListener.java
@ -1,5 +1,10 @@
 package core.listener;

+/**
+ * Interface that the controller implements and that gets passed to
+ * the View. (Preventing the controller from adding all
+ * ActionListeners to view elements)
+ */
 public interface ViewListener {
    void onEpsilonChange(float epsilon);
    void onDelayChange(int delay);
--- a/src/main/java/core/policy/EpsilonPolicy.java
+++ b/src/main/java/core/policy/EpsilonPolicy.java
@ -1,5 +1,11 @@
 package core.policy;

+/**
+ * Chooses the action with the highest value with probability 1-Ɛ + Ɛ/|A|.
+ * With probability Ɛ, a random action is taken (the highest-valued action included).
+ *
+ * @param <A> Enum class of available actions in the specific environment
+ */
 public interface EpsilonPolicy<A extends Enum> extends Policy<A> {
    float getEpsilon();
    void setEpsilon(float epsilon);
--- a/src/main/java/core/policy/GreedyPolicy.java
+++ b/src/main/java/core/policy/GreedyPolicy.java
@ -6,6 +6,12 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;

+/**
+ * Always chooses the action with the highest value
+ * with ties broken arbitrarily.
+ *
+ * @param <A> Enum class of available actions in the specific environment
+ */
 public class GreedyPolicy<A extends Enum> implements Policy<A> {

    @Override
--- a/src/main/java/core/policy/Policy.java
+++ b/src/main/java/core/policy/Policy.java
@ -2,6 +2,12 @@ package core.policy;

 import java.util.Map;

+/**
+ * Strategy to choose a specific action available in the agent's current
+ * state.
+ *
+ * @param <A> Enum class of available actions in the specific environment
+ */
 public interface Policy<A extends Enum> {
    A chooseAction(Map<A, Double> actionValues);
 }
--- a/src/main/java/core/policy/RandomPolicy.java
+++ b/src/main/java/core/policy/RandomPolicy.java
@ -4,6 +4,11 @@ import core.RNG;

 import java.util.Map;

+/**
+ * Chooses an action arbitrarily.
+ *
+ * @param <A> Enum class of available actions in the specific environment
+ */
 public class RandomPolicy<A extends Enum> implements Policy<A>{
    @Override
    public A chooseAction(Map<A, Double> actionValues) {
--- a/src/main/java/evironment/antGame/AntObservation.java
+++ b/src/main/java/evironment/antGame/AntObservation.java
@ -1,6 +1,5 @@
 package evironment.antGame;

-import core.Observation;
 import lombok.AccessLevel;
 import lombok.AllArgsConstructor;
 import lombok.Getter;
@ -12,7 +11,7 @@ import java.awt.*;
@AllArgsConstructor
@Getter
@Setter
-public class AntObservation implements Observation {
+public class AntObservation {
      private Cell cell;
      private Point pos;