add javadoc
This commit is contained in:
parent
b2c3854b3a
commit
64355e0b93
|
@ -1,4 +0,0 @@
|
|||
package core;
|
||||
|
||||
public interface Action {
|
||||
}
|
|
@ -17,13 +17,17 @@ public class DeterministicStateActionTable<A extends Enum> implements StateActio
|
|||
this.discreteActionSpace = discreteActionSpace;
|
||||
}
|
||||
|
||||
/*
|
||||
If the state is not present in the table at the time of
|
||||
calling this method the DEFAULT_VALUE gets returned BUT
|
||||
no the missing state is not inserted into the table!
|
||||
|
||||
Inserting of missing states is ONLY done in "setValue()"
|
||||
method.
|
||||
/**
|
||||
* If the state is not present in the table at the time of
|
||||
* calling this method the DEFAULT_VALUE gets returned BUT
|
||||
* the missing state is NOT inserted into the table!
|
||||
*
|
||||
* Inserting of missing states is ONLY done in "setValue()"
|
||||
* method.
|
||||
*
|
||||
* @param state given state
|
||||
* @param action given action
|
||||
* @return estimated value of the state-action pair
|
||||
*/
|
||||
@Override
|
||||
public double getValue(State state, A action) {
|
||||
|
@ -34,11 +38,15 @@ public class DeterministicStateActionTable<A extends Enum> implements StateActio
|
|||
return DEFAULT_VALUE;
|
||||
}
|
||||
|
||||
/*
|
||||
Update the value of an action for a specific state.
|
||||
If the state is not present in the table yet,
|
||||
it will get stored in combination with every action
|
||||
from the action space initialized with the default value.
|
||||
/**
|
||||
* Update the value of an action for a specific state.
|
||||
* If the state is not present in the table yet,
|
||||
* it will get stored in combination with every action
|
||||
* from the action space initialized with the default value.
|
||||
*
|
||||
* @param state given state
|
||||
* @param action given action
|
||||
* @param value new estimate of the state-action pair
|
||||
*/
|
||||
@Override
|
||||
public void setValue(State state, A action, double value) {
|
||||
|
@ -52,6 +60,10 @@ public class DeterministicStateActionTable<A extends Enum> implements StateActio
|
|||
actionValues.put(action, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param state given state
|
||||
* @return all available actions in the given state and their corresponding estimated values
|
||||
*/
|
||||
@Override
|
||||
public Map<A, Double> getActionValues(State state) {
|
||||
if(table.get(state) == null){
|
||||
|
@ -60,6 +72,9 @@ public class DeterministicStateActionTable<A extends Enum> implements StateActio
|
|||
return table.get(state);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Map with initial values for every available action
|
||||
*/
|
||||
private Map<A, Double> createDefaultActionValues(){
|
||||
final Map<A, Double> defaultActionValues = new LinkedHashMap<>();
|
||||
for(A action: discreteActionSpace){
|
||||
|
|
|
@ -1,5 +1,14 @@
|
|||
package core;
|
||||
|
||||
/**
|
||||
* Collection of all available actions for a specific
|
||||
* environment. It defines the actions the agent is able
|
||||
* to choose from at every timestamp T.
|
||||
* Extends {@link Iterable} for easy
|
||||
* "immutable" iteration of the action space.
|
||||
*
|
||||
* @param <A> Actions as defined in an {@link Enum}-based class
|
||||
*/
|
||||
public interface DiscreteActionSpace<A extends Enum> extends Iterable<A> {
|
||||
int getNumberOfActions();
|
||||
void addAction(A a);
|
||||
|
|
|
@ -1,5 +1,17 @@
|
|||
package core;
|
||||
|
||||
/**
|
||||
* Interface of the environment as defined in the RL framework.
|
||||
* An agent is able to interact with its environment, submitting
|
||||
* an action (.step()) every timestamp T and receiving a reward and a new
|
||||
* observation.
|
||||
* {@link StepResultEnvironment} is the data type that combines all the
|
||||
* received information.
|
||||
* After each episode the environment is reset to its original state,
|
||||
* returning the starting state.
|
||||
*
|
||||
* @param <A> related {@link Enum} which defines the available actions for this environment
|
||||
*/
|
||||
public interface Environment<A extends Enum> {
|
||||
StepResultEnvironment step(A action);
|
||||
State reset();
|
||||
|
|
|
@ -3,6 +3,13 @@ package core;
|
|||
import java.io.Serializable;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Implementation of a discrete action space.
|
||||
* "Discrete" because actions are exclusively defined as Enum,
|
||||
* realized as generic "A".
|
||||
*
|
||||
* @param <A> Enum class of actions in the specific environment
|
||||
*/
|
||||
public class ListDiscreteActionSpace<A extends Enum> implements DiscreteActionSpace<A>, Serializable{
|
||||
private static final long serialVersionUID = 1L;
|
||||
private List<A> actions;
|
||||
|
|
|
@ -1,4 +0,0 @@
|
|||
package core;
|
||||
|
||||
public interface Observation {
|
||||
}
|
|
@ -3,6 +3,15 @@ package core;
|
|||
import java.security.SecureRandom;
|
||||
import java.util.Random;
|
||||
|
||||
/**
|
||||
* To ensure deterministic behaviour of repeating program executions,
|
||||
* this class is used for all random number generation methods.
|
||||
* Do not use Math.random()!
|
||||
* It is not necessary to set a seed explicitly, because a default one
|
||||
* "123" is defined. Nonetheless a set-method is exposed which should
|
||||
* ONLY be called at the very beginning of the program. (Do not reseed during
|
||||
* execution)
|
||||
*/
|
||||
public class RNG {
|
||||
private static SecureRandom rng;
|
||||
private static int seed = 123;
|
||||
|
|
|
@ -5,6 +5,15 @@ import lombok.Getter;
|
|||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* Small datatype to combine needed information to save and recover
|
||||
* the learning progress. Essentially, only the Q-Table needs to be saved
|
||||
* for all tabular methods because they all try to estimate
|
||||
* the action values until convergence.
|
||||
* For episodic methods, the number of episodes so far is also saved.
|
||||
*
|
||||
* @param <A> enum class of action for a specific environment
|
||||
*/
|
||||
@AllArgsConstructor
|
||||
@Getter
|
||||
public class SaveState<A extends Enum> implements Serializable {
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
package core;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public interface State {
|
||||
String toString();
|
||||
int hashCode();
|
||||
|
|
|
@ -2,6 +2,12 @@ package core;
|
|||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Q-Table which saves all seen states, all available actions for each state
|
||||
* and their value (state-action values/ action values).
|
||||
*
|
||||
* @param <A> Enum class of the available actions
|
||||
*/
|
||||
public interface StateActionTable<A extends Enum> {
|
||||
double DEFAULT_VALUE = 0.0;
|
||||
|
||||
|
|
|
@ -3,6 +3,14 @@ package core;
|
|||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
|
||||
/**
|
||||
* Almost the same as the data type {@link StepResultEnvironment}
|
||||
* but includes the last action taken as well.
|
||||
* The environment does not return the last action taken in its result
|
||||
* {@link StepResultEnvironment}, but it is needed for the prediction problem.
|
||||
*
|
||||
* @param <A> Enum class of last action taken
|
||||
*/
|
||||
@AllArgsConstructor
|
||||
@Getter
|
||||
public class StepResult<A extends Enum> {
|
||||
|
|
|
@ -4,6 +4,13 @@ import lombok.AllArgsConstructor;
|
|||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
/**
|
||||
* After each timestamp the environment returns a reward
|
||||
* for the previous action (still the same timestamp t), the resulting
|
||||
* observation/state (environment is in charge to process the observation
|
||||
* and build a markov state) and the information whether or not the episode
|
||||
* has ended.
|
||||
*/
|
||||
@Getter
|
||||
@Setter
|
||||
@AllArgsConstructor
|
||||
|
|
|
@ -1,6 +1,15 @@
|
|||
package core;
|
||||
|
||||
|
||||
public class Util {
|
||||
/**
|
||||
* Checks whether the given string input is a number.
|
||||
* Used for example to check the input field to trigger additional
|
||||
* episodes.
|
||||
*
|
||||
* @param strNum string input from user
|
||||
* @return true if numeric and parsable, false if not
|
||||
*/
|
||||
public static boolean isNumeric(String strNum) {
|
||||
if (strNum == null) {
|
||||
return false;
|
||||
|
|
|
@ -14,6 +14,10 @@ import java.util.List;
|
|||
import java.util.Set;
|
||||
import java.util.concurrent.CopyOnWriteArrayList;
|
||||
|
||||
/**
|
||||
*
|
||||
* @param <A> discrete action type for a specific environment
|
||||
*/
|
||||
@Getter
|
||||
public abstract class Learning<A extends Enum>{
|
||||
protected Policy<A> policy;
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
package core.algo;
|
||||
|
||||
/**
|
||||
* Instead of reflections this enum is used to determine
|
||||
* which RL-algorithm should be used.
|
||||
*/
|
||||
public enum Method {
|
||||
MC_ONPOLICY_EGREEDY, TD_ONPOLICY
|
||||
}
|
||||
|
|
|
@ -2,6 +2,9 @@ package core.gui;
|
|||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Switched out Views have to implement this interface.
|
||||
*/
|
||||
public interface LearningView {
|
||||
void repaintEnvironment();
|
||||
void updateLearningInfoPanel();
|
||||
|
|
|
@ -61,7 +61,7 @@ public class View<A extends Enum> implements LearningView{
|
|||
fileMenu.add(new JMenuItem(new AbstractAction("Save") {
|
||||
@Override
|
||||
public void actionPerformed(ActionEvent e) {
|
||||
String fileName = JOptionPane.showInputDialog("Enter file name", "save");
|
||||
String fileName = JOptionPane.showInputDialog("Enter file name", "path/to/file");
|
||||
if(fileName != null){
|
||||
viewListener.onSaveState(fileName);
|
||||
}
|
||||
|
|
|
@ -2,6 +2,13 @@ package core.gui;
|
|||
|
||||
import javax.swing.*;
|
||||
|
||||
/**
|
||||
* Classes that implement this interface are able to create a visual component
|
||||
* that can be utilized and displayed by the view. It is optional for an environment
|
||||
* to define a GUI (the View checks for it via "instanceof").
|
||||
* Furthermore, a state may implement this interface, so it can be displayed from the
|
||||
* state-action-table.
|
||||
*/
|
||||
public interface Visualizable {
|
||||
JComponent visualize();
|
||||
}
|
||||
|
|
|
@ -2,6 +2,9 @@ package core.listener;
|
|||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Methods that get triggered to inform about the current learning process.
|
||||
*/
|
||||
public interface LearningListener{
|
||||
void onLearningStart();
|
||||
void onLearningEnd();
|
||||
|
|
|
@ -1,5 +1,10 @@
|
|||
package core.listener;
|
||||
|
||||
/**
|
||||
* Interface the controller is implementing and gets passed to
|
||||
* the View. (Preventing the controller from adding all
|
||||
* ActionListeners to view elements)
|
||||
*/
|
||||
public interface ViewListener {
|
||||
void onEpsilonChange(float epsilon);
|
||||
void onDelayChange(int delay);
|
||||
|
|
|
@ -1,5 +1,11 @@
|
|||
package core.policy;
|
||||
|
||||
/**
|
||||
* Chooses the action with the highest value with probability 1-Ɛ + Ɛ/|A|.
|
||||
* With probability Ɛ, a random action is taken (highest-value option included).
|
||||
*
|
||||
* @param <A> Enum class of available action in specific environment
|
||||
*/
|
||||
public interface EpsilonPolicy<A extends Enum> extends Policy<A> {
|
||||
float getEpsilon();
|
||||
void setEpsilon(float epsilon);
|
||||
|
|
|
@ -6,6 +6,12 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Always chooses the action with the highest value
|
||||
* with ties broken arbitrarily.
|
||||
*
|
||||
* @param <A> Enum class of available action in specific environment
|
||||
*/
|
||||
public class GreedyPolicy<A extends Enum> implements Policy<A> {
|
||||
|
||||
@Override
|
||||
|
|
|
@ -2,6 +2,12 @@ package core.policy;
|
|||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Strategy to choose a specific action available in the agent's current
|
||||
* state.
|
||||
*
|
||||
* @param <A> Enum class of available action in specific environment
|
||||
*/
|
||||
public interface Policy<A extends Enum> {
|
||||
A chooseAction(Map<A, Double> actionValues);
|
||||
}
|
||||
|
|
|
@ -4,6 +4,11 @@ import core.RNG;
|
|||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Chooses an action arbitrarily.
|
||||
*
|
||||
* @param <A> Enum class of available action in specific environment
|
||||
*/
|
||||
public class RandomPolicy<A extends Enum> implements Policy<A>{
|
||||
@Override
|
||||
public A chooseAction(Map<A, Double> actionValues) {
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
package evironment.antGame;
|
||||
|
||||
import core.Observation;
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
|
@ -12,7 +11,7 @@ import java.awt.*;
|
|||
@AllArgsConstructor
|
||||
@Getter
|
||||
@Setter
|
||||
public class AntObservation implements Observation {
|
||||
public class AntObservation {
|
||||
private Cell cell;
|
||||
private Point pos;
|
||||
|
||||
|
|
Loading…
Reference in New Issue