add javadoc
This commit is contained in:
parent
b2c3854b3a
commit
64355e0b93
|
@ -1,4 +0,0 @@
|
||||||
package core;
|
|
||||||
|
|
||||||
public interface Action {
|
|
||||||
}
|
|
|
@ -17,13 +17,17 @@ public class DeterministicStateActionTable<A extends Enum> implements StateActio
|
||||||
this.discreteActionSpace = discreteActionSpace;
|
this.discreteActionSpace = discreteActionSpace;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/**
|
||||||
If the state is not present in the table at the time of
|
* If the state is not present in the table at the time of
|
||||||
calling this method the DEFAULT_VALUE gets returned BUT
|
* calling this method the DEFAULT_VALUE gets returned BUT
|
||||||
no the missing state is not inserted into the table!
|
* the missing state is not inserted into the table!
|
||||||
|
*
|
||||||
Inserting of missing states is ONLY done in "setValue()"
|
* Inserting of missing states is ONLY done in "setValue()"
|
||||||
method.
|
* method.
|
||||||
|
*
|
||||||
|
* @param state given state
|
||||||
|
* @param action given action
|
||||||
|
* @return estimate value of state-action pair
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public double getValue(State state, A action) {
|
public double getValue(State state, A action) {
|
||||||
|
@ -34,11 +38,15 @@ public class DeterministicStateActionTable<A extends Enum> implements StateActio
|
||||||
return DEFAULT_VALUE;
|
return DEFAULT_VALUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/**
|
||||||
Update the value of an action for a specific state.
|
* Update the value of an action for a specific state.
|
||||||
If the state is not present in the table yet,
|
* If the state is not present in the table yet,
|
||||||
it will get stored in combination with every action
|
* it will get stored in combination with every action
|
||||||
from the action space initialized with the default value.
|
* from the action space initialized with the default value.
|
||||||
|
*
|
||||||
|
* @param state given state
|
||||||
|
* @param action given action
|
||||||
|
* @param value new estimate of the state-action pair
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void setValue(State state, A action, double value) {
|
public void setValue(State state, A action, double value) {
|
||||||
|
@ -52,6 +60,10 @@ public class DeterministicStateActionTable<A extends Enum> implements StateActio
|
||||||
actionValues.put(action, value);
|
actionValues.put(action, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param state given state
|
||||||
|
* @return all available actions in the given state and their corresponding estimated values
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public Map<A, Double> getActionValues(State state) {
|
public Map<A, Double> getActionValues(State state) {
|
||||||
if(table.get(state) == null){
|
if(table.get(state) == null){
|
||||||
|
@ -60,6 +72,9 @@ public class DeterministicStateActionTable<A extends Enum> implements StateActio
|
||||||
return table.get(state);
|
return table.get(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return Map with initial values for every available action
|
||||||
|
*/
|
||||||
private Map<A, Double> createDefaultActionValues(){
|
private Map<A, Double> createDefaultActionValues(){
|
||||||
final Map<A, Double> defaultActionValues = new LinkedHashMap<>();
|
final Map<A, Double> defaultActionValues = new LinkedHashMap<>();
|
||||||
for(A action: discreteActionSpace){
|
for(A action: discreteActionSpace){
|
||||||
|
|
|
@ -1,5 +1,14 @@
|
||||||
package core;
|
package core;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Collection of all available actions for a specific
|
||||||
|
* environment. It defines the actions the agent is able
|
||||||
|
* to choose from at every timestamp T.
|
||||||
|
* Extends {@link Iterable} for easy
|
||||||
|
* "immutable" iteration of the action space.
|
||||||
|
*
|
||||||
|
* @param <A> actions as defined in an {@code Enum}-based class
|
||||||
|
*/
|
||||||
public interface DiscreteActionSpace<A extends Enum> extends Iterable<A> {
|
public interface DiscreteActionSpace<A extends Enum> extends Iterable<A> {
|
||||||
int getNumberOfActions();
|
int getNumberOfActions();
|
||||||
void addAction(A a);
|
void addAction(A a);
|
||||||
|
|
|
@ -1,5 +1,17 @@
|
||||||
package core;
|
package core;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Interface of the environment as defined in the RL framework.
|
||||||
|
* An agent is able to interact with its environment, submitting
|
||||||
|
* an action (.step()) every timestamp T and receiving a reward and a new
|
||||||
|
* observation.
|
||||||
|
* {@link StepResultEnvironment} is the datatype to combine all the
|
||||||
|
* received information.
|
||||||
|
* After each episode the environment is reset to its original state,
|
||||||
|
* returning the starting state.
|
||||||
|
*
|
||||||
|
* @param <A> related {@code Enum} which defines the available actions for this environment
|
||||||
|
*/
|
||||||
public interface Environment<A extends Enum> {
|
public interface Environment<A extends Enum> {
|
||||||
StepResultEnvironment step(A action);
|
StepResultEnvironment step(A action);
|
||||||
State reset();
|
State reset();
|
||||||
|
|
|
@ -3,6 +3,13 @@ package core;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implementation of a discrete action space.
|
||||||
|
* "Discrete" because actions are exclusively defined as Enum,
|
||||||
|
* realized as generic "A".
|
||||||
|
*
|
||||||
|
* @param <A> Enum class of actions in the specific environment
|
||||||
|
*/
|
||||||
public class ListDiscreteActionSpace<A extends Enum> implements DiscreteActionSpace<A>, Serializable{
|
public class ListDiscreteActionSpace<A extends Enum> implements DiscreteActionSpace<A>, Serializable{
|
||||||
private static final long serialVersionUID = 1L;
|
private static final long serialVersionUID = 1L;
|
||||||
private List<A> actions;
|
private List<A> actions;
|
||||||
|
|
|
@ -1,4 +0,0 @@
|
||||||
package core;
|
|
||||||
|
|
||||||
public interface Observation {
|
|
||||||
}
|
|
|
@ -3,6 +3,15 @@ package core;
|
||||||
import java.security.SecureRandom;
|
import java.security.SecureRandom;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* To ensure deterministic behaviour of repeating program executions,
|
||||||
|
* this class is used for all random number generation methods.
|
||||||
|
* Do not use Math.random()!
|
||||||
|
* It is not necessary to set a seed explicitly, because a default one
|
||||||
|
* "123" is defined. Nonetheless a set-method is exposed which should
|
||||||
|
* ONLY be called at the very beginning of the program. (Do not reseed during
|
||||||
|
* execution)
|
||||||
|
*/
|
||||||
public class RNG {
|
public class RNG {
|
||||||
private static SecureRandom rng;
|
private static SecureRandom rng;
|
||||||
private static int seed = 123;
|
private static int seed = 123;
|
||||||
|
|
|
@ -5,6 +5,15 @@ import lombok.Getter;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Small datatype to combine needed information to save and recover
|
||||||
|
* the learning progress. Essentially, only the Q-Table needs to be saved
|
||||||
|
* for all tabular methods because they all try to estimate
|
||||||
|
* the action values until convergence.
|
||||||
|
* For episodic methods, the number of episodes so far is also saved.
|
||||||
|
*
|
||||||
|
* @param <A> enum class of action for a specific environment
|
||||||
|
*/
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
@Getter
|
@Getter
|
||||||
public class SaveState<A extends Enum> implements Serializable {
|
public class SaveState<A extends Enum> implements Serializable {
|
||||||
|
|
|
@ -1,5 +1,8 @@
|
||||||
package core;
|
package core;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
public interface State {
|
public interface State {
|
||||||
String toString();
|
String toString();
|
||||||
int hashCode();
|
int hashCode();
|
||||||
|
|
|
@ -2,6 +2,12 @@ package core;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Q-Table which saves all seen states, all available actions for each state
|
||||||
|
* and their value (state-action values/ action values).
|
||||||
|
*
|
||||||
|
* @param <A> Enum class of available actions in the specific environment
|
||||||
|
*/
|
||||||
public interface StateActionTable<A extends Enum> {
|
public interface StateActionTable<A extends Enum> {
|
||||||
double DEFAULT_VALUE = 0.0;
|
double DEFAULT_VALUE = 0.0;
|
||||||
|
|
||||||
|
|
|
@ -3,6 +3,14 @@ package core;
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Almost the same as the datatype {@link StepResultEnvironment}
|
||||||
|
* but includes the last action taken as well.
|
||||||
|
* The environment does not return the last action taken in its result
|
||||||
|
* {@link StepResultEnvironment}, but it is needed for the prediction problem.
|
||||||
|
*
|
||||||
|
* @param <A> Enum class of last action taken
|
||||||
|
*/
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
@Getter
|
@Getter
|
||||||
public class StepResult<A extends Enum> {
|
public class StepResult<A extends Enum> {
|
||||||
|
|
|
@ -4,6 +4,13 @@ import lombok.AllArgsConstructor;
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
import lombok.Setter;
|
import lombok.Setter;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* After each timestamp the environment returns a reward
|
||||||
|
* for the previous action (still the same timestamp t), the resulting
|
||||||
|
* observation/state (environment is in charge to process the observation
|
||||||
|
* and build a markov state) and the information whether or not the episode
|
||||||
|
* has ended.
|
||||||
|
*/
|
||||||
@Getter
|
@Getter
|
||||||
@Setter
|
@Setter
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
|
|
|
@ -1,6 +1,15 @@
|
||||||
package core;
|
package core;
|
||||||
|
|
||||||
|
|
||||||
public class Util {
|
public class Util {
|
||||||
|
/**
|
||||||
|
* Checks whether the given string input is a number.
|
||||||
|
* Used for example to check the input field to trigger additional
|
||||||
|
* episodes.
|
||||||
|
*
|
||||||
|
* @param strNum string input from user
|
||||||
|
* @return true if numeric and parsable, false if not
|
||||||
|
*/
|
||||||
public static boolean isNumeric(String strNum) {
|
public static boolean isNumeric(String strNum) {
|
||||||
if (strNum == null) {
|
if (strNum == null) {
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -14,6 +14,10 @@ import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.concurrent.CopyOnWriteArrayList;
|
import java.util.concurrent.CopyOnWriteArrayList;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param <A> discrete action type for a specific environment
|
||||||
|
*/
|
||||||
@Getter
|
@Getter
|
||||||
public abstract class Learning<A extends Enum>{
|
public abstract class Learning<A extends Enum>{
|
||||||
protected Policy<A> policy;
|
protected Policy<A> policy;
|
||||||
|
|
|
@ -1,5 +1,9 @@
|
||||||
package core.algo;
|
package core.algo;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Instead of reflections this enum is used to determine
|
||||||
|
* which RL-algorithm should be used.
|
||||||
|
*/
|
||||||
public enum Method {
|
public enum Method {
|
||||||
MC_ONPOLICY_EGREEDY, TD_ONPOLICY
|
MC_ONPOLICY_EGREEDY, TD_ONPOLICY
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,9 @@ package core.gui;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Switched out Views have to implement this interface.
|
||||||
|
*/
|
||||||
public interface LearningView {
|
public interface LearningView {
|
||||||
void repaintEnvironment();
|
void repaintEnvironment();
|
||||||
void updateLearningInfoPanel();
|
void updateLearningInfoPanel();
|
||||||
|
|
|
@ -61,7 +61,7 @@ public class View<A extends Enum> implements LearningView{
|
||||||
fileMenu.add(new JMenuItem(new AbstractAction("Save") {
|
fileMenu.add(new JMenuItem(new AbstractAction("Save") {
|
||||||
@Override
|
@Override
|
||||||
public void actionPerformed(ActionEvent e) {
|
public void actionPerformed(ActionEvent e) {
|
||||||
String fileName = JOptionPane.showInputDialog("Enter file name", "save");
|
String fileName = JOptionPane.showInputDialog("Enter file name", "path/to/file");
|
||||||
if(fileName != null){
|
if(fileName != null){
|
||||||
viewListener.onSaveState(fileName);
|
viewListener.onSaveState(fileName);
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,13 @@ package core.gui;
|
||||||
|
|
||||||
import javax.swing.*;
|
import javax.swing.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Classes that implement this interface are able to create a visual component
|
||||||
|
* that can be utilized and displayed by the view. It is optional for an environment
|
||||||
|
* to define a GUI (View is checking it via "instanceof").
|
||||||
|
* Furthermore, a state can implement this interface so it can be displayed from the
|
||||||
|
* state-action-table.
|
||||||
|
*/
|
||||||
public interface Visualizable {
|
public interface Visualizable {
|
||||||
JComponent visualize();
|
JComponent visualize();
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,9 @@ package core.listener;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Methods that get triggered to inform about the current learning process.
|
||||||
|
*/
|
||||||
public interface LearningListener{
|
public interface LearningListener{
|
||||||
void onLearningStart();
|
void onLearningStart();
|
||||||
void onLearningEnd();
|
void onLearningEnd();
|
||||||
|
|
|
@ -1,5 +1,10 @@
|
||||||
package core.listener;
|
package core.listener;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Interface the controller is implementing and gets passed to
|
||||||
|
* the View. (Preventing the controller from adding all
|
||||||
|
* ActionListeners to view elements)
|
||||||
|
*/
|
||||||
public interface ViewListener {
|
public interface ViewListener {
|
||||||
void onEpsilonChange(float epsilon);
|
void onEpsilonChange(float epsilon);
|
||||||
void onDelayChange(int delay);
|
void onDelayChange(int delay);
|
||||||
|
|
|
@ -1,5 +1,11 @@
|
||||||
package core.policy;
|
package core.policy;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Chooses the action with the highest value with probability 1-Ɛ + Ɛ/|A|.
|
||||||
|
* With probability Ɛ, a random action is taken (highest-value option included).
|
||||||
|
*
|
||||||
|
* @param <A> Enum class of available action in specific environment
|
||||||
|
*/
|
||||||
public interface EpsilonPolicy<A extends Enum> extends Policy<A> {
|
public interface EpsilonPolicy<A extends Enum> extends Policy<A> {
|
||||||
float getEpsilon();
|
float getEpsilon();
|
||||||
void setEpsilon(float epsilon);
|
void setEpsilon(float epsilon);
|
||||||
|
|
|
@ -6,6 +6,12 @@ import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Always chooses the action with the highest value
|
||||||
|
* with ties broken arbitrarily.
|
||||||
|
*
|
||||||
|
* @param <A> Enum class of available action in specific environment
|
||||||
|
*/
|
||||||
public class GreedyPolicy<A extends Enum> implements Policy<A> {
|
public class GreedyPolicy<A extends Enum> implements Policy<A> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -2,6 +2,12 @@ package core.policy;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Strategy to choose a specific action available in the agent's current
|
||||||
|
* state.
|
||||||
|
*
|
||||||
|
* @param <A> Enum class of available action in specific environment
|
||||||
|
*/
|
||||||
public interface Policy<A extends Enum> {
|
public interface Policy<A extends Enum> {
|
||||||
A chooseAction(Map<A, Double> actionValues);
|
A chooseAction(Map<A, Double> actionValues);
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,11 @@ import core.RNG;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Chooses an action arbitrarily.
|
||||||
|
*
|
||||||
|
* @param <A> Enum class of available action in specific environment
|
||||||
|
*/
|
||||||
public class RandomPolicy<A extends Enum> implements Policy<A>{
|
public class RandomPolicy<A extends Enum> implements Policy<A>{
|
||||||
@Override
|
@Override
|
||||||
public A chooseAction(Map<A, Double> actionValues) {
|
public A chooseAction(Map<A, Double> actionValues) {
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
package evironment.antGame;
|
package evironment.antGame;
|
||||||
|
|
||||||
import core.Observation;
|
|
||||||
import lombok.AccessLevel;
|
import lombok.AccessLevel;
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
|
@ -12,7 +11,7 @@ import java.awt.*;
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
@Getter
|
@Getter
|
||||||
@Setter
|
@Setter
|
||||||
public class AntObservation implements Observation {
|
public class AntObservation {
|
||||||
private Cell cell;
|
private Cell cell;
|
||||||
private Point pos;
|
private Point pos;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue