package core.policy;
import core.RNG;
import lombok.Getter;
import lombok.Setter;
import java.util.Map;
/**
 * Epsilon-greedy action selection.
 *
 * <p>To prevent the agent from getting stuck only using the "best" action
 * according to the current learning history, this policy takes a random
 * action with probability {@code epsilon} and the greedy action otherwise.
 * (The random action space includes the best action as well.)
 *
 * <p>{@code epsilon} is mutable through the Lombok-generated
 * {@code setEpsilon(float)}, so callers can change the exploration rate
 * between calls to {@link #chooseAction(Map)}.
 *
 * @param <A> Discrete Action Enum
 */
public class EpsilonGreedyPolicy<A> implements EpsilonPolicy<A> {

    /** Probability of delegating to the random policy instead of the greedy one. */
    @Setter
    @Getter
    private float epsilon;

    // Both delegates are created once in the constructor and never reassigned.
    private final RandomPolicy<A> randomPolicy;
    private final GreedyPolicy<A> greedyPolicy;

    /**
     * Creates a policy with the given exploration rate.
     *
     * @param epsilon probability of choosing via the random policy; it is not
     *                range-checked here (presumably meant to lie in [0, 1] —
     *                confirm against callers)
     */
    public EpsilonGreedyPolicy(float epsilon) {
        this.epsilon = epsilon;
        this.randomPolicy = new RandomPolicy<>();
        this.greedyPolicy = new GreedyPolicy<>();
    }

    /**
     * Chooses an action, exploring when the drawn random float is below
     * {@code epsilon} and exploiting otherwise.
     *
     * @param actionValues candidate actions mapped to their current values;
     *                     passed unchanged to the selected delegate policy
     * @return the action picked by the random or the greedy policy
     */
    // NOTE(review): the value type Double is an assumption — align it with the
    // Map type declared by the overridden chooseAction in EpsilonPolicy.
    @Override
    public A chooseAction(Map<A, Double> actionValues) {
        // NOTE(review): this prints on every call; consider routing it through a logger.
        System.out.println("current epsilon " + epsilon);
        if (RNG.getRandom().nextFloat() < epsilon) {
            // Take random action
            return randomPolicy.chooseAction(actionValues);
        } else {
            // Take the action with the highest value
            return greedyPolicy.chooseAction(actionValues);
        }
    }
}