# Value_Iteration.py
#
# NOTE(review): this text was originally a git diff flattened onto one line.
# That diff added Policy_Iteration.py and Value_Interation.py as new files
# with no content hunks ("Value_Interation" is presumably a misspelling of
# "Value_Iteration" -- confirm before deleting it), and added
# Value_Iteration.py containing the skeleton below.

import numpy as np
import pandas as pd


class rlalgorithm:
    """Skeleton agent exposing ``choose_action`` and ``learn``.

    Only the constructor does real work (it stores ``actions``); the two
    other methods are unimplemented placeholders that raise
    ``NotImplementedError`` until they are filled in.
    """

    def __init__(self, actions, *args, **kwargs):
        """Store the available actions.

        Args:
            actions: Collection of actions the agent may pick from; stored
                unchanged on ``self.actions``.
            *args: Placeholder for further positional settings. The original
                skeleton marked these with ``...``; they are accepted and
                currently ignored.
            **kwargs: Placeholder for further keyword settings; accepted and
                currently ignored.
        """
        self.actions = actions

    def choose_action(self, observation):
        """Return the action to take for ``observation``.

        Args:
            observation: The current observation handed to the agent.

        Raises:
            NotImplementedError: Always, until action selection is
                implemented.
        """
        # The original stub returned an unbound name, which would have
        # surfaced as a confusing NameError; fail explicitly instead.
        raise NotImplementedError(
            "rlalgorithm.choose_action is not implemented yet"
        )

    def learn(self, *args, **kwargs):
        """Update the agent from a transition.

        The original skeleton left the parameter list open (``...``), so any
        arguments are accepted here until the real signature is decided.

        Raises:
            NotImplementedError: Always, until learning is implemented.
        """
        raise NotImplementedError("rlalgorithm.learn is not implemented yet")