# !pip install gymnasium numpy matplotlib

import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt

# Lab-wide NumPy random generator, seeded with 42.
rng = np.random.default_rng(seed=42)


def _space_sizes(env):
    """Return ``(observation_space.n, action_space.n)`` for *env*."""
    return env.observation_space.n, env.action_space.n


# The two environments used in this lab; FrozenLake is created with
# is_slippery=True.
env_lake = gym.make('FrozenLake-v1', is_slippery=True)
env_taxi = gym.make('Taxi-v3')

# Print each environment's state count followed by its action count.
print('FrozenLake states/actions:', *_space_sizes(env_lake))
print('Taxi states/actions:', *_space_sizes(env_taxi))

# YOUR TURN
# Implement q_learn(env, episodes, alpha, gamma, eps_start, eps_end, eps_decay),
# which returns the learned Q-table and the reward collected in each episode.

# YOUR TURN
# Train on FrozenLake for 20_000 episodes. Plot the 100-episode rolling mean of
# the reward. Print the final greedy-policy success rate over 1000 evaluation
# episodes.

# YOUR TURN
# Train on Taxi for 30_000 episodes. Tune alpha and eps_decay. Report the final
# average return over 1000 evaluation episodes.

Lab 2 — Tabular Q-learning on FrozenLake and Taxi

Setup

Environments

Exercise 1 — Implement tabular Q-learning

Exercise 2 — Train on FrozenLake and plot the learning curve

Exercise 3 — Train on Taxi and tune

Done?