Applying Reinforcement Learning to Particle Accelerators: An Introduction

Use case: Transverse beam steering at ARES linear accelerator at DESY

Tutorial at 4th ICFA beam dynamics mini-workshop on machine learning applications for particle accelerators

Today!

In this tutorial notebook we will implement all the basic components of a Reinforcement Learning algorithm to solve a problem in particle accelerators, with a focus on reward definition.

  • Part I: Introduction
  • Part II: Algorithm implementation in Python
  • Part III: Reward definition!
  • Part IV: Training an RL agent

Download the repository

Once you have Git installed open your terminal, go to your desired directory, and type:

git clone https://github.com/RL4AA/rl-tutorial-ares-basic.git

Then enter the downloaded repository:

cd rl-tutorial-ares-basic

Install dependencies

You need to install the dependencies before running the notebooks.

Install ffmpeg

Please also run these commands to install ffmpeg:

  • OS X: brew install ffmpeg
  • Ubuntu: sudo apt-get install ffmpeg

Install dependencies

You need to install the dependencies before running the notebooks.

Using conda

If you don't have conda installed already and want to use conda for environment management, you can install Miniconda as described here.

  • Create a conda env with conda create -n rl-icfa python=3.10
  • Activate the environment with conda activate rl-icfa
  • Install the required packages via pip install -r requirements.txt.
  • Additional installation steps:
python -m jupyter contrib nbextension install --user
python -m jupyter nbextension enable varInspector/main
  • After the tutorial you can remove your environment with conda remove -n rl-icfa --all

Install dependencies

You need to install the dependencies before running the notebooks.

Using venv only

If you do not have conda installed:

Alternatively, you can create the virtual environment with venv from the standard library:

python -m venv rl-icfa

and activate the env with $ source rl-icfa/bin/activate (bash) or C:\> rl-icfa\Scripts\activate.bat (Windows)

Then, install the packages with pip within the activated environment

python -m pip install -r requirements.txt

Finally, install the notebook extensions if you want to see them in slide mode:

python -m jupyter contrib nbextension install --user
python -m jupyter nbextension enable varInspector/main
In [2]:
# Importing the required packages
from time import sleep

import matplotlib.pyplot as plt
import names
import numpy as np
from gymnasium.wrappers import RescaleAction
from IPython.display import clear_output, display
from stable_baselines3 import PPO

from utils.helpers import (
    evaluate_ares_ea_agent,
    plot_ares_ea_training_history,
    show_video,
)
from utils.train import ARESEACheetah, make_env, read_from_yaml
from utils.train import train as train_ares_ea
from utils.utils import NotVecNormalize

Part I: Introduction


Formulating the RL problem

We need to define:

  • Actions
  • Observations
  • Reward
  • Environment
  • Agent


ARES (Accelerator Research Experiment at SINBAD)

ARES is an S-band radio frequency linac at the DESY Hamburg site equipped with a photoinjector and two independently driven traveling wave accelerating structures. The main research focus is the generation and characterization of sub-femtosecond electron bunches at relativistic particle energy. The generation of short electron bunches is of high interest for radiation generation, i.e. by free electron lasers.


  • Final energy: 100-155 MeV
  • Bunch charge: 0.01-200 pC
  • Bunch length: 30 fs - 1 ps
  • Pulse repetition rate: 1-50 Hz

The accelerator problem we want to solve

We would like to focus and center the electron beam on a diagnostic screen using corrector and quadrupole magnets


Formulating the RL problem

Overview of our study case


Discussion

$\implies$ Is the action space continuous or discrete?

$\implies$ Is the problem fully observable or partially observable?

Formulating the RL problem

Actions

In the ARES transverse tuning task we have 3 quadrupoles and 2 corrector magnets

The actions are:

  • Quadrupole magnet strength $k_{1,2,3}$ $[1/m^2]$
  • Corrector deflection angle $\theta_\mathrm{v, h}$ $[mrad]$ (vertical and horizontal)

In our control system we can set these derived values directly according to the beam energy

$\implies$ actions $=[k_{\mathrm{Q1}},k_{\mathrm{Q2}},\theta_\mathrm{CV},k_{\mathrm{Q3}},\theta_\mathrm{CH}]$

is a 5-dimensional array
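
For illustration, such a 5-dimensional action could be declared as a gymnasium Box space. This is only a sketch: the limits below are placeholders, not the actual ARES magnet limits.

import numpy as np
from gymnasium import spaces

# Hypothetical action space for [k_Q1, k_Q2, theta_CV, k_Q3, theta_CH];
# the limits are placeholders, not the real ARES magnet limits.
action_space = spaces.Box(
    low=np.array([-30.0, -30.0, -3e-3, -30.0, -3e-3], dtype=np.float32),
    high=np.array([30.0, 30.0, 3e-3, 30.0, 3e-3], dtype=np.float32),
)

action = action_space.sample()  # a random valid 5-dimensional action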


Formulating the RL problem

Observation / state

Observation is the information an agent receives about the current state of the environment

It should provide enough information so that the agent can solve this problem.

The observation does not necessarily cover the entire (internal) state of the environment.

Discussion

$\implies$ What should be included in the observation?

$\implies$ What can be observed in simulation?

$\implies$ What cannot be observed in real world?

$\implies$ How does this relate to the environment?


The screen is made from scintillating material and glows when hit by electrons


The camera films the screen

Formulating the RL problem

The environment's state

The state can be fully described by four components:

  • The target beam: the beam we want to achieve, our goal
    • as a 4-dimensional array $b^\mathrm{(t)}=[\mu_x^{(\mathrm{t})},\sigma_x^{(\mathrm{t})},\mu_y^{(\mathrm{t})},\sigma_y^{(\mathrm{t})}]$, where $\mu$ denotes the position on the screen, $\sigma$ denotes the beam size, and $t$ stands for "target".
  • The incoming beam: the beam that enters the EA upstream
    • $I = [\mu_x^{(\mathrm{i})},\sigma_x^{(\mathrm{i})},\mu_y^{(\mathrm{i})},\sigma_y^{(\mathrm{i})},\mu_{xp}^{(\mathrm{i})},\sigma_{xp}^{(\mathrm{i})},\mu_{yp}^{(\mathrm{i})},\sigma_{yp}^{(\mathrm{i})},\mu_s^{(\mathrm{i})},\sigma_s^{(\mathrm{i})}]$, where $i$ stands for "incoming"
  • The magnet strengths and deflection angles
    • $[k_{\mathrm{Q1}},k_{\mathrm{Q2}},\theta_\mathrm{CV},k_{\mathrm{Q3}},\theta_\mathrm{CH}]$
  • The transverse misalignments of quadrupoles and the diagnostic screen
    • $[m_{\mathrm{Q1}}^{(\mathrm{x})},m_{\mathrm{Q1}}^{(\mathrm{y})},m_{\mathrm{Q2}}^{(\mathrm{x})},m_{\mathrm{Q2}}^{(\mathrm{y})},m_{\mathrm{Q3}}^{(\mathrm{x})},m_{\mathrm{Q3}}^{(\mathrm{y})},m_{\mathrm{S}}^{(\mathrm{x})},m_{\mathrm{S}}^{(\mathrm{y})}]$

Discussion

$\implies$ Do we (fully) know or can we observe the state of the environment?

Formulating the RL problem

Our definition of observation

The observation for this task contains three parts (a small sketch of how they could be assembled follows the list):

  • The target beam: the beam we want to achieve, our goal
    • as a 4-dimensional array $b^\mathrm{(t)}=[\mu_x^{(\mathrm{t})},\sigma_x^{(\mathrm{t})},\mu_y^{(\mathrm{t})},\sigma_y^{(\mathrm{t})}]$, where $\mu$ denotes the position on the screen, $\sigma$ denotes the beam size, and $t$ stands for "target".
  • The current beam: the beam we currently have
    • $b^\mathrm{(c)}=[\mu_x^{(\mathrm{c})},\sigma_x^{(\mathrm{c})},\mu_y^{(\mathrm{c})},\sigma_y^{(\mathrm{c})}]$, where $c$ stands for "current"
  • The magnet strengths and deflection angles
    • $[k_{\mathrm{Q1}},k_{\mathrm{Q2}},\theta_\mathrm{CV},k_{\mathrm{Q3}},\theta_\mathrm{CH}]$
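
For illustration only, the three parts could be assembled like this (the actual layout used in utils/train.py may differ; the numbers are made up):

import numpy as np

target_beam = np.array([1e-3, 2e-4, 1e-3, 2e-4])     # [mu_x, sigma_x, mu_y, sigma_y] (target)
current_beam = np.array([-5e-4, 8e-4, 3e-4, 6e-4])   # [mu_x, sigma_x, mu_y, sigma_y] (current)
magnets = np.array([10.0, -10.0, 1e-3, 5.0, -1e-3])  # [k_Q1, k_Q2, theta_CV, k_Q3, theta_CH]

observation = {"target": target_beam, "beam": current_beam, "magnets": magnets}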

Discussion

$\implies$ Does this observation definition fulfil the Markov property? (does the probability distribution for the next beam depend only on the observation? or is it affected by other state information?)

Formulating the RL problem

Goal and reward

Our goal is divided into two tasks:

  • to steer the beam to the desired positions
  • to focus the beam to the desired beam size

Discussion

$\implies$ How should we define our reward function? Give it a go!

$\implies$ We will look into the reward definitions in the following section.

Formulating the RL problem

Agent / algorithm


image from RL Tips and Tricks - A. Raffin

Discussion

$\implies$ What would you choose and why?

Part II: Algorithm implementation in Python

About libraries for RL

There are many libraries with already implemented RL algorithms, and frameworks to implement an environment to interact with. In this notebook we use:

  • Stable-Baselines3 for the RL algorithms
  • Gymnasium for the environment

More info here

Note:

  • Gymnasium is the successor of OpenAI Gym.
  • Stable-Baselines3 now has an early-stage JAX implementation, sbx.

Agent / algorithm

  • As mentioned, we use the Stable-Baselines3 (SB3) package to implement the reinforcement learning algorithms.
  • In this tutorial we focus on two examples: PPO (proximal policy optimization) and TD3 (twin delayed DDPG)

Environment

We take all the elements of the RL problem we defined previously and represent the tuning task as a gym environment, following the Gymnasium standard interface for RL tasks.

A custom gym.Env would contain the following parts (a minimal skeleton is sketched after this list):

  • Initialization: sets up the environment and declares the allowed observation_space and action_space
  • reset method: resets the environment for a new episode, returns the 2-tuple (observation, info)
  • step method: main logic of the environment. It takes an action, transitions the environment to a new state, gets a new observation, computes the reward, and finally returns the 5-tuple (observation, reward, terminated, truncated, info)
    • terminated checks if the current episode should be terminated according to the underlying MDP (goal reached, or some threshold exceeded)
    • truncated checks if the current episode should be truncated outside of the underlying MDP (e.g. a time limit)
  • render method: to visualize the environment (a video, or just some plots)
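
Below is a minimal, self-contained skeleton showing where each of these parts lives. It is not the ARES-EA implementation; the spaces, dynamics, and thresholds are dummy placeholders.

import gymnasium as gym
import numpy as np
from gymnasium import spaces


class ToyEnv(gym.Env):
    """Minimal skeleton of a custom environment (illustrative only)."""

    def __init__(self):
        # Initialization: declare the allowed observation and action spaces
        self.observation_space = spaces.Box(low=-1.0, high=1.0, shape=(4,), dtype=np.float32)
        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(2,), dtype=np.float32)
        self._state = np.zeros(4, dtype=np.float32)
        self._n_steps = 0

    def reset(self, seed=None, options=None):
        # Reset: start a new episode and return the 2-tuple (observation, info)
        super().reset(seed=seed)
        self._state = self.observation_space.sample()
        self._n_steps = 0
        return self._state.copy(), {}

    def step(self, action):
        # Step: apply the action, compute the reward, and return the 5-tuple
        self._state[:2] = np.clip(self._state[:2] + 0.1 * action, -1.0, 1.0)
        self._n_steps += 1
        reward = -float(np.abs(self._state).sum())
        terminated = bool(np.abs(self._state).sum() < 0.1)  # goal reached (part of the MDP)
        truncated = self._n_steps >= 50                      # time limit (outside the MDP)
        return self._state.copy(), reward, terminated, truncated, {}

    def render(self):
        # Render: visualize the environment (here just a printout)
        print(f"state: {self._state}")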

An overview of this RL project


Code directory structure

We list the most relevant parts of the project structure below:

  • utils/train.py contains the gym environments and the training script
    • ARESEA implements the ARES Experimental Area transverse tuning task as a gym.Env. It contains the basic logic, such as definition of observation space, action space, and reward. How an action is taken is implemented in child classes with specific backends.
    • ARESEACheetah is derived from the base class ARESEA and uses the Cheetah simulation as a backend.
    • make_env initializes an ARESEA environment and wraps it with the required gym.wrappers that add convenient features (e.g. monitoring the progress, ending the episode when time_limit is reached, rescaling the action, normalizing the observation, ...)
    • train is a convenience function for training the RL agent. It calls make_env, sets up the RL algorithm, starts training, and saves the results in utils/recordings, utils/monitors and utils/models.

Code directory structure

We list the most relevant parts of the project structure below:

  • utils/helpers.py contains some utility functions
    • evaluate_ares_ea_agent takes a trained agent and evaluates its performance using different metrics.
    • plot_ares_ea_training_history shows the progress during training.

What is Cheetah?

  • RL algorithms require a large number of samples to learn ($10^5-10^9$), and getting those samples in the real accelerator is often too costly.
    • This is why a common approach is to train the agent in simulation, and then deploy it in the real machine
  • In our case we would train with optics simulation codes for accelerators, such as OCELOT
    • These codes were developed to support the design phase of accelerators, not to generate training data, so their computation time is too high for RL.
  • Cheetah is a tensorized approach to transfer matrix tracking, which saves computation time and overhead compared to OCELOT.

You can find more information in the paper and the code repository.

The ARES-EA (ARES Experimental Area) Environment

  • We formulated the ARES-EA task as a gym environment, which allows our algorithm to easily interface with both the simulation and real machine backends as shown before.
  • In this part, you will get familiar with the environment for the beam focusing and positioning at ARES accelerator.

Some methods:

  • reset: in both the real and simulation cases, resets the magnets to their initial values. In simulation, it also regenerates the incoming beam and (optionally) resets the magnet misalignments.
  • step: sets the magnets to new settings and observes the beam (runs a simulation, or reads the screen image on the real machine).

Now let's create the environment:

In [11]:
# Create the environment
env = ARESEACheetah()
env.target_beam_mode = "constant"

Set a target beam you want to achieve

$\implies$ Let's define the position $(\mu_x, \mu_y)$ and size $(\sigma_x, \sigma_y)$ of the beam on the screen

$\implies$ Modify the target_beam list below, where the order of the arguments is $[\mu_x,\sigma_x,\mu_y,\sigma_y]$

$\implies$ Take into account the dimensions of the screen ($\pm$ 2e-3 m)

$\implies$ The target beam will be represented by a blue circle on the screen

In [12]:
target_beam = np.array([1e-3, 2e-4, 1e-3, 2e-4])  # Change it
In [13]:
env.target_beam_values = target_beam
env.reset()
plt.figure(figsize=(7, 4))
plt.imshow(env.render())  # Plot the screen image
Out[13]:
<matplotlib.image.AxesImage at 0x2a35fcfa0>

Get familiar with the Gym environment

$\implies$ Change the magnet values, i.e. the actions

$\implies$ The actions are normalized to 1, so valid values are in the [-1, 1] interval

$\implies$ The values of the action list in the cell below follow this magnet order: [Q1, Q2, CV, Q3, CH]

In [14]:
action = np.array([1, 0.5, 0.5, 1, 0.6])  # put your action here

Perform one step: update the env, observe new beam!

In [15]:
env = RescaleAction(env, -1, 1)  # rescales the action to the interval [-1, 1]
env.reset()
env.step(action)
plt.figure(figsize=(7, 4))
plt.imshow(env.render())
Out[15]:
<matplotlib.image.AxesImage at 0x2a36714e0>

$\implies$ Observe the plot above: what beam does that magnet configuration yield? Can you center and focus the beam by hand?

  • Let's now use the environment in a loop, and perform 10 steps
  • The function below will linearly vary the value of the vertical corrector
In [16]:
env.reset()
steps = 10


def change_vertical_corrector(q1, q2, cv, q3, ch, steps, i):
    # Keep the quadrupoles and horizontal corrector fixed and ramp the
    # vertical corrector linearly over the course of `steps` iterations
    action = np.array([q1, q2, cv + 1 / steps * i, q3, ch])
    return action


fig, ax = plt.subplots(1, figsize=(7, 4))
for i in range(steps):
    action = change_vertical_corrector(0.2, -0.2, -0.5, 0.3, 0, steps, i)
    env.step(action)

    img = env.render()
    ax.imshow(img)
    display(fig)
    clear_output(wait=True)
    sleep(0.5)

Part III: Reward definition!

  • In the following, we reduce our problem to focusing the beam only, and the actuators to just the 3 quadrupole magnets
    • In this way, we can train a RL agent with fewer steps

Training a good agent revolves primarily around finding the right setup for the environment and the correct reward function. In order to iterate over and compare many different options, our training function takes a dictionary called config. The dictionary keys, or "configurations", are explained below.

Configurations

In the following, we use a config dictionary to set up the training. This allows us to easily switch between different training conditions. Below we show some selected configurations that have the most influence on the training results; the parameters can mostly be divided into two parts.

Configurations

Environment configurations

  • action_mode: Whether the agent sets the magnet strengths directly or as a delta relative to the current settings. You may set this to "direct" or "delta" (see the sketch after this list). You should find that "delta" trains faster. Setting "delta" is also crucial for running the agent on the real accelerator.
  • reward_mode: How the reward is calculated. Can be set to negative_objective, objective_improvement, or sum_of_pixels.
  • time_reward: A reward (penalty if negative) added at every step; it is intended to make the tuning faster.
  • rescale_action: Takes the limits of the magnet settings and rescales them into the given range.
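
As a rough sketch (not the code in utils/train.py), the two action modes differ only in how the agent's output is turned into new magnet settings:

import numpy as np


def apply_action(current_settings, action, action_mode="delta"):
    # Sketch only: turn an agent action into new magnet settings
    if action_mode == "direct":
        # The action *is* the new magnet setting
        return np.asarray(action, dtype=float)
    if action_mode == "delta":
        # The action is a change relative to the current setting
        return np.asarray(current_settings, dtype=float) + np.asarray(action, dtype=float)
    raise ValueError(f"Unknown action_mode: {action_mode}")


current = np.array([10.0, -10.0, 1e-3, 5.0, -1e-3])
delta = np.array([0.5, 0.0, 0.0, -0.5, 0.0])
apply_action(current, delta, action_mode="delta")  # -> [10.5, -10.0, 0.001, 4.5, -0.001]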

Configurations

Environment configurations

Termination conditions (a minimal check of these conditions is sketched after this list):

  • abort_if_off_screen: If this property is set to True, episodes are aborted when the beam is no longer on the screen.
  • time_limit: Number of interactions the agent gets to tune the magnets within one episode.
  • target_sigma_x_threshold, target_sigma_y_threshold: Thresholds for beam parameters. If all beam parameters are within the threshold from their target, episodes will end and the agent will stop optimising.
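
A minimal sketch of how these conditions might be checked at every step (illustrative only; the argument names simply mirror the config keys above):

def check_episode_end(current_beam, target_beam, step_count, time_limit=25,
                      target_sigma_x_threshold=None, target_sigma_y_threshold=None,
                      beam_on_screen=True, abort_if_off_screen=False):
    # Beam vectors are ordered [mu_x, sigma_x, mu_y, sigma_y]
    terminated = False
    if target_sigma_x_threshold is not None and target_sigma_y_threshold is not None:
        # Goal reached: beam sizes are within their thresholds of the target
        terminated = (
            abs(current_beam[1] - target_beam[1]) < target_sigma_x_threshold
            and abs(current_beam[3] - target_beam[3]) < target_sigma_y_threshold
        )
    if abort_if_off_screen and not beam_on_screen:
        terminated = True
    truncated = step_count >= time_limit  # the time limit lies outside the MDP
    return terminated, truncated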

Question

$\implies$ What does the existence of termination conditions say about the nature of the problem? Is it episodic or continuing?

What could go wrong?

Let's load some pre-trained models that use different combinations of the config dictionary and different reward definitions.

Pre-trained Agent 1: "Gary Buchwald"

Relevant config parameters

  • "abort_if_off_screen": True
  • "reward_mode": "objective_improvement"
  • "target_sigma_x_threshold": None
  • "target_sigma_y_threshold": None
  • "time_reward": -1.0
  • "action_mode": "delta"

Reward = objective_improvement

Difference of the objective:

$$ r_\mathrm{obj-improvement} = ( \mathrm{obj}_{j-1} - \mathrm{obj}_{j} ) / \mathrm{obj}_0 $$

$$ \mathrm{obj} = \sum_{i}|b_i^\mathrm{(c)} - b_i^\mathrm{(t)}|$$

where $j$ is the index of the current time step and $\mathrm{obj}_0$ is the initial objective right after reset.
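
As a sketch (assuming the beams are given as [mu_x, sigma_x, mu_y, sigma_y] arrays; not the exact implementation in utils/train.py), this reward could be computed as:

import numpy as np


def objective(current_beam, target_beam):
    # obj = sum_i |b_i^(c) - b_i^(t)| over [mu_x, sigma_x, mu_y, sigma_y]
    return float(np.sum(np.abs(np.asarray(current_beam) - np.asarray(target_beam))))


def objective_improvement_reward(obj_previous, obj_current, obj_initial):
    # r = (obj_{j-1} - obj_j) / obj_0: positive when the beam moved closer to the target
    return (obj_previous - obj_current) / obj_initial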

Question

$\implies$ What do you expect to happen, why?

In [17]:
agent_name = "Gary Buchwald"  # names are randomly generated in training

loaded_model = PPO.load(f"utils/models/{agent_name}/model")
loaded_config = read_from_yaml(f"utils/models/{agent_name}/config")

env = make_env(loaded_config, record_video=False)
env = NotVecNormalize(env, f"utils/models/{agent_name}/normalizer")

terminated = False
truncated = False
observation, _ = env.reset()
while not (terminated or truncated):
    action, _ = loaded_model.predict(observation)
    observation, reward, terminated, truncated, info = env.step(action)

    img = env.render()
    ax.imshow(img)
    display(fig)
    clear_output(wait=True)
    sleep(0.5)

Pre-trained Agent 2: "David Archibald"

Relevant config parameters

  • "abort_if_off_screen": False
  • "reward_mode": "sum_of_pixels"
  • "target_sigma_x_threshold": None
  • "target_sigma_y_threshold": None
  • "time_reward": 0.0
  • "action_mode": "delta"

Reward = sum_of_pixels (focusing-only)

$$r_\mathrm{sum-pixel} = - \sum_\text{all pixels} \text{pixel-value}$$
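
A sketch of this reward, assuming the screen image is available as a numpy array of pixel values:

import numpy as np


def sum_of_pixels_reward(screen_image):
    # r = - sum of all pixel values: fewer / dimmer lit pixels give a higher reward
    return -float(np.sum(screen_image))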

Question

$\implies$ What do you expect to happen, why?

In [18]:
agent_name = "David Archibald"  # names are randomly generated in training

loaded_model = PPO.load(f"utils/models/{agent_name}/model")
loaded_config = read_from_yaml(f"utils/models/{agent_name}/config")

env = make_env(loaded_config, record_video=False)
env = NotVecNormalize(env, f"utils/models/{agent_name}/normalizer")

terminated = False
truncated = False
observation, info = env.reset()
while not (terminated or truncated):
    action, _ = loaded_model.predict(observation)
    observation, reward, terminated, truncated, info = env.step(action)

    img = env.render()
    ax.imshow(img)
    display(fig)
    clear_output(wait=True)
    sleep(0.5)

Pre-trained Agent 3: "Bertha Sparkman"

Relevant config parameters

  • "abort_if_off_screen": False
  • "reward_mode": "objective_improvement"
  • "target_sigma_x_threshold": None
  • "target_sigma_y_threshold": None
  • "time_reward": 0.0
  • "action_mode": "direct"

Reward = objective_improvement

Difference of the objective:

$$ r_\mathrm{obj-improvement} = ( \mathrm{obj}_{j-1} - \mathrm{obj}_{j} ) / \mathrm{obj}_0 $$ $$ \mathrm{obj} = \sum_{i}|b_i^\mathrm{(c)} - b_i^\mathrm{(t)}|$$

where $j$ is the index of the current time step.

Question

$\implies$ What do you expect to happen?

$\implies$ What is the difference between Agent 1: "Gary Buchwald" and this agent?

In [19]:
agent_name = "Bertha Sparkman"  # names are randomly generated in training

loaded_model = PPO.load(f"utils/models/{agent_name}/model")
loaded_config = read_from_yaml(f"utils/models/{agent_name}/config")

env = make_env(loaded_config, record_video=False)
env = NotVecNormalize(env, f"utils/models/{agent_name}/normalizer")

terminated = False
truncated = False
observation, info = env.reset()
while not (terminated or truncated):
    action, _ = loaded_model.predict(observation)
    observation, reward, terminated, truncated, info = env.step(action)

    img = env.render()
    ax.imshow(img)
    display(fig)
    clear_output(wait=True)
    sleep(0.5)

Pre-trained Agent 4: "Betty Gordon"

Relevant config parameters

  • "abort_if_off_screen": False
  • "reward_mode": "objective_improvement"
  • "target_sigma_x_threshold": None
  • "target_sigma_y_threshold": None
  • "time_reward": 0.0
  • "action_mode": "delta"

Reward = objective_improvement

Difference of the objective:

$$ r_\mathrm{obj-improvement} = ( \mathrm{obj}_{j-1} - \mathrm{obj}_{j} ) / \mathrm{obj}_0 $$ $$ \mathrm{obj} = \sum_{i}|b_i^\mathrm{(c)} - b_i^\mathrm{(t)}|$$

where $j$ is the index of the current time step.

Question

$\implies$ What do you expect to happen?

$\implies$ What is the difference between Agent 1: "Gary Buchwald", Agent 3: "Bertha Sparkman", and this agent?

In [20]:
agent_name = "Betty Gordon"  # names are randomly generated in training

loaded_model = PPO.load(f"utils/models/{agent_name}/model")
loaded_config = read_from_yaml(f"utils/models/{agent_name}/config")

env = make_env(loaded_config, record_video=False)
env = NotVecNormalize(env, f"utils/models/{agent_name}/normalizer")

terminated = False
truncated = False
observation, info = env.reset()
while not (terminated or truncated):
    action, _ = loaded_model.predict(observation)
    observation, reward, terminated, truncated, info = env.step(action)

    img = env.render()
    ax.imshow(img)
    display(fig)
    clear_output(wait=True)
    sleep(0.5)

Pre-trained Agent 5: "Sean Kelley"

Relevant config parameters

  • "abort_if_off_screen": False
  • "reward_mode": "negative_objective"
  • "target_sigma_x_threshold": None
  • "target_sigma_y_threshold": None
  • "time_reward": 0.0
  • "action_mode": "delta"

Reward = negative_objective"

$$ \mathrm{obj} = \sum_{i}|b_i^\mathrm{(c)} - b_i^\mathrm{(t)}|$$

$$ r_\mathrm{neg-obj} = -1 \cdot \mathrm{obj} / \mathrm{obj}_0 $$

where $b = [\mu_x,\sigma_x,\mu_y,\sigma_y]$, $b^\mathrm{(c)}$ is the current beam, and $b^\mathrm{(t)}$ is the target beam. $\mathrm{obj}_0$ is the initial objective after reset.
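
A sketch, reusing the objective() helper from the earlier reward sketch:

def negative_objective_reward(obj_current, obj_initial):
    # r = -obj_j / obj_0, with obj_j = objective(current_beam, target_beam)
    return -obj_current / obj_initial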

Question

$\implies$ What do you expect to happen, why?

In [21]:
agent_name = "Sean Kelley"  # names are randomly generated in training

loaded_model = PPO.load(f"utils/models/{agent_name}/model")
loaded_config = read_from_yaml(f"utils/models/{agent_name}/config")

env = make_env(loaded_config, record_video=False)
env = NotVecNormalize(env, f"utils/models/{agent_name}/normalizer")

terminated = False
truncated = False
observation, info = env.reset()
while not (terminated or truncated):
    action, _ = loaded_model.predict(observation)
    observation, reward, terminated, truncated, info = env.step(action)

    img = env.render()
    ax.imshow(img)
    display(fig)
    clear_output(wait=True)
    sleep(0.5)

Part IV: Training an RL agent

What is inside an actor-critic agent like PPO?

  • An actor model, often a neural network, takes the observation of the current state and predicts an action to be taken (forward pass)
    • In the ARES case, it observes the accelerator and predicts the magnet settings
  • A critic model, also a neural network, takes the observation of the current state and predicts the value function of that state (evaluating how good the action taken by the actor model is); a minimal sketch of both networks follows
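
To make this concrete, here is an illustrative pair of small PyTorch networks (placeholder sizes, not SB3's actual policy classes). If the observation defined earlier is flattened (4 target + 4 current beam parameters + 5 magnet settings), the input is 13-dimensional:

import torch
import torch.nn as nn

obs_dim, act_dim = 13, 5  # flattened observation and action sizes (illustrative)

actor = nn.Sequential(    # observation -> action
    nn.Linear(obs_dim, 64), nn.Tanh(),
    nn.Linear(64, 64), nn.Tanh(),
    nn.Linear(64, act_dim),
)
critic = nn.Sequential(   # observation -> value of the state V(s)
    nn.Linear(obs_dim, 64), nn.Tanh(),
    nn.Linear(64, 64), nn.Tanh(),
    nn.Linear(64, 1),
)

observation = torch.randn(1, obs_dim)  # dummy observation
action = actor(observation)            # forward pass: predicted action
value = critic(observation)            # forward pass: predicted state value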

What actually happens when you train a PPO agent?

Step 1: collect samples

  • n_samples = n_steps * n_envs is the total number of samples, or interactions with the environment in one epoch (more on what that means later)
    • One sample is collected at each step
    • We can initialize n_envs parallel environments, in which the agent will take n_steps
    • The total number of samples then has to account for the samples gathered in all environments

At each step:

  • The agent will take actions according to the current actor model prediction (forward pass of the model NN)
  • The critic model will predict the value functions of the states during the episode (forward pass of the model NN)

The samples (actions, rewards,...) from all environments are stored in a buffer, where buffer_size = n_samples

What actually happens when you train a PPO agent?

Step 2: update the models (weights of NNs)

After performing n_steps in a particular environment (and therefore gathering n_steps number of samples per environment), it's time to update the actor and critic models (backpropagation of the NNs). Let's consider only 1 environment now for simplicity.

  • One can split the n_samples into mini-batches of a certain batch_size
    • This means that the model will be completely updated (i.e. it has seen all the samples) after n_samples/batch_size backpropagations
    • Once the model is updated, it can be trained again on the same samples for n_epochs passes (the number of iterations over the training set)
    • This whole collect-and-update process can then be repeated for a certain number of epochs (yes, another kind of epoch...)
    • The total number of samples across the epochs is total_timesteps, where
      • total_timesteps = n_steps * n_envs * epochs = n_samples * epochs (see the worked example below)
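
To make these counting rules concrete, here is a small worked example with made-up numbers (deliberately different from the exercise that follows):

n_steps = 200      # steps per environment per epoch
n_envs = 4         # parallel environments
batch_size = 100   # mini-batch size for one gradient update
n_epochs = 5       # passes over the buffer per epoch
epochs = 3         # how often the collect-then-update cycle is repeated

n_samples = n_steps * n_envs              # 800 samples in the buffer per epoch
n_batches = n_samples // batch_size       # 8 mini-batches per pass over the buffer
updates_per_epoch = n_batches * n_epochs  # 40 gradient updates per epoch
total_timesteps = n_samples * epochs      # 2400 environment interactions in total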

What actually happens when you train a PPO agent?


Question

$\implies$ What is the advantage of having a buffer?

What actually happens when you train a PPO agent?

Example

Let's consider the following training parameters:

  • n_steps = 100
  • n_envs = 2
  • batch_size = 50
  • n_epochs = 3
  • epochs = 2

Question

$\implies$ What is total_timesteps?

$\implies$ What is the total number of batches n_batch in 1 epoch?

$\implies$ What is the total number of model updates?

Training time!

Now, set the config below and train your first reinforcement learning agent!

Apart from the reward definition, time_reward, etc. that we discussed before, below are some other configurations that you can change:

  • net_arch: architecture of the policy network (# of neurons in each layer)
  • gamma: Discount factor of the RL problem. Set lower to make rewards now more important than rewards later (usually above 0.9)
  • normalize_observation: Normalize observations throughout training by fitting a running mean and standard deviation of them
  • normalize_reward: Normalize rewards throughout training by fitting a running mean and standard deviation of them
In [ ]:
# Feel free to change some of the configurations here.
config = {
    "n_envs": 40,
    "n_steps": 50,
    "batch_size": 100,
    "n_epochs": 10,
    "total_timesteps": 200_000,
    "abort_if_off_screen": False,
    "action_mode": "delta",
    "gamma": 0.99,
    "frame_stack": None,
    "net_arch": [64, 64],
    "normalize_observation": True,
    "normalize_reward": True,
    "rescale_action": (-3, 3),
    "reward_mode": "negative_objective",
    "run_name": names.get_full_name(),
    "target_sigma_x_threshold": None,
    "target_sigma_y_threshold": None,
    "threshold_hold": 5,
    "time_limit": 25,
    "time_reward": -0.0,
}

Questions

Looking at the config dictionary in the cell above:

$\implies$ How many epochs does it correspond to?

$\implies$ How many model updates (backpropagation) would you be doing in total?

You will train the agent by executing the cell below. Note: this could take about 10 min on a laptop.

In [ ]:
# Toggle comment to re-run the training (can take very long)
%time train_ares_ea(config)

Training metrics

Let's look at the training metrics to see how the agent did.

To check the training history of a specific agent, comment out the first line below and set agent_under_investigation to that agent's name.

In [ ]:
agent_under_investigation = config["run_name"]
# agent_under_investigation = "Donna Brown"
In [ ]:
# Training curves from this training
# Set agent_under_investigation to "ml_workshop" to see curves from the example training.
plot_ares_ea_training_history(agent_under_investigation)

Check the videos

To look at videos of the agent during training:

  1. find the first output line of the training cell. Your agent should have a name (e.g. Fred Rogers).
  2. Find the subdirectory utils/recordings/.
  3. There should be a directory for the name of your agent with video files in it. The ml_workshop directory contains videos from an example training.

Agent evaluation

Run the following cell to evaluate your agent. This is the mean deviation of the beam parameters from the target. Lower results are better.

If you are training agents that include the dipoles, set the function's argument include_position=True.

In [ ]:
plt.figure(figsize=(7, 4))
evaluate_ares_ea_agent(agent_under_investigation, include_position=False, n=200)

We can also test the trained agent on a simulation.

If you want to see an example agent instead of the one you just trained, set agent_name="ml_workshop".

In [ ]:
# Run final agent
fig, ax = plt.subplots()
agent_name = agent_under_investigation

loaded_model = PPO.load(f"utils/models/{agent_name}/model")
loaded_config = read_from_yaml(f"utils/models/{agent_name}/config")

env = make_env(loaded_config, record_video=True)
env = NotVecNormalize(env, f"utils/models/{agent_name}/normalizer")

terminated = False
truncated = False
observation, _ = env.reset()
while not (terminated or truncated):
    action, _ = loaded_model.predict(observation)
    observation, reward, terminated, truncated, info = env.step(action)

    img = env.render()
    ax.imshow(img)
    display(fig)
    clear_output(wait=True)
    sleep(0.3)

Running in the real world

Below you can see one of our final trained agents optimising position and focus of the beam on the real ARES accelerator.

Keep in mind that this agent has never seen the real accelerator before. All it has ever seen is a very simple linear beam dynamics simulation. Despite that, it performs well on the real accelerator, where all kinds of other effects come into the mix.

Note that this does not happen by itself and is the result of various careful decisions when designing the training setup.

Once trained, the agent is, however, trivial to use and requires no further tuning or knowledge of RL.

In [22]:
# Show polished donkey running (on real accelerator)
show_video("utils/real_world_episode_recording.mp4")

Further Resources

Getting started in RL

  • OpenAI Spinning Up - Very understandable explanations of RL and the most popular algorithms, accompanied by easy-to-read Python implementations.
  • Reinforcement Learning with Stable Baselines 3 - YouTube playlist giving a good introduction on RL using Stable Baselines3.
  • Build a Doom AI Model with Python - Detailed 3h tutorial of applying RL using DOOM as an example.
  • An introduction to Reinforcement Learning - Brief introduction to RL.
  • An introduction to Policy Gradient methods - Deep Reinforcement Learning - Brief introduction to PPO.

Further Resources

Papers

  • Learning to Do or Learning While Doing: Reinforcement Learning and Bayesian Optimisation for Online Continuous Tuning
  • Cheetah: Bridging the Gap Between Machine Learning and Particle Accelerator Physics with High-Speed, Differentiable Simulations
  • Learning-based optimisation of particle accelerators under partial observability without real-world training - Tuning of electron beam properties on a diagnostic screen using RL.
  • Sample-efficient reinforcement learning for CERN accelerator control - Beam trajectory steering using RL with a focus on sample-efficient training.
  • Autonomous control of a particle accelerator using deep reinforcement learning - Beam transport through a drift tube linac using RL.
  • Basic reinforcement learning techniques to control the intensity of a seeded free-electron laser - RL-based laser alignment and drift recovery.
  • Real-time artificial intelligence for accelerator control: A study at the Fermilab Booster - Regulation of a gradient magnet power supply using RL and real-time implementation of the trained agent using field-programmable gate arrays (FPGAs).
  • Magnetic control of tokamak plasmas through deep reinforcement learning - Landmark paper on RL for controlling a real-world physical system (plasma in a tokamak fusion reactor).

Further Resources

Literature

  • Reinforcement Learning: An Introduction - Standard text book on RL.

Packages

  • Gymnasium (successor of OpenAI Gym) - De facto standard for implementing custom environments. Also provides a library of RL tasks widely used for benchmarking.
  • Stable Baselines3 - Provides reliable, benchmarked and easy-to-use implementations of the most important RL algorithms.
  • Ray RLlib - Part of the Ray Python package providing implementations of various RL algorithms with a focus on distributed training.