import gymnasium as gym
import numpy as np
import torch
import tclab


class TCLabEnv(gym.Env):
    """Gymnasium environment that drives a TCLab device towards a setpoint.

    The single action component is a heater level that is applied to both
    heaters (``Q1`` and ``Q2``). The single observation component is the
    ``T1`` temperature reading. The reward is the negative absolute error
    between ``T1`` and ``setpoint``, so it is at most 0 and is largest when
    the reading equals the setpoint.

    Args:
        setpoint: Target value for the ``T1`` reading.
    """

    # Bounds shared by the action and observation spaces.
    _LOW = 0.0
    _HIGH = 100.0

    def __init__(self, setpoint=50):
        super().__init__()
        # The original note says to connect to real hardware with
        # ``tclab.TCLab()``; ``TCLabModel`` is what this class instantiates.
        self.lab = tclab.TCLabModel()
        self.setpoint = setpoint
        self.action_space = gym.spaces.Box(
            low=self._LOW, high=self._HIGH, shape=(1,), dtype=np.float32
        )
        self.observation_space = gym.spaces.Box(
            low=self._LOW, high=self._HIGH, shape=(1,), dtype=np.float32
        )

    def _observe(self):
        """Return the current ``T1`` reading as a float32 array of shape (1,)."""
        # Match the dtype declared by ``observation_space``.
        return np.array([self.lab.T1], dtype=np.float32)

    def reset(self, *, seed=None, options=None):
        """Turn both heaters off and return the initial observation.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset``.
            options: Accepted for Gymnasium API compatibility; unused here.

        Returns:
            A tuple ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self.lab.Q1(0)
        self.lab.Q2(0)
        return self._observe(), {}

    def step(self, action):
        """Apply a heater level to both heaters and read back ``T1``.

        Args:
            action: Heater level. Only the first element is used; a scalar
                is accepted as well. The value is clipped to the bounds of
                ``action_space`` before being applied.

        Returns:
            A tuple ``(observation, reward, terminated, truncated, info)``.
            ``terminated`` and ``truncated`` are always ``False`` and
            ``info`` is an empty dict.
        """
        # Flatten so scalars, lists and arrays are all handled uniformly.
        raw_level = np.asarray(action, dtype=np.float32).reshape(-1)[0]
        heater_level = float(np.clip(raw_level, self._LOW, self._HIGH))

        self.lab.Q1(heater_level)
        self.lab.Q2(heater_level)

        # NOTE(review): the reading is taken immediately after the heaters
        # are set, with no delay in between; confirm that the caller is
        # expected to pace successive steps.
        temperature = float(self.lab.T1)
        reward = -abs(temperature - self.setpoint)

        observation = np.array([temperature], dtype=np.float32)
        # Continuous control task: the episode never ends from inside step().
        return observation, reward, False, False, {}

    def close(self):
        """Turn both heaters off and close the lab connection."""
        try:
            self.lab.Q1(0)
            self.lab.Q2(0)
        finally:
            # Release the connection even if switching the heaters off fails.
            self.lab.close()