import gymnasium as gym
import numpy as np
import torch
import tclab

class TCLabEnv(gym.Env):
    """Gymnasium environment that drives a TCLab device toward a temperature setpoint.

    The single continuous action is a heater level in ``[0, 100]`` that is
    applied to both heaters (Q1 and Q2). The observation is the current
    reading of temperature sensor T1, and the reward is the negative absolute
    error between that reading and ``setpoint``.

    Args:
        setpoint: Target value for the T1 reading (same units as ``lab.T1``).
    """

    def __init__(self, setpoint=50):
        super().__init__()
        # Simulated device; use tclab.TCLab() instead to connect to real hardware.
        self.lab = tclab.TCLabModel()
        self.setpoint = setpoint
        # Float bounds with an explicit shape avoid the int -> float32 cast
        # warnings Gymnasium emits for integer bound arrays.
        self.action_space = gym.spaces.Box(
            low=0.0, high=100.0, shape=(1,), dtype=np.float32
        )
        self.observation_space = gym.spaces.Box(
            low=0.0, high=100.0, shape=(1,), dtype=np.float32
        )

    def _observation(self, temperature):
        """Wrap a T1 reading as an array matching ``observation_space``'s shape and dtype."""
        return np.array([temperature], dtype=np.float32)

    def reset(self, *, seed=None, options=None):
        """Turn both heaters off and return the initial observation.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset`` to seed the
                environment's random number generator.
            options: Accepted for Gymnasium API compatibility; unused.

        Returns:
            A tuple ``(observation, info)`` where ``observation`` is a
            float32 array of shape ``(1,)`` holding the T1 reading and
            ``info`` is an empty dict.
        """
        # Required by the Gymnasium API so that `env.reset(seed=...)` works
        # and `self.np_random` is seeded.
        super().reset(seed=seed)
        self.lab.Q1(0)
        self.lab.Q2(0)
        return self._observation(self.lab.T1), {}

    def step(self, action):
        """Apply a heater level to both heaters and read back T1.

        Args:
            action: Array-like (or scalar) whose first element is the
                requested heater level; it is clipped to the bounds of
                ``action_space`` before being applied.

        Returns:
            A tuple ``(observation, reward, terminated, truncated, info)``.
            ``reward`` is ``-abs(T1 - setpoint)``; ``terminated`` and
            ``truncated`` are always ``False`` because the task has no
            terminal state.
        """
        # reshape(-1) lets callers pass a scalar, a 0-d array or a 1-d array.
        requested = np.asarray(action, dtype=np.float64).reshape(-1)[0]
        heater = float(
            np.clip(requested, self.action_space.low[0], self.action_space.high[0])
        )
        self.lab.Q1(heater)
        self.lab.Q2(heater)

        temperature = self.lab.T1
        reward = -abs(float(temperature) - self.setpoint)
        terminated = False  # No terminal state in continuous control
        truncated = False
        return self._observation(temperature), reward, terminated, truncated, {}

    def close(self):
        """Turn both heaters off and release the lab connection."""
        try:
            self.lab.Q1(0)
            self.lab.Q2(0)
        finally:
            # Always release the device, even if switching a heater off failed.
            self.lab.close()