class ReplayBuffer:
    """Fixed-capacity circular store of (state, action, reward, next_state, done) transitions."""

    def __init__(self, capacity=100000):
        self.capacity = capacity  # maximum number of transitions retained
        self.buffer = []          # storage; grows until it reaches capacity
        self.pos = 0              # index where the next transition is written

    def add(self, state, action, reward, next_state, done):
        """Append one transition, overwriting the oldest entry once the buffer is full."""
        if len(self.buffer) < self.capacity:
            # Still growing: reserve a slot so the write below is an index assignment.
            self.buffer.append(None)
        self.buffer[self.pos] = (state, action, reward, next_state, done)
        # Advance the write cursor, wrapping around (circular buffer).
        self.pos = (self.pos + 1) % self.capacity

    def sample(self, batch_size):
        """Draw ``batch_size`` distinct transitions uniformly at random.

        Returns a 5-tuple of float32 torch tensors
        (states, actions, rewards, next_states, dones), where rewards and
        dones carry a trailing singleton dimension for broadcasting.
        """
        indices = np.random.choice(len(self.buffer), batch_size, replace=False)
        picked = [self.buffer[i] for i in indices]
        states, actions, rewards, next_states, dones = zip(*picked)

        def to_tensor(values):
            # Stack via numpy first, then hand off to torch as float32.
            return torch.tensor(np.array(values), dtype=torch.float32)

        return (
            to_tensor(states),
            to_tensor(actions),
            to_tensor(rewards).unsqueeze(1),
            to_tensor(next_states),
            to_tensor(dones).unsqueeze(1),
        )

    def __len__(self):
        """Number of transitions currently stored (at most ``capacity``)."""
        return len(self.buffer)