当前位置：首页 > news > 正文

【URL】一个基于Gym的简单2D随机游走环境，用于无监督强化学习（URL）

news 2025/7/14 8:36:49

import gym
from gym import spaces
import numpy as np
import pygame


class RandomWalk2DEnv(gym.Env):
    """2D random-walk environment on a bounded square, rendered with pygame.

    The agent starts at (0, 0) and moves by ``action * step_size`` each step.
    Positions are clipped to ``[x_min, x_max] x [y_min, y_max]``; an episode
    ends as soon as the agent touches any edge. The reward is the negative
    Euclidean distance from the origin.

    The class follows the classic gym API: ``reset()`` returns an observation
    and ``step()`` returns ``(observation, reward, done, info)``.
    """

    def __init__(self):
        super().__init__()

        # State space: 2D coordinates (x, y).
        self.x_min, self.x_max = -10, 10
        self.y_min, self.y_max = -10, 10
        self.observation_space = spaces.Box(
            np.array([self.x_min, self.y_min], dtype=np.float32),
            np.array([self.x_max, self.y_max], dtype=np.float32),
            dtype=np.float32,
        )

        # Action space: 2D vector whose x and y components lie in [-1, 1].
        self.action_space = spaces.Box(
            np.array([-1.0, -1.0], dtype=np.float32),
            np.array([1.0, 1.0], dtype=np.float32),
            dtype=np.float32,
        )

        # Internal position of the agent (kept in float64 for accumulation).
        self.state = np.array([0.0, 0.0])

        # Distance travelled per unit of action.
        self.step_size = 1.0

        # Set up pygame and the display window.
        pygame.init()
        self.screen_size = (800, 800)
        self.screen = pygame.display.set_mode(self.screen_size)
        pygame.display.set_caption("Random Walk 2D")

        # Number of pixels per world unit.
        self.scale = 40
        # Kept for compatibility; the drawing code in this class does not read
        # it and instead centres the world on the middle of the window.
        self.origin = np.array([self.x_max, self.y_max])

        # Trajectory of visited positions. Seeded with the start position so
        # that render() works even before reset() has been called.
        self.path_x = [float(self.state[0])]
        self.path_y = [float(self.state[1])]

    def _get_obs(self):
        """Return a float32 copy of the current position.

        A copy is returned so that later in-place updates of ``self.state``
        cannot alter observations the caller has already stored, and float32
        matches the dtype declared by ``observation_space``.
        """
        return self.state.astype(np.float32)

    def _to_screen(self, x, y):
        """Convert world coordinates to integer pixel coordinates.

        The world origin maps to the centre of the window and the y axis
        points up (pixel rows grow downwards, hence the subtraction).
        """
        px = int(x * self.scale + self.screen_size[0] // 2)
        py = int(self.screen_size[1] // 2 - y * self.scale)
        return px, py

    def reset(self):
        """Move the agent back to (0, 0), clear the path, return the observation."""
        self.state = np.array([0.0, 0.0])
        self.path_x = [float(self.state[0])]
        self.path_y = [float(self.state[1])]
        return self._get_obs()

    def step(self, action):
        """Advance the walk by one action.

        Args:
            action: Sequence of two numbers; each component is clipped to
                [-1, 1] before being applied.

        Returns:
            Tuple ``(observation, reward, done, info)`` where ``observation``
            is a float32 copy of the new position, ``reward`` is the negative
            distance from the origin, ``done`` is True when the agent touches
            an edge, and ``info`` is an empty dict.
        """
        # Silently correct out-of-range actions.
        action = np.clip(action, -1.0, 1.0)

        # Move, then keep the position inside the arena.
        self.state[0] = np.clip(
            self.state[0] + action[0] * self.step_size, self.x_min, self.x_max
        )
        self.state[1] = np.clip(
            self.state[1] + action[1] * self.step_size, self.y_min, self.y_max
        )

        # After clipping, touching an edge means sitting exactly on a bound.
        x, y = self.state
        done = bool(
            x <= self.x_min
            or x >= self.x_max
            or y <= self.y_min
            or y >= self.y_max
        )

        # Record the trajectory for rendering.
        self.path_x.append(float(x))
        self.path_y.append(float(y))

        # The farther from the origin, the lower the reward.
        reward = -float(np.linalg.norm(self.state))

        return self._get_obs(), reward, done, {}

    def render(self, mode='human'):
        """Draw the grid, the trajectory, the start marker and the agent.

        Also drains the pygame event queue so the window stays responsive;
        closing the window shuts pygame down and raises ``SystemExit``.
        """
        # White background.
        self.screen.fill((255, 255, 255))

        self.draw_grid()

        # Trajectory as red line segments between consecutive positions.
        points = [
            self._to_screen(x, y) for x, y in zip(self.path_x, self.path_y)
        ]
        for start, end in zip(points, points[1:]):
            pygame.draw.line(self.screen, (255, 0, 0), start, end, 2)

        # Start position as a blue dot.
        if points:
            pygame.draw.circle(self.screen, (0, 0, 255), points[0], 5)

        # Current agent position as a smaller black dot.
        current = self._to_screen(self.state[0], self.state[1])
        pygame.draw.circle(self.screen, (0, 0, 0), current, 3)

        pygame.display.flip()

        # Process window events so the program does not appear frozen.
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                # Equivalent to exit(), without relying on the site module.
                raise SystemExit

    def draw_grid(self):
        """Draw dashed grid lines at even coordinates and solid centre axes."""
        line_color = (200, 200, 200)   # grey dashed grid
        center_line_color = (0, 0, 0)  # black centre axes
        line_width = 1
        dash_length = 10  # length of each dash in pixels
        dash_gap = 5      # gap between dashes in pixels

        # Pixel extent of the arena.
        left, top = self._to_screen(self.x_min, self.y_max)
        right, bottom = self._to_screen(self.x_max, self.y_min)

        # Vertical dashed lines at even, non-zero x. Each line is drawn once.
        for x in range(self.x_min, self.x_max + 1):
            if x == 0 or x % 2:
                continue
            x_pos, _ = self._to_screen(x, 0)
            self.draw_dashed_line(
                x_pos, top, x_pos, bottom, line_color, dash_length, dash_gap
            )

        # Horizontal dashed lines at even, non-zero y.
        for y in range(self.y_min, self.y_max + 1):
            if y == 0 or y % 2:
                continue
            _, y_pos = self._to_screen(0, y)
            self.draw_dashed_line(
                left, y_pos, right, y_pos, line_color, dash_length, dash_gap
            )

        # Centre axes are drawn last so the dashed lines never cover them.
        center_x, center_y = self._to_screen(0, 0)
        if self.x_min <= 0 <= self.x_max:
            pygame.draw.line(
                self.screen, center_line_color,
                (center_x, top), (center_x, bottom), line_width,
            )
        if self.y_min <= 0 <= self.y_max:
            pygame.draw.line(
                self.screen, center_line_color,
                (left, center_y), (right, center_y), line_width,
            )

    def draw_dashed_line(self, x1, y1, x2, y2, color, dash_length, dash_gap):
        """Draw a dashed line from (x1, y1) to (x2, y2) on the screen.

        Only whole dash+gap periods that fit in the line are drawn. Nothing is
        drawn for a zero-length line or a non-positive dash period.
        """
        total_length = np.linalg.norm([x2 - x1, y2 - y1])
        period = dash_length + dash_gap
        if total_length == 0 or period <= 0:
            return

        # Unit direction along the line.
        ux = (x2 - x1) / total_length
        uy = (y2 - y1) / total_length

        for i in range(int(total_length / period)):
            dash_start = i * period
            dash_end = dash_start + dash_length
            pygame.draw.line(
                self.screen,
                color,
                (x1 + ux * dash_start, y1 + uy * dash_start),
                (x1 + ux * dash_end, y1 + uy * dash_end),
                1,
            )

    def close(self):
        """Shut down pygame and release the display window."""
        pygame.quit()


# Test code
if __name__ == "__main__":
    # Demo: run up to 50 random steps, rendering each one.
    env = RandomWalk2DEnv()
    env.reset()
    try:
        for _ in range(50):
            # Sample a random action; x and y components lie in [-1, 1].
            action = env.action_space.sample()
            state, reward, done, info = env.step(action)
            env.render()
            if done:
                print("智能体触及边缘，回合结束")
                break
    finally:
        # Always give the environment a chance to clean up, even if the loop
        # raises or the window is closed mid-run.
        env.close()