
PyTorch Tutorial 23.7: Utility Functions and Classes



This section contains implementations of the utility functions and classes used in this book. The imports below are listed once per framework: PyTorch, MXNet, JAX, and TensorFlow.

PyTorch:

import collections
import inspect
from IPython import display
from torch import nn
from d2l import torch as d2l
MXNet:

import collections
import inspect
import random
from IPython import display
from mxnet import autograd, gluon, np, npx
from mxnet.gluon import nn
from d2l import mxnet as d2l

npx.set_np()
JAX:

import collections
import inspect
import jax
from IPython import display
from d2l import jax as d2l
TensorFlow:

import collections
import inspect
import tensorflow as tf
from IPython import display
from d2l import tensorflow as d2l

Hyperparameters.

@d2l.add_to_class(d2l.HyperParameters)  #@save
def save_hyperparameters(self, ignore=[]):
    """Save function arguments into class attributes."""
    frame = inspect.currentframe().f_back
    _, _, _, local_vars = inspect.getargvalues(frame)
    self.hparams = {k: v for k, v in local_vars.items()
                    if k not in set(ignore + ['self']) and not k.startswith('_')}
    for k, v in self.hparams.items():
        setattr(self, k, v)
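
As a quick sanity check, the sketch below defines a hypothetical class B (mirroring how the book uses d2l.HyperParameters elsewhere) whose constructor saves a and b as attributes while ignoring c:

class B(d2l.HyperParameters):
    def __init__(self, a, b, c):
        self.save_hyperparameters(ignore=['c'])
        # a and b are now attributes; c was skipped
        print('self.a =', self.a, 'self.b =', self.b)
        print('There is no self.c =', not hasattr(self, 'c'))

b = B(a=1, b=2, c=3)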

Progress bar.

@d2l.add_to_class(d2l.ProgressBoard)  #@save
def draw(self, x, y, label, every_n=1):
    Point = collections.namedtuple('Point', ['x', 'y'])
    if not hasattr(self, 'raw_points'):
        self.raw_points = collections.OrderedDict()
        self.data = collections.OrderedDict()
    if label not in self.raw_points:
        self.raw_points[label] = []
        self.data[label] = []
    points = self.raw_points[label]
    line = self.data[label]
    points.append(Point(x, y))
    if len(points) != every_n:
        return
    mean = lambda x: sum(x) / len(x)
    line.append(Point(mean([p.x for p in points]),
                      mean([p.y for p in points])))
    points.clear()
    if not self.display:
        return
    d2l.use_svg_display()
    if self.fig is None:
        self.fig = d2l.plt.figure(figsize=self.figsize)
    plt_lines, labels = [], []
    for (k, v), ls, color in zip(self.data.items(), self.ls, self.colors):
        plt_lines.append(d2l.plt.plot([p.x for p in v], [p.y for p in v],
                                      linestyle=ls, color=color)[0])
        labels.append(k)
    axes = self.axes if self.axes else d2l.plt.gca()
    if self.xlim: axes.set_xlim(self.xlim)
    if self.ylim: axes.set_ylim(self.ylim)
    if not self.xlabel: self.xlabel = self.x
    axes.set_xlabel(self.xlabel)
    axes.set_ylabel(self.ylabel)
    axes.set_xscale(self.xscale)
    axes.set_yscale(self.yscale)
    axes.legend(plt_lines, labels)
    display.display(self.fig)
    display.clear_output(wait=True)
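
A minimal usage sketch (the curve and every_n value here are arbitrary choices): plot a sine wave in a notebook, averaging every 10 raw points into a single plotted point so the line stays smooth and cheap to render.

import numpy as np

board = d2l.ProgressBoard('x')
for x in np.arange(0, 10, 0.1):
    board.draw(x, np.sin(x), 'sin', every_n=10)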

Add the FrozenLake environment

import gym

def frozen_lake(seed):  #@save
    # See https://www.gymlibrary.dev/environments/toy_text/frozen_lake/ to learn
    # more about this env. How to process env.P.items is adapted from
    # https://sites.google.com/view/deep-rl-bootcamp/labs
    env = gym.make('FrozenLake-v1', is_slippery=False)
    env.seed(seed)  # env.seed() was removed in gym>=0.26; this needs an older gym
    env.action_space.np_random.seed(seed)
    env.action_space.seed(seed)
    env_info = {}
    env_info['desc'] = env.desc  # 2D array specifying what each grid item means
    env_info['num_states'] = env.nS  # Number of observations/states or obs/state dim
    env_info['num_actions'] = env.nA  # Number of actions or action dim
    # Define indices for (transition probability, nextstate, reward, done) tuple
    env_info['trans_prob_idx'] = 0  # Index of transition probability entry
    env_info['nextstate_idx'] = 1  # Index of next state entry
    env_info['reward_idx'] = 2  # Index of reward entry
    env_info['done_idx'] = 3  # Index of done entry
    env_info['mdp'] = {}
    env_info['env'] = env

    for (s, others) in env.P.items():
        # others(s) = {a0: [(p(s'|s,a0), s', reward, done), ...], a1: [...], ...}
        for (a, pxrds) in others.items():
            # pxrds is [(p1, next1, r1, d1), (p2, next2, r2, d2), ...].
            # e.g. [(0.3, 0, 0, False), (0.3, 0, 0, False), (0.3, 4, 1, False)]
            env_info['mdp'][(s, a)] = pxrds

    return env_info
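
For instance, the transitions available from the start state under action 0 (LEFT) can be read back from the returned dictionary (a sketch, assuming gym is installed):

env_info = frozen_lake(seed=0)
# Each entry is a list of (prob, next_state, reward, done) tuples
print(env_info['mdp'][(0, 0)])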

Create the environment

def make_env(name='', seed=0):  #@save
    # Input parameters:
    # name: specifies a gym environment.
    # For value iteration, only FrozenLake-v1 is supported.
    if name == 'FrozenLake-v1':
        return frozen_lake(seed)
    else:
        raise ValueError("%s env is not supported in this Notebook" % name)
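
Typical usage is then a single call (a sketch; FrozenLake's 4x4 grid has 16 states and 4 actions):

env_info = make_env('FrozenLake-v1', seed=0)
print(env_info['num_states'], env_info['num_actions'])  # 16 4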

Show the value function

import numpy as np
from matplotlib import pyplot as plt

def show_value_function_progress(env_desc, V, pi):  #@save
    # This function visualizes how the value and policy change over time.
    # V: [num_iters, num_states]
    # pi: [num_iters, num_states]
    # How to visualize the value function is adapted (but changed) from:
    # https://sites.google.com/view/deep-rl-bootcamp/labs

    num_iters = V.shape[0]
    fig, ax = plt.subplots(figsize=(15, 15))

    for k in range(V.shape[0]):
        plt.subplot(4, 4, k + 1)
        plt.imshow(V[k].reshape(4, 4), cmap="bone")
        ax = plt.gca()
        ax.set_xticks(np.arange(0, 5) - .5, minor=True)
        ax.set_yticks(np.arange(0, 5) - .5, minor=True)
        ax.grid(which="minor", color="w", linestyle='-', linewidth=3)
        ax.tick_params(which="minor", bottom=False, left=False)
        ax.set_xticks([])
        ax.set_yticks([])

        # LEFT action: 0, DOWN action: 1, RIGHT action: 2, UP action: 3
        # dy is positive downward because imshow places the origin at the top
        action2dxdy = {0: (-.25, 0), 1: (0, .25),
                       2: (.25, 0), 3: (0, -.25)}

        for y in range(4):
            for x in range(4):
                action = pi[k].reshape(4, 4)[y, x]
                dx, dy = action2dxdy[action]

                if env_desc[y, x].decode() == 'H':
                    ax.text(x, y, str(env_desc[y, x].decode()),
                            ha="center", va="center", color="y",
                            size=20, fontweight='bold')
                elif env_desc[y, x].decode() == 'G':
                    ax.text(x, y, str(env_desc[y, x].decode()),
                            ha="center", va="center", color="w",
                            size=20, fontweight='bold')
                else:
                    ax.text(x, y, str(env_desc[y, x].decode()),
                            ha="center", va="center", color="g",
                            size=15, fontweight='bold')

                # No arrow for cells with G and H labels
                if env_desc[y, x].decode() != 'G' and env_desc[y, x].decode() != 'H':
                    ax.arrow(x, y, dx, dy, color='r',
                             head_width=0.2, head_length=0.15)

        ax.set_title("Step = " + str(k + 1), fontsize=20)

    fig.tight_layout()
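
A usage sketch with placeholder arrays (in the book, V and pi are collected while running value iteration; the zeros below exist only to show the call signature and produce a degenerate plot):

num_iters, num_states = 16, 16
V = np.zeros((num_iters, num_states))  # placeholder state values
pi = np.zeros((num_iters, num_states), dtype=int)  # placeholder greedy actions
show_value_function_progress(env_info['desc'], V, pi)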
